diff --git a/training/lc_classifier_ztf/feature_computation/dataset.py b/training/lc_classifier_ztf/feature_computation/dataset.py index c848b8b04..21f0218b9 100644 --- a/training/lc_classifier_ztf/feature_computation/dataset.py +++ b/training/lc_classifier_ztf/feature_computation/dataset.py @@ -25,9 +25,9 @@ def create_astro_object(lc_df: pd.DataFrame, object_info: pd.Series) -> AstroObj lc_df = lc_df[lc_df["fid"].isin(["g", "r"]) \ & ((lc_df["procstatus"] == "0") \ - | (lc_df["procstatus"] == "57"))] + | (lc_df["procstatus"] == "57"))] - if len(lc_df[lc_df["detected"]]) == 0: + if len(lc_df[lc_df["detected"]]) < 2: raise NoDetections() diff_flux = lc_df[ @@ -112,6 +112,11 @@ def create_astro_object(lc_df: pd.DataFrame, object_info: pd.Series) -> AstroObj os.path.join(data_dir, "objects_with_wise_20241209.parquet") ) object_df.set_index("oid", inplace=True) + + exclude_df = pd.read_parquet( + os.path.join(data_dir, "objects_to_exclude_20241209.parquet") + ) + exclude_df.set_index("oid", inplace=True) for lc_filename in tqdm(lightcurve_filenames): batch_i_str = lc_filename.split(".")[0].split("_")[2] @@ -123,6 +128,10 @@ def create_astro_object(lc_df: pd.DataFrame, object_info: pd.Series) -> AstroObj astro_objects_list = [] for oid in batch_oids: + if oid in exclude_df.index: + print(oid + ' excluded from sample') + continue + lc = lightcurves.loc[[oid]] object_info = object_df.loc[oid] try: @@ -131,7 +140,7 @@ def create_astro_object(lc_df: pd.DataFrame, object_info: pd.Series) -> AstroObj astro_objects_list.append(astro_object) except NoDetections: print(object_info) - print("Object with no detections") + print("Object with not enough detections") save_astro_objects_batch( astro_objects_list,