From e9abc69d82af033b191d48fc597124c844d5ae1b Mon Sep 17 00:00:00 2001 From: yashgadhiya10 Date: Wed, 2 Oct 2024 14:56:41 -0400 Subject: [PATCH] Revert "Merge pull request #412 from nasaharvest/Add-data-North-Uganda-2017" This reverts commit 2172cd3ab5e7e820a604f4528c39ac484e342aac, reversing changes made to 9155e638a28ec3cec518f1b9558c5424a215386e. --- data/datasets.dvc | 6 +++--- data/raw.dvc | 6 +++--- data/report.txt | 9 --------- datasets.py | 33 --------------------------------- 4 files changed, 6 insertions(+), 48 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 6d5b7485..66359ef4 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: b45a782128aebf6786df801a75fbd46d.dir - size: 734199009 - nfiles: 62 +- md5: b073343b2883fe4537cbdb98bd447cc2.dir + size: 728780256 + nfiles: 61 path: datasets hash: md5 diff --git a/data/raw.dvc b/data/raw.dvc index f13f5629..bc5cdb43 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: 0916e33f6eef6c80a87e319427005f5e.dir - size: 446720790 - nfiles: 408 +- md5: 7ba2a5b698999a9bfa635252b5090ff7.dir + size: 446381456 + nfiles: 406 path: raw hash: md5 diff --git a/data/report.txt b/data/report.txt index 46f3000b..dcb2ab64 100644 --- a/data/report.txt +++ b/data/report.txt @@ -512,12 +512,3 @@ eo_data_export_failed 227 ✔ training amount: 293, positive class: 16.4% ✔ validation amount: 235, positive class: 18.3% ✔ testing amount: 245, positive class: 16.7% - - - -Uganda_NorthCEO2017 (Timesteps: 24) ----------------------------------------------------------------------------- -eo_data_complete 1000 -✔ training amount: 387, positive class: 1.3% -✔ validation amount: 294, positive class: 1.0% -✔ testing amount: 319, positive class: 1.3% diff --git a/datasets.py b/datasets.py index e6d5282e..7e690524 100644 --- a/datasets.py +++ b/datasets.py @@ -508,38 +508,6 @@ def load_labels(self) -> pd.DataFrame: return df -class Uganda_NorthCEO2017(LabeledDataset): - def load_labels(self) -> pd.DataFrame: - raw_folder = raw_dir / "Uganda_North_2017" - df1 = pd.read_csv( - raw_folder - / "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-1)-sample-data-2024-08-29.csv" - ) - df2 = pd.read_csv( - raw_folder - / "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-2)-sample-data-2024-08-29.csv" - ) - df = pd.concat([df1, df2]) - - # Discard rows with no label - df = df[~df["Does this pixel contain active cropland?"].isna()].copy() - df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop" - df[CLASS_PROB] = df[CLASS_PROB].astype(int) - df["num_labelers"] = 1 - df = df.groupby([LON, LAT], as_index=False, sort=False).agg( - { - CLASS_PROB: "mean", - "num_labelers": "sum", - "plotid": join_unique, - "sampleid": join_unique, - "email": join_unique, - } - ) - df[START], df[END] = date(2017, 1, 1), date(2018, 12, 31) - df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) - return df - - class Uganda_NorthCEO2021(LabeledDataset): def load_labels(self) -> pd.DataFrame: raw_folder = raw_dir / "Uganda_North_2021" @@ -1570,7 +1538,6 @@ def load_labels(self) -> pd.DataFrame: TanzaniaCropArea2019(), FranceCropArea2020(), Uganda_NorthCEO2016(), - Uganda_NorthCEO2017(), ] if __name__ == "__main__":