From a472f979efaafccb21f07de1c4884413a13d6911 Mon Sep 17 00:00:00 2001 From: yashgadhiya10 Date: Tue, 1 Oct 2024 10:29:06 -0400 Subject: [PATCH 1/7] Created new dataset North Uganda 2020 --- data/raw.dvc | 6 +++--- datasets.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/data/raw.dvc b/data/raw.dvc index f13f5629..ea890038 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: 0916e33f6eef6c80a87e319427005f5e.dir - size: 446720790 - nfiles: 408 +- md5: f08af5bba486092e8c1ce82375a2e247.dir + size: 447063881 + nfiles: 410 path: raw hash: md5 diff --git a/datasets.py b/datasets.py index e6d5282e..c796f1df 100644 --- a/datasets.py +++ b/datasets.py @@ -540,6 +540,38 @@ def load_labels(self) -> pd.DataFrame: return df +class Uganda_NorthCEO2020(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + raw_folder = raw_dir / "Uganda_North_2020" + df1 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-1)-sample-data-2024-10-01.csv" + ) + df2 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-2)-sample-data-2024-10-01.csv" + ) + df = pd.concat([df1, df2]) + + # Discard rows with no label + df = df[~df["Does this pixel contain active cropland?"].isna()].copy() + df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop" + df[CLASS_PROB] = df[CLASS_PROB].astype(int) + df["num_labelers"] = 1 + df = df.groupby([LON, LAT], as_index=False, sort=False).agg( + { + CLASS_PROB: "mean", + "num_labelers": "sum", + "plotid": join_unique, + "sampleid": join_unique, + "email": join_unique, + } + ) + df[START], df[END] = date(2019, 1, 1), date(2020, 12, 31) + df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) + return df + + class Uganda_NorthCEO2021(LabeledDataset): def load_labels(self) -> pd.DataFrame: raw_folder = raw_dir / "Uganda_North_2021" @@ -1571,6 +1603,7 @@ def load_labels(self) -> pd.DataFrame: FranceCropArea2020(), Uganda_NorthCEO2016(), Uganda_NorthCEO2017(), + Uganda_NorthCEO2020(), ] if __name__ == "__main__": From b7c7400716c0c96a204029ba7dd2c6da0a34777c Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Fri, 22 Nov 2024 22:10:31 +0000 Subject: [PATCH 2/7] Automated dataset updates --- data/datasets.dvc | 6 +++--- data/report.txt | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 0229b588..446b420c 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 255771180339afd476bb65d11d0406e5.dir - size: 745188620 - nfiles: 62 +- md5: 7150e59ea7ea89f362cc843236ea051f.dir + size: 750604023 + nfiles: 63 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index 4d6dd396..1d384350 100644 --- a/data/report.txt +++ b/data/report.txt @@ -519,3 +519,12 @@ eo_data_complete 1000 ✔ training amount: 387, positive class: 1.3% ✔ validation amount: 294, positive class: 1.0% ✔ testing amount: 319, positive class: 1.3% + + + +Uganda_NorthCEO2020 (Timesteps: 24) +---------------------------------------------------------------------------- +eo_data_complete 1000 +✔ training amount: 387, positive class: 21.4% +✔ validation amount: 294, positive class: 15.3% +✔ testing amount: 319, positive class: 14.1% From cbf8448662e3ec43e946513b8c6a40828c8c6fc1 Mon Sep 17 00:00:00 2001 From: ivanzvonkov Date: Sun, 24 Nov 2024 13:28:10 -0500 Subject: [PATCH 3/7] Trigger Build From d34cdf2e2bceff30bd4f30805c872ee84c4ed129 Mon Sep 17 00:00:00 2001 From: yashgadhiya10 Date: Tue, 26 Nov 2024 22:59:36 -0500 Subject: [PATCH 4/7] Fixed wrong 2020 csv file upload --- data/raw.dvc | 4 ++-- datasets.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/data/raw.dvc b/data/raw.dvc index ea890038..c4545e09 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: f08af5bba486092e8c1ce82375a2e247.dir - size: 447063881 +- md5: b5b44ce605c7cd43d7e94d2778d8d34a.dir + size: 447063610 nfiles: 410 path: raw hash: md5 diff --git a/datasets.py b/datasets.py index 0c3d8ce2..392c9384 100644 --- a/datasets.py +++ b/datasets.py @@ -545,11 +545,11 @@ def load_labels(self) -> pd.DataFrame: raw_folder = raw_dir / "Uganda_North_2020" df1 = pd.read_csv( raw_folder - / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-1)-sample-data-2024-10-01.csv" + / "ceo-UNHCR-North-Uganda-Feb-2020---Feb-2021-(Set-1)-sample-data-2024-11-27.csv" ) df2 = pd.read_csv( raw_folder - / "ceo-UNHCR-North-Uganda-Feb-2019---Feb-2020-(Set-2)-sample-data-2024-10-01.csv" + / "ceo-UNHCR-North-Uganda-Feb-2020---Feb-2021-(Set-2)-sample-data-2024-11-27.csv" ) df = pd.concat([df1, df2]) From 0862f5848a9248ce9b5c0b8add7cf792e9ab6ae0 Mon Sep 17 00:00:00 2001 From: ivanzvonkov Date: Mon, 2 Dec 2024 18:18:02 -0500 Subject: [PATCH 5/7] Remove Uganda 2020 --- data/datasets.dvc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 446b420c..0229b588 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 7150e59ea7ea89f362cc843236ea051f.dir - size: 750604023 - nfiles: 63 +- md5: 255771180339afd476bb65d11d0406e5.dir + size: 745188620 + nfiles: 62 path: datasets hash: md5 From d5e59c7a3e2285f567f5d0a7e15158c11a50187f Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Mon, 2 Dec 2024 23:31:03 +0000 Subject: [PATCH 6/7] Automated dataset updates --- data/datasets.dvc | 6 +++--- data/report.txt | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 0229b588..40956867 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 255771180339afd476bb65d11d0406e5.dir - size: 745188620 - nfiles: 62 +- md5: ebe078dafe84bf33fd0b6244a5d96976.dir + size: 750599973 + nfiles: 63 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index 1d384350..99fa0970 100644 --- a/data/report.txt +++ b/data/report.txt @@ -525,6 +525,6 @@ eo_data_complete 1000 Uganda_NorthCEO2020 (Timesteps: 24) ---------------------------------------------------------------------------- eo_data_complete 1000 -✔ training amount: 387, positive class: 21.4% -✔ validation amount: 294, positive class: 15.3% -✔ testing amount: 319, positive class: 14.1% +✔ training amount: 387, positive class: 11.9% +✔ validation amount: 294, positive class: 7.8% +✔ testing amount: 319, positive class: 7.8% From 0731600061c7d4254a426701bdc1cd2414209847 Mon Sep 17 00:00:00 2001 From: ivanzvonkov Date: Mon, 2 Dec 2024 20:14:19 -0500 Subject: [PATCH 7/7] Trigger Build