From e9abc69d82af033b191d48fc597124c844d5ae1b Mon Sep 17 00:00:00 2001
From: yashgadhiya10 <ygadhiya@umd.edu>
Date: Wed, 2 Oct 2024 14:56:41 -0400
Subject: [PATCH] Revert "Merge pull request #412 from
 nasaharvest/Add-data-North-Uganda-2017"

This reverts commit 2172cd3ab5e7e820a604f4528c39ac484e342aac, reversing
changes made to 9155e638a28ec3cec518f1b9558c5424a215386e.
---
 data/datasets.dvc |  6 +++---
 data/raw.dvc      |  6 +++---
 data/report.txt   |  9 ---------
 datasets.py       | 33 ---------------------------------
 4 files changed, 6 insertions(+), 48 deletions(-)

diff --git a/data/datasets.dvc b/data/datasets.dvc
index 6d5b7485..66359ef4 100644
--- a/data/datasets.dvc
+++ b/data/datasets.dvc
@@ -1,6 +1,6 @@
 outs:
-- md5: b45a782128aebf6786df801a75fbd46d.dir
-  size: 734199009
-  nfiles: 62
+- md5: b073343b2883fe4537cbdb98bd447cc2.dir
+  size: 728780256
+  nfiles: 61
   path: datasets
   hash: md5
diff --git a/data/raw.dvc b/data/raw.dvc
index f13f5629..bc5cdb43 100644
--- a/data/raw.dvc
+++ b/data/raw.dvc
@@ -1,6 +1,6 @@
 outs:
-- md5: 0916e33f6eef6c80a87e319427005f5e.dir
-  size: 446720790
-  nfiles: 408
+- md5: 7ba2a5b698999a9bfa635252b5090ff7.dir
+  size: 446381456
+  nfiles: 406
   path: raw
   hash: md5
diff --git a/data/report.txt b/data/report.txt
index 46f3000b..dcb2ab64 100644
--- a/data/report.txt
+++ b/data/report.txt
@@ -512,12 +512,3 @@ eo_data_export_failed    227
 ✔ training amount: 293, positive class: 16.4%
 ✔ validation amount: 235, positive class: 18.3%
 ✔ testing amount: 245, positive class: 16.7%
-
-
-
-Uganda_NorthCEO2017 (Timesteps: 24)
-----------------------------------------------------------------------------
-eo_data_complete    1000
-✔ training amount: 387, positive class: 1.3%
-✔ validation amount: 294, positive class: 1.0%
-✔ testing amount: 319, positive class: 1.3%
diff --git a/datasets.py b/datasets.py
index e6d5282e..7e690524 100644
--- a/datasets.py
+++ b/datasets.py
@@ -508,38 +508,6 @@ def load_labels(self) -> pd.DataFrame:
         return df
 
 
-class Uganda_NorthCEO2017(LabeledDataset):
-    def load_labels(self) -> pd.DataFrame:
-        raw_folder = raw_dir / "Uganda_North_2017"
-        df1 = pd.read_csv(
-            raw_folder
-            / "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-1)-sample-data-2024-08-29.csv"
-        )
-        df2 = pd.read_csv(
-            raw_folder
-            / "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-2)-sample-data-2024-08-29.csv"
-        )
-        df = pd.concat([df1, df2])
-
-        # Discard rows with no label
-        df = df[~df["Does this pixel contain active cropland?"].isna()].copy()
-        df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop"
-        df[CLASS_PROB] = df[CLASS_PROB].astype(int)
-        df["num_labelers"] = 1
-        df = df.groupby([LON, LAT], as_index=False, sort=False).agg(
-            {
-                CLASS_PROB: "mean",
-                "num_labelers": "sum",
-                "plotid": join_unique,
-                "sampleid": join_unique,
-                "email": join_unique,
-            }
-        )
-        df[START], df[END] = date(2017, 1, 1), date(2018, 12, 31)
-        df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3)
-        return df
-
-
 class Uganda_NorthCEO2021(LabeledDataset):
     def load_labels(self) -> pd.DataFrame:
         raw_folder = raw_dir / "Uganda_North_2021"
@@ -1570,7 +1538,6 @@ def load_labels(self) -> pd.DataFrame:
     TanzaniaCropArea2019(),
     FranceCropArea2020(),
     Uganda_NorthCEO2016(),
-    Uganda_NorthCEO2017(),
 ]
 
 if __name__ == "__main__":