diff --git a/data/datasets.dvc b/data/datasets.dvc index 276fcda3..9bf7bcb0 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: d70a2e3e7b64b45737ab212c869b76e1.dir - size: 693418938 - nfiles: 54 +- md5: add693a42b38c020b895e8e7c855210d.dir + size: 709637820 + nfiles: 55 path: datasets hash: md5 diff --git a/data/raw.dvc b/data/raw.dvc index ca09426f..4df4834c 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: 03ecf7c484499852ddf368170fa7201e.dir - size: 444911361 - nfiles: 393 +- md5: 7b5c138e97f360400e6b3fc080d896e2.dir + size: 445019716 + nfiles: 394 path: raw hash: md5 diff --git a/data/report.txt b/data/report.txt index 08984ce3..25645569 100644 --- a/data/report.txt +++ b/data/report.txt @@ -450,3 +450,10 @@ eo_data_complete 1000 ✔ training amount: 387, positive class: 21.4% ✔ validation amount: 294, positive class: 15.3% ✔ testing amount: 319, positive class: 14.1% + + + +UgandaNorthCorLabel2022 (Timesteps: 24) +---------------------------------------------------------------------------- +eo_data_complete 2975 +✔ training amount: 2975, positive class: 51.6% diff --git a/datasets.py b/datasets.py index ed83fece..84dafa12 100644 --- a/datasets.py +++ b/datasets.py @@ -540,6 +540,18 @@ def load_labels(self) -> pd.DataFrame: return df +class UgandaNorthCorLabel2022(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + raw_folder = raw_dir / "Uganda_North_2022_GEE_labels" + df = pd.read_csv(raw_folder / "UGA_2022_labels_from_GEE.csv") + df.rename(columns={"lat": LAT, "long": LON}, inplace=True) + df = df.drop_duplicates(subset=[LAT, LON]).reset_index(drop=True) + df[CLASS_PROB] = (df["class_probability"] == 1).astype(int) + df[START], df[END] = date(2022, 1, 1), date(2023, 12, 31) + df[SUBSET] = "training" + return df + + datasets: List[LabeledDataset] = [ CustomLabeledDataset( dataset="geowiki_landcover_2017", @@ -1327,6 +1339,7 @@ def load_labels(self) -> pd.DataFrame: Uganda_NorthCEO2022(), Uganda_NorthCEO2021(), UgandaNorthCEO2019(), + UgandaNorthCorLabel2022(), ] if __name__ == "__main__":