Skip to content

Commit

Permalink
Merge pull request #399 from nasaharvest/Add-data-UGA_2022_GEE
Browse files Browse the repository at this point in the history
Created new dataset
  • Loading branch information
adebowaledaniel authored Jun 11, 2024
2 parents 716def3 + 589e938 commit f1d15ab
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 6 deletions.
6 changes: 3 additions & 3 deletions data/datasets.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: d70a2e3e7b64b45737ab212c869b76e1.dir
size: 693418938
nfiles: 54
- md5: add693a42b38c020b895e8e7c855210d.dir
size: 709637820
nfiles: 55
path: datasets
hash: md5
6 changes: 3 additions & 3 deletions data/raw.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: 03ecf7c484499852ddf368170fa7201e.dir
size: 444911361
nfiles: 393
- md5: 7b5c138e97f360400e6b3fc080d896e2.dir
size: 445019716
nfiles: 394
path: raw
hash: md5
7 changes: 7 additions & 0 deletions data/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -450,3 +450,10 @@ eo_data_complete 1000
✔ training amount: 387, positive class: 21.4%
✔ validation amount: 294, positive class: 15.3%
✔ testing amount: 319, positive class: 14.1%



UgandaNorthCorLabel2022 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 2975
✔ training amount: 2975, positive class: 51.6%
13 changes: 13 additions & 0 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,6 +540,18 @@ def load_labels(self) -> pd.DataFrame:
return df


class UgandaNorthCorLabel2022(LabeledDataset):
    """Labeled dataset backed by the ``Uganda_North_2022_GEE_labels`` raw folder."""

    def load_labels(self) -> pd.DataFrame:
        """Build the label table for this dataset.

        Reads ``UGA_2022_labels_from_GEE.csv`` from the
        ``Uganda_North_2022_GEE_labels`` folder under ``raw_dir``, keeps one
        row per coordinate pair, and fills in the class, date-range and
        subset columns.

        Returns:
            The prepared DataFrame; every row is assigned to the
            "training" subset.
        """
        csv_path = (
            raw_dir / "Uganda_North_2022_GEE_labels" / "UGA_2022_labels_from_GEE.csv"
        )
        labels = pd.read_csv(csv_path)
        labels = labels.rename(columns={"lat": LAT, "long": LON})

        # One label per location: only the first row of each (LAT, LON) pair
        # survives, and the index is renumbered from 0 afterwards.
        labels = labels.drop_duplicates(subset=[LAT, LON])
        labels = labels.reset_index(drop=True)

        # Binarise the class column: a value of exactly 1 becomes 1,
        # anything else becomes 0.
        is_positive = labels["class_probability"] == 1
        labels[CLASS_PROB] = is_positive.astype(int)

        # Every row shares the same fixed date window.
        labels[START] = date(2022, 1, 1)
        labels[END] = date(2023, 12, 31)

        labels[SUBSET] = "training"
        return labels


datasets: List[LabeledDataset] = [
CustomLabeledDataset(
dataset="geowiki_landcover_2017",
Expand Down Expand Up @@ -1327,6 +1339,7 @@ def load_labels(self) -> pd.DataFrame:
Uganda_NorthCEO2022(),
Uganda_NorthCEO2021(),
UgandaNorthCEO2019(),
UgandaNorthCorLabel2022(),
]

if __name__ == "__main__":
Expand Down

0 comments on commit f1d15ab

Please sign in to comment.