Skip to content

Commit

Permalink
Merge pull request #412 from nasaharvest/Add-data-North-Uganda-2017
Browse files Browse the repository at this point in the history
Created new Dataset North-Uganda-2017
  • Loading branch information
yashgadhiya10 authored Sep 9, 2024
2 parents 9155e63 + d9471b6 commit 2172cd3
Show file tree
Hide file tree
Showing 4 changed files with 48 additions and 6 deletions.
6 changes: 3 additions & 3 deletions data/datasets.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: b073343b2883fe4537cbdb98bd447cc2.dir
size: 728780256
nfiles: 61
- md5: b45a782128aebf6786df801a75fbd46d.dir
size: 734199009
nfiles: 62
path: datasets
hash: md5
6 changes: 3 additions & 3 deletions data/raw.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: 7ba2a5b698999a9bfa635252b5090ff7.dir
size: 446381456
nfiles: 406
- md5: 0916e33f6eef6c80a87e319427005f5e.dir
size: 446720790
nfiles: 408
path: raw
hash: md5
9 changes: 9 additions & 0 deletions data/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -512,3 +512,12 @@ eo_data_export_failed 227
✔ training amount: 293, positive class: 16.4%
✔ validation amount: 235, positive class: 18.3%
✔ testing amount: 245, positive class: 16.7%



Uganda_NorthCEO2017 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 1000
✔ training amount: 387, positive class: 1.3%
✔ validation amount: 294, positive class: 1.0%
✔ testing amount: 319, positive class: 1.3%
33 changes: 33 additions & 0 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,38 @@ def load_labels(self) -> pd.DataFrame:
return df


class Uganda_NorthCEO2017(LabeledDataset):
    """Labeled dataset built from the two "Uganda_North_2017" sample-data CSV files."""

    def load_labels(self) -> pd.DataFrame:
        """Load, merge and aggregate the Set-1 and Set-2 label files.

        Rows without an answer to the cropland question are dropped, the
        answer is converted to a 0/1 value, and rows sharing the same
        (LON, LAT) pair are collapsed into a single row holding the mean
        class value, the number of contributing rows and the joined
        plot / sample / email identifiers.

        Returns:
            One row per unique coordinate, with the START, END and SUBSET
            columns filled in.
        """
        raw_folder = raw_dir / "Uganda_North_2017"
        csv_names = (
            "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-1)-sample-data-2024-08-29.csv",
            "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-2)-sample-data-2024-08-29.csv",
        )
        labels = pd.concat([pd.read_csv(raw_folder / csv_name) for csv_name in csv_names])

        question = "Does this pixel contain active cropland?"

        # Keep only the rows where the question was actually answered
        labels = labels[labels[question].notna()].copy()

        # "Crop" -> 1, any other answer -> 0
        labels[CLASS_PROB] = (labels[question] == "Crop").astype(int)

        # Every raw row counts as one labeler; summed per coordinate below
        labels["num_labelers"] = 1

        aggregations = {
            CLASS_PROB: "mean",
            "num_labelers": "sum",
            "plotid": join_unique,
            "sampleid": join_unique,
            "email": join_unique,
        }
        labels = labels.groupby([LON, LAT], as_index=False, sort=False).agg(aggregations)

        labels[START] = date(2017, 1, 1)
        labels[END] = date(2018, 12, 31)
        labels[SUBSET] = train_val_test_split(labels.index, 0.3, 0.3)
        return labels


class Uganda_NorthCEO2021(LabeledDataset):
def load_labels(self) -> pd.DataFrame:
raw_folder = raw_dir / "Uganda_North_2021"
Expand Down Expand Up @@ -1538,6 +1570,7 @@ def load_labels(self) -> pd.DataFrame:
TanzaniaCropArea2019(),
FranceCropArea2020(),
Uganda_NorthCEO2016(),
Uganda_NorthCEO2017(),
]

if __name__ == "__main__":
Expand Down

0 comments on commit 2172cd3

Please sign in to comment.