Skip to content

Commit

Permalink
merge master branch into this branch
Browse files Browse the repository at this point in the history
  • Loading branch information
hannah-rae committed Feb 26, 2024
2 parents 52933cf + 120282b commit f6e1600
Show file tree
Hide file tree
Showing 10 changed files with 2,550 additions and 129 deletions.
50 changes: 50 additions & 0 deletions data/all_dataset_params.json
Original file line number Diff line number Diff line change
Expand Up @@ -808,6 +808,56 @@
12
]
},
"geowiki_landcover_2017,Kenya,Mali,Mali_lower_CEO_2019,Mali_upper_CEO_2019,Togo,Rwanda,Uganda,open_buildings,digitalearthafrica_eastern,digitalearthafrica_sahel,Ethiopia,Ethiopia_Tigray_2020,Ethiopia_Tigray_2021,Ethiopia_Bure_Jimma_2019,Ethiopia_Bure_Jimma_2020,Malawi_CEO_2020,Malawi_CEO_2019,Malawi_FAO,Malawi_FAO_corrected,Zambia_CEO_2019,Tanzania_CEO_2019,Namibia_corrective_labels_2020,Malawi_corrected,Namibia_CEO_2020,Namibia_WFP,Sudan_Blue_Nile_CEO_2019,Hawaii_CEO_2020,Senegal_CEO_2022,HawaiiAgriculturalLandUse2020,KenyaCEO2019,HawaiiCorrective2020,HawaiiCorrectiveGuided2020,MalawiCorrectiveLabels2020,SudanBlueNileCEO2020,SudanBlueNileCorrectiveLabels2019,EthiopiaTigrayCorrective2020,SudanAlGadarefCEO2019,MaliStratifiedCEO2019,SudanAlGadarefCEO2020,NamibiaNorthStratified2020,Namibia_field_samples_22_23,SudanGedarefDarfurAlJazirah2022,Uganda_NorthCEO2022,Uganda_NorthCEO2021_February_2022": {
"normalizing_dict": {
"mean": [
-11.253425188439937,
-18.32938103852523,
1435.394781661943,
1401.2376604488686,
1424.1887305810615,
1659.930606706183,
2418.9610242192266,
2797.4266694196153,
2686.556230689381,
3059.6146535391536,
839.3203256303339,
2492.394136925002,
1656.5102430595289,
290.4116372888038,
0.004740448735646688,
862.6486426458467,
5.645431586296536,
0.3394156161911947
],
"std": [
4.148125981435058,
5.060516119375943,
994.0712142249523,
965.2564704509458,
1160.026751746973,
1108.4164346403447,
1050.1378976686253,
1116.0387648387118,
1073.380355703031,
1145.5432935794531,
654.3036779889275,
1165.4722835564185,
1054.8715358765717,
35.62925408430905,
0.014667963835620806,
646.2067949711964,
7.102936150840518,
0.22947527099163295
]
},
"train_num_timesteps": [
12
],
"val_num_timesteps": [
12
]
},
"geowiki_landcover_2017,Kenya,Mali,Mali_lower_CEO_2019,Mali_upper_CEO_2019,Togo,Rwanda,Uganda,open_buildings,digitalearthafrica_eastern,digitalearthafrica_sahel,Ethiopia,Ethiopia_Tigray_2020,Ethiopia_Tigray_2021,Ethiopia_Bure_Jimma_2019,Ethiopia_Bure_Jimma_2020,Malawi_CEO_2020,Malawi_CEO_2019,Malawi_FAO,Malawi_FAO_corrected,Zambia_CEO_2019,Tanzania_CEO_2019,Namibia_corrective_labels_2020,Malawi_corrected,Namibia_CEO_2020,Namibia_WFP,Sudan_Blue_Nile_CEO_2019,Hawaii_CEO_2020,Senegal_CEO_2022,HawaiiAgriculturalLandUse2020,KenyaCEO2019,HawaiiCorrective2020,HawaiiCorrectiveGuided2020,MalawiCorrectiveLabels2020,SudanBlueNileCEO2020,SudanBlueNileCorrectiveLabels2019,EthiopiaTigrayCorrective2020,SudanAlGadarefCEO2019,MaliStratifiedCEO2019,SudanAlGadarefCEO2020,NamibiaNorthStratified2020,Namibia_field_samples_22_23,SudanGedarefDarfurAlJazirah2022,Uganda_NorthCEO2022_February_2022": {
"normalizing_dict": {
"mean": [
Expand Down
6 changes: 3 additions & 3 deletions data/datasets.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: 63c6f22459e60941aa459671fbff891b.dir
size: 671350235
nfiles: 48
- md5: b7c381babff36cca1ef539c77e677de4.dir
size: 676810981
nfiles: 49
path: datasets
hash: md5
6 changes: 3 additions & 3 deletions data/models.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: 3c2840cad82955c5d02cbc3ef60757a0.dir
size: 82333214
nfiles: 66
- md5: e2f4f187db0f865868e57c4b69d1409c.dir
size: 83854781
nfiles: 68
path: models
hash: md5
17 changes: 17 additions & 0 deletions data/models.json
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,23 @@
"roc_auc_score": 0.8442
}
},
"Uganda_North_2021_V1": {
"params": "https://wandb.ai/nasa-harvest/crop-mask/runs/jecuc6f8",
"test_metrics": {
"accuracy": 0.8308,
"f1_score": 0.5545,
"precision_score": 0.6364,
"recall_score": 0.4912,
"roc_auc_score": 0.8323
},
"val_metrics": {
"accuracy": 0.8355,
"f1_score": 0.4865,
"precision_score": 0.5625,
"recall_score": 0.4286,
"roc_auc_score": 0.8153
}
},
"Uganda_North_2022_V1": {
"params": "https://wandb.ai/nasa-harvest/crop-mask/runs/v5f8m489",
"test_metrics": {
Expand Down
6 changes: 3 additions & 3 deletions data/raw.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: c033b32e0901a197d9f743c8329b24ae.dir
size: 443888649
nfiles: 387
- md5: 09700b46df6451c928d36156bd01507a.dir
size: 444212594
nfiles: 389
path: raw
hash: md5
9 changes: 9 additions & 0 deletions data/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -422,3 +422,12 @@ eo_data_complete 1000
✔ training amount: 387, positive class: 20.9%
✔ validation amount: 294, positive class: 23.1%
✔ testing amount: 319, positive class: 17.6%



Uganda_NorthCEO2021 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 1000
✔ training amount: 387, positive class: 16.0%
✔ validation amount: 294, positive class: 14.3%
✔ testing amount: 319, positive class: 17.9%
33 changes: 33 additions & 0 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,38 @@ def load_labels(self) -> pd.DataFrame:
return df


class Uganda_NorthCEO2021(LabeledDataset):
    """Labels built from the two "Uganda_North_2021" CEO sample-data CSV exports."""

    def load_labels(self) -> pd.DataFrame:
        """Read both CEO exports and return one aggregated row per (LON, LAT) pair.

        Rows with no answer to the cropland question are dropped. Each remaining
        answer is encoded as 1 when it equals "Crop" and 0 otherwise. Rows that
        share the same coordinates are then merged: the encoded answers are
        averaged, the rows are counted as ``num_labelers``, and the plot id,
        sample id and email values are combined with ``join_unique``.
        """
        question = "Does this pixel contain active cropland?"
        folder = raw_dir / "Uganda_North_2021"
        csv_names = (
            "ceo-UNHCR-North-Uganda-Feb-2021---Feb-2022-(Set-1)-sample-data-2024-02-07.csv",
            "ceo-UNHCR-North-Uganda-Feb-2021---Feb-2022-(Set-2)-sample-data-2024-02-07.csv",
        )
        samples = pd.concat([pd.read_csv(folder / name) for name in csv_names])

        # Keep only rows that carry an answer, then binarize it (1 = "Crop").
        labeled = samples[samples[question].notna()].copy()
        labeled[CLASS_PROB] = (labeled[question] == "Crop").astype(int)
        # Every surviving row counts as one labeler; summed per location below.
        labeled["num_labelers"] = 1

        aggregations = {
            CLASS_PROB: "mean",
            "num_labelers": "sum",
            "plotid": join_unique,
            "sampleid": join_unique,
            "email": join_unique,
        }
        merged = labeled.groupby([LON, LAT], as_index=False, sort=False).agg(aggregations)

        # The same fixed date range is applied to every row.
        merged[START] = date(2021, 1, 1)
        merged[END] = date(2022, 12, 31)
        # Subset assignment is delegated to the shared helper, called with 0.3 and 0.3.
        merged[SUBSET] = train_val_test_split(merged.index, 0.3, 0.3)
        return merged


datasets: List[LabeledDataset] = [
CustomLabeledDataset(
dataset="geowiki_landcover_2017",
Expand Down Expand Up @@ -1232,6 +1264,7 @@ def load_labels(self) -> pd.DataFrame:
Namibia_field_samples_22_23(),
SudanGedarefDarfurAlJazirah2022(),
Uganda_NorthCEO2022(),
Uganda_NorthCEO2021(),
]

if __name__ == "__main__":
Expand Down
Loading

0 comments on commit f6e1600

Please sign in to comment.