Skip to content

Commit

Permalink
Merge branch 'master' into Uganda_North_2022_V3
Browse files Browse the repository at this point in the history
  • Loading branch information
adebowaledaniel authored Jun 12, 2024
2 parents 67a8b0a + b2fa1f7 commit e353150
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 6 deletions.
6 changes: 3 additions & 3 deletions data/datasets.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: add693a42b38c020b895e8e7c855210d.dir
size: 709637820
nfiles: 55
- md5: c02aab5a545456ddcdcef0a64f8e2813.dir
size: 715450376
nfiles: 57
path: datasets
hash: md5
6 changes: 3 additions & 3 deletions data/raw.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: 7b5c138e97f360400e6b3fc080d896e2.dir
size: 445019716
nfiles: 394
- md5: 2e67f02eb223f5b41aac4686136977ab.dir
size: 445415281
nfiles: 398
path: raw
hash: md5
18 changes: 18 additions & 0 deletions data/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -457,3 +457,21 @@ UgandaNorthCorLabel2022 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 2975
✔ training amount: 2975, positive class: 51.6%



KenyaCropArea2019 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 544
✔ training amount: 232, positive class: 23.7%
✔ validation amount: 149, positive class: 18.1%
✔ testing amount: 163, positive class: 20.2%



RwandaCropArea2019 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 525
✔ training amount: 222, positive class: 38.3%
✔ validation amount: 145, positive class: 31.0%
✔ testing amount: 158, positive class: 34.8%
66 changes: 66 additions & 0 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,70 @@ def load_labels(self) -> pd.DataFrame:
return df


class KenyaCropArea2019(LabeledDataset):
    """Labeled dataset built from the two Kenya 2019 crop-area sample CSVs."""

    def load_labels(self) -> pd.DataFrame:
        """Read both sample sets and return one aggregated row per location.

        Rows without an answer to the cropland question are dropped, the
        answer is turned into a 0/1 label, and rows sharing the same
        (LON, LAT) are merged: the label is averaged, the number of rows is
        counted, and the plot/sample ids and emails are combined with
        ``join_unique``.

        Returns:
            DataFrame with LON, LAT, CLASS_PROB, "num_labelers", "plotid",
            "sampleid", "email", START, END and SUBSET columns.
        """
        question = "Does this point correspond to active cropland?"
        folder = raw_dir / "Kenya_Crop_Area_2019"
        csv_names = (
            "ceo-Kenya-Crop-Area-Estimation-Reference-Sample-2019---Set-1"
            "-sample-data-2024-06-10.csv",
            "ceo-Kenya-Crop-Area-Estimation-Reference-Sample-2019---Set-2"
            "-sample-data-2024-06-10.csv",
        )
        labels = pd.concat([pd.read_csv(folder / name) for name in csv_names])

        # Keep only the rows where the question was actually answered.
        labels = labels[labels[question].notna()].copy()

        # 1 when the answer is exactly "Crop", 0 for any other answer.
        labels[CLASS_PROB] = (labels[question] == "Crop").astype(int)
        # Each row counts once; summing per location gives the row count.
        labels["num_labelers"] = 1

        per_column = {
            CLASS_PROB: "mean",
            "num_labelers": "sum",
            "plotid": join_unique,
            "sampleid": join_unique,
            "email": join_unique,
        }
        # sort=False keeps locations in first-seen order; as_index=False
        # leaves LON/LAT as ordinary columns with a fresh index.
        merged = labels.groupby([LON, LAT], as_index=False, sort=False).agg(
            per_column
        )

        merged[START] = date(2019, 1, 1)
        merged[END] = date(2020, 12, 31)
        # NOTE(review): 0.3, 0.3 are presumably the validation and test
        # fractions -- confirm against train_val_test_split.
        merged[SUBSET] = train_val_test_split(merged.index, 0.3, 0.3)
        return merged


class RwandaCropArea2019(LabeledDataset):
    """Labeled dataset built from the two Rwanda 2019 cropland sample CSVs."""

    def load_labels(self) -> pd.DataFrame:
        """Read both sample sets and return one aggregated row per location.

        Rows without an answer to the cropland question are dropped, the
        answer is turned into a 0/1 label, and rows sharing the same
        (LON, LAT) are merged: the label is averaged, the number of rows is
        counted, and the plot/sample ids and emails are combined with
        ``join_unique``.

        Returns:
            DataFrame with LON, LAT, CLASS_PROB, "num_labelers", "plotid",
            "sampleid", "email", START, END and SUBSET columns.
        """
        question = "Does this point correspond to active cropland?"
        folder = raw_dir / "Rwanda-Crop-Area-2019"
        csv_names = (
            "ceo-Rwanda-Cropland-Area-Estimation-Reference-Sample-2019---Set-1"
            "-sample-data-2024-06-10.csv",
            "ceo-Rwanda-Cropland-Area-Estimation-Reference-Sample-2019---Set-2"
            "-sample-data-2024-06-10.csv",
        )
        labels = pd.concat([pd.read_csv(folder / name) for name in csv_names])

        # Keep only the rows where the question was actually answered.
        labels = labels[labels[question].notna()].copy()

        # 1 when the answer is exactly "Crop", 0 for any other answer.
        labels[CLASS_PROB] = (labels[question] == "Crop").astype(int)
        # Each row counts once; summing per location gives the row count.
        labels["num_labelers"] = 1

        per_column = {
            CLASS_PROB: "mean",
            "num_labelers": "sum",
            "plotid": join_unique,
            "sampleid": join_unique,
            "email": join_unique,
        }
        # sort=False keeps locations in first-seen order; as_index=False
        # leaves LON/LAT as ordinary columns with a fresh index.
        merged = labels.groupby([LON, LAT], as_index=False, sort=False).agg(
            per_column
        )

        merged[START] = date(2019, 1, 1)
        merged[END] = date(2020, 12, 31)
        # NOTE(review): 0.3, 0.3 are presumably the validation and test
        # fractions -- confirm against train_val_test_split.
        merged[SUBSET] = train_val_test_split(merged.index, 0.3, 0.3)
        return merged


datasets: List[LabeledDataset] = [
CustomLabeledDataset(
dataset="geowiki_landcover_2017",
Expand Down Expand Up @@ -1340,6 +1404,8 @@ def load_labels(self) -> pd.DataFrame:
Uganda_NorthCEO2021(),
UgandaNorthCEO2019(),
UgandaNorthCorLabel2022(),
KenyaCropArea2019(),
RwandaCropArea2019(),
]

if __name__ == "__main__":
Expand Down

0 comments on commit e353150

Please sign in to comment.