Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add data north uganda 2016 #408

Merged
merged 13 commits into from
Aug 2, 2024
6 changes: 3 additions & 3 deletions data/datasets.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: a91622aa2324bcabbd664c59d3d6d6df.dir
size: 724558428
nfiles: 60
- md5: b073343b2883fe4537cbdb98bd447cc2.dir
size: 728780256
nfiles: 61
path: datasets
hash: md5
6 changes: 3 additions & 3 deletions data/raw.dvc
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
outs:
- md5: 2ef29247f3459c578895650ba50a129c.dir
size: 446045451
nfiles: 404
- md5: 7ba2a5b698999a9bfa635252b5090ff7.dir
size: 446381456
nfiles: 406
path: raw
hash: md5
10 changes: 10 additions & 0 deletions data/report.txt
Original file line number Diff line number Diff line change
Expand Up @@ -502,3 +502,13 @@ eo_data_complete 650
✔ training amount: 265, positive class: 22.3%
✔ validation amount: 184, positive class: 22.3%
✔ testing amount: 201, positive class: 26.4%



Uganda_NorthCEO2016 (Timesteps: 24)
----------------------------------------------------------------------------
eo_data_complete 773
eo_data_export_failed 227
✔ training amount: 293, positive class: 16.4%
✔ validation amount: 235, positive class: 18.3%
✔ testing amount: 245, positive class: 16.7%
33 changes: 33 additions & 0 deletions datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,38 @@ def load_labels(self) -> pd.DataFrame:
return df


class Uganda_NorthCEO2016(LabeledDataset):
    """Cropland labels for northern Uganda built from two CEO sample-data CSV exports.

    The source files are the "Set-1" and "Set-2" exports whose names carry the
    range "Feb-2016---Feb-2017".
    """

    def load_labels(self) -> pd.DataFrame:
        """Load both label sets and collapse them to one row per coordinate.

        Returns:
            A DataFrame with one row per unique (LON, LAT) pair holding the
            mean crop label, the number of contributing rows, the aggregated
            plot/sample/email identifiers, the START/END dates and the SUBSET
            assignment.
        """
        label_question = "Does this pixel contain active cropland?"
        source_dir = raw_dir / "Uganda_North_2016"
        csv_names = (
            "ceo-UNHCR-North-Uganda-Feb-2016---Feb-2017-(Set-1)-sample-data-2024-07-25.csv",
            "ceo-UNHCR-North-Uganda-Feb-2016---Feb-2017-(Set-2)-sample-data-2024-07-25.csv",
        )
        labels = pd.concat([pd.read_csv(source_dir / name) for name in csv_names])

        # Keep only the rows where the question was actually answered.
        labels = labels[labels[label_question].notna()].copy()

        # Binary label: 1 when the answer is exactly "Crop", 0 for any other answer.
        labels[CLASS_PROB] = (labels[label_question] == "Crop").astype(int)

        # Each remaining row counts as one labeler; summed per location below.
        labels["num_labelers"] = 1

        # Rows sharing a coordinate are merged: the label becomes the share of
        # "Crop" answers, and the identifier columns are combined by join_unique
        # (helper defined elsewhere in this file).
        aggregations = {
            CLASS_PROB: "mean",
            "num_labelers": "sum",
            "plotid": join_unique,
            "sampleid": join_unique,
            "email": join_unique,
        }
        labels = labels.groupby([LON, LAT], as_index=False, sort=False).agg(aggregations)

        # Every point gets the same two-calendar-year window.
        labels[START] = date(2016, 1, 1)
        labels[END] = date(2017, 12, 31)

        # NOTE(review): the two 0.3 arguments are presumably the validation and
        # test fractions -- confirm against train_val_test_split's signature.
        labels[SUBSET] = train_val_test_split(labels.index, 0.3, 0.3)
        return labels


class Uganda_NorthCEO2021(LabeledDataset):
def load_labels(self) -> pd.DataFrame:
raw_folder = raw_dir / "Uganda_North_2021"
Expand Down Expand Up @@ -1505,6 +1537,7 @@ def load_labels(self) -> pd.DataFrame:
MalawiCropArea2020(),
TanzaniaCropArea2019(),
FranceCropArea2020(),
Uganda_NorthCEO2016(),
]

if __name__ == "__main__":
Expand Down
Loading