From 205881f536eff68ba7baffb720fe33c7e9962cfd Mon Sep 17 00:00:00 2001 From: yashgadhiya10 Date: Thu, 29 Aug 2024 22:36:50 +0530 Subject: [PATCH 1/5] Created new Dataset North-Uganda-2017 --- data/raw.dvc | 6 +++--- datasets.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/data/raw.dvc b/data/raw.dvc index bc5cdb43..f13f5629 100644 --- a/data/raw.dvc +++ b/data/raw.dvc @@ -1,6 +1,6 @@ outs: -- md5: 7ba2a5b698999a9bfa635252b5090ff7.dir - size: 446381456 - nfiles: 406 +- md5: 0916e33f6eef6c80a87e319427005f5e.dir + size: 446720790 + nfiles: 408 path: raw hash: md5 diff --git a/datasets.py b/datasets.py index 7e690524..920cbc46 100644 --- a/datasets.py +++ b/datasets.py @@ -506,6 +506,37 @@ def load_labels(self) -> pd.DataFrame: df[START], df[END] = date(2016, 1, 1), date(2017, 12, 31) df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df + +class Uganda_NorthCEO2017(LabeledDataset): + def load_labels(self) -> pd.DataFrame: + raw_folder = raw_dir / "Uganda_North_2017" + df1 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-1)-sample-data-2024-08-29.csv" + ) + df2 = pd.read_csv( + raw_folder + / "ceo-UNHCR-North-Uganda-Feb-2017---Feb-2018-(Set-2)-sample-data-2024-08-29.csv" + ) + df = pd.concat([df1, df2]) + + # Discard rows with no label + df = df[~df["Does this pixel contain active cropland?"].isna()].copy() + df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop" + df[CLASS_PROB] = df[CLASS_PROB].astype(int) + df["num_labelers"] = 1 + df = df.groupby([LON, LAT], as_index=False, sort=False).agg( + { + CLASS_PROB: "mean", + "num_labelers": "sum", + "plotid": join_unique, + "sampleid": join_unique, + "email": join_unique, + } + ) + df[START], df[END] = date(2017, 1, 1), date(2018, 12, 31) + df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) + return df class Uganda_NorthCEO2021(LabeledDataset): @@ -1538,6 +1569,7 @@ def load_labels(self) -> pd.DataFrame: TanzaniaCropArea2019(), FranceCropArea2020(), Uganda_NorthCEO2016(), + Uganda_NorthCEO2017(), ] if __name__ == "__main__": From dae8e3f75c6f6f73ce75b9f6553fbdeb1a09cf37 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 29 Aug 2024 17:16:49 +0000 Subject: [PATCH 2/5] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datasets.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/datasets.py b/datasets.py index 920cbc46..e6d5282e 100644 --- a/datasets.py +++ b/datasets.py @@ -506,7 +506,8 @@ def load_labels(self) -> pd.DataFrame: df[START], df[END] = date(2016, 1, 1), date(2017, 12, 31) df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3) return df - + + class Uganda_NorthCEO2017(LabeledDataset): def load_labels(self) -> pd.DataFrame: raw_folder = raw_dir / "Uganda_North_2017" From 7f401ed41c4ec59a54d8495d5cbf9a9213109e9f Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Thu, 29 Aug 2024 17:34:27 +0000 Subject: [PATCH 3/5] Automated dataset updates --- data/datasets.dvc | 6 +++--- data/report.txt | 9 +++++++++ 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index 66359ef4..cc275ce1 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: b073343b2883fe4537cbdb98bd447cc2.dir - size: 728780256 - nfiles: 61 +- md5: 2f40325140d64ae6eb20c7b2f2611060.dir + size: 728914229 + nfiles: 62 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index dcb2ab64..a2629b2f 100644 --- a/data/report.txt +++ b/data/report.txt @@ -512,3 +512,12 @@ eo_data_export_failed 227 ✔ training amount: 293, positive class: 16.4% ✔ validation amount: 235, positive class: 18.3% ✔ testing amount: 245, positive class: 16.7% + + + +Uganda_NorthCEO2017 (Timesteps: 24) +---------------------------------------------------------------------------- +eo_data_exporting 1000 +✖ training: 387 labels, but 0 features +✖ validation: 294 labels, but 0 features +✖ testing: 319 labels, but 0 features From 518f5e9a258a648d0d795176d84752da8ca132f0 Mon Sep 17 00:00:00 2001 From: Dataset bot Date: Fri, 30 Aug 2024 04:29:56 +0000 Subject: [PATCH 4/5] Automated dataset updates --- data/datasets.dvc | 4 ++-- data/report.txt | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/data/datasets.dvc b/data/datasets.dvc index cc275ce1..6d5b7485 100644 --- a/data/datasets.dvc +++ b/data/datasets.dvc @@ -1,6 +1,6 @@ outs: -- md5: 2f40325140d64ae6eb20c7b2f2611060.dir - size: 728914229 +- md5: b45a782128aebf6786df801a75fbd46d.dir + size: 734199009 nfiles: 62 path: datasets hash: md5 diff --git a/data/report.txt b/data/report.txt index a2629b2f..46f3000b 100644 --- a/data/report.txt +++ b/data/report.txt @@ -517,7 +517,7 @@ eo_data_export_failed 227 Uganda_NorthCEO2017 (Timesteps: 24) ---------------------------------------------------------------------------- -eo_data_exporting 1000 -✖ training: 387 labels, but 0 features -✖ validation: 294 labels, but 0 features -✖ testing: 319 labels, but 0 features +eo_data_complete 1000 +✔ training amount: 387, positive class: 1.3% +✔ validation amount: 294, positive class: 1.0% +✔ testing amount: 319, positive class: 1.3% From 89889754fbffd7e5265f84b545b5f441e34b3e95 Mon Sep 17 00:00:00 2001 From: yashgadhiya10 Date: Mon, 9 Sep 2024 17:04:22 -0400 Subject: [PATCH 5/5] Trigger Build