nasaharvest · yashgadhiya10 · Aug 2, 2024 · Jul 26, 2024 · Jul 26, 2024 · Jul 26, 2024
diff --git a/data/datasets.dvc b/data/datasets.dvc
@@ -1,6 +1,6 @@
 outs:
-- md5: a91622aa2324bcabbd664c59d3d6d6df.dir
-  size: 724558428
-  nfiles: 60
+- md5: b073343b2883fe4537cbdb98bd447cc2.dir
+  size: 728780256
+  nfiles: 61
   path: datasets
   hash: md5
diff --git a/data/raw.dvc b/data/raw.dvc
@@ -1,6 +1,6 @@
 outs:
-- md5: 2ef29247f3459c578895650ba50a129c.dir
-  size: 446045451
-  nfiles: 404
+- md5: 7ba2a5b698999a9bfa635252b5090ff7.dir
+  size: 446381456
+  nfiles: 406
   path: raw
   hash: md5
diff --git a/data/report.txt b/data/report.txt
@@ -502,3 +502,13 @@ eo_data_complete    650
 ✔ training amount: 265, positive class: 22.3%
 ✔ validation amount: 184, positive class: 22.3%
 ✔ testing amount: 201, positive class: 26.4%
+
+
+
+Uganda_NorthCEO2016 (Timesteps: 24)
+----------------------------------------------------------------------------
+eo_data_complete         773
+eo_data_export_failed    227
+✔ training amount: 293, positive class: 16.4%
+✔ validation amount: 235, positive class: 18.3%
+✔ testing amount: 245, positive class: 16.7%
diff --git a/datasets.py b/datasets.py
@@ -476,6 +476,38 @@ def load_labels(self) -> pd.DataFrame:
         return df
 
 
+class Uganda_NorthCEO2016(LabeledDataset):  # North Uganda labels from two "ceo-..." CSV exports
+    def load_labels(self) -> pd.DataFrame:  # returns one labelled row per unique (LON, LAT)
+        raw_folder = raw_dir / "Uganda_North_2016"
+        df1 = pd.read_csv(  # Set-1 export
+            raw_folder
+            / "ceo-UNHCR-North-Uganda-Feb-2016---Feb-2017-(Set-1)-sample-data-2024-07-25.csv"
+        )
+        df2 = pd.read_csv(  # Set-2 export
+            raw_folder
+            / "ceo-UNHCR-North-Uganda-Feb-2016---Feb-2017-(Set-2)-sample-data-2024-07-25.csv"
+        )
+        df = pd.concat([df1, df2])  # stack both sets; per-file row indices are kept as-is
+
+        # Discard rows with no label (NaN answer to the cropland question)
+        df = df[~df["Does this pixel contain active cropland?"].isna()].copy()
+        df[CLASS_PROB] = df["Does this pixel contain active cropland?"] == "Crop"  # True only for exactly "Crop"
+        df[CLASS_PROB] = df[CLASS_PROB].astype(int)  # bool -> 1/0 so it can be averaged below
+        df["num_labelers"] = 1  # one per row; summed per location below
+        df = df.groupby([LON, LAT], as_index=False, sort=False).agg(  # collapse rows sharing a coordinate
+            {
+                CLASS_PROB: "mean",  # share of rows at this point answered "Crop"
+                "num_labelers": "sum",  # number of labelled rows at this point
+                "plotid": join_unique,  # NOTE(review): join_unique is defined elsewhere; presumably joins distinct values
+                "sampleid": join_unique,
+                "email": join_unique,
+            }
+        )
+        df[START], df[END] = date(2016, 1, 1), date(2017, 12, 31)  # same fixed window assigned to every row
+        df[SUBSET] = train_val_test_split(df.index, 0.3, 0.3)  # NOTE(review): presumably val/test fractions; confirm in helper
+        return df
+
+
 class Uganda_NorthCEO2021(LabeledDataset):
     def load_labels(self) -> pd.DataFrame:
         raw_folder = raw_dir / "Uganda_North_2021"
@@ -1505,6 +1537,7 @@ def load_labels(self) -> pd.DataFrame:
     MalawiCropArea2020(),
     TanzaniaCropArea2019(),
     FranceCropArea2020(),
+    Uganda_NorthCEO2016(),
 ]
 
 if __name__ == "__main__":