adap · jafermarq · Jan 18, 2024 · Jan 18, 2024 · Jan 18, 2024 · Jan 18, 2024
@@ -1,6 +1,7 @@
 # Flower Example using Pandas
 
-This introductory example to Flower uses Pandas, but deep knowledge of Pandas is not necessarily required to run the example. However, it will help you understand how to adapt Flower to your use case.
+This introductory example to Flower uses Pandas, but deep knowledge of Pandas is not necessarily required to run the example. However, it will help you understand how to adapt Flower to your use case. This example uses [Flower Datasets](https://flower.dev/docs/datasets/) to
+download, partition and preprocess the dataset.
 Running the example itself is quite easy.
 
 ## Project Setup
@@ -69,13 +70,13 @@ Now you are ready to start the Flower clients which will participate in the lear
 Start client 1 in the first terminal:
 
 ```shell
-$ python3 client.py
+$ python3 client.py --node-id 0
 ```
 
 Start client 2 in the second terminal:
 
 ```shell
-$ python3 client.py
+$ python3 client.py --node-id 1
 ```
 
 You will see that the server is printing aggregated statistics about the dataset distributed amongst clients. Have a look at the [Flower Quickstarter documentation](https://flower.dev/docs/quickstart-pandas.html) for a detailed explanation.
@@ -1,15 +1,15 @@
-import warnings
+import argparse
 from typing import Dict, List, Tuple
 
 import numpy as np
 import pandas as pd
 
 import flwr as fl
 
+from flwr_datasets import FederatedDataset
 
-df = pd.read_csv("./data/client.csv")
 
-column_names = ["sepal length (cm)", "sepal width (cm)"]
+column_names = ["sepal_length", "sepal_width"]
 
 
 def compute_hist(df: pd.DataFrame, col_name: str) -> np.ndarray:
@@ -19,23 +19,47 @@ def compute_hist(df: pd.DataFrame, col_name: str) -> np.ndarray:
 
 # Define Flower client
 class FlowerClient(fl.client.NumPyClient):
+    def __init__(self, X: pd.DataFrame):  # X: the DataFrame this client computes its histograms on
+        self.X = X
+
     def fit(
         self, parameters: List[np.ndarray], config: Dict[str, str]
     ) -> Tuple[List[np.ndarray], int, Dict]:
         hist_list = []
         # Execute query locally
-        for c in column_names:
-            hist = compute_hist(df, c)
+        for c in self.X.columns:  # every column of the client's own DataFrame, not a module-level list
+            hist = compute_hist(self.X, c)
             hist_list.append(hist)
         return (
             hist_list,
-            len(df),
+            len(self.X),  # row count of the local DataFrame, returned next to the histograms
             {},
         )
 
 
-# Start Flower client
-fl.client.start_numpy_client(
-    server_address="127.0.0.1:8080",
-    client=FlowerClient(),
-)
+if __name__ == "__main__":
+    N_CLIENTS = 2  # bounds the accepted --node-id values and is passed as the "train" partitioner below
+
+    parser = argparse.ArgumentParser(description="Flower")
+    parser.add_argument(
+        "--node-id",
+        type=int,
+        choices=range(0, N_CLIENTS),  # argparse rejects any id outside 0..N_CLIENTS-1
+        required=True,
+        help="Specifies the node id of artificially partitioned datasets.",
+    )
+    args = parser.parse_args()
+    partition_id = args.node_id  # argparse exposes the --node-id option as the node_id attribute
+
+    # Load the partition data for this node.
+    # NOTE(review): an int partitioner presumably splits "train" into N_CLIENTS partitions - confirm in the Flower Datasets docs.
+    fds = FederatedDataset(dataset="hitorilabs/iris", partitioners={"train": N_CLIENTS})
+
+    # NOTE(review): with_format("pandas")[:] presumably yields the whole partition as a pandas DataFrame - confirm.
+    dataset = fds.load_partition(partition_id, "train").with_format("pandas")[:]
+    # Use just the columns listed in the module-level column_names
+    X = dataset[column_names]
+
+    # Start Flower client, handing it only this node's selected columns
+    fl.client.start_numpy_client(
+        server_address="127.0.0.1:8080",
+        client=FlowerClient(X),
+    )
@@ -12,6 +12,6 @@ maintainers = ["The Flower Authors <hello@flower.dev>"]
 [tool.poetry.dependencies]
 python = ">=3.8,<3.11"
 flwr = ">=1.0,<2.0"
+flwr-datasets = { extras = ["vision"], version = ">=0.0.2,<1.0.0" }
 numpy = "1.23.2"
 pandas = "2.0.0"
-scikit-learn = "1.3.1"
@@ -1,4 +1,4 @@
 flwr>=1.0, <2.0
+flwr-datasets[vision]>=0.0.2, <1.0.0
 numpy==1.23.2
 pandas==2.0.0
-scikit-learn==1.3.1
@@ -2,13 +2,9 @@ echo "Starting server"
 python server.py &
 sleep 3  # Sleep for 3s to give the server enough time to start
 
-# Download data
-mkdir -p ./data
-python -c "from sklearn.datasets import load_iris; load_iris(as_frame=True)['data'].to_csv('./data/client.csv')"
-
 for i in `seq 0 1`; do
     echo "Starting client $i"
-    python client.py &
+    python client.py --node-id ${i} &
 done
 
 # This will allow you to use CTRL+C to stop all background processes

@@ -1,5 +1,4 @@
-import pickle
-from typing import Callable, Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union
 
 import numpy as np
 
@@ -9,9 +8,6 @@
     EvaluateRes,
     FitIns,
     FitRes,
-    Metrics,
-    MetricsAggregationFn,
-    NDArrays,
     Parameters,
     Scalar,
     ndarrays_to_parameters,
@@ -23,11 +19,6 @@
 
 
 class FedAnalytics(Strategy):
-    def __init__(
-        self, compute_fns: List[Callable] = None, col_names: List[str] = None
-    ) -> None:
-        super().__init__()
-
     def initialize_parameters(
         self, client_manager: Optional[ClientManager] = None
     ) -> Optional[Parameters]: