scaleoutsystems · sztoor · Jun 14, 2024 · Jun 14, 2024 · Jun 14, 2024 · Jun 14, 2024
diff --git a/examples/monai-2D-mednist/README.rst b/examples/monai-2D-mednist/README.rst
@@ -1,33 +1,34 @@
 FEDn Project: MonAI 2D Classification with the MedNIST Dataset (PyTorch)
 ------------------------------------------------------------------------
 
-This is an example FEDn Project based on the  MonAI 2D Classification with the MedNIST Dataset. 
+This is an example FEDn Project based on the  MonAI 2D Classification with the MedNIST Dataset.
 The example is intented as a minimalistic quickstart and automates the handling of training data
-by letting the client download and create its partition of the dataset as it starts up. 
+by letting the client download and create its partition of the dataset as it starts up.
+
+Links:
 
-Links: 
-
 -  MonAI: https://monai.io/
 -  Base example notebook: https://github.com/Project-MONAI/tutorials/blob/main/2d_classification/mednist_tutorial.ipynb
--  MedNIST dataset: https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz 
+-  MedNIST dataset: https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz
 
 Prerequisites
 -------------
 
 Using FEDn Studio:
 
 -  `Python 3.8, 3.9, 3.10 or 3.11 <https://www.python.org/downloads>`__
--  `A FEDn Studio account <https://fedn.scaleoutsystems.com/signup>`__   
+-  `A FEDn Studio account <https://fedn.scaleoutsystems.com/signup>`__
 
 If using pseudo-distributed mode with docker-compose:
 
 -  `Docker <https://docs.docker.com/get-docker>`__
 -  `Docker Compose <https://docs.docker.com/compose/install>`__
 
+
 Creating the compute package and seed model
 -------------------------------------------
 
-Install fedn: 
+Install fedn:
 
 .. code-block::
 
@@ -54,13 +55,31 @@ Next, generate a seed model (the first model in a global model trail):
 
    fedn run build --path client
 
-This will create a seed model called 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).  
+This will create a seed model called 'seed.npz' in the root of the project. This step will take a few minutes, depending on hardware and internet connection (builds a virtualenv).
+
+Download and prepare the data
+-----------------------------
+
+Install requirements:
+
+.. code-block::
+
+   pip install -r requirements.txt
+
+Download and divide the data into parts. Pass the number of data parts as an
+argument: ``python prepare_data.py NR-OF-DATAPARTS``. The command below divides the dataset into 10 parts.
+
+.. code-block::
+
+    python prepare_data.py 10
+
+
 
 Using FEDn Studio
 -----------------
 
 Follow the guide here to set up your FEDn Studio project and learn how to connect clients (using token authentication): `Studio guide <https://fedn.readthedocs.io/en/stable/studio.html>`__.
-On the step "Upload Files", upload 'package.tgz' and 'seed.npz' created above. 
+On the step "Upload Files", upload 'package.tgz' and 'seed.npz' created above.
 
 Connecting clients:
 ===================
@@ -70,23 +89,27 @@ Connecting clients:
 .. code-block::
 
    export FEDN_PACKAGE_EXTRACT_DIR=package
-   export FEDN_DATA_PATH=./data/
+   export FEDN_DATA_PATH=<full_path_to_the_dir>/data/
    export FEDN_CLIENT_SETTINGS_PATH=<full_path_to_the_dir>/client_settings.yaml
+   export FEDN_DATA_SPLIT_INDEX=0
+
    fedn client start -in client.yaml --secure=True --force-ssl
 
 Connecting clients using Docker:
 ================================
 
-For convenience, there is a Docker image hosted on ghrc.io with fedn preinstalled. To start a client using Docker: 
+For convenience, there is a Docker image hosted on ghcr.io with fedn preinstalled. To start a client using Docker:
 
 .. code-block::
 
    docker run \
      -v $PWD/client.yaml:/app/client.yaml \
      -v $PWD/client_settings.yaml:/app/client_settings.yaml \
+     -v $PWD/data:/app/data \
      -e FEDN_PACKAGE_EXTRACT_DIR=package \
-     -e FEDN_DATA_PATH=./data/ \
+     -e FEDN_DATA_PATH=/app/data/ \
      -e FEDN_CLIENT_SETTINGS_PATH=/app/client_settings.yaml \
+     -e FEDN_DATA_SPLIT_INDEX=0 \
      ghcr.io/scaleoutsystems/fedn/fedn:0.9.0 run client -in client.yaml --force-ssl --secure=True
 
 
@@ -107,8 +130,8 @@ Start a pseudo-distributed FEDn network using docker-compose:
     -f docker-compose.override.yaml \
     up
 
-This starts up local services for MongoDB, Minio, the API Server, one Combiner and two clients. 
-You can verify the deployment using these urls: 
+This starts up local services for MongoDB, Minio, the API Server, one Combiner and two clients.
+You can verify the deployment using these urls:
 
 - API Server: http://localhost:8092/get_controller_status
 - Minio: http://localhost:9000
@@ -123,18 +146,18 @@ Upload the package and seed model to FEDn controller using the APIClient. In Pyt
    client.set_active_package("package.tgz", helper="numpyhelper")
    client.set_active_model("seed.npz")
 
-You can now start a training session with 5 rounds (default): 
+You can now start a training session with 5 rounds (default):
 
 .. code-block::
 
    client.start_session()
 
-Automate experimentation with several clients  
+Automate experimentation with several clients
 =============================================
 
-If you want to scale the number of clients, you can do so by modifying ``docker-compose.override.yaml``. For example, 
-in order to run with 3 clients, change the environment variable ``FEDN_NUM_DATA_SPLITS`` to 3, and add one more client 
-by copying ``client1`` and setting ``FEDN_DATA_PATH`` to ``/app/package/data3/``
+If you want to scale the number of clients, you can do so by modifying ``docker-compose.override.yaml``. For example,
+in order to run with 3 clients, change the environment variable ``FEDN_NUM_DATA_SPLITS`` to 3, and add one more client
+by copying ``client1`` and giving the copy its own ``FEDN_DATA_SPLIT_INDEX`` (for example ``2``).
 
 
 Access message logs and validation data from MongoDB  

diff --git a/examples/monai-2D-mednist/README_MonAI_Tutorial.rst b/examples/monai-2D-mednist/README_MonAI_Tutorial.rst
@@ -0,0 +1,13 @@
+
+Implementing 2D Classification Model with MedNIST Dataset Using FEDn  
+--------------------------------------------------------------------
+
+This example provides a step-by-step guide to deploying and running a 2D classification model using the MedNIST dataset in a federated environment with the `FEDn framework <https://www.scaleoutsystems.com/framework>`__ developed by `Scaleout Systems <https://www.scaleoutsystems.com/>`__. It builds on the centralized `example <https://github.com/Project-MONAI/tutorials/blob/main/2d_classification/mednist_tutorial.ipynb>`__ from the MonAI project and adapts it for federated learning settings, utilizing the same code for ease of transition.
+
+The FEDn framework supports researchers with its robust `open-source SDK <https://fedn.readthedocs.io/en/stable/quickstart.html>`__ and a `public SaaS platform <https://fedn.readthedocs.io/en/stable/studio.html>`__, enabling scalable and efficient federated learning use cases.
+
+Getting Started
+---------------
+
+For a step-by-step example guide, click `here <https://github.com/scaleoutsystems/fedn/tree/master/examples/monai-2D-mednist>`__.
+
diff --git a/examples/monai-2D-mednist/client/data.py b/examples/monai-2D-mednist/client/data.py
@@ -1,66 +1,15 @@
 import os
 import random
-
 import numpy as np
 import PIL
 import torch
-import yaml
-from monai.apps import download_and_extract
 
 dir_path = os.path.dirname(os.path.realpath(__file__))
 abs_path = os.path.abspath(dir_path)
 
 DATA_CLASSES = {"AbdomenCT": 0, "BreastMRI": 1, "CXR": 2, "ChestCT": 3, "Hand": 4, "HeadCT": 5}
 
 
-def split_data(data_path="data/MedNIST", splits=100, validation_split=0.9):
-    # create clients
-    clients = {"client " + str(i): {"train": [], "validation": []} for i in range(splits)}
-
-    for class_ in os.listdir(data_path):
-        if os.path.isdir(os.path.join(data_path, class_)):
-            patients_in_class = [os.path.join(class_, patient) for patient in os.listdir(os.path.join(data_path, class_))]
-            np.random.shuffle(patients_in_class)
-            chops = np.int32(np.linspace(0, len(patients_in_class), splits + 1))
-            for split in range(splits):
-                p = patients_in_class[chops[split] : chops[split + 1]]
-                valsplit = np.int32(len(p) * validation_split)
-
-                clients["client " + str(split)]["train"] += p[:valsplit]
-                clients["client " + str(split)]["validation"] += p[valsplit:]
-
-    with open(os.path.join(os.path.dirname(data_path), "data_splits.yaml"), "w") as file:
-        yaml.dump(clients, file, default_flow_style=False)
-
-
-def get_data(out_dir="data"):
-    """Get data from the external repository.
-
-    :param out_dir: Path to data directory. If doesn't
-    :type data_dir: str
-    """
-    # Make dir if necessary
-    if not os.path.exists(out_dir):
-        os.mkdir(out_dir)
-
-    resource = "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/MedNIST.tar.gz"
-    md5 = "0bc7306e7427e00ad1c5526a6677552d"
-
-    compressed_file = os.path.join(out_dir, "MedNIST.tar.gz")
-
-    data_dir = os.path.abspath(out_dir)
-    print("data_dir:", data_dir)
-    if os.path.exists(data_dir):
-        print("path exist.")
-        if not os.path.exists(compressed_file):
-            print("compressed file does not exist, downloading and extracting data.")
-            download_and_extract(resource, compressed_file, data_dir, md5)
-        else:
-            print("files already exist.")
-
-    split_data()
-
-
 def get_classes(data_path):
     """Get a list of classes from the dataset
 
@@ -148,6 +97,5 @@ def __getitem__(self, index):
         return (self.transforms(os.path.join(self.data_path, self.image_files[index])), DATA_CLASSES[os.path.dirname(self.image_files[index])])
 
 
-if __name__ == "__main__":
-    # Prepare data if not already done
-    get_data()
+
+
diff --git a/examples/monai-2D-mednist/client/python_env.yaml b/examples/monai-2D-mednist/client/python_env.yaml
@@ -2,11 +2,11 @@ name: monai-2d-mdnist
 build_dependencies:
   - pip
   - setuptools
-  - wheel==0.37.1
+  - wheel
 dependencies:
   - torch==2.2.1
   - torchvision==0.17.1
-  - fedn==0.9.0
+  - fedn
   - monai-weekly[pillow, tqdm]
-  - scikit-learn
-  - tensorboard   
+  - numpy==1.26.4
+  - scikit-learn  
diff --git a/examples/monai-2D-mednist/client/train.py b/examples/monai-2D-mednist/client/train.py
@@ -22,7 +22,6 @@
 dir_path = os.path.dirname(os.path.realpath(__file__))
 sys.path.append(os.path.abspath(dir_path))
 
-
 train_transforms = Compose(
     [
         LoadImage(image_only=True),
@@ -54,18 +53,16 @@ def train(in_model_path, out_model_path, data_path=None, client_settings_path=No
     if client_settings_path is None:
         client_settings_path = os.environ.get("FEDN_CLIENT_SETTINGS_PATH", dir_path + "/client_settings.yaml")
 
-    print("client_settings_path: ", client_settings_path)
     with open(client_settings_path, "r") as fh:  # Used by CJG for local training
         try:
             client_settings = dict(yaml.safe_load(fh))
         except yaml.YAMLError:
             raise
 
-    print("client settings: ", client_settings)
     batch_size = client_settings["batch_size"]
     max_epochs = client_settings["local_epochs"]
     num_workers = client_settings["num_workers"]
-    split_index = client_settings["split_index"]
+    split_index = os.environ.get("FEDN_DATA_SPLIT_INDEX")  #client_settings["split_index"]
     lr = client_settings["lr"]
 
     if data_path is None:
@@ -76,8 +73,7 @@ def train(in_model_path, out_model_path, data_path=None, client_settings_path=No
 
     image_list = clients["client " + str(split_index)]["train"]
 
-    train_ds = MedNISTDataset(data_path="data/MedNIST", transforms=train_transforms, image_files=image_list)
-
+    train_ds = MedNISTDataset(data_path=data_path+'/MedNIST/', transforms=train_transforms, image_files=image_list)
     train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers)
 
     # Load parmeters and initialize model

diff --git a/examples/monai-2D-mednist/client/validate.py b/examples/monai-2D-mednist/client/validate.py
@@ -45,7 +45,7 @@ def validate(in_model_path, out_json_path, data_path=None, client_settings_path=
 
     num_workers = client_settings["num_workers"]
     batch_size = client_settings["batch_size"]
-    split_index = client_settings["split_index"]
+    split_index = os.environ.get("FEDN_DATA_SPLIT_INDEX")  # client_settings["split_index"]
 
     if data_path is None:
         data_path = os.environ.get("FEDN_DATA_PATH")
@@ -55,7 +55,7 @@ def validate(in_model_path, out_json_path, data_path=None, client_settings_path=
 
     image_list = clients["client " + str(split_index)]["validation"]
 
-    val_ds = MedNISTDataset(data_path="data/MedNIST", transforms=val_transforms, image_files=image_list)
+    val_ds = MedNISTDataset(data_path=data_path+"/MedNIST/", transforms=val_transforms, image_files=image_list)
 
     val_loader = DataLoader(val_ds, batch_size=batch_size, shuffle=True, num_workers=num_workers)
 

diff --git a/examples/monai-2D-mednist/client_settings.yaml b/examples/monai-2D-mednist/client_settings.yaml
@@ -1,6 +1,5 @@
 lr: 0.01
-batch_size: 32
-local_epochs: 10
+batch_size: 8
+local_epochs: 1
 num_workers: 1
 sample_size: 30
-split_index: 4 
diff --git a/examples/monai-2D-mednist/docker-compose.override.yaml b/examples/monai-2D-mednist/docker-compose.override.yaml
@@ -15,22 +15,28 @@ services:
       service: client
     environment:
       <<: *defaults
-      FEDN_DATA_PATH: /app/package/client/data/MedNIST
-      FEDN_CLIENT_SETTINGS_PATH: /app/client_settings.yaml  
+      FEDN_DATA_PATH: /app/data/  # parent of MedNIST/; train.py and validate.py append /MedNIST/ themselves
+      FEDN_CLIENT_SETTINGS_PATH: /app/client_settings.yaml
+      FEDN_DATA_SPLIT_INDEX: "0"  # quoted so the value stays a string; selects "client 0" in the data splits
     deploy:
       replicas: 1
     volumes:
       - ${HOST_REPO_DIR:-.}/fedn:/app/fedn
-      - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/client_settings.yaml:/app/client_settings.yaml  
+      - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/client_settings.yaml:/app/client_settings.yaml
+      - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/data:/app/data
 
   client2:
     extends:
       file: ${HOST_REPO_DIR:-.}/docker-compose.yaml
       service: client
     environment:
       <<: *defaults
-      FEDN_DATA_PATH: /app/package/client/data/MedNIST
+      FEDN_DATA_PATH: /app/data/  # parent of MedNIST/; train.py and validate.py append /MedNIST/ themselves
+      FEDN_CLIENT_SETTINGS_PATH: /app/client_settings.yaml
+      FEDN_DATA_SPLIT_INDEX: "1"  # quoted so the value stays a string; selects "client 1" in the data splits
     deploy:
       replicas: 1
     volumes:
       - ${HOST_REPO_DIR:-.}/fedn:/app/fedn
+      - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/client_settings.yaml:/app/client_settings.yaml
+      - ${HOST_REPO_DIR:-.}/examples/monai-2D-mednist/data:/app/data