Changes to ml decorator tests
renan-souza committed Dec 17, 2024
1 parent 1f7db22 commit ed2794e
Showing 16 changed files with 102 additions and 664 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/create-release-n-publish.yml
@@ -121,7 +121,7 @@ jobs:
       run: make services
 
     - name: Test with pytest
-      run: pytest --ignore=tests/decorator_tests/ml_tests/llm_tests
+      run: pytest
 
     - name: Test notebooks
       run: |
2 changes: 1 addition & 1 deletion .github/workflows/run-tests-kafka.yml
@@ -58,7 +58,7 @@ jobs:
         make tests
     - name: Test notebooks
-      run: pytest --ignore=notebooks/zambeze.ipynb --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb
+      run: pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb
 
     - name: Stop services
       run: docker compose -f deployment/compose-kafka.yml down
2 changes: 1 addition & 1 deletion .github/workflows/run-tests-py11.yml
@@ -78,7 +78,7 @@ jobs:
           export MQ_TYPE=kafka
           export MQ_PORT=9092
           # Ignoring heavy tests. They are executed with Kafka in another GH Action.
-          pytest --ignore=tests/decorator_tests/ml_tests --ignore=tests/adapters/test_tensorboard.py
+          pytest
     - name: Stop services
       run: docker compose -f deployment/compose-kafka.yml down
24 changes: 23 additions & 1 deletion .github/workflows/run_examples.sh
@@ -4,6 +4,24 @@
 set -e
 set -o pipefail
 
+# Display usage/help message
+usage() {
+  echo -e "\nUsage: $0 <examples_dir> <with_mongo>\n"
+  echo "Arguments:"
+  echo "  examples_dir   Path to the examples directory (Mandatory)"
+  echo "  with_mongo     Boolean flag (true/false) indicating whether to include MongoDB support (Mandatory)"
+  echo -e "\nExample:"
+  echo "  $0 examples true"
+  echo "  $0 examples false"
+  exit 1
+}
+
+# Check if the required arguments are provided
+if [[ -z "$1" || -z "$2" ]]; then
+  echo "Error: Missing mandatory arguments!"
+  usage
+fi
+
 # Function to run tests with common steps
 run_test() {
     test_path="${EXAMPLES_DIR}/${1}_example.py"
@@ -29,6 +29,10 @@ run_test() {
     elif [[ "$test_type" =~ "tensorboard" ]]; then
         echo "Installing tensorboard"
         pip install .[tensorboard] > /dev/null 2>&1
+    elif [[ "$test_type" =~ "llm_complex" ]]; then
+        echo "Defining python path for llm_complex..."
+        export PYTHONPATH=$PYTHONPATH:${EXAMPLES_DIR}/llm_complex
+        echo $PYTHONPATH
     fi

# Run the test and capture output
@@ -51,7 +73,7 @@ echo "Using examples directory: $EXAMPLES_DIR"
echo "With Mongo? ${WITH_MONGO}"

# Define the test cases
tests=("instrumented_simple" "instrumented_loop" "dask" "mlflow" "tensorboard")
tests=("instrumented_simple" "instrumented_loop" "dask" "mlflow" "tensorboard" "llm_complex/llm_search")

# Iterate over the tests and run them
for test_ in "${tests[@]}"; do
3 changes: 2 additions & 1 deletion .gitignore
@@ -20,4 +20,5 @@ test.py
 time.txt
 tmp/
 deployment/data
-examples/llm_complex/output_data
+**/*output_data*
+
10 changes: 6 additions & 4 deletions Makefile
@@ -11,6 +11,7 @@ help:
 	@printf "\033[32mtests\033[0m run unit tests with pytest\n"
 	@printf "\033[32mtests-in-container\033[0m run unit tests with pytest inside Flowcept's container\n"
 	@printf "\033[32mtests-in-container-mongo\033[0m run unit tests inside container with MongoDB\n"
+	@printf "\033[32mtests-in-container-kafka\033[0m run unit tests inside container with Kafka and MongoDB\n"
 	@printf "\033[32mtests-all\033[0m run all unit tests with pytest, including long-running ones\n"
 	@printf "\033[32mtests-notebooks\033[0m test the notebooks using pytest\n"
 	@printf "\033[32mclean\033[0m remove cache directories and Sphinx build output\n"
@@ -43,6 +44,7 @@ clean:
find . -type d -name "mlruns" -exec rm -rf {} \; 2>/dev/null || true
find . -type d -name "__pycache__" -exec rm -rf {} \; 2>/dev/null || true
find . -type d -name "*tfevents*" -exec rm -rf {} \; 2>/dev/null || true
find . -type d -name "*output_data*" -exec rm -rf {} \; 2>/dev/null || true
# sphinx-build -M clean docs docs/_build This needs to be fixed.

# Build the HTML documentation using Sphinx
@@ -73,13 +75,13 @@ run:
 	docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=flowcept_redis -e MONGO_HOST=flowcept_mongo --network flowcept_default -it flowcept
 
 tests-in-container-mongo:
-	docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=flowcept_redis -e MONGO_HOST=flowcept_mongo -e MONGO_ENABLED=true -e LMDB_ENABLED=false --network flowcept_default flowcept /opt/conda/envs/flowcept/bin/pytest --ignore=tests/decorator_tests/ml_tests
+	docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=flowcept_redis -e MONGO_HOST=flowcept_mongo -e MONGO_ENABLED=true -e LMDB_ENABLED=false --network flowcept_default flowcept /opt/conda/envs/flowcept/bin/pytest
 
 tests-in-container:
-	docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=flowcept_redis -e MONGO_ENABLED=false -e LMDB_ENABLED=true --network flowcept_default flowcept /opt/conda/envs/flowcept/bin/pytest --ignore=tests/decorator_tests/ml_tests
+	docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=flowcept_redis -e MONGO_ENABLED=false -e LMDB_ENABLED=true --network flowcept_default flowcept /opt/conda/envs/flowcept/bin/pytest
 
 tests-in-container-kafka:
-	docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=kafka -e MONGO_HOST=flowcept_mongo -e MQ_PORT=29092 -e MQ_TYPE=kafka -e MONGO_ENABLED=true -e LMDB_ENABLED=false --network flowcept_default flowcept /opt/conda/envs/flowcept/bin/pytest --ignore=tests/decorator_tests/ml_tests
+	docker run --rm -v $(shell pwd):/flowcept -e KVDB_HOST=flowcept_redis -e MQ_HOST=kafka -e MONGO_HOST=flowcept_mongo -e MQ_PORT=29092 -e MQ_TYPE=kafka -e MONGO_ENABLED=true -e LMDB_ENABLED=false --network flowcept_default flowcept /opt/conda/envs/flowcept/bin/pytest
 
 # This command can be removed once we have our CLI
 liveness:
@@ -88,7 +90,7 @@ liveness:
 # Run unit tests using pytest
 .PHONY: tests
 tests:
-	pytest --ignore=tests/decorator_tests/ml_tests/llm_tests
+	pytest
 
 .PHONY: tests-notebooks
 tests-notebooks:
File renamed without changes.
2 changes: 1 addition & 1 deletion src/flowcept/instrumentation/decorators/flowcept_torch.py
@@ -219,7 +219,7 @@ def _register_as_workflow(self):
         workflow_obj.name = cls.__name__
         workflow_obj.campaign_id = self._campaign_id
         workflow_obj.parent_workflow_id = self._parent_workflow_id
-        _custom_metadata = self._custom_metadata
+        _custom_metadata = self._custom_metadata or {}
         _custom_metadata["workflow_type"] = "TorchModule"
 
         if self._should_get_profile:
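Note on the flowcept_torch.py change: the new "or {}" guard matters because _custom_metadata can be None when the caller passes no custom metadata, and the very next line assigns into it. A minimal standalone sketch (illustrative values, not code from the repo) of the failure the guard prevents:

    # Pre-fix: item assignment on None raises TypeError.
    custom_metadata = None  # what self._custom_metadata holds when nothing was passed
    try:
        custom_metadata["workflow_type"] = "TorchModule"
    except TypeError as err:
        print(f"pre-fix behavior: {err}")  # 'NoneType' object does not support item assignment

    # Post-fix: fall back to an empty dict, then assign safely.
    custom_metadata = custom_metadata or {}
    custom_metadata["workflow_type"] = "TorchModule"
    print(custom_metadata)  # {'workflow_type': 'TorchModule'}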
7 changes: 7 additions & 0 deletions tests/adapters/test_tensorboard.py
@@ -52,6 +52,13 @@ def run_tensorboard_hparam_tuning(self):
         (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
         x_train, x_test = x_train / 255.0, x_test / 255.0
 
+        # Reduce the dataset size for faster debugging
+        DEBUG_SAMPLES_TRAIN = 100  # Number of training samples to keep
+        DEBUG_SAMPLES_TEST = 20  # Number of test samples to keep
+
+        x_train, y_train = x_train[:DEBUG_SAMPLES_TRAIN], y_train[:DEBUG_SAMPLES_TRAIN]
+        x_test, y_test = x_test[:DEBUG_SAMPLES_TEST], y_test[:DEBUG_SAMPLES_TEST]
+
         HP_NUM_UNITS = hp.HParam("num_units", hp.Discrete([16]))
         HP_DROPOUT = hp.HParam("dropout", hp.RealInterval(0.1, 0.2))
         HP_OPTIMIZER = hp.HParam("optimizer", hp.Discrete(["adam", "sgd"]))
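Note on the test_tensorboard.py change: the added lines only truncate the Fashion-MNIST arrays so the hyperparameter-tuning test runs quickly. The same slicing, shown on stand-in arrays (shapes match Fashion-MNIST; the data here is fake):

    import numpy as np

    x_train, y_train = np.zeros((60000, 28, 28)), np.zeros(60000)  # stand-ins for the real arrays
    DEBUG_SAMPLES_TRAIN = 100

    # Slicing keeps only the first N samples; the image dimensions are untouched.
    x_train, y_train = x_train[:DEBUG_SAMPLES_TRAIN], y_train[:DEBUG_SAMPLES_TRAIN]
    print(x_train.shape, y_train.shape)  # (100, 28, 28) (100,)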
105 changes: 41 additions & 64 deletions tests/decorator_tests/ml_tests/dl_trainer.py
@@ -4,26 +4,20 @@
 from torch.utils.data import Subset, DataLoader
 from torchvision import datasets, transforms
 from torch import nn, optim
-from torch.nn import functional as F
+from torch.nn import functional as F, Conv2d, Dropout, MaxPool2d, ReLU, Linear, Softmax
 
 from flowcept import (
     Flowcept,
 )
-from flowcept.instrumentation.decorators.flowcept_torch import (
-    register_modules,
-    register_module_as_workflow,
-    torch_task,
-)
-from flowcept.instrumentation.decorators.responsible_ai import (
-    model_profiler,
-)
 
 import threading
 
+from flowcept.instrumentation.decorators.flowcept_torch import flowcept_torch
 
 thread_state = threading.local()
 
 
+@flowcept_torch
 class MyNet(nn.Module):
     def __init__(
         self,
@@ -35,22 +29,9 @@ def __init__(
         parent_workflow_id=None,
         parent_task_id=None,
     ):
-        super(MyNet, self).__init__()
+        super().__init__()
+        print("parent workflow id", parent_workflow_id)
-        self.workflow_id = register_module_as_workflow(self, parent_workflow_id=parent_workflow_id)
-        self.parent_task_id = parent_task_id
-        Conv2d, Dropout, MaxPool2d, ReLU, Softmax, Linear = register_modules(
-            [
-                nn.Conv2d,
-                nn.Dropout,
-                nn.MaxPool2d,
-                nn.ReLU,
-                nn.Softmax,
-                nn.Linear,
-            ],
-            workflow_id=self.workflow_id,
-            parent_task_id=self.parent_task_id,
-        )
 
         self.model_type = "CNN"
         # TODO: add if len conv_in_outs > 0
@@ -79,7 +60,6 @@ def __init__(
             self.fc_layers.append(Softmax(dim=softmax_dims[i]))
         self.view_size = fc_in_outs[0][0]
 
-    @torch_task()
     def forward(self, x):
         x = self.conv_layers(x)
         x = x.view(-1, self.view_size)
@@ -89,7 +69,7 @@ def forward(self, x):

 class ModelTrainer(object):
     @staticmethod
-    def build_train_test_loader(batch_size=128, random_seed=0, debug=True, subset_size=1000):
+    def build_train_test_loader(batch_size=128, random_seed=0, debug=True, subset_size=10):
         torch.manual_seed(random_seed)
 
         # Load the full MNIST dataset
@@ -157,7 +137,6 @@ def _test(model, device, test_loader):

     # @model_explainer()
     @staticmethod
-    @model_profiler()
     def model_fit(
         conv_in_outs=[[1, 10], [10, 20]],
         conv_kernel_sizes=[5, 5],
@@ -181,43 +160,41 @@ def model_fit(
         # We are calling the consumer api here (sometimes for the second time)
         # because we are capturing at two levels: at the model.fit and at
         # every layer. Can we do it better?
-        with Flowcept(
-            bundle_exec_id=workflow_id,
-            start_persistence=False,
-        ):
-            train_loader, test_loader = ModelTrainer.build_train_test_loader()
-            if torch.backends.mps.is_available():
-                device = torch.device("mps")
-            else:
-                device = torch.device("cpu")
-            model = MyNet(
-                conv_in_outs=conv_in_outs,
-                conv_kernel_sizes=conv_kernel_sizes,
-                conv_pool_sizes=conv_pool_sizes,
-                fc_in_outs=fc_in_outs,
-                softmax_dims=softmax_dims,
-                parent_workflow_id=workflow_id,
-            )
-            model = model.to(device)
-            optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
-            test_info = {}
-            print("Starting training....")
-            for epoch in range(1, max_epochs + 1):
-                ModelTrainer._train(model, device, train_loader, optimizer, epoch)
-                test_info = ModelTrainer._test(model, device, test_loader)
-            print("Finished training....")
-            batch = next(iter(test_loader))
-            test_data, _ = batch
-            result = test_info.copy()
-            result.update(
-                {
-                    "model": model,
-                    "test_data": test_data,
-                    "task_id": task_id,
-                    "random_seed": random_seed,
-                }
-            )
-            return result
+        train_loader, test_loader = ModelTrainer.build_train_test_loader()
+        if torch.backends.mps.is_available():
+            device = torch.device("mps")
+        else:
+            device = torch.device("cpu")
+        model = MyNet(
+            conv_in_outs=conv_in_outs,
+            conv_kernel_sizes=conv_kernel_sizes,
+            conv_pool_sizes=conv_pool_sizes,
+            fc_in_outs=fc_in_outs,
+            softmax_dims=softmax_dims,
+            parent_workflow_id=workflow_id,
+        )
+        model = model.to(device)
+        optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
+        test_info = {}
+        print("Starting training....")
+        for epoch in range(1, max_epochs + 1):
+            ModelTrainer._train(model, device, train_loader, optimizer, epoch)
+            test_info = ModelTrainer._test(model, device, test_loader)
+        print("Finished training....")
+        batch = next(iter(test_loader))
+        test_data, _ = batch
+        result = test_info.copy()
+
+        best_obj_id = Flowcept.db.save_torch_model(model, task_id=task_id, workflow_id=workflow_id, custom_metadata=result)
+        result.update(
+            {
+                "best_obj_id": best_obj_id,
+                "test_data": test_data,
+                "task_id": task_id,
+                "random_seed": random_seed,
+            }
+        )
+        return result
 
     @staticmethod
     def generate_hp_confs(hp_conf: dict):
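Note on the dl_trainer.py refactor: the old per-layer plumbing (register_module_as_workflow, register_modules, @torch_task, @model_profiler) is replaced by a single @flowcept_torch class decorator, and the trained model is now persisted with Flowcept.db.save_torch_model. A minimal sketch of the new style, assuming only what this diff shows: the decorator wraps plain nn.Module subclasses and the constructor may accept a parent_workflow_id keyword. TinyNet is hypothetical, and instrumentation may additionally require a running Flowcept session:

    import torch
    from torch import nn
    from flowcept.instrumentation.decorators.flowcept_torch import flowcept_torch

    @flowcept_torch
    class TinyNet(nn.Module):  # hypothetical module, not part of this commit
        def __init__(self, parent_workflow_id=None):
            super().__init__()
            self.fc = nn.Linear(4, 2)

        def forward(self, x):
            return self.fc(x)

    model = TinyNet()
    out = model(torch.randn(1, 4))  # forward passes are what the decorator captures
    print(out.shape)  # torch.Size([1, 2])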
Empty file.
