diff --git a/.flake8 b/.flake8 deleted file mode 100644 index d8e08ca5..00000000 --- a/.flake8 +++ /dev/null @@ -1,9 +0,0 @@ -[flake8] -max-line-length = 79 -max-complexity = 10 -select = E9, - F63, - F7, - F82 -statistics = True -show-source = True diff --git a/.github/workflows/bump-version.yml b/.github/workflows/bump-version.yml deleted file mode 100644 index 856c4a1f..00000000 --- a/.github/workflows/bump-version.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Version Bumper -on: - pull_request: - branches: - - disabled -# - '*' # matches every branch that doesn't contain a '/' -# - '*/*' # matches every branch containing a single '/' -# - '**' # matches every branch -# - '!main' # excludes main -jobs: - build: - name: Version Bumper - runs-on: ubuntu-latest - if: ${{ !startsWith(github.ref, 'refs/tags/v') }} - steps: - - uses: actions/checkout@v2 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Get branch names - id: branch-name - uses: tj-actions/branch-names@v6 - - name: Update version.py - run: | - export PYTHONPATH=$PYTHONPATH:flowcept - export BRANCH_NAME="${{ steps.branch-name.outputs.current_branch }}" - python .github/workflows/version_bumper.py - - name: Commit new version - run: | - git config --global user.name 'Flowcept CI Bot' - git config --global user.email 'flowcept@users.noreply.github.com' - pwd - ls -la - git branch - git add flowcept/version.py - git commit -m "Flowcept CI Bot: bumping version" - git push --force diff --git a/.github/workflows/code-formatting.yml b/.github/workflows/code-formatting.yml deleted file mode 100644 index e7295188..00000000 --- a/.github/workflows/code-formatting.yml +++ /dev/null @@ -1,26 +0,0 @@ -name: Code Formatting -on: [pull_request] - -permissions: - contents: read - -jobs: - build: - - runs-on: ubuntu-latest - if: "!contains(github.event.head_commit.message, 'CI Bot')" - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install -r extra_requirements/dev-requirements.txt - - name: Run black checks - run: black --check . - - name: Run flake8 checks - run: | - flake8 . --count diff --git a/.github/workflows/create-release-n-publish.yml b/.github/workflows/create-release-n-publish.yml index f2c95537..dc8bef4c 100644 --- a/.github/workflows/create-release-n-publish.yml +++ b/.github/workflows/create-release-n-publish.yml @@ -2,21 +2,22 @@ name: Release and Publish on: push: # It has to be push, otherwise error happens in code below. 
branches: [ "main" ] - #branches: [ "main", "dev" ] # use this only to test the CI + # branches: [ "main", "dev" ] # use this only to test the CI #branches: [ "disabled" ] jobs: build: name: Create Release and Publish runs-on: ubuntu-latest -# env: -# FLOWCEPT_SETTINGS_PATH: 'resources/settings.yaml' + timeout-minutes: 60 steps: - name: Checkout code - uses: actions/checkout@v2 - - name: Set up Python 3.9 - uses: actions/setup-python@v3 + uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 with: - python-version: "3.9" + python-version: "3.10" + - name: Get branch names id: branch-name uses: tj-actions/branch-names@v6 @@ -30,12 +31,13 @@ jobs: git config --global user.name 'Flowcept CI Bot' git config --global user.email 'flowcept@users.noreply.github.com' git branch - git add flowcept/version.py + git add src/flowcept/version.py git commit -m "Flowcept CI Bot: bumping master version" git push --force + - name: Get Latest PR and Create Release run: | - export CURRENT_VERSION=`python -c "f = open('flowcept/version.py'); exec(f.read()); print(locals()['__version__']); f.close()"` + export CURRENT_VERSION=`python -c "f = open('src/flowcept/version.py'); exec(f.read()); print(locals()['__version__']); f.close()"` echo $CURRENT_VERSION REPOSITORY=${{ github.repository }} ACCESS_TOKEN=${{ secrets.GITHUB_TOKEN }} @@ -61,6 +63,7 @@ jobs: \"prerelease\": false}" \ -H "Authorization: Bearer ${ACCESS_TOKEN}" \ https://api.github.com/repos/${REPOSITORY}/releases + - name: Install pypa/build run: >- python -m @@ -81,6 +84,7 @@ jobs: password: ${{ secrets.TEST_PYPI_API_TOKEN }} repository_url: https://test.pypi.org/legacy/ verbose: true + - name: Publish distribution to PyPI #if: startsWith(github.ref, 'refs/tags') uses: pypa/gh-action-pypi-publish@release/v1 @@ -98,11 +102,11 @@ jobs: - name: Test pip install multiple adapters run: pip install flowcept[mlflow,tensorboard] - name: Install our dependencies - run: pip install flowcept[fulldev] # This will install all dependencies, for all adapters and dev deps. + run: pip install flowcept[all] # This will install all dependencies, for all adapters and dev deps. 
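Aside on the `Get Latest PR and Create Release` step above: it reads the package version through a dense shell one-liner. Purely as a readability aid, and not as a step in this workflow, an equivalent sketch of that lookup is:

```python
# Sketch of the CURRENT_VERSION lookup used in the release workflow above:
# execute src/flowcept/version.py in a scratch namespace and read __version__ from it.
namespace = {}
with open("src/flowcept/version.py") as f:
    exec(f.read(), namespace)
print(namespace["__version__"])
```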
- name: Pip list run: pip list - name: Run Docker Compose - run: docker compose -f deployment/compose-full.yml up -d + run: docker compose -f deployment/compose.yml up -d - name: Test with pytest run: | mkdir -p ~/.flowcept diff --git a/.github/workflows/run-checks.yml b/.github/workflows/run-checks.yml new file mode 100644 index 00000000..bc34dedd --- /dev/null +++ b/.github/workflows/run-checks.yml @@ -0,0 +1,30 @@ +name: Ruff linter and formatter checks +on: [pull_request] + +permissions: + contents: read + +jobs: + build: + runs-on: ubuntu-latest + if: "!contains(github.event.head_commit.message, 'CI Bot')" + + steps: + - uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" + + - name: Install package and dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[all] + + - name: Run ruff linter checks + run: ruff check src + + - name: Run ruff formatter checks + run: ruff format --check src \ No newline at end of file diff --git a/.github/workflows/run-tests-kafka.yml b/.github/workflows/run-tests-kafka.yml index 608eee77..c5c044f4 100644 --- a/.github/workflows/run-tests-kafka.yml +++ b/.github/workflows/run-tests-kafka.yml @@ -3,52 +3,62 @@ on: pull_request: branches: [ "dev", "main" ] types: [opened, synchronize, reopened] -# branches: [ "disabled" ] jobs: build: runs-on: ubuntu-latest - timeout-minutes: 60 + timeout-minutes: 40 if: "!contains(github.event.head_commit.message, 'CI Bot')" + steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.9 - uses: actions/setup-python@v3 - with: - python-version: "3.9" - - name: Check python version - run: python --version - - name: Install our dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[fulldev] - - name: Pip list - run: pip list - - name: Run Docker Compose - run: docker compose -f deployment/compose-kafka.yml up -d - - name: Wait 1 min - run: sleep 60 - - name: Check liveness - run: | - export MQ_TYPE=kafka - export MQ_PORT=9092 - python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_TYPE={MQ_PORT}")' - python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()' - - name: Run Tests with Kafka - run: | - export MQ_TYPE=kafka - export MQ_PORT=9092 - pytest --ignore=tests/decorator_tests/ml_tests/llm_tests - - name: Test notebooks - run: | - pip install -e .[full] - export MQ_TYPE=kafka - export MQ_PORT=9092 - python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_TYPE={MQ_PORT}")' - python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()' - - python flowcept/flowcept_webserver/app.py & - sleep 3 - export FLOWCEPT_SETTINGS_PATH=~/.flowcept/settings.yaml - pytest --ignore=notebooks/zambeze.ipynb --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb + - uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" + + - name: Copy settings file + run: | + mkdir ~/.flowcept + cp resources/sample_settings.yaml ~/.flowcept + mv ~/.flowcept/sample_settings.yaml ~/.flowcept/settings.yaml + + - name: Install package and dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[all] + + - name: Run docker compose + run: docker compose -f deployment/compose-kafka.yml up -d + + - name: Wait for one minute + run: sleep 60 + + - name: Check liveness + run: 
| + export MQ_TYPE=kafka + export MQ_PORT=9092 + python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_PORT={MQ_PORT}")' + python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()' + + - name: Run tests with kafka + run: | + export MQ_TYPE=kafka + export MQ_PORT=9092 + pytest --ignore=tests/decorator_tests/ml_tests/llm_tests + + - name: Test notebooks + run: | + pip install -e .[all] + export MQ_TYPE=kafka + export MQ_PORT=9092 + python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_PORT={MQ_PORT}")' + python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()' + + python src/flowcept/flowcept_webserver/app.py & + sleep 3 + export FLOWCEPT_SETTINGS_PATH=~/.flowcept/settings.yaml + pytest --ignore=notebooks/zambeze.ipynb --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb diff --git a/.github/workflows/run-tests.yml b/.github/workflows/run-tests.yml index 738b6fff..e821a028 100644 --- a/.github/workflows/run-tests.yml +++ b/.github/workflows/run-tests.yml @@ -1,54 +1,72 @@ -name: Unit, integration, and notebook tests (Py39) +name: Unit, integration, and notebook tests on: [push] -# branches: [ "disabled" ] jobs: build: runs-on: ubuntu-latest - timeout-minutes: 60 + timeout-minutes: 40 if: "!contains(github.event.head_commit.message, 'CI Bot')" + steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.9 - uses: actions/setup-python@v3 - with: - python-version: "3.9" - - name: Check python version - run: python --version - - name: Install our dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[fulldev] # This will install all dependencies, for all adapters and dev deps. - - name: Pip list - run: pip list - - name: Start Docker Compose with Redis - run: docker compose -f deployment/compose-full.yml up -d - - name: Test with pytest with Redis - run: | - pytest --ignore=tests/decorator_tests/ml_tests/llm_tests - - name: Test notebooks with Redis - run: | - pip install -e . - python flowcept/flowcept_webserver/app.py & - sleep 3 - export FLOWCEPT_SETTINGS_PATH=~/.flowcept/settings.yaml - pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb --ignore=notebooks/mlflow.ipynb - - name: Shut down compose - run: docker compose -f deployment/compose-full.yml down - - name: Start Docker Compose with Kafka - run: docker compose -f deployment/compose-kafka.yml up -d - - name: Wait 1 min - run: sleep 60 - - name: Check liveness - run: | - export MQ_TYPE=kafka - export MQ_PORT=9092 - python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_TYPE={MQ_PORT}")' - python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()' - - name: Run Tests with Kafka - run: | - export MQ_TYPE=kafka - export MQ_PORT=9092 - # Ignoring heavy tests. They are executed with Kafka in another GH Action.
- pytest --ignore=tests/decorator_tests/ml_tests --ignore=tests/adapters/test_tensorboard.py + - uses: actions/checkout@v4 + + - name: Set up Python 3.10 + uses: actions/setup-python@v5 + with: + python-version: "3.10" + cache: "pip" + + - name: Show OS Info + run: '[[ "$OSTYPE" == "linux-gnu"* ]] && { echo "OS Type: Linux"; (command -v lsb_release &> /dev/null && lsb_release -a) || cat /etc/os-release; uname -r; } || [[ "$OSTYPE" == "darwin"* ]] && { echo "OS Type: macOS"; sw_vers; uname -r; } || echo "Unsupported OS type: $OSTYPE"' + + - name: Copy settings file + run: | + mkdir ~/.flowcept + cp resources/sample_settings.yaml ~/.flowcept + mv ~/.flowcept/sample_settings.yaml ~/.flowcept/settings.yaml + + - name: Install package and dependencies + run: | + python -m pip install --upgrade pip + python -m pip install .[all] + + - name: List installed packages + run: pip list + + - name: Start docker compose with redis + run: docker compose -f deployment/compose.yml up -d + + - name: Test with pytest and redis + run: | + pytest --ignore=tests/decorator_tests/ml_tests/llm_tests + + - name: Test notebooks with pytest and redis + run: | + python src/flowcept/flowcept_webserver/app.py & + sleep 3 + export FLOWCEPT_SETTINGS_PATH=~/.flowcept/settings.yaml + pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb + + - name: Shut down docker compose + run: docker compose -f deployment/compose.yml down + + - name: Start docker compose with kafka + run: docker compose -f deployment/compose-kafka.yml up -d + + - name: Wait for one minute + run: sleep 60 + + - name: Check liveness + run: | + export MQ_TYPE=kafka + export MQ_PORT=9092 + python -c 'from flowcept.configs import MQ_TYPE, MQ_PORT; print(f"MQ_TYPE={MQ_TYPE}"); print(f"MQ_PORT={MQ_PORT}")' + python -c 'from flowcept import Flowcept; assert Flowcept.services_alive()' + + - name: Run tests with kafka + run: | + export MQ_TYPE=kafka + export MQ_PORT=9092 + # Ignoring heavy tests. They are executed with Kafka in another GH Action. 
+ pytest --ignore=tests/decorator_tests/ml_tests --ignore=tests/adapters/test_tensorboard.py diff --git a/.github/workflows/test-python-310-macos.yml b/.github/workflows/test-python-310-macos.yml deleted file mode 100644 index 6b5880e4..00000000 --- a/.github/workflows/test-python-310-macos.yml +++ /dev/null @@ -1,47 +0,0 @@ -name: Test Python 3.10 - MacOS -on: - pull_request: - branches: [ "disabled" ] #[ "dev", "main" ] - types: [opened, synchronize, reopened] -jobs: - build: - runs-on: macos-latest - timeout-minutes: 60 - if: "!contains(github.event.head_commit.message, 'CI Bot')" -# env: -# FLOWCEPT_SETTINGS_PATH: 'resources/settings.yaml' - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Check python version - run: python --version - - name: Install our dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[full] - pip install -r extra_requirements/dev-requirements.txt - - name: Install docker - run: | - brew install docker docker-compose - brew install colima - colima start - mkdir -p ~/.docker/cli-plugins - echo $HOMEBREW_PREFIX - ln -sfn $HOMEBREW_PREFIX/opt/docker-compose/bin/docker-compose ~/.docker/cli-plugins/docker-compose - #ln -sfn /usr/local/opt/docker-compose/bin/docker-compose ~/.docker/cli-plugins/docker-compose - - name: Run Docker Compose - run: | - docker compose version - docker compose -f deployment/compose-full.yml up -d - - name: Test with pytest - run: | - pytest --ignore=tests/decorator_tests/ml_tests/llm_tests/ - - name: Test notebooks - run: | - python flowcept/flowcept_webserver/app.py & - sleep 3 - export FLOWCEPT_SETTINGS_PATH=~/.flowcept/settings.yaml - pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb diff --git a/.github/workflows/test-python-310.yml b/.github/workflows/test-python-310.yml deleted file mode 100644 index 6d49709e..00000000 --- a/.github/workflows/test-python-310.yml +++ /dev/null @@ -1,38 +0,0 @@ -name: Test Python 3.10 -on: - pull_request: - branches: [ "dev", "main" ] - types: [opened, synchronize, reopened] - # branches: [ "disabled" ] - -jobs: - - build: - runs-on: ubuntu-latest - timeout-minutes: 60 - if: "!contains(github.event.head_commit.message, 'CI Bot')" - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Check python version - run: python --version - - name: Install our dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[full] - pip install -r extra_requirements/dev-requirements.txt - - name: Run Docker Compose - run: docker compose -f deployment/compose-full.yml up -d - - name: Test with pytest - run: | - pytest --ignore=tests/decorator_tests/ml_tests/llm_tests - - name: Test notebooks - run: | - pip install -e . - python flowcept/flowcept_webserver/app.py & - sleep 3 - export FLOWCEPT_SETTINGS_PATH=~/.flowcept/settings.yaml - pytest --nbmake "notebooks/" --nbmake-timeout=600 --ignore=notebooks/dask_from_CLI.ipynb diff --git a/.github/workflows/test-python-311.yml b/.github/workflows/test-python-311.yml deleted file mode 100644 index 5962c3d2..00000000 --- a/.github/workflows/test-python-311.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Test Python 3.11 -on: - pull_request: - # branches: [ "dev", "main" ] - # Apparently there is a lib incompatibility with the Dask adapter for py3.11. 
See: - # https://github.com/ORNL/flowcept/actions/runs/6203429881/job/16844012184 - branches: [ "disabled" ] - -jobs: - - build: - runs-on: ubuntu-latest - timeout-minutes: 60 - if: "!contains(github.event.head_commit.message, 'CI Bot')" -# env: -# FLOWCEPT_SETTINGS_PATH: 'resources/settings.yaml' - steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.11 - uses: actions/setup-python@v3 - with: - python-version: "3.11" - - name: Check python version - run: python --version - - name: Install our dependencies - run: | - python -m pip install --upgrade pip - pip install -e .[full] - pip install -r extra_requirements/dev-requirements.txt - - name: Run Docker Compose - run: docker compose -f deployment/compose-full.yml up -d - - name: Test with pytest - run: | - pytest --ignore=tests/decorator_tests/ml_tests/llm_tests/ - - name: Test notebooks - run: | - python flowcept/flowcept_webserver/app.py & - sleep 3 - export FLOWCEPT_SETTINGS_PATH=~/.flowcept/settings.yaml - pytest --nbmake "notebooks/" --ignore=notebooks/dask_from_CLI.ipynb diff --git a/.github/workflows/version_bumper.py b/.github/workflows/version_bumper.py index 685af304..de54b20f 100644 --- a/.github/workflows/version_bumper.py +++ b/.github/workflows/version_bumper.py @@ -1,15 +1,15 @@ import re -version_file_path = "flowcept/version.py" +version_file_path = "src/flowcept/version.py" with open(version_file_path) as f: - exec(f.read()) + code_str = f.read() + exec(code_str) version = locals()["__version__"] split_version = version.split(".") old_patch_str = split_version[2] re_found = re.findall(r"(\d+)(.*)", old_patch_str)[0] old_patch_number = re_found[0] -# old_branch = re_found[1] new_patch_str = old_patch_str.replace( old_patch_number, str(int(old_patch_number) + 1) @@ -19,13 +19,7 @@ new_version = ".".join(split_version) print("New version: " + new_version) +new_code_str = code_str.replace(version, new_version) with open(version_file_path, "w") as f: - f.write( - f"""# WARNING: CHANGE THIS FILE MANUALLY ONLY TO RESOLVE CONFLICTS! -# This file is supposed to be automatically modified by the CI Bot. -# The expected format is: .. -# See .github/workflows/version_bumper.py -__version__ = "{new_version}" -""" - ) + f.write(new_code_str) diff --git a/README.md b/README.md index e18f1108..db9eaa7c 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,8 @@ [![Build](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml/badge.svg)](https://github.com/ORNL/flowcept/actions/workflows/create-release-n-publish.yml) [![PyPI](https://badge.fury.io/py/flowcept.svg)](https://pypi.org/project/flowcept) [![Tests](https://github.com/ORNL/flowcept/actions/workflows/run-tests.yml/badge.svg)](https://github.com/ORNL/flowcept/actions/workflows/run-tests.yml) -[![Code Formatting](https://github.com/ORNL/flowcept/actions/workflows/code-formatting.yml/badge.svg)](https://github.com/ORNL/flowcept/actions/workflows/code-formatting.yml) +[![Code Formatting](https://github.com/ORNL/flowcept/actions/workflows/run-checks.yml/badge.svg)](https://github.com/ORNL/flowcept/actions/workflows/run-checks.yml) [![License: MIT](https://img.shields.io/github/license/ORNL/flowcept)](LICENSE) -[![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) # FlowCept @@ -32,18 +31,31 @@ term 'plugin' in the codebase as a synonym to adapter. Future releases should st 1. Install FlowCept: -`pip install .[full]` in this directory (or `pip install flowcept[full]`). 
+`pip install .[all]` in this directory (or `pip install flowcept[all]`) if you want to install all dependencies. For convenience, this installs the dependencies for every adapter, but it may install dependencies for adapters you will not use. In that case, install only the extras for the adapters you need: `pip install .[adapter_key1,adapter_key2]`, e.g., `pip install .[dask]`. -See [extra_requirements](extra_requirements) if you want to install the dependencies individually. +Currently, the optional dependencies available are: + +``` +pip install flowcept[mlflow] # To install mlflow's adapter. +pip install flowcept[dask] # To install dask's adapter. +pip install flowcept[tensorboard] # To install tensorboard's adapter. +pip install flowcept[kafka] # To use Kafka as the MQ instead of Redis. +pip install flowcept[nvidia] # To capture NVIDIA GPU runtime information. +pip install flowcept[analytics] # For extra analytics features. +pip install flowcept[dev] # To install dev dependencies. +``` + +You do not need to install any optional dependency to run Flowcept without any adapter, e.g., if you want to use simple instrumentation (see below). +In this case, you need to remove the adapter part from the [settings.yaml](resources/settings.yaml) file. 2. Start MongoDB and Redis: To enable the full advantages of FlowCept, one needs to start a Redis and MongoDB instances. FlowCept uses Redis as its message queue system and Mongo for its persistent database. -For convenience, we set up a [docker-compose file](deployment/compose.yml) deployment file for this. Run `docker-compose -f deployment/compose.yml up`. RabbitMQ is only needed if Zambeze messages are observed, otherwise, feel free to comment out RabbitMQ service in the compose file. +For convenience, we provide a [docker-compose file](deployment/compose.yml) for this. Run `docker-compose -f deployment/compose.yml up`. 3. Define the settings (e.g., routes and ports) accordingly in the [settings.yaml](resources/settings.yaml) file. You may need to set the environment variable `FLOWCEPT_SETTINGS_PATH` with the absolute path to the settings file. @@ -99,7 +111,9 @@ If you are doing extensive performance evaluation experiments using this softwar ## Install AMD GPU Lib -On the machines that have AMD GPUs, we use the official AMD ROCM library to capture GPU runtime data. +This section is only important if you want to enable GPU runtime data capture and the GPU is from AMD. NVIDIA GPUs don't need this step. + +For AMD GPUs, we rely on the official AMD ROCM library to capture GPU data. Unfortunately, this library is not available as a pypi/conda package, so you must manually install it. See instructions in the link: https://rocm.docs.amd.com/projects/amdsmi/en/latest/ @@ -114,13 +128,14 @@ Here is a summary: Current code is compatible with this version: amdsmi==24.6.2+2b02a07 Which was installed using Frontier's /opt/rocm-6.2.0/share/amd_smi -## See also +## Torch Dependencies -- [Zambeze Repository](https://github.com/ORNL/zambeze) +Some unit tests utilize `torch==2.2.2`, `torchtext==0.17.2`, and `torchvision==0.17.2`. They are only needed to run some tests and will be installed if you run `pip install flowcept[ml_dev]` or `pip install flowcept[all]`. +If you want to use FlowCept with Torch, please adapt the torch dependencies to your project's requirements. ## Cite us -If you used FlowCept for your research, consider citing our paper.
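As an illustration of the no-adapter, simple-instrumentation usage mentioned in the README hunk above (a minimal sketch only, not part of this diff): it assumes the `flowcept_task` decorator and `Flowcept` class exported by `src/flowcept/__init__.py`, and the `workflow_name` keyword argument is an assumption here.

```python
from flowcept import Flowcept, flowcept_task

@flowcept_task  # sketch: the decorator is exported in __init__.py; exact capture behavior is not shown in this diff
def add_one(n: int) -> int:
    return n + 1

# 'workflow_name' is an assumed keyword argument for this illustration.
with Flowcept(workflow_name="example_workflow"):
    print(add_one(1))
```

The sketch also assumes the Redis and MongoDB services from `deployment/compose.yml` are running, as described in step 2 above.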
+If you used FlowCept in your research, consider citing our paper. ``` Towards Lightweight Data Integration using Multi-workflow Provenance and Data Observability diff --git a/extra_requirements/amd-requirements.txt b/extra_requirements/amd-requirements.txt deleted file mode 100644 index 2b39d7de..00000000 --- a/extra_requirements/amd-requirements.txt +++ /dev/null @@ -1,12 +0,0 @@ -# On the machines that have AMD GPUs, we use the official AMD ROCM library to capture GPU runtime data. Unfortunately, this library is not available as a pypi/conda package, so you must manually install it. See instructions in the link: https://rocm.docs.amd.com/projects/amdsmi/en/latest/ - -# Here is a summary: - -# 1. Install the AMD drivers on the machine (check if they are available already under `/opt/rocm-*`). -# 2. Suppose it is /opt/rocm-6.2.0. Then, make sure it has a share/amd_smi subdirectory and pyproject.toml or setup.py in it. -# 3. Copy the amd_smi to your home directory: `cp -r /opt/rocm-6.2.0/share/amd_smi ~` -# 4. cd ~/amd_smi -# 5. In your python environment, do a pip install . - -# Current code is compatible with this version: amdsmi==24.6.2+2b02a07 -# Which was installed using Frontier's /opt/rocm-6.2.0/share/amd_smi diff --git a/extra_requirements/analytics-requirements.txt b/extra_requirements/analytics-requirements.txt deleted file mode 100644 index 5f323650..00000000 --- a/extra_requirements/analytics-requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -seaborn==0.13.2 -plotly==5.18.0 -scipy==1.10.1 diff --git a/extra_requirements/dask-requirements.txt b/extra_requirements/dask-requirements.txt deleted file mode 100644 index 8f8a8a10..00000000 --- a/extra_requirements/dask-requirements.txt +++ /dev/null @@ -1,5 +0,0 @@ -tomli==1.1.0 -dask[distributed]==2023.11.0 -#dask[distributed]==2023.5.0 - - diff --git a/extra_requirements/dev-requirements.txt b/extra_requirements/dev-requirements.txt deleted file mode 100644 index 7b044ead..00000000 --- a/extra_requirements/dev-requirements.txt +++ /dev/null @@ -1,14 +0,0 @@ -pytest==6.2.4 -flake8==5.0.4 -black==23.1.0 -numpy<2.0.0 -bokeh==2.4.2 -jupyterlab -nbmake -# Pytorch models stuff: -torch -torchvision -datasets -torchtext -sacremoses -nltk diff --git a/extra_requirements/kafka-requirements.txt b/extra_requirements/kafka-requirements.txt deleted file mode 100644 index 8f27adb3..00000000 --- a/extra_requirements/kafka-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -confluent-kafka==2.5.3 diff --git a/extra_requirements/mlflow-requirements.txt b/extra_requirements/mlflow-requirements.txt deleted file mode 100644 index f21f410c..00000000 --- a/extra_requirements/mlflow-requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -mlflow-skinny>2.1.1,<=2.16.2 -SQLAlchemy==1.4.42 -alembic==1.8.1 -watchdog==2.2.1 diff --git a/extra_requirements/nvidia-requirements.txt b/extra_requirements/nvidia-requirements.txt deleted file mode 100644 index 475d88b7..00000000 --- a/extra_requirements/nvidia-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -nvidia-ml-py==11.525.131 diff --git a/extra_requirements/responsible_ai-requirements.txt b/extra_requirements/responsible_ai-requirements.txt deleted file mode 100644 index 9fa1df3f..00000000 --- a/extra_requirements/responsible_ai-requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -#shap==0.42.1 -torch diff --git a/extra_requirements/tensorboard-requirements.txt b/extra_requirements/tensorboard-requirements.txt deleted file mode 100644 index 6e337fab..00000000 --- a/extra_requirements/tensorboard-requirements.txt +++ /dev/null @@ -1,3 
+0,0 @@ -tensorboard -tensorflow -tbparse==0.0.7 diff --git a/extra_requirements/zambeze-requirements.txt b/extra_requirements/zambeze-requirements.txt deleted file mode 100644 index 9e4d8e25..00000000 --- a/extra_requirements/zambeze-requirements.txt +++ /dev/null @@ -1 +0,0 @@ -pika==1.3.1 diff --git a/flowcept/analytics/__init__.py b/flowcept/analytics/__init__.py deleted file mode 100644 index 6bf2c5a6..00000000 --- a/flowcept/analytics/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from flowcept.analytics.analytics_utils import ( - clean_dataframe, - analyze_correlations_used_vs_generated, - analyze_correlations, - analyze_correlations_used_vs_telemetry_diff, - analyze_correlations_generated_vs_telemetry_diff, - analyze_correlations_between, - describe_col, - describe_cols, -) diff --git a/flowcept/commons/daos/__init__.py b/flowcept/commons/daos/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/commons/daos/mq_dao/__init__.py b/flowcept/commons/daos/mq_dao/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/commons/flowcept_dataclasses/__init__.py b/flowcept/commons/flowcept_dataclasses/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/flowcept_api/__init__.py b/flowcept/flowcept_api/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/flowcept_webserver/__init__.py b/flowcept/flowcept_webserver/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/flowcept_webserver/resources/__init__.py b/flowcept/flowcept_webserver/resources/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/flowceptor/__init__.py b/flowcept/flowceptor/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/flowceptor/adapters/__init__.py b/flowcept/flowceptor/adapters/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/flowceptor/adapters/dask/__init__.py b/flowcept/flowceptor/adapters/dask/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/flowceptor/adapters/mlflow/__init__.py b/flowcept/flowceptor/adapters/mlflow/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/flowcept/flowceptor/adapters/tensorboard/__init__.py b/flowcept/flowceptor/adapters/tensorboard/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/flowcept/flowceptor/adapters/zambeze/__init__.py b/flowcept/flowceptor/adapters/zambeze/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/flowcept/flowceptor/consumers/__init__.py b/flowcept/flowceptor/consumers/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/flowcept/instrumentation/__init__.py b/flowcept/instrumentation/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/notebooks/zambeze.ipynb b/notebooks/zambeze.ipynb deleted file mode 100644 index 979e87a7..00000000 --- a/notebooks/zambeze.ipynb +++ /dev/null @@ -1,233 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "63d4d068-b0e9-4756-a5c8-e6d5b929a498", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# ! 
pip install flowcept[zambeze]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "dc5fbc8b-37b7-456b-b3b8-d481b5146d24", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Optionally set up env vars to control Flowcept's log level\n", - "%env LOG_STREAM_LEVEL=\"error\"\n", - "%env LOG_FILE_LEVEL=\"debug\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20dcc9d3-8895-4159-8892-5c7968c5a0ac", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# Sleeps are used because these notebooks are being tested automatically as part of the CI/CD. \n", - "# In a normal user interaction, these sleeps would not be necessary.\n", - "from time import sleep" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "710ec404-47c9-4e01-a0f4-a3e22eb9598b", - "metadata": {}, - "outputs": [], - "source": [ - "def connect_zambeze_queue():\n", - " import pika\n", - " connection = pika.BlockingConnection(\n", - " pika.ConnectionParameters('localhost', 5672)\n", - " )\n", - " channel = connection.channel()\n", - " channel.queue_declare('hello')\n", - " return channel\n", - "\n", - "def send_zambeze_message(channel):\n", - " from uuid import uuid4\n", - " import json\n", - " another_act_id = str(uuid4())\n", - " act_id = str(uuid4())\n", - " msg = {\n", - " \"name\": \"ImageMagick\",\n", - " \"activity_id\": act_id,\n", - " \"campaign_id\": \"campaign-uuid\",\n", - " \"origin_agent_id\": \"def-uuid\",\n", - " \"files\": [\"globus://Users/6o1/file.txt\"],\n", - " \"command\": \"convert\",\n", - " \"activity_status\": \"CREATED\",\n", - " \"arguments\": [\n", - " \"-delay\",\n", - " \"20\",\n", - " \"-loop\",\n", - " \"0\",\n", - " \"~/tests/campaigns/imagesequence/*.jpg\",\n", - " \"a.gif\",\n", - " ],\n", - " \"kwargs\": {},\n", - " \"depends_on\": [another_act_id],\n", - " }\n", - " channel.basic_publish(\n", - " exchange=\"\",\n", - " routing_key=\"hello\",\n", - " body=json.dumps(msg),\n", - " )\n", - " return act_id" - ] - }, - { - "cell_type": "markdown", - "id": "ab40e7aa-a87f-4c8d-bac3-2d41bf2e5d40", - "metadata": {}, - "source": [ - "## Start Zambeze Flowceptor" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6d04a420-97aa-426b-a700-99c21318add7", - "metadata": {}, - "outputs": [], - "source": [ - "from flowcept import ZambezeInterceptor\n", - "interceptor = ZambezeInterceptor()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fe597f91-4a2b-4b3e-aa8e-2c19c5b4f0ce", - "metadata": {}, - "outputs": [], - "source": [ - "from flowcept import Flowcept\n", - "flowcept = Flowcept(interceptor)\n", - "flowcept.start()" - ] - }, - { - "cell_type": "markdown", - "id": "b28d0a53-7c14-46c5-bb46-872594a49c8d", - "metadata": {}, - "source": [ - "## Send Zambeze Message" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d76d4b5a-21f3-43a6-9e5b-574e3b2a3bb1", - "metadata": {}, - "outputs": [], - "source": [ - "channel = connect_zambeze_queue()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11bdd8d0-3be3-4e9e-bcc6-733653c3c097", - "metadata": {}, - "outputs": [], - "source": [ - "act_id = send_zambeze_message(channel)\n", - "act_id" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cb06f2c0-32f9-45db-b2dc-c2acccf9e764", - "metadata": {}, - "outputs": [], - "source": [ - "sleep(10)" - ] - }, - { - "cell_type": "markdown", - "id": "b9f36a26-e27b-4e22-8a35-d6f018726c8f", - "metadata": {}, - "source": [ - "## Check the 
task in Flowcept's database" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8674ac56-96cf-4e7e-8c4b-7470808c5037", - "metadata": {}, - "outputs": [], - "source": [ - "from flowcept import TaskQueryAPI\n", - "query_api = TaskQueryAPI()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea981434-215b-4e7e-8b2d-be998dc5ce96", - "metadata": {}, - "outputs": [], - "source": [ - "_filter = {\"task_id\": act_id}\n", - "query_api.query(_filter)" - ] - }, - { - "cell_type": "markdown", - "id": "e108e673-78c2-49b7-91a2-8954c93976de", - "metadata": { - "tags": [] - }, - "source": [ - "## Stop consumer" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "feb37d75-7ad6-4b0a-8213-70af8718c3b4", - "metadata": {}, - "outputs": [], - "source": [ - "flowcept.stop()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.19" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pyproject.toml b/pyproject.toml index 5b3fb8a3..c78cc9e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,104 @@ -[tool.black] -line-length = 78 -target-version = ['py37'] +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "flowcept" +dynamic = ["version"] +requires-python = ">=3.10" +dependencies = [ + "flask-restful", + "msgpack", + "omegaconf", + "pandas", + "psutil", + "py-cpuinfo", + "pymongo", + "redis", + "requests" +] +authors = [{name = "Oak Ridge National Laboratory"}] +description = "Capture and query workflow provenance data using data observability" +readme = "README.md" +license = "MIT" +classifiers = [ + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3" +] +keywords = [ + "ai", + "ml", + "machine-learning", + "provenance", + "lineage", + "responsible-ai", + "databases", + "big-data", + "provenance", + "tensorboard", + "data-integration", + "scientific-workflows", + "dask", + "reproducibility", + "workflows", + "parallel-processing", + "lineage", + "model-management", + "mlflow", + "responsible-ai", + "data-analytics" +] + +[project.urls] +GitHub = "https://github.com/ORNL/flowcept" + +[project.optional-dependencies] +analytics = ["seaborn", "plotly", "scipy"] +dask = ["tomli", "dask[distributed]"] +kafka = ["confluent-kafka"] +mlflow = ["mlflow-skinny", "SQLAlchemy", "alembic", "watchdog"] +nvidia = ["nvidia-ml-py"] +responsibleai = ["torch"] +tensorboard = ["tensorboard", "tensorflow", "tbparse"] +dev = [ + "jupyterlab", + "pika", + "pytest", + "nbmake", + "ruff", +] +# Torch and some other ml-specific libs, only used for dev purposes, require the following specific versions. 
+ml_dev = [ + "torch==2.2.2", + "torchvision==0.17.2", + "torchtext==0.17.2", + "datasets==2.17.0", + "numpy<2.0", + "sacremoses", + "nltk" +] +all = [ + "flowcept[analytics]", + "flowcept[dask]", + "flowcept[kafka]", + "flowcept[mlflow]", + "flowcept[nvidia]", + "flowcept[responsibleai]", + "flowcept[tensorboard]", + "flowcept[dev]", + "flowcept[ml_dev]" +] + +[tool.hatch.version] +path = "src/flowcept/version.py" + +[tool.ruff] +line-length = 100 + +[tool.ruff.lint] +extend-select = ["E501", "D"] +ignore = ["D200", "D212"] + +[tool.ruff.lint.pydocstyle] +convention = "numpy" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 2b54d0b8..00000000 --- a/requirements.txt +++ /dev/null @@ -1,11 +0,0 @@ -redis==4.4.2 -psutil==5.9.5 -py-cpuinfo==9.0.0 -pymongo==4.3.3 -pandas -omegaconf -flask -requests -flask_restful -Werkzeug -msgpack diff --git a/setup.py b/setup.py deleted file mode 100644 index 6fe78529..00000000 --- a/setup.py +++ /dev/null @@ -1,143 +0,0 @@ -import os -import re -import shutil -from setuptools import setup, find_packages - - -PROJECT_NAME = os.getenv("PROJECT_NAME", "flowcept") - -with open("flowcept/version.py") as f: - exec(f.read()) - version = locals()["__version__"] - - -def get_descriptions(): - with open("README.md") as f: - readme_content = f.read() - - pattern = r"# {}\s*?\n\n(.+?)\n\n".format(re.escape(PROJECT_NAME)) - match = re.search(pattern, readme_content, re.DOTALL | re.IGNORECASE) - - if match: - _short_description = match.group(1) - _short_description = _short_description.strip().replace("\n", "") - return _short_description, readme_content - else: - raise Exception("Could not find a match for the description!") - - -def get_requirements(file_path): - with open(file_path) as f: - __requirements = [] - for line in f.read().splitlines(): - if not line.startswith("#"): - __requirements.append(line) - return __requirements - - -def create_settings_file(): - directory_path = os.path.expanduser(f"~/.{PROJECT_NAME}") - os.makedirs(directory_path, exist_ok=True) - source_file = "resources/sample_settings.yaml" - destination_file = os.path.join(directory_path, "settings.yaml") - shutil.copyfile(source_file, destination_file) - print(f"Copied settings file to {destination_file}") - - -requirements = get_requirements("requirements.txt") -full_requirements = requirements.copy() - -# We don't install dev requirements in the user lib. 
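The `[project.optional-dependencies]` table introduced in `pyproject.toml` above replaces the deleted `extra_requirements/*.txt` files. As a side note, not part of this diff, the available extras can be listed programmatically from the repository root; this sketch assumes Python 3.11+ for `tomllib` (on 3.10, the project's minimum, the `tomli` package provides the same API):

```python
# List the optional-dependency groups now defined in pyproject.toml.
import tomllib  # standard library in Python 3.11+; use the 'tomli' package on 3.10

with open("pyproject.toml", "rb") as f:
    meta = tomllib.load(f)

for extra, deps in meta["project"]["optional-dependencies"].items():
    print(f"{extra}: {', '.join(deps)}")
```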
-extras_requirement_keys = [ - "zambeze", - "mlflow", - "dask", - "nvidia", - "amd", - "analytics", - "responsible_ai", - "kafka", - "tensorboard", -] - -skip_full = {"amd", "nvidia"} - -extras_require = dict() -for req in extras_requirement_keys: - req_path = f"extra_requirements/{req}-requirements.txt" - _requirements = get_requirements(req_path) - extras_require[req] = _requirements - if req not in skip_full: - full_requirements.extend(_requirements) - - -extras_require["full"] = full_requirements - -fulldev = full_requirements.copy() -fulldev.extend(get_requirements(f"extra_requirements/dev-requirements.txt")) - -extras_require["fulldev"] = fulldev - -keywords = [ - "ai", - "ml", - "machine-learning", - "provenance", - "lineage", - "responsible-ai", - "databases", - "big-data", - "provenance", - "tensorboard", - "data-integration", - "scientific-workflows", - "dask", - "reproducibility", - "workflows", - "parallel-processing", - "lineage", - "model-management", - "mlflow", - "responsible-ai", - "data-analytics", -] - -short_description, long_description = get_descriptions() - -create_settings_file() - -setup( - name=PROJECT_NAME, - version=version, - license="MIT", - author="Oak Ridge National Laboratory", - # author_email="support@flowcept.org", - description=short_description, - long_description=long_description, - long_description_content_type="text/markdown", - url="https://github.com/ORNL/flowcept", - include_package_data=True, - install_requires=requirements, - extras_require=extras_require, - packages=find_packages(exclude=("tests", "notebooks", "deployment")), - keywords=keywords, - classifiers=[ - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "Natural Language :: English", - # "Topic :: Documentation :: Sphinx", - "Topic :: System :: Distributed Computing", - "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Scientific/Engineering :: Information Analysis", - "Topic :: System :: Logging", - "Topic :: System :: Monitoring", - "Topic :: Database", - ], - python_requires=">=3.9", -) diff --git a/flowcept/__init__.py b/src/flowcept/__init__.py similarity index 74% rename from flowcept/__init__.py rename to src/flowcept/__init__.py index 4438052a..a8dc26f7 100644 --- a/flowcept/__init__.py +++ b/src/flowcept/__init__.py @@ -1,3 +1,5 @@ +"""Flowcept package.""" + import flowcept from flowcept.configs import SETTINGS_PATH @@ -31,9 +33,7 @@ ) except Exception as _exp: flowcept.commons.logger.error( - flowcept.commons.get_adapter_exception_msg( - Vocabulary.Settings.ZAMBEZE_KIND - ) + flowcept.commons.get_adapter_exception_msg(Vocabulary.Settings.ZAMBEZE_KIND) ) flowcept.commons.logger.exception(_exp) @@ -44,9 +44,7 @@ ) except Exception as _exp: flowcept.commons.logger.error( - flowcept.commons.get_adapter_exception_msg( - Vocabulary.Settings.TENSORBOARD_KIND - ) + flowcept.commons.get_adapter_exception_msg(Vocabulary.Settings.TENSORBOARD_KIND) ) flowcept.commons.logger.exception(_exp) @@ -57,9 +55,7 @@ ) except Exception as _exp: flowcept.commons.logger.error( - flowcept.commons.get_adapter_exception_msg( - Vocabulary.Settings.MLFLOW_KIND - ) + flowcept.commons.get_adapter_exception_msg(Vocabulary.Settings.MLFLOW_KIND) ) flowcept.commons.logger.exception(_exp) @@ -70,7 +66,19 @@ 
FlowceptDaskWorkerAdapter, ) except Exception as _exp: - flowcept.commons.get_adapter_exception_msg( - Vocabulary.Settings.DASK_KIND - ) + flowcept.commons.get_adapter_exception_msg(Vocabulary.Settings.DASK_KIND) flowcept.commons.logger.exception(_exp) + +__all__ = [ + "FlowceptDaskWorkerAdapter", + "FlowceptDaskSchedulerAdapter", + "MLFlowInterceptor", + "TensorboardInterceptor", + "ZambezeInterceptor", + "WorkflowObject", + "flowcept_task", + "TaskQueryAPI", + "Flowcept", + "__version__", + "SETTINGS_PATH", +] diff --git a/src/flowcept/analytics/__init__.py b/src/flowcept/analytics/__init__.py new file mode 100644 index 00000000..f4be6deb --- /dev/null +++ b/src/flowcept/analytics/__init__.py @@ -0,0 +1,23 @@ +"""Analytics subpackage.""" + +from flowcept.analytics.analytics_utils import ( + clean_dataframe, + analyze_correlations_used_vs_generated, + analyze_correlations, + analyze_correlations_used_vs_telemetry_diff, + analyze_correlations_generated_vs_telemetry_diff, + analyze_correlations_between, + describe_col, + describe_cols, +) + +__all__ = [ + "clean_dataframe", + "analyze_correlations_used_vs_generated", + "analyze_correlations", + "analyze_correlations_generated_vs_telemetry_diff", + "analyze_correlations_used_vs_telemetry_diff", + "analyze_correlations_between", + "describe_col", + "describe_cols", +] diff --git a/flowcept/analytics/analytics_utils.py b/src/flowcept/analytics/analytics_utils.py similarity index 78% rename from flowcept/analytics/analytics_utils.py rename to src/flowcept/analytics/analytics_utils.py index e1adee4a..1a70a779 100644 --- a/flowcept/analytics/analytics_utils.py +++ b/src/flowcept/analytics/analytics_utils.py @@ -1,6 +1,7 @@ +"""Analytics utility module.""" + import logging import numbers - import numpy as np import pandas as pd @@ -10,12 +11,14 @@ def is_list(val): + """Check if list.""" if type(val) in {list, np.array, pd.Series}: return True return False def flatten_list_with_sum(val): + """Flatten list with sum.""" _sum = 0 if is_list(val): for el in val: @@ -39,7 +42,7 @@ def clean_dataframe( sum_lists=False, aggregate_telemetry=False, ) -> pd.DataFrame: - """ + """Clean the dataframe. :param sum_lists: :param keep_task_id: @@ -49,12 +52,11 @@ def clean_dataframe( :param logger: :param keep_telemetry_percent_columns: :param aggregate_telemetry: We use some very simplistic forms of aggregations just - to reduce the complexity of the dataframe. Use this feature very carefully as the aggregation may be misleading. + to reduce the complexity of the dataframe. Use this feature very carefully as the + aggregation may be misleading. 
:return: """ - has_telemetry_diff_columns = any( - col.startswith("telemetry_diff") for col in df.columns - ) + has_telemetry_diff_columns = any(col.startswith("telemetry_diff") for col in df.columns) logmsg = f"Number of columns originally: {len(df.columns)}" if logger: @@ -76,9 +78,7 @@ def clean_dataframe( if sum_lists: # Identify the original columns that were lists or lists of lists list_cols = [ - col - for col in dfa.columns - if any(isinstance(val, (list, list)) for val in dfa[col]) + col for col in dfa.columns if any(isinstance(val, (list, list)) for val in dfa[col]) ] cols_to_drop = [] @@ -114,40 +114,26 @@ def clean_dataframe( if aggregate_telemetry and has_telemetry_diff_columns: cols_to_drop = [] - network_cols = [ - col - for col in dfa.columns - if col.startswith("telemetry_diff.network") - ] - dfa["telemetry_diff.network.activity"] = dfa[network_cols].mean( - axis=1 - ) + network_cols = [col for col in dfa.columns if col.startswith("telemetry_diff.network")] + dfa["telemetry_diff.network.activity"] = dfa[network_cols].mean(axis=1) io_sum_cols = [col for col in dfa.columns if "disk.io_sum" in col] dfa["telemetry_diff.disk.activity"] = dfa[io_sum_cols].mean(axis=1) - processes_nums_cols = [ - col for col in dfa.columns if "telemetry_diff.process.num_" in col - ] - dfa["telemetry_diff.process.activity"] = dfa[processes_nums_cols].sum( - axis=1 - ) + processes_nums_cols = [col for col in dfa.columns if "telemetry_diff.process.num_" in col] + dfa["telemetry_diff.process.activity"] = dfa[processes_nums_cols].sum(axis=1) cols_to_drop.extend(processes_nums_cols) cols_to_drop.extend(network_cols) cols_to_drop.extend(io_sum_cols) - cols_to_drop.extend( - [col for col in dfa.columns if "disk.io_per_disk" in col] - ) + cols_to_drop.extend([col for col in dfa.columns if "disk.io_per_disk" in col]) dfa.drop(columns=cols_to_drop, inplace=True) # Removing any leftover cols cols_to_drop = [ - col - for col in dfa.columns - if "telemetry_at_start" in col or "telemetry_at_end" in col + col for col in dfa.columns if "telemetry_at_start" in col or "telemetry_at_end" in col ] if len(cols_to_drop): dfa.drop(columns=cols_to_drop, inplace=True) @@ -164,11 +150,10 @@ def clean_dataframe( def analyze_correlations(df, method="kendall", threshold=0): + """Analyze correlations.""" # Create a mask to select the upper triangle of the correlation matrix correlation_matrix = df.corr(method=method, numeric_only=True) - mask = correlation_matrix.where( - np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool) - ) + mask = correlation_matrix.where(np.triu(np.ones(correlation_matrix.shape), k=1).astype(bool)) corrs = [] # Iterate through the selected upper triangle of the correlation matrix @@ -177,9 +162,7 @@ def analyze_correlations(df, method="kendall", threshold=0): pair = (mask.columns[i], mask.columns[j]) corr = mask.iloc[i, j] # Get correlation value if abs(corr) >= threshold and pair[0] != pair[1]: - corrs.append( - (mask.columns[i], mask.columns[j], round(corr, 2)) - ) + corrs.append((mask.columns[i], mask.columns[j], round(corr, 2))) return pd.DataFrame( corrs, @@ -194,6 +177,7 @@ def analyze_correlations_between( method="kendall", threshold=0, ): + """Analyze correlations.""" corr_df = analyze_correlations(df, method, threshold) filtered_df = corr_df[ ( @@ -208,9 +192,8 @@ def analyze_correlations_between( return filtered_df -def analyze_correlations_used_vs_generated( - df: pd.DataFrame, method="kendall", threshold=0 -): +def analyze_correlations_used_vs_generated(df: pd.DataFrame, 
method="kendall", threshold=0): + """Analyze correlations.""" return analyze_correlations_between( df, col_pattern1="used[.]", @@ -220,9 +203,8 @@ def analyze_correlations_used_vs_generated( ) -def analyze_correlations_used_vs_telemetry_diff( - df: pd.DataFrame, method="kendall", threshold=0 -): +def analyze_correlations_used_vs_telemetry_diff(df: pd.DataFrame, method="kendall", threshold=0): + """Analyze correlations.""" return analyze_correlations_between( df, col_pattern1="^used[.]*", @@ -235,6 +217,7 @@ def analyze_correlations_used_vs_telemetry_diff( def analyze_correlations_generated_vs_telemetry_diff( df: pd.DataFrame, method="kendall", threshold=0 ): + """Analyze correlations.""" return analyze_correlations_between( df, col_pattern1="^generated[.]*", @@ -245,21 +228,19 @@ def analyze_correlations_generated_vs_telemetry_diff( def format_number(num): + """Format a number.""" suffixes = ["", "K", "M", "B", "T"] idx = 0 while abs(num) >= 1000 and idx < len(suffixes) - 1: idx += 1 num /= 1000.0 formatted = f"{num:.2f}" if num % 1 != 0 else f"{int(num)}" - formatted = ( - formatted.rstrip("0").rstrip(".") - if "." in formatted - else formatted.rstrip(".") - ) + formatted = formatted.rstrip("0").rstrip(".") if "." in formatted else formatted.rstrip(".") return f"{formatted}{suffixes[idx]}" def describe_col(df, col, label=None): + """Describe a column.""" label = col if label is None else label return { "label": label, @@ -274,15 +255,14 @@ def describe_col(df, col, label=None): def describe_cols(df, cols, col_labels): + """Describe columns.""" return pd.DataFrame( - [ - describe_col(df, col, col_label) - for col, col_label in zip(cols, col_labels) - ] + [describe_col(df, col, col_label) for col, col_label in zip(cols, col_labels)] ) def identify_pareto(df): + """Identify pareto.""" datav = df.values pareto = [] for i, point in enumerate(datav): @@ -292,14 +272,10 @@ def identify_pareto(df): def find_outliers_zscore(row, threshold=3): - numeric_columns = [ - col - for col, val in row.items() - if pd.api.types.is_numeric_dtype(type(val)) - ] + """Find outliers.""" + numeric_columns = [col for col, val in row.items() if pd.api.types.is_numeric_dtype(type(val))] z_scores = np.abs( - (row[numeric_columns] - row[numeric_columns].mean()) - / row[numeric_columns].std() + (row[numeric_columns] - row[numeric_columns].mean()) / row[numeric_columns].std() ) outliers_columns = list(z_scores[z_scores > threshold].index) return outliers_columns diff --git a/flowcept/analytics/data_augmentation.py b/src/flowcept/analytics/data_augmentation.py similarity index 82% rename from flowcept/analytics/data_augmentation.py rename to src/flowcept/analytics/data_augmentation.py index 88282d91..d69f5860 100644 --- a/flowcept/analytics/data_augmentation.py +++ b/src/flowcept/analytics/data_augmentation.py @@ -1,8 +1,9 @@ +"""Data augmentation module.""" + from typing import List import h2o import numpy as np import pandas as pd - from h2o.automl import H2OAutoML from typing_extensions import deprecated @@ -18,10 +19,9 @@ def train_model( train_test_split_size=0.8, seed=1234, ): + """Train model.""" h2o_df = h2o.H2OFrame(df) - train, test = h2o_df.split_frame( - ratios=[train_test_split_size], seed=seed - ) + train, test = h2o_df.split_frame(ratios=[train_test_split_size], seed=seed) aml = H2OAutoML(max_models=max_models, seed=seed) aml.train(x=x_cols, y=y_col, training_frame=train) @@ -30,6 +30,7 @@ def train_model( @deprecated def augment_df_linearly(df, N, cols_to_augment, seed=1234): + """Linearly augment 
dataframe.""" np.random.seed(seed) new_df = df.copy() new_df["original"] = 1 @@ -45,14 +46,13 @@ def augment_df_linearly(df, N, cols_to_augment, seed=1234): augmented_data["original"] = [0] * N - appended_df = pd.concat( - [new_df, pd.DataFrame(augmented_data)], ignore_index=True - ) + appended_df = pd.concat([new_df, pd.DataFrame(augmented_data)], ignore_index=True) return appended_df @deprecated def augment_data(df, N, augmentation_model: H2OAutoML, x_cols, y_col): + """Augment data.""" new_df = augment_df_linearly(df, N, x_cols) h2odf = h2o.H2OFrame(new_df.loc[new_df["original"] == 0][x_cols]) h2opred = augmentation_model.predict(h2odf) diff --git a/flowcept/analytics/plot.py b/src/flowcept/analytics/plot.py similarity index 94% rename from flowcept/analytics/plot.py rename to src/flowcept/analytics/plot.py index 216f4a30..22c318e7 100644 --- a/flowcept/analytics/plot.py +++ b/src/flowcept/analytics/plot.py @@ -1,3 +1,5 @@ +"""Plot module.""" + import pandas as pd import matplotlib.pyplot as plt import seaborn as sns @@ -7,10 +9,9 @@ from flowcept.analytics.analytics_utils import format_number, identify_pareto -def heatmap( - df: pd.DataFrame, method="kendall", figsize=(13, 10), heatmap_args={} -): - """ +def heatmap(df: pd.DataFrame, method="kendall", figsize=(13, 10), heatmap_args={}): + """Heat map plot. + :param figsize: :param heatmap_args: Any other argument for the heatmap. :param df: dataframe to plot the heatmap @@ -46,6 +47,7 @@ def scatter2d_with_colors( horizon_quantile=0.5, plot_pareto=True, ): + """Scatter 2D plot with colors.""" x_label = x_col if x_label is None else x_label y_label = y_col if y_label is None else y_label color_label = color_col if color_label is None else color_label diff --git a/flowcept/commons/__init__.py b/src/flowcept/commons/__init__.py similarity index 85% rename from flowcept/commons/__init__.py rename to src/flowcept/commons/__init__.py index d329e65d..d97ea2dc 100644 --- a/flowcept/commons/__init__.py +++ b/src/flowcept/commons/__init__.py @@ -1,10 +1,12 @@ -from flowcept.commons.utils import get_adapter_exception_msg +"""Commons subpackage.""" + from flowcept.commons.flowcept_logger import FlowceptLogger logger = FlowceptLogger() def singleton(cls): + """Create a singleton.""" instances = {} class SingletonWrapper(cls): diff --git a/src/flowcept/commons/daos/__init__.py b/src/flowcept/commons/daos/__init__.py new file mode 100644 index 00000000..0b152133 --- /dev/null +++ b/src/flowcept/commons/daos/__init__.py @@ -0,0 +1 @@ +"""DAOS subpackage.""" diff --git a/flowcept/commons/daos/autoflush_buffer.py b/src/flowcept/commons/daos/autoflush_buffer.py similarity index 94% rename from flowcept/commons/daos/autoflush_buffer.py rename to src/flowcept/commons/daos/autoflush_buffer.py index bac16ba7..19926009 100644 --- a/flowcept/commons/daos/autoflush_buffer.py +++ b/src/flowcept/commons/daos/autoflush_buffer.py @@ -1,10 +1,13 @@ -from typing import Callable +"""Autoflush module.""" +from typing import Callable from threading import Thread, Event from flowcept.commons.flowcept_logger import FlowceptLogger class AutoflushBuffer: + """Autoflush class.""" + def __init__( self, max_size, @@ -32,6 +35,7 @@ def __init__( self._flush_function_kwargs = flush_function_kwargs def append(self, item): + """Append it.""" # if self.stop_event.is_set(): # return buffer = self._buffers[self._current_buffer_index] @@ -40,6 +44,7 @@ def append(self, item): self._swap_event.set() def time_based_flush(self): + """Time flush.""" while not self._stop_event.is_set(): 
self._swap_event.wait(self._flush_interval) if not self._stop_event.is_set(): @@ -68,6 +73,7 @@ def _flush_buffers(self): break def stop(self): + """Stop it.""" self._stop_event.set() self._swap_event.set() self._flush_thread.join() diff --git a/flowcept/commons/daos/document_db_dao.py b/src/flowcept/commons/daos/document_db_dao.py similarity index 79% rename from flowcept/commons/daos/document_db_dao.py rename to src/flowcept/commons/daos/document_db_dao.py index 910bceed..2e1bd603 100644 --- a/flowcept/commons/daos/document_db_dao.py +++ b/src/flowcept/commons/daos/document_db_dao.py @@ -1,3 +1,5 @@ +"""Document module.""" + from typing import List, Dict, Tuple, Any import io import json @@ -35,6 +37,8 @@ @singleton class DocumentDBDao(object): + """Document class.""" + def __init__(self, create_index=MONGO_CREATE_INDEX): self.logger = FlowceptLogger() @@ -53,46 +57,27 @@ def __init__(self, create_index=MONGO_CREATE_INDEX): def _create_indices(self): # Creating task collection indices: - existing_indices = [ - list(x["key"].keys())[0] - for x in self._tasks_collection.list_indexes() - ] + existing_indices = [list(x["key"].keys())[0] for x in self._tasks_collection.list_indexes()] if TaskObject.task_id_field() not in existing_indices: - self._tasks_collection.create_index( - TaskObject.task_id_field(), unique=True - ) + self._tasks_collection.create_index(TaskObject.task_id_field(), unique=True) if TaskObject.workflow_id_field() not in existing_indices: - self._tasks_collection.create_index( - TaskObject.workflow_id_field() - ) + self._tasks_collection.create_index(TaskObject.workflow_id_field()) # Creating workflow collection indices: - existing_indices = [ - list(x["key"].keys())[0] - for x in self._wfs_collection.list_indexes() - ] + existing_indices = [list(x["key"].keys())[0] for x in self._wfs_collection.list_indexes()] if WorkflowObject.workflow_id_field() not in existing_indices: - self._wfs_collection.create_index( - WorkflowObject.workflow_id_field(), unique=True - ) + self._wfs_collection.create_index(WorkflowObject.workflow_id_field(), unique=True) # Creating objects collection indices: - existing_indices = [ - list(x["key"].keys())[0] - for x in self._obj_collection.list_indexes() - ] + existing_indices = [list(x["key"].keys())[0] for x in self._obj_collection.list_indexes()] if "object_id" not in existing_indices: self._obj_collection.create_index("object_id", unique=True) if WorkflowObject.workflow_id_field() not in existing_indices: - self._obj_collection.create_index( - WorkflowObject.workflow_id_field(), unique=False - ) + self._obj_collection.create_index(WorkflowObject.workflow_id_field(), unique=False) if TaskObject.task_id_field() not in existing_indices: - self._obj_collection.create_index( - TaskObject.task_id_field(), unique=False - ) + self._obj_collection.create_index(TaskObject.task_id_field(), unique=False) def task_query( self, @@ -103,36 +88,46 @@ def task_query( aggregation: List[Tuple] = None, remove_json_unserializables=True, ) -> List[Dict]: - """ + """Generate a mongo query pipeline. + Generates a MongoDB query pipeline based on the provided arguments. - Parameters: - filter (dict): The filter criteria for the $match stage. - projection (list, optional): List of fields to include in the $project stage. Defaults to None. - limit (int, optional): The maximum number of documents to return. Defaults to 0 (no limit). - sort (list of tuples, optional): List of (field, order) tuples specifying the sorting order. Defaults to None. 
- aggregation (list of tuples, optional): List of (aggregation_operator, field_name) tuples - specifying additional aggregation operations. Defaults to None. - remove_json_unserializables: removes fields that are not JSON serializable. Defaults to True - - Returns: - list: A list with the result set. - - Example: - # Create a pipeline with a filter, projection, sorting, and aggregation - rs = find( - filter={"campaign_id": "mycampaign1"}, - projection=["workflow_id", "started_at", "ended_at"], - limit=10, - sort=[("workflow_id", ASC), ("end_time", DESC)], - aggregation=[("avg", "ended_at"), ("min", "started_at")] - ) - """ + Parameters + ---------- + filter (dict): + The filter criteria for the $match stage. + projection (list, optional): + List of fields to include in the $project stage. Defaults to None. + limit (int, optional): + The maximum number of documents to return. Defaults to 0 (no limit). + sort (list of tuples, optional): + List of (field, order) tuples specifying the sorting order. Defaults to None. + aggregation (list of tuples, optional): + List of (aggregation_operator, field_name) tuples specifying + additional aggregation operations. Defaults to None. + remove_json_unserializables: + Removes fields that are not JSON serializable. Defaults to True + + Returns + ------- + list: + A list with the result set. + + Example + ------- + Create a pipeline with a filter, projection, sorting, and aggregation. + + rs = find( + filter={"campaign_id": "mycampaign1"}, + projection=["workflow_id", "started_at", "ended_at"], + limit=10, + sort=[("workflow_id", ASC), ("end_time", DESC)], + aggregation=[("avg", "ended_at"), ("min", "started_at")] + ) + """ if aggregation is not None: try: - rs = self._pipeline( - filter, projection, limit, sort, aggregation - ) + rs = self._pipeline(filter, projection, limit, sort, aggregation) except Exception as e: self.logger.exception(e) return None @@ -231,6 +226,7 @@ def _pipeline( return None def insert_one(self, doc: Dict) -> ObjectId: + """Insert only one.""" try: r = self._tasks_collection.insert_one(doc) return r.inserted_id @@ -239,6 +235,7 @@ def insert_one(self, doc: Dict) -> ObjectId: return None def insert_many(self, doc_list: List[Dict]) -> List[ObjectId]: + """Insert many.""" try: r = self._tasks_collection.insert_many(doc_list) return r.inserted_ids @@ -246,18 +243,15 @@ def insert_many(self, doc_list: List[Dict]) -> List[ObjectId]: self.logger.exception(e) return None - def insert_and_update_many( - self, indexing_key, doc_list: List[Dict] - ) -> bool: + def insert_and_update_many(self, indexing_key, doc_list: List[Dict]) -> bool: + """Insert and update.""" try: if len(doc_list) == 0: return False t0 = 0 if PERF_LOG: t0 = time() - indexed_buffer = curate_dict_task_messages( - doc_list, indexing_key, t0 - ) + indexed_buffer = curate_dict_task_messages(doc_list, indexing_key, t0) t1 = perf_log("doc_curate_dict_task_messages", t0) if len(indexed_buffer) == 0: return False @@ -279,7 +273,8 @@ def insert_and_update_many( return False def delete_ids(self, ids_list: List[ObjectId]) -> bool: - if type(ids_list) != list: + """Delete the ids.""" + if type(ids_list) is not list: ids_list = [ids_list] try: self._tasks_collection.delete_many({"_id": {"$in": ids_list}}) @@ -289,7 +284,8 @@ def delete_ids(self, ids_list: List[ObjectId]) -> bool: return False def delete_keys(self, key_name, keys_list: List[Any]) -> bool: - if type(keys_list) != list: + """Delete the keys.""" + if type(keys_list) is not list: keys_list = [keys_list] try: 
self._tasks_collection.delete_many({key_name: {"$in": keys_list}}) @@ -299,6 +295,7 @@ def delete_keys(self, key_name, keys_list: List[Any]) -> bool: return False def delete_with_filter(self, filter) -> bool: + """Delete with filter.""" try: self._tasks_collection.delete_many(filter) return True @@ -307,6 +304,7 @@ def delete_with_filter(self, filter) -> bool: return False def count(self) -> int: + """Count it.""" try: return self._tasks_collection.count_documents({}) except Exception as e: @@ -314,6 +312,7 @@ def count(self) -> int: return -1 def workflow_insert_or_update(self, workflow_obj: WorkflowObject) -> bool: + """Insert or update workflow.""" _dict = workflow_obj.to_dict().copy() workflow_id = _dict.pop(WorkflowObject.workflow_id_field(), None) if workflow_id is None: @@ -328,9 +327,7 @@ def workflow_insert_or_update(self, workflow_obj: WorkflowObject) -> bool: # "Interceptor_ID must be a string, as Mongo can only record string keys." # ) # return False - update_query.update( - {"$push": {"interceptor_ids": {"$each": interceptor_ids}}} - ) + update_query.update({"$push": {"interceptor_ids": {"$each": interceptor_ids}}}) machine_info = _dict.pop("machine_info", None) if machine_info is not None: @@ -348,12 +345,8 @@ def workflow_insert_or_update(self, workflow_obj: WorkflowObject) -> bool: ) try: - result = self._wfs_collection.update_one( - _filter, update_query, upsert=True - ) - return (result.upserted_id is not None) or result.raw_result[ - "updatedExisting" - ] + result = self._wfs_collection.update_one(_filter, update_query, upsert=True) + return (result.upserted_id is not None) or result.raw_result["updatedExisting"] except Exception as e: self.logger.exception(e) return False @@ -366,6 +359,7 @@ def workflow_query( sort: List[Tuple] = None, remove_json_unserializables=True, ) -> List[Dict]: + """Get the workflow query.""" # TODO refactor: reuse code for task_query instead of copy & paste _projection = {} if projection is not None: @@ -395,14 +389,14 @@ def dump_to_file( export_format="json", should_zip=False, ): + """Dump it to file.""" if collection_name == MONGO_TASK_COLLECTION: _collection = self._tasks_collection elif collection_name == MONGO_WORKFLOWS_COLLECTION: _collection = self._wfs_collection else: - raise Exception( - f"Sorry, only {MONGO_TASK_COLLECTION} and {MONGO_WORKFLOWS_COLLECTION} collections are currently available for dump." 
- ) + msg = f"Only {MONGO_TASK_COLLECTION} and {MONGO_WORKFLOWS_COLLECTION} " + raise Exception(msg + "collections are currently available for dump.") if export_format != "json": raise Exception("Sorry, only JSON is currently supported.") @@ -426,9 +420,7 @@ def dump_to_file( try: if should_zip: in_memory_stream = io.BytesIO() - with zipfile.ZipFile( - in_memory_stream, "w", zipfile.ZIP_DEFLATED - ) as zip_file: + with zipfile.ZipFile(in_memory_stream, "w", zipfile.ZIP_DEFLATED) as zip_file: zip_file.writestr("dump_file.json", json_data) compressed_data = in_memory_stream.getvalue() with open(output_file, "wb") as f: @@ -443,6 +435,7 @@ def dump_to_file( return def liveness_test(self) -> bool: + """Test for liveness.""" try: self._db.list_collection_names() return True @@ -463,6 +456,7 @@ def save_object( custom_metadata=None, pickle_=False, ): + """Save an object.""" if object_id is None: object_id = str(uuid4()) obj_doc = {"object_id": object_id} @@ -485,5 +479,6 @@ def save_object( return object_id def get_objects(self, filter): + """Get objects matching the filter.""" documents = self._obj_collection.find(filter) return list(documents) diff --git a/flowcept/commons/daos/keyvalue_dao.py b/src/flowcept/commons/daos/keyvalue_dao.py similarity index 85% rename from flowcept/commons/daos/keyvalue_dao.py rename to src/flowcept/commons/daos/keyvalue_dao.py index 6dc64051..aa7d4b1d 100644 --- a/flowcept/commons/daos/keyvalue_dao.py +++ b/src/flowcept/commons/daos/keyvalue_dao.py @@ -1,3 +1,5 @@ +"""Key value module.""" + from redis import Redis from flowcept.commons.flowcept_logger import FlowceptLogger @@ -11,6 +13,8 @@ @singleton class KeyValueDAO: + """Key value class.""" + def __init__(self, connection=None): self.logger = FlowceptLogger() if connection is None: @@ -24,28 +28,35 @@ def __init__(self, connection=None): self._redis = connection def delete_set(self, set_name: str): + """Delete the set.""" self._redis.delete(set_name) def add_key_into_set(self, set_name: str, key): + """Add a key.""" self._redis.sadd(set_name, key) def remove_key_from_set(self, set_name: str, key): + """Remove a key.""" self.logger.debug(f"Removing key {key} from set: {set_name}") self._redis.srem(set_name, key) self.logger.debug(f"Removed key {key} from set: {set_name}") def set_has_key(self, set_name: str, key) -> bool: + """Check whether the set contains the key.""" return self._redis.sismember(set_name, key) def set_count(self, set_name: str): + """Count the members of the set.""" return self._redis.scard(set_name) def set_is_empty(self, set_name: str) -> bool: + """Check whether the set is empty.""" _count = self.set_count(set_name) self.logger.info(f"Set {set_name} has {_count}") return _count == 0 def delete_all_matching_sets(self, key_pattern): + """Delete matching sets.""" matching_sets = self._redis.keys(key_pattern) for set_name in matching_sets: self.delete_set(set_name) diff --git a/src/flowcept/commons/daos/mq_dao/__init__.py b/src/flowcept/commons/daos/mq_dao/__init__.py new file mode 100644 index 00000000..00b1359a --- /dev/null +++ b/src/flowcept/commons/daos/mq_dao/__init__.py @@ -0,0 +1 @@ +"""MQ subpackage.""" diff --git a/flowcept/commons/daos/mq_dao/mq_dao_base.py b/src/flowcept/commons/daos/mq_dao/mq_dao_base.py similarity index 72% rename from flowcept/commons/daos/mq_dao/mq_dao_base.py rename to src/flowcept/commons/daos/mq_dao/mq_dao_base.py index aa5d8902..ac91bcb6 100644 --- a/flowcept/commons/daos/mq_dao/mq_dao_base.py +++ b/src/flowcept/commons/daos/mq_dao/mq_dao_base.py @@ -1,3 +1,5 @@ +"""MQ base module.""" + from abc import ABC, abstractmethod from
typing import Union, List, Callable @@ -28,11 +30,14 @@ class MQDao(ABC): + """MQ base class.""" + ENCODER = GenericJSONEncoder if JSON_SERIALIZER == "complex" else None # TODO we don't have a unit test to cover complex dict! @staticmethod def build(*args, **kwargs) -> "MQDao": + """Build it.""" if MQ_TYPE == "redis": from flowcept.commons.daos.mq_dao.mq_dao_redis import MQDaoRedis @@ -46,11 +51,14 @@ def build(*args, **kwargs) -> "MQDao": @staticmethod def _get_set_name(exec_bundle_id=None): - """ - :param exec_bundle_id: A way to group one or many interceptors, and treat each group as a bundle to control when their time_based threads started and ended. + """Get the set name. + + :param exec_bundle_id: A way to group one or many interceptors, and + treat each group as a bundle to control when their time_based + threads started and ended. :return: """ - set_id = f"started_mq_thread_execution" + set_id = "started_mq_thread_execution" if exec_bundle_id is not None: set_id += "_" + str(exec_bundle_id) return set_id @@ -76,12 +84,11 @@ def __init__(self, kv_host=None, kv_port=None, adapter_settings=None): self.buffer: Union[AutoflushBuffer, List] = None @abstractmethod - def _bulk_publish( - self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps - ): + def _bulk_publish(self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps): raise NotImplementedError() def bulk_publish(self, buffer): + """Publish it.""" self.logger.info(f"Going to flush {len(buffer)} to MQ...") if MQ_CHUNK_SIZE > 1: for chunk in chunked(buffer, MQ_CHUNK_SIZE): @@ -89,47 +96,41 @@ def bulk_publish(self, buffer): else: self._bulk_publish(buffer) - def register_time_based_thread_init( - self, interceptor_instance_id: str, exec_bundle_id=None - ): + def register_time_based_thread_init(self, interceptor_instance_id: str, exec_bundle_id=None): + """Register the time.""" set_name = MQDao._get_set_name(exec_bundle_id) self.logger.info( - f"Registering the beginning of the time_based MQ flush thread {set_name}.{interceptor_instance_id}" + f"Register start of time_based MQ flush thread {set_name}.{interceptor_instance_id}" ) self._keyvalue_dao.add_key_into_set(set_name, interceptor_instance_id) - def register_time_based_thread_end( - self, interceptor_instance_id: str, exec_bundle_id=None - ): + def register_time_based_thread_end(self, interceptor_instance_id: str, exec_bundle_id=None): + """Register time.""" set_name = MQDao._get_set_name(exec_bundle_id) self.logger.info( - f"Registering the end of the time_based MQ flush thread {set_name}.{interceptor_instance_id}" - ) - self._keyvalue_dao.remove_key_from_set( - set_name, interceptor_instance_id + f"Registering end of time_based MQ flush thread {set_name}.{interceptor_instance_id}" ) + self._keyvalue_dao.remove_key_from_set(set_name, interceptor_instance_id) self.logger.info( - f"Done registering the end of the time_based MQ flush thread {set_name}.{interceptor_instance_id}" + f"Done registering time_based MQ flush thread {set_name}.{interceptor_instance_id}" ) def all_time_based_threads_ended(self, exec_bundle_id=None): + """Get all time.""" set_name = MQDao._get_set_name(exec_bundle_id) return self._keyvalue_dao.set_is_empty(set_name) def init_buffer(self, interceptor_instance_id: str, exec_bundle_id=None): + """Create the buffer.""" if flowcept.configs.DB_FLUSH_MODE == "online": - self.logger.info( - f"Starting MQ time-based flushing! bundle: {exec_bundle_id}; interceptor id: {interceptor_instance_id}" - ) + msg = "Starting MQ time-based flushing! 
bundle: " + self.logger.info(msg + f"{exec_bundle_id}; interceptor id: {interceptor_instance_id}") self.buffer = AutoflushBuffer( max_size=MQ_BUFFER_SIZE, flush_interval=MQ_INSERTION_BUFFER_TIME, flush_function=self.bulk_publish, ) - # - self.register_time_based_thread_init( - interceptor_instance_id, exec_bundle_id - ) + self.register_time_based_thread_init(interceptor_instance_id, exec_bundle_id) self._time_based_flushing_started = True else: self.buffer = list() @@ -146,20 +147,15 @@ def _close_buffer(self): self.buffer = list() def stop(self, interceptor_instance_id: str, bundle_exec_id: int = None): - self.logger.info( - f"MQ publisher received stop signal! bundle: {bundle_exec_id}; interceptor id: {interceptor_instance_id}" - ) + """Stop it.""" + msg0 = "MQ publisher received stop signal! bundle: " + self.logger.info(msg0 + f"{bundle_exec_id}; interceptor id: {interceptor_instance_id}") self._close_buffer() - self.logger.info( - f"Flushed MQ for the last time! Now going to send stop msg. bundle: {bundle_exec_id}; interceptor id: {interceptor_instance_id}" - ) - self._send_mq_dao_time_thread_stop( - interceptor_instance_id, bundle_exec_id - ) + msg = "Flushed MQ for last time! Send stop msg. bundle: " + self.logger.info(msg + f"{bundle_exec_id}; interceptor id: {interceptor_instance_id}") + self._send_mq_dao_time_thread_stop(interceptor_instance_id, bundle_exec_id) - def _send_mq_dao_time_thread_stop( - self, interceptor_instance_id, exec_bundle_id=None - ): + def _send_mq_dao_time_thread_stop(self, interceptor_instance_id, exec_bundle_id=None): # These control_messages are handled by the document inserter # TODO: these should be constants msg = { @@ -172,22 +168,24 @@ def _send_mq_dao_time_thread_stop( self.send_message(msg) def send_document_inserter_stop(self): + """Send the document.""" # These control_messages are handled by the document inserter msg = {"type": "flowcept_control", "info": "stop_document_inserter"} self.send_message(msg) @abstractmethod - def send_message( - self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps - ): + def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps): + """Send a message.""" raise NotImplementedError() @abstractmethod def message_listener(self, message_handler: Callable): + """Get message listener.""" raise NotImplementedError() @abstractmethod def liveness_test(self): + """Get livelyness of it.""" try: response = self._kv_conn.ping() if response: diff --git a/flowcept/commons/daos/mq_dao/mq_dao_kafka.py b/src/flowcept/commons/daos/mq_dao/mq_dao_kafka.py similarity index 82% rename from flowcept/commons/daos/mq_dao/mq_dao_kafka.py rename to src/flowcept/commons/daos/mq_dao/mq_dao_kafka.py index 5deac449..31a43ac7 100644 --- a/flowcept/commons/daos/mq_dao/mq_dao_kafka.py +++ b/src/flowcept/commons/daos/mq_dao/mq_dao_kafka.py @@ -1,3 +1,5 @@ +"""MQ kafka module.""" + from typing import Callable import msgpack @@ -17,6 +19,8 @@ class MQDaoKafka(MQDao): + """MQ kafka class.""" + def __init__(self, kv_host=None, kv_port=None, adapter_settings=None): super().__init__(kv_host, kv_port, adapter_settings) @@ -26,6 +30,7 @@ def __init__(self, kv_host=None, kv_port=None, adapter_settings=None): self._producer = Producer(self._kafka_conf) def message_listener(self, message_handler: Callable): + """Get message listener.""" self._kafka_conf.update( { "group.id": "my_group", @@ -56,26 +61,18 @@ def message_listener(self, message_handler: Callable): finally: consumer.close() - def send_message( - self, message: dict, 
channel=MQ_CHANNEL, serializer=msgpack.dumps - ): - self._producer.produce( - channel, key=channel, value=serializer(message) - ) + def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps): + """Send the message.""" + self._producer.produce(channel, key=channel, value=serializer(message)) self._producer.flush() - def _bulk_publish( - self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps - ): + def _bulk_publish(self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps): for message in buffer: try: self.logger.debug( - f"Going to send Message:" - f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" - ) - self._producer.produce( - channel, key=channel, value=serializer(message) + f"Going to send Message:" f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" ) + self._producer.produce(channel, key=channel, value=serializer(message)) except Exception as e: self.logger.exception(e) self.logger.error( @@ -93,6 +90,7 @@ def _bulk_publish( perf_log("mq_pipe_flush", t0) def liveness_test(self): + """Get the livelyness of it.""" try: super().liveness_test() admin_client = AdminClient(self._kafka_conf) diff --git a/flowcept/commons/daos/mq_dao/mq_dao_redis.py b/src/flowcept/commons/daos/mq_dao/mq_dao_redis.py similarity index 76% rename from flowcept/commons/daos/mq_dao/mq_dao_redis.py rename to src/flowcept/commons/daos/mq_dao/mq_dao_redis.py index 790f3ab3..dc3a69db 100644 --- a/flowcept/commons/daos/mq_dao/mq_dao_redis.py +++ b/src/flowcept/commons/daos/mq_dao/mq_dao_redis.py @@ -1,3 +1,5 @@ +"""MQ redis module.""" + from typing import Callable import msgpack @@ -6,32 +8,22 @@ from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao from flowcept.commons.utils import perf_log from flowcept.configs import ( - MQ_HOST, - MQ_PORT, MQ_CHANNEL, - MQ_PASSWORD, - JSON_SERIALIZER, - MQ_BUFFER_SIZE, - MQ_INSERTION_BUFFER_TIME, - MQ_CHUNK_SIZE, PERF_LOG, - MQ_URI, - ENRICH_MESSAGES, - DB_FLUSH_MODE, - MQ_TYPE, ) class MQDaoRedis(MQDao): + """MQ redis class.""" + MESSAGE_TYPES_IGNORE = {"psubscribe"} def __init__(self, kv_host=None, kv_port=None, adapter_settings=None): super().__init__(kv_host, kv_port, adapter_settings) - self._producer = ( - self._kv_conn - ) # if MQ is redis, we use the same KV for the MQ + self._producer = self._kv_conn # if MQ is redis, we use the same KV for the MQ def message_listener(self, message_handler: Callable): + """Get message listener.""" pubsub = self._kv_conn.pubsub() pubsub.psubscribe(MQ_CHANNEL) for message in pubsub.listen(): @@ -44,20 +36,16 @@ def message_listener(self, message_handler: Callable): if not message_handler(msg_obj): break - def send_message( - self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps - ): + def send_message(self, message: dict, channel=MQ_CHANNEL, serializer=msgpack.dumps): + """Send the message.""" self._producer.publish(channel, serializer(message)) - def _bulk_publish( - self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps - ): + def _bulk_publish(self, buffer, channel=MQ_CHANNEL, serializer=msgpack.dumps): pipe = self._producer.pipeline() for message in buffer: try: self.logger.debug( - f"Going to send Message:" - f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" + f"Going to send Message:" f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" ) pipe.publish(MQ_CHANNEL, serializer(message)) except Exception as e: @@ -77,6 +65,7 @@ def _bulk_publish( perf_log("mq_pipe_execute", t0) def liveness_test(self): + """Get the livelyness of it.""" try: super().liveness_test() return True diff --git 
a/src/flowcept/commons/flowcept_dataclasses/__init__.py b/src/flowcept/commons/flowcept_dataclasses/__init__.py new file mode 100644 index 00000000..13895429 --- /dev/null +++ b/src/flowcept/commons/flowcept_dataclasses/__init__.py @@ -0,0 +1 @@ +"""Dataclasses subpackage.""" diff --git a/flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py b/src/flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py similarity index 77% rename from flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py rename to src/flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py index eb089c1e..9f0e6313 100644 --- a/flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py +++ b/src/flowcept/commons/flowcept_dataclasses/base_settings_dataclasses.py @@ -1,3 +1,5 @@ +"""Base settings module.""" + import abc from dataclasses import dataclass, field from typing import Optional, Any @@ -5,12 +7,16 @@ @dataclass class KeyValue: + """Key value class.""" + key: str value: Any @dataclass class BaseSettings(abc.ABC): + """Base settings class.""" + key: str kind: str observer_type: str = field(init=False) diff --git a/flowcept/commons/flowcept_dataclasses/task_object.py b/src/flowcept/commons/flowcept_dataclasses/task_object.py similarity index 87% rename from flowcept/commons/flowcept_dataclasses/task_object.py rename to src/flowcept/commons/flowcept_dataclasses/task_object.py index 68bd4b2d..2b07dfd2 100644 --- a/flowcept/commons/flowcept_dataclasses/task_object.py +++ b/src/flowcept/commons/flowcept_dataclasses/task_object.py @@ -1,3 +1,5 @@ +"""Task object module.""" + from enum import Enum from typing import Dict, AnyStr, Any, Union, List import msgpack @@ -14,7 +16,12 @@ ) -class Status(str, Enum): # inheriting from str here for JSON serialization +class Status(str, Enum): + """Status class. + + Inheriting from str here for JSON serialization. 
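The str mix-in called out in the docstring above is what lets status values pass straight through json.dumps; a tiny standalone illustration (Color is a throwaway enum, not part of Flowcept):

import json
from enum import Enum

class Color(str, Enum):
    # Same pattern as Status: inheriting from str makes members JSON-serializable
    # with the standard encoder, no custom handling needed.
    RED = "RED"
    BLUE = "BLUE"

print(json.dumps({"status": Color.RED}))  # -> {"status": "RED"}
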
+ """ + SUBMITTED = "SUBMITTED" WAITING = "WAITING" RUNNING = "RUNNING" @@ -24,10 +31,13 @@ class Status(str, Enum): # inheriting from str here for JSON serialization @staticmethod def get_finished_statuses(): + """Get finished status.""" return [Status.FINISHED, Status.ERROR] class TaskObject: + """Task class.""" + type = "task" task_id: AnyStr = None # Any way to identify a task utc_timestamp: float = None @@ -62,6 +72,7 @@ class TaskObject: @staticmethod def get_time_field_names(): + """Get the time field.""" return [ "started_at", "ended_at", @@ -72,6 +83,7 @@ def get_time_field_names(): @staticmethod def get_dict_field_names(): + """Get field names.""" return [ "used", "generated", @@ -82,15 +94,19 @@ def get_dict_field_names(): @staticmethod def task_id_field(): + """Get task id.""" return "task_id" @staticmethod def workflow_id_field(): + """Get workflow id.""" return "workflow_id" def enrich(self, adapter_key=None): + """Enrich it.""" if adapter_key is not None: - # TODO :base-interceptor-refactor: :code-reorg: :usability: revisit all times we assume settings is not none + # TODO :base-interceptor-refactor: :code-reorg: :usability: + # revisit all times we assume settings is not none self.adapter_id = adapter_key if self.utc_timestamp is None: @@ -115,6 +131,7 @@ def enrich(self, adapter_key=None): self.hostname = HOSTNAME def to_dict(self): + """Convert to dictionary.""" result_dict = {} for attr, value in self.__dict__.items(): if value is not None: @@ -130,10 +147,12 @@ def to_dict(self): return result_dict def serialize(self): + """Serialize it.""" return msgpack.dumps(self.to_dict()) @staticmethod def enrich_task_dict(task_dict: dict): + """Enrich the task.""" attributes = { "campaign_id": CAMPAIGN_ID, "node_name": NODE_NAME, @@ -143,9 +162,7 @@ def enrich_task_dict(task_dict: dict): "hostname": HOSTNAME, } for key, fallback_value in attributes.items(): - if ( - key not in task_dict or task_dict[key] is None - ) and fallback_value is not None: + if (key not in task_dict or task_dict[key] is None) and fallback_value is not None: task_dict[key] = fallback_value # @staticmethod diff --git a/flowcept/commons/flowcept_dataclasses/telemetry.py b/src/flowcept/commons/flowcept_dataclasses/telemetry.py similarity index 85% rename from flowcept/commons/flowcept_dataclasses/telemetry.py rename to src/flowcept/commons/flowcept_dataclasses/telemetry.py index 39dc5b8a..628541ba 100644 --- a/flowcept/commons/flowcept_dataclasses/telemetry.py +++ b/src/flowcept/commons/flowcept_dataclasses/telemetry.py @@ -1,23 +1,27 @@ +"""Telemetry module.""" + from typing import List, Dict -from dataclasses import dataclass, asdict def remove_none_values(_dict): + """Remove the none values.""" return {k: v for (k, v) in _dict if v is not None} class Telemetry: - """ - Class representing telemetry information captured in the platform where - the experiment runs. + """Telemetry class. - We are using psutils and the data it can capture depends on the platform. - So, we won't use dataclasses because we can't list all possible info - to be captured in any platform. + Class representing telemetry information captured in the platform where + the experiment runs. + We are using psutils and the data it can capture depends on the platform. + So, we won't use dataclasses because we can't list all possible info to + be captured in any platform. 
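The platform-dependence the docstring describes is easy to see with psutil itself; a short standalone illustration (not Flowcept code):

import psutil

# The fields in these dictionaries vary across operating systems, which is why
# Telemetry keeps free-form dicts instead of a fixed dataclass schema.
print(sorted(psutil.cpu_times()._asdict()))
print(sorted(psutil.virtual_memory()._asdict()))
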
""" class CPU: + """CPU class.""" + times_avg: Dict[str, float] = None percent_all: float = None @@ -25,19 +29,27 @@ class CPU: percent_per_cpu: List[float] = None class Memory: + """Memory class.""" + virtual: Dict[str, float] swap: Dict[str, float] class Network: + """Network class.""" + netio: Dict[str, int] netio_per_interface: Dict[str, Dict[str, int]] class Disk: + """Disk class.""" + disk_usage: Dict[str, float] io: Dict[str, float] io_per_disk: Dict[str, Dict[str, float]] class Process: + """Process class.""" + pid: int cpu_number: int memory: Dict[str, float] @@ -75,6 +87,7 @@ class Process: gpu: Dict = None # TODO: use dataclasses def to_dict(self): + """Convert to dictionary.""" ret = {} if self.cpu is not None: ret["cpu"] = self.cpu.__dict__ diff --git a/flowcept/commons/flowcept_dataclasses/workflow_object.py b/src/flowcept/commons/flowcept_dataclasses/workflow_object.py similarity index 89% rename from flowcept/commons/flowcept_dataclasses/workflow_object.py rename to src/flowcept/commons/flowcept_dataclasses/workflow_object.py index 2dc23995..6e093a71 100644 --- a/flowcept/commons/flowcept_dataclasses/workflow_object.py +++ b/src/flowcept/commons/flowcept_dataclasses/workflow_object.py @@ -1,3 +1,5 @@ +"""Workflow module.""" + from typing import Dict, AnyStr, List import msgpack from omegaconf import OmegaConf @@ -18,6 +20,8 @@ # Not a dataclass because a dataclass stores keys even when there's no value, # adding unnecessary overhead. class WorkflowObject: + """Workflow class.""" + workflow_id: AnyStr = None parent_workflow_id: AnyStr = None machine_info: Dict = None @@ -37,9 +41,7 @@ class WorkflowObject: used: Dict = None generated: Dict = None - def __init__( - self, workflow_id=None, name=None, used=None, generated=None - ): + def __init__(self, workflow_id=None, name=None, used=None, generated=None): self.workflow_id = workflow_id self.name = name self.used = used @@ -47,16 +49,19 @@ def __init__( @staticmethod def workflow_id_field(): + """Get workflow id.""" return "workflow_id" @staticmethod def from_dict(dict_obj: Dict) -> "WorkflowObject": + """Convert from dictionary.""" wf_obj = WorkflowObject() for k, v in dict_obj.items(): setattr(wf_obj, k, v) return wf_obj def to_dict(self): + """Convert to dictionary.""" result_dict = {} for attr, value in self.__dict__.items(): if value is not None: @@ -65,11 +70,13 @@ def to_dict(self): return result_dict def enrich(self, adapter_key=None): + """Enrich it.""" self.utc_timestamp = flowcept.commons.utils.get_utc_now() self.flowcept_settings = OmegaConf.to_container(settings) if adapter_key is not None: - # TODO :base-interceptor-refactor: :code-reorg: :usability: revisit all times we assume settings is not none + # TODO :base-interceptor-refactor: :code-reorg: :usability: + # revisit all times we assume settings is not none self.adapter_id = adapter_key if self.user is None: @@ -91,10 +98,12 @@ def enrich(self, adapter_key=None): self.flowcept_version = __version__ def serialize(self): + """Serialize it.""" return msgpack.dumps(self.to_dict()) @staticmethod def deserialize(serialized_data) -> "WorkflowObject": + """Deserialize it.""" dict_obj = msgpack.loads(serialized_data) obj = WorkflowObject() for k, v in dict_obj.items(): @@ -102,6 +111,7 @@ def deserialize(serialized_data) -> "WorkflowObject": return obj def __repr__(self): + """Set the repr.""" return ( f"WorkflowObject(" f"workflow_id={repr(self.workflow_id)}, " @@ -121,4 +131,5 @@ def __repr__(self): ) def __str__(self): + """Set the string.""" return 
self.__repr__() diff --git a/flowcept/commons/flowcept_logger.py b/src/flowcept/commons/flowcept_logger.py similarity index 80% rename from flowcept/commons/flowcept_logger.py rename to src/flowcept/commons/flowcept_logger.py index 4d915bf8..26f9b49e 100644 --- a/flowcept/commons/flowcept_logger.py +++ b/src/flowcept/commons/flowcept_logger.py @@ -1,3 +1,5 @@ +"""Logger module.""" + import logging from flowcept.configs import ( @@ -10,6 +12,8 @@ class FlowceptLogger(object): + """Logger class.""" + _instance = None @classmethod @@ -27,7 +31,8 @@ def _build_logger(cls): file_handler.setLevel(file_level) # Create formatters and add it to handlers - base_format = f"[%(name)s][%(levelname)s][{HOSTNAME}][pid=%(process)d][thread=%(thread)d][function=%(funcName)s][%(message)s]" + fmt = f"[%(name)s][%(levelname)s][{HOSTNAME}][pid=%(process)d]" + base_format = fmt + "[thread=%(thread)d][function=%(funcName)s][%(message)s]" stream_format = logging.Formatter(base_format) file_format = logging.Formatter(f"[%(asctime)s]{base_format}") stream_handler.setFormatter(stream_format) @@ -42,9 +47,8 @@ def _build_logger(cls): return logger def __new__(cls, *args, **kwargs) -> logging.Logger: + """Create a new instance.""" if not cls._instance: - cls._instance = super(FlowceptLogger, cls).__new__( - cls, *args, **kwargs - ) + cls._instance = super(FlowceptLogger, cls).__new__(cls, *args, **kwargs) cls._instance._logger = FlowceptLogger._build_logger() return cls._instance._logger diff --git a/flowcept/commons/query_utils.py b/src/flowcept/commons/query_utils.py similarity index 78% rename from flowcept/commons/query_utils.py rename to src/flowcept/commons/query_utils.py index f878f987..1b3f6eea 100644 --- a/flowcept/commons/query_utils.py +++ b/src/flowcept/commons/query_utils.py @@ -1,3 +1,5 @@ +"""Query utilities.""" + import numbers from datetime import timedelta from typing import List, Dict @@ -8,6 +10,7 @@ def get_doc_status(row): + """Get document status.""" if row.get("status"): return row.get("status") elif row.get("finished"): @@ -23,11 +26,12 @@ def get_doc_status(row): def to_datetime(logger, df, column_name, _shift_hours=0): + """Convert to datetime.""" if column_name in df.columns: try: - df[column_name] = pd.to_datetime( - df[column_name], unit="s" - ) + timedelta(hours=_shift_hours) + df[column_name] = pd.to_datetime(df[column_name], unit="s") + timedelta( + hours=_shift_hours + ) except Exception as _e: logger.info(_e) @@ -35,28 +39,25 @@ def to_datetime(logger, df, column_name, _shift_hours=0): def _calc_telemetry_diff_for_row(start, end): if isinstance(start, numbers.Number): return end - start - elif type(start) == dict: + elif type(start) is dict: diff_dict = {} for key in start: - diff_dict[key] = _calc_telemetry_diff_for_row( - start[key], end[key] - ) + diff_dict[key] = _calc_telemetry_diff_for_row(start[key], end[key]) return diff_dict - elif type(start) == list: + elif type(start) is list: diff_list = [] for i in range(0, len(start)): diff_list.append(_calc_telemetry_diff_for_row(start[i], end[i])) return diff_list - elif type(start) == str: + elif type(start) is str: return start else: - raise Exception( - "This is unexpected", start, end, type(start), type(end) - ) + raise Exception("This is unexpected", start, end, type(start), type(end)) def calculate_telemetry_diff_for_docs(docs: List[Dict]): + """Calculate telemetry difference.""" new_docs = [] for doc in docs: new_doc = doc.copy() diff --git a/flowcept/commons/settings_factory.py b/src/flowcept/commons/settings_factory.py 
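The recursive telemetry diff above subtracts start from end for numbers and walks dicts and lists in parallel; a small worked example calling the private helper directly, purely for illustration:

from flowcept.commons.query_utils import _calc_telemetry_diff_for_row

start = {"cpu": {"percent_all": 10.0}, "disk": {"io": [5.0, 7.0]}}
end = {"cpu": {"percent_all": 35.0}, "disk": {"io": [9.0, 8.0]}}
print(_calc_telemetry_diff_for_row(start, end))
# -> {'cpu': {'percent_all': 25.0}, 'disk': {'io': [4.0, 1.0]}}
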
similarity index 95% rename from flowcept/commons/settings_factory.py rename to src/flowcept/commons/settings_factory.py index 4bde09af..9791d5b8 100644 --- a/flowcept/commons/settings_factory.py +++ b/src/flowcept/commons/settings_factory.py @@ -1,3 +1,5 @@ +"""Settings module.""" + from flowcept.commons.vocabulary import Vocabulary from flowcept.configs import settings @@ -33,13 +35,13 @@ def _build_base_settings(kind: str, settings_dict: dict) -> BaseSettings: def get_settings(adapter_key: str) -> BaseSettings: + """Get the settings.""" if adapter_key is None: # TODO: :base-interceptor-refactor: return None settings_dict = settings[Vocabulary.Settings.ADAPTERS][adapter_key] if not settings_dict: raise Exception( - f"You must specify the adapter <<{adapter_key}>> in" - f" the settings YAML file." + f"You must specify the adapter <<{adapter_key}>> in" f" the settings YAML file." ) settings_dict["key"] = adapter_key kind = settings_dict[Vocabulary.Settings.KIND] diff --git a/flowcept/commons/utils.py b/src/flowcept/commons/utils.py similarity index 79% rename from flowcept/commons/utils.py rename to src/flowcept/commons/utils.py index 2270e89e..87247cb7 100644 --- a/flowcept/commons/utils.py +++ b/src/flowcept/commons/utils.py @@ -1,3 +1,5 @@ +"""Utilities.""" + from datetime import datetime, timedelta import json from time import time, sleep @@ -18,17 +20,20 @@ def get_utc_now() -> float: + """Get UTC time.""" now = datetime.utcnow() return now.timestamp() def get_utc_now_str() -> str: + """Get UTC string.""" format_string = "%Y-%m-%dT%H:%M:%S.%f" now = datetime.utcnow() return now.strftime(format_string) def get_utc_minutes_ago(minutes_ago=1): + """Get UTC minutes.""" now = datetime.utcnow() rounded = now - timedelta( minutes=now.minute % minutes_ago + minutes_ago, @@ -39,6 +44,7 @@ def get_utc_minutes_ago(minutes_ago=1): def perf_log(func_name, t0: float): + """Configure the performance log.""" if PERF_LOG: t1 = time() logger = FlowceptLogger() @@ -48,6 +54,7 @@ def perf_log(func_name, t0: float): def get_status_from_str(status_str: str) -> Status: + """Get the status.""" # TODO: complete this utility function if status_str.lower() in {"finished"}: return Status.FINISHED @@ -58,6 +65,7 @@ def get_status_from_str(status_str: str) -> Status: def get_adapter_exception_msg(adapter_kind): + """Get the adapter.""" return ( f"You have an adapter for {adapter_kind} in" f" {SETTINGS_PATH} but we couldn't import its interceptor." @@ -74,6 +82,7 @@ def assert_by_querying_tasks_until( max_trials=30, max_time=60, ): + """Assert by query.""" from flowcept.flowcept_api.task_query_api import TaskQueryAPI query_api = TaskQueryAPI() @@ -84,18 +93,14 @@ def assert_by_querying_tasks_until( docs = query_api.query(filter) if condition_to_evaluate is None: if docs is not None and len(docs): - flowcept.commons.logger.debug( - "Query conditions have been met! :D" - ) + flowcept.commons.logger.debug("Query conditions have been met! :D") return True else: try: if condition_to_evaluate(docs): - flowcept.commons.logger.debug( - "Query conditions have been met! :D" - ) + flowcept.commons.logger.debug("Query conditions have been met! 
:D") return True - except: + except Exception: pass trials += 1 @@ -110,14 +115,14 @@ def assert_by_querying_tasks_until( def chunked(iterable, size): + """Chunk it.""" for i in range(0, len(iterable), size): yield iterable[i : i + size] # TODO: consider reusing this function in the function assert_by_querying_task_collections_until -def evaluate_until( - evaluation_condition: Callable, max_trials=30, max_time=60, msg="" -): +def evaluate_until(evaluation_condition: Callable, max_trials=30, max_time=60, msg=""): + """Evaluate something.""" start_time = time() trials = 0 @@ -126,54 +131,42 @@ def evaluate_until( return True # Condition met trials += 1 - flowcept.commons.logger.debug( - f"Condition not yet met. Trials={trials}/{max_trials}. {msg}" - ) + flowcept.commons.logger.debug(f"Condition not yet met. Trials={trials}/{max_trials}. {msg}") sleep(1) return False # Condition not met within max_trials or max_time class GenericJSONEncoder(json.JSONEncoder): + """JSON encoder class.""" + def default(self, obj): + """Run the default method.""" if isinstance(obj, (list, tuple)): return [self.default(item) for item in obj] elif isinstance(obj, dict): - return { - self.default(key): self.default(value) - for key, value in obj.items() - } + return {self.default(key): self.default(value) for key, value in obj.items()} elif hasattr(obj, "__dict__"): return self.default(obj.__dict__) elif isinstance(obj, object): try: return str(obj) - except: + except Exception: return None - elif ( - isinstance(obj, np.int) - or isinstance(obj, np.int32) - or isinstance(obj, np.int64) - ): + elif isinstance(obj, np.int) or isinstance(obj, np.int32) or isinstance(obj, np.int64): return int(obj) elif ( - isinstance(obj, np.float) - or isinstance(obj, np.float32) - or isinstance(obj, np.float64) + isinstance(obj, np.float) or isinstance(obj, np.float32) or isinstance(obj, np.float64) ): return float(obj) return super().default(obj) def replace_non_serializable(obj): - if isinstance( - obj, (int, float, bool, str, list, tuple, dict, type(None)) - ): + """Replace it.""" + if isinstance(obj, (int, float, bool, str, list, tuple, dict, type(None))): if isinstance(obj, dict): - return { - key: replace_non_serializable(value) - for key, value in obj.items() - } + return {key: replace_non_serializable(value) for key, value in obj.items()} elif isinstance(obj, (list, tuple)): return [replace_non_serializable(item) for item in obj] else: @@ -184,6 +177,7 @@ def replace_non_serializable(obj): def get_gpu_vendor(): + """Get GPU vendor.""" system = platform.system() # Linux @@ -194,9 +188,7 @@ def get_gpu_vendor(): # Check for AMD GPU using lspci try: - lspci_output = subprocess.check_output( - "lspci", shell=True - ).decode() + lspci_output = subprocess.check_output("lspci", shell=True).decode() if "AMD" in lspci_output: return "AMD" except subprocess.CalledProcessError: @@ -232,12 +224,13 @@ def get_gpu_vendor(): class GenericJSONDecoder(json.JSONDecoder): + """JSON decoder class.""" + def __init__(self, *args, **kwargs): - json.JSONDecoder.__init__( - self, object_hook=self.object_hook, *args, **kwargs - ) + json.JSONDecoder.__init__(self, object_hook=self.object_hook, *args, **kwargs) def object_hook(self, dct): + """Get object hook.""" if "__class__" in dct: class_name = dct.pop("__class__") module_name = dct.pop("__module__") diff --git a/flowcept/commons/vocabulary.py b/src/flowcept/commons/vocabulary.py similarity index 74% rename from flowcept/commons/vocabulary.py rename to src/flowcept/commons/vocabulary.py index 
a7b4a978..3611836f 100644 --- a/flowcept/commons/vocabulary.py +++ b/src/flowcept/commons/vocabulary.py @@ -1,5 +1,12 @@ +"""Vocab module.""" + + class Vocabulary: + """Vocab class.""" + class Settings: + """Setting class.""" + ADAPTERS = "adapters" KIND = "kind" diff --git a/flowcept/configs.py b/src/flowcept/configs.py similarity index 85% rename from flowcept/configs.py rename to src/flowcept/configs.py index b872af98..6b288614 100644 --- a/flowcept/configs.py +++ b/src/flowcept/configs.py @@ -1,3 +1,5 @@ +"""Configuration module.""" + import os import socket import getpass @@ -62,9 +64,7 @@ MQ_PORT = int(os.getenv("MQ_PORT", settings["mq"].get("port", "6379"))) MQ_BUFFER_SIZE = int(settings["mq"].get("buffer_size", 50)) -MQ_INSERTION_BUFFER_TIME = int( - settings["mq"].get("insertion_buffer_time_secs", 5) -) +MQ_INSERTION_BUFFER_TIME = int(settings["mq"].get("insertion_buffer_time_secs", 5)) MQ_INSERTION_BUFFER_TIME = random.randint( int(MQ_INSERTION_BUFFER_TIME * 0.9), int(MQ_INSERTION_BUFFER_TIME * 1.4), @@ -84,12 +84,8 @@ # MongoDB Settings # ###################### MONGO_URI = settings["mongodb"].get("uri", os.environ.get("MONGO_URI", None)) -MONGO_HOST = settings["mongodb"].get( - "host", os.environ.get("MONGO_HOST", "localhost") -) -MONGO_PORT = int( - settings["mongodb"].get("port", os.environ.get("MONGO_PORT", "27017")) -) +MONGO_HOST = settings["mongodb"].get("host", os.environ.get("MONGO_HOST", "localhost")) +MONGO_PORT = int(settings["mongodb"].get("port", os.environ.get("MONGO_PORT", "27017"))) MONGO_DB = settings["mongodb"].get("db", PROJECT_NAME) MONGO_CREATE_INDEX = settings["mongodb"].get("create_collection_index", True) @@ -97,24 +93,16 @@ MONGO_WORKFLOWS_COLLECTION = "workflows" # In seconds: -MONGO_INSERTION_BUFFER_TIME = int( - settings["mongodb"].get("insertion_buffer_time_secs", 5) -) +MONGO_INSERTION_BUFFER_TIME = int(settings["mongodb"].get("insertion_buffer_time_secs", 5)) MONGO_INSERTION_BUFFER_TIME = random.randint( int(MONGO_INSERTION_BUFFER_TIME * 0.9), int(MONGO_INSERTION_BUFFER_TIME * 1.4), ) -MONGO_ADAPTIVE_BUFFER_SIZE = settings["mongodb"].get( - "adaptive_buffer_size", True -) +MONGO_ADAPTIVE_BUFFER_SIZE = settings["mongodb"].get("adaptive_buffer_size", True) MONGO_MAX_BUFFER_SIZE = int(settings["mongodb"].get("max_buffer_size", 50)) -MONGO_MIN_BUFFER_SIZE = max( - 1, int(settings["mongodb"].get("min_buffer_size", 10)) -) -MONGO_REMOVE_EMPTY_FIELDS = settings["mongodb"].get( - "remove_empty_fields", False -) +MONGO_MIN_BUFFER_SIZE = max(1, int(settings["mongodb"].get("min_buffer_size", 10))) +MONGO_REMOVE_EMPTY_FIELDS = settings["mongodb"].get("remove_empty_fields", False) ###################### @@ -125,9 +113,7 @@ # DEBUG_MODE = settings["project"].get("debug", False) PERF_LOG = settings["project"].get("performance_logging", False) JSON_SERIALIZER = settings["project"].get("json_serializer", "default") -REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get( - "replace_non_json_serializable", True -) +REPLACE_NON_JSON_SERIALIZABLE = settings["project"].get("replace_non_json_serializable", True) ENRICH_MESSAGES = settings["project"].get("enrich_messages", True) TELEMETRY_CAPTURE = settings["project"].get("telemetry_capture", None) @@ -142,17 +128,12 @@ # We could move this to the static part of TelemetryCapture N_GPUS = dict() GPU_HANDLES = None -if ( - TELEMETRY_CAPTURE is not None - and TELEMETRY_CAPTURE.get("gpu", None) is not None -): +if TELEMETRY_CAPTURE is not None and TELEMETRY_CAPTURE.get("gpu", None) is not None: if 
eval(TELEMETRY_CAPTURE.get("gpu", "None")) is not None: try: visible_devices_var = os.environ.get("CUDA_VISIBLE_DEVICES", None) if visible_devices_var is not None: - visible_devices = [ - int(i) for i in visible_devices_var.split(",") - ] + visible_devices = [int(i) for i in visible_devices_var.split(",")] if len(visible_devices): N_GPUS["nvidia"] = visible_devices GPU_HANDLES = [] # TODO @@ -161,15 +142,12 @@ N_GPUS["nvidia"] = list(range(0, nvmlDeviceGetCount())) GPU_HANDLES = [] - except Exception as e: - # print(e) + except Exception: pass try: visible_devices_var = os.environ.get("ROCR_VISIBLE_DEVICES", None) if visible_devices_var is not None: - visible_devices = [ - int(i) for i in visible_devices_var.split(",") - ] + visible_devices = [int(i) for i in visible_devices_var.split(",")] if len(visible_devices): N_GPUS["amd"] = visible_devices from amdsmi import ( @@ -185,8 +163,7 @@ amdsmi_init() GPU_HANDLES = amdsmi_get_processor_handles() N_GPUS["amd"] = list(range(0, len(GPU_HANDLES))) - except Exception as e: - # print(e) + except Exception: pass if len(N_GPUS.get("amd", [])): @@ -220,10 +197,10 @@ if LOGIN_NAME is None: try: LOGIN_NAME = sys_metadata.get("login_name", getpass.getuser()) - except: + except Exception: try: LOGIN_NAME = os.getlogin() - except: + except Exception: LOGIN_NAME = None SYS_NAME = SYS_NAME if SYS_NAME is not None else os.uname()[0] @@ -231,14 +208,14 @@ try: HOSTNAME = socket.getfqdn() -except: +except Exception: try: HOSTNAME = socket.gethostname() - except: + except Exception: try: with open("/etc/hostname", "r") as f: HOSTNAME = f.read().strip() - except: + except Exception: HOSTNAME = "unknown_hostname" diff --git a/src/flowcept/flowcept_api/__init__.py b/src/flowcept/flowcept_api/__init__.py new file mode 100644 index 00000000..24dd8a4b --- /dev/null +++ b/src/flowcept/flowcept_api/__init__.py @@ -0,0 +1 @@ +"""API subpackage.""" diff --git a/flowcept/flowcept_api/db_api.py b/src/flowcept/flowcept_api/db_api.py similarity index 86% rename from flowcept/flowcept_api/db_api.py rename to src/flowcept/flowcept_api/db_api.py index 3671628a..c3980acc 100644 --- a/flowcept/flowcept_api/db_api.py +++ b/src/flowcept/flowcept_api/db_api.py @@ -1,3 +1,5 @@ +"""DB module.""" + import uuid from typing import List @@ -13,6 +15,8 @@ @singleton class DBAPI(object): + """DB class.""" + def __init__( self, with_webserver=False, @@ -20,18 +24,16 @@ def __init__( self.logger = FlowceptLogger() self.with_webserver = with_webserver if self.with_webserver: - raise NotImplementedError( - f"We did not implement webserver API for this yet." 
- ) + raise NotImplementedError("We did not implement webserver API for this yet.") self._dao = DocumentDBDao() def insert_or_update_task(self, task: TaskObject): + """Insert or update task.""" self._dao.insert_one(task.to_dict()) - def insert_or_update_workflow( - self, workflow_obj: WorkflowObject - ) -> WorkflowObject: + def insert_or_update_workflow(self, workflow_obj: WorkflowObject) -> WorkflowObject: + """Insert or update workflow.""" if workflow_obj.workflow_id is None: workflow_obj.workflow_id = str(uuid.uuid4()) ret = self._dao.workflow_insert_or_update(workflow_obj) @@ -42,9 +44,8 @@ def insert_or_update_workflow( return workflow_obj def get_workflow(self, workflow_id) -> WorkflowObject: - wfobs = self.workflow_query( - filter={WorkflowObject.workflow_id_field(): workflow_id} - ) + """Get the workflow.""" + wfobs = self.workflow_query(filter={WorkflowObject.workflow_id_field(): workflow_id}) if wfobs is None or len(wfobs) == 0: self.logger.error("Could not retrieve workflow with that filter.") return None @@ -52,6 +53,7 @@ def get_workflow(self, workflow_id) -> WorkflowObject: return wfobs[0] def workflow_query(self, filter) -> List[WorkflowObject]: + """Get workflow query.""" results = self._dao.workflow_query(filter=filter) if results is None: self.logger.error("Could not retrieve workflow with that filter.") @@ -74,9 +76,10 @@ def dump_to_file( export_format="json", should_zip=False, ): + """Dump to the file.""" if filter is None and not should_zip: self.logger.error( - "I am sorry, we will not allow you to dump the entire database without filter and without even zipping it. You are likely doing something wrong or perhaps not using the best tool for a database dump." + "Not allowed to dump entire database without filter and without zipping it." ) return False try: @@ -102,6 +105,7 @@ def save_object( custom_metadata=None, pickle=False, ): + """Save the object.""" return self._dao.save_object( object, object_id, @@ -122,6 +126,7 @@ def query( remove_json_unserializables=True, type="task", ): + """Query it.""" if type == "task": return self._dao.task_query( filter, @@ -139,8 +144,7 @@ def query( return self._dao.get_objects(filter) else: raise Exception( - f"You used type={type}, but we only have " - f"collections for task and workflow." + f"You used type={type}, but we only have " f"collections for task and workflow." ) def save_torch_model( @@ -150,16 +154,17 @@ def save_torch_model( workflow_id=None, custom_metadata: dict = None, ) -> str: - """ + """Save model. + Save the PyTorch model's state_dict to a MongoDB collection as binary data. Args: model (torch.nn.Module): The PyTorch model to be saved. custom_metadata (Dict[str, str]): Custom metadata to be stored with the model. - Returns: + Returns + ------- str: The object ID of the saved model in the database. 
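A hedged sketch of calling the model-saving API documented above (assumes torch is installed and MongoDB is reachable; the model and metadata here are placeholders):

import torch
from flowcept.flowcept_api.db_api import DBAPI

db = DBAPI()  # singleton wrapper over DocumentDBDao
model = torch.nn.Linear(4, 2)  # any nn.Module stands in here
obj_id = db.save_torch_model(model, custom_metadata={"note": "example"})
print(obj_id)  # object_id under which the binary state_dict was stored
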
- """ import torch import io @@ -184,6 +189,7 @@ def save_torch_model( return obj_id def load_torch_model(self, torch_model, object_id: str): + """Load it.""" import torch import io diff --git a/flowcept/flowcept_api/flowcept_controller.py b/src/flowcept/flowcept_api/flowcept_controller.py similarity index 88% rename from flowcept/flowcept_api/flowcept_controller.py rename to src/flowcept/flowcept_api/flowcept_controller.py index a348d910..5f7db556 100644 --- a/flowcept/flowcept_api/flowcept_controller.py +++ b/src/flowcept/flowcept_api/flowcept_controller.py @@ -1,3 +1,5 @@ +"""Controller module.""" + from typing import List, Union from time import sleep @@ -11,7 +13,6 @@ from flowcept.commons.daos.mq_dao.mq_dao_base import MQDao from flowcept.configs import ( MQ_INSTANCES, - INSTRUMENTATION, INSTRUMENTATION_ENABLED, ) from flowcept.flowcept_api.db_api import DBAPI @@ -21,23 +22,21 @@ class Flowcept(object): - db = DBAPI() + """Flowcept class.""" + db = DBAPI() current_workflow_id = None def __init__( self, - interceptors: Union[ - BaseInterceptor, List[BaseInterceptor], str - ] = None, + interceptors: Union[BaseInterceptor, List[BaseInterceptor], str] = None, bundle_exec_id=None, start_doc_inserter=True, workflow_id: str = None, workflow_name: str = None, workflow_args: str = None, ): - """ - Flowcept controller. + """Flowcept controller. This class controls the interceptors, including instrumentation. If using for instrumentation, we assume one instance of this class @@ -45,8 +44,12 @@ def __init__( Parameters ---------- - interceptors - list of Flowcept interceptors. If none, instrumentation will be used. If a string is passed, no interceptor will be started. # TODO: improve clarity for the documentation. + interceptors - list of Flowcept interceptors. If none, instrumentation + will be used. If a string is passed, no interceptor will be + started. # TODO: improve clarity for the documentation. + bundle_exec_id - A way to group interceptors. + start_doc_inserter - Whether you want to start consuming MQ messages to inject in the DB. 
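A hedged usage sketch of this controller in its instrumentation-only form (assumes the MQ and database services are running and instrumentation is enabled in the settings; the workflow name is illustrative):

from flowcept.flowcept_api.flowcept_controller import Flowcept

if Flowcept.services_alive():  # checks that the MQ (and document DB) are reachable
    with Flowcept(workflow_name="example_workflow"):
        pass  # instrumented user code would run here
    print(Flowcept.current_workflow_id)
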
""" self.logger = FlowceptLogger() @@ -66,9 +69,7 @@ def __init__( if not INSTRUMENTATION_ENABLED: self.enabled = False return - interceptors = [ - flowcept.instrumentation.decorators.instrumentation_interceptor - ] + interceptors = [flowcept.instrumentation.decorators.instrumentation_interceptor] elif not isinstance(interceptors, list): interceptors = [interceptors] self._interceptors: List[BaseInterceptor] = interceptors @@ -78,10 +79,9 @@ def __init__( self.workflow_args = workflow_args def start(self): + """Start it.""" if self.is_started or not self.enabled: - self.logger.warning( - "Consumer may be already started or instrumentation is not set" - ) + self.logger.warning("Consumer may be already started or instrumentation is not set") return self if self._interceptors and len(self._interceptors): @@ -96,9 +96,7 @@ def start(self): self.logger.debug(f"...Flowceptor {key} started ok!") if ( - self.current_workflow_id - or self.workflow_args - or self.workflow_name + self.current_workflow_id or self.workflow_args or self.workflow_name ) and interceptor.kind == "instrumentation": wf_obj = WorkflowObject( self.current_workflow_id, @@ -138,6 +136,7 @@ def start(self): return self def stop(self): + """Stop it.""" if not self.is_started or not self.enabled: self.logger.warning("Consumer is already stopped!") return @@ -164,20 +163,22 @@ def stop(self): self.logger.debug("All stopped!") def __enter__(self): + """Run the start function.""" self.start() return self def __exit__(self, exc_type, exc_val, exc_tb): + """Run the stop function.""" self.stop() @staticmethod def start_instrumentation_interceptor(): - flowcept.instrumentation.decorators.instrumentation_interceptor.start( - None - ) + """Start it.""" + flowcept.instrumentation.decorators.instrumentation_interceptor.start(None) @staticmethod def services_alive() -> bool: + """Get alive services.""" if not MQDao.build().liveness_test(): logger.error("MQ Not Ready!") return False diff --git a/flowcept/flowcept_api/task_query_api.py b/src/flowcept/flowcept_api/task_query_api.py similarity index 78% rename from flowcept/flowcept_api/task_query_api.py rename to src/flowcept/flowcept_api/task_query_api.py index f15d351b..a4e8131a 100644 --- a/flowcept/flowcept_api/task_query_api.py +++ b/src/flowcept/flowcept_api/task_query_api.py @@ -1,6 +1,5 @@ -""" - General overview of this module. -""" +"""Task module.""" + from collections import OrderedDict from typing import List, Dict, Tuple from datetime import timedelta @@ -34,9 +33,7 @@ @singleton class TaskQueryAPI(object): - """ - General overview of this class. - """ + """Task class.""" ASC = pymongo.ASCENDING DESC = pymongo.DESCENDING @@ -61,13 +58,9 @@ def __init__( r = requests.get(_base_url) if r.status_code > 300: raise Exception(r.text) - self.logger.debug( - "Ok, webserver is ready to receive requests." - ) - except Exception as e: - raise Exception( - f"Error when accessing the webserver at {_base_url}" - ) + self.logger.debug("Ok, webserver is ready to receive requests.") + except Exception: + raise Exception(f"Error when accessing the webserver at {_base_url}") def query( self, @@ -78,31 +71,43 @@ def query( aggregation: List[Tuple] = None, remove_json_unserializables=True, ) -> List[Dict]: - """ + """Generate a mongo query pipeline. + Generates a MongoDB query pipeline based on the provided arguments. - Parameters: - filter (dict): The filter criteria for the $match stage. - projection (list, optional): List of fields to include in the $project stage. Defaults to None. 
- limit (int, optional): The maximum number of documents to return. Defaults to 0 (no limit). - sort (list of tuples, optional): List of (field, order) tuples specifying the sorting order. Defaults to None. - aggregation (list of tuples, optional): List of (aggregation_operator, field_name) tuples - specifying additional aggregation operations. Defaults to None. - remove_json_unserializables: removes fields that are not JSON serializable. Defaults to True - - Returns: - list: A list with the result set. - - Example: - # Create a pipeline with a filter, projection, sorting, and aggregation - rs = find( - filter={"campaign_id": "mycampaign1"}, - projection=["workflow_id", "started_at", "ended_at"], - limit=10, - sort=[("workflow_id", ASC), ("end_time", DESC)], - aggregation=[("avg", "ended_at"), ("min", "started_at")] - ) - """ + Parameters + ---------- + filter (dict): + The filter criteria for the $match stage. + projection (list, optional): + List of fields to include in the $project stage. Defaults to None. + limit (int, optional): + The maximum number of documents to return. Defaults to 0 (no limit). + sort (list of tuples, optional): + List of (field, order) tuples specifying the sorting order. Defaults to None. + aggregation (list of tuples, optional): + List of (aggregation_operator, field_name) tuples specifying + additional aggregation operations. Defaults to None. + remove_json_unserializables: + Removes fields that are not JSON serializable. Defaults to True + + Returns + ------- + list: + A list with the result set. + + Example + ------- + Create a pipeline with a filter, projection, sorting, and aggregation. + + rs = find( + filter={"campaign_id": "mycampaign1"}, + projection=["workflow_id", "started_at", "ended_at"], + limit=10, + sort=[("workflow_id", ASC), ("end_time", DESC)], + aggregation=[("avg", "ended_at"), ("min", "started_at")] + ) + """ if self._with_webserver: request_data = {"filter": json.dumps(filter)} if projection: @@ -114,9 +119,7 @@ def query( if aggregation: request_data["aggregation"] = json.dumps(aggregation) if remove_json_unserializables: - request_data[ - "remove_json_unserializables" - ] = remove_json_unserializables + request_data["remove_json_unserializables"] = remove_json_unserializables r = requests.post(self._url, json=request_data) if 200 <= r.status_code < 300: @@ -139,23 +142,10 @@ def query( else: self.logger.error("Error when executing query.") - def get_subworkflows_tasks_from_a_parent_workflow( - self, parent_workflow_id: str - ) -> List[Dict]: - """ - - Parameters - ---------- - parent_workflow_id - - Returns - ------- - - """ + def get_subworkflows_tasks_from_a_parent_workflow(self, parent_workflow_id: str) -> List[Dict]: + """Get subworkflows.""" db_api = DBAPI() - sub_wfs = db_api.workflow_query( - {"parent_workflow_id": parent_workflow_id} - ) + sub_wfs = db_api.workflow_query({"parent_workflow_id": parent_workflow_id}) if not sub_wfs: return None tasks = [] @@ -182,6 +172,7 @@ def df_query( sum_lists=False, aggregate_telemetry=False, ) -> pd.DataFrame: + """Get dataframe query.""" # TODO: assert that if clean_dataframe is False, other clean_dataframe # related args should be default. 
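A hedged sketch of the DataFrame-level query above (the workflow_id is a placeholder and the available columns depend on what was captured):

from flowcept.flowcept_api.task_query_api import TaskQueryAPI

api = TaskQueryAPI()
df = api.df_query(filter={"workflow_id": "my_workflow_id"}, calculate_telemetry_diff=True)
print(df.columns)  # e.g. task_id, started_at, ended_at, elapsed_time, telemetry diffs
print(df.head())
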
docs = self.query( @@ -195,9 +186,7 @@ def df_query( if len(docs) == 0: return pd.DataFrame() - df = self._get_dataframe_from_task_docs( - docs, calculate_telemetry_diff, shift_hours - ) + df = self._get_dataframe_from_task_docs(docs, calculate_telemetry_diff, shift_hours) # Clean the telemetry DataFrame if specified if clean_dataframe: df = clean_df( @@ -256,8 +245,7 @@ def _get_dataframe_from_task_docs( if "_id" in df.columns: try: df["doc_generated_time"] = df["_id"].apply( - lambda _id: ObjectId(_id).generation_time - + timedelta(hours=shift_hours) + lambda _id: ObjectId(_id).generation_time + timedelta(hours=shift_hours) ) except Exception as e: self.logger.info(e) @@ -265,28 +253,25 @@ def _get_dataframe_from_task_docs( try: df["elapsed_time"] = df["ended_at"] - df["started_at"] df["elapsed_time"] = df["elapsed_time"].apply( - lambda x: x.total_seconds() - if isinstance(x, timedelta) - else -1 + lambda x: x.total_seconds() if isinstance(x, timedelta) else -1 ) except Exception as e: self.logger.info(e) return df - def get_errored_tasks( - self, workflow_id=None, campaign_id=None, filter=None - ): + def get_errored_tasks(self, workflow_id=None, campaign_id=None, filter=None): + """Get errored tasks.""" # TODO: implement raise NotImplementedError() - def get_successful_tasks( - self, workflow_id=None, campaign_id=None, filter=None - ): + def get_successful_tasks(self, workflow_id=None, campaign_id=None, filter=None): + """Get successful tasks.""" # TODO: implement raise NotImplementedError() def df_get_campaign_tasks(self, campaign_id=None, filter=None): + """Get campaign tasks.""" # TODO: implement raise NotImplementedError() @@ -304,24 +289,42 @@ def df_get_top_k_tasks( sum_lists=False, aggregate_telemetry=False, ): - """ - Retrieve the top K tasks from the (optionally telemetry-aware) DataFrame based on specified sorting criteria. + """Get top tasks. + + Retrieve the top K tasks from the (optionally telemetry-aware) + DataFrame based on specified sorting criteria. + + Parameters + ---------- + - sort (List[Tuple], optional): A list of tuples specifying sorting + criteria for columns. Each tuple should contain a column name and a + sorting order, where the sorting order can be TaskQueryAPI.ASC for + ascending or TaskQueryAPI.DESC for descending. - Parameters: - - sort (List[Tuple], optional): A list of tuples specifying sorting criteria for columns. Each tuple should contain - a column name and a sorting order, where the sorting order can be TaskQueryAPI.ASC for ascending or - TaskQueryAPI.DESC for descending. - k (int, optional): The number of top tasks to retrieve. Defaults to 5. - - filter (optional): A filter condition to apply to the DataFrame. It should follow pymongo's query filter syntax. See: https://www.w3schools.com/python/python_mongodb_query.asp - - clean_telemetry_dataframe (bool, optional): If True, clean the DataFrame using the clean_df function. - - calculate_telemetry_diff (bool, optional): If True, calculate telemetry differences in the DataFrame. - Returns: - pandas.DataFrame: A DataFrame containing the top K tasks based on the specified sorting criteria. + - filter (optional): A filter condition to apply to the DataFrame. It + should follow pymongo's query filter syntax. See: + https://www.w3schools.com/python/python_mongodb_query.asp - Raises: - - Exception: If a specified column in the sorting criteria is not present in the DataFrame. - - Exception: If an invalid sorting order is provided. Use the constants TaskQueryAPI.ASC or TaskQueryAPI.DESC. 
+ - clean_telemetry_dataframe (bool, optional): If True, clean the + DataFrame using the clean_df function. + + - calculate_telemetry_diff (bool, optional): If True, calculate + telemetry differences in the DataFrame. + + Returns + ------- + pandas.DataFrame: A DataFrame containing the top K tasks + based on the specified sorting criteria. + + Raises + ------ + - Exception: If a specified column in the sorting criteria is not + present in the DataFrame. + + - Exception: If an invalid sorting order is provided. Use the + constants TaskQueryAPI.ASC or TaskQueryAPI.DESC. """ # Retrieve telemetry DataFrame based on filter and calculation options df = self.df_query( @@ -360,17 +363,13 @@ def df_get_top_k_tasks( f"The available columns are:\n{list(df.columns)}" ) if order not in {TaskQueryAPI.ASC, TaskQueryAPI.DESC}: - raise Exception( - f"Use the constants TaskQueryAPI.ASC or TaskQueryAPI.DESC to express the sorting order." - ) + raise Exception("Use TaskQueryAPI.ASC or TaskQueryAPI.DESC for sorting order.") sort_col_names.append(col_name) sort_col_orders.append((order == TaskQueryAPI.ASC)) # Sort the DataFrame based on sorting criteria and retrieve the top K rows - result_df = df.sort_values( - by=sort_col_names, ascending=sort_col_orders - ) + result_df = df.sort_values(by=sort_col_names, ascending=sort_col_orders) result_df = result_df.head(k) return result_df @@ -390,7 +389,8 @@ def df_get_tasks_quantiles( aggregate_telemetry=False, calculate_telemetry_diff=False, ) -> pd.DataFrame: - """ + """Get tasks. + # TODO: write docstring :param calculate_telemetry_diff: :param clean_dataframe: @@ -400,7 +400,8 @@ def df_get_tasks_quantiles( :param limit: :return: """ - # TODO: :idea: think of finding the clauses, quantile threshold, and sort order automatically + # TODO: :idea: think of finding the clauses, quantile threshold, and + # sort order automatically df = self.df_query( filter=filter, calculate_telemetry_diff=calculate_telemetry_diff, @@ -417,9 +418,8 @@ def df_get_tasks_quantiles( query_parts = [] for col_name, condition, quantile in clauses: if col_name not in df.columns: - raise Exception( - f"Column {col_name} is not in the dataframe. The available columns are:\n{list(df.columns)}" - ) + msg = f"Column {col_name} is not in dataframe. " + raise Exception(msg + f"The available columns are:\n{list(df.columns)}") if 0 > quantile > 1: raise Exception("Quantile must be 0 < float_number < 1.") if condition not in {">", "<", ">=", "<=", "==", "!="}: @@ -436,20 +436,17 @@ def df_get_tasks_quantiles( sort_col_names, sort_col_orders = [], [] for col_name, order in sort: if col_name not in result_df.columns: - raise Exception( - f"Column {col_name} is not in the resulting dataframe. The available columns are:\n{list(result_df.columns)}" - ) + msg = f"Column {col_name} is not in resulting dataframe. " + raise Exception(msg + f"Available columns are:\n{list(result_df.columns)}") if order not in {TaskQueryAPI.ASC, TaskQueryAPI.DESC}: raise Exception( - f"Use the constants TaskQueryAPI.ASC or TaskQueryAPI.DESC to express the sorting order." + "Use TaskQueryAPI.ASC or TaskQueryAPI.DESC to express sorting order." 
) sort_col_names.append(col_name) sort_col_orders.append((order == TaskQueryAPI.ASC)) - result_df = result_df.sort_values( - by=sort_col_names, ascending=sort_col_orders - ) + result_df = result_df.sort_values(by=sort_col_names, ascending=sort_col_orders) if limit > 0: result_df = result_df.head(limit) @@ -459,6 +456,7 @@ def df_get_tasks_quantiles( def find_interesting_tasks_based_on_correlations_generated_and_telemetry_data( self, filter=None, correlation_threshold=0.5, top_k=50 ): + """Find tasks.""" return self.find_interesting_tasks_based_on_xyz( filter=filter, correlation_threshold=correlation_threshold, @@ -474,11 +472,12 @@ def find_interesting_tasks_based_on_xyz( correlation_threshold=0.5, top_k=50, ): - """ - Returns the most interesting tasks for which (xy) and (xz) are highly correlated, meaning that - y is very senstive to x as well as z is very sensitive to x. - It returns a sorted dict, based on a score calculated depending on how many - high (xy) and (xz) correlations are found. + """Find tasks. + + Returns the most interesting tasks for which (xy) and (xz) are highly + correlated, meaning that y is very sensitive to x as well as z is very + sensitive to x. It returns a sorted dict, based on a score calculated + depending on how many high (xy) and (xz) correlations are found. :param pattern_x: :param pattern_y: :param pattern_z: @@ -487,27 +486,17 @@ def find_interesting_tasks_based_on_xyz( :param top_k: :return: """ - self.logger.warning( - "This is an experimental feature. Use it with carefully!" - ) + self.logger.warning("This is an experimental feature. Use it carefully!") # TODO: improve and optimize this function. df = self.df_query(filter=filter, calculate_telemetry_diff=True) corr_df1 = analyze_correlations_between(df, pattern_x, pattern_y) corr_df2 = analyze_correlations_between(df, pattern_x, pattern_z) - result_df1 = corr_df1[ - abs(corr_df1["correlation"]) >= correlation_threshold - ] - result_df1 = result_df1.iloc[ - result_df1["correlation"].abs().argsort() - ][::-1].head(top_k) - - result_df2 = corr_df2[ - abs(corr_df2["correlation"]) >= correlation_threshold - ] - result_df2 = result_df2.iloc[ - result_df2["correlation"].abs().argsort() - ][::-1].head(top_k) + result_df1 = corr_df1[abs(corr_df1["correlation"]) >= correlation_threshold] + result_df1 = result_df1.iloc[result_df1["correlation"].abs().argsort()][::-1].head(top_k) + + result_df2 = corr_df2[abs(corr_df2["correlation"]) >= correlation_threshold] + result_df2 = result_df2.iloc[result_df2["correlation"].abs().argsort()][::-1].head(top_k) cols = [] for index, row in result_df1.iterrows(): x_col = row["col_1"] @@ -543,10 +532,7 @@ def find_interesting_tasks_based_on_xyz( (row["y_col"], "<=", 0.5), ] xcol_sort = TaskQueryAPI.MINIMUM_FIRST - if ( - SORT_ORDERS is not None - and SORT_ORDERS[row["x_col"]] == "maximum_first" - ): + if SORT_ORDERS is not None and SORT_ORDERS[row["x_col"]] == "maximum_first": xcol_sort = TaskQueryAPI.MAXIMUM_FIRST sort = [ @@ -644,6 +630,7 @@ def df_find_outliers( sum_lists=False, aggregate_telemetry=False, ): + """Find outliers.""" df = self.df_query( filter=filter, calculate_telemetry_diff=calculate_telemetry_diff, @@ -655,7 +642,5 @@ def df_find_outliers( sum_lists=sum_lists, aggregate_telemetry=aggregate_telemetry, ) - df["outlier_columns"] = df.apply( - find_outliers_zscore, axis=1, threshold=outlier_threshold - ) + df["outlier_columns"] = df.apply(find_outliers_zscore, axis=1, threshold=outlier_threshold) return df[df["outlier_columns"].apply(len) > 0] diff --git
a/src/flowcept/flowcept_webserver/__init__.py b/src/flowcept/flowcept_webserver/__init__.py new file mode 100644 index 00000000..47d54805 --- /dev/null +++ b/src/flowcept/flowcept_webserver/__init__.py @@ -0,0 +1 @@ +"""Webserver subpackage.""" diff --git a/flowcept/flowcept_webserver/app.py b/src/flowcept/flowcept_webserver/app.py similarity index 89% rename from flowcept/flowcept_webserver/app.py rename to src/flowcept/flowcept_webserver/app.py index 85c17bf0..2da59109 100644 --- a/flowcept/flowcept_webserver/app.py +++ b/src/flowcept/flowcept_webserver/app.py @@ -1,5 +1,7 @@ +"""App module.""" + from flask_restful import Api -from flask import Flask, request, jsonify +from flask import Flask from flowcept.configs import WEBSERVER_HOST, WEBSERVER_PORT from flowcept.flowcept_webserver.resources.query_rsrc import TaskQuery @@ -18,6 +20,7 @@ @app.route("/") def liveness(): + """Liveness string.""" return "Server up!" diff --git a/src/flowcept/flowcept_webserver/resources/__init__.py b/src/flowcept/flowcept_webserver/resources/__init__.py new file mode 100644 index 00000000..e22c57aa --- /dev/null +++ b/src/flowcept/flowcept_webserver/resources/__init__.py @@ -0,0 +1 @@ +"""Resources subpackage.""" diff --git a/flowcept/flowcept_webserver/resources/query_rsrc.py b/src/flowcept/flowcept_webserver/resources/query_rsrc.py similarity index 87% rename from flowcept/flowcept_webserver/resources/query_rsrc.py rename to src/flowcept/flowcept_webserver/resources/query_rsrc.py index 691333ee..623e0055 100644 --- a/flowcept/flowcept_webserver/resources/query_rsrc.py +++ b/src/flowcept/flowcept_webserver/resources/query_rsrc.py @@ -1,3 +1,5 @@ +"""Query resources.""" + import json from flask_restful import Resource, reqparse @@ -5,9 +7,12 @@ class TaskQuery(Resource): + """TaskQuery class.""" + ROUTE = "/task_query" def post(self): + """Post it.""" parser = reqparse.RequestParser() req_args = ["filter", "projection", "sort", "limit", "aggregation"] for arg in req_args: @@ -29,4 +34,4 @@ def post(self): if docs is not None and len(docs): return docs, 201 else: - return f"Could not find matching docs", 404 + return "Could not find matching docs", 404 diff --git a/flowcept/flowcept_webserver/resources/task_messages_rsrc.py b/src/flowcept/flowcept_webserver/resources/task_messages_rsrc.py similarity index 85% rename from flowcept/flowcept_webserver/resources/task_messages_rsrc.py rename to src/flowcept/flowcept_webserver/resources/task_messages_rsrc.py index a168e2e7..3f111e80 100644 --- a/flowcept/flowcept_webserver/resources/task_messages_rsrc.py +++ b/src/flowcept/flowcept_webserver/resources/task_messages_rsrc.py @@ -1,3 +1,5 @@ +"""Module for TaskMessages object.""" + from flask import jsonify, request from flask_restful import Resource @@ -5,9 +7,12 @@ class TaskMessages(Resource): + """TaskMessages class.""" + ROUTE = "/task_messages" def get(self): + """Get task messages.""" args = request.args task_id = args.get("task_id", None) filter = {} diff --git a/src/flowcept/flowceptor/__init__.py b/src/flowcept/flowceptor/__init__.py new file mode 100644 index 00000000..4ad47f22 --- /dev/null +++ b/src/flowcept/flowceptor/__init__.py @@ -0,0 +1 @@ +"""Flowceptor subpackage.""" diff --git a/src/flowcept/flowceptor/adapters/__init__.py b/src/flowcept/flowceptor/adapters/__init__.py new file mode 100644 index 00000000..6740d523 --- /dev/null +++ b/src/flowcept/flowceptor/adapters/__init__.py @@ -0,0 +1 @@ +"""Adapters subpackage.""" diff --git a/flowcept/flowceptor/adapters/base_interceptor.py
b/src/flowcept/flowceptor/adapters/base_interceptor.py similarity index 79% rename from flowcept/flowceptor/adapters/base_interceptor.py rename to src/flowcept/flowceptor/adapters/base_interceptor.py index ee097fa4..8d9ab517 100644 --- a/flowcept/flowceptor/adapters/base_interceptor.py +++ b/src/flowcept/flowceptor/adapters/base_interceptor.py @@ -1,4 +1,6 @@ -from abc import ABCMeta, abstractmethod +"""Base module.""" + +from abc import abstractmethod from uuid import uuid4 from flowcept.commons.flowcept_dataclasses.workflow_object import ( @@ -14,8 +16,6 @@ from flowcept.flowceptor.telemetry_capture import TelemetryCapture -from flowcept.version import __version__ - # TODO :base-interceptor-refactor: :ml-refactor: :code-reorg: :usability: # Consider creating a new concept for instrumentation-based 'interception'. @@ -25,11 +25,11 @@ # in the code. https://github.com/ORNL/flowcept/issues/109 # class BaseInterceptor(object, metaclass=ABCMeta): class BaseInterceptor(object): + """Base interceptor class.""" + def __init__(self, plugin_key=None, kind=None): self.logger = FlowceptLogger() - if ( - plugin_key is not None - ): # TODO :base-interceptor-refactor: :code-reorg: :usability: + if plugin_key is not None: # TODO :base-interceptor-refactor: :code-reorg: :usability: self.settings = get_settings(plugin_key) else: self.settings = None @@ -42,45 +42,39 @@ def __init__(self, plugin_key=None, kind=None): self.kind = kind def prepare_task_msg(self, *args, **kwargs) -> TaskObject: + """Prepare a task.""" raise NotImplementedError() def start(self, bundle_exec_id) -> "BaseInterceptor": - """ - Starts an interceptor - :return: - """ + """Start an interceptor.""" self._bundle_exec_id = bundle_exec_id - self._mq_dao.init_buffer( - self._interceptor_instance_id, bundle_exec_id - ) + self._mq_dao.init_buffer(self._interceptor_instance_id, bundle_exec_id) return self def stop(self) -> bool: - """ - Gracefully stops an interceptor - :return: - """ + """Stop an interceptor.""" self._mq_dao.stop(self._interceptor_instance_id, self._bundle_exec_id) def observe(self, *args, **kwargs): - """ - This method implements data observability over a data channel - (e.g., a file, a DBMS, an MQ) - :return: + """Observe data. + + This method implements data observability over a data channel (e.g., a + file, a DBMS, an MQ) """ raise NotImplementedError() @abstractmethod def callback(self, *args, **kwargs): - """ + """Implement a callback. + Method that implements the logic that decides what to do when a change - (e.g., task state change) is identified. - If it's an interesting change, it calls self.intercept; otherwise, - let it go.... + (e.g., task state change) is identified. If it's an interesting + change, it calls self.intercept; otherwise, let it go.... """ raise NotImplementedError() def send_workflow_message(self, workflow_obj: WorkflowObject): + """Send workflow.""" wf_id = workflow_obj.workflow_id or str(uuid4()) workflow_obj.workflow_id = wf_id if wf_id in self._saved_workflows: @@ -88,24 +82,22 @@ def send_workflow_message(self, workflow_obj: WorkflowObject): self._saved_workflows.add(wf_id) if self._mq_dao.buffer is None: # TODO :base-interceptor-refactor: :code-reorg: :usability: - raise Exception( f"This interceptor {id(self)} has never been started!"
- ) + raise Exception(f"This interceptor {id(self)} has never been started!") workflow_obj.interceptor_ids = [self._interceptor_instance_id] machine_info = self.telemetry_capture.capture_machine_info() if machine_info is not None: if workflow_obj.machine_info is None: workflow_obj.machine_info = dict() - # TODO :refactor-base-interceptor: we might want to register machine info even when there's no observer - workflow_obj.machine_info[ - self._interceptor_instance_id - ] = machine_info + # TODO :refactor-base-interceptor: we might want to register + # machine info even when there's no observer + workflow_obj.machine_info[self._interceptor_instance_id] = machine_info if ENRICH_MESSAGES: workflow_obj.enrich(self.settings.key if self.settings else None) self.intercept(workflow_obj.to_dict()) return wf_id def intercept(self, obj_msg): + """Intercept it.""" self._mq_dao.buffer.append(obj_msg) # def intercept_appends_only(self, obj_msg): diff --git a/src/flowcept/flowceptor/adapters/dask/__init__.py b/src/flowcept/flowceptor/adapters/dask/__init__.py new file mode 100644 index 00000000..2950584e --- /dev/null +++ b/src/flowcept/flowceptor/adapters/dask/__init__.py @@ -0,0 +1 @@ +"""Dask subpackage.""" diff --git a/flowcept/flowceptor/adapters/dask/dask_dataclasses.py b/src/flowcept/flowceptor/adapters/dask/dask_dataclasses.py similarity index 84% rename from flowcept/flowceptor/adapters/dask/dask_dataclasses.py rename to src/flowcept/flowceptor/adapters/dask/dask_dataclasses.py index 319e3ecb..fe8181ad 100644 --- a/flowcept/flowceptor/adapters/dask/dask_dataclasses.py +++ b/src/flowcept/flowceptor/adapters/dask/dask_dataclasses.py @@ -1,3 +1,5 @@ +"""Dataclasses module.""" + from dataclasses import dataclass from flowcept.commons.flowcept_dataclasses.base_settings_dataclasses import ( @@ -7,6 +9,8 @@ @dataclass class DaskSettings(BaseSettings): + """Dask settings.""" + redis_port: int redis_host: str worker_should_get_input: bool @@ -17,5 +21,6 @@ class DaskSettings(BaseSettings): kind = "dask" def __post_init__(self): + """Set attributes after init.""" self.observer_type = "outsourced" self.observer_subtype = None diff --git a/flowcept/flowceptor/adapters/dask/dask_interceptor.py b/src/flowcept/flowceptor/adapters/dask/dask_interceptor.py similarity index 89% rename from flowcept/flowceptor/adapters/dask/dask_interceptor.py rename to src/flowcept/flowceptor/adapters/dask/dask_interceptor.py index a096148d..6b3ba13e 100644 --- a/flowcept/flowceptor/adapters/dask/dask_interceptor.py +++ b/src/flowcept/flowceptor/adapters/dask/dask_interceptor.py @@ -1,3 +1,5 @@ +"""Interceptor module.""" + import inspect from flowcept import WorkflowObject @@ -18,6 +20,7 @@ def get_run_spec_data(task_msg: TaskObject, run_spec): + """Get the run specs.""" # def _get_arg(arg_name): # if type(run_spec) == dict: # return run_spec.get(arg_name, None) @@ -90,6 +93,7 @@ def get_run_spec_data(task_msg: TaskObject, run_spec): def get_task_deps(task_state, task_msg: TaskObject): + """Get the task dependencies.""" if len(task_state.dependencies): task_msg.dependencies = [t.key for t in task_state.dependencies] if len(task_state.dependents): @@ -97,6 +101,7 @@ def get_task_deps(task_state, task_msg: TaskObject): def get_times_from_task_state(task_msg, ts): + """Get the times.""" for times in ts.startstops: if times["action"] == "compute": task_msg.started_at = times["start"] @@ -104,12 +109,15 @@ def get_times_from_task_state(task_msg, ts): class DaskSchedulerInterceptor(BaseInterceptor): + """Dask scheduler.""" + def 
__init__(self, scheduler, plugin_key="dask"): self._scheduler = scheduler super().__init__(plugin_key) super().start(bundle_exec_id=self._scheduler.address) def callback(self, task_id, start, finish, *args, **kwargs): + """Implement the callback.""" try: if task_id in self._scheduler.tasks: ts = self._scheduler.tasks[task_id] @@ -133,9 +141,7 @@ def callback(self, task_id, start, finish, *args, **kwargs): if REGISTER_WORKFLOW: if hasattr(self._scheduler, "current_workflow"): - wf_obj: WorkflowObject = ( - self._scheduler.current_workflow - ) + wf_obj: WorkflowObject = self._scheduler.current_workflow task_msg.workflow_id = wf_obj.workflow_id self.send_workflow_message(wf_obj) else: @@ -150,19 +156,24 @@ def callback(self, task_id, start, finish, *args, **kwargs): class DaskWorkerInterceptor(BaseInterceptor): + """Dask worker.""" + def __init__(self, plugin_key="dask"): self._plugin_key = plugin_key self._worker = None # super().__init__ goes to setup_worker. def setup_worker(self, worker): - """ + """Set the worker. + Dask Worker's constructor actually happens in this setup method. That's why we call the super() constructor here. """ self._worker = worker super().__init__(self._plugin_key) - self._generated_workflow_id = True # TODO: :refactor: This is just to avoid the auto-generation of workflow id, which doesnt make sense in Dask case.. + # TODO: :refactor: This is just to avoid the auto-generation of + # workflow id, which doesn't make sense in the Dask case. + self._generated_workflow_id = True super().start(bundle_exec_id=self._worker.scheduler.address) # Note that both scheduler and worker get the exact same input. # Worker does not resolve intermediate inputs, just like the scheduler. @@ -170,6 +181,7 @@ def setup_worker(self, worker): # workers.
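As an aside on how these Dask interceptors are attached in practice, a minimal sketch follows. It is hedged: the FlowceptDaskSchedulerAdapter, FlowceptDaskWorkerAdapter, and register_dask_workflow names appear in dask_plugins.py further down in this patch, but the cluster setup and the exact plugin-registration calls are assumptions based on standard dask.distributed APIs, not something this patch prescribes.

from dask.distributed import Client, LocalCluster

from flowcept.flowceptor.adapters.dask.dask_plugins import (
    FlowceptDaskSchedulerAdapter,
    FlowceptDaskWorkerAdapter,
    register_dask_workflow,
)

if __name__ == "__main__":
    # Assumption: a local cluster; any dask.distributed deployment should work similarly.
    cluster = LocalCluster(n_workers=2)
    client = Client(cluster)

    # The scheduler adapter wraps DaskSchedulerInterceptor and needs the scheduler object;
    # the worker adapter wraps DaskWorkerInterceptor and is constructed without arguments.
    cluster.scheduler.add_plugin(FlowceptDaskSchedulerAdapter(cluster.scheduler))
    client.register_worker_plugin(FlowceptDaskWorkerAdapter())

    # Optionally associate the intercepted tasks with a workflow id on the scheduler.
    register_dask_workflow(client)

    future = client.submit(sum, [1, 2, 3])
    print(future.result())

With the adapters registered, the transition callbacks shown in this file fire on the scheduler and on each worker as task states change.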
def callback(self, task_id, start, finish, *args, **kwargs): + """Implement the callback.""" try: if task_id not in self._worker.state.tasks: return @@ -181,9 +193,7 @@ def callback(self, task_id, start, finish, *args, **kwargs): if ts.state == "executing": if TELEMETRY_CAPTURE is not None: - task_msg.telemetry_at_start = ( - self.telemetry_capture.capture() - ) + task_msg.telemetry_at_start = self.telemetry_capture.capture() task_msg.status = Status.RUNNING task_msg.address = self._worker.worker_address if self.settings.worker_create_timestamps: @@ -195,9 +205,7 @@ def callback(self, task_id, start, finish, *args, **kwargs): else: get_times_from_task_state(task_msg, ts) if TELEMETRY_CAPTURE is not None: - task_msg.telemetry_at_end = ( - self.telemetry_capture.capture() - ) + task_msg.telemetry_at_end = self.telemetry_capture.capture() elif ts.state == "error": task_msg.status = Status.ERROR @@ -210,9 +218,7 @@ def callback(self, task_id, start, finish, *args, **kwargs): "traceback": ts.traceback_text, } if TELEMETRY_CAPTURE is not None: - task_msg.telemetry_at_end = ( - self.telemetry_capture.capture() - ) + task_msg.telemetry_at_end = self.telemetry_capture.capture() else: return @@ -224,16 +230,12 @@ def callback(self, task_id, start, finish, *args, **kwargs): if task_id in self._worker.data.memory: task_msg.generated = self._worker.data.memory[task_id] if REPLACE_NON_JSON_SERIALIZABLE: - task_msg.generated = replace_non_serializable( - task_msg.generated - ) + task_msg.generated = replace_non_serializable(task_msg.generated) if ENRICH_MESSAGES: task_msg.enrich(self._plugin_key) self.intercept(task_msg.to_dict()) except Exception as e: - self.logger.error( - f"Error with dask worker: {self._worker.worker_address}" - ) + self.logger.error(f"Error with dask worker: {self._worker.worker_address}") self.logger.exception(e) diff --git a/flowcept/flowceptor/adapters/dask/dask_plugins.py b/src/flowcept/flowceptor/adapters/dask/dask_plugins.py similarity index 89% rename from flowcept/flowceptor/adapters/dask/dask_plugins.py rename to src/flowcept/flowceptor/adapters/dask/dask_plugins.py index 039f1a9c..de7fcd96 100644 --- a/flowcept/flowceptor/adapters/dask/dask_plugins.py +++ b/src/flowcept/flowceptor/adapters/dask/dask_plugins.py @@ -1,3 +1,5 @@ +"""Dask plugin module.""" + from uuid import uuid4 from dask.distributed import WorkerPlugin, SchedulerPlugin @@ -40,6 +42,7 @@ def register_dask_workflow( custom_metadata: dict = None, used: dict = None, ): + """Register the workflow.""" workflow_id = workflow_id or str(uuid4()) dask_client.run_on_scheduler( _set_workflow_on_scheduler, @@ -53,28 +56,37 @@ def register_dask_workflow( class FlowceptDaskSchedulerAdapter(SchedulerPlugin): + """Dask schedule adapter.""" + def __init__(self, scheduler): self.address = scheduler.address self.interceptor = DaskSchedulerInterceptor(scheduler) def transition(self, key, start, finish, *args, **kwargs): + """Get the transition.""" self.interceptor.callback(key, start, finish, args, kwargs) def close(self): + """Close it.""" self.interceptor.logger.debug("Going to close scheduler!") self.interceptor.stop() class FlowceptDaskWorkerAdapter(WorkerPlugin): + """Dask worker adapter.""" + def __init__(self): self.interceptor = DaskWorkerInterceptor() def setup(self, worker): + """Set it up.""" self.interceptor.setup_worker(worker) def transition(self, key, start, finish, *args, **kwargs): + """Run the transition.""" self.interceptor.callback(key, start, finish, args, kwargs) def teardown(self, worker): + """Tear it 
down.""" self.interceptor.logger.debug("Going to close worker!") self.interceptor.stop() diff --git a/flowcept/flowceptor/adapters/interceptor_state_manager.py b/src/flowcept/flowceptor/adapters/interceptor_state_manager.py similarity index 85% rename from flowcept/flowceptor/adapters/interceptor_state_manager.py rename to src/flowcept/flowceptor/adapters/interceptor_state_manager.py index 5dab5f76..64e5f040 100644 --- a/flowcept/flowceptor/adapters/interceptor_state_manager.py +++ b/src/flowcept/flowceptor/adapters/interceptor_state_manager.py @@ -1,3 +1,5 @@ +"""Interceptor module.""" + from redis import Redis from flowcept.commons.flowcept_dataclasses.base_settings_dataclasses import ( @@ -6,6 +8,8 @@ class InterceptorStateManager(object): + """Interceptor class.""" + def __init__(self, settings: BaseSettings): self._set_name = settings.key @@ -22,13 +26,17 @@ def __init__(self, settings: BaseSettings): ) def reset(self): + """Reset it.""" self._db.delete(self._set_name) def count(self): + """Count it.""" return self._db.scard(self._set_name) def add_element_id(self, element_id: str): + """Add an element.""" self._db.sadd(self._set_name, element_id) def has_element_id(self, element_id) -> bool: + """Check for element.""" return self._db.sismember(self._set_name, element_id) diff --git a/src/flowcept/flowceptor/adapters/mlflow/__init__.py b/src/flowcept/flowceptor/adapters/mlflow/__init__.py new file mode 100755 index 00000000..e9fe7ec8 --- /dev/null +++ b/src/flowcept/flowceptor/adapters/mlflow/__init__.py @@ -0,0 +1 @@ +"""MLFlow subpackage.""" diff --git a/flowcept/flowceptor/adapters/mlflow/interception_event_handler.py b/src/flowcept/flowceptor/adapters/mlflow/interception_event_handler.py similarity index 71% rename from flowcept/flowceptor/adapters/mlflow/interception_event_handler.py rename to src/flowcept/flowceptor/adapters/mlflow/interception_event_handler.py index 89582917..bf859600 100644 --- a/flowcept/flowceptor/adapters/mlflow/interception_event_handler.py +++ b/src/flowcept/flowceptor/adapters/mlflow/interception_event_handler.py @@ -1,11 +1,16 @@ -from watchdog.events import LoggingEventHandler, FileSystemEventHandler +"""Event handler module.""" + +from watchdog.events import FileSystemEventHandler class InterceptionEventHandler(FileSystemEventHandler): + """Event handler class.""" + def __init__(self, interceptor_instance, callback_function): super().__init__() self.callback_function = callback_function self.interceptor_instance = interceptor_instance def on_modified(self, event): + """Get on modified.""" self.callback_function(self.interceptor_instance) diff --git a/flowcept/flowceptor/adapters/mlflow/mlflow_dao.py b/src/flowcept/flowceptor/adapters/mlflow/mlflow_dao.py similarity index 94% rename from flowcept/flowceptor/adapters/mlflow/mlflow_dao.py rename to src/flowcept/flowceptor/adapters/mlflow/mlflow_dao.py index a4359638..1ebda157 100644 --- a/flowcept/flowceptor/adapters/mlflow/mlflow_dao.py +++ b/src/flowcept/flowceptor/adapters/mlflow/mlflow_dao.py @@ -1,3 +1,5 @@ +"""DAO module.""" + from typing import List from sqlalchemy.engine import Row, create_engine from sqlalchemy import text @@ -13,6 +15,8 @@ @singleton class MLFlowDAO: + """DAO class.""" + _LIMIT = 10 # TODO: This should not at all be hard coded. 
# This value needs to be greater than the amount of @@ -32,6 +36,7 @@ def _get_db_engine(sqlite_file): raise Exception(f"Could not create DB engine with uri: {db_uri}") def get_finished_run_uuids(self) -> List[Row]: + """Get the finished run.""" sql = text( dedent( f""" @@ -56,6 +61,7 @@ def get_finished_run_uuids(self) -> List[Row]: conn.close() def get_run_data(self, run_uuid: str) -> RunData: + """Get the run data.""" # TODO: consider outer joins to get the run data even if there's # no metric or param or if the task hasn't finished yet sql = text( @@ -95,14 +101,14 @@ def get_run_data(self, run_uuid: str) -> RunData: metric_key = tuple_dict.get("metric_key", None) metric_value = tuple_dict.get("metric_value", None) if metric_key and metric_value: - if not (metric_key in run_data_dict["generated"]): + if metric_key not in run_data_dict["generated"]: run_data_dict["generated"][metric_key] = None run_data_dict["generated"][metric_key] = metric_value param_key = tuple_dict.get("parameter_key", None) param_value = tuple_dict.get("parameter_value", None) if param_key and param_value: - if not (param_key in run_data_dict["used"]): + if param_key not in run_data_dict["used"]: run_data_dict["used"][param_key] = None run_data_dict["used"][param_key] = param_value diff --git a/flowcept/flowceptor/adapters/mlflow/mlflow_dataclasses.py b/src/flowcept/flowceptor/adapters/mlflow/mlflow_dataclasses.py similarity index 83% rename from flowcept/flowceptor/adapters/mlflow/mlflow_dataclasses.py rename to src/flowcept/flowceptor/adapters/mlflow/mlflow_dataclasses.py index 4555ff98..22392e95 100644 --- a/flowcept/flowceptor/adapters/mlflow/mlflow_dataclasses.py +++ b/src/flowcept/flowceptor/adapters/mlflow/mlflow_dataclasses.py @@ -1,3 +1,5 @@ +"""Dataclasses module.""" + from dataclasses import dataclass from typing import List @@ -8,6 +10,8 @@ @dataclass class MLFlowSettings(BaseSettings): + """MLFlow settings.""" + file_path: str log_params: List[str] log_metrics: List[str] @@ -17,12 +21,15 @@ class MLFlowSettings(BaseSettings): kind = "mlflow" def __post_init__(self): + """Set attributes after init.""" self.observer_type = "file" self.observer_subtype = "sqlite" @dataclass class RunData: + """Run data class.""" + task_id: str start_time: int end_time: int diff --git a/flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py b/src/flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py similarity index 82% rename from flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py rename to src/flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py index 741d30aa..cf780abc 100644 --- a/flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py +++ b/src/flowcept/flowceptor/adapters/mlflow/mlflow_interceptor.py @@ -1,8 +1,9 @@ +"""Interceptor module.""" + import os import time from threading import Thread -from watchdog.observers import Observer from watchdog.observers.polling import PollingObserver from flowcept.commons.flowcept_dataclasses.task_object import TaskObject @@ -22,6 +23,8 @@ class MLFlowInterceptor(BaseInterceptor): + """Interceptor class.""" + def __init__(self, plugin_key="mlflow"): super().__init__(plugin_key) self._observer: PollingObserver = None @@ -30,6 +33,7 @@ def __init__(self, plugin_key="mlflow"): self.dao = MLFlowDAO(self.settings) def prepare_task_msg(self, mlflow_run_data: RunData) -> TaskObject: + """Prepare a task.""" task_msg = TaskObject() task_msg.task_id = mlflow_run_data.task_id task_msg.utc_timestamp = get_utc_now() @@ -39,11 +43,12 @@ def prepare_task_msg(self, 
mlflow_run_data: RunData) -> TaskObject: return task_msg def callback(self): - """ + """Implement a callback. + This function is called whenever a change is identified in the data. - It decides what to do in the event of a change. - If it's an interesting change, it calls self.intercept; otherwise, - let it go.... + It decides what to do in the event of a change. If it's an + interesting change, it calls self.intercept; otherwise, let it + go.... """ runs = self.dao.get_finished_run_uuids() if not runs: @@ -51,9 +56,7 @@ def callback(self): for run_uuid_tuple in runs: run_uuid = run_uuid_tuple[0] if not self.state_manager.has_element_id(run_uuid): - self.logger.debug( - f"We need to intercept this Run: {run_uuid}" - ) + self.logger.debug(f"We need to intercept this Run: {run_uuid}") run_data = self.dao.get_run_data(run_uuid) self.state_manager.add_element_id(run_uuid) if not run_data: @@ -62,12 +65,14 @@ def callback(self): self.intercept(task_msg) def start(self, bundle_exec_id) -> "MLFlowInterceptor": + """Start it.""" super().start(bundle_exec_id) self._observer_thread = Thread(target=self.observe, daemon=True) self._observer_thread.start() return self def stop(self) -> bool: + """Stop it.""" super().stop() self.logger.debug("Interceptor stopping...") self._observer.stop() @@ -76,10 +81,9 @@ def stop(self) -> bool: return True def observe(self): + """Observe it.""" self.logger.debug("Observing") - event_handler = InterceptionEventHandler( - self, self.__class__.callback - ) + event_handler = InterceptionEventHandler(self, self.__class__.callback) while not os.path.isfile(self.settings.file_path): self.logger.warning( f"I can't watch the file {self.settings.file_path}," @@ -89,9 +93,7 @@ def observe(self): ) time.sleep(self.settings.watch_interval_sec) - self._observer = Observer() - self._observer.schedule( - event_handler, self.settings.file_path, recursive=True - ) + self._observer = PollingObserver() + self._observer.schedule(event_handler, self.settings.file_path, recursive=True) self._observer.start() self.logger.info(f"Watching {self.settings.file_path}") diff --git a/src/flowcept/flowceptor/adapters/tensorboard/__init__.py b/src/flowcept/flowceptor/adapters/tensorboard/__init__.py new file mode 100755 index 00000000..503104ff --- /dev/null +++ b/src/flowcept/flowceptor/adapters/tensorboard/__init__.py @@ -0,0 +1 @@ +"""Tensorboard subpackage.""" diff --git a/flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py b/src/flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py similarity index 80% rename from flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py rename to src/flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py index 584e96a5..6275289f 100644 --- a/flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py +++ b/src/flowcept/flowceptor/adapters/tensorboard/tensorboard_dataclasses.py @@ -1,3 +1,5 @@ +"""Tensorboard dataclasses module.""" + from dataclasses import dataclass from typing import List @@ -8,6 +10,8 @@ @dataclass class TensorboardSettings(BaseSettings): + """Tensorboard settings.""" + file_path: str log_tags: List[str] log_metrics: List[str] @@ -17,5 +21,6 @@ class TensorboardSettings(BaseSettings): kind = "tensorboard" def __post_init__(self): + """Set attributes after init.""" self.observer_type = "file" self.observer_subtype = "binary" diff --git a/flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py b/src/flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py 
similarity index 85% rename from flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py rename to src/flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py index abf53edc..f55c9e9a 100644 --- a/flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py +++ b/src/flowcept/flowceptor/adapters/tensorboard/tensorboard_interceptor.py @@ -1,7 +1,8 @@ +"""Tensorboard interceptor module.""" + import os import time -from watchdog.observers import Observer from tbparse import SummaryReader from watchdog.observers.polling import PollingObserver @@ -22,6 +23,8 @@ class TensorboardInterceptor(BaseInterceptor): + """Tensorboard interceptor.""" + def __init__(self, plugin_key="tensorboard"): super().__init__(plugin_key) self._observer: PollingObserver = None @@ -30,11 +33,12 @@ def __init__(self, plugin_key="tensorboard"): self.log_metrics = set(self.settings.log_metrics) def callback(self): - """ + """Implement the callback. + This function is called whenever a change is identified in the data. - It decides what to do in the event of a change. - If it's an interesting change, it calls self.intercept; otherwise, - let it go.... + It decides what to do in the event of a change. If it's an + interesting change, it calls self.intercept; otherwise, let it + go.... """ self.logger.debug("New tensorboard directory event!") # TODO: now we're waiting for the file to be completely written. @@ -45,9 +49,7 @@ def callback(self): for child_event_file in reader.children: child_event = reader.children[child_event_file] if self.state_manager.has_element_id(child_event.log_path): - self.logger.debug( - f"Already extracted metric from {child_event_file}." - ) + self.logger.debug(f"Already extracted metric from {child_event_file}.") continue event_tags = child_event.get_tags() @@ -89,11 +91,13 @@ def callback(self): self.state_manager.add_element_id(child_event.log_path) def start(self, bundle_exec_id) -> "TensorboardInterceptor": + """Start it.""" super().start(bundle_exec_id) self.observe() return self def stop(self) -> bool: + """Stop it.""" self.logger.debug("Interceptor stopping...") super().stop() self._observer.stop() @@ -101,13 +105,11 @@ def stop(self) -> bool: return True def observe(self): - event_handler = InterceptionEventHandler( - self, self.__class__.callback - ) + """Observe it.""" + event_handler = InterceptionEventHandler(self, self.__class__.callback) while not os.path.isdir(self.settings.file_path): self.logger.debug( - f"I can't watch the file {self.settings.file_path}," - f" as it does not exist." + f"I can't watch the file {self.settings.file_path}," f" as it does not exist." ) self.logger.debug( f"\tI will sleep for {self.settings.watch_interval_sec} sec." 
@@ -115,9 +117,7 @@ def observe(self): ) time.sleep(self.settings.watch_interval_sec) - self._observer = Observer() - self._observer.schedule( - event_handler, self.settings.file_path, recursive=True - ) + self._observer = PollingObserver() + self._observer.schedule(event_handler, self.settings.file_path, recursive=True) self._observer.start() self.logger.debug(f"Watching {self.settings.file_path}") diff --git a/src/flowcept/flowceptor/adapters/zambeze/__init__.py b/src/flowcept/flowceptor/adapters/zambeze/__init__.py new file mode 100755 index 00000000..61fde820 --- /dev/null +++ b/src/flowcept/flowceptor/adapters/zambeze/__init__.py @@ -0,0 +1 @@ +"""Zambeze subpackage.""" diff --git a/flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py b/src/flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py similarity index 84% rename from flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py rename to src/flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py index 827a4ed2..49b7fb94 100644 --- a/flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py +++ b/src/flowcept/flowceptor/adapters/zambeze/zambeze_dataclasses.py @@ -1,3 +1,5 @@ +"""Zambeze dataclass module.""" + from dataclasses import dataclass from typing import List, Dict @@ -9,6 +11,8 @@ @dataclass class ZambezeMessage: + """Zambeze message.""" + name: str activity_id: str campaign_id: str @@ -23,6 +27,8 @@ class ZambezeMessage: @dataclass class ZambezeSettings(BaseSettings): + """Zambeze settings.""" + host: str port: int queue_names: List[str] @@ -30,5 +36,6 @@ class ZambezeSettings(BaseSettings): kind = "zambeze" def __post_init__(self): + """Set attributes after init.""" self.observer_type = "message_broker" self.observer_subtype = "rabbit_mq" diff --git a/flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py b/src/flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py similarity index 81% rename from flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py rename to src/flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py index 27b77d9d..05efc3ca 100644 --- a/flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py +++ b/src/flowcept/flowceptor/adapters/zambeze/zambeze_interceptor.py @@ -1,3 +1,5 @@ +"""Zambeze interceptor module.""" + from threading import Thread from time import sleep import pika @@ -12,6 +14,8 @@ class ZambezeInterceptor(BaseInterceptor): + """Zambeze interceptor.""" + def __init__(self, plugin_key="zambeze"): super().__init__(plugin_key) self._consumer_tag = None @@ -19,18 +23,15 @@ def __init__(self, plugin_key="zambeze"): self._observer_thread: Thread = None def prepare_task_msg(self, zambeze_msg: Dict) -> TaskObject: + """Prepare a task.""" task_msg = TaskObject() task_msg.utc_timestamp = get_utc_now() task_msg.campaign_id = zambeze_msg.get("campaign_id", None) task_msg.task_id = zambeze_msg.get("activity_id", None) task_msg.activity_id = zambeze_msg.get("name", None) task_msg.dependencies = zambeze_msg.get("depends_on", None) - task_msg.custom_metadata = { - "command": zambeze_msg.get("command", None) - } - task_msg.status = get_status_from_str( - zambeze_msg.get("activity_status", None) - ) + task_msg.custom_metadata = {"command": zambeze_msg.get("command", None)} + task_msg.status = get_status_from_str(zambeze_msg.get("activity_status", None)) task_msg.used = { "args": zambeze_msg.get("arguments", None), "kwargs": zambeze_msg.get("kwargs", None), @@ -39,20 +40,21 @@ def prepare_task_msg(self, zambeze_msg: Dict) -> TaskObject: return task_msg def 
start(self, bundle_exec_id) -> "ZambezeInterceptor": + """Start it.""" super().start(bundle_exec_id) self._observer_thread = Thread(target=self.observe) self._observer_thread.start() return self def stop(self) -> bool: + """Stop it.""" self.logger.debug("Interceptor stopping...") super().stop() try: self._channel.stop_consuming() except Exception as e: self.logger.warning( - f"This exception is expected to occur after " - f"channel.basic_cancel: {e}" + f"This exception is expected to occur after " f"channel.basic_cancel: {e}" ) sleep(2) self._observer_thread.join() @@ -60,10 +62,9 @@ def stop(self) -> bool: return True def observe(self): + """Observe it.""" connection = pika.BlockingConnection( - pika.ConnectionParameters( - host=self.settings.host, port=self.settings.port - ) + pika.ConnectionParameters(host=self.settings.host, port=self.settings.port) ) self._channel = connection.channel() for queue in self.settings.queue_names: @@ -76,9 +77,7 @@ def observe(self): on_message_callback=self.callback, auto_ack=True, ) - self.logger.debug( - f"Waiting for Zambeze messages on queue {queue}" - ) + self.logger.debug(f"Waiting for Zambeze messages on queue {queue}") try: self._channel.start_consuming() @@ -90,13 +89,13 @@ def observe(self): def _intercept(self, body_obj): self.logger.debug( - f"I'm a Zambeze interceptor and I need to intercept this:" - f"\n\t{json.dumps(body_obj)}" + f"Zambeze interceptor needs to intercept this:" f"\n\t{json.dumps(body_obj)}" ) task_msg = self.prepare_task_msg(body_obj) self.intercept(task_msg.to_dict()) def callback(self, ch, method, properties, body): + """Implement the callback.""" body_obj = json.loads(body) if self.settings.key_values_to_filter is not None: for key_value in self.settings.key_values_to_filter: diff --git a/src/flowcept/flowceptor/consumers/__init__.py b/src/flowcept/flowceptor/consumers/__init__.py new file mode 100644 index 00000000..e05187f2 --- /dev/null +++ b/src/flowcept/flowceptor/consumers/__init__.py @@ -0,0 +1 @@ +"""Consumers subpackage.""" diff --git a/flowcept/flowceptor/consumers/consumer_utils.py b/src/flowcept/flowceptor/consumers/consumer_utils.py similarity index 66% rename from flowcept/flowceptor/consumers/consumer_utils.py rename to src/flowcept/flowceptor/consumers/consumer_utils.py index d3a844d0..f6664160 100644 --- a/flowcept/flowceptor/consumers/consumer_utils.py +++ b/src/flowcept/flowceptor/consumers/consumer_utils.py @@ -1,24 +1,23 @@ -from typing import List, Dict +"""Consumer utilities module.""" +from typing import List, Dict from flowcept.commons.flowcept_dataclasses.task_object import TaskObject def curate_task_msg(task_msg_dict: dict): + """Curate a task message.""" # Converting any arg to kwarg in the form {"arg1": val1, "arg2: val2} for field in TaskObject.get_dict_field_names(): if field not in task_msg_dict: continue field_val = task_msg_dict[field] - if type(field_val) == dict and not field_val: + if type(field_val) is dict and not field_val: task_msg_dict.pop(field) # removing empty fields continue - if type(field_val) == dict: + if type(field_val) is dict: original_field_val = field_val.copy() for k in original_field_val: - if ( - type(original_field_val[k]) == dict - and not original_field_val[k] - ): + if type(original_field_val[k]) is dict and not original_field_val[k]: field_val.pop(k) # removing inner empty fields task_msg_dict[field] = field_val else: @@ -47,27 +46,27 @@ def remove_empty_fields_from_dict(obj: dict): def curate_dict_task_messages( doc_list: List[Dict], indexing_key: str, 
utc_time_at_insertion: float = 0 ): - """ - This function removes duplicates based on the - indexing_key (e.g., task_id) locally before sending - to MongoDB. - # It also avoids tasks changing states once they go into finished state. - This is needed because we can't guarantee MQ orders. - # Finished states have higher priority in status changes, as we don't expect a - # status change once a task goes into finished state. - It also resolves updates (instead of replacement) of - inner nested fields in a JSON object. + """Remove duplicates. + + This function removes duplicates based on the indexing_key (e.g., task_id) + locally before sending to MongoDB. + + It also avoids tasks changing states once they go into finished state. + This is needed because we can't guarantee MQ orders. + + Finished states have higher priority in status changes, as we don't expect + a status change once a task goes into finished state. + + It also resolves updates (instead of replacement) of inner nested fields + in a JSON object. + :param doc_list: :param indexing_key: #the key we want to index. E.g., task_id in tasks collection :return: """ indexed_buffer = {} for doc in doc_list: - if ( - (len(doc) == 1) - and (indexing_key in doc) - and (doc[indexing_key] in indexed_buffer) - ): + if (len(doc) == 1) and (indexing_key in doc) and (doc[indexing_key] in indexed_buffer): # This task_msg does not add any metadata continue @@ -87,25 +86,11 @@ def curate_dict_task_messages( indexed_buffer[indexing_key_value] = doc continue - # if ( - # "finished" in indexed_buffer[indexing_key_value] - # and "status" in doc - # ): - # doc.pop("status") - # - # if "status" in doc: - # for finished_status in Status.get_finished_statuses(): - # if finished_status == doc["status"]: - # indexed_buffer[indexing_key_value]["finished"] = True - # break - for field in TaskObject.get_dict_field_names(): if field in doc: if doc[field] is not None and len(doc[field]): if field in indexed_buffer[indexing_key_value]: - indexed_buffer[indexing_key_value][field].update( - doc[field] - ) + indexed_buffer[indexing_key_value][field].update(doc[field]) else: indexed_buffer[indexing_key_value][field] = doc[field] doc.pop(field) diff --git a/flowcept/flowceptor/consumers/document_inserter.py b/src/flowcept/flowceptor/consumers/document_inserter.py similarity index 83% rename from flowcept/flowceptor/consumers/document_inserter.py rename to src/flowcept/flowceptor/consumers/document_inserter.py index c0fb0a64..ab4b567e 100644 --- a/flowcept/flowceptor/consumers/document_inserter.py +++ b/src/flowcept/flowceptor/consumers/document_inserter.py @@ -1,3 +1,5 @@ +"""Document module.""" + from datetime import datetime from time import time, sleep from threading import Thread, Event, Lock @@ -31,10 +33,13 @@ class DocumentInserter: + """Document class.""" + DECODER = GenericJSONDecoder if JSON_SERIALIZER == "complex" else None + # TODO: :code-reorg: Should this be in utils? @staticmethod - def remove_empty_fields(d): # TODO: :code-reorg: Should this be in utils? + def remove_empty_fields(d): """Remove empty fields from a dictionary recursively.""" for key, value in list(d.items()): if isinstance(value, dict): @@ -95,18 +100,13 @@ def _set_buffer_size(self): @staticmethod def flush_function(buffer, doc_dao, logger=flowcept.commons.logger): + """Flush it.""" logger.info( - f"Current Doc buffer size: {len(buffer)}, " - f"Gonna flush {len(buffer)} msgs to DocDB!" 
- ) - inserted = doc_dao.insert_and_update_many( - TaskObject.task_id_field(), buffer + f"Current Doc buffer size: {len(buffer)}, " f"Gonna flush {len(buffer)} msgs to DocDB!" ) + inserted = doc_dao.insert_and_update_many(TaskObject.task_id_field(), buffer) if not inserted: - logger.warning( - f"Could not insert the buffer correctly. " - f"Buffer content={buffer}" - ) + logger.warning(f"Could not insert the buffer correctly. " f"Buffer content={buffer}") else: logger.info(f"Flushed {len(buffer)} msgs to DocDB!") @@ -125,14 +125,10 @@ def _handle_task_message(self, message: Dict): for time_field in TaskObject.get_time_field_names(): if time_field in message: has_time_fields = True - message[time_field] = datetime.fromtimestamp( - message[time_field], pytz.utc - ) + message[time_field] = datetime.fromtimestamp(message[time_field], pytz.utc) if not has_time_fields: - message["registered_at"] = datetime.fromtimestamp( - time(), pytz.utc - ) + message["registered_at"] = datetime.fromtimestamp(time(), pytz.utc) if ENRICH_MESSAGES: TaskObject.enrich_task_dict(message) @@ -140,8 +136,7 @@ def _handle_task_message(self, message: Dict): message.pop("type") self.logger.debug( - f"Received following msg in DocInserter:" - f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" + f"Received following msg in DocInserter:" f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" ) if MONGO_REMOVE_EMPTY_FIELDS: remove_empty_fields_from_dict(message) @@ -156,8 +151,7 @@ def _handle_task_message(self, message: Dict): def _handle_workflow_message(self, message: Dict): message.pop("type") self.logger.debug( - f"Received following msg in DocInserter:" - f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" + f"Received following msg in DocInserter:" f"\n\t[BEGIN_MSG]{message}\n[END_MSG]\t" ) if MONGO_REMOVE_EMPTY_FIELDS: remove_empty_fields_from_dict(message) @@ -173,7 +167,7 @@ def _handle_control_message(self, message): exec_bundle_id = message.get("exec_bundle_id", None) interceptor_instance_id = message.get("interceptor_instance_id") self.logger.info( - f"I'm doc inserter id {id(self)}. I ack that I received mq_dao_thread_stopped message " + f"DocInserter id {id(self)}. Received mq_dao_thread_stopped message " f"in DocInserter from the interceptor " f"{'' if exec_bundle_id is None else exec_bundle_id}_{interceptor_instance_id}!" ) @@ -181,9 +175,7 @@ def _handle_control_message(self, message): f"Begin register_time_based_thread_end " f"{'' if exec_bundle_id is None else exec_bundle_id}_{interceptor_instance_id}!" ) - self._mq_dao.register_time_based_thread_end( - interceptor_instance_id, exec_bundle_id - ) + self._mq_dao.register_time_based_thread_end(interceptor_instance_id, exec_bundle_id) self.logger.info( f"Done register_time_based_thread_end " f"{'' if exec_bundle_id is None else exec_bundle_id}_{interceptor_instance_id}!" 
@@ -194,6 +186,7 @@ def _handle_control_message(self, message): return "stop" def start(self) -> "DocumentInserter": + """Start it.""" self._main_thread = Thread(target=self._start) self._main_thread.start() return self @@ -234,12 +227,11 @@ def _message_handler(self, msg_obj: dict): return True def stop(self, bundle_exec_id=None): + """Stop it.""" if self.check_safe_stops: max_trials = 60 trial = 0 - while not self._mq_dao.all_time_based_threads_ended( - bundle_exec_id - ): + while not self._mq_dao.all_time_based_threads_ended(bundle_exec_id): trial += 1 sleep_time = 3 self.logger.info( @@ -248,17 +240,12 @@ def stop(self, bundle_exec_id=None): ) sleep(sleep_time) if trial >= max_trials: - if ( - len(self._task_dicts_buffer) == 0 - ): # and len(self._mq_dao._buffer) == 0: - self.logger.critical( - f"Doc Inserter {id(self)} gave up on waiting for the signal. It is probably safe to stop by now." - ) + if len(self._task_dicts_buffer) == 0: # and len(self._mq_dao._buffer) == 0: + msg = f"DocInserter {id(self)} gave up waiting for signal. " + self.logger.critical(msg + "Safe to stop now.") break self.logger.info("Sending message to stop document inserter.") self._mq_dao.send_document_inserter_stop() - self.logger.info( - f"Doc Inserter {id(self)} Sent message to stop itself." - ) + self.logger.info(f"Doc Inserter {id(self)} Sent message to stop itself.") self._main_thread.join() self.logger.info("Document Inserter is stopped.") diff --git a/flowcept/flowceptor/telemetry_capture.py b/src/flowcept/flowceptor/telemetry_capture.py similarity index 85% rename from flowcept/flowceptor/telemetry_capture.py rename to src/flowcept/flowceptor/telemetry_capture.py index 6a3271e2..4cc82547 100644 --- a/flowcept/flowceptor/telemetry_capture.py +++ b/src/flowcept/flowceptor/telemetry_capture.py @@ -1,3 +1,5 @@ +"""Telemetry module.""" + import psutil import platform import cpuinfo @@ -16,13 +18,10 @@ if GPU_TYPE == "nvidia": try: - import pynvml from pynvml import ( - nvmlDeviceGetCount, nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlDeviceGetName, - nvmlInit, nvmlShutdown, nvmlDeviceGetTemperature, nvmlDeviceGetPowerUsage, @@ -36,9 +35,7 @@ try: from amdsmi import ( amdsmi_get_gpu_memory_usage, - amdsmi_get_processor_handles, amdsmi_shut_down, - amdsmi_get_gpu_memory_usage, AmdSmiMemoryType, AmdSmiTemperatureType, amdsmi_get_gpu_activity, @@ -47,20 +44,22 @@ amdsmi_get_temp_metric, AmdSmiTemperatureMetric, amdsmi_get_gpu_metrics_info, - amdsmi_get_processor_handles, - amdsmi_init, ) except Exception as e: print(f"Exception to import AMD libs! {e}") pass -# from amdsmi import amdsmi_init, amdsmi_get_processor_handles - class TelemetryCapture: - _gpu_unsuccessful_queries = ( - dict() - ) # TODO: refactor; I need this to avoid querying GPU stuff that is generating errors. The idea is to try once and if it fails, add this in this dictionary to avoid trying again. The mapping will be {gpu_device_id: {query_type: True or False}}; False if it found that it's unsuccessful. If it's mapping to an empty dict, the whole GPU is bad for capture. + """Telemetry class.""" + + # TODO: refactor; I need this to avoid querying GPU stuff that is + # generating errors. The idea is to try once and if it fails, add this in + # this dictionary to avoid trying again. The mapping will be + # {gpu_device_id:{query_type: True or False}}; False if it found that + # it's unsuccessful. If it's mapping to an empty dict, the whole GPU is + # bad for capture. 
+ _gpu_unsuccessful_queries = dict() def __init__(self, conf=TELEMETRY_CAPTURE): self.logger = FlowceptLogger() @@ -82,9 +81,7 @@ def __init__(self, conf=TELEMETRY_CAPTURE): self._gpu_conf = set(self._gpu_conf) if len(self._gpu_conf): - self.logger.info( - f"These are the visible GPUs by Flowcept Capture: {N_GPUS}" - ) + self.logger.info(f"These are the visible GPUs by Flowcept Capture: {N_GPUS}") # TODO: refactor! This below is bad coding nvidia = N_GPUS.get("nvidia", []) amd = N_GPUS.get("amd", []) @@ -96,11 +93,11 @@ def __init__(self, conf=TELEMETRY_CAPTURE): self._gpu_capture_func = self.__get_gpu_info_amd else: self.logger.exception( - "You are trying to capture telemetry GPU info, but we" - " couldn't detect any GPU, neither NVIDIA nor AMD. Consider disabling GPU capture in the settings file." + "No GPU found. Consider disabling GPU capture in the settings file." ) def capture(self) -> Telemetry: + """Capture it.""" if self.conf is None: return None tel = Telemetry() @@ -129,6 +126,7 @@ def capture(self) -> Telemetry: return tel def capture_machine_info(self): + """Capture info.""" # TODO: add ifs for each type of telem; improve this method overall if self.conf is None or self.conf.get("machine_info", None) is None: return None @@ -212,7 +210,7 @@ def _capture_process_info(self): p.pid = psutil_p.pid try: p.cpu_number = psutil_p.cpu_num() - except: + except Exception: pass p.memory = psutil_p.memory_info()._asdict() p.memory_percent = psutil_p.memory_percent() @@ -224,7 +222,7 @@ def _capture_process_info(self): p.num_connections = len(psutil_p.connections()) try: p.io_counters = psutil_p.io_counters()._asdict() - except: + except Exception: pass p.num_open_files = len(psutil_p.open_files()) p.num_threads = psutil_p.num_threads() @@ -240,9 +238,7 @@ def _capture_cpu(self, capt_cpu, capt_per_cpu): cpu.times_avg = psutil.cpu_times(percpu=False)._asdict() cpu.percent_all = psutil.cpu_percent() if capt_per_cpu: - cpu.times_per_cpu = [ - c._asdict() for c in psutil.cpu_times(percpu=True) - ] + cpu.times_per_cpu = [c._asdict() for c in psutil.cpu_times(percpu=True)] cpu.percent_per_cpu = psutil.cpu_percent(percpu=True) return cpu except Exception as e: @@ -260,9 +256,7 @@ def __get_gpu_info_nvidia(self, gpu_ix: int = 0): flowcept_gpu_info = { "total": nvidia_info.total, "used": nvidia_info.used, - "temperature": nvmlDeviceGetTemperature( - handle, NVML_TEMPERATURE_GPU - ), + "temperature": nvmlDeviceGetTemperature(handle, NVML_TEMPERATURE_GPU), "power_usage": nvmlDeviceGetPowerUsage(handle), "name": nvmlDeviceGetName(handle), "device_ix": gpu_ix, @@ -270,14 +264,10 @@ def __get_gpu_info_nvidia(self, gpu_ix: int = 0): return flowcept_gpu_info def __register_unsuccessful_gpu_query(self, gpu_ix, gpu_info_key): - self.logger.error( - f"Error to get {gpu_info_key} for the GPU device ix {gpu_ix}" - ) + self.logger.error(f"Error to get {gpu_info_key} for the GPU device ix {gpu_ix}") if gpu_ix not in TelemetryCapture._gpu_unsuccessful_queries: TelemetryCapture._gpu_unsuccessful_queries[gpu_ix] = {} - TelemetryCapture._gpu_unsuccessful_queries[gpu_ix][ - gpu_info_key - ] = True + TelemetryCapture._gpu_unsuccessful_queries[gpu_ix][gpu_info_key] = True def __get_gpu_info_amd(self, gpu_ix: int = 0): # See: https://rocm.docs.amd.com/projects/amdsmi/en/docs-5.7.1/py-interface_readme_link.html#api @@ -285,9 +275,7 @@ def __get_gpu_info_amd(self, gpu_ix: int = 0): flowcept_gpu_info = {"gpu_ix": gpu_ix} if "used" in self._gpu_conf: - flowcept_gpu_info["used"] = amdsmi_get_gpu_memory_usage( - device, 
AmdSmiMemoryType.VRAM - ) + flowcept_gpu_info["used"] = amdsmi_get_gpu_memory_usage(device, AmdSmiMemoryType.VRAM) if "usage" in self._gpu_conf: flowcept_gpu_info["usage"] = amdsmi_get_gpu_activity(device) if "power" in self._gpu_conf: @@ -321,23 +309,18 @@ def __get_gpu_info_amd(self, gpu_ix: int = 0): def _capture_gpu(self): try: - if ( - self._visible_gpus is None - or self._gpu_conf is None - or len(self._gpu_conf) == 0 - ): + if self._visible_gpus is None or self._gpu_conf is None or len(self._gpu_conf) == 0: return gpu_telemetry = {} for gpu_ix in self._visible_gpus: - gpu_telemetry[f"gpu_{gpu_ix}"] = self._gpu_capture_func( - gpu_ix - ) + gpu_telemetry[f"gpu_{gpu_ix}"] = self._gpu_capture_func(gpu_ix) return gpu_telemetry except Exception as e: self.logger.exception(e) return None def shutdown_gpu_telemetry(self): + """Shutdown GPU telemetry.""" if ( self.conf is None or self._visible_gpus is None @@ -345,7 +328,7 @@ def shutdown_gpu_telemetry(self): or len(self._gpu_conf) == 0 ): self.logger.debug( - "Gpu capture is off or gpu capture has never been initialized, so we won't shut down." + "GPU capture is off or has never been initialized, so we won't shut down." ) return None if self._gpu_type == "nvidia": diff --git a/src/flowcept/instrumentation/__init__.py b/src/flowcept/instrumentation/__init__.py new file mode 100644 index 00000000..f0a528ae --- /dev/null +++ b/src/flowcept/instrumentation/__init__.py @@ -0,0 +1 @@ +"""Instrumentation subpackage.""" diff --git a/flowcept/instrumentation/decorators/__init__.py b/src/flowcept/instrumentation/decorators/__init__.py similarity index 96% rename from flowcept/instrumentation/decorators/__init__.py rename to src/flowcept/instrumentation/decorators/__init__.py index 362bdbc7..1c599e16 100644 --- a/flowcept/instrumentation/decorators/__init__.py +++ b/src/flowcept/instrumentation/decorators/__init__.py @@ -1,3 +1,5 @@ +"""Decorators subpackage.""" + from flowcept.flowceptor.adapters.base_interceptor import BaseInterceptor # TODO :base-interceptor-refactor: :ml-refactor: :code-reorg: :usability: diff --git a/flowcept/instrumentation/decorators/flowcept_task.py b/src/flowcept/instrumentation/decorators/flowcept_task.py similarity index 90% rename from flowcept/instrumentation/decorators/flowcept_task.py rename to src/flowcept/instrumentation/decorators/flowcept_task.py index d538de18..0b8cf358 100644 --- a/flowcept/instrumentation/decorators/flowcept_task.py +++ b/src/flowcept/instrumentation/decorators/flowcept_task.py @@ -1,3 +1,5 @@ +"""Task module.""" + from time import time from functools import wraps import flowcept.commons @@ -17,24 +19,23 @@ # TODO: :code-reorg: consider moving it to utils and reusing it in dask interceptor def default_args_handler(task_message: TaskObject, *args, **kwargs): + """Get default arguments.""" args_handled = {} if args is not None and len(args): for i in range(len(args)): args_handled[f"arg_{i}"] = args[i] if kwargs is not None and len(kwargs): - task_message.workflow_id = task_message.workflow_id or kwargs.pop( - "workflow_id", None - ) + task_message.workflow_id = task_message.workflow_id or kwargs.pop("workflow_id", None) args_handled.update(kwargs) - task_message.workflow_id = ( - task_message.workflow_id or Flowcept.current_workflow_id - ) + task_message.workflow_id = task_message.workflow_id or Flowcept.current_workflow_id if REPLACE_NON_JSON_SERIALIZABLE: args_handled = replace_non_serializable(args_handled) return args_handled def telemetry_flowcept_task(func=None): + """Get telemetry task.""" 
+ def decorator(func): @wraps(func) def wrapper(*args, **kwargs): @@ -72,6 +73,8 @@ def wrapper(*args, **kwargs): def lightweight_flowcept_task(func=None): + """Get lightweight task.""" + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): @@ -117,24 +120,22 @@ def wrapper(*args, **kwargs): def flowcept_task(func=None, **decorator_kwargs): + """Get flowcept task.""" + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): if not INSTRUMENTATION_ENABLED: return func(*args, **kwargs) - args_handler = decorator_kwargs.get( - "args_handler", default_args_handler - ) + args_handler = decorator_kwargs.get("args_handler", default_args_handler) task_obj = TaskObject() task_obj.activity_id = func.__name__ task_obj.used = args_handler(task_obj, *args, **kwargs) task_obj.started_at = time() task_obj.task_id = str(task_obj.started_at) - task_obj.telemetry_at_start = ( - instrumentation_interceptor.telemetry_capture.capture() - ) + task_obj.telemetry_at_start = instrumentation_interceptor.telemetry_capture.capture() try: result = func(*args, **kwargs) task_obj.status = Status.FINISHED @@ -143,9 +144,7 @@ def wrapper(*args, **kwargs): result = None task_obj.stderr = str(e) task_obj.ended_at = time() - task_obj.telemetry_at_end = ( - instrumentation_interceptor.telemetry_capture.capture() - ) + task_obj.telemetry_at_end = instrumentation_interceptor.telemetry_capture.capture() try: if isinstance(result, dict): task_obj.generated = args_handler(task_obj, **result) diff --git a/flowcept/instrumentation/decorators/flowcept_torch.py b/src/flowcept/instrumentation/decorators/flowcept_torch.py similarity index 80% rename from flowcept/instrumentation/decorators/flowcept_torch.py rename to src/flowcept/instrumentation/decorators/flowcept_torch.py index 7a6205b9..7c779526 100644 --- a/flowcept/instrumentation/decorators/flowcept_torch.py +++ b/src/flowcept/instrumentation/decorators/flowcept_torch.py @@ -1,3 +1,5 @@ +"""Pytorch module.""" + from time import time from functools import wraps import flowcept.commons @@ -11,7 +13,6 @@ import torch from torch import nn -import flowcept.commons from flowcept.commons.flowcept_dataclasses.workflow_object import ( WorkflowObject, ) @@ -57,6 +58,8 @@ def _inspect_torch_tensor(tensor: torch.Tensor): def full_torch_task(func=None): + """Get pytorch task.""" + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): @@ -71,15 +74,9 @@ def wrapper(*args, **kwargs): task_obj["workflow_id"] = args[0].workflow_id task_obj["used"] = { "tensor": _inspect_torch_tensor(args[1]), - **{ - k: v - for k, v in vars(args[0]).items() - if not k.startswith("_") - }, + **{k: v for k, v in vars(args[0]).items() if not k.startswith("_")}, } - task_obj[ - "telemetry_at_start" - ] = ( + task_obj["telemetry_at_start"] = ( instrumentation_interceptor.telemetry_capture.capture().to_dict() ) try: @@ -90,9 +87,7 @@ def wrapper(*args, **kwargs): result = None task_obj["stderr"] = str(e) task_obj["ended_at"] = time() - task_obj[ - "telemetry_at_end" - ] = ( + task_obj["telemetry_at_end"] = ( instrumentation_interceptor.telemetry_capture.capture().to_dict() ) task_obj["generated"] = { @@ -123,6 +118,8 @@ def wrapper(*args, **kwargs): def lightweight_tensor_inspection_torch_task(func=None): + """Get lightweight pytorch task.""" + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): @@ -156,6 +153,8 @@ def wrapper(*args, **kwargs): def lightweight_telemetry_tensor_inspection_torch_task(func=None): + """Get lightweight tensor inspect task.""" + def decorator(func): 
@wraps(func) def wrapper(*args, **kwargs): @@ -189,45 +188,9 @@ def wrapper(*args, **kwargs): return decorator(func) -# def lightweight_telemetry_tensor_inspection_counted_torch_task(func=None): -# def decorator(func): -# @wraps(func) -# def wrapper(*args, **kwargs): -# FrequencyCount.counter += 1 -# result = func(*args, **kwargs) -# if FrequencyCount.counter < FrequencyCount.MAX: -# return result -# FrequencyCount.counter = 0 -# used = {"tensor": _inspect_torch_tensor(args[1])} -# for k, v in vars(args[0]).items(): -# if not k.startswith("_"): -# if isinstance(v, torch.Tensor): -# used[k] = _inspect_torch_tensor(v) -# elif callable(v): -# used[k] = v.__qualname__ -# else: -# used[k] = v -# task_dict = dict( -# type="task", -# workflow_id=args[0].workflow_id, -# parent_task_id=args[0].parent_task_id, -# activity_id=args[0].__class__.__name__, -# used=used, -# generated={"tensor": _inspect_torch_tensor(result)}, -# telemetry_at_start=instrumentation_interceptor.telemetry_capture.capture().to_dict(), -# ) -# instrumentation_interceptor.intercept(task_dict) -# return result -# -# return wrapper -# -# if func is None: -# return decorator -# else: -# return decorator(func) - - def lightweight_telemetry_torch_task(func=None): + """Get lightweight telemetry torch task.""" + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): @@ -252,6 +215,7 @@ def wrapper(*args, **kwargs): def torch_task(): + """Get the torch task.""" mode = INSTRUMENTATION["torch"]["mode"] if mode is None: return lambda _: _ @@ -271,9 +235,7 @@ def torch_task(): elif mode == "full": return full_torch_task else: - raise NotImplementedError( - f"There is no torch instrumentation mode {mode}" - ) + raise NotImplementedError(f"There is no torch instrumentation mode {mode}") @torch_task() @@ -283,12 +245,10 @@ def _our_forward(self, *args, **kwargs): def _create_dynamic_class(base_class, class_name, extra_attributes): attributes = { - "__init__": lambda self, *args, **kwargs: super( - self.__class__, self - ).__init__(*args, **kwargs), - "forward": lambda self, *args, **kwargs: _our_forward( - self, *args, **kwargs + "__init__": lambda self, *args, **kwargs: super(self.__class__, self).__init__( + *args, **kwargs ), + "forward": lambda self, *args, **kwargs: _our_forward(self, *args, **kwargs), **extra_attributes, } @@ -300,6 +260,7 @@ def register_modules( workflow_id: str = None, parent_task_id: str = None, ) -> Dict[nn.Module, nn.Module]: + """Register some modules.""" flowcept_torch_modules: List[nn.Module] = [] for module in modules: @@ -324,6 +285,7 @@ def register_module_as_workflow( # parent_task_id=None, custom_metadata: Dict = None, ): + """Register as a workflow.""" workflow_obj = WorkflowObject() workflow_obj.workflow_id = str(uuid.uuid4()) workflow_obj.parent_workflow_id = parent_workflow_id diff --git a/flowcept/instrumentation/decorators/responsible_ai.py b/src/flowcept/instrumentation/decorators/responsible_ai.py similarity index 86% rename from flowcept/instrumentation/decorators/responsible_ai.py rename to src/flowcept/instrumentation/decorators/responsible_ai.py index 18d19f97..6d971c32 100644 --- a/flowcept/instrumentation/decorators/responsible_ai.py +++ b/src/flowcept/instrumentation/decorators/responsible_ai.py @@ -1,3 +1,5 @@ +"""AI module.""" + from functools import wraps import numpy as np from torch import nn @@ -52,9 +54,7 @@ def _inspect_inner_modules(model, modules_dict={}, in_named=None): } if in_named is not None: modules_dict[key]["in_named"] = in_named - modules_dict[key].update( - {k: v 
for k, v in model.__dict__.items() if not k.startswith("_")} - ) + modules_dict[key].update({k: v for k, v in model.__dict__.items() if not k.startswith("_")}) for name, module in model.named_children(): if isinstance(module, nn.Module): _inspect_inner_modules(module, modules_dict, in_named=name) @@ -62,18 +62,18 @@ def _inspect_inner_modules(model, modules_dict={}, in_named=None): def model_profiler(): + """Get the model profiler.""" + def decorator(func): @wraps(func) def wrapper(*args, **kwargs): result = func(*args, **kwargs) - if type(result) != dict or "model" not in result: - raise Exception( - "We expect that you give us the model so we can profile it. Return a dict with a 'model' key in it with the pytorch model to be profiled." - ) + if type(result) is not dict or "model" not in result: + msg = "We expect a model so we can profile it. " + msg2 = "Return a dict with a 'model' key with the pytorch model to be profiled." + raise Exception(msg + msg2) - random_seed = ( - result["random_seed"] if "random_seed" in result else None - ) + random_seed = result["random_seed"] if "random_seed" in result else None model = result.pop("model", None) nparams = 0 @@ -107,9 +107,9 @@ def wrapper(*args, **kwargs): ret["responsible_ai_metadata"] = {} ret["responsible_ai_metadata"].update(this_result) - if INSTRUMENTATION.get("torch", False) and INSTRUMENTATION[ - "torch" - ].get("save_models", False): + if INSTRUMENTATION.get("torch", False) and INSTRUMENTATION["torch"].get( + "save_models", False + ): obj_id = Flowcept.db.save_torch_model( model, custom_metadata=ret["responsible_ai_metadata"] ) diff --git a/flowcept/main.py b/src/flowcept/main.py similarity index 85% rename from flowcept/main.py rename to src/flowcept/main.py index f6870e9c..82c847ec 100644 --- a/flowcept/main.py +++ b/src/flowcept/main.py @@ -1,3 +1,5 @@ +"""Main driver module.""" + import sys from flowcept import ( @@ -19,21 +21,17 @@ def main(): + """Run the main driver.""" interceptors = [] for plugin_key in settings["plugins"]: plugin_settings_obj = settings["plugins"][plugin_key] - if ( - "enabled" in plugin_settings_obj - and not plugin_settings_obj["enabled"] - ): + if "enabled" in plugin_settings_obj and not plugin_settings_obj["enabled"]: continue kind = plugin_settings_obj["kind"] if kind in INTERCEPTORS: - interceptor = INTERCEPTORS[plugin_settings_obj["kind"]]( - plugin_key - ) + interceptor = INTERCEPTORS[plugin_settings_obj["kind"]](plugin_key) interceptors.append(interceptor) consumer = Flowcept(interceptors) diff --git a/flowcept/version.py b/src/flowcept/version.py similarity index 83% rename from flowcept/version.py rename to src/flowcept/version.py index 090f3289..e9ef8d77 100644 --- a/flowcept/version.py +++ b/src/flowcept/version.py @@ -1,5 +1,7 @@ +"""Version module.""" + # WARNING: CHANGE THIS FILE MANUALLY ONLY TO RESOLVE CONFLICTS! # This file is supposed to be automatically modified by the CI Bot. # The expected format is: .. 
# See .github/workflows/version_bumper.py -__version__ = "0.5.4" +__version__ = "0.6.5" diff --git a/tests/adapters/test_file_observer.py b/tests/adapters/test_file_observer.py new file mode 100644 index 00000000..35b9d268 --- /dev/null +++ b/tests/adapters/test_file_observer.py @@ -0,0 +1,63 @@ +import os +import time +import unittest +from watchdog.observers.polling import PollingObserver +from watchdog.events import FileSystemEventHandler +import tempfile +import threading + + +class TestFileObserver(unittest.TestCase): + def setUp(self): + self.test_file = tempfile.NamedTemporaryFile(delete=False) + self.test_file_name = self.test_file.name + self.test_file.close() + + # Thread event to signal that the callback was called + self.callback_called_event = threading.Event() + + # Define the callback function to be triggered on modification + def callback(file_path): + if file_path == self.test_file_name: + print(f"Callback triggered for {file_path}") + self.callback_called_event.set() + + # Create an event handler and bind it to the callback + self.event_handler = FileSystemEventHandler() + self.event_handler.on_modified = lambda event: callback(event.src_path) + + # Set up watchdog observer + self.observer = PollingObserver() + self.observer.schedule(self.event_handler, path=self.test_file_name, recursive=False) + self.observer.start() + + def tearDown(self): + # Stop the observer and remove the temporary file + self.observer.stop() + self.observer.join() + os.unlink(self.test_file_name) + + def test_file_observer_callback(self): + # Modify the file to trigger the callback + with open(self.test_file_name, 'a') as f: + f.write('File has been modified.') + f.flush() + os.fsync(f.fileno()) # Ensure file system updates + + # Add a small delay to ensure the observer catches the event + time.sleep(0.5) + + # Wait for the callback to be called (max wait 5 seconds) + callback_triggered = self.callback_called_event.wait(timeout=5) + + # Assert that the callback was called + self.assertTrue(callback_triggered, "Callback was not triggered upon file modification.") + + # Additional assertions to ensure file was actually modified + with open(self.test_file_name, 'r') as f: + content = f.read() + self.assertIn('File has been modified.', content, "File modification did not occur as expected.") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/adapters/test_mlflow.py b/tests/adapters/test_mlflow.py index 3be4ffc9..e0ea7990 100644 --- a/tests/adapters/test_mlflow.py +++ b/tests/adapters/test_mlflow.py @@ -1,6 +1,10 @@ import unittest from time import sleep import numpy as np +import os +import uuid +import mlflow + from flowcept.commons.flowcept_logger import FlowceptLogger from flowcept import MLFlowInterceptor, Flowcept from flowcept.commons.utils import ( @@ -10,61 +14,67 @@ class TestMLFlow(unittest.TestCase): + interceptor = None + def __init__(self, *args, **kwargs): super(TestMLFlow, self).__init__(*args, **kwargs) - self.interceptor = MLFlowInterceptor() self.logger = FlowceptLogger() - def test_pure_run_mlflow(self, epochs=10, batch_size=64): - import uuid - import mlflow + @classmethod + def setUpClass(cls): + TestMLFlow.interceptor = MLFlowInterceptor() + if os.path.exists(TestMLFlow.interceptor.settings.file_path): + os.remove(TestMLFlow.interceptor.settings.file_path) + open(TestMLFlow.interceptor.settings.file_path, "w") + sleep(1) + mlflow.set_tracking_uri(f"sqlite:///{TestMLFlow.interceptor.settings.file_path}") + 
mlflow.delete_experiment(mlflow.create_experiment("starter")) + sleep(1) - mlflow.set_tracking_uri( - f"sqlite:///" f"{self.interceptor.settings.file_path}" - ) + def test_pure_run_mlflow(self, epochs=10, batch_size=64): experiment_name = "LinearRegression" - experiment_id = mlflow.create_experiment( - experiment_name + str(uuid.uuid4()) - ) + experiment_id = mlflow.create_experiment(experiment_name + str(uuid.uuid4())) with mlflow.start_run(experiment_id=experiment_id) as run: + sleep(5) mlflow.log_params({"number_epochs": epochs}) mlflow.log_params({"batch_size": batch_size}) # Actual training code would come here self.logger.debug("\nTrained model") mlflow.log_metric("loss", np.random.random()) - + run_data = TestMLFlow.interceptor.dao.get_run_data(run.info.run_uuid) + assert run_data.task_id == run.info.run_uuid return run.info.run_uuid def test_get_runs(self): - runs = self.interceptor.dao.get_finished_run_uuids() + runs = TestMLFlow.interceptor.dao.get_finished_run_uuids() assert len(runs) > 0 for run in runs: - assert type(run[0]) == str + assert isinstance(run[0], str) self.logger.debug(run[0]) def test_get_run_data(self): run_uuid = self.test_pure_run_mlflow() - run_data = self.interceptor.dao.get_run_data(run_uuid) + run_data = TestMLFlow.interceptor.dao.get_run_data(run_uuid) assert run_data.task_id == run_uuid def test_check_state_manager(self): - self.interceptor.state_manager.reset() - self.interceptor.state_manager.add_element_id("dummy-value") + TestMLFlow.interceptor.state_manager.reset() + TestMLFlow.interceptor.state_manager.add_element_id("dummy-value") self.test_pure_run_mlflow() - runs = self.interceptor.dao.get_finished_run_uuids() + runs = TestMLFlow.interceptor.dao.get_finished_run_uuids() assert len(runs) > 0 for run_tuple in runs: run_uuid = run_tuple[0] - assert type(run_uuid) == str - if not self.interceptor.state_manager.has_element_id(run_uuid): + assert isinstance(run_uuid, str) + if not TestMLFlow.interceptor.state_manager.has_element_id(run_uuid): self.logger.debug(f"We need to intercept {run_uuid}") - self.interceptor.state_manager.add_element_id(run_uuid) + TestMLFlow.interceptor.state_manager.add_element_id(run_uuid) def test_observer_and_consumption(self): - with Flowcept(self.interceptor): + assert TestMLFlow.interceptor is not None + with Flowcept(TestMLFlow.interceptor): run_uuid = self.test_pure_run_mlflow() - # sleep(3) - + print(run_uuid) assert evaluate_until( lambda: self.interceptor.state_manager.has_element_id(run_uuid), ) @@ -78,9 +88,7 @@ def test_multiple_tasks(self): run_ids = [] with Flowcept(self.interceptor): for i in range(1, 10): - run_ids.append( - self.test_pure_run_mlflow(epochs=i * 10, batch_size=i * 2) - ) + run_ids.append(self.test_pure_run_mlflow(epochs=i * 10, batch_size=i * 2)) sleep(3) for run_id in run_ids: diff --git a/tests/adapters/test_tensorboard.py b/tests/adapters/test_tensorboard.py index 347efe72..55bea4dc 100644 --- a/tests/adapters/test_tensorboard.py +++ b/tests/adapters/test_tensorboard.py @@ -80,9 +80,7 @@ def train_test_model(hparams, logdir): model = tf.keras.models.Sequential( [ tf.keras.layers.Flatten(), - tf.keras.layers.Dense( - hparams[HP_NUM_UNITS], activation=tf.nn.relu - ), + tf.keras.layers.Dense(hparams[HP_NUM_UNITS], activation=tf.nn.relu), tf.keras.layers.Dropout(hparams[HP_DROPOUT]), tf.keras.layers.Dense(10, activation=tf.nn.softmax), ] @@ -155,9 +153,7 @@ def test_observer_and_consumption(self): ) assert assert_by_querying_tasks_until({"workflow_id": wf_id}) - @unittest.skip( - "This test is 
useful only for developing. No need to run " "in CI" - ) + @unittest.skip("This test is useful only for developing. No need to run " "in CI") def test_read_tensorboard_hparam_tuning(self): self.reset_log_dir() self.run_tensorboard_hparam_tuning() diff --git a/tests/adapters/test_zambeze.py b/tests/adapters/test_zambeze.py deleted file mode 100644 index ad43efcf..00000000 --- a/tests/adapters/test_zambeze.py +++ /dev/null @@ -1,89 +0,0 @@ -from time import sleep -import unittest -import json -import pika -from uuid import uuid4 - -from pika.exceptions import AMQPConnectionError - -from flowcept.commons.flowcept_logger import FlowceptLogger -from flowcept import ZambezeInterceptor, Flowcept, TaskQueryAPI -from flowcept.flowceptor.adapters.zambeze.zambeze_dataclasses import ( - ZambezeMessage, -) -from flowcept.commons.utils import assert_by_querying_tasks_until - - -class TestZambeze(unittest.TestCase): - def __init__(self, *args, **kwargs): - super(TestZambeze, self).__init__(*args, **kwargs) - self.logger = FlowceptLogger() - interceptor = ZambezeInterceptor() - try: - self._connected = False - self._connection = pika.BlockingConnection( - pika.ConnectionParameters( - interceptor.settings.host, - interceptor.settings.port, - ) - ) - self._connected = self._connection.is_open - except AMQPConnectionError: - print("Failed to connect to RabbitMQ. Is it running?") - return - except Exception as e: - print(f"An error occurred: {e}") - return - - self.consumer = Flowcept(interceptor) - self._channel = self._connection.channel() - self._queue_names = interceptor.settings.queue_names - self._channel.queue_declare(queue=self._queue_names[0]) - self.consumer.start() - - def test_send_message(self): - if not self._connected: - self.logger.warning( - "RabbitMQ was not found. Skipping this " "Zambeze test." - ) - assert True - return - another_act_id = str(uuid4()) - act_id = str(uuid4()) - msg = ZambezeMessage( - **{ - "name": "ImageMagick", - "activity_id": act_id, - "campaign_id": "campaign-uuid", - "origin_agent_id": "def-uuid", - "files": ["globus://Users/6o1/file.txt"], - "command": "convert", - "activity_status": "CREATED", - "arguments": [ - "-delay", - "20", - "-loop", - "0", - "~/tests/campaigns/imagesequence/*.jpg", - "a.gif", - ], - "kwargs": {}, - "depends_on": [another_act_id], - } - ) - - self._channel.basic_publish( - exchange="", - routing_key=self._queue_names[0], - body=json.dumps(msg.__dict__), - ) - print("Zambeze Activity_id", act_id) - self.logger.debug(" [x] Sent msg") - sleep(5) - self._connection.close() - assert assert_by_querying_tasks_until({"task_id": act_id}) - self.consumer.stop() - - -if __name__ == "__main__": - unittest.main() diff --git a/tests/decorator_tests/ml_tests/llm_tests/llm_trainer.py b/tests/decorator_tests/ml_tests/llm_tests/llm_trainer.py index 80b8433a..2a365c72 100644 --- a/tests/decorator_tests/ml_tests/llm_tests/llm_trainer.py +++ b/tests/decorator_tests/ml_tests/llm_tests/llm_trainer.py @@ -276,7 +276,7 @@ def model_train( ): from distributed.worker import thread_state - dask_task_id = thread_state.key + dask_task_id = thread_state.key if hasattr(thread_state, "key") else None # TODO :ml-refactor: save device type and random seed: https://pytorch.org/docs/stable/notes/randomness.html # TODO :base-interceptor-refactor: Can we do it better?
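A few of the hunks above are easier to follow with short, self-contained sketches; everything below uses only public APIs, and any name not present in the diff is a placeholder. The reformatted `_capture_cpu` hunk in telemetry_capture.py, for instance, reduces to a handful of psutil calls:

    import psutil

    def capture_cpu(per_cpu: bool = False) -> dict:
        """Collect the same CPU fields the telemetry hunk records (standalone sketch)."""
        info = {
            "times_avg": psutil.cpu_times(percpu=False)._asdict(),
            "percent_all": psutil.cpu_percent(),
        }
        if per_cpu:
            info["times_per_cpu"] = [t._asdict() for t in psutil.cpu_times(percpu=True)]
            info["percent_per_cpu"] = psutil.cpu_percent(percpu=True)
        return info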
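The `default_args_handler` hunk in flowcept_task.py flattens a call's positional and keyword arguments into one dict keyed arg_0, arg_1, and so on. A self-contained illustration of that mapping (it leaves out the workflow_id handling and the JSON-serializability cleanup):

    def flatten_call_args(*args, **kwargs) -> dict:
        """Positional args become arg_0, arg_1, ...; keyword args are merged as-is."""
        flat = {f"arg_{i}": value for i, value in enumerate(args)}
        flat.update(kwargs)
        return flat

    # flatten_call_args(3, "adam", lr=0.01) -> {"arg_0": 3, "arg_1": "adam", "lr": 0.01}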
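The `flowcept_task` wrapper in the same file follows a familiar decorator pattern: record a start timestamp and telemetry, run the function, record status, stderr and an end timestamp, then hand the record to the interceptor. A stripped-down sketch of the pattern only; the interceptor call is replaced by a comment because it is Flowcept-internal:

    import time
    from functools import wraps

    def timed_task(func):
        """Wrap a function and record start/end times and outcome, mirroring the hunk above."""
        @wraps(func)
        def wrapper(*args, **kwargs):
            record = {"activity_id": func.__name__, "started_at": time.time()}
            try:
                result = func(*args, **kwargs)
                record["status"] = "FINISHED"
            except Exception as e:
                result = None
                record["status"] = "ERROR"
                record["stderr"] = str(e)
            record["ended_at"] = time.time()
            # Flowcept would send `record` to its interceptor here.
            return result
        return wrapper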
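`_create_dynamic_class` in flowcept_torch.py builds a subclass at runtime with `type()` so that `forward` can be swapped for an instrumented version. The same technique in isolation; `wrap_module_class` and `traced_forward` are placeholder names, not Flowcept's API:

    import torch
    from torch import nn

    def wrap_module_class(base_class):
        """Return a runtime-created subclass whose forward() is instrumented."""
        def traced_forward(self, *args, **kwargs):
            out = base_class.forward(self, *args, **kwargs)
            # Real instrumentation would record tensor shapes and telemetry here.
            return out
        return type(f"Traced{base_class.__name__}", (base_class,), {"forward": traced_forward})

    TracedLinear = wrap_module_class(nn.Linear)
    layer = TracedLinear(4, 2)            # __init__ is inherited from nn.Linear
    out = layer(torch.randn(1, 4))        # dispatches through traced_forward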
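The `model_profiler` hunk in responsible_ai.py appears to accumulate a parameter count (the `nparams` variable) as part of its responsible-AI metadata. The core of that computation, as a minimal sketch:

    from torch import nn

    def count_parameters(model: nn.Module) -> dict:
        """Total and trainable parameter counts for a PyTorch model."""
        total = sum(p.numel() for p in model.parameters())
        trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
        return {"params": total, "trainable_params": trainable}

    # count_parameters(nn.Linear(10, 1)) -> {"params": 11, "trainable_params": 11}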
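tests/adapters/test_file_observer.py exercises watchdog's PollingObserver against a temporary file. Outside of unittest, the same setup is roughly the following (here watching the current directory rather than a single file):

    import time
    from watchdog.events import FileSystemEventHandler
    from watchdog.observers.polling import PollingObserver

    class LogModified(FileSystemEventHandler):
        def on_modified(self, event):
            print(f"modified: {event.src_path}")

    observer = PollingObserver()
    observer.schedule(LogModified(), path=".", recursive=False)  # watch the current directory
    observer.start()
    try:
        time.sleep(3)  # modify any file here during this window to see the callback fire
    finally:
        observer.stop()
        observer.join()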
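The reworked tests/adapters/test_mlflow.py points MLflow at a SQLite tracking store and logs params and a metric for each run, then keys its assertions on the run id. A minimal standalone equivalent; the database path and values are illustrative, not the test's settings file:

    import uuid
    import mlflow

    mlflow.set_tracking_uri("sqlite:///mlflow_test.db")  # illustrative path
    experiment_id = mlflow.create_experiment("LinearRegression" + str(uuid.uuid4()))
    with mlflow.start_run(experiment_id=experiment_id) as run:
        mlflow.log_params({"number_epochs": 10, "batch_size": 64})
        mlflow.log_metric("loss", 0.42)
    print(run.info.run_id)  # the interceptor looks runs up by this id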
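Finally, the llm_trainer.py hunk guards access to Dask's internal `thread_state.key`, which only exists while code runs inside a worker task. An equivalent, slightly more compact guard that still relies on that internal attribute:

    from distributed.worker import thread_state

    # None when this code is not executing inside a Dask worker task.
    dask_task_id = getattr(thread_state, "key", None)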