diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 66e9002627..a89ff5e880 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -104,7 +104,6 @@ body: description: Is this issue related to any of the Nebari integrations? multiple: true options: - - "Prefect" - "Keycloak" - "conda-store" - "Dask" diff --git a/.github/ISSUE_TEMPLATE/release-checklist.md b/.github/ISSUE_TEMPLATE/release-checklist.md index ea27537a27..06c15d3154 100644 --- a/.github/ISSUE_TEMPLATE/release-checklist.md +++ b/.github/ISSUE_TEMPLATE/release-checklist.md @@ -39,6 +39,8 @@ Release captain responsible - <@gh_username> - [Do we need to update the `dask` versions in the `nebari-dask`?](https://github.com/conda-forge/nebari-dask-feedstock/blob/main/recipe/meta.yaml#L13-L16) - Will there be an accompanying blog post? - [ ] Prepare for the release. + - [ ] Update the [`nebari upgrade`](https://github.com/nebari-dev/nebari/blob/develop/src/_nebari/upgrade.py) for this release + - [ ] Add upgrade messaging including deprecation warnings, version specific warnings and so on. - [ ] Announce build freeze. - [ ] Release Candidate (RC) cycle. - Is this a hotfix? @@ -63,6 +65,7 @@ _These steps must be actioned in the order they appear in this checklist._ - [ ] [Tag, build and push docker images](https://github.com/nebari-dev/nebari-docker-images/releases/new) - [ ] [Update and cut release for `nebari-dask` meta package on Conda-Forge.](https://github.com/conda-forge/nebari-dask-feedstock) +- [ ] Update `CURRENT_RELEASE` (and any other tags) in the [`constants.py`](https://github.com/nebari-dev/nebari/blob/develop/src/_nebari/constants.py#L1) - [ ] [Cut PyPI release via GHA release workflow.](https://github.com/nebari-dev/nebari/releases/new) - Avoid appending `v` to tag. - Copy release notes from `RELEASE.md`. 
diff --git a/.github/workflows/test-provider.yaml b/.github/workflows/test-provider.yaml index ce34caa6c2..3c0a3fa89c 100644 --- a/.github/workflows/test-provider.yaml +++ b/.github/workflows/test-provider.yaml @@ -1,28 +1,29 @@ -name: "Kubernetes Tests" +# This is the only workflow that requires cloud credentials and therefore will not run on PRs coming from forks. +name: "Test Nebari Provider" on: + schedule: + - cron: "0 3 * * *" pull_request: paths: - - ".github/workflows/kubernetes_test.yaml" + - ".github/workflows/test-provider.yaml" + - ".github/failed-workflow-issue-templates/test-provider.md" + - ".github/actions/publish-from-template" - "tests/**" - "scripts/**" - "src/**" - "pyproject.toml" - - "pytest.ini" - - ".cirun.yml" push: branches: - main - develop - release/\d{4}.\d{1,2}.\d{1,2} paths: - - ".github/workflows/kubernetes_test.yaml" + - ".github/workflows/test-provider.yaml" - "tests/**" - "scripts/**" - "src/**" - "pyproject.toml" - - "pytest.ini" - - ".cirun.yml" workflow_call: inputs: pr_number: @@ -30,169 +31,120 @@ on: type: string jobs: - test-kubernetes: - name: "Kubernetes Tests" - runs-on: "cirun-runner--${{ github.run_id }}" - defaults: - run: - shell: bash -l {0} + test-render-providers: + # avoid running on PRs coming from a fork + if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' + name: "Test Nebari Provider" + runs-on: ubuntu-latest + permissions: + id-token: write + contents: read + pull-requests: write + strategy: + matrix: + provider: + - aws + - azure + - do + - gcp + - local + - existing + cicd: + - none + - github-actions + - gitlab-ci + fail-fast: false steps: - - - name: "Set NEBARI_IMAGE_TAG=main" - run: | - echo "NEBARI_IMAGE_TAG=main" >> "$GITHUB_ENV" - echo "GITHUB_BASE_REF: ${GITHUB_BASE_REF}" - echo "GITHUB_HEAD_REF: ${GITHUB_HEAD_REF}" - echo "GITHUB_REF: ${GITHUB_REF}" - - - name: 'Checkout Infrastructure' - uses: actions/checkout@main + - name: "Checkout 
Infrastructure" + uses: actions/checkout@v3 - name: Checkout the branch from the PR that triggered the job if: ${{ github.event_name == 'issue_comment' }} - run: | - hub version - hub pr checkout ${{ inputs.pr_number }} + run: hub pr checkout ${{ inputs.pr_number }} env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Set up Python - uses: conda-incubator/setup-miniconda@v2 - env: - CONDA: /home/runnerx/miniconda3 + uses: actions/setup-python@v4 with: python-version: 3.8 - miniconda-version: "latest" - - name: Install Nebari - run: | - conda install --quiet --yes -c anaconda pip - pip install .[dev] - playwright install - - name: Download and Install Kubectl - run: | - mkdir -p bin - pushd bin - curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.19.0/bin/linux/amd64/kubectl - chmod +x kubectl + - name: Retrieve secret from Vault + uses: hashicorp/vault-action@v2.5.0 + with: + method: jwt + url: "https://quansight-vault-public-vault-b2379fa7.d415e30e.z1.hashicorp.cloud:8200" + namespace: "admin/quansight" + role: "repository-nebari-dev-nebari-role" + secrets: | + kv/data/repository/nebari-dev/nebari/amazon_web_services/nebari-dev-ci role_name | AWS_ROLE_ARN; + kv/data/repository/nebari-dev/nebari/google_cloud_platform/nebari-dev-ci/github-nebari-dev-repo-ci project_id | PROJECT_ID; + kv/data/repository/nebari-dev/nebari/google_cloud_platform/nebari-dev-ci/github-nebari-dev-repo-ci workload_identity_provider | GCP_WORKFLOW_PROVIDER; + kv/data/repository/nebari-dev/nebari/google_cloud_platform/nebari-dev-ci/github-nebari-dev-repo-ci service_account_name | GCP_SERVICE_ACCOUNT; + kv/data/repository/nebari-dev/nebari/azure/nebari-dev-ci/github-nebari-dev-repo-ci client_id | ARM_CLIENT_ID; + kv/data/repository/nebari-dev/nebari/azure/nebari-dev-ci/github-nebari-dev-repo-ci tenant_id | ARM_TENANT_ID; + kv/data/repository/nebari-dev/nebari/azure/nebari-dev-ci/github-nebari-dev-repo-ci subscription_id | ARM_SUBSCRIPTION_ID; + 
kv/data/repository/nebari-dev/nebari/shared_secrets DIGITALOCEAN_TOKEN | DIGITALOCEAN_TOKEN; + kv/data/repository/nebari-dev/nebari/shared_secrets SPACES_ACCESS_KEY_ID | SPACES_ACCESS_KEY_ID; + kv/data/repository/nebari-dev/nebari/shared_secrets SPACES_SECRET_ACCESS_KEY | SPACES_SECRET_ACCESS_KEY; + + - name: 'Authenticate to GCP' + if: ${{ matrix.provider == 'gcp' }} + uses: 'google-github-actions/auth@v1' + with: + token_format: access_token + create_credentials_file: 'true' + workload_identity_provider: ${{ env.GCP_WORKFLOW_PROVIDER }} + service_account: ${{ env.GCP_SERVICE_ACCOUNT }} - echo "$PWD" >> $GITHUB_PATH - popd - - name: Enable docker permissions for user + - name: Set required environment variables + if: ${{ matrix.provider == 'gcp' }} run: | - sudo docker ps - sudo usermod -aG docker $USER && newgrp docker + echo "GOOGLE_CREDENTIALS=${{ env.GOOGLE_APPLICATION_CREDENTIALS }}" >> $GITHUB_ENV - docker info - docker ps - - name: Get routing table for docker pods - run: | - ip route - - name: Initialize Nebari Cloud - run: | - mkdir -p local-deployment - cd local-deployment - nebari init local --project=thisisatest --domain github-actions.nebari.dev --auth-provider=password + - name: 'Authenticate to AWS' + if: ${{ matrix.provider == 'aws' }} + uses: aws-actions/configure-aws-credentials@v1 + with: + role-to-assume: ${{ env.AWS_ROLE_ARN }} + role-session-name: github-action + aws-region: us-east-1 - # Need smaller profiles on Local Kind - sed -i -E 's/(cpu_guarantee):\s+[0-9\.]+/\1: 0.25/g' "nebari-config.yaml" - sed -i -E 's/(mem_guarantee):\s+[A-Za-z0-9\.]+/\1: 0.25G/g' "nebari-config.yaml" + - name: 'Azure login' + if: ${{ matrix.provider == 'azure' }} + uses: azure/login@v1 + with: + client-id: ${{ env.ARM_CLIENT_ID }} + tenant-id: ${{ env.ARM_TENANT_ID }} + subscription-id: ${{ env.ARM_SUBSCRIPTION_ID }} - cat nebari-config.yaml - - name: Deploy Nebari - run: | - cd local-deployment - nebari deploy --config nebari-config.yaml --disable-prompt - - 
name: Basic kubectl checks after deployment - if: always() - run: | - kubectl get all,cm,secret,ing -A - - name: Check github-actions.nebari.dev resolves - run: | - nslookup github-actions.nebari.dev - - name: Curl jupyterhub login page + - name: Install Nebari run: | - curl -k https://github-actions.nebari.dev/hub/home -i + pip install --upgrade pip + pip install .[dev] - ### CYPRESS TESTS - - name: Setup Node - uses: actions/setup-node@v3 - with: - node-version: '16' - - name: npm version + - name: Nebari Initialize run: | - npm --version - - name: Install Cypress dependencies - run: | - sudo apt-get -y update - sudo apt-get install -y libgtk2.0-0 libgtk-3-0 libgbm-dev libnotify-dev libgconf-2-4 libnss3 libxss1 libasound2 libxtst6 xauth xvfb - - - name: Get nebari-config.yaml full path - run: echo "NEBARI_CONFIG_PATH=`realpath ./local-deployment/nebari-config.yaml`" >> "$GITHUB_ENV" + nebari init "${{ matrix.provider }}" --project "TestProvider" --domain "${{ matrix.provider }}.nebari.dev" --auth-provider password --disable-prompt --ci-provider ${{ matrix.cicd }} + cat "nebari-config.yaml" - - name: Create example-user + - name: Nebari Render run: | - export CYPRESS_EXAMPLE_USER_NAME=example-user - export CYPRESS_EXAMPLE_USER_PASSWORD=P@sswo3d + nebari render -c "nebari-config.yaml" -o "nebari-${{ matrix.provider }}-${{ matrix.cicd }}-deployment" + cp "nebari-config.yaml" "nebari-${{ matrix.provider }}-${{ matrix.cicd }}-deployment/nebari-config.yaml" - echo "CYPRESS_EXAMPLE_USER_NAME=${CYPRESS_EXAMPLE_USER_NAME}" >> $GITHUB_ENV - echo "CYPRESS_EXAMPLE_USER_PASSWORD=${CYPRESS_EXAMPLE_USER_PASSWORD}" >> $GITHUB_ENV - - nebari keycloak adduser --user "${CYPRESS_EXAMPLE_USER_NAME}" "${CYPRESS_EXAMPLE_USER_PASSWORD}" --config "${NEBARI_CONFIG_PATH}" - nebari keycloak listusers --config "${NEBARI_CONFIG_PATH}" - - - name: Cypress run - uses: cypress-io/github-action@v4 - env: - CYPRESS_BASE_URL: https://github-actions.nebari.dev/ + - name: Nebari Render Artifact + 
uses: actions/upload-artifact@master with: - working-directory: tests/tests_e2e + name: "nebari-${{ matrix.provider }}-${{ matrix.cicd }}-artifact" + path: "nebari-${{ matrix.provider }}-${{ matrix.cicd }}-deployment" - - name: Playwright Tests + - if: failure() || github.event_name == 'pull_request' + name: Publish information from template + uses: ./.github/actions/publish-from-template env: - KEYCLOAK_USERNAME: ${{ env.CYPRESS_EXAMPLE_USER_NAME }} - KEYCLOAK_PASSWORD: ${{ env.CYPRESS_EXAMPLE_USER_PASSWORD }} - NEBARI_FULL_URL: https://github-actions.nebari.dev/ - working-directory: tests/tests_e2e/playwright - run: | - # create environment file - envsubst < .env.tpl > .env - # run playwright pytest tests in headed mode with the chromium browser - xvfb-run pytest --browser chromium - - - name: Save Cypress screenshots and videos - if: always() - uses: actions/upload-artifact@v3 + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + PROVIDER: ${{ matrix.provider }} + CICD: ${{ matrix.cicd }} with: - name: e2e-cypress - path: | - ./tests/tests_e2e/cypress/screenshots/ - ./tests/tests_e2e/cypress/videos/ - ./tests/tests_e2e/playwright/videos/ - - - name: Deployment Pytests - run: | - export KEYCLOAK_USERNAME=${CYPRESS_EXAMPLE_USER_NAME} - export KEYCLOAK_PASSWORD=${CYPRESS_EXAMPLE_USER_PASSWORD} - pytest tests/tests_deployment/ -v -s - - - name: JupyterHub Notebook Tests - timeout-minutes: 2 - # run jhub-client after pytest since jhubctl can cleanup - # the running server - run: | - sleep 60 - export JUPYTERHUB_USERNAME=${CYPRESS_EXAMPLE_USER_NAME} - export JUPYTERHUB_PASSWORD=${CYPRESS_EXAMPLE_USER_PASSWORD} - jhubctl --verbose run --hub=https://github-actions.nebari.dev \ - --auth-type=keycloak \ - --validate --no-verify-ssl \ - --kernel python3 \ - --stop-server \ - --notebook tests/tests_deployment/assets/notebook/simple.ipynb \ - - ### CLEANUP AFTER TESTS - - name: Cleanup nebari deployment - run: | - cd local-deployment - nebari destroy --config nebari-config.yaml 
--disable-prompt + filename: .github/failed-workflow-issue-templates/test-provider.md diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index f4d74840ca..6a8fa4a446 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -5,8 +5,6 @@ on: paths: - ".github/workflows/test.yaml" - "tests/**" - - "tests_deployment/**" - - "tests_e2e/cypress/**" - "scripts/**" - "src/**" - "pyproject.toml" @@ -19,8 +17,6 @@ on: paths: - ".github/workflows/test.yaml" - "tests/**" - - "tests_deployment/**" - - "tests_e2e/cypress/**" - "scripts/**" - "src/**" - "pyproject.toml" diff --git a/.github/workflows/test_helm_charts.yaml b/.github/workflows/test_helm_charts.yaml index f1b97c268c..daf9abb6da 100644 --- a/.github/workflows/test_helm_charts.yaml +++ b/.github/workflows/test_helm_charts.yaml @@ -7,9 +7,14 @@ on: schedule: # Run every Monday at 13:00 UTC - cron: "0 13 * * 1" + pull_request: + paths: + - ".github/workflows/test_helm_charts.yaml" + - "scripts/helm-validate.py" push: paths: - ".github/workflows/test_helm_charts.yaml" + - "scripts/helm-validate.py" workflow_dispatch: jobs: diff --git a/.github/workflows/kubernetes_test.yaml b/.github/workflows/test_local_integration.yaml similarity index 66% rename from .github/workflows/kubernetes_test.yaml rename to .github/workflows/test_local_integration.yaml index ce34caa6c2..8ddc1f9690 100644 --- a/.github/workflows/kubernetes_test.yaml +++ b/.github/workflows/test_local_integration.yaml @@ -1,9 +1,14 @@ -name: "Kubernetes Tests" +name: "Local Integration Tests" + +env: + TEST_USERNAME: "test-user" + TEST_PASSWORD: "P@sswo3d" + NEBARI_IMAGE_TAG: "main" on: pull_request: paths: - - ".github/workflows/kubernetes_test.yaml" + - ".github/workflows/test_local_integration.yaml" - "tests/**" - "scripts/**" - "src/**" @@ -16,7 +21,7 @@ on: - develop - release/\d{4}.\d{1,2}.\d{1,2} paths: - - ".github/workflows/kubernetes_test.yaml" + - ".github/workflows/test_local_integration.yaml" - "tests/**" - 
"scripts/**" - "src/**" @@ -30,23 +35,18 @@ on: type: string jobs: - test-kubernetes: - name: "Kubernetes Tests" + test-local-integration: runs-on: "cirun-runner--${{ github.run_id }}" defaults: run: shell: bash -l {0} + concurrency: + group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} steps: - - - name: "Set NEBARI_IMAGE_TAG=main" - run: | - echo "NEBARI_IMAGE_TAG=main" >> "$GITHUB_ENV" - echo "GITHUB_BASE_REF: ${GITHUB_BASE_REF}" - echo "GITHUB_HEAD_REF: ${GITHUB_HEAD_REF}" - echo "GITHUB_REF: ${GITHUB_REF}" - - name: 'Checkout Infrastructure' uses: actions/checkout@main + with: + fetch-depth: 0 - name: Checkout the branch from the PR that triggered the job if: ${{ github.event_name == 'issue_comment' }} @@ -61,23 +61,19 @@ jobs: env: CONDA: /home/runnerx/miniconda3 with: + auto-update-conda: true python-version: 3.8 miniconda-version: "latest" - - name: Install Nebari + + - name: Install Nebari and playwright run: | - conda install --quiet --yes -c anaconda pip pip install .[dev] playwright install - - name: Download and Install Kubectl - run: | - mkdir -p bin - pushd bin - curl -LO https://storage.googleapis.com/kubernetes-release/release/v1.19.0/bin/linux/amd64/kubectl - chmod +x kubectl + - uses: azure/setup-kubectl@v3 + with: + version: v1.19.16 - echo "$PWD" >> $GITHUB_PATH - popd - name: Enable docker permissions for user run: | sudo docker ps @@ -85,9 +81,11 @@ jobs: docker info docker ps + - name: Get routing table for docker pods run: | ip route + - name: Initialize Nebari Cloud run: | mkdir -p local-deployment @@ -99,59 +97,51 @@ jobs: sed -i -E 's/(mem_guarantee):\s+[A-Za-z0-9\.]+/\1: 0.25G/g' "nebari-config.yaml" cat nebari-config.yaml + - name: Deploy Nebari + working-directory: local-deployment run: | - cd local-deployment nebari deploy --config nebari-config.yaml --disable-prompt + - name: Basic kubectl checks after deployment if: always() run: | kubectl get all,cm,secret,ing -A + - name: Check github-actions.nebari.dev resolves 
run: | nslookup github-actions.nebari.dev + - name: Curl jupyterhub login page run: | curl -k https://github-actions.nebari.dev/hub/home -i - ### CYPRESS TESTS - - name: Setup Node - uses: actions/setup-node@v3 - with: - node-version: '16' - - name: npm version - run: | - npm --version - - name: Install Cypress dependencies - run: | - sudo apt-get -y update - sudo apt-get install -y libgtk2.0-0 libgtk-3-0 libgbm-dev libnotify-dev libgconf-2-4 libnss3 libxss1 libasound2 libxtst6 xauth xvfb - - - name: Get nebari-config.yaml full path - run: echo "NEBARI_CONFIG_PATH=`realpath ./local-deployment/nebari-config.yaml`" >> "$GITHUB_ENV" - - name: Create example-user + working-directory: local-deployment run: | - export CYPRESS_EXAMPLE_USER_NAME=example-user - export CYPRESS_EXAMPLE_USER_PASSWORD=P@sswo3d + nebari keycloak adduser --user "${TEST_USERNAME}" "${TEST_PASSWORD}" --config nebari-config.yaml + nebari keycloak listusers --config nebari-config.yaml - echo "CYPRESS_EXAMPLE_USER_NAME=${CYPRESS_EXAMPLE_USER_NAME}" >> $GITHUB_ENV - echo "CYPRESS_EXAMPLE_USER_PASSWORD=${CYPRESS_EXAMPLE_USER_PASSWORD}" >> $GITHUB_ENV + - uses: actions/setup-node@v3 + with: + node-version: 16 - nebari keycloak adduser --user "${CYPRESS_EXAMPLE_USER_NAME}" "${CYPRESS_EXAMPLE_USER_PASSWORD}" --config "${NEBARI_CONFIG_PATH}" - nebari keycloak listusers --config "${NEBARI_CONFIG_PATH}" + - name: Get nebari-config.yaml full path + run: echo "NEBARI_CONFIG_PATH=`realpath ./local-deployment/nebari-config.yaml`" >> "$GITHUB_ENV" - name: Cypress run - uses: cypress-io/github-action@v4 + uses: cypress-io/github-action@v6 env: + CYPRESS_EXAMPLE_USER_NAME: ${{ env.TEST_USERNAME }} + CYPRESS_EXAMPLE_USER_PASSWORD: ${{ env.TEST_PASSWORD }} CYPRESS_BASE_URL: https://github-actions.nebari.dev/ with: working-directory: tests/tests_e2e - name: Playwright Tests env: - KEYCLOAK_USERNAME: ${{ env.CYPRESS_EXAMPLE_USER_NAME }} - KEYCLOAK_PASSWORD: ${{ env.CYPRESS_EXAMPLE_USER_PASSWORD }} + KEYCLOAK_USERNAME: 
${{ env.TEST_USERNAME }} + KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }} NEBARI_FULL_URL: https://github-actions.nebari.dev/ working-directory: tests/tests_e2e/playwright run: | @@ -171,20 +161,22 @@ jobs: ./tests/tests_e2e/playwright/videos/ - name: Deployment Pytests + env: + KEYCLOAK_USERNAME: ${{ env.TEST_USERNAME }} + KEYCLOAK_PASSWORD: ${{ env.TEST_PASSWORD }} run: | - export KEYCLOAK_USERNAME=${CYPRESS_EXAMPLE_USER_NAME} - export KEYCLOAK_PASSWORD=${CYPRESS_EXAMPLE_USER_PASSWORD} pytest tests/tests_deployment/ -v -s - name: JupyterHub Notebook Tests timeout-minutes: 2 # run jhub-client after pytest since jhubctl can cleanup # the running server + env: + JUPYTERHUB_USERNAME: ${{ env.TEST_USERNAME }} + JUPYTERHUB_PASSWORD: ${{ env.TEST_PASSWORD }} run: | sleep 60 - export JUPYTERHUB_USERNAME=${CYPRESS_EXAMPLE_USER_NAME} - export JUPYTERHUB_PASSWORD=${CYPRESS_EXAMPLE_USER_PASSWORD} - jhubctl --verbose run --hub=https://github-actions.nebari.dev \ + jhubctl --verbose run --hub=https://github-actions.nebari.dev\ --auth-type=keycloak \ --validate --no-verify-ssl \ --kernel python3 \ @@ -193,6 +185,7 @@ jobs: ### CLEANUP AFTER TESTS - name: Cleanup nebari deployment + if: always() + working-directory: local-deployment run: | - cd local-deployment nebari destroy --config nebari-config.yaml --disable-prompt diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7284f9f7bf..97a47a9b47 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -22,7 +22,7 @@ ci: repos: # general - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: end-of-file-fixer exclude: "^docs-sphinx/cli.html" @@ -30,8 +30,6 @@ repos: exclude: "^docs-sphinx/cli.html" - id: check-json - id: check-yaml - # jinja2 templates for helm charts - exclude: "src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/(clearml/chart/templates/.*|prefect/chart/templates/.*)" args: [--allow-multiple-documents] - id: check-toml # 
Lint: Checks that non-binary executables have a proper shebang. @@ -53,13 +51,13 @@ repos: # python - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 23.10.1 hooks: - id: black args: ["--line-length=88", "--exclude=/src/_nebari/template/"] - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.292 + rev: v0.1.4 hooks: - id: ruff args: ["--fix"] @@ -75,7 +73,7 @@ repos: # terraform - repo: https://github.com/antonbabenko/pre-commit-terraform - rev: v1.83.4 + rev: v1.83.5 hooks: - id: terraform_fmt args: diff --git a/README.md b/README.md index 54ad76dabf..1787360b84 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ | :---------- | :-----| | Project | [![License](https://img.shields.io/badge/License-BSD%203--Clause-gray.svg?colorA=2D2A56&colorB=5936D9&style=flat.svg)](https://opensource.org/licenses/BSD-3-Clause) [![Nebari documentation](https://img.shields.io/badge/%F0%9F%93%96%20Read-the%20docs-gray.svg?colorA=2D2A56&colorB=5936D9&style=flat.svg)](https://www.nebari.dev/docs/welcome) [![PyPI](https://img.shields.io/pypi/v/nebari)](https://badge.fury.io/py/nebari) [![conda version](https://img.shields.io/conda/vn/conda-forge/nebari)]((https://anaconda.org/conda-forge/nebari)) | | Community | [![GH discussions](https://img.shields.io/badge/%F0%9F%92%AC%20-Participate%20in%20discussions-gray.svg?colorA=2D2A56&colorB=5936D9&style=flat.svg)](https://github.com/nebari-dev/nebari/discussions) [![Open an issue](https://img.shields.io/badge/%F0%9F%93%9D%20Open-an%20issue-gray.svg?colorA=2D2A56&colorB=5936D9&style=flat.svg)](https://github.com/nebari-dev/nebari/issues/new/choose) [![Community guidelines](https://img.shields.io/badge/🤝%20Community-guidelines-gray.svg?colorA=2D2A56&colorB=5936D9&style=flat.svg)](https://www.nebari.dev/docs/community/) | -| CI | [![Kubernetes Tests](https://github.com/nebari-dev/nebari/actions/workflows/kubernetes_test.yaml/badge.svg)](https://github.com/nebari-dev/nebari/actions/workflows/kubernetes_test.yaml) 
[![Tests](https://github.com/nebari-dev/nebari/actions/workflows/test.yaml/badge.svg)](https://github.com/nebari-dev/nebari/actions/workflows/test.yaml) [![Test Nebari Provider](https://github.com/nebari-dev/nebari/actions/workflows/test-provider.yaml/badge.svg)](https://github.com/nebari-dev/nebari/actions/workflows/test-provider.yaml) | +| CI | [![Kubernetes Tests](https://github.com/nebari-dev/nebari/actions/workflows/test_local_integration.yaml/badge.svg)](https://github.com/nebari-dev/nebari/actions/workflows/test_local_integration.yaml) [![Tests](https://github.com/nebari-dev/nebari/actions/workflows/test.yaml/badge.svg)](https://github.com/nebari-dev/nebari/actions/workflows/test.yaml) [![Test Nebari Provider](https://github.com/nebari-dev/nebari/actions/workflows/test-provider.yaml/badge.svg)](https://github.com/nebari-dev/nebari/actions/workflows/test-provider.yaml) | ## Table of contents diff --git a/RELEASE.md b/RELEASE.md index b5f7d5467a..44dc102ea9 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -12,6 +12,39 @@ This file is copied to nebari-dev/nebari-docs using a GitHub Action. --> ## Upcoming Release +## Release 2023.11.1 - November 15, 2023 + +### Feature changes and enhancements + +* Upgrade conda-store to latest version 2023.10.1 +* Minor improvements and bug fixes + +### Breaking Changes + +> WARNING: Prefect, ClearML and kbatch were removed in this release and upgrading to this version will result in all of them being uninstalled. 
+ +### What's Changed +* BUG: fix incorrect config override #2086 by @fangchenli in https://github.com/nebari-dev/nebari/pull/2087 +* ENH: add AWS IAM permissions_boundary option #2078 by @fangchenli in https://github.com/nebari-dev/nebari/pull/2082 +* CI: cleanup local integration workflow by @fangchenli in https://github.com/nebari-dev/nebari/pull/2079 +* [pre-commit.ci] pre-commit autoupdate by @pre-commit-ci in https://github.com/nebari-dev/nebari/pull/2099 +* ENH: check missing GCP services by @fangchenli in https://github.com/nebari-dev/nebari/pull/2036 +* ENH: use packaging for version parsing, add unit tests by @fangchenli in https://github.com/nebari-dev/nebari/pull/2048 +* ENH: specify required field when retrieving available gcp regions by @fangchenli in https://github.com/nebari-dev/nebari/pull/2033 +* Upgrade conda-store to 2023.10.1 by @iameskild in https://github.com/nebari-dev/nebari/pull/2092 +* Add upgrade command for 2023.11.1 by @iameskild in https://github.com/nebari-dev/nebari/pull/2103 +* CLN: cleanup typing and typing import in init by @fangchenli in https://github.com/nebari-dev/nebari/pull/2107 +* Remove kbatch, prefect and clearml by @iameskild in https://github.com/nebari-dev/nebari/pull/2101 +* Fix integration tests, helm-validate script by @iameskild in https://github.com/nebari-dev/nebari/pull/2102 +* Re-enable AWS tags support by @iameskild in https://github.com/nebari-dev/nebari/pull/2096 +* Update upgrade instructions for 2023.11.1 by @iameskild in https://github.com/nebari-dev/nebari/pull/2112 +* Update nebari-git env pins by @iameskild in https://github.com/nebari-dev/nebari/pull/2113 +* Update release notes for 2023.11.1 by @iameskild in https://github.com/nebari-dev/nebari/pull/2114 + + +**Full Changelog**: https://github.com/nebari-dev/nebari/compare/2023.10.1...2023.11.1 + + ## Release 2023.10.1 - October 20, 2023 This release includes a major refactor which introduces a Pluggy-based extension mechanism which allow 
developers to build new stages. This is the initial implementation diff --git a/pyproject.toml b/pyproject.toml index 089b262a52..20d2cc6c5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,27 +73,30 @@ dependencies = [ "rich==13.5.1", "ruamel.yaml==0.17.32", "typer==0.9.0", + "packaging==23.2", ] [project.optional-dependencies] dev = [ "black==22.3.0", - "mypy==1.6.1", + "coverage[toml]", "dask-gateway", "diagrams", - "python-dotenv", "escapism", "importlib-metadata<5.0", "jhub-client", + "jinja2", + "mypy==1.6.1", "paramiko", "pre-commit", - "pytest", - "pytest-timeout", - "pytest-playwright", "pytest-cov", - "coverage[toml]", - "jinja2", + "pytest-playwright", + "pytest-timeout", + "pytest", + "python-dotenv", + "python-hcl2", "setuptools==63.4.3", + "tqdm", ] docs = [ "sphinx", diff --git a/pytest.ini b/pytest.ini index 3dceafe383..0555ec6b2d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -12,3 +12,8 @@ markers = testpaths = tests xfail_strict = True + +log_format = %(asctime)s %(levelname)9s %(lineno)4s %(module)s: %(message)s +log_date_format = %Y-%m-%d %H:%M:%S +log_cli = True +log_cli_level = INFO diff --git a/scripts/helm-validate.py b/scripts/helm-validate.py index b655bea2c7..a916d2a2e5 100644 --- a/scripts/helm-validate.py +++ b/scripts/helm-validate.py @@ -214,8 +214,8 @@ def add_workflow_job_summary(chart_index: dict): if __name__ == "__main__": # charts = generate_index_of_helm_charts() - STAGES_DIR = "nebari/template/stages" - SKIP_CHARTS = ["prefect", "clearml", "helm-extensions"] + STAGES_DIR = "src/_nebari/stages" + SKIP_CHARTS = ["helm-extensions"] charts = HelmChartIndexer( stages_dir=STAGES_DIR, skip_charts=SKIP_CHARTS diff --git a/src/_nebari/constants.py b/src/_nebari/constants.py index 26b91fd7fe..89860e8ba8 100644 --- a/src/_nebari/constants.py +++ b/src/_nebari/constants.py @@ -1,4 +1,4 @@ -CURRENT_RELEASE = "2023.10.1" +CURRENT_RELEASE = "2023.11.1" # NOTE: Terraform cannot be upgraded further due to Hashicorp licensing changes # 
implemented in August 2023. @@ -15,7 +15,7 @@ DEFAULT_NEBARI_IMAGE_TAG = CURRENT_RELEASE DEFAULT_NEBARI_WORKFLOW_CONTROLLER_IMAGE_TAG = "2023.7.2" -DEFAULT_CONDA_STORE_IMAGE_TAG = "v0.4.14" +DEFAULT_CONDA_STORE_IMAGE_TAG = "2023.10.1" LATEST_SUPPORTED_PYTHON_VERSION = "3.10" diff --git a/src/_nebari/initialize.py b/src/_nebari/initialize.py index 70ec8bc5db..2f07647521 100644 --- a/src/_nebari/initialize.py +++ b/src/_nebari/initialize.py @@ -47,7 +47,7 @@ def render_config( ssl_cert_email: str = None, ): config = { - "provider": cloud_provider.value, + "provider": cloud_provider, "namespace": namespace, "nebari_version": __version__, } diff --git a/src/_nebari/provider/cicd/github.py b/src/_nebari/provider/cicd/github.py index ac6fe15451..7b58464c43 100644 --- a/src/_nebari/provider/cicd/github.py +++ b/src/_nebari/provider/cicd/github.py @@ -108,12 +108,6 @@ def gha_env_vars(config: schema.Main): if os.environ.get("NEBARI_GH_BRANCH"): env_vars["NEBARI_GH_BRANCH"] = "${{ secrets.NEBARI_GH_BRANCH }}" - # This assumes that the user is using the omitting sensitive values configuration for the token. 
- if config.prefect.enabled: - env_vars[ - "NEBARI_SECRET_prefect_token" - ] = "${{ secrets.NEBARI_SECRET_PREFECT_TOKEN }}" - if config.provider == schema.ProviderEnum.aws: env_vars["AWS_ACCESS_KEY_ID"] = "${{ secrets.AWS_ACCESS_KEY_ID }}" env_vars["AWS_SECRET_ACCESS_KEY"] = "${{ secrets.AWS_SECRET_ACCESS_KEY }}" diff --git a/src/_nebari/provider/cloud/google_cloud.py b/src/_nebari/provider/cloud/google_cloud.py index 746bcbc7c5..ba95f713cf 100644 --- a/src/_nebari/provider/cloud/google_cloud.py +++ b/src/_nebari/provider/cloud/google_cloud.py @@ -2,7 +2,7 @@ import json import os import subprocess -from typing import Dict, List +from typing import Dict, List, Set from _nebari import constants from _nebari.provider.cloud.commons import filter_by_highest_supported_k8s_version @@ -30,14 +30,14 @@ def projects() -> Dict[str, str]: @functools.lru_cache() -def regions(project: str) -> Dict[str, str]: - """Return a dict of available regions.""" +def regions() -> Set[str]: + """Return a set of available regions.""" check_credentials() output = subprocess.check_output( - ["gcloud", "compute", "regions", "list", "--project", project, "--format=json"] + ["gcloud", "compute", "regions", "list", "--format=json(name)"] ) - data = json.loads(output.decode("utf-8")) - return {_["description"]: _["name"] for _ in data} + data = json.loads(output) + return {_["name"] for _ in data} @functools.lru_cache() @@ -89,6 +89,22 @@ def instances(project: str) -> Dict[str, str]: return {_["description"]: _["name"] for _ in data} +def activated_services() -> Set[str]: + """Return a list of activated services.""" + check_credentials() + output = subprocess.check_output( + [ + "gcloud", + "services", + "list", + "--enabled", + "--format=json(config.title)", + ] + ) + data = json.loads(output) + return {service["config"]["title"] for service in data} + + def cluster_exists(cluster_name: str, project_id: str, region: str) -> bool: """Check if a GKE cluster exists.""" try: @@ -250,6 +266,26 @@ def 
gcp_cleanup(config: schema.Main): delete_service_account(service_account_name, project_id) +def check_missing_service() -> None: + """Check if all required services are activated.""" + required = { + "Compute Engine API", + "Kubernetes Engine API", + "Cloud Monitoring API", + "Cloud Autoscaling API", + "Identity and Access Management (IAM) API", + "Cloud Resource Manager API", + } + activated = activated_services() + common = required.intersection(activated) + missing = required.difference(common) + if missing: + raise ValueError( + f"""Missing required services: {missing}\n + Please see the documentation for more information: {constants.GCP_ENV_DOCS}""" + ) + + # Getting pricing data could come from here # https://cloudpricingcalculator.appspot.com/static/data/pricelist.json @@ -257,9 +293,9 @@ def gcp_cleanup(config: schema.Main): ### PYDANTIC VALIDATORS ### -def validate_region(project_id: str, region: str) -> str: +def validate_region(region: str) -> str: """Validate the GCP region is valid.""" - available_regions = regions(project_id) + available_regions = regions() if region not in available_regions: raise ValueError( f"Region {region} is not one of available regions {available_regions}" diff --git a/src/_nebari/stages/infrastructure/__init__.py b/src/_nebari/stages/infrastructure/__init__.py index a1b9f37735..1e34cb05ef 100644 --- a/src/_nebari/stages/infrastructure/__init__.py +++ b/src/_nebari/stages/infrastructure/__init__.py @@ -5,8 +5,7 @@ import re import sys import tempfile -import typing -from typing import Any, Dict, List, Optional, Tuple, Type +from typing import Any, Dict, List, Optional, Tuple, Type, Union import pydantic @@ -52,9 +51,9 @@ class DigitalOceanInputVars(schema.Base): name: str environment: str region: str - tags: typing.List[str] + tags: List[str] kubernetes_version: str - node_groups: typing.Dict[str, DigitalOceanNodeGroup] + node_groups: Dict[str, DigitalOceanNodeGroup] kubeconfig_filename: str = get_kubeconfig_filename() @@ 
-128,6 +127,7 @@ class AWSNodeGroupInputVars(schema.Base): desired_size: int max_size: int single_subnet: bool + permissions_boundary: Optional[str] = None class AWSInputVars(schema.Base): @@ -140,7 +140,9 @@ class AWSInputVars(schema.Base): node_groups: List[AWSNodeGroupInputVars] availability_zones: List[str] vpc_cidr_block: str + permissions_boundary: Optional[str] = None kubeconfig_filename: str = get_kubeconfig_filename() + tags: Dict[str, str] = {} def _calculate_node_groups(config: schema.Main): @@ -214,7 +216,7 @@ class DigitalOceanProvider(schema.Base): region: str kubernetes_version: str # Digital Ocean image slugs are listed here https://slugs.do-api.dev/ - node_groups: typing.Dict[str, DigitalOceanNodeGroup] = { + node_groups: Dict[str, DigitalOceanNodeGroup] = { "general": DigitalOceanNodeGroup( instance="g-8vcpu-32gb", min_nodes=1, max_nodes=1 ), @@ -225,7 +227,7 @@ class DigitalOceanProvider(schema.Base): instance="g-4vcpu-16gb", min_nodes=1, max_nodes=5 ), } - tags: typing.Optional[typing.List[str]] = [] + tags: Optional[List[str]] = [] @pydantic.validator("region") def _validate_region(cls, value): @@ -287,7 +289,7 @@ class GCPCIDRBlock(schema.Base): class GCPMasterAuthorizedNetworksConfig(schema.Base): - cidr_blocks: typing.List[GCPCIDRBlock] + cidr_blocks: List[GCPCIDRBlock] class GCPPrivateClusterConfig(schema.Base): @@ -312,34 +314,28 @@ class GCPNodeGroup(schema.Base): min_nodes: pydantic.conint(ge=0) = 0 max_nodes: pydantic.conint(ge=1) = 1 preemptible: bool = False - labels: typing.Dict[str, str] = {} - guest_accelerators: typing.List[GCPGuestAccelerator] = [] + labels: Dict[str, str] = {} + guest_accelerators: List[GCPGuestAccelerator] = [] class GoogleCloudPlatformProvider(schema.Base): region: str project: str kubernetes_version: str - availability_zones: typing.Optional[typing.List[str]] = [] + availability_zones: Optional[List[str]] = [] release_channel: str = constants.DEFAULT_GKE_RELEASE_CHANNEL - node_groups: typing.Dict[str, 
GCPNodeGroup] = { + node_groups: Dict[str, GCPNodeGroup] = { "general": GCPNodeGroup(instance="n1-standard-8", min_nodes=1, max_nodes=1), "user": GCPNodeGroup(instance="n1-standard-4", min_nodes=0, max_nodes=5), "worker": GCPNodeGroup(instance="n1-standard-4", min_nodes=0, max_nodes=5), } - tags: typing.Optional[typing.List[str]] = [] + tags: Optional[List[str]] = [] networking_mode: str = "ROUTE" network: str = "default" - subnetwork: typing.Optional[typing.Union[str, None]] = None - ip_allocation_policy: typing.Optional[ - typing.Union[GCPIPAllocationPolicy, None] - ] = None - master_authorized_networks_config: typing.Optional[ - typing.Union[GCPCIDRBlock, None] - ] = None - private_cluster_config: typing.Optional[ - typing.Union[GCPPrivateClusterConfig, None] - ] = None + subnetwork: Optional[Union[str, None]] = None + ip_allocation_policy: Optional[Union[GCPIPAllocationPolicy, None]] = None + master_authorized_networks_config: Optional[Union[GCPCIDRBlock, None]] = None + private_cluster_config: Optional[Union[GCPPrivateClusterConfig, None]] = None @pydantic.root_validator def validate_all(cls, values): @@ -353,7 +349,7 @@ def validate_all(cls, values): raise ValueError("The `google_cloud_platform.region` field is required.") # validate region - google_cloud.validate_region(project_id, region) + google_cloud.validate_region(region) # validate kubernetes version kubernetes_version = values.get("kubernetes_version") @@ -379,18 +375,18 @@ class AzureProvider(schema.Base): kubernetes_version: str storage_account_postfix: str resource_group_name: str = None - node_groups: typing.Dict[str, AzureNodeGroup] = { + node_groups: Dict[str, AzureNodeGroup] = { "general": AzureNodeGroup(instance="Standard_D8_v3", min_nodes=1, max_nodes=1), "user": AzureNodeGroup(instance="Standard_D4_v3", min_nodes=0, max_nodes=5), "worker": AzureNodeGroup(instance="Standard_D4_v3", min_nodes=0, max_nodes=5), } storage_account_postfix: str - vnet_subnet_id: typing.Optional[typing.Union[str, 
None]] = None + vnet_subnet_id: Optional[Union[str, None]] = None private_cluster_enabled: bool = False - resource_group_name: typing.Optional[str] = None - tags: typing.Optional[typing.Dict[str, str]] = {} - network_profile: typing.Optional[typing.Dict[str, str]] = None - max_pods: typing.Optional[int] = None + resource_group_name: Optional[str] = None + tags: Optional[Dict[str, str]] = {} + network_profile: Optional[Dict[str, str]] = None + max_pods: Optional[int] = None @pydantic.validator("kubernetes_version") def _validate_kubernetes_version(cls, value): @@ -432,13 +428,14 @@ class AWSNodeGroup(schema.Base): max_nodes: int gpu: bool = False single_subnet: bool = False + permissions_boundary: Optional[str] = None class AmazonWebServicesProvider(schema.Base): region: str kubernetes_version: str - availability_zones: typing.Optional[typing.List[str]] - node_groups: typing.Dict[str, AWSNodeGroup] = { + availability_zones: Optional[List[str]] + node_groups: Dict[str, AWSNodeGroup] = { "general": AWSNodeGroup(instance="m5.2xlarge", min_nodes=1, max_nodes=1), "user": AWSNodeGroup( instance="m5.xlarge", min_nodes=1, max_nodes=5, single_subnet=False @@ -447,9 +444,11 @@ class AmazonWebServicesProvider(schema.Base): instance="m5.xlarge", min_nodes=1, max_nodes=5, single_subnet=False ), } - existing_subnet_ids: typing.List[str] = None - existing_security_group_ids: str = None + existing_subnet_ids: List[str] = None + existing_security_group_id: str = None vpc_cidr_block: str = "10.10.0.0/16" + permissions_boundary: Optional[str] = None + tags: Optional[Dict[str, str]] = {} @pydantic.root_validator def validate_all(cls, values): @@ -487,8 +486,8 @@ def validate_all(cls, values): class LocalProvider(schema.Base): - kube_context: typing.Optional[str] - node_selectors: typing.Dict[str, KeyValueDict] = { + kube_context: Optional[str] + node_selectors: Dict[str, KeyValueDict] = { "general": KeyValueDict(key="kubernetes.io/os", value="linux"), "user": 
KeyValueDict(key="kubernetes.io/os", value="linux"), "worker": KeyValueDict(key="kubernetes.io/os", value="linux"), @@ -496,8 +495,8 @@ class LocalProvider(schema.Base): class ExistingProvider(schema.Base): - kube_context: typing.Optional[str] - node_selectors: typing.Dict[str, KeyValueDict] = { + kube_context: Optional[str] + node_selectors: Dict[str, KeyValueDict] = { "general": KeyValueDict(key="kubernetes.io/os", value="linux"), "user": KeyValueDict(key="kubernetes.io/os", value="linux"), "worker": KeyValueDict(key="kubernetes.io/os", value="linux"), @@ -528,12 +527,12 @@ class ExistingProvider(schema.Base): class InputSchema(schema.Base): - local: typing.Optional[LocalProvider] - existing: typing.Optional[ExistingProvider] - google_cloud_platform: typing.Optional[GoogleCloudPlatformProvider] - amazon_web_services: typing.Optional[AmazonWebServicesProvider] - azure: typing.Optional[AzureProvider] - digital_ocean: typing.Optional[DigitalOceanProvider] + local: Optional[LocalProvider] + existing: Optional[ExistingProvider] + google_cloud_platform: Optional[GoogleCloudPlatformProvider] + amazon_web_services: Optional[AmazonWebServicesProvider] + azure: Optional[AzureProvider] + digital_ocean: Optional[DigitalOceanProvider] @pydantic.root_validator(pre=True) def check_provider(cls, values): @@ -543,7 +542,7 @@ def check_provider(cls, values): # TODO: all cloud providers has required fields, but local and existing don't. # And there is no way to initialize a model without user input here. # We preserve the original behavior here, but we should find a better way to do this. 
- if provider in ["local", "existing"]: + if provider in ["local", "existing"] and provider not in values: values[provider] = provider_enum_model_map[provider]() else: # if the provider field is invalid, it won't be set when this validator is called @@ -576,20 +575,20 @@ class NodeSelectorKeyValue(schema.Base): class KubernetesCredentials(schema.Base): host: str cluster_ca_certifiate: str - token: typing.Optional[str] - username: typing.Optional[str] - password: typing.Optional[str] - client_certificate: typing.Optional[str] - client_key: typing.Optional[str] - config_path: typing.Optional[str] - config_context: typing.Optional[str] + token: Optional[str] + username: Optional[str] + password: Optional[str] + client_certificate: Optional[str] + client_key: Optional[str] + config_path: Optional[str] + config_context: Optional[str] class OutputSchema(schema.Base): node_selectors: Dict[str, NodeSelectorKeyValue] kubernetes_credentials: KubernetesCredentials kubeconfig_filename: str - nfs_endpoint: typing.Optional[str] + nfs_endpoint: Optional[str] class KubernetesInfrastructureStage(NebariTerraformStage): @@ -756,7 +755,7 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]): name=self.config.escaped_project_name, environment=self.config.namespace, existing_subnet_ids=self.config.amazon_web_services.existing_subnet_ids, - existing_security_group_id=self.config.amazon_web_services.existing_security_group_ids, + existing_security_group_id=self.config.amazon_web_services.existing_security_group_id, region=self.config.amazon_web_services.region, kubernetes_version=self.config.amazon_web_services.kubernetes_version, node_groups=[ @@ -768,11 +767,14 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]): desired_size=node_group.min_nodes, max_size=node_group.max_nodes, single_subnet=node_group.single_subnet, + permissions_boundary=node_group.permissions_boundary, ) for name, node_group in self.config.amazon_web_services.node_groups.items() ], 
availability_zones=self.config.amazon_web_services.availability_zones, vpc_cidr_block=self.config.amazon_web_services.vpc_cidr_block, + permissions_boundary=self.config.amazon_web_services.permissions_boundary, + tags=self.config.amazon_web_services.tags, ).dict() else: raise ValueError(f"Unknown provider: {self.config.provider}") diff --git a/src/_nebari/stages/infrastructure/template/aws/locals.tf b/src/_nebari/stages/infrastructure/template/aws/locals.tf index d2a065dd75..c414a4b5a0 100644 --- a/src/_nebari/stages/infrastructure/template/aws/locals.tf +++ b/src/_nebari/stages/infrastructure/template/aws/locals.tf @@ -1,9 +1,11 @@ locals { - additional_tags = { - Project = var.name - Owner = "terraform" - Environment = var.environment - } - + additional_tags = merge( + { + Project = var.name + Owner = "terraform" + Environment = var.environment + }, + var.tags, + ) cluster_name = "${var.name}-${var.environment}" } diff --git a/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/policy.tf b/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/policy.tf index c26dcd8c2c..6916bc6532 100644 --- a/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/policy.tf +++ b/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/policy.tf @@ -21,8 +21,8 @@ resource "aws_iam_role" "cluster" { }] Version = "2012-10-17" }) - - tags = var.tags + permissions_boundary = var.permissions_boundary + tags = var.tags } resource "aws_iam_role_policy_attachment" "cluster-policy" { @@ -50,8 +50,8 @@ resource "aws_iam_role" "node-group" { }] Version = "2012-10-17" }) - - tags = var.tags + permissions_boundary = var.permissions_boundary + tags = var.tags } resource "aws_iam_role_policy_attachment" "node-group-policy" { diff --git a/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/variables.tf b/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/variables.tf index 8c6a9ee958..e22c640929 100644 --- 
a/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/variables.tf +++ b/src/_nebari/stages/infrastructure/template/aws/modules/kubernetes/variables.tf @@ -69,3 +69,9 @@ variable "public_access_cidrs" { type = list(string) default = ["0.0.0.0/0"] } + +variable "permissions_boundary" { + description = "ARN of the policy that is used to set the permissions boundary for the role" + type = string + default = null +} diff --git a/src/_nebari/stages/infrastructure/template/aws/variables.tf b/src/_nebari/stages/infrastructure/template/aws/variables.tf index 593827d0af..c07c8f60f2 100644 --- a/src/_nebari/stages/infrastructure/template/aws/variables.tf +++ b/src/_nebari/stages/infrastructure/template/aws/variables.tf @@ -65,3 +65,15 @@ variable "eks_public_access_cidrs" { type = list(string) default = ["0.0.0.0/0"] } + +variable "permissions_boundary" { + description = "ARN of the policy that is used to set the permissions boundary for the role" + type = string + default = null +} + +variable "tags" { + description = "Additional tags to add to resources" + type = map(string) + default = {} +} diff --git a/src/_nebari/stages/kubernetes_ingress/__init__.py b/src/_nebari/stages/kubernetes_ingress/__init__.py index 5a966b1459..cabe61f0cd 100644 --- a/src/_nebari/stages/kubernetes_ingress/__init__.py +++ b/src/_nebari/stages/kubernetes_ingress/__init__.py @@ -24,17 +24,6 @@ TIMEOUT = 10 # seconds -def add_clearml_dns(zone_name, record_name, record_type, ip_or_hostname): - dns_records = [ - f"app.clearml.{record_name}", - f"api.clearml.{record_name}", - f"files.clearml.{record_name}", - ] - - for dns_record in dns_records: - update_record(zone_name, dns_record, record_type, ip_or_hostname) - - def provision_ingress_dns( stage_outputs: Dict[str, Dict[str, Any]], config: schema.Main, @@ -60,13 +49,9 @@ def provision_ingress_dns( schema.ProviderEnum.azure, }: update_record(zone_name, record_name, "A", ip_or_hostname) - if config.clearml.enabled: - 
add_clearml_dns(zone_name, record_name, "A", ip_or_hostname) elif config.provider == schema.ProviderEnum.aws: update_record(zone_name, record_name, "CNAME", ip_or_hostname) - if config.clearml.enabled: - add_clearml_dns(zone_name, record_name, "CNAME", ip_or_hostname) else: logger.info( f"Couldn't update the DNS record for cloud provider: {config.provider}" diff --git a/src/_nebari/stages/kubernetes_services/__init__.py b/src/_nebari/stages/kubernetes_services/__init__.py index 2b8bb6fd80..65baac4f97 100644 --- a/src/_nebari/stages/kubernetes_services/__init__.py +++ b/src/_nebari/stages/kubernetes_services/__init__.py @@ -37,13 +37,6 @@ def to_yaml(cls, representer, node): return representer.represent_str(node.value) -class Prefect(schema.Base): - enabled: bool = False - image: typing.Optional[str] - overrides: typing.Dict = {} - token: typing.Optional[str] - - class DefaultImages(schema.Base): jupyterhub: str = f"quay.io/nebari/nebari-jupyterhub:{set_docker_image_tag()}" jupyterlab: str = f"quay.io/nebari/nebari-jupyterlab:{set_docker_image_tag()}" @@ -195,20 +188,10 @@ class ArgoWorkflows(schema.Base): nebari_workflow_controller: NebariWorkflowController = NebariWorkflowController() -class KBatch(schema.Base): - enabled: bool = True - - class Monitoring(schema.Base): enabled: bool = True -class ClearML(schema.Base): - enabled: bool = False - enable_forward_auth: bool = False - overrides: typing.Dict = {} - - class JupyterHub(schema.Base): overrides: typing.Dict = {} @@ -228,7 +211,6 @@ class JupyterLab(schema.Base): class InputSchema(schema.Base): - prefect: Prefect = Prefect() default_images: DefaultImages = DefaultImages() storage: Storage = Storage() theme: Theme = Theme() @@ -238,60 +220,56 @@ class InputSchema(schema.Base): name="dask", channels=["conda-forge"], dependencies=[ - "python=3.10.8", - "ipykernel=6.21.0", - "ipywidgets==7.7.1", - f"nebari-dask =={set_nebari_dask_version()}", - "python-graphviz=0.20.1", - "pyarrow=10.0.1", - "s3fs=2023.1.0", - 
"gcsfs=2023.1.0", - "numpy=1.23.5", - "numba=0.56.4", - "pandas=1.5.3", - { - "pip": [ - "kbatch==0.4.2", - ], - }, + "python==3.11.6", + "ipykernel==6.26.0", + "ipywidgets==8.1.1", + f"nebari-dask=={set_nebari_dask_version()}", + "python-graphviz==0.20.1", + "pyarrow==14.0.1", + "s3fs==2023.10.0", + "gcsfs==2023.10.0", + "numpy=1.26.0", + "numba=0.58.1", + "pandas=2.1.3", + "xarray==2023.10.1", ], ), "environment-dashboard.yaml": CondaEnvironment( name="dashboard", channels=["conda-forge"], dependencies=[ - "python=3.10", - "cufflinks-py=0.17.3", - "dash=2.8.1", - "geopandas=0.12.2", - "geopy=2.3.0", - "geoviews=1.9.6", - "gunicorn=20.1.0", - "holoviews=1.15.4", - "ipykernel=6.21.2", - "ipywidgets=8.0.4", - "jupyter=1.0.0", - "jupyterlab=3.6.1", - "jupyter_bokeh=3.0.5", - "matplotlib=3.7.0", + "python==3.11.6", + "cufflinks-py==0.17.3", + "dash==2.14.1", + "geopandas==0.14.1", + "geopy==2.4.0", + "geoviews==1.11.0", + "gunicorn==21.2.0", + "holoviews==1.18.1", + "ipykernel==6.26.0", + "ipywidgets==8.1.1", + "jupyter==1.0.0", + "jupyter_bokeh==3.0.7", + "matplotlib==3.8.1", f"nebari-dask=={set_nebari_dask_version()}", - "nodejs=18.12.1", - "numpy", - "openpyxl=3.1.1", - "pandas=1.5.3", - "panel=0.14.3", - "param=1.12.3", - "plotly=5.13.0", - "python-graphviz=0.20.1", - "rich=13.3.1", - "streamlit=1.9.0", - "sympy=1.11.1", - "voila=0.4.0", - "pip=23.0", + "nodejs=20.8.1", + "numpy==1.26.0", + "openpyxl==3.1.2", + "pandas==2.1.3", + "panel==1.3.1", + "param==2.0.1", + "plotly==5.18.0", + "python-graphviz==0.20.1", + "rich==13.6.0", + "streamlit==1.28.1", + "sympy==1.12", + "voila==0.5.5", + "xarray==2023.10.1", + "pip==23.3.1", { "pip": [ - "streamlit-image-comparison==0.0.3", - "noaa-coops==0.2.1", + "streamlit-image-comparison==0.0.4", + "noaa-coops==0.1.9", "dash_core_components==2.0.0", "dash_html_components==2.0.0", ], @@ -301,9 +279,7 @@ class InputSchema(schema.Base): } conda_store: CondaStore = CondaStore() argo_workflows: ArgoWorkflows = ArgoWorkflows() - 
kbatch: KBatch = KBatch() monitoring: Monitoring = Monitoring() - clearml: ClearML = ClearML() jupyterhub: JupyterHub = JupyterHub() jupyterlab: JupyterLab = JupyterLab() @@ -382,23 +358,6 @@ class ArgoWorkflowsInputVars(schema.Base): ) -class KBatchInputVars(schema.Base): - kbatch_enabled: bool = Field(alias="kbatch-enabled") - - -class PrefectInputVars(schema.Base): - prefect_enabled: bool = Field(alias="prefect-enabled") - prefect_token: str = Field(None, alias="prefect-token") - prefect_image: str = Field(None, alias="prefect-image") - prefect_overrides: Dict = Field(alias="prefect-overrides") - - -class ClearMLInputVars(schema.Base): - clearml_enabled: bool = Field(alias="clearml-enabled") - clearml_enable_forwardauth: bool = Field(alias="clearml-enable-forwardauth") - clearml_overrides: List[str] = Field(alias="clearml-overrides") - - class KubernetesServicesStage(NebariTerraformStage): name = "07-kubernetes-services" priority = 70 @@ -520,23 +479,6 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]): keycloak_read_only_user_credentials=keycloak_read_only_user_credentials, ) - kbatch_vars = KBatchInputVars( - kbatch_enabled=self.config.kbatch.enabled, - ) - - prefect_vars = PrefectInputVars( - prefect_enabled=self.config.prefect.enabled, - prefect_token=self.config.prefect.token, - prefect_image=self.config.prefect.image, - prefect_overrides=self.config.prefect.overrides, - ) - - clearml_vars = ClearMLInputVars( - clearml_enabled=self.config.clearml.enabled, - clearml_enable_forwardauth=self.config.clearml.enable_forward_auth, - clearml_overrides=[json.dumps(self.config.clearml.overrides)], - ) - return { **kubernetes_services_vars.dict(by_alias=True), **conda_store_vars.dict(by_alias=True), @@ -544,9 +486,6 @@ def input_vars(self, stage_outputs: Dict[str, Dict[str, Any]]): **dask_gateway_vars.dict(by_alias=True), **monitoring_vars.dict(by_alias=True), **argo_workflows_vars.dict(by_alias=True), - **kbatch_vars.dict(by_alias=True), - 
**prefect_vars.dict(by_alias=True), - **clearml_vars.dict(by_alias=True), } def check( diff --git a/src/_nebari/stages/kubernetes_services/template/clearml.tf b/src/_nebari/stages/kubernetes_services/template/clearml.tf deleted file mode 100644 index 6c619fc656..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/clearml.tf +++ /dev/null @@ -1,31 +0,0 @@ -# ======================= VARIABLES ====================== -variable "clearml-enabled" { - description = "Clearml enabled or disabled" - type = bool -} - -variable "clearml-enable-forwardauth" { - description = "Clearml enabled or disabled forward authentication" - type = bool -} - - -variable "clearml-overrides" { - description = "Clearml helm chart overrides" - type = list(string) -} - - -# ====================== RESOURCES ======================= -module "clearml" { - count = var.clearml-enabled ? 1 : 0 - - source = "./modules/kubernetes/services/clearml" - - namespace = var.environment - external-url = var.endpoint - - overrides = var.clearml-overrides - - enable-forward-auth = var.clearml-enable-forwardauth -} diff --git a/src/_nebari/stages/kubernetes_services/template/jupyterhub.tf b/src/_nebari/stages/kubernetes_services/template/jupyterhub.tf index 009d857436..9fa68cbf53 100644 --- a/src/_nebari/stages/kubernetes_services/template/jupyterhub.tf +++ b/src/_nebari/stages/kubernetes_services/template/jupyterhub.tf @@ -112,8 +112,6 @@ module "jupyterhub" { "dask-gateway" ], (var.monitoring-enabled ? ["monitoring"] : []), - (var.prefect-enabled ? ["prefect"] : []), - (var.kbatch-enabled ? 
["kbatch"] : []) ) general-node-group = var.node_groups.general diff --git a/src/_nebari/stages/kubernetes_services/template/kbatch.tf b/src/_nebari/stages/kubernetes_services/template/kbatch.tf deleted file mode 100644 index d400fbdd5e..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/kbatch.tf +++ /dev/null @@ -1,23 +0,0 @@ -# ======================= VARIABLES ====================== -variable "kbatch-enabled" { - description = "kbatch enabled or disabled" - type = bool -} - - -# ====================== RESOURCES ======================= -module "kbatch" { - count = var.kbatch-enabled ? 1 : 0 - - source = "./modules/kubernetes/services/kbatch" - - namespace = var.environment - external-url = var.endpoint - - jupyterhub_api_token = module.jupyterhub.services.kbatch.api_token - node-group = var.node_groups.user - - dask-gateway-address = module.dask-gateway.config.gateway.address - dask-gateway-proxy-address = module.dask-gateway.config.gateway.proxy_address - dask-worker-image = var.dask-worker-image -} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/Chart.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/Chart.yaml deleted file mode 100644 index 98418b2c21..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/Chart.yaml +++ /dev/null @@ -1,14 +0,0 @@ -apiVersion: v2 -appVersion: "1.0.2" -description: Clearml server Helm chart for Kubernetes -name: clearml-server-cloud-ready -version: "2.0.2+1" -dependencies: - - name: redis - version: "~10.9.0" - repository: "https://charts.bitnami.com/bitnami" - condition: redis.enabled - - name: mongodb - version: "~10.3.2" - repository: "https://charts.bitnami.com/bitnami" - condition: mongodb.enabled diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/LICENSE 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/LICENSE deleted file mode 100644 index 688d2d8043..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/LICENSE +++ /dev/null @@ -1,557 +0,0 @@ - Server Side Public License - VERSION 1, OCTOBER 16, 2018 - - Copyright © 2021 allegro.ai, Inc. - - Everyone is permitted to copy and distribute verbatim copies of this - license document, but changing it is not allowed. - - TERMS AND CONDITIONS - - 0. Definitions. - - “This License” refers to Server Side Public License. - - “Copyright” also means copyright-like laws that apply to other kinds of - works, such as semiconductor masks. - - “The Program” refers to any copyrightable work licensed under this - License. Each licensee is addressed as “you”. “Licensees” and - “recipients” may be individuals or organizations. - - To “modify” a work means to copy from or adapt all or part of the work in - a fashion requiring copyright permission, other than the making of an - exact copy. The resulting work is called a “modified version” of the - earlier work or a work “based on” the earlier work. - - A “covered work” means either the unmodified Program or a work based on - the Program. - - To “propagate” a work means to do anything with it that, without - permission, would make you directly or secondarily liable for - infringement under applicable copyright law, except executing it on a - computer or modifying a private copy. Propagation includes copying, - distribution (with or without modification), making available to the - public, and in some countries other activities as well. - - To “convey” a work means any kind of propagation that enables other - parties to make or receive copies. Mere interaction with a user through a - computer network, with no transfer of a copy, is not conveying. 
- - An interactive user interface displays “Appropriate Legal Notices” to the - extent that it includes a convenient and prominently visible feature that - (1) displays an appropriate copyright notice, and (2) tells the user that - there is no warranty for the work (except to the extent that warranties - are provided), that licensees may convey the work under this License, and - how to view a copy of this License. If the interface presents a list of - user commands or options, such as a menu, a prominent item in the list - meets this criterion. - - 1. Source Code. - - The “source code” for a work means the preferred form of the work for - making modifications to it. “Object code” means any non-source form of a - work. - - A “Standard Interface” means an interface that either is an official - standard defined by a recognized standards body, or, in the case of - interfaces specified for a particular programming language, one that is - widely used among developers working in that language. The “System - Libraries” of an executable work include anything, other than the work as - a whole, that (a) is included in the normal form of packaging a Major - Component, but which is not part of that Major Component, and (b) serves - only to enable use of the work with that Major Component, or to implement - a Standard Interface for which an implementation is available to the - public in source code form. A “Major Component”, in this context, means a - major essential component (kernel, window system, and so on) of the - specific operating system (if any) on which the executable work runs, or - a compiler used to produce the work, or an object code interpreter used - to run it. - - The “Corresponding Source” for a work in object code form means all the - source code needed to generate, install, and (for an executable work) run - the object code and to modify the work, including scripts to control - those activities. 
However, it does not include the work's System - Libraries, or general-purpose tools or generally available free programs - which are used unmodified in performing those activities but which are - not part of the work. For example, Corresponding Source includes - interface definition files associated with source files for the work, and - the source code for shared libraries and dynamically linked subprograms - that the work is specifically designed to require, such as by intimate - data communication or control flow between those subprograms and other - parts of the work. - - The Corresponding Source need not include anything that users can - regenerate automatically from other parts of the Corresponding Source. - - The Corresponding Source for a work in source code form is that same work. - - 2. Basic Permissions. - - All rights granted under this License are granted for the term of - copyright on the Program, and are irrevocable provided the stated - conditions are met. This License explicitly affirms your unlimited - permission to run the unmodified Program, subject to section 13. The - output from running a covered work is covered by this License only if the - output, given its content, constitutes a covered work. This License - acknowledges your rights of fair use or other equivalent, as provided by - copyright law. Subject to section 13, you may make, run and propagate - covered works that you do not convey, without conditions so long as your - license otherwise remains in force. You may convey covered works to - others for the sole purpose of having them make modifications exclusively - for you, or provide you with facilities for running those works, provided - that you comply with the terms of this License in conveying all - material for which you do not control copyright. 
Those thus making or - running the covered works for you must do so exclusively on your - behalf, under your direction and control, on terms that prohibit them - from making any copies of your copyrighted material outside their - relationship with you. - - Conveying under any other circumstances is permitted solely under the - conditions stated below. Sublicensing is not allowed; section 10 makes it - unnecessary. - - 3. Protecting Users' Legal Rights From Anti-Circumvention Law. - - No covered work shall be deemed part of an effective technological - measure under any applicable law fulfilling obligations under article 11 - of the WIPO copyright treaty adopted on 20 December 1996, or similar laws - prohibiting or restricting circumvention of such measures. - - When you convey a covered work, you waive any legal power to forbid - circumvention of technological measures to the extent such circumvention is - effected by exercising rights under this License with respect to the - covered work, and you disclaim any intention to limit operation or - modification of the work as a means of enforcing, against the work's users, - your or third parties' legal rights to forbid circumvention of - technological measures. - - 4. Conveying Verbatim Copies. - - You may convey verbatim copies of the Program's source code as you - receive it, in any medium, provided that you conspicuously and - appropriately publish on each copy an appropriate copyright notice; keep - intact all notices stating that this License and any non-permissive terms - added in accord with section 7 apply to the code; keep intact all notices - of the absence of any warranty; and give all recipients a copy of this - License along with the Program. You may charge any price or no price for - each copy that you convey, and you may offer support or warranty - protection for a fee. - - 5. Conveying Modified Source Versions. 
- - You may convey a work based on the Program, or the modifications to - produce it from the Program, in the form of source code under the terms - of section 4, provided that you also meet all of these conditions: - - a) The work must carry prominent notices stating that you modified it, - and giving a relevant date. - - b) The work must carry prominent notices stating that it is released - under this License and any conditions added under section 7. This - requirement modifies the requirement in section 4 to “keep intact all - notices”. - - c) You must license the entire work, as a whole, under this License to - anyone who comes into possession of a copy. This License will therefore - apply, along with any applicable section 7 additional terms, to the - whole of the work, and all its parts, regardless of how they are - packaged. This License gives no permission to license the work in any - other way, but it does not invalidate such permission if you have - separately received it. - - d) If the work has interactive user interfaces, each must display - Appropriate Legal Notices; however, if the Program has interactive - interfaces that do not display Appropriate Legal Notices, your work - need not make them do so. - - A compilation of a covered work with other separate and independent - works, which are not by their nature extensions of the covered work, and - which are not combined with it such as to form a larger program, in or on - a volume of a storage or distribution medium, is called an “aggregate” if - the compilation and its resulting copyright are not used to limit the - access or legal rights of the compilation's users beyond what the - individual works permit. Inclusion of a covered work in an aggregate does - not cause this License to apply to the other parts of the aggregate. - - 6. Conveying Non-Source Forms. 
- - You may convey a covered work in object code form under the terms of - sections 4 and 5, provided that you also convey the machine-readable - Corresponding Source under the terms of this License, in one of these - ways: - - a) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by the - Corresponding Source fixed on a durable physical medium customarily - used for software interchange. - - b) Convey the object code in, or embodied in, a physical product - (including a physical distribution medium), accompanied by a written - offer, valid for at least three years and valid for as long as you - offer spare parts or customer support for that product model, to give - anyone who possesses the object code either (1) a copy of the - Corresponding Source for all the software in the product that is - covered by this License, on a durable physical medium customarily used - for software interchange, for a price no more than your reasonable cost - of physically performing this conveying of source, or (2) access to - copy the Corresponding Source from a network server at no charge. - - c) Convey individual copies of the object code with a copy of the - written offer to provide the Corresponding Source. This alternative is - allowed only occasionally and noncommercially, and only if you received - the object code with such an offer, in accord with subsection 6b. - - d) Convey the object code by offering access from a designated place - (gratis or for a charge), and offer equivalent access to the - Corresponding Source in the same way through the same place at no - further charge. You need not require recipients to copy the - Corresponding Source along with the object code. 
If the place to copy - the object code is a network server, the Corresponding Source may be on - a different server (operated by you or a third party) that supports - equivalent copying facilities, provided you maintain clear directions - next to the object code saying where to find the Corresponding Source. - Regardless of what server hosts the Corresponding Source, you remain - obligated to ensure that it is available for as long as needed to - satisfy these requirements. - - e) Convey the object code using peer-to-peer transmission, provided you - inform other peers where the object code and Corresponding Source of - the work are being offered to the general public at no charge under - subsection 6d. - - A separable portion of the object code, whose source code is excluded - from the Corresponding Source as a System Library, need not be included - in conveying the object code work. - - A “User Product” is either (1) a “consumer product”, which means any - tangible personal property which is normally used for personal, family, - or household purposes, or (2) anything designed or sold for incorporation - into a dwelling. In determining whether a product is a consumer product, - doubtful cases shall be resolved in favor of coverage. For a particular - product received by a particular user, “normally used” refers to a - typical or common use of that class of product, regardless of the status - of the particular user or of the way in which the particular user - actually uses, or expects or is expected to use, the product. A product - is a consumer product regardless of whether the product has substantial - commercial, industrial or non-consumer uses, unless such uses represent - the only significant mode of use of the product. 
- - “Installation Information” for a User Product means any methods, - procedures, authorization keys, or other information required to install - and execute modified versions of a covered work in that User Product from - a modified version of its Corresponding Source. The information must - suffice to ensure that the continued functioning of the modified object - code is in no case prevented or interfered with solely because - modification has been made. - - If you convey an object code work under this section in, or with, or - specifically for use in, a User Product, and the conveying occurs as part - of a transaction in which the right of possession and use of the User - Product is transferred to the recipient in perpetuity or for a fixed term - (regardless of how the transaction is characterized), the Corresponding - Source conveyed under this section must be accompanied by the - Installation Information. But this requirement does not apply if neither - you nor any third party retains the ability to install modified object - code on the User Product (for example, the work has been installed in - ROM). - - The requirement to provide Installation Information does not include a - requirement to continue to provide support service, warranty, or updates - for a work that has been modified or installed by the recipient, or for - the User Product in which it has been modified or installed. Access - to a network may be denied when the modification itself materially - and adversely affects the operation of the network or violates the - rules and protocols for communication across the network. - - Corresponding Source conveyed, and Installation Information provided, in - accord with this section must be in a format that is publicly documented - (and with an implementation available to the public in source code form), - and must require no special password or key for unpacking, reading or - copying. - - 7. Additional Terms. 
- - “Additional permissions” are terms that supplement the terms of this - License by making exceptions from one or more of its conditions. - Additional permissions that are applicable to the entire Program shall be - treated as though they were included in this License, to the extent that - they are valid under applicable law. If additional permissions apply only - to part of the Program, that part may be used separately under those - permissions, but the entire Program remains governed by this License - without regard to the additional permissions. When you convey a copy of - a covered work, you may at your option remove any additional permissions - from that copy, or from any part of it. (Additional permissions may be - written to require their own removal in certain cases when you modify the - work.) You may place additional permissions on material, added by you to - a covered work, for which you have or can give appropriate copyright - permission. - - Notwithstanding any other provision of this License, for material you add - to a covered work, you may (if authorized by the copyright holders of - that material) supplement the terms of this License with terms: - - a) Disclaiming warranty or limiting liability differently from the - terms of sections 15 and 16 of this License; or - - b) Requiring preservation of specified reasonable legal notices or - author attributions in that material or in the Appropriate Legal - Notices displayed by works containing it; or - - c) Prohibiting misrepresentation of the origin of that material, or - requiring that modified versions of such material be marked in - reasonable ways as different from the original version; or - - d) Limiting the use for publicity purposes of names of licensors or - authors of the material; or - - e) Declining to grant rights under trademark law for use of some trade - names, trademarks, or service marks; or - - f) Requiring indemnification of licensors and authors of that material - by anyone who 
conveys the material (or modified versions of it) with - contractual assumptions of liability to the recipient, for any - liability that these contractual assumptions directly impose on those - licensors and authors. - - All other non-permissive additional terms are considered “further - restrictions” within the meaning of section 10. If the Program as you - received it, or any part of it, contains a notice stating that it is - governed by this License along with a term that is a further restriction, - you may remove that term. If a license document contains a further - restriction but permits relicensing or conveying under this License, you - may add to a covered work material governed by the terms of that license - document, provided that the further restriction does not survive such - relicensing or conveying. - - If you add terms to a covered work in accord with this section, you must - place, in the relevant source files, a statement of the additional terms - that apply to those files, or a notice indicating where to find the - applicable terms. Additional terms, permissive or non-permissive, may be - stated in the form of a separately written license, or stated as - exceptions; the above requirements apply either way. - - 8. Termination. - - You may not propagate or modify a covered work except as expressly - provided under this License. Any attempt otherwise to propagate or modify - it is void, and will automatically terminate your rights under this - License (including any patent licenses granted under the third paragraph - of section 11). - - However, if you cease all violation of this License, then your license - from a particular copyright holder is reinstated (a) provisionally, - unless and until the copyright holder explicitly and finally terminates - your license, and (b) permanently, if the copyright holder fails to - notify you of the violation by some reasonable means prior to 60 days - after the cessation. 
- - Moreover, your license from a particular copyright holder is reinstated - permanently if the copyright holder notifies you of the violation by some - reasonable means, this is the first time you have received notice of - violation of this License (for any work) from that copyright holder, and - you cure the violation prior to 30 days after your receipt of the notice. - - Termination of your rights under this section does not terminate the - licenses of parties who have received copies or rights from you under - this License. If your rights have been terminated and not permanently - reinstated, you do not qualify to receive new licenses for the same - material under section 10. - - 9. Acceptance Not Required for Having Copies. - - You are not required to accept this License in order to receive or run a - copy of the Program. Ancillary propagation of a covered work occurring - solely as a consequence of using peer-to-peer transmission to receive a - copy likewise does not require acceptance. However, nothing other than - this License grants you permission to propagate or modify any covered - work. These actions infringe copyright if you do not accept this License. - Therefore, by modifying or propagating a covered work, you indicate your - acceptance of this License to do so. - - 10. Automatic Licensing of Downstream Recipients. - - Each time you convey a covered work, the recipient automatically receives - a license from the original licensors, to run, modify and propagate that - work, subject to this License. You are not responsible for enforcing - compliance by third parties with this License. - - An “entity transaction” is a transaction transferring control of an - organization, or substantially all assets of one, or subdividing an - organization, or merging organizations. 
If propagation of a covered work - results from an entity transaction, each party to that transaction who - receives a copy of the work also receives whatever licenses to the work - the party's predecessor in interest had or could give under the previous - paragraph, plus a right to possession of the Corresponding Source of the - work from the predecessor in interest, if the predecessor has it or can - get it with reasonable efforts. - - You may not impose any further restrictions on the exercise of the rights - granted or affirmed under this License. For example, you may not impose a - license fee, royalty, or other charge for exercise of rights granted - under this License, and you may not initiate litigation (including a - cross-claim or counterclaim in a lawsuit) alleging that any patent claim - is infringed by making, using, selling, offering for sale, or importing - the Program or any portion of it. - - 11. Patents. - - A “contributor” is a copyright holder who authorizes use under this - License of the Program or a work on which the Program is based. The work - thus licensed is called the contributor's “contributor version”. - - A contributor's “essential patent claims” are all patent claims owned or - controlled by the contributor, whether already acquired or hereafter - acquired, that would be infringed by some manner, permitted by this - License, of making, using, or selling its contributor version, but do not - include claims that would be infringed only as a consequence of further - modification of the contributor version. For purposes of this definition, - “control” includes the right to grant patent sublicenses in a manner - consistent with the requirements of this License. - - Each contributor grants you a non-exclusive, worldwide, royalty-free - patent license under the contributor's essential patent claims, to make, - use, sell, offer for sale, import and otherwise run, modify and propagate - the contents of its contributor version. 
- - In the following three paragraphs, a “patent license” is any express - agreement or commitment, however denominated, not to enforce a patent - (such as an express permission to practice a patent or covenant not to - sue for patent infringement). To “grant” such a patent license to a party - means to make such an agreement or commitment not to enforce a patent - against the party. - - If you convey a covered work, knowingly relying on a patent license, and - the Corresponding Source of the work is not available for anyone to copy, - free of charge and under the terms of this License, through a publicly - available network server or other readily accessible means, then you must - either (1) cause the Corresponding Source to be so available, or (2) - arrange to deprive yourself of the benefit of the patent license for this - particular work, or (3) arrange, in a manner consistent with the - requirements of this License, to extend the patent license to downstream - recipients. “Knowingly relying” means you have actual knowledge that, but - for the patent license, your conveying the covered work in a country, or - your recipient's use of the covered work in a country, would infringe - one or more identifiable patents in that country that you have reason - to believe are valid. - - If, pursuant to or in connection with a single transaction or - arrangement, you convey, or propagate by procuring conveyance of, a - covered work, and grant a patent license to some of the parties receiving - the covered work authorizing them to use, propagate, modify or convey a - specific copy of the covered work, then the patent license you grant is - automatically extended to all recipients of the covered work and works - based on it. - - A patent license is “discriminatory” if it does not include within the - scope of its coverage, prohibits the exercise of, or is conditioned on - the non-exercise of one or more of the rights that are specifically - granted under this License. 
You may not convey a covered work if you are - a party to an arrangement with a third party that is in the business of - distributing software, under which you make payment to the third party - based on the extent of your activity of conveying the work, and under - which the third party grants, to any of the parties who would receive the - covered work from you, a discriminatory patent license (a) in connection - with copies of the covered work conveyed by you (or copies made from - those copies), or (b) primarily for and in connection with specific - products or compilations that contain the covered work, unless you - entered into that arrangement, or that patent license was granted, prior - to 28 March 2007. - - Nothing in this License shall be construed as excluding or limiting any - implied license or other defenses to infringement that may otherwise be - available to you under applicable patent law. - - 12. No Surrender of Others' Freedom. - - If conditions are imposed on you (whether by court order, agreement or - otherwise) that contradict the conditions of this License, they do not - excuse you from the conditions of this License. If you cannot use, - propagate or convey a covered work so as to satisfy simultaneously your - obligations under this License and any other pertinent obligations, then - as a consequence you may not use, propagate or convey it at all. For - example, if you agree to terms that obligate you to collect a royalty for - further conveying from those to whom you convey the Program, the only way - you could satisfy both those terms and this License would be to refrain - entirely from conveying the Program. - - 13. Offering the Program as a Service. - - If you make the functionality of the Program or a modified version - available to third parties as a service, you must make the Service Source - Code available via network download to everyone at no charge, under the - terms of this License. 
Making the functionality of the Program or - modified version available to third parties as a service includes, - without limitation, enabling third parties to interact with the - functionality of the Program or modified version remotely through a - computer network, offering a service the value of which entirely or - primarily derives from the value of the Program or modified version, or - offering a service that accomplishes for users the primary purpose of the - Program or modified version. - - “Service Source Code” means the Corresponding Source for the Program or - the modified version, and the Corresponding Source for all programs that - you use to make the Program or modified version available as a service, - including, without limitation, management software, user interfaces, - application program interfaces, automation software, monitoring software, - backup software, storage software and hosting software, all such that a - user could run an instance of the service using the Service Source Code - you make available. - - 14. Revised Versions of this License. - - MongoDB, Inc. may publish revised and/or new versions of the Server Side - Public License from time to time. Such new versions will be similar in - spirit to the present version, but may differ in detail to address new - problems or concerns. - - Each version is given a distinguishing version number. If the Program - specifies that a certain numbered version of the Server Side Public - License “or any later version” applies to it, you have the option of - following the terms and conditions either of that numbered version or of - any later version published by MongoDB, Inc. If the Program does not - specify a version number of the Server Side Public License, you may - choose any version ever published by MongoDB, Inc. 
- - If the Program specifies that a proxy can decide which future versions of - the Server Side Public License can be used, that proxy's public statement - of acceptance of a version permanently authorizes you to choose that - version for the Program. - - Later license versions may give you additional or different permissions. - However, no additional obligations are imposed on any author or copyright - holder as a result of your choosing to follow a later version. - - 15. Disclaimer of Warranty. - - THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY - APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT - HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM “AS IS” WITHOUT WARRANTY - OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, - THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM - IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF - ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - - 16. Limitation of Liability. - - IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING - WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS - THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING - ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF - THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO - LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU - OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER - PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE - POSSIBILITY OF SUCH DAMAGES. - - 17. Interpretation of Sections 15 and 16. 
- - If the disclaimer of warranty and limitation of liability provided above - cannot be given local legal effect according to their terms, reviewing - courts shall apply local law that most closely approximates an absolute - waiver of all civil liability in connection with the Program, unless a - warranty or assumption of liability accompanies a copy of the Program in - return for a fee. - - END OF TERMS AND CONDITIONS diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/README.md b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/README.md deleted file mode 100644 index 76dd362412..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/README.md +++ /dev/null @@ -1,146 +0,0 @@ -# ClearML Server for Kubernetes Clusters Using Helm - -# Cloud Ready Version (Advanced) - -## Auto-Magical Experiment Manager & Version Control for AI - -[![GitHub license](https://img.shields.io/badge/license-SSPL-green.svg)](https://img.shields.io/badge/license-SSPL-green.svg) -[![GitHub version](https://img.shields.io/github/release-pre/allegroai/clearml-server.svg)](https://img.shields.io/github/release-pre/allegroai/clearml-server.svg) -[![PyPI status](https://img.shields.io/badge/status-beta-yellow.svg)](https://img.shields.io/badge/status-beta-yellow.svg) - -## Introduction - -The **clearml-server** is the backend service infrastructure for [ClearML](https://github.com/allegroai/clearml). It allows multiple users to collaborate and manage their -experiments. By default, \*ClearML is set up to work with the ClearML Demo Server, which is open to anyone and resets periodically. In order to host your own server, you will need -to install **clearml-server** and point ClearML to it. 
- -**clearml-server** contains the following components: - -- The ClearML Web-App, a single-page UI for experiment management and browsing -- RESTful API for: - - Documenting and logging experiment information, statistics and results - - Querying experiments history, logs and results -- Locally-hosted file server for storing images and models making them easily accessible using the Web-App - -Use this repository to add **clearml-server** to your Helm and then deploy **clearml-server** on Kubernetes clusters using Helm. - -## Deploying Your Own Elasticsearch, Redis and Mongodb - -ClearML Server requires that you have elasticsearch, redis and mongodb services. This chart default templates contains [bitnami](https://bitnami.com/) charts for -[redis](https://github.com/bitnami/charts/tree/master/bitnami/redis) and [mongodb](https://github.com/bitnami/charts/tree/master/bitnami/mongodb), and the official chart for -elasticsearch (which is currently still beta). You can either use the default ones, or use your own deployments and set their name and ports in the appropriate sections of this -chart. In order to use your own deployment, make sure to disable the existing one in the `values.yaml` (for example, in order to disable elastic set -`elasticsearch.enabled = false`) - -## Prerequisites - -1. a Kubernetes cluster -2. Persistent Volumes for `pvc-apiserver.yaml`, `pvc-fileserver.yaml`, and `pvc-agentservices.yaml`. -3. Persistent volumes for elasticsearch, mongodb and redis (redis is optional). 
See relevant information for each chart: - - [elasticsearch](https://github.com/elastic/helm-charts/blob/7.6.2/elasticsearch/values.yaml) - - [mongodb](https://github.com/bitnami/charts/tree/master/bitnami/mongodb#parameters) - - [redis](https://github.com/bitnami/charts/tree/master/bitnami/redis#parameters) Make sure to define the following values for each PV: - - elasticsearch - in the `values.yaml` set `elasticsearch.persistence.enabled=true` and set `elasticsearch.volumeClaimTemplate.storageClassName` to the storageClassName used in - your elasticsearch PV. - - mongodb - in order to define a persistent volume for mongodb, in the `values.yaml` set `mongodb.persistence.enabled=true` and set `mongodb.persistence.storageClass` to the - storageClassName used in your mongodb PV. Read [here](https://github.com/bitnami/charts/tree/master/bitnami/mongodb#parameters) for more details. - - redis - in order to define a persistent volume for redis, in the `values.yaml` set `redis.master.persistence.enabled=true` and set `redis.master.persistence.storageClass` to - the storageClassName used in your redis PV. Read [here](https://github.com/bitnami/charts/tree/master/bitnami/redis#parameters) for more details. -4. `kubectl` is installed and configured (see [Install and Set Up kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/) in the Kubernetes documentation) -5. `helm` installed (see [Installing Helm](https://helm.sh/docs/using_helm/#installing-helm) in the Helm documentation) - -## Deploying ClearML Server in Kubernetes Clusters Using Helm - -1. Add the **clearml-server** repository to your Helm: - - ``` - helm repo add allegroai https://allegroai.github.io/clearml-server-helm-cloud-ready/ - ``` - -2. Confirm the **clearml-server** repository is now in Helm: - - ``` - helm search repo clearml - ``` - - The helm search results must include `allegroai/clearml-server-cloud-ready`. - -3. 
Install `clearml-server-cloud-ready` on your cluster: - - ``` - helm install clearml-server allegroai/clearml-server-cloud-ready --namespace=clearml --create-namespace - ``` - - A clearml `namespace` is created in your cluster and **clearml-server** is deployed in it. - -## Updating ClearML Server application using Helm - -1. If you are upgrading from the [single node version](https://github.com/allegroai/clearml-server-helm) of ClearML Server helm charts, follow these steps first: - - 1. Log in to the node previously labeled as `app=trains` - 2. Copy each folder under /opt/clearml/data to it's persistent volume. - 3. Follow the [Deploying ClearML Server](##-Deploying-ClearML-Server-in-Kubernetes-Clusters-Using-Helm) instructions to deploy Clearml - -2. Update using new or updated `values.yaml` - - ``` - helm upgrade clearml-server allegroai/clearml-server-cloud-ready -f new-values.yaml - ``` - -3. If there are no breaking changes, you can update your deployment to match repository version: - - ``` - helm upgrade clearml-server allegroai/clearml-server-cloud-ready - ``` - - **Important**: - - - If you previously deployed a **clearml-server**, you may encounter errors. If so, you must first delete old deployment using the following command: - - ``` - helm delete --purge clearml-server - ``` - - After running the `helm delete` command, you can run the `helm install` command. - -## Port Mapping - -After **clearml-server** is deployed, the services expose the following node ports: - -- API server on `30008` -- Web server on `30080` -- File server on `30081` - -## Accessing ClearML Server - -Access **clearml-server** by creating a load balancer and domain name with records pointing to the load balancer. - -Once you have a load balancer and domain name set up, follow these steps to configure access to clearml-server on your k8s cluster: - -1. 
Create domain records - - - Create 3 records to be used for Web-App, File server and API access using the following rules: - - - `app.` - - `files.` - - `api.` - - (*for example, `app.clearml.mydomainname.com`, `files.clearml.mydomainname.com` and `api.clearml.mydomainname.com`*) - -2. Point the records you created to the load balancer - -3. Configure the load balancer to redirect traffic coming from the records you created: - - - `app.` should be redirected to k8s cluster nodes on port `30080` - - `files.` should be redirected to k8s cluster nodes on port `30081` - - `api.` should be redirected to k8s cluster nodes on port `30008` - -## Additional Configuration for ClearML Server - -You can also configure the **clearml-server** for: - -- fixed users (users with credentials) -- non-responsive experiment watchdog settings - -For detailed instructions, see the [Optional Configuration](https://github.com/allegroai/clearml-server#optional-configuration) section in the **clearml-server** repository README -file. 
diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/charts/mongodb-10.3.7.tgz b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/charts/mongodb-10.3.7.tgz deleted file mode 100644 index 945fe93208..0000000000 Binary files a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/charts/mongodb-10.3.7.tgz and /dev/null differ diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/charts/redis-10.9.0.tgz b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/charts/redis-10.9.0.tgz deleted file mode 100644 index 9bd7302ed2..0000000000 Binary files a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/charts/redis-10.9.0.tgz and /dev/null differ diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/NOTES.txt b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/NOTES.txt deleted file mode 100644 index ea4b98ca4b..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/NOTES.txt +++ /dev/null @@ -1,22 +0,0 @@ -1. Get the application URL by running these commands: -{{- if .Values.ingress.enabled }} -{{- range $host := .Values.ingress.hosts }} - {{- range .paths }} - http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} - {{- end }} -{{- end }} -{{- else if contains "NodePort" .Values.webserver.service.type }} - export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "clearml.fullname" . 
}}) - export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") - echo http://$NODE_IP:$NODE_PORT -{{- else if contains "LoadBalancer" .Values.webserver.service.type }} - NOTE: It may take a few minutes for the LoadBalancer IP to be available. - You can watch the status of by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "clearml.fullname" . }}' - export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "clearml.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") - echo http://$SERVICE_IP:{{ .Values.webserver.service.port }} -{{- else if contains "ClusterIP" .Values.webserver.service.type }} - export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "clearml.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") - export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") - echo "Visit http://127.0.0.1:8080 to use your application" - kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT -{{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/_helpers.tpl b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/_helpers.tpl deleted file mode 100644 index 3cfc5b0a12..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/_helpers.tpl +++ /dev/null @@ -1,97 +0,0 @@ -{{/* -Expand the name of the chart. -*/}} -{{- define "clearml.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. 
-We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "clearml.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. -*/}} -{{- define "clearml.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "clearml.labels" -}} -helm.sh/chart: {{ include "clearml.chart" . }} -{{ include "clearml.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "clearml.selectorLabels" -}} -app.kubernetes.io/name: {{ include "clearml.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -Selector labels (apiserver) -*/}} -{{- define "clearml.selectorLabelsApiServer" -}} -app.kubernetes.io/name: {{ include "clearml.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }}-apiserver -{{- end }} - -Selector labels (fileserver) -*/}} -{{- define "clearml.selectorLabelsFileServer" -}} -app.kubernetes.io/name: {{ include "clearml.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }}-fileserver -{{- end }} - -Selector labels (webserver) -*/}} -{{- define "clearml.selectorLabelsWebServer" -}} -app.kubernetes.io/name: {{ include "clearml.name" . 
}} -app.kubernetes.io/instance: {{ .Release.Name }}-webserver -{{- end }} - -Selector labels (agentservices) -*/}} -{{- define "clearml.selectorLabelsAgentServices" -}} -app.kubernetes.io/name: {{ include "clearml.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }}-agentservices -{{- end }} - -Selector labels (agent) -*/}} -{{- define "clearml.selectorLabelsAgent" -}} -app.kubernetes.io/name: {{ include "clearml.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }}-agent -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "clearml.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "clearml.fullname" .) .Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-agent.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-agent.yaml deleted file mode 100644 index ea371ca659..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-agent.yaml +++ /dev/null @@ -1,105 +0,0 @@ -{{- range .Values.agentGroups }} ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "clearml.fullname" $ }}-{{ .name }}-agent - labels: - {{- include "clearml.labels" $ | nindent 4 }} -spec: - replicas: {{ .replicaCount }} - selector: - matchLabels: - {{- include "clearml.selectorLabelsAgent" $ | nindent 6 }} - template: - metadata: - {{- with .podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - {{- include "clearml.selectorLabelsAgent" $ | nindent 8 }} - spec: - volumes: - {{ if .clearmlConfig }} - - name: agent-clearml-conf-volume - secret: - secretName: {{ .name }}-conf - items: - - key: clearml.conf - path: clearml.conf - {{ end }} - initContainers: - - name: init-agent-{{ .name }} - image: "{{ .image.repository }}:{{ .image.tag | default $.Chart.AppVersion }}" - command: - - /bin/sh - - -c - - > - set -x; - while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do - echo "waiting for apiserver" ; - sleep 5 ; - done - containers: - - name: {{ $.Chart.Name }}-{{ .name }} - image: "{{ .image.repository }}:{{ .image.tag }}" - imagePullPolicy: {{ .image.pullPolicy }} - securityContext: - privileged: true - resources: - limits: - nvidia.com/gpu: - {{ .nvidiaGpusPerAgent }} - env: - - name: CLEARML_API_HOST - value: 'http://{{ include "clearml.fullname" $ }}-apiserver:{{ $.Values.apiserver.service.port }}' - - name: CLEARML_WEB_HOST - value: 'http://{{ include "clearml.fullname" $ }}-webserver:{{ $.Values.webserver.service.port }}' - - name: CLEARML_FILES_HOST - value: 'http://{{ include "clearml.fullname" $ }}-fileserver:{{ $.Values.fileserver.service.port }}' - - name: CLEARML_AGENT_GIT_USER - value: {{ .clearmlGitUser}} - - name: CLEARML_AGENT_GIT_PASS - value: {{ .clearmlGitPassword}} - - name: AWS_ACCESS_KEY_ID - value: {{ .awsAccessKeyId}} - - name: AWS_SECRET_ACCESS_KEY - value: {{ .awsSecretAccessKey}} - - name: AWS_DEFAULT_REGION - value: {{ .awsDefaultRegion}} - - name: AZURE_STORAGE_ACCOUNT - value: {{ .azureStorageAccount}} - - name: AZURE_STORAGE_KEY - value: {{ .azureStorageKey}} - - name: CLEARML_API_ACCESS_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_key - - name: CLEARML_API_SECRET_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_secret - command: 
- - /bin/sh - - -c - - "apt-get update ; - apt-get install -y curl python3-pip git; - python3 -m pip install -U pip ; - python3 -m pip install clearml-agent{{ .agentVersion}} ; - CLEARML_AGENT_K8S_HOST_MOUNT=/root/.clearml:/root/.clearml clearml-agent daemon --queue {{ .queues}}" - {{- with .nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} -{{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-agentservices.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-agentservices.yaml deleted file mode 100644 index 97fbaccc3b..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-agentservices.yaml +++ /dev/null @@ -1,100 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "clearml.fullname" . }}-agentservices - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.agentservices.replicaCount }} - selector: - matchLabels: - {{- include "clearml.selectorLabelsAgentServices" . | nindent 6 }} - template: - metadata: - {{- with .Values.agentservices.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "clearml.selectorLabelsAgentServices" . | nindent 8 }} - spec: - volumes: - - name: agentservices-data - persistentVolumeClaim: - claimName: {{ include "clearml.fullname" . 
}}-agentservices-data - initContainers: - - name: init-agentservices - image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}" - command: - - /bin/sh - - -c - - > - set -x; - while [ $(curl -sw '%{http_code}' "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}/debug.ping" -o /dev/null) -ne 200 ] ; do - echo "waiting for apiserver" ; - sleep 5 ; - done - containers: - - name: {{ .Chart.Name }} - image: "{{ .Values.agentservices.image.repository }}:{{ .Values.agentservices.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.agentservices.image.pullPolicy }} - env: - - name: CLEARML_HOST_IP - value: {{ .Values.agentservices.clearmlHostIp }} - - name: CLEARML_API_HOST - value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}" - - name: CLEARML_WEB_HOST - value: {{ .Values.agentservices.clearmlWebHost }} - - name: CLEARML_FILES_HOST - value: {{ .Values.agentservices.clearmlFilesHost }} - - name: CLEARML_AGENT_GIT_USER - value: {{ .Values.agentservices.clearmlGitUser }} - - name: CLEARML_AGENT_GIT_PASS - value: {{ .Values.agentservices.clearmlGitPassword }} - - name: CLEARML_AGENT_UPDATE_VERSION - value: {{ .Values.agentservices.agentVersion }} - - name: CLEARML_AGENT_DEFAULT_BASE_DOCKER - value: {{ .Values.agentservices.defaultBaseDocker }} - - name: AWS_ACCESS_KEY_ID - value: {{ .Values.agentservices.awsAccessKeyId }} - - name: AWS_SECRET_ACCESS_KEY - value: {{ .Values.agentservices.awsSecretAccessKey }} - - name: AWS_DEFAULT_REGION - value: {{ .Values.agentservices.awsDefaultRegion }} - - name: AZURE_STORAGE_ACCOUNT - value: {{ .Values.agentservices.azureStorageAccount }} - - name: AZURE_STORAGE_KEY - value: {{ .Values.agentservices.azureStorageKey }} - - name: GOOGLE_APPLICATION_CREDENTIALS - value: {{ .Values.agentservices.googleCredentials }} - - name: CLEARML_WORKER_ID - value: {{ 
.Values.agentservices.clearmlWorkerId }} - - name: CLEARML_API_ACCESS_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_key - - name: CLEARML_API_SECRET_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_secret - args: - - agentservices - volumeMounts: - - name: agentservices-data - mountPath: /root/.clearml - resources: - {{- toYaml .Values.agentservices.resources | nindent 12 }} - {{- with .Values.agentservices.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.agentservices.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.agentservices.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-apiserver.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-apiserver.yaml deleted file mode 100644 index a4c30f4aac..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-apiserver.yaml +++ /dev/null @@ -1,122 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "clearml.fullname" . }}-apiserver - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.apiserver.replicaCount }} - selector: - matchLabels: - {{- include "clearml.selectorLabelsApiServer" . | nindent 6 }} - template: - metadata: - {{- with .Values.apiserver.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "clearml.selectorLabelsApiServer" . | nindent 8 }} - spec: - {{- if .Values.apiserver.storage.enableConfigVolume }} - volumes: - - name: apiserver-config - persistentVolumeClaim: - claimName: {{ include "clearml.fullname" . 
}}-apiserver-config - {{- end }} - containers: - - name: {{ .Chart.Name }} - image: "{{ .Values.apiserver.image.repository }}:{{ .Values.apiserver.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.apiserver.image.pullPolicy }} - ports: - - name: http - containerPort: 8008 - protocol: TCP - env: - - name: CLEARML_ELASTIC_SERVICE_HOST - value: "{{ tpl .Values.elasticsearch.name . }}" - - name: CLEARML_ELASTIC_SERVICE_PORT - value: "{{ .Values.elasticsearch.httpPort }}" - - name: CLEARML_MONGODB_SERVICE_HOST - value: "{{ tpl .Values.mongodb.service.name . }}" - - name: CLEARML_MONGODB_SERVICE_PORT - value: "{{ .Values.mongodb.service.port }}" - - name: CLEARML_REDIS_SERVICE_HOST - value: "{{ tpl .Values.redis.master.name . }}" - - name: CLEARML_REDIS_SERVICE_PORT - value: "{{ .Values.redis.master.port }}" - - name: CLEARML__APISERVER__PRE_POPULATE__ENABLED - value: "{{ .Values.apiserver.prepopulateEnabled }}" - - name: CLEARML__APISERVER__PRE_POPULATE__ZIP_FILES - value: "{{ .Values.apiserver.prepopulateZipFiles }}" - - name: CLEARML_SERVER_DEPLOYMENT_TYPE - value: "helm-cloud" - - name: CLEARML_CONFIG_DIR - value: /opt/clearml/config - - name: CLEARML__APISERVER__DEFAULT_COMPANY - value: {{ .Values.clearml.defaultCompany }} - - name: CLEARML__SECURE__HTTP__SESSION_SECRET__APISERVER - valueFrom: - secretKeyRef: - name: clearml-conf - key: http_session - - name: CLEARML__SECURE__AUTH__TOKEN_SECRET - valueFrom: - secretKeyRef: - name: clearml-conf - key: auth_token - - name: CLEARML__SECURE__CREDENTIALS__APISERVER__USER_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: apiserver_key - - name: CLEARML__SECURE__CREDENTIALS__APISERVER__USER_SECRET - valueFrom: - secretKeyRef: - name: clearml-conf - key: apiserver_secret - - name: CLEARML__SECURE__CREDENTIALS__TESTS__USER_KEY - valueFrom: - secretKeyRef: - name: clearml-conf - key: tests_user_key - - name: CLEARML__SECURE__CREDENTIALS__TESTS__USER_SECRET - valueFrom: - secretKeyRef: - name: 
clearml-conf - key: tests_user_secret - {{- if .Values.apiserver.extraEnvs }} - {{ toYaml .Values.apiserver.extraEnvs | nindent 10 }} - {{- end }} - args: - - apiserver - livenessProbe: - initialDelaySeconds: {{ .Values.apiserver.livenessDelay }} - httpGet: - path: /debug.ping - port: 8008 - readinessProbe: - initialDelaySeconds: {{ .Values.apiserver.readinessDelay }} - failureThreshold: 8 - httpGet: - path: /debug.ping - port: 8008 - {{- if .Values.apiserver.storage.enableConfigVolume }} - volumeMounts: - - name: apiserver-config - mountPath: /opt/clearml/config - {{- end }} - resources: - {{- toYaml .Values.apiserver.resources | nindent 12 }} - {{- with .Values.apiserver.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.apiserver.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.apiserver.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-elastic.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-elastic.yaml deleted file mode 100644 index 0ea328f3f4..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-elastic.yaml +++ /dev/null @@ -1,264 +0,0 @@ -# Source: clearml-server-cloud-ready/charts/elasticsearch/templates/poddisruptionbudget.yaml -apiVersion: policy/v1beta1 -kind: PodDisruptionBudget -metadata: - name: "clearml-elastic-master-pdb" -spec: - maxUnavailable: 1 - selector: - matchLabels: - app: "clearml-elastic-master" ---- -# Source: clearml-server-cloud-ready/charts/elasticsearch/templates/configmap.yaml -apiVersion: v1 -kind: ConfigMap -metadata: - name: clearml-elastic-master-config - labels: - heritage: "Helm" - release: "clearml-server" - chart: "elasticsearch" - app: 
"clearml-elastic-master" -data: - elasticsearch.yml: | - xpack.security.enabled: false ---- -# Source: clearml-server-cloud-ready/charts/elasticsearch/templates/service.yaml -kind: Service -apiVersion: v1 -metadata: - name: clearml-elastic-master - labels: - heritage: "Helm" - release: "clearml-server" - chart: "elasticsearch" - app: "clearml-elastic-master" - annotations: - {} -spec: - type: ClusterIP - selector: - heritage: "Helm" - chart: "elasticsearch" - release: "clearml-server" - app: "clearml-elastic-master" - ports: - - name: http - protocol: TCP - port: 9200 - - name: transport - protocol: TCP - port: 9300 ---- -# Source: clearml-server-cloud-ready/charts/elasticsearch/templates/service.yaml -kind: Service -apiVersion: v1 -metadata: - name: clearml-elastic-master-headless - labels: - heritage: "Helm" - release: "clearml-server" - chart: "elasticsearch" - app: "clearml-elastic-master" - annotations: - service.alpha.kubernetes.io/tolerate-unready-endpoints: "true" -spec: - clusterIP: None # This is needed for statefulset hostnames like elasticsearch-0 to resolve - # Create endpoints also if the related pod isn't ready - publishNotReadyAddresses: true - selector: - app: "clearml-elastic-master" - ports: - - name: http - port: 9200 - - name: transport - port: 9300 ---- -# Source: clearml-server-cloud-ready/charts/elasticsearch/templates/statefulset.yaml -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: clearml-elastic-master - labels: - heritage: "Helm" - release: "clearml-server" - chart: "elasticsearch" - app: "clearml-elastic-master" - annotations: - esMajorVersion: "7" -spec: - serviceName: clearml-elastic-master-headless - selector: - matchLabels: - app: "clearml-elastic-master" - replicas: 1 - podManagementPolicy: Parallel - updateStrategy: - type: RollingUpdate - volumeClaimTemplates: - - metadata: - name: clearml-elastic-master - spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 50Gi - template: - metadata: - name: 
"clearml-elastic-master" - labels: - heritage: "Helm" - release: "clearml-server" - chart: "elasticsearch" - app: "clearml-elastic-master" - annotations: - - configchecksum: 74bf3a32b86b711225b81f59050eb46d9c7e332399326f6fd4ee8627b4febfa - spec: - {{- with .Values.elasticsearch.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - securityContext: - fsGroup: 1000 - runAsUser: 1000 - affinity: - podAntiAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - "clearml-elastic-master" - topologyKey: kubernetes.io/hostname - terminationGracePeriodSeconds: 120 - volumes: - - name: esconfig - configMap: - name: clearml-elastic-master-config - initContainers: - - name: configure-sysctl - securityContext: - runAsUser: 0 - privileged: true - image: "docker.elastic.co/elasticsearch/elasticsearch:7.6.2" - imagePullPolicy: "IfNotPresent" - command: ["sysctl", "-w", "vm.max_map_count=262144"] - resources: - {} - - containers: - - name: "elasticsearch" - securityContext: - capabilities: - # drop: - # - ALL - add: - - IPC_LOCK - - SYS_RESOURCE - # runAsNonRoot: true - runAsUser: 0 - # privileged: true - # runAsUser: 1000 - image: {{ .Values.elasticsearch.image }} - imagePullPolicy: "Always" #"IfNotPresent" - readinessProbe: - exec: - command: - - sh - - -c - - | - #!/usr/bin/env bash -e - # If the node is starting up wait for the cluster to be ready (request params: 'wait_for_status=yellow&timeout=1s' ) - # Once it has started only check that the node itself is responding - START_FILE=/tmp/.es_start_file - - http () { - local path="${1}" - if [ -n "${ELASTIC_USERNAME}" ] && [ -n "${ELASTIC_PASSWORD}" ]; then - BASIC_AUTH="-u ${ELASTIC_USERNAME}:${ELASTIC_PASSWORD}" - else - BASIC_AUTH='' - fi - curl -XGET -s -k --fail ${BASIC_AUTH} http://127.0.0.1:9200${path} - } - - if [ -f "${START_FILE}" ]; then - echo 'Elasticsearch is already running, lets check the node is healthy and 
there are master nodes available' - http "/_cluster/health?timeout=0s" - else - echo 'Waiting for elasticsearch cluster to become ready (request params: "wait_for_status=yellow&timeout=1s" )' - if http "/_cluster/health?wait_for_status=yellow&timeout=1s" ; then - touch ${START_FILE} - exit 0 - else - echo 'Cluster is not yet ready (request params: "wait_for_status=yellow&timeout=1s" )' - exit 1 - fi - fi - failureThreshold: 3 - initialDelaySeconds: 10 - periodSeconds: 10 - successThreshold: 3 - timeoutSeconds: 5 - ports: - - name: http - containerPort: 9200 - - name: transport - containerPort: 9300 - resources: - limits: - cpu: 1000m - memory: 4Gi - requests: - cpu: 1000m - memory: 4Gi - env: - - name: node.name - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: cluster.initial_master_nodes - value: "clearml-elastic-master-0," - - name: discovery.seed_hosts - value: "clearml-elastic-master-headless" - - name: cluster.name - value: "clearml-elastic" - - name: network.host - value: "0.0.0.0" - - name: ES_JAVA_OPTS - value: "-Xmx2g -Xms2g" - - name: node.data - value: "true" - - name: node.ingest - value: "true" - - name: node.master - value: "true" - - name: bootstrap.memory_lock - value: "true" - - name: cluster.routing.allocation.node_initial_primaries_recoveries - value: "500" - - name: cluster.routing.allocation.disk.watermark.low - value: 500mb - - name: cluster.routing.allocation.disk.watermark.high - value: 500mb - - name: cluster.routing.allocation.disk.watermark.flood_stage - value: 500mb - - name: http.compression_level - value: "7" - - name: reindex.remote.whitelist - value: '*.*' - - name: xpack.monitoring.enabled - value: "false" - - name: xpack.security.enabled - value: "false" - volumeMounts: - - name: "clearml-elastic-master" - mountPath: /usr/share/elasticsearch/data - - - name: esconfig - mountPath: /usr/share/elasticsearch/config/elasticsearch.yml - subPath: elasticsearch.yml ---- diff --git 
a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-fileserver.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-fileserver.yaml deleted file mode 100644 index f3cf04f02a..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-fileserver.yaml +++ /dev/null @@ -1,69 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "clearml.fullname" . }}-fileserver - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.fileserver.replicaCount }} - selector: - matchLabels: - {{- include "clearml.selectorLabelsFileServer" . | nindent 6 }} - template: - metadata: - {{- with .Values.fileserver.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "clearml.selectorLabelsFileServer" . | nindent 8 }} - spec: - volumes: - - name: fileserver-data - persistentVolumeClaim: - claimName: {{ include "clearml.fullname" . 
}}-fileserver-data - containers: - - name: {{ .Chart.Name }} - image: "{{ .Values.fileserver.image.repository }}:{{ .Values.fileserver.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.fileserver.image.pullPolicy }} - ports: - - name: http - containerPort: 8081 - protocol: TCP - env: - - name: CLEARML_CONFIG_DIR - value: /opt/clearml/config - {{- if .Values.fileserver.extraEnvs }} - {{ toYaml .Values.fileserver.extraEnvs | nindent 10 }} - {{- end }} - args: - - fileserver - livenessProbe: - exec: - command: - - curl - - -X OPTIONS - - http://localhost:8081/ - readinessProbe: - exec: - command: - - curl - - -X OPTIONS - - http://localhost:8081/ - volumeMounts: - - name: fileserver-data - mountPath: /mnt/fileserver - resources: - {{- toYaml .Values.fileserver.resources | nindent 12 }} - {{- with .Values.fileserver.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.fileserver.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.fileserver.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-webserver.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-webserver.yaml deleted file mode 100644 index 57c738129d..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/deployment-webserver.yaml +++ /dev/null @@ -1,64 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "clearml.fullname" . }}-webserver - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - replicas: {{ .Values.webserver.replicaCount }} - selector: - matchLabels: - {{- include "clearml.selectorLabelsWebServer" . 
| nindent 6 }} - template: - metadata: - {{- with .Values.webserver.podAnnotations }} - annotations: - {{- toYaml . | nindent 8 }} - {{- end }} - labels: - {{- include "clearml.selectorLabelsWebServer" . | nindent 8 }} - spec: - containers: - - name: {{ .Chart.Name }} - image: "{{ .Values.webserver.image.repository }}:{{ .Values.webserver.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.webserver.image.pullPolicy }} - ports: - - name: http - containerPort: 80 - protocol: TCP - livenessProbe: - exec: - command: - - curl - - -X OPTIONS - - http://0.0.0.0:80/ - readinessProbe: - exec: - command: - - curl - - -X OPTIONS - - http://0.0.0.0:80/ - env: - - name: NGINX_APISERVER_ADDRESS - value: "http://{{ include "clearml.fullname" . }}-apiserver:{{ .Values.apiserver.service.port }}" - - name: NGINX_FILESERVER_ADDRESS - value: "http://{{ include "clearml.fullname" . }}-fileserver:{{ .Values.fileserver.service.port }}" - {{- if .Values.webserver.extraEnvs }} - {{ toYaml .Values.webserver.extraEnvs | nindent 10 }} - {{- end }} - args: - - webserver - resources: - {{- toYaml .Values.webserver.resources | nindent 12 }} - {{- with .Values.webserver.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.webserver.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.webserver.tolerations }} - tolerations: - {{- toYaml . | nindent 8 }} - {{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/ingress.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/ingress.yaml deleted file mode 100644 index dcdac14779..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/ingress.yaml +++ /dev/null @@ -1,48 +0,0 @@ -{{- if .Values.ingress.enabled -}} -{{- $fullName := include "clearml.fullname" . 
-}} -{{- if semverCompare ">=1.14-0" .Capabilities.KubeVersion.GitVersion -}} -apiVersion: networking.k8s.io/v1beta1 -{{- else -}} -apiVersion: extensions/v1beta1 -{{- end }} -kind: Ingress -metadata: - name: {{ $fullName }} - labels: - {{- include "clearml.labels" . | nindent 4 }} - {{- with .Values.ingress.annotations }} - annotations: - {{- toYaml . | nindent 4 }} - {{- end }} -spec: - {{- if .Values.ingress.tls.secretName }} - tls: - - hosts: - - "app.{{ .Values.ingress.host }}" - - "files.{{ .Values.ingress.host }}" - - "api.{{ .Values.ingress.host }}" - secretName: {{ .Values.ingress.tls.secretName }} - {{- end }} - rules: - - host: "app.{{ .Values.ingress.host }}" - http: - paths: - - path: "/*" - backend: - serviceName: {{ include "clearml.fullname" . }}-webserver - servicePort: {{ .Values.webserver.service.port }} - - host: "api.{{ .Values.ingress.host }}" - http: - paths: - - path: "/*" - backend: - serviceName: {{ include "clearml.fullname" . }}-apiserver - servicePort: {{ .Values.apiserver.service.port }} - - host: "files.{{ .Values.ingress.host }}" - http: - paths: - - path: "/*" - backend: - serviceName: {{ include "clearml.fullname" . }}-fileserver - servicePort: {{ .Values.fileserver.service.port }} -{{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-agentservices.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-agentservices.yaml deleted file mode 100644 index 47dc2f4db3..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-agentservices.yaml +++ /dev/null @@ -1,13 +0,0 @@ -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: {{ include "clearml.fullname" . }}-agentservices-data - labels: - {{- include "clearml.labels" . 
| nindent 4 }} -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.agentservices.storage.data.size | quote }} - storageClassName: {{ .Values.agentservices.storage.data.class | quote }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-apiserver.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-apiserver.yaml deleted file mode 100644 index 80f226be98..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-apiserver.yaml +++ /dev/null @@ -1,15 +0,0 @@ -{{- if .Values.apiserver.storage.enableConfigVolume }} -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: {{ include "clearml.fullname" . }}-apiserver-config - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.apiserver.storage.config.size | quote }} - storageClassName: {{ .Values.apiserver.storage.config.class | quote }} -{{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-fileserver.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-fileserver.yaml deleted file mode 100644 index 293e3577f0..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/pvc-fileserver.yaml +++ /dev/null @@ -1,13 +0,0 @@ -kind: PersistentVolumeClaim -apiVersion: v1 -metadata: - name: {{ include "clearml.fullname" . }}-fileserver-data - labels: - {{- include "clearml.labels" . 
| nindent 4 }} -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: {{ .Values.fileserver.storage.data.size | quote }} - storageClassName: {{ .Values.fileserver.storage.data.class | quote }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/secret-agent.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/secret-agent.yaml deleted file mode 100644 index 720bfe47b5..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/secret-agent.yaml +++ /dev/null @@ -1,11 +0,0 @@ -{{- range .Values.agentGroups }} ---- -{{ if .clearmlConfig }} -apiVersion: v1 -kind: Secret -metadata: - name: {{ .name }}-conf -data: - clearml.conf: {{ .clearmlConfig | b64enc }} -{{ end }} -{{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/secrets.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/secrets.yaml deleted file mode 100644 index 2fde1e7ef8..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/secrets.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: clearml-conf -data: - apiserver_key: NTQ0MkYzNDQzTUpNT1JXWkEzWkg= - apiserver_secret: QnhhcElSbzlaSU5pOHgyNUNSeHo4V2RtcjJwUWp6dVdWQjRQTkFTWnFDdFR5V2dXVlE= - http_session: OVR3MjBSYmhKMWJMQmlIRU9XWHZocGxLR1ViVGdMekF0d0ZOMm9MUXZXd1MwdVJwRDU= - auth_token: MVNDZjBvdjNObTU0NFRkMm9aMGdYU3JzTng1WGhNV2RWbEt6MXRPZ2N4MTU4YkQ1UlY= - tests_user_key: RU5QMzlFUU00U0xBQ0dENUZYQjc= - tests_user_secret: bFBjbTBpbWJjQlo4bXdnTzd0cGFkdXRpUzNnbkpEMDV4OWo3YWZ3WFBTMzVJS2JwaVE= diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-apiserver.yaml 
b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-apiserver.yaml deleted file mode 100644 index 38683605fe..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-apiserver.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "clearml.fullname" . }}-apiserver - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - type: {{ .Values.apiserver.service.type }} - ports: - - port: {{ .Values.apiserver.service.port }} - targetPort: {{ .Values.apiserver.service.port }} - nodePort: 30008 - protocol: TCP - selector: - {{- include "clearml.selectorLabelsApiServer" . | nindent 4 }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-fileserver.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-fileserver.yaml deleted file mode 100644 index 6f96e276c2..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-fileserver.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "clearml.fullname" . }}-fileserver - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - type: {{ .Values.fileserver.service.type }} - ports: - - port: {{ .Values.fileserver.service.port }} - targetPort: {{ .Values.fileserver.service.port }} - nodePort: 30081 - protocol: TCP - selector: - {{- include "clearml.selectorLabelsFileServer" . 
| nindent 4 }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-webserver.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-webserver.yaml deleted file mode 100644 index 5ed6e8a052..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/templates/service-webserver.yaml +++ /dev/null @@ -1,15 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "clearml.fullname" . }}-webserver - labels: - {{- include "clearml.labels" . | nindent 4 }} -spec: - type: {{ .Values.webserver.service.type }} - ports: - - port: {{ .Values.webserver.service.port }} - targetPort: {{ .Values.webserver.service.port }} - nodePort: 30080 - protocol: TCP - selector: - {{- include "clearml.selectorLabelsWebServer" . | nindent 4 }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/values.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/values.yaml deleted file mode 100644 index 35ba5901be..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/chart/values.yaml +++ /dev/null @@ -1,264 +0,0 @@ -clearml: - defaultCompany: "d1bd92a3b039400cbafc60a7a5b1e52b" -ingress: - enabled: false - name: clearml-server-ingress - annotations: {} - host: "" - tls: - secretName: "" - -apiserver: - prepopulateEnabled: "true" - prepopulateZipFiles: "/opt/clearml/db-pre-populate" - prepopulateArtifactsPath: "/mnt/fileserver" - configDir: /opt/clearml/config - - service: - type: NodePort - port: 8008 - - livenessDelay: 60 - readinessDelay: 60 - - replicaCount: 1 - - image: - repository: "allegroai/clearml" - pullPolicy: IfNotPresent - tag: "1.0.2" - - extraEnvs: [] - - podAnnotations: {} - - resources: {} - # We usually recommend not to specify default resources and to leave 
this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - nodeSelector: - app: "clearml" - - tolerations: [] - - affinity: {} - - # Optional: used in pvc-apiserver containing optional server configuration files - storage: - enableConfigVolume: false - config: - class: "standard" - size: 1Gi - -fileserver: - service: - type: NodePort - port: 8081 - - replicaCount: 1 - - image: - repository: "allegroai/clearml" - pullPolicy: IfNotPresent - tag: "1.0.2" - - extraEnvs: [] - - podAnnotations: {} - - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. - # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - nodeSelector: - app: "clearml" - - tolerations: [] - - affinity: {} - - storage: - data: - class: "standard" - size: 50Gi - -webserver: - extraEnvs: [] - - service: - type: NodePort - port: 80 - - replicaCount: 1 - - image: - repository: "allegroai/clearml" - pullPolicy: IfNotPresent - tag: "1.0.2" - - podAnnotations: {} - - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - nodeSelector: - app: "clearml" - - tolerations: [] - - affinity: {} - -agentservices: - clearmlHostIp: null - agentVersion: "" - clearmlWebHost: null - clearmlFilesHost: null - clearmlGitUser: null - clearmlGitPassword: null - awsAccessKeyId: null - awsSecretAccessKey: null - awsDefaultRegion: null - azureStorageAccount: null - azureStorageKey: null - googleCredentials: null - clearmlWorkerId: "clearml-services" - - replicaCount: 1 - - image: - repository: "allegroai/clearml-agent-services" - pullPolicy: IfNotPresent - tag: "latest" - - extraEnvs: [] - - podAnnotations: {} - - resources: {} - # We usually recommend not to specify default resources and to leave this as a conscious - # choice for the user. This also increases chances charts run on environments with little - # resources, such as Minikube. If you do want to specify resources, uncomment the following - # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
- # limits: - # cpu: 100m - # memory: 128Mi - # requests: - # cpu: 100m - # memory: 128Mi - - nodeSelector: - app: "clearml" - - tolerations: [] - - affinity: {} - - storage: - data: - class: "standard" - size: 50Gi - -agentGroups: - - name: agent-group0 - replicaCount: 0 - nvidiaGpusPerAgent: 1 - agentVersion: "" # if set, it *MUST* include comparison operator (for example ">=0.16.1") - queues: "default" # multiple queues can be specified separated by a space (for example "important_jobs default") - clearmlGitUser: null - clearmlGitPassword: null - clearmlAccessKey: null - clearmlSecretKey: null - awsAccessKeyId: null - awsSecretAccessKey: null - awsDefaultRegion: null - azureStorageAccount: null - azureStorageKey: null - clearmlConfig: |- - sdk { - } - - image: - repository: "nvidia/cuda" - pullPolicy: IfNotPresent - tag: "11.0-base-ubuntu18.04" - - podAnnotations: {} - - nodeSelector: - app: "clearml" - - tolerations: [] - - affinity: {} - -redis: # configuration from https://github.com/bitnami/charts/blob/master/bitnami/redis/values.yaml - enabled: true - image: - registry: docker.io - repository: bitnami/redis - tag: 5.0.10-debian-10-r88 - usePassword: false - databaseNumber: 0 - master: - name: "{{ .Release.Name }}-redis-master" - port: 6379 - persistence: - enabled: true - accessModes: - - ReadWriteOnce - size: 5Gi - cluster: - enabled: false - -mongodb: # configuration from https://github.com/bitnami/charts/blob/master/bitnami/mongodb/values.yaml - enabled: true - image: - registry: docker.io - repository: bitnami/mongodb - tag: 3.6.21-debian-9-r71 - architecture: standalone - auth: - enabled: false - replicaCount: 1 - persistence: - enabled: true - accessModes: - - ReadWriteOnce - size: 50Gi - service: - name: "{{ .Release.Name }}-mongodb" - type: ClusterIP - port: 27017 - portName: mongo-service - -elasticsearch: - enabled: false - name: "{{ .Release.Name }}-elastic-master" - image: balast/elasticsearch:6_50 - httpPort: 9200 - nodeSelector: - app: 
"clearml" diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/ingress.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/ingress.tf deleted file mode 100644 index 1af5f7ed2c..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/ingress.tf +++ /dev/null @@ -1,101 +0,0 @@ -locals { - clearml_webserver_subdomain = "app.clearml" - clearml_fileserver_subdomain = "files.clearml" - clearml_apiserver_subdomain = "api.clearml" - clearml-prefix = "clearml-clearml-server-cloud-ready" - clearml_webserver = "${local.clearml-prefix}-webserver" - clearml_fileserver = "${local.clearml-prefix}-fileserver" - clearml_apiserver = "${local.clearml-prefix}-apiserver" - - forward_auth_middleware = "traefik-forward-auth" - clearml_middleware = var.enable-forward-auth ? [ - { - name = local.forward_auth_middleware - namespace = var.namespace - } - ] : [] -} - -resource "kubernetes_manifest" "clearml-app" { - manifest = { - apiVersion = "traefik.containo.us/v1alpha1" - kind = "IngressRoute" - metadata = { - name = "clearml-app" - namespace = var.namespace - } - spec = { - entryPoints = ["websecure"] - routes = [ - { - kind = "Rule" - match = "Host(`${local.clearml_webserver_subdomain}.${var.external-url}`)" - middlewares = local.clearml_middleware - services = [ - { - name = local.clearml_webserver - port = 80 - namespace = var.namespace - } - ] - } - ] - } - } -} - -resource "kubernetes_manifest" "clearml-files" { - manifest = { - apiVersion = "traefik.containo.us/v1alpha1" - kind = "IngressRoute" - metadata = { - name = "clearml-files" - namespace = var.namespace - } - spec = { - entryPoints = ["websecure"] - routes = [ - { - kind = "Rule" - match = "Host(`${local.clearml_fileserver_subdomain}.${var.external-url}`)" - middlewares = local.clearml_middleware - services = [ - { - name = local.clearml_fileserver - port = 8081 - namespace = var.namespace - } - ] - 
} - ] - } - } -} - -resource "kubernetes_manifest" "clearml-api" { - manifest = { - apiVersion = "traefik.containo.us/v1alpha1" - kind = "IngressRoute" - metadata = { - name = "clearml-api" - namespace = var.namespace - } - spec = { - entryPoints = ["websecure"] - routes = [ - { - kind = "Rule" - match = "Host(`${local.clearml_apiserver_subdomain}.${var.external-url}`)" - middlewares = local.clearml_middleware - services = [ - { - name = local.clearml_apiserver - port = 8008 - namespace = var.namespace - } - ] - } - ] - } - } -} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/main.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/main.tf deleted file mode 100644 index c14f831f2f..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/main.tf +++ /dev/null @@ -1,58 +0,0 @@ -resource "helm_release" "clearml" { - name = "clearml" - namespace = var.namespace - chart = "${path.module}/chart" - dependency_update = true - values = concat([ - file("${path.module}/chart/values.yaml") - ], var.overrides) - - dynamic "set" { - for_each = var.node_selector - content { - name = "apiserver.nodeSelector.${set.key}" - value = set.value - } - } - - dynamic "set" { - for_each = var.node_selector - content { - name = "fileserver.nodeSelector.${set.key}" - value = set.value - } - } - - dynamic "set" { - for_each = var.node_selector - content { - name = "webserver.nodeSelector.${set.key}" - value = set.value - } - } - - dynamic "set" { - for_each = var.node_selector - content { - name = "agentservices.nodeSelector.${set.key}" - value = set.value - } - } - - // dynamic "set" { - // for_each = var.node_selector - // content { - // name = "agentGroups.nodeSelector.${set.key}" - // value = set.value - // } - // } - - dynamic "set" { - for_each = var.node_selector - content { - name = "elasticsearch.nodeSelector.${set.key}" - value = set.value - } - } - -} 
diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/variables.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/variables.tf deleted file mode 100644 index 7f70e6d484..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/clearml/variables.tf +++ /dev/null @@ -1,34 +0,0 @@ -variable "namespace" { - description = "namespace to deploy clearml" - type = string - default = "dev" -} - -variable "node_selector" { - description = "Node to deploy on" - default = { - "app" = "clearml" - } -} - -variable "elasticsearch_image" { - description = "Elasticsearch docker image" - type = string - default = "balast/elasticsearch:6_50" -} - -variable "external-url" { - description = "External url that jupyterhub cluster is accessible" - type = string -} - -variable "enable-forward-auth" { - type = bool - default = true -} - -variable "overrides" { - description = "Clearml helm chart overrides" - type = list(string) - default = [] -} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py index 6f9e1089d9..6ed6232ba8 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/conda-store/config/conda_store_config.py @@ -132,12 +132,13 @@ async def authenticate(self, request): namespaces.add(group_name) role_bindings[f"{group_name}/*"] = roles - conda_store = get_conda_store(request) - for namespace in namespaces: - _namespace = api.get_namespace(conda_store.db, name=namespace) - if _namespace is None: - conda_store.db.add(orm.Namespace(name=namespace)) - conda_store.db.commit() + conda_store = await get_conda_store(request) + 
with conda_store.session_factory() as db: + for namespace in namespaces: + _namespace = api.get_namespace(db, name=namespace) + if _namespace is None: + db.add(orm.Namespace(name=namespace)) + db.commit() return schema.AuthenticationToken( primary_namespace=username, diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_notebook_config.py.tpl b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_notebook_config.py.tpl deleted file mode 100644 index 62270bb602..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyter/jupyter_notebook_config.py.tpl +++ /dev/null @@ -1,45 +0,0 @@ -# To help jupyterhub-idle-culler cull user servers, we configure the kernel manager to cull -# idle kernels that would otherwise make the user servers report themselves as active which -# is part of what jupyterhub-idle-culler considers. - -# Extra config available at: -# https://zero-to-jupyterhub.readthedocs.io/en/1.x/jupyterhub/customizing/user-management.html#culling-user-pods - -# Timeout (in seconds) in which a terminal has been inactive and ready to -# be culled. -c.TerminalManager.cull_inactive_timeout = ${terminal_cull_inactive_timeout} * 60 - -# The interval (in seconds) on which to check for terminals exceeding the -# inactive timeout value. 
-c.TerminalManager.cull_interval = ${terminal_cull_interval} * 60 - -# cull_idle_timeout: timeout (in seconds) after which an idle kernel is -# considered ready to be culled -c.MappingKernelManager.cull_idle_timeout = ${kernel_cull_idle_timeout} * 60 - -# cull_interval: the interval (in seconds) on which to check for idle -# kernels exceeding the cull timeout value -c.MappingKernelManager.cull_interval = ${kernel_cull_interval} * 60 - -# cull_connected: whether to consider culling kernels which have one -# or more connections -c.MappingKernelManager.cull_connected = ${kernel_cull_connected} - -# cull_busy: whether to consider culling kernels which are currently -# busy running some code -c.MappingKernelManager.cull_busy = ${kernel_cull_busy} - -# Shut down the server after N seconds with no kernels or terminals -# running and no activity. -c.NotebookApp.shutdown_no_activity_timeout = ${server_shutdown_no_activity_timeout} * 60 - -############################################################################### -# JupyterHub idle culler total timeout corresponds (approximately) to: -# max(cull_idle_timeout, cull_inactive_timeout) + shutdown_no_activity_timeout - -from argo_jupyter_scheduler.executor import ArgoExecutor -from argo_jupyter_scheduler.scheduler import ArgoScheduler - -c.Scheduler.execution_manager_class=ArgoExecutor -c.SchedulerApp.scheduler_class=ArgoScheduler -c.SchedulerApp.scheduler_class.use_conda_store_env=True diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/main.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/main.tf deleted file mode 100644 index b00ff0d07a..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/main.tf +++ /dev/null @@ -1,76 +0,0 @@ -resource "helm_release" "kbatch" { - name = "kbatch" - namespace = var.namespace - repository = "https://kbatch-dev.github.io/helm-chart" - chart = "kbatch-proxy" - 
version = local.kbatch_version - - values = concat([ - file("${path.module}/values.yaml"), - jsonencode({ - app = { - jupyterhub_api_token = var.jupyterhub_api_token - jupyterhub_api_url = "https://${var.external-url}/hub/api/" - extra_env = { - KBATCH_PREFIX = "" - KBATCH_JOB_EXTRA_ENV = jsonencode({ - DASK_GATEWAY__AUTH__TYPE = "jupyterhub" - DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE = "${var.dask-worker-image.name}:${var.dask-worker-image.tag}" - DASK_GATEWAY__ADDRESS = "${var.dask-gateway-address}" - DASK_GATEWAY__PROXY_ADDRESS = "${var.dask-gateway-proxy-address}" - }) - } - } - image = { - tag = local.kbatch_version - } - }) - ]) - - set_sensitive { - name = "jupyterHubToken" - value = var.jupyterhub_api_token - } - - set { - name = "kbatchImage" - value = var.image - } - - set { - name = "namespace" - value = var.namespace - } - -} - -resource "kubernetes_cluster_role" "kbatch" { - metadata { - name = "${var.name}-kbatch" - } - - rule { - api_groups = ["", "batch"] - resources = ["*"] - verbs = ["get", "watch", "list", "patch", "create", "delete"] - } -} - - -resource "kubernetes_cluster_role_binding" "kbatch" { - metadata { - name = "${var.name}-kbatch" - } - - role_ref { - api_group = "rbac.authorization.k8s.io" - kind = "ClusterRole" - name = kubernetes_cluster_role.kbatch.metadata.0.name - } - subject { - kind = "ServiceAccount" - name = local.kbatch_service_account_name - namespace = var.namespace - api_group = "" - } -} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/values.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/values.yaml deleted file mode 100644 index b7c2489d1f..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/values.yaml +++ /dev/null @@ -1 +0,0 @@ -# https://github.com/kbatch-dev/helm-chart/blob/main/kbatch/values.yaml diff --git 
a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/variables.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/variables.tf deleted file mode 100644 index 40cba6cd29..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/variables.tf +++ /dev/null @@ -1,57 +0,0 @@ -variable "name" { - description = "name prefix to assign to kbatch" - type = string - default = "nebari" -} - -variable "jupyterhub_api_token" { - type = string - default = "" -} - -variable "namespace" { - type = string - default = "dev" -} - -variable "image" { - type = string - default = "" -} - -variable "node-group" { - description = "Node key value pair for bound resources" - type = object({ - key = string - value = string - }) -} - -variable "external-url" { - description = "External url that jupyterhub cluster is accessible" - type = string -} - -variable "overrides" { - description = "kbatch helm chart list of overrides" - type = list(string) - default = [] -} - -variable "dask-gateway-address" { - description = "Dask Gateway address" - type = string -} - -variable "dask-gateway-proxy-address" { - description = "Dask Gateway proxy-address" - type = string -} - -variable "dask-worker-image" { - description = "Dask worker image" - type = object({ - name = string - tag = string - }) -} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/versions.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/versions.tf deleted file mode 100644 index 268d9c72d9..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/kbatch/versions.tf +++ /dev/null @@ -1,18 +0,0 @@ -terraform { - required_providers { - helm = { - source = "hashicorp/helm" - version = "2.1.2" - } - kubernetes = { - source = "hashicorp/kubernetes" - version = "2.20.0" - } - } - required_version = ">= 1.0" -} - 
-locals { - kbatch_service_account_name = "kbatch-kbatch-proxy" - kbatch_version = "0.4.2" -} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/.helmignore b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/.helmignore deleted file mode 100644 index 0e8a0eb36f..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/.helmignore +++ /dev/null @@ -1,23 +0,0 @@ -# Patterns to ignore when building packages. -# This supports shell glob matching, relative path matching, and -# negation (prefixed with !). Only one pattern per line. -.DS_Store -# Common VCS dirs -.git/ -.gitignore -.bzr/ -.bzrignore -.hg/ -.hgignore -.svn/ -# Common backup files -*.swp -*.bak -*.tmp -*.orig -*~ -# Various IDEs -.project -.idea/ -*.tmproj -.vscode/ diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/Chart.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/Chart.yaml deleted file mode 100644 index 45d157bb49..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/Chart.yaml +++ /dev/null @@ -1,23 +0,0 @@ -apiVersion: v2 -name: prefect -description: A Helm chart for deploying prefect - -# A chart can be either an 'application' or a 'library' chart. -# -# Application charts are a collection of templates that can be packaged into versioned archives -# to be deployed. -# -# Library charts provide useful utilities or functions for the chart developer. They're included as -# a dependency of application charts to inject those utilities and functions into the rendering -# pipeline. Library charts do not define any templates and therefore cannot be deployed. -type: application - -# This is the chart version. 
This version number should be incremented each time you make changes -# to the chart and its templates, including the app version. -# Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 - -# This is the version number of the application being deployed. This version number should be -# incremented each time you make changes to the application. Versions are not expected to -# follow Semantic Versioning. They should reflect the version the application is using. -appVersion: 0.1.0 diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/_helpers.tpl b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/_helpers.tpl deleted file mode 100644 index 9e91be6aad..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/_helpers.tpl +++ /dev/null @@ -1,63 +0,0 @@ -{{/* vim: set filetype=mustache: */}} -{{/* -Expand the name of the chart. -*/}} -{{- define "prefect.name" -}} -{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Create a default fully qualified app name. -We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). -If release name contains chart name it will be used as a full name. -*/}} -{{- define "prefect.fullname" -}} -{{- if .Values.fullnameOverride }} -{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- $name := default .Chart.Name .Values.nameOverride }} -{{- if contains $name .Release.Name }} -{{- .Release.Name | trunc 63 | trimSuffix "-" }} -{{- else }} -{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} -{{- end }} -{{- end }} -{{- end }} - -{{/* -Create chart name and version as used by the chart label. 
-*/}} -{{- define "prefect.chart" -}} -{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} -{{- end }} - -{{/* -Common labels -*/}} -{{- define "prefect.labels" -}} -helm.sh/chart: {{ include "prefect.chart" . }} -{{ include "prefect.selectorLabels" . }} -{{- if .Chart.AppVersion }} -app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} -{{- end }} -app.kubernetes.io/managed-by: {{ .Release.Service }} -{{- end }} - -{{/* -Selector labels -*/}} -{{- define "prefect.selectorLabels" -}} -app.kubernetes.io/name: {{ include "prefect.name" . }} -app.kubernetes.io/instance: {{ .Release.Name }} -{{- end }} - -{{/* -Create the name of the service account to use -*/}} -{{- define "prefect.serviceAccountName" -}} -{{- if .Values.serviceAccount.create }} -{{- default (include "prefect.fullname" .) .Values.serviceAccount.name }} -{{- else }} -{{- default "default" .Values.serviceAccount.name }} -{{- end }} -{{- end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/prefect.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/prefect.yaml deleted file mode 100644 index b6ecdb82b6..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/prefect.yaml +++ /dev/null @@ -1,111 +0,0 @@ ---- -apiVersion: apps/v1 -kind: Deployment -metadata: - labels: - app: prefect-agent - name: prefect-agent -spec: - replicas: 1 - selector: - matchLabels: - app: prefect-agent - template: - metadata: - labels: - app: prefect-agent - spec: - serviceAccountName: {{ .Values.serviceAccount }} - containers: - - args: - - prefect agent start kubernetes -e JUPYTERHUB_API_TOKEN=$JUPYTERHUB_API_TOKEN {{- range $k, $v := .Values.secretEnvVars }} {{ printf " -e %s=$%s" $k $k -}} {{ end }} - command: - - /bin/bash - - -c - env: - {{- range $k, $v := .Values.envVars }} - - name: {{ $k }} - 
value: {{ squote $v -}} - {{ end }} - {{- range $i, $k := keys .Values.secretEnvVars }} - - name: {{ $k }} - valueFrom: - secretKeyRef: - name: prefect-envsecret-{{ $i }} - key: prefectEnvSecret{{ $i -}} - {{ end }} - - name: PREFECT__CLOUD__AGENT__AUTH_TOKEN - valueFrom: - secretKeyRef: - name: prefect-token - key: prefectToken - - name: JUPYTERHUB_API_TOKEN - valueFrom: - secretKeyRef: - name: jupyterhub-token - key: jupyterHubToken - - name: PREFECT__CLOUD__API - value: {{ .Values.cloudApi }} - - name: NAMESPACE - value: {{ .Values.namespace }} - - name: SERVICE_ACCOUNT_NAME - value: {{ .Values.serviceAccount }} - - name: PREFECT__BACKEND - value: cloud - - name: PREFECT__CLOUD__AGENT__AGENT_ADDRESS - value: http://:8080 - image: {{ .Values.prefectImage }} - imagePullPolicy: Always - livenessProbe: - failureThreshold: 2 - httpGet: - path: /api/health - port: 8080 - initialDelaySeconds: 40 - periodSeconds: 40 - name: agent - resources: - limits: - cpu: 100m - memory: 128Mi - ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: {{ .Values.serviceAccount }} - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: Role -metadata: - name: prefect-agent-rbac - namespace: {{ .Values.namespace }} -rules: - - apiGroups: - - batch - - extensions - resources: - - jobs - verbs: - - '*' - - apiGroups: - - '*' - resources: - - pods - verbs: - - '*' - ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: prefect-agent-rbac - namespace: {{ .Values.namespace }} -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: prefect-agent-rbac -subjects: - - kind: ServiceAccount - name: {{ .Values.serviceAccount }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/secret.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/secret.yaml deleted file mode 100644 index 02af94d0c3..0000000000 --- 
a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/templates/secret.yaml +++ /dev/null @@ -1,29 +0,0 @@ -apiVersion: v1 -kind: Secret -metadata: - name: prefect-token -data: - prefectToken: "{{ .Values.prefectToken | b64enc }}" - ---- -apiVersion: v1 -kind: Secret -metadata: - name: jupyterhub-token -data: - jupyterHubToken: "{{ .Values.jupyterHubToken | b64enc }}" - ---- -{{ $index := dict "index" "0" }} -{{- range $k, $v := .Values.secretEnvVars }} -{{ $i := get $index "index" }} -{{ $ni := add1 $i }} -apiVersion: v1 -kind: Secret -metadata: - name: prefect-envsecret-{{ $i }} -data: - prefectEnvSecret{{ $i }}: "{{ $v | b64enc }}" -{{ $_ := set $index "index" $ni }} ---- -{{ end }} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/values.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/values.yaml deleted file mode 100644 index 46063e6e3a..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/chart/values.yaml +++ /dev/null @@ -1,5 +0,0 @@ -prefectImage: -namespace: -serviceAccount: prefect -prefectToken: "" -cloudApi: "" diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/main.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/main.tf deleted file mode 100644 index 2cab6c42cf..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/main.tf +++ /dev/null @@ -1,33 +0,0 @@ -resource "helm_release" "prefect" { - name = "prefect" - namespace = var.namespace - chart = "${path.module}/chart" - - values = concat([ - file("${path.module}/values.yaml")], var.overrides) - - set_sensitive { - name = "prefectToken" - value = var.prefect_token - } - - set_sensitive { - name = "jupyterHubToken" - value = var.jupyterhub_api_token - } - - set { - name = "prefectImage" 
- value = var.image - } - - set { - name = "namespace" - value = var.namespace - } - - set { - name = "cloudApi" - value = var.cloud_api - } -} diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/values.yaml b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/values.yaml deleted file mode 100644 index 65fbeba700..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/values.yaml +++ /dev/null @@ -1,17 +0,0 @@ - -envVars: {} - -secretEnvVars: {} - -prefect_agent: - job: - imagePullSecrets: '' - imagePullPolicy: '' - resources: - requests: - memory: '' - cpu: '' - limits: - memory: '' - cpu: '' - prefectLabels: '[]' diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/variables.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/variables.tf deleted file mode 100644 index a6dbdd935a..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/prefect/variables.tf +++ /dev/null @@ -1,29 +0,0 @@ -variable "jupyterhub_api_token" { - type = string - default = "" -} - -variable "namespace" { - type = string - default = "dev" -} - -variable "prefect_token" { - type = string -} - -variable "image" { - type = string - default = "prefecthq/prefect:0.14.22-python3.8" -} - -variable "cloud_api" { - type = string - default = "https://api.prefect.io" -} - -variable "overrides" { - description = "Prefect helm chart list of overrides" - type = list(string) - default = [] -} diff --git a/src/_nebari/stages/kubernetes_services/template/prefect.tf b/src/_nebari/stages/kubernetes_services/template/prefect.tf deleted file mode 100644 index dea9c18a9a..0000000000 --- a/src/_nebari/stages/kubernetes_services/template/prefect.tf +++ /dev/null @@ -1,34 +0,0 @@ -# ======================= VARIABLES ====================== -variable "prefect-enabled" { - 
description = "Prefect enabled or disabled" - type = bool -} - -variable "prefect-image" { - description = "Prefect image" - type = string -} - -variable "prefect-token" { - description = "Prefect token" - type = string -} - -variable "prefect-overrides" { - description = "Prefect token" - type = map(any) -} - - -# ====================== RESOURCES ======================= -module "prefect" { - count = var.prefect-enabled ? 1 : 0 - - source = "./modules/kubernetes/services/prefect" - - namespace = var.environment - jupyterhub_api_token = module.jupyterhub.services.prefect.api_token - prefect_token = var.prefect-token - image = var.prefect-image - overrides = [yamlencode(var.prefect-overrides)] -} diff --git a/src/_nebari/subcommands/init.py b/src/_nebari/subcommands/init.py index b4276438b3..f519b97f8f 100644 --- a/src/_nebari/subcommands/init.py +++ b/src/_nebari/subcommands/init.py @@ -2,7 +2,7 @@ import os import pathlib import re -import typing +from typing import Optional import questionary import rich @@ -84,17 +84,17 @@ class GitRepoEnum(str, enum.Enum): class InitInputs(schema.Base): cloud_provider: ProviderEnum = ProviderEnum.local project_name: schema.project_name_pydantic = "" - domain_name: typing.Optional[str] = None - namespace: typing.Optional[schema.namespace_pydantic] = "dev" + domain_name: Optional[str] = None + namespace: Optional[schema.namespace_pydantic] = "dev" auth_provider: AuthenticationEnum = AuthenticationEnum.password auth_auto_provision: bool = False - repository: typing.Optional[schema.github_url_pydantic] = None + repository: Optional[schema.github_url_pydantic] = None repository_auto_provision: bool = False ci_provider: CiEnum = CiEnum.none terraform_state: TerraformStateEnum = TerraformStateEnum.remote - kubernetes_version: typing.Union[str, None] = None - region: typing.Union[str, None] = None - ssl_cert_email: typing.Union[schema.email_pydantic, None] = None + kubernetes_version: Optional[str] = None + region: Optional[str] = None 
+ ssl_cert_email: Optional[schema.email_pydantic] = None disable_prompt: bool = False output: pathlib.Path = pathlib.Path("nebari-config.yaml") @@ -448,7 +448,7 @@ def check_cloud_provider_region(region: str, cloud_provider: str) -> str: if not region: region = GCP_DEFAULT_REGION rich.print(DEFAULT_REGION_MSG.format(region=region)) - if region not in google_cloud.regions(os.environ["PROJECT_ID"]): + if region not in google_cloud.regions(): raise ValueError( f"Invalid region `{region}`. Please refer to the GCP docs for a list of valid regions: {GCP_REGIONS}" ) @@ -490,7 +490,7 @@ def init( "Project name must (1) consist of only letters, numbers, hyphens, and underscores, (2) begin and end with a letter, and (3) contain between 3 and 16 characters.", ), ), - domain_name: typing.Optional[str] = typer.Option( + domain_name: Optional[str] = typer.Option( None, "--domain-name", "--domain", diff --git a/src/_nebari/upgrade.py b/src/_nebari/upgrade.py index 077c582038..7756e18190 100644 --- a/src/_nebari/upgrade.py +++ b/src/_nebari/upgrade.py @@ -5,8 +5,10 @@ import string from abc import ABC from pathlib import Path +from typing import Any, ClassVar, Dict import rich +from packaging.version import Version from pydantic.error_wrappers import ValidationError from rich.prompt import Prompt @@ -80,17 +82,29 @@ def do_upgrade(config_filename, attempt_fixes=False): class UpgradeStep(ABC): - _steps = {} - - version = "" # Each subclass must have a version - these should be full release versions (not dev/prerelease) + _steps: ClassVar[Dict[str, Any]] = {} + version: ClassVar[str] = "" def __init_subclass__(cls): - assert cls.version != "" + try: + parsed_version = Version(cls.version) + except ValueError as exc: + raise ValueError(f"Invalid version string {cls.version}") from exc + + cls.parsed_version = parsed_version + assert ( + rounded_ver_parse(cls.version) == parsed_version + ), f"Invalid version {cls.version}: must be a full release version, not a 
dev/prerelease/postrelease version" assert ( cls.version not in cls._steps - ) # Would mean multiple upgrades for the same step + ), f"Duplicate UpgradeStep version {cls.version}" cls._steps[cls.version] = cls + @classmethod + def clear_steps_registry(cls): + """Clears the steps registry. Useful for testing.""" + cls._steps.clear() + @classmethod def has_step(cls, version): return version in cls._steps @@ -157,9 +171,7 @@ def upgrade_step(self, config, start_version, config_filename, *args, **kwargs): for any actions that are only required for the particular upgrade you are creating. """ finish_version = self.get_version() - __rounded_finish_version__ = ".".join( - [str(c) for c in rounded_ver_parse(finish_version)] - ) + __rounded_finish_version__ = str(rounded_ver_parse(finish_version)) rich.print( f"\n---> Starting upgrade from [green]{start_version or 'old version'}[/green] to [green]{finish_version}[/green]\n" ) @@ -636,7 +648,25 @@ def _version_specific_upgrade( return config -__rounded_version__ = ".".join([str(c) for c in rounded_ver_parse(__version__)]) +class Upgrade_2023_11_1(UpgradeStep): + version = "2023.11.1" + + def _version_specific_upgrade( + self, config, start_version, config_filename: Path, *args, **kwargs + ): + rich.print("\n ⚠️ Warning ⚠️") + rich.print( + "-> Please run the [green]rm -rf stages[/green] so that we can regenerate an updated set of Terraform scripts for your deployment." + ) + rich.print("\n ⚠️ Deprecation Warning ⚠️") + rich.print( + f"-> ClearML, Prefect and kbatch are no longer supported in Nebari version [green]{self.version}[/green] and will be uninstalled." 
+ ) + + return config + + +__rounded_version__ = str(rounded_ver_parse(__version__)) # Manually-added upgrade steps must go above this line if not UpgradeStep.has_step(__rounded_version__): diff --git a/src/_nebari/utils.py b/src/_nebari/utils.py index d32d0056a9..3378116a1d 100644 --- a/src/_nebari/utils.py +++ b/src/_nebari/utils.py @@ -93,6 +93,7 @@ def kill_process(): if timeout_timer is not None: timeout_timer.cancel() + process.stdout.close() return process.wait( timeout=10 ) # Should already have finished because we have drained stdout diff --git a/src/_nebari/version.py b/src/_nebari/version.py index 7af6817cbe..fcfa649cec 100644 --- a/src/_nebari/version.py +++ b/src/_nebari/version.py @@ -1,26 +1,25 @@ """a backport for the nebari version references.""" -import re from importlib.metadata import distribution +from packaging.version import Version + __version__ = distribution("nebari").version -def rounded_ver_parse(versionstr): +def rounded_ver_parse(version: str) -> Version: """ - Take a package version string and return an int tuple of only (major,minor,patch), - ignoring and post/dev etc. + Rounds a version string to the nearest patch version. + + Parameters + ---------- + version : str + A version string. - So: - rounded_ver_parse("0.1.2") returns (0,1,2) - rounded_ver_parse("0.1.2.dev65+g2de53174") returns (0,1,2) - rounded_ver_parse("0.1") returns (0,1,0) + Returns + ------- + packaging.version.Version + A version object. 
""" - m = re.match( - "^(?P[0-9]+)(\\.(?P[0-9]+)(\\.(?P[0-9]+))?)?", versionstr - ) - assert m is not None - major = int(m.group("major") or 0) - minor = int(m.group("minor") or 0) - patch = int(m.group("patch") or 0) - return (major, minor, patch) + base_version = Version(version).base_version + return Version(base_version) diff --git a/tests/test_dependencies.py b/tests/test_dependencies.py deleted file mode 100644 index 1bef9a64d5..0000000000 --- a/tests/test_dependencies.py +++ /dev/null @@ -1,65 +0,0 @@ -import subprocess -from pathlib import Path - -import pytest - -SRC_DIR = Path(__file__).parent.parent -PYPROJECT = SRC_DIR / "pyproject.toml" - - -@pytest.mark.conda -def test_build_by_conda_forge(tmp_path): - """ - This test ensures that nebari can be built and packaged by conda-forge. - - This is achieved by walking through the following steps: - 1. Use Python build package to generate the `sdist` .tar.gz file - 2. Use grayskull package to generate the `meta.yaml` recipe file - 3. Use conda build to attempt to build the nebari package from the `meta.yaml` - - These steps mimic what takes places on the conda-forge/nebari-feedstock repo whenever - a new version of the package gets released. 
- - NOTE: this test requires conda and conda-build - """ - - assert PYPROJECT.exists() - - try: - # build sdist - subprocess.run( - ["python", "-m", "build", SRC_DIR, "--outdir", tmp_path], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - ) - - # get location of sdist file built above - sdist_loc = next(tmp_path.glob("*.tar.gz")) - # run grayskull to create the meta.yaml using the local sdist file - subprocess.run( - [ - "grayskull", - "pypi", - "--strict-conda-forge", - sdist_loc, - "--output", - tmp_path, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - ) - - # get the directory the meta.yaml is in - meta_loc = tmp_path / "nebari" - # try to run conda build to build package from meta.yaml - subprocess.run( - ["conda", "build", meta_loc], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - check=True, - ) - except subprocess.CalledProcessError as e: - print(e.stderr.decode("utf-8")) - raise e diff --git a/tests/tests_unit/conftest.py b/tests/tests_unit/conftest.py index a65a4ce8aa..e98661c214 100644 --- a/tests/tests_unit/conftest.py +++ b/tests/tests_unit/conftest.py @@ -192,6 +192,18 @@ def nebari_render(nebari_config, nebari_stages, tmp_path): return tmp_path, config_filename +@pytest.fixture +def new_upgrade_cls(): + from _nebari.upgrade import UpgradeStep + + assert UpgradeStep._steps + steps_cache = UpgradeStep._steps.copy() + UpgradeStep.clear_steps_registry() + assert not UpgradeStep._steps + yield UpgradeStep + UpgradeStep._steps = steps_cache + + @pytest.fixture def config_schema(): return nebari_plugin_manager.config_schema diff --git a/tests/tests_unit/test_provider.py b/tests/tests_unit/test_provider.py new file mode 100644 index 0000000000..3c4f35a1d0 --- /dev/null +++ b/tests/tests_unit/test_provider.py @@ -0,0 +1,54 @@ +from contextlib import nullcontext + +import pytest + +from _nebari.provider.cloud.google_cloud import check_missing_service + + +@pytest.mark.parametrize( + "activated_services, 
exception", + [ + ( + { + "Compute Engine API", + "Kubernetes Engine API", + "Cloud Monitoring API", + "Cloud Autoscaling API", + "Identity and Access Management (IAM) API", + "Cloud Resource Manager API", + }, + nullcontext(), + ), + ( + { + "Compute Engine API", + "Kubernetes Engine API", + "Cloud Monitoring API", + "Cloud Autoscaling API", + "Identity and Access Management (IAM) API", + "Cloud Resource Manager API", + "Cloud SQL Admin API", + }, + nullcontext(), + ), + ( + { + "Compute Engine API", + "Kubernetes Engine API", + "Cloud Monitoring API", + "Cloud Autoscaling API", + "Cloud SQL Admin API", + }, + pytest.raises(ValueError, match=r"Missing required services:.*"), + ), + ], +) +def test_gcp_missing_service(monkeypatch, activated_services, exception): + def mock_return(): + return activated_services + + monkeypatch.setattr( + "_nebari.provider.cloud.google_cloud.activated_services", mock_return + ) + with exception: + check_missing_service() diff --git a/tests/tests_unit/test_schema.py b/tests/tests_unit/test_schema.py index 21efe47c2c..b4fb58bc62 100644 --- a/tests/tests_unit/test_schema.py +++ b/tests/tests_unit/test_schema.py @@ -137,3 +137,33 @@ def test_multiple_providers(config_schema): msg = r"Multiple providers set: \['local', 'existing'\]" with pytest.raises(ValidationError, match=msg): config_schema(**config_dict) + + +def test_aws_premissions_boundary(config_schema): + permissions_boundary = "arn:aws:iam::123456789012:policy/MyBoundaryPolicy" + config_dict = { + "project_name": "test", + "provider": "aws", + "amazon_web_services": { + "region": "us-east-1", + "kubernetes_version": "1.19", + "permissions_boundary": f"{permissions_boundary}", + }, + } + config = config_schema(**config_dict) + assert config.provider == "aws" + assert config.amazon_web_services.permissions_boundary == permissions_boundary + + +@pytest.mark.parametrize("provider", ["local", "existing"]) +def test_setted_provider(config_schema, provider): + config_dict = { + 
"project_name": "test", + "provider": provider, + f"{provider}": {"kube_context": "some_context"}, + } + config = config_schema(**config_dict) + assert config.provider == provider + result_config_dict = config.dict() + assert provider in result_config_dict + assert result_config_dict[provider]["kube_context"] == "some_context" diff --git a/tests/tests_unit/test_upgrade.py b/tests/tests_unit/test_upgrade.py index 0946dcd99d..4871a1fe07 100644 --- a/tests/tests_unit/test_upgrade.py +++ b/tests/tests_unit/test_upgrade.py @@ -1,3 +1,4 @@ +from contextlib import nullcontext from pathlib import Path import pytest @@ -76,7 +77,7 @@ def test_upgrade_4_0( assert not hasattr(config.security, "users") assert not hasattr(config.security, "groups") - __rounded_version__ = ".".join([str(c) for c in rounded_ver_parse(__version__)]) + __rounded_version__ = rounded_ver_parse(__version__) # Check image versions have been bumped up assert ( @@ -99,3 +100,49 @@ def test_upgrade_4_0( tmp_qhub_config_backup = Path(tmp_path, f"{old_qhub_config_path.name}.old.backup") assert orig_contents == tmp_qhub_config_backup.read_text() + + +@pytest.mark.parametrize( + "version_str, exception", + [ + ("1.0.0", nullcontext()), + ("1.cool.0", pytest.raises(ValueError, match=r"Invalid version string .*")), + ("0,1.0", pytest.raises(ValueError, match=r"Invalid version string .*")), + ("", pytest.raises(ValueError, match=r"Invalid version string .*")), + ( + "1.0.0-rc1", + pytest.raises( + AssertionError, + match=r"Invalid version .*: must be a full release version, not a dev/prerelease/postrelease version", + ), + ), + ( + "1.0.0dev1", + pytest.raises( + AssertionError, + match=r"Invalid version .*: must be a full release version, not a dev/prerelease/postrelease version", + ), + ), + ], +) +def test_version_string(new_upgrade_cls, version_str, exception): + with exception: + + class DummyUpgrade(new_upgrade_cls): + version = version_str + + +def test_duplicated_version(new_upgrade_cls): + 
duplicated_version = "1.2.3" + with pytest.raises( + AssertionError, match=rf"Duplicate UpgradeStep version {duplicated_version}" + ): + + class DummyUpgrade(new_upgrade_cls): + version = duplicated_version + + class DummyUpgrade2(new_upgrade_cls): + version = duplicated_version + + class DummyUpgrade3(new_upgrade_cls): + version = "1.2.4"