From 148776114985b9bebf08986c18f4e592efe682c8 Mon Sep 17 00:00:00 2001 From: Ruinong Tian Date: Fri, 7 Jun 2024 07:42:01 +0000 Subject: [PATCH] change for v2 major version release Details: 1. Make each major version has its own cuda and base image version 2. Update image key dependencies accordingly --- .../v0/v0.12/v0.12.0/gpu_cuda_version.json | 4 + build_artifacts/v1/v1.4/gpu_cuda_version.json | 4 + build_artifacts/v1/v1.5/gpu_cuda_version.json | 4 + build_artifacts/v1/v1.6/gpu_cuda_version.json | 4 + build_artifacts/v1/v1.7/gpu_cuda_version.json | 4 + .../v1/v1.8/v1.8.0/gpu_cuda_version.json | 4 + build_artifacts/v2/v2.0/v2.0.0/Dockerfile | 204 ++++++++++++++++++ build_artifacts/v2/v2.0/v2.0.0/cpu.env.in | 50 +++++ .../code_editor_machine_settings.json | 4 + .../dirs/etc/code-editor/extensions.txt | 3 + .../v2/v2.0/v2.0.0/dirs/etc/conda/.condarc | 6 + .../dirs/etc/jupyter/jupyter_server_config.py | 28 +++ .../conf.d/supervisord-code-editor.conf | 11 + .../supervisor/conf.d/supervisord-common.conf | 18 ++ .../conf.d/supervisord-jupyter-lab.conf | 11 + .../etc/supervisor/conf.d/supervisord.conf | 27 +++ .../dirs/usr/local/bin/entrypoint-code-editor | 16 ++ .../usr/local/bin/entrypoint-jupyter-server | 19 ++ .../dirs/usr/local/bin/restart-jupyter-server | 6 + .../dirs/usr/local/bin/start-code-editor | 25 +++ .../dirs/usr/local/bin/start-jupyter-server | 36 ++++ .../v2/v2.0/v2.0.0/gpu.arg_based_env.in | 1 + build_artifacts/v2/v2.0/v2.0.0/gpu.env.in | 50 +++++ .../v2/v2.0/v2.0.0/gpu_cuda_version.json | 4 + .../v2/v2.0/v2.0.0/patch_glue_pyspark.json | 15 ++ .../v2/v2.0/v2.0.0/patch_glue_spark.json | 15 ++ .../v2/v2.0/v2.0.0/source-version.txt | 1 + src/config.py | 2 - src/main.py | 23 +- template/v0/gpu_cuda_version.json | 4 + template/v1/gpu_cuda_version.json | 4 + template/v2/Dockerfile | 204 ++++++++++++++++++ .../code_editor_machine_settings.json | 4 + .../v2/dirs/etc/code-editor/extensions.txt | 3 + template/v2/dirs/etc/conda/.condarc | 6 + 
.../dirs/etc/jupyter/jupyter_server_config.py | 28 +++ .../conf.d/supervisord-code-editor.conf | 11 + .../supervisor/conf.d/supervisord-common.conf | 18 ++ .../conf.d/supervisord-jupyter-lab.conf | 11 + .../etc/supervisor/conf.d/supervisord.conf | 27 +++ .../dirs/usr/local/bin/entrypoint-code-editor | 16 ++ .../usr/local/bin/entrypoint-jupyter-server | 19 ++ .../dirs/usr/local/bin/restart-jupyter-server | 6 + .../v2/dirs/usr/local/bin/start-code-editor | 25 +++ .../dirs/usr/local/bin/start-jupyter-server | 36 ++++ template/v2/gpu_cuda_version.json | 4 + test/test_artifacts/v2/altair.test.Dockerfile | 20 ++ ...dewhisperer-jupyterlab-ext.test.Dockerfile | 6 + ...amazon-sagemaker-sql-magic.test.Dockerfile | 6 + ...mazon_sagemaker_sql_editor.test.Dockerfile | 6 + .../v2/autogluon.test.Dockerfile | 13 ++ .../v2/aws-glue-sessions/glue_notebook.ipynb | 91 ++++++++ .../run_glue_sessions_notebook.sh | 10 + test/test_artifacts/v2/boto3.test.Dockerfile | 15 ++ .../v2/glue-sessions.test.Dockerfile | 13 ++ .../v2/jupyter-ai.test.Dockerfile | 6 + .../v2/jupyter-collaboration.test.Dockerfile | 6 + .../v2/jupyter-dash.test.Dockerfile | 6 + .../v2/jupyterlab-git.test.Dockerfile | 6 + .../v2/jupyterlab-lsp.test.Dockerfile | 7 + test/test_artifacts/v2/keras.test.Dockerfile | 24 +++ .../v2/matplotlib.test.Dockerfile | 18 ++ .../v2/notebook.test.Dockerfile | 6 + test/test_artifacts/v2/numpy.test.Dockerfile | 13 ++ test/test_artifacts/v2/pandas.test.Dockerfile | 8 + .../v2/python-lsp-server.test.Dockerfile | 7 + .../v2/pytorch.examples.Dockerfile | 21 ++ test/test_artifacts/v2/run_pandas_tests.py | 33 +++ .../v2/sagemaker-code-editor.test.Dockerfile | 9 + ...-headless-execution-driver.test.Dockerfile | 7 + ...studio-analytics-extension.test.Dockerfile | 11 + .../sagemaker-studio-analytics-extension.sh | 2 + ...sagemaker_studio_analytics_extension.ipynb | 53 +++++ test/test_artifacts/v2/scipy.test.Dockerfile | 12 ++ .../scripts/run_altair_example_notebooks.sh | 17 ++ 
.../v2/scripts/run_autogluon_tests.sh | 17 ++ .../v2/scripts/run_boto3_tests.sh | 10 + .../v2/scripts/run_keras_tests.sh | 11 + .../v2/scripts/run_matplotlib_tests.sh | 5 + .../v2/scripts/run_pysdk_tests.sh | 20 ++ .../run_sagemaker_code_editor_tests.sh | 70 ++++++ test/test_artifacts/v2/serve.test.Dockerfile | 6 + .../v2/sm-python-sdk.test.Dockerfile | 12 ++ .../v2/tensorflow.examples.Dockerfile | 16 ++ .../run_tensorflow_example_notebooks.sh | 29 +++ test/test_main.py | 40 +++- 86 files changed, 1704 insertions(+), 9 deletions(-) create mode 100644 build_artifacts/v0/v0.12/v0.12.0/gpu_cuda_version.json create mode 100644 build_artifacts/v1/v1.4/gpu_cuda_version.json create mode 100644 build_artifacts/v1/v1.5/gpu_cuda_version.json create mode 100644 build_artifacts/v1/v1.6/gpu_cuda_version.json create mode 100644 build_artifacts/v1/v1.7/gpu_cuda_version.json create mode 100644 build_artifacts/v1/v1.8/v1.8.0/gpu_cuda_version.json create mode 100644 build_artifacts/v2/v2.0/v2.0.0/Dockerfile create mode 100644 build_artifacts/v2/v2.0/v2.0.0/cpu.env.in create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/code_editor_machine_settings.json create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/extensions.txt create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/conda/.condarc create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/jupyter/jupyter_server_config.py create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-common.conf create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf create mode 100644 build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord.conf create mode 100755 build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-code-editor create mode 100755 
build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-jupyter-server create mode 100755 build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/restart-jupyter-server create mode 100755 build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-code-editor create mode 100755 build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-jupyter-server create mode 100644 build_artifacts/v2/v2.0/v2.0.0/gpu.arg_based_env.in create mode 100644 build_artifacts/v2/v2.0/v2.0.0/gpu.env.in create mode 100644 build_artifacts/v2/v2.0/v2.0.0/gpu_cuda_version.json create mode 100644 build_artifacts/v2/v2.0/v2.0.0/patch_glue_pyspark.json create mode 100644 build_artifacts/v2/v2.0/v2.0.0/patch_glue_spark.json create mode 100644 build_artifacts/v2/v2.0/v2.0.0/source-version.txt create mode 100644 template/v0/gpu_cuda_version.json create mode 100644 template/v1/gpu_cuda_version.json create mode 100644 template/v2/Dockerfile create mode 100644 template/v2/dirs/etc/code-editor/code_editor_machine_settings.json create mode 100644 template/v2/dirs/etc/code-editor/extensions.txt create mode 100644 template/v2/dirs/etc/conda/.condarc create mode 100644 template/v2/dirs/etc/jupyter/jupyter_server_config.py create mode 100644 template/v2/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf create mode 100644 template/v2/dirs/etc/supervisor/conf.d/supervisord-common.conf create mode 100644 template/v2/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf create mode 100644 template/v2/dirs/etc/supervisor/conf.d/supervisord.conf create mode 100755 template/v2/dirs/usr/local/bin/entrypoint-code-editor create mode 100755 template/v2/dirs/usr/local/bin/entrypoint-jupyter-server create mode 100755 template/v2/dirs/usr/local/bin/restart-jupyter-server create mode 100755 template/v2/dirs/usr/local/bin/start-code-editor create mode 100755 template/v2/dirs/usr/local/bin/start-jupyter-server create mode 100644 template/v2/gpu_cuda_version.json create mode 100644 
test/test_artifacts/v2/altair.test.Dockerfile create mode 100644 test/test_artifacts/v2/amazon-codewhisperer-jupyterlab-ext.test.Dockerfile create mode 100644 test/test_artifacts/v2/amazon-sagemaker-sql-magic.test.Dockerfile create mode 100644 test/test_artifacts/v2/amazon_sagemaker_sql_editor.test.Dockerfile create mode 100644 test/test_artifacts/v2/autogluon.test.Dockerfile create mode 100644 test/test_artifacts/v2/aws-glue-sessions/glue_notebook.ipynb create mode 100644 test/test_artifacts/v2/aws-glue-sessions/run_glue_sessions_notebook.sh create mode 100644 test/test_artifacts/v2/boto3.test.Dockerfile create mode 100644 test/test_artifacts/v2/glue-sessions.test.Dockerfile create mode 100644 test/test_artifacts/v2/jupyter-ai.test.Dockerfile create mode 100644 test/test_artifacts/v2/jupyter-collaboration.test.Dockerfile create mode 100644 test/test_artifacts/v2/jupyter-dash.test.Dockerfile create mode 100644 test/test_artifacts/v2/jupyterlab-git.test.Dockerfile create mode 100644 test/test_artifacts/v2/jupyterlab-lsp.test.Dockerfile create mode 100644 test/test_artifacts/v2/keras.test.Dockerfile create mode 100644 test/test_artifacts/v2/matplotlib.test.Dockerfile create mode 100644 test/test_artifacts/v2/notebook.test.Dockerfile create mode 100644 test/test_artifacts/v2/numpy.test.Dockerfile create mode 100644 test/test_artifacts/v2/pandas.test.Dockerfile create mode 100644 test/test_artifacts/v2/python-lsp-server.test.Dockerfile create mode 100644 test/test_artifacts/v2/pytorch.examples.Dockerfile create mode 100644 test/test_artifacts/v2/run_pandas_tests.py create mode 100644 test/test_artifacts/v2/sagemaker-code-editor.test.Dockerfile create mode 100644 test/test_artifacts/v2/sagemaker-headless-execution-driver.test.Dockerfile create mode 100644 test/test_artifacts/v2/sagemaker-studio-analytics-extension.test.Dockerfile create mode 100644 test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker-studio-analytics-extension.sh create mode 100644 
test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker_studio_analytics_extension.ipynb create mode 100644 test/test_artifacts/v2/scipy.test.Dockerfile create mode 100644 test/test_artifacts/v2/scripts/run_altair_example_notebooks.sh create mode 100644 test/test_artifacts/v2/scripts/run_autogluon_tests.sh create mode 100644 test/test_artifacts/v2/scripts/run_boto3_tests.sh create mode 100644 test/test_artifacts/v2/scripts/run_keras_tests.sh create mode 100644 test/test_artifacts/v2/scripts/run_matplotlib_tests.sh create mode 100644 test/test_artifacts/v2/scripts/run_pysdk_tests.sh create mode 100644 test/test_artifacts/v2/scripts/run_sagemaker_code_editor_tests.sh create mode 100644 test/test_artifacts/v2/serve.test.Dockerfile create mode 100644 test/test_artifacts/v2/sm-python-sdk.test.Dockerfile create mode 100644 test/test_artifacts/v2/tensorflow.examples.Dockerfile create mode 100644 test/test_artifacts/v2/tensorflow/run_tensorflow_example_notebooks.sh diff --git a/build_artifacts/v0/v0.12/v0.12.0/gpu_cuda_version.json b/build_artifacts/v0/v0.12/v0.12.0/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/build_artifacts/v0/v0.12/v0.12.0/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/build_artifacts/v1/v1.4/gpu_cuda_version.json b/build_artifacts/v1/v1.4/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/build_artifacts/v1/v1.4/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/build_artifacts/v1/v1.5/gpu_cuda_version.json b/build_artifacts/v1/v1.5/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/build_artifacts/v1/v1.5/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + 
"TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/build_artifacts/v1/v1.6/gpu_cuda_version.json b/build_artifacts/v1/v1.6/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/build_artifacts/v1/v1.6/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/build_artifacts/v1/v1.7/gpu_cuda_version.json b/build_artifacts/v1/v1.7/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/build_artifacts/v1/v1.7/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/build_artifacts/v1/v1.8/v1.8.0/gpu_cuda_version.json b/build_artifacts/v1/v1.8/v1.8.0/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/build_artifacts/v1/v1.8/v1.8.0/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/build_artifacts/v2/v2.0/v2.0.0/Dockerfile b/build_artifacts/v2/v2.0/v2.0.0/Dockerfile new file mode 100644 index 00000000..f95278b9 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/Dockerfile @@ -0,0 +1,204 @@ +ARG TAG_FOR_BASE_MICROMAMBA_IMAGE +FROM mambaorg/micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE + +ARG CUDA_MAJOR_MINOR_VERSION='' +ARG ENV_IN_FILENAME +ARG ARG_BASED_ENV_IN_FILENAME + +ARG AMZN_BASE="/opt/amazon/sagemaker" +ARG DB_ROOT_DIR="/opt/db" +ARG DIRECTORY_TREE_STAGE_DIR="${AMZN_BASE}/dir-staging" + +ARG NB_USER="sagemaker-user" +ARG NB_UID=1000 +ARG NB_GID=100 + +# https://www.openssl.org/source/ +ARG FIPS_VALIDATED_SSL=3.0.8 + +ENV SAGEMAKER_LOGGING_DIR="/var/log/sagemaker/" +ENV STUDIO_LOGGING_DIR="/var/log/studio/" +ENV 
EDITOR="nano" + +USER root +RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_UID}" "${MAMBA_USER}" && \ + groupmod "--new-name=${NB_USER}" --non-unique "-g ${NB_GID}" "${MAMBA_USER}" && \ + # Update the expected value of MAMBA_USER for the + # _entrypoint.sh consistency check. + echo "${NB_USER}" > "/etc/arg_mamba_user" && \ + : +ENV MAMBA_USER=$NB_USER +ENV USER=$NB_USER + +RUN apt-get update && apt-get upgrade -y && \ + apt-get install -y --no-install-recommends sudo gettext-base wget curl unzip git rsync build-essential openssh-client nano cron less mandoc && \ + # We just install tzdata below but leave default time zone as UTC. This helps packages like Pandas to function correctly. + DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata krb5-user libkrb5-dev libsasl2-dev libsasl2-modules && \ + chmod g+w /etc/passwd && \ + echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \ + touch /etc/krb5.conf.lock && chown ${NB_USER}:${MAMBA_USER} /etc/krb5.conf* && \ + # Note that we do NOT run `rm -rf /var/lib/apt/lists/*` here. If we did, anyone building on top of our images will + # not be able to run any `apt-get install` commands and that would hamper customizability of the images. 
+ curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ + unzip awscliv2.zip && \ + sudo ./aws/install && \ + rm -rf aws awscliv2.zip && \ + : +RUN echo "source /usr/local/bin/_activate_current_env.sh" | tee --append /etc/profile + +# CodeEditor - create server, user data dirs +RUN mkdir -p /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data \ + && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data + +# create dir to store user data files +RUN mkdir -p /opt/amazon/sagemaker/user-data \ + && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/user-data + + +# Merge in OS directory tree contents. +RUN mkdir -p ${DIRECTORY_TREE_STAGE_DIR} +COPY dirs/ ${DIRECTORY_TREE_STAGE_DIR}/ +RUN rsync -a ${DIRECTORY_TREE_STAGE_DIR}/ / && \ + rm -rf ${DIRECTORY_TREE_STAGE_DIR} + +# CodeEditor - download the extensions +RUN mkdir -p /etc/code-editor/extensions && \ + while IFS= read -r url || [ -n "$url" ]; do \ + echo "Downloading extension from ${url}..." && \ + wget --no-check-certificate -P /etc/code-editor/extensions "${url}"; \ + done < /etc/code-editor/extensions.txt + +USER $MAMBA_USER +COPY --chown=$MAMBA_USER:$MAMBA_USER $ENV_IN_FILENAME *.in /tmp/ + +# Make sure that $ENV_IN_FILENAME has a newline at the end before the `tee` command runs. Otherwise, nasty things +# will happen. 
+RUN if [[ -z $ARG_BASED_ENV_IN_FILENAME ]] ; \ + then echo 'No ARG_BASED_ENV_IN_FILENAME passed' ; \ + else envsubst < /tmp/$ARG_BASED_ENV_IN_FILENAME | tee --append /tmp/$ENV_IN_FILENAME ; \ + fi + +ARG CONDA_OVERRIDE_CUDA=$CUDA_MAJOR_MINOR_VERSION +# Enforce dependencies are all installed from conda-forge +RUN micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \ + micromamba clean --all --yes --force-pkgs-dirs && \ + rm -rf /tmp/*.in + + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN sudo ln -s $(which python3) /usr/bin/python + +# Update npm version +RUN npm i -g npm + +# Enforce to use `conda-forge` as only channel, by removing `defaults` +RUN conda config --remove channels defaults +RUN micromamba config append channels conda-forge --env + +# Configure CodeEditor - Install extensions and set preferences +RUN \ + extensionloc=/opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions && mkdir -p "${extensionloc}" \ + # Loop through all vsix files in /etc/code-editor/extensions and install them + && for ext in /etc/code-editor/extensions/*.vsix; do \ + echo "Installing extension ${ext}..."; \ + sagemaker-code-editor --install-extension "${ext}" --extensions-dir "${extensionloc}" --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data; \ + done \ + # Copy the settings + && cp /etc/code-editor/code_editor_machine_settings.json /opt/amazon/sagemaker/sagemaker-code-editor-server-data/data/Machine/settings.json + +# Install glue kernels, and move to shared directory +# Also patching base kernel so Studio background code doesn't start session silently +RUN install-glue-kernels && \ + SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \ + jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \ + jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user 
&& \ + mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \ + mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \ + sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \ + "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py" + + +# Patch glue kernels to use kernel wrapper +COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json +COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json + +# Configure RTC - disable jupyter_collaboration by default +RUN jupyter labextension disable @jupyter/collaboration-extension + +USER root +RUN HOME_DIR="/home/${NB_USER}/licenses" \ + && mkdir -p ${HOME_DIR} \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python \ + && rm -rf ${HOME_DIR}/oss_compliance* + +# Create logging directories for supervisor +RUN mkdir -p $SAGEMAKER_LOGGING_DIR && \ + chmod a+rw $SAGEMAKER_LOGGING_DIR && \ + mkdir -p ${STUDIO_LOGGING_DIR} && \ + chown ${NB_USER}:${MAMBA_USER} ${STUDIO_LOGGING_DIR} + +# Clean up CodeEditor artifacts +RUN rm -rf /etc/code-editor + +# Create supervisord runtime directory +RUN mkdir -p /var/run/supervisord && \ + chmod a+rw /var/run/supervisord + +# Create root directory for DB +# Create logging directories for supervisor +RUN mkdir -p $DB_ROOT_DIR && \ + chmod a+rw $DB_ROOT_DIR + +USER $MAMBA_USER +ENV PATH="/opt/conda/bin:/opt/conda/condabin:$PATH" +WORKDIR 
"/home/${NB_USER}" + +# Install FIPS Provider for OpenSSL, on top of existing OpenSSL installation +# v3.0.8 is latest FIPS validated provider, so this is the one we install +# But we need to run tests against the installed version. +# see https://github.com/openssl/openssl/blob/master/README-FIPS.md https://www.openssl.org/source/ +RUN INSTALLED_SSL=$(micromamba list | grep openssl | tr -s ' ' | cut -d ' ' -f 3 | head -n 1) && \ + # download source code for installed, and FIPS validated openssl versions + curl -L https://www.openssl.org/source/openssl-$FIPS_VALIDATED_SSL.tar.gz > openssl-$FIPS_VALIDATED_SSL.tar.gz && \ + curl -L https://www.openssl.org/source/openssl-$INSTALLED_SSL.tar.gz > openssl-$INSTALLED_SSL.tar.gz && \ + tar -xf openssl-$FIPS_VALIDATED_SSL.tar.gz && tar -xf openssl-$INSTALLED_SSL.tar.gz && cd openssl-$FIPS_VALIDATED_SSL && \ + # Configure both versions to enable FIPS and build + ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \ + cd ../openssl-$INSTALLED_SSL && \ + ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \ + # Copy validated provider to installed version for testing + cp ../openssl-$FIPS_VALIDATED_SSL/providers/fips.so providers/. && \ + cp ../openssl-$FIPS_VALIDATED_SSL/providers/fipsmodule.cnf providers/. && \ + make tests && cd ../openssl-$FIPS_VALIDATED_SSL && \ + # After tests pass, install FIPS provider and remove source code + make install_fips && cd .. && rm -rf ./openssl-* +# Create new config file with fips-enabled. Then user can override OPENSSL_CONF to enable FIPS +# e.g. 
export OPENSSL_CONF=/opt/conda/ssl/openssl-fips.cnf +RUN cp /opt/conda/ssl/openssl.cnf /opt/conda/ssl/openssl-fips.cnf && \ + sed -i "s:# .include fipsmodule.cnf:.include /opt/conda/ssl/fipsmodule.cnf:" /opt/conda/ssl/openssl-fips.cnf && \ + sed -i 's:# fips = fips_sect:fips = fips_sect:' /opt/conda/ssl/openssl-fips.cnf +ENV OPENSSL_MODULES=/opt/conda/lib64/ossl-modules/ + +# Install Kerberos. +# Make sure no dependency is added/updated +RUN pip install "krb5>=0.5.1,<0.6" && \ + pip show krb5 | grep Require | xargs -i sh -c '[ $(echo {} | cut -d: -f2 | wc -w) -eq 0 ] ' + +# https://stackoverflow.com/questions/122327 +RUN SYSTEM_PYTHON_PATH=$(python3 -c "from __future__ import print_function;import sysconfig; print(sysconfig.get_paths().get('purelib'))") && \ + # Remove SparkRKernel as it's not supported \ + jupyter-kernelspec remove -f -y sparkrkernel && \ + # Patch Sparkmagic lib to support Custom Certificates \ + # https://github.com/jupyter-incubator/sparkmagic/pull/435/files \ + cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/configuration.py ${SYSTEM_PYTHON_PATH}/sparkmagic/utils/ && \ + cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/reliablehttpclient.py ${SYSTEM_PYTHON_PATH}/sparkmagic/livyclientlib/reliablehttpclient.py && \ + sed -i 's= "python"= "/opt/conda/bin/python"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \ + sed -i 's="Spark"="SparkMagic Spark"=g' /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \ + sed -i 's="PySpark"="SparkMagic PySpark"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json + +ENV SHELL=/bin/bash diff --git a/build_artifacts/v2/v2.0/v2.0.0/cpu.env.in b/build_artifacts/v2/v2.0/v2.0.0/cpu.env.in new file mode 100644 index 00000000..35b1557c --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/cpu.env.in @@ -0,0 +1,50 @@ +# This file is auto-generated. 
+conda-forge::jupyter-collaboration[version='>=1.1.0'] +conda-forge::sagemaker-code-editor[version='>=1.1.0'] +conda-forge::amazon_sagemaker_sql_editor[version='>=0.1.7'] +conda-forge::amazon-sagemaker-sql-magic[version='>=0.1.1'] +conda-forge::langchain[version='>=0.1.9'] +conda-forge::fastapi[version='>=0.110.3'] +conda-forge::uvicorn[version='>=0.29.0'] +conda-forge::pytorch[version='>=2.3.0'] +conda-forge::tensorflow[version='>=2.15.0'] +conda-forge::python[version='>=3.10.14'] +conda-forge::pip[version='>=23.3.2'] +conda-forge::torchvision[version='>=0.15.2'] +conda-forge::numpy[version='>=1.26.4'] +conda-forge::pandas[version='>=2.1.4'] +conda-forge::scikit-learn[version='>=1.4.2'] +conda-forge::jinja2[version='>=3.1.4'] +conda-forge::matplotlib[version='>=3.8.4'] +conda-forge::sagemaker-headless-execution-driver[version='>=0.0.12'] +conda-forge::ipython[version='>=8.22.2'] +conda-forge::scipy[version='>=1.11.4'] +conda-forge::keras[version='>=2.15.0'] +conda-forge::py-xgboost-cpu[version='>=1.7.6'] +conda-forge::jupyterlab[version='>=4.1.6'] +conda-forge::ipywidgets[version='>=8.1.2'] +conda-forge::conda[version='>=23.11.0'] +conda-forge::boto3[version='>=1.34.51'] +conda-forge::sagemaker-python-sdk[version='>=2.219.0'] +conda-forge::supervisor[version='>=4.2.5'] +conda-forge::aws-glue-sessions[version='>=1.0.5'] +conda-forge::sagemaker-kernel-wrapper[version='>=0.0.2'] +conda-forge::jupyter-ai[version='>=2.14.1'] +conda-forge::jupyter-scheduler[version='>=2.5.2'] +conda-forge::jupyter-lsp[version='>=2.2.5'] +conda-forge::jupyterlab-lsp[version='>=5.0.3'] +conda-forge::python-lsp-server[version='>=1.11.0'] +conda-forge::notebook[version='>=7.1.3'] +conda-forge::altair[version='>=5.3.0'] +conda-forge::sagemaker-studio-analytics-extension[version='>=0.0.21'] +conda-forge::jupyter-dash[version='>=0.4.2'] +conda-forge::sagemaker-jupyterlab-extension[version='>=0.3.2'] +conda-forge::sagemaker-jupyterlab-emr-extension[version='>=0.1.9'] 
+conda-forge::amazon-sagemaker-jupyter-scheduler[version='>=3.0.11'] +conda-forge::jupyter-server-proxy[version='>=4.1.2'] +conda-forge::amazon-codewhisperer-jupyterlab-ext[version='>=2.0.2'] +conda-forge::jupyterlab-git[version='>=0.50.0'] +conda-forge::thrift_sasl[version='>=0.4.3'] +conda-forge::pyhive[version='>=0.7.0'] +conda-forge::python-gssapi[version='>=1.8.3'] +conda-forge::langchain-aws[version='>=0.1.1,<0.2.0'] diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/code_editor_machine_settings.json b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/code_editor_machine_settings.json new file mode 100644 index 00000000..44fb8ef7 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/code_editor_machine_settings.json @@ -0,0 +1,4 @@ +{ + "python.terminal.activateEnvironment": false, + "python.defaultInterpreterPath": "/opt/conda/bin/python" +} diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/extensions.txt b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/extensions.txt new file mode 100644 index 00000000..29d683eb --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/code-editor/extensions.txt @@ -0,0 +1,3 @@ +https://open-vsx.org/api/ms-toolsai/jupyter/2023.9.100/file/ms-toolsai.jupyter-2023.9.100.vsix +https://open-vsx.org/api/ms-python/python/2023.20.0/file/ms-python.python-2023.20.0.vsix +https://open-vsx.org/api/amazonwebservices/aws-toolkit-vscode/1.99.0/file/amazonwebservices.aws-toolkit-vscode-1.99.0.vsix diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/conda/.condarc b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/conda/.condarc new file mode 100644 index 00000000..c3616df5 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/conda/.condarc @@ -0,0 +1,6 @@ +envs_dirs: + - ~/.conda/envs + - /opt/conda/envs +pkgs_dirs: + - ~/.conda/pkgs + - /opt/conda/pkgs diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/jupyter/jupyter_server_config.py 
b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/jupyter/jupyter_server_config.py new file mode 100644 index 00000000..0182cc23 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/jupyter/jupyter_server_config.py @@ -0,0 +1,28 @@ +# Default Jupyter server config +# Note: those config can be overridden by user-level configs. + +c.ServerApp.terminado_settings = {"shell_command": ["/bin/bash"]} +c.ServerApp.tornado_settings = {"compress_response": True} + +# Do not delete files to trash. Instead, permanently delete files. +c.FileContentsManager.delete_to_trash = False + +# Allow deleting non-empty directory via file browser. Related documentation: +# https://github.com/jupyter-server/jupyter_server/blob/main/jupyter_server/services/contents/filemanager.py#L125-L129 +c.FileContentsManager.always_delete_dir = True + +# Enable `allow_hidden` by default, so hidden files are accessible via Jupyter server +# Related documentation: https://jupyterlab.readthedocs.io/en/stable/user/files.html#displaying-hidden-files +c.ContentsManager.allow_hidden = True + +# This will set the LanguageServerManager.extra_node_roots setting if amazon_sagemaker_sql_editor exists in the +# environment. 
Ignore otherwise, don't fail the JL server start +# Related documentation: https://jupyterlab-lsp.readthedocs.io/en/v3.4.0/Configuring.html +try: + import os + + module = __import__("amazon_sagemaker_sql_editor") + module_location = os.path.dirname(module.__file__) + c.LanguageServerManager.extra_node_roots = [f"{module_location}/sql-language-server"] +except: + pass diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf new file mode 100644 index 00000000..cac5669b --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf @@ -0,0 +1,11 @@ +[include] +files = supervisord-common.conf + +[program:codeeditorserver] +directory=%(ENV_HOME)s +command=start-code-editor +autostart=true +autorestart=true +stdout_logfile=/dev/fd/1 ; Redirect web server logs to stdout +stdout_logfile_maxbytes = 0 ; Fix: https://github.com/Supervisor/supervisor/issues/935 +stderr_logfile_maxbytes = 0 ; Fix: https://github.com/Supervisor/supervisor/issues/935 diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-common.conf b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-common.conf new file mode 100644 index 00000000..27820d4c --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-common.conf @@ -0,0 +1,18 @@ +[supervisord] +nodaemon=true + +pidfile=/var/run/supervisord/supervisord.pid +logfile=%(ENV_STUDIO_LOGGING_DIR)s/%(ENV_SAGEMAKER_APP_TYPE_LOWERCASE)s/supervisord/supervisord.log +logfile_maxbytes=5MB +logfile_backups=10 +redirect_stderr=true + +[unix_http_server] +file=/var/run/supervisord/supervisor.sock +chmod=0700 + +[supervisorctl] +serverurl=unix:///var/run/supervisord/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface diff --git 
a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf new file mode 100644 index 00000000..5694ac11 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf @@ -0,0 +1,11 @@ +[include] +files = supervisord-common.conf + +[program:jupyterlabserver] +directory=%(ENV_HOME)s +command=start-jupyter-server +stopasgroup=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord.conf b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord.conf new file mode 100644 index 00000000..686f4a5c --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/etc/supervisor/conf.d/supervisord.conf @@ -0,0 +1,27 @@ +[supervisord] +nodaemon=true + +pidfile=/var/run/supervisord/supervisord.pid +logfile=%(ENV_STUDIO_LOGGING_DIR)s/%(ENV_SAGEMAKER_APP_TYPE_LOWERCASE)s/supervisord/supervisord.log +logfile_maxbytes=5MB +logfile_backups=10 +redirect_stderr=true + +[unix_http_server] +file=/var/run/supervisord/supervisor.sock +chmod=0700 + +[supervisorctl] +serverurl=unix:///var/run/supervisord/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[program:jupyterlabserver] +directory=%(ENV_HOME)s +command=start-jupyter-server +stopasgroup=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-code-editor b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-code-editor new file mode 100755 index 00000000..bf55a371 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-code-editor @@ -0,0 +1,16 @@ +#!/bin/bash + +set -e + +# Generate 
and execute the shell code to modifies shell variables to include +# micromamba commands (e.g. using `micromamba activate` to activate environments) +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', where supervisord is installed +micromamba activate base + +# Set up SAGEMAKER_APP_TYPE_LOWERCASE based on SAGEMAKER_APP_TYPE +export SAGEMAKER_APP_TYPE_LOWERCASE=$(echo $SAGEMAKER_APP_TYPE | tr '[:upper:]' '[:lower:]') + +mkdir -p $STUDIO_LOGGING_DIR/$SAGEMAKER_APP_TYPE_LOWERCASE/supervisord +exec supervisord -c /etc/supervisor/conf.d/supervisord-code-editor.conf -n diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-jupyter-server b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-jupyter-server new file mode 100755 index 00000000..ceda89d0 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/entrypoint-jupyter-server @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +# Generate and execute the shell code to modifies shell variables to include +# micromamba commands (e.g. 
using `micromamba activate` to activate environments) +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', where supervisord is installed +micromamba activate base + +# Set up SAGEMAKER_APP_TYPE_LOWERCASE based on SAGEMAKER_APP_TYPE +export SAGEMAKER_APP_TYPE_LOWERCASE=$(echo $SAGEMAKER_APP_TYPE | tr '[:upper:]' '[:lower:]') + +# Start supervisord with supervisord configuration +# Since program 'jupyterlabserver' autostarts by default, it will be started +# automatically along with supervisord +mkdir -p $STUDIO_LOGGING_DIR/$SAGEMAKER_APP_TYPE_LOWERCASE/supervisord +exec supervisord -c /etc/supervisor/conf.d/supervisord.conf -n diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/restart-jupyter-server b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/restart-jupyter-server new file mode 100755 index 00000000..6f2af98d --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/restart-jupyter-server @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +echo "Restarting the Jupyter server. This page should refresh in a few seconds. Note that any terminals will be closed." +echo "If this page doesn't refresh after a few seconds, try reloading your browser window." +echo "Restarting now..." 
+nohup supervisorctl -c /etc/supervisor/conf.d/supervisord.conf restart jupyterlabserver > /dev/null 2>&1 & diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-code-editor b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-code-editor new file mode 100755 index 00000000..bc97106c --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-code-editor @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', which is the default environment for sagemaker-distribution +micromamba activate base + +# Start code-editor server +if [ -n "$SAGEMAKER_APP_TYPE_LOWERCASE" ]; then + # SAGEMAKER_APP_TYPE is set, indicating the server is running within a SageMaker + # app. Configure the base url to be `//default`. + sagemaker-code-editor --host 0.0.0.0 --port 8888 \ + --without-connection-token \ + --base-path "/$SAGEMAKER_APP_TYPE_LOWERCASE/default" \ + --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data \ + --extensions-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions \ + --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data +else + sagemaker-code-editor --host 0.0.0.0 --port 8888 \ + --without-connection-token \ + --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data \ + --extension-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions \ + --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data +fi diff --git a/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-jupyter-server b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-jupyter-server new file mode 100755 index 00000000..6ff4eac3 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/dirs/usr/local/bin/start-jupyter-server @@ -0,0 +1,36 @@ +#!/bin/bash +set -e + +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', which is the default environment for Cosmos 
+micromamba activate base + +# Start Jupyter server in rtc mode for shared spaces +if [ -n "$SAGEMAKER_APP_TYPE_LOWERCASE" ] && [ "$SAGEMAKER_SPACE_TYPE_LOWERCASE" == "shared" ]; then + jupyter labextension enable @jupyter/collaboration-extension + # SAGEMAKER_APP_TYPE is set, indicating the server is running within a SageMaker + # app. Configure the base url to be `//default`. + # SAGEMAKER_SPACE_TYPE_LOWERCASE flag is used to determine if the server should start + # in real-time-collaboration mode for a given space. + jupyter lab --ip 0.0.0.0 --port 8888 \ + --ServerApp.base_url="/$SAGEMAKER_APP_TYPE_LOWERCASE/default" \ + --ServerApp.token='' \ + --ServerApp.allow_origin='*' \ + --collaborative \ + --ServerApp.identity_provider_class=sagemaker_jupyterlab_extension_common.identity.SagemakerIdentityProvider \ + --YDocExtension.ystore_class=sagemaker_jupyterlab_extension_common.ydoc_override.ydoc.MySQLiteYStore + +# Start Jupyter server +elif [ -n "$SAGEMAKER_APP_TYPE_LOWERCASE" ]; then + # SAGEMAKER_APP_TYPE is set, indicating the server is running within a SageMaker + # app. Configure the base url to be `//default`. + jupyter lab --ip 0.0.0.0 --port 8888 \ + --ServerApp.base_url="/$SAGEMAKER_APP_TYPE_LOWERCASE/default" \ + --ServerApp.token='' \ + --ServerApp.allow_origin='*' +else + jupyter lab --ip 0.0.0.0 --port 8888 \ + --ServerApp.token='' \ + --ServerApp.allow_origin='*' +fi diff --git a/build_artifacts/v2/v2.0/v2.0.0/gpu.arg_based_env.in b/build_artifacts/v2/v2.0/v2.0.0/gpu.arg_based_env.in new file mode 100644 index 00000000..7f89cf4a --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/gpu.arg_based_env.in @@ -0,0 +1 @@ +conda-forge::cuda-toolkit=$CUDA_MAJOR_MINOR_VERSION diff --git a/build_artifacts/v2/v2.0/v2.0.0/gpu.env.in b/build_artifacts/v2/v2.0/v2.0.0/gpu.env.in new file mode 100644 index 00000000..8557bdd4 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/gpu.env.in @@ -0,0 +1,50 @@ +# This file is auto-generated. 
+conda-forge::jupyter-collaboration[version='>=1.1.0'] +conda-forge::sagemaker-code-editor[version='>=1.1.0'] +conda-forge::amazon_sagemaker_sql_editor[version='>=0.1.7'] +conda-forge::amazon-sagemaker-sql-magic[version='>=0.1.1'] +conda-forge::langchain[version='>=0.1.9'] +conda-forge::fastapi[version='>=0.110.3'] +conda-forge::uvicorn[version='>=0.29.0'] +conda-forge::pytorch[version='>=2.3.0'] +conda-forge::tensorflow[version='>=2.15.0'] +conda-forge::python[version='>=3.10.14'] +conda-forge::pip[version='>=23.3.2'] +conda-forge::torchvision[version='>=0.15.2'] +conda-forge::numpy[version='>=1.26.4'] +conda-forge::pandas[version='>=2.1.4'] +conda-forge::scikit-learn[version='>=1.4.2'] +conda-forge::jinja2[version='>=3.1.4'] +conda-forge::matplotlib[version='>=3.8.4'] +conda-forge::sagemaker-headless-execution-driver[version='>=0.0.12'] +conda-forge::ipython[version='>=8.22.2'] +conda-forge::scipy[version='>=1.11.4'] +conda-forge::keras[version='>=2.15.0'] +conda-forge::py-xgboost-gpu[version='>=1.7.6'] +conda-forge::jupyterlab[version='>=4.1.6'] +conda-forge::ipywidgets[version='>=8.1.2'] +conda-forge::conda[version='>=23.11.0'] +conda-forge::boto3[version='>=1.34.51'] +conda-forge::sagemaker-python-sdk[version='>=2.219.0'] +conda-forge::supervisor[version='>=4.2.5'] +conda-forge::aws-glue-sessions[version='>=1.0.5'] +conda-forge::sagemaker-kernel-wrapper[version='>=0.0.2'] +conda-forge::jupyter-ai[version='>=2.14.1'] +conda-forge::jupyter-scheduler[version='>=2.5.2'] +conda-forge::jupyter-lsp[version='>=2.2.5'] +conda-forge::jupyterlab-lsp[version='>=5.0.3'] +conda-forge::python-lsp-server[version='>=1.11.0'] +conda-forge::notebook[version='>=7.1.3'] +conda-forge::altair[version='>=5.3.0'] +conda-forge::sagemaker-studio-analytics-extension[version='>=0.0.21'] +conda-forge::jupyter-dash[version='>=0.4.2'] +conda-forge::sagemaker-jupyterlab-extension[version='>=0.3.2'] +conda-forge::sagemaker-jupyterlab-emr-extension[version='>=0.1.9'] 
+conda-forge::amazon-sagemaker-jupyter-scheduler[version='>=3.0.11'] +conda-forge::jupyter-server-proxy[version='>=4.1.2'] +conda-forge::amazon-codewhisperer-jupyterlab-ext[version='>=2.0.2'] +conda-forge::jupyterlab-git[version='>=0.50.0'] +conda-forge::thrift_sasl[version='>=0.4.3'] +conda-forge::pyhive[version='>=0.7.0'] +conda-forge::python-gssapi[version='>=1.8.3'] +conda-forge::langchain-aws[version='>=0.1.1,<0.2.0'] diff --git a/build_artifacts/v2/v2.0/v2.0.0/gpu_cuda_version.json b/build_artifacts/v2/v2.0/v2.0.0/gpu_cuda_version.json new file mode 100644 index 00000000..72f5cebb --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-12.4.1", + "CUDA_MAJOR_MINOR_VERSION": "12.4" +} \ No newline at end of file diff --git a/build_artifacts/v2/v2.0/v2.0.0/patch_glue_pyspark.json b/build_artifacts/v2/v2.0/v2.0.0/patch_glue_pyspark.json new file mode 100644 index 00000000..ab70fd8d --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/patch_glue_pyspark.json @@ -0,0 +1,15 @@ +{ + "argv": [ + "/opt/conda/bin/python", + "-m", + "sagemaker_kernel_wrapper.sm_gis_wrapper", + "-m", + "aws_glue_interactive_sessions_kernel.glue_pyspark.GlueKernel", + "-f", + "{connection_file}" + ], + "display_name": "Glue PySpark and Ray", + "env": {"request_origin": "SageMakerStudioPySparkNotebook", "glue_version": "3.0"}, + "language": "python" +} + diff --git a/build_artifacts/v2/v2.0/v2.0.0/patch_glue_spark.json b/build_artifacts/v2/v2.0/v2.0.0/patch_glue_spark.json new file mode 100644 index 00000000..1bd168e7 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/patch_glue_spark.json @@ -0,0 +1,15 @@ +{ + "argv": [ + "/opt/conda/bin/python", + "-m", + "sagemaker_kernel_wrapper.sm_gis_wrapper", + "-m", + "aws_glue_interactive_sessions_kernel.glue_spark.GlueKernel", + "-f", + "{connection_file}" + ], + "display_name": "Glue Spark", + "env": {"request_origin": "SageMakerStudioSparkNotebook", "glue_version": 
"3.0"}, + "language": "python" +} + diff --git a/build_artifacts/v2/v2.0/v2.0.0/source-version.txt b/build_artifacts/v2/v2.0/v2.0.0/source-version.txt new file mode 100644 index 00000000..afa2b351 --- /dev/null +++ b/build_artifacts/v2/v2.0/v2.0.0/source-version.txt @@ -0,0 +1 @@ +1.8.0 \ No newline at end of file diff --git a/src/config.py b/src/config.py index 1b654e6d..4c84946a 100644 --- a/src/config.py +++ b/src/config.py @@ -1,8 +1,6 @@ _image_generator_configs = [ { "build_args": { - "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", - "CUDA_MAJOR_MINOR_VERSION": "11.8", # Should match the previous one. "ENV_IN_FILENAME": "gpu.env.in", "ARG_BASED_ENV_IN_FILENAME": "gpu.arg_based_env.in", }, diff --git a/src/main.py b/src/main.py index c6c8db06..2536cdb8 100644 --- a/src/main.py +++ b/src/main.py @@ -2,6 +2,7 @@ import base64 import copy import glob +import json import os import shutil @@ -108,14 +109,19 @@ def _copy_static_files(base_version_dir, new_version_dir, new_version_major, run for f in glob.glob(f"{base_version_dir}/patch_*"): shutil.copy2(f, new_version_dir) - # For patches, get Dockerfile+dirs from base patch - # For minor/major, get Dockerfile+dirs from template + # For patches, get Dockerfile+dirs+gpu_cuda_version from base patch + # For minor/major, get Dockerfile+dirs+gpu_cuda_version from template if runtime_version_upgrade_type == _PATCH: base_path = base_version_dir else: base_path = f"template/v{new_version_major}" + for f in glob.glob(os.path.relpath(f"{base_path}/Dockerfile")): shutil.copy2(f, new_version_dir) + + for f in glob.glob(os.path.relpath(f"{base_path}/gpu_cuda_version.json")): + shutil.copy2(f, new_version_dir) + if int(new_version_major) >= 1: # dirs directory doesn't exist for v0. 
It was introduced only for v1 dirs_relative_path = os.path.relpath(f"{base_path}/dirs") @@ -232,6 +238,11 @@ def _get_config_for_image(target_version_dir: str, image_generator_config, force config_for_image["build_args"].pop("ARG_BASED_ENV_IN_FILENAME", None) return config_for_image +def _get_gpu_cuda_config(target_version_dir: str) -> dict: + json_file_path = os.path.join(target_version_dir, "gpu_cuda_version.json") + with open(json_file_path, "r") as f: + gpu_cuda_config = json.load(f) + return gpu_cuda_config # Returns a tuple of: 1/ list of actual images generated; 2/ list of tagged images. A given image can be tagged by # multiple different strings - for e.g., a CPU image can be tagged as '1.3.2-cpu', '1.3-cpu', '1-cpu' and/or @@ -247,8 +258,14 @@ def _build_local_images( for image_generator_config in _image_generator_configs: config = _get_config_for_image(target_version_dir, image_generator_config, force) try: + # Pass in TAG_FOR_BASE_MICROMAMBA_IMAGE and CUDA_MAJOR_MINOR_VERSION into "buildargs" + build_args = config["build_args"] + if image_generator_config["image_type"] == "gpu": + gpu_cuda_config = _get_gpu_cuda_config(target_version_dir) + build_args["TAG_FOR_BASE_MICROMAMBA_IMAGE"] = gpu_cuda_config["TAG_FOR_BASE_MICROMAMBA_IMAGE"] + build_args["CUDA_MAJOR_MINOR_VERSION"] = gpu_cuda_config["CUDA_MAJOR_MINOR_VERSION"] image, log_gen = _docker_client.images.build( - path=target_version_dir, rm=True, pull=True, buildargs=config["build_args"] + path=target_version_dir, rm=True, pull=True, buildargs=build_args ) except BuildError as e: for line in e.build_log: diff --git a/template/v0/gpu_cuda_version.json b/template/v0/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/template/v0/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/template/v1/gpu_cuda_version.json 
b/template/v1/gpu_cuda_version.json new file mode 100644 index 00000000..abff259f --- /dev/null +++ b/template/v1/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0", + "CUDA_MAJOR_MINOR_VERSION": "11.8" +} \ No newline at end of file diff --git a/template/v2/Dockerfile b/template/v2/Dockerfile new file mode 100644 index 00000000..f95278b9 --- /dev/null +++ b/template/v2/Dockerfile @@ -0,0 +1,204 @@ +ARG TAG_FOR_BASE_MICROMAMBA_IMAGE +FROM mambaorg/micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE + +ARG CUDA_MAJOR_MINOR_VERSION='' +ARG ENV_IN_FILENAME +ARG ARG_BASED_ENV_IN_FILENAME + +ARG AMZN_BASE="/opt/amazon/sagemaker" +ARG DB_ROOT_DIR="/opt/db" +ARG DIRECTORY_TREE_STAGE_DIR="${AMZN_BASE}/dir-staging" + +ARG NB_USER="sagemaker-user" +ARG NB_UID=1000 +ARG NB_GID=100 + +# https://www.openssl.org/source/ +ARG FIPS_VALIDATED_SSL=3.0.8 + +ENV SAGEMAKER_LOGGING_DIR="/var/log/sagemaker/" +ENV STUDIO_LOGGING_DIR="/var/log/studio/" +ENV EDITOR="nano" + +USER root +RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_UID}" "${MAMBA_USER}" && \ + groupmod "--new-name=${NB_USER}" --non-unique "-g ${NB_GID}" "${MAMBA_USER}" && \ + # Update the expected value of MAMBA_USER for the + # _entrypoint.sh consistency check. + echo "${NB_USER}" > "/etc/arg_mamba_user" && \ + : +ENV MAMBA_USER=$NB_USER +ENV USER=$NB_USER + +RUN apt-get update && apt-get upgrade -y && \ + apt-get install -y --no-install-recommends sudo gettext-base wget curl unzip git rsync build-essential openssh-client nano cron less mandoc && \ + # We just install tzdata below but leave default time zone as UTC. This helps packages like Pandas to function correctly. 
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata krb5-user libkrb5-dev libsasl2-dev libsasl2-modules && \ + chmod g+w /etc/passwd && \ + echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \ + touch /etc/krb5.conf.lock && chown ${NB_USER}:${MAMBA_USER} /etc/krb5.conf* && \ + # Note that we do NOT run `rm -rf /var/lib/apt/lists/*` here. If we did, anyone building on top of our images will + # not be able to run any `apt-get install` commands and that would hamper customizability of the images. + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \ + unzip awscliv2.zip && \ + sudo ./aws/install && \ + rm -rf aws awscliv2.zip && \ + : +RUN echo "source /usr/local/bin/_activate_current_env.sh" | tee --append /etc/profile + +# CodeEditor - create server, user data dirs +RUN mkdir -p /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data \ + && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/sagemaker-code-editor-server-data /opt/amazon/sagemaker/sagemaker-code-editor-user-data + +# create dir to store user data files +RUN mkdir -p /opt/amazon/sagemaker/user-data \ + && chown $MAMBA_USER:$MAMBA_USER /opt/amazon/sagemaker/user-data + + +# Merge in OS directory tree contents. +RUN mkdir -p ${DIRECTORY_TREE_STAGE_DIR} +COPY dirs/ ${DIRECTORY_TREE_STAGE_DIR}/ +RUN rsync -a ${DIRECTORY_TREE_STAGE_DIR}/ / && \ + rm -rf ${DIRECTORY_TREE_STAGE_DIR} + +# CodeEditor - download the extensions +RUN mkdir -p /etc/code-editor/extensions && \ + while IFS= read -r url || [ -n "$url" ]; do \ + echo "Downloading extension from ${url}..." && \ + wget --no-check-certificate -P /etc/code-editor/extensions "${url}"; \ + done < /etc/code-editor/extensions.txt + +USER $MAMBA_USER +COPY --chown=$MAMBA_USER:$MAMBA_USER $ENV_IN_FILENAME *.in /tmp/ + +# Make sure that $ENV_IN_FILENAME has a newline at the end before the `tee` command runs. 
Otherwise, nasty things +# will happen. +RUN if [[ -z $ARG_BASED_ENV_IN_FILENAME ]] ; \ + then echo 'No ARG_BASED_ENV_IN_FILENAME passed' ; \ + else envsubst < /tmp/$ARG_BASED_ENV_IN_FILENAME | tee --append /tmp/$ENV_IN_FILENAME ; \ + fi + +ARG CONDA_OVERRIDE_CUDA=$CUDA_MAJOR_MINOR_VERSION +# Enforce dependencies are all installed from conda-forge +RUN micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \ + micromamba clean --all --yes --force-pkgs-dirs && \ + rm -rf /tmp/*.in + + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN sudo ln -s $(which python3) /usr/bin/python + +# Update npm version +RUN npm i -g npm + +# Enforce to use `conda-forge` as only channel, by removing `defaults` +RUN conda config --remove channels defaults +RUN micromamba config append channels conda-forge --env + +# Configure CodeEditor - Install extensions and set preferences +RUN \ + extensionloc=/opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions && mkdir -p "${extensionloc}" \ + # Loop through all vsix files in /etc/code-editor/extensions and install them + && for ext in /etc/code-editor/extensions/*.vsix; do \ + echo "Installing extension ${ext}..."; \ + sagemaker-code-editor --install-extension "${ext}" --extensions-dir "${extensionloc}" --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data; \ + done \ + # Copy the settings + && cp /etc/code-editor/code_editor_machine_settings.json /opt/amazon/sagemaker/sagemaker-code-editor-server-data/data/Machine/settings.json + +# Install glue kernels, and move to shared directory +# Also patching base kernel so Studio background code doesn't start session silently +RUN install-glue-kernels && \ + SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \ + jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \ + jupyter-kernelspec install 
$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \ + mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \ + mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \ + sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \ + "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py" + + +# Patch glue kernels to use kernel wrapper +COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json +COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json + +# Configure RTC - disable jupyter_collaboration by default +RUN jupyter labextension disable @jupyter/collaboration-extension + +USER root +RUN HOME_DIR="/home/${NB_USER}/licenses" \ + && mkdir -p ${HOME_DIR} \ + && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \ + && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \ + && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \ + && chmod +x /usr/local/bin/testOSSCompliance \ + && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \ + && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python \ + && rm -rf ${HOME_DIR}/oss_compliance* + +# Create logging directories for supervisor +RUN mkdir -p $SAGEMAKER_LOGGING_DIR && \ + chmod a+rw $SAGEMAKER_LOGGING_DIR && \ + mkdir -p ${STUDIO_LOGGING_DIR} && \ + chown ${NB_USER}:${MAMBA_USER} ${STUDIO_LOGGING_DIR} + +# Clean up CodeEditor artifacts +RUN rm -rf /etc/code-editor + +# Create supervisord runtime directory +RUN mkdir -p /var/run/supervisord && \ + chmod a+rw /var/run/supervisord + +# Create root directory for DB +# Create logging directories for supervisor +RUN mkdir -p $DB_ROOT_DIR && \ + chmod a+rw $DB_ROOT_DIR + +USER $MAMBA_USER +ENV 
PATH="/opt/conda/bin:/opt/conda/condabin:$PATH" +WORKDIR "/home/${NB_USER}" + +# Install FIPS Provider for OpenSSL, on top of existing OpenSSL installation +# v3.0.8 is latest FIPS validated provider, so this is the one we install +# But we need to run tests against the installed version. +# see https://github.com/openssl/openssl/blob/master/README-FIPS.md https://www.openssl.org/source/ +RUN INSTALLED_SSL=$(micromamba list | grep openssl | tr -s ' ' | cut -d ' ' -f 3 | head -n 1) && \ + # download source code for installed, and FIPS validated openssl versions + curl -L https://www.openssl.org/source/openssl-$FIPS_VALIDATED_SSL.tar.gz > openssl-$FIPS_VALIDATED_SSL.tar.gz && \ + curl -L https://www.openssl.org/source/openssl-$INSTALLED_SSL.tar.gz > openssl-$INSTALLED_SSL.tar.gz && \ + tar -xf openssl-$FIPS_VALIDATED_SSL.tar.gz && tar -xf openssl-$INSTALLED_SSL.tar.gz && cd openssl-$FIPS_VALIDATED_SSL && \ + # Configure both versions to enable FIPS and build + ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \ + cd ../openssl-$INSTALLED_SSL && \ + ./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \ + # Copy validated provider to installed version for testing + cp ../openssl-$FIPS_VALIDATED_SSL/providers/fips.so providers/. && \ + cp ../openssl-$FIPS_VALIDATED_SSL/providers/fipsmodule.cnf providers/. && \ + make tests && cd ../openssl-$FIPS_VALIDATED_SSL && \ + # After tests pass, install FIPS provider and remove source code + make install_fips && cd .. && rm -rf ./openssl-* +# Create new config file with fips-enabled. Then user can override OPENSSL_CONF to enable FIPS +# e.g. 
export OPENSSL_CONF=/opt/conda/ssl/openssl-fips.cnf +RUN cp /opt/conda/ssl/openssl.cnf /opt/conda/ssl/openssl-fips.cnf && \ + sed -i "s:# .include fipsmodule.cnf:.include /opt/conda/ssl/fipsmodule.cnf:" /opt/conda/ssl/openssl-fips.cnf && \ + sed -i 's:# fips = fips_sect:fips = fips_sect:' /opt/conda/ssl/openssl-fips.cnf +ENV OPENSSL_MODULES=/opt/conda/lib64/ossl-modules/ + +# Install Kerberos. +# Make sure no dependency is added/updated +RUN pip install "krb5>=0.5.1,<0.6" && \ + pip show krb5 | grep Require | xargs -i sh -c '[ $(echo {} | cut -d: -f2 | wc -w) -eq 0 ] ' + +# https://stackoverflow.com/questions/122327 +RUN SYSTEM_PYTHON_PATH=$(python3 -c "from __future__ import print_function;import sysconfig; print(sysconfig.get_paths().get('purelib'))") && \ + # Remove SparkRKernel as it's not supported \ + jupyter-kernelspec remove -f -y sparkrkernel && \ + # Patch Sparkmagic lib to support Custom Certificates \ + # https://github.com/jupyter-incubator/sparkmagic/pull/435/files \ + cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/configuration.py ${SYSTEM_PYTHON_PATH}/sparkmagic/utils/ && \ + cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/reliablehttpclient.py ${SYSTEM_PYTHON_PATH}/sparkmagic/livyclientlib/reliablehttpclient.py && \ + sed -i 's= "python"= "/opt/conda/bin/python"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \ + sed -i 's="Spark"="SparkMagic Spark"=g' /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \ + sed -i 's="PySpark"="SparkMagic PySpark"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json + +ENV SHELL=/bin/bash diff --git a/template/v2/dirs/etc/code-editor/code_editor_machine_settings.json b/template/v2/dirs/etc/code-editor/code_editor_machine_settings.json new file mode 100644 index 00000000..44fb8ef7 --- /dev/null +++ b/template/v2/dirs/etc/code-editor/code_editor_machine_settings.json @@ -0,0 
+1,4 @@ +{ + "python.terminal.activateEnvironment": false, + "python.defaultInterpreterPath": "/opt/conda/bin/python" +} diff --git a/template/v2/dirs/etc/code-editor/extensions.txt b/template/v2/dirs/etc/code-editor/extensions.txt new file mode 100644 index 00000000..29d683eb --- /dev/null +++ b/template/v2/dirs/etc/code-editor/extensions.txt @@ -0,0 +1,3 @@ +https://open-vsx.org/api/ms-toolsai/jupyter/2023.9.100/file/ms-toolsai.jupyter-2023.9.100.vsix +https://open-vsx.org/api/ms-python/python/2023.20.0/file/ms-python.python-2023.20.0.vsix +https://open-vsx.org/api/amazonwebservices/aws-toolkit-vscode/1.99.0/file/amazonwebservices.aws-toolkit-vscode-1.99.0.vsix diff --git a/template/v2/dirs/etc/conda/.condarc b/template/v2/dirs/etc/conda/.condarc new file mode 100644 index 00000000..c3616df5 --- /dev/null +++ b/template/v2/dirs/etc/conda/.condarc @@ -0,0 +1,6 @@ +envs_dirs: + - ~/.conda/envs + - /opt/conda/envs +pkgs_dirs: + - ~/.conda/pkgs + - /opt/conda/pkgs diff --git a/template/v2/dirs/etc/jupyter/jupyter_server_config.py b/template/v2/dirs/etc/jupyter/jupyter_server_config.py new file mode 100644 index 00000000..0182cc23 --- /dev/null +++ b/template/v2/dirs/etc/jupyter/jupyter_server_config.py @@ -0,0 +1,28 @@ +# Default Jupyter server config +# Note: those config can be overridden by user-level configs. + +c.ServerApp.terminado_settings = {"shell_command": ["/bin/bash"]} +c.ServerApp.tornado_settings = {"compress_response": True} + +# Do not delete files to trash. Instead, permanently delete files. +c.FileContentsManager.delete_to_trash = False + +# Allow deleting non-empty directory via file browser. 
Related documentation: +# https://github.com/jupyter-server/jupyter_server/blob/main/jupyter_server/services/contents/filemanager.py#L125-L129 +c.FileContentsManager.always_delete_dir = True + +# Enable `allow_hidden` by default, so hidden files are accessible via Jupyter server +# Related documentation: https://jupyterlab.readthedocs.io/en/stable/user/files.html#displaying-hidden-files +c.ContentsManager.allow_hidden = True + +# This will set the LanguageServerManager.extra_node_roots setting if amazon_sagemaker_sql_editor exists in the +# environment. Ignore otherwise, don't fail the JL server start +# Related documentation: https://jupyterlab-lsp.readthedocs.io/en/v3.4.0/Configuring.html +try: + import os + + module = __import__("amazon_sagemaker_sql_editor") + module_location = os.path.dirname(module.__file__) + c.LanguageServerManager.extra_node_roots = [f"{module_location}/sql-language-server"] +except: + pass diff --git a/template/v2/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf b/template/v2/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf new file mode 100644 index 00000000..cac5669b --- /dev/null +++ b/template/v2/dirs/etc/supervisor/conf.d/supervisord-code-editor.conf @@ -0,0 +1,11 @@ +[include] +files = supervisord-common.conf + +[program:codeeditorserver] +directory=%(ENV_HOME)s +command=start-code-editor +autostart=true +autorestart=true +stdout_logfile=/dev/fd/1 ; Redirect web server logs to stdout +stdout_logfile_maxbytes = 0 ; Fix: https://github.com/Supervisor/supervisor/issues/935 +stderr_logfile_maxbytes = 0 ; Fix: https://github.com/Supervisor/supervisor/issues/935 diff --git a/template/v2/dirs/etc/supervisor/conf.d/supervisord-common.conf b/template/v2/dirs/etc/supervisor/conf.d/supervisord-common.conf new file mode 100644 index 00000000..27820d4c --- /dev/null +++ b/template/v2/dirs/etc/supervisor/conf.d/supervisord-common.conf @@ -0,0 +1,18 @@ +[supervisord] +nodaemon=true + +pidfile=/var/run/supervisord/supervisord.pid 
+logfile=%(ENV_STUDIO_LOGGING_DIR)s/%(ENV_SAGEMAKER_APP_TYPE_LOWERCASE)s/supervisord/supervisord.log +logfile_maxbytes=5MB +logfile_backups=10 +redirect_stderr=true + +[unix_http_server] +file=/var/run/supervisord/supervisor.sock +chmod=0700 + +[supervisorctl] +serverurl=unix:///var/run/supervisord/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface diff --git a/template/v2/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf b/template/v2/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf new file mode 100644 index 00000000..5694ac11 --- /dev/null +++ b/template/v2/dirs/etc/supervisor/conf.d/supervisord-jupyter-lab.conf @@ -0,0 +1,11 @@ +[include] +files = supervisord-common.conf + +[program:jupyterlabserver] +directory=%(ENV_HOME)s +command=start-jupyter-server +stopasgroup=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git a/template/v2/dirs/etc/supervisor/conf.d/supervisord.conf b/template/v2/dirs/etc/supervisor/conf.d/supervisord.conf new file mode 100644 index 00000000..686f4a5c --- /dev/null +++ b/template/v2/dirs/etc/supervisor/conf.d/supervisord.conf @@ -0,0 +1,27 @@ +[supervisord] +nodaemon=true + +pidfile=/var/run/supervisord/supervisord.pid +logfile=%(ENV_STUDIO_LOGGING_DIR)s/%(ENV_SAGEMAKER_APP_TYPE_LOWERCASE)s/supervisord/supervisord.log +logfile_maxbytes=5MB +logfile_backups=10 +redirect_stderr=true + +[unix_http_server] +file=/var/run/supervisord/supervisor.sock +chmod=0700 + +[supervisorctl] +serverurl=unix:///var/run/supervisord/supervisor.sock + +[rpcinterface:supervisor] +supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface + +[program:jupyterlabserver] +directory=%(ENV_HOME)s +command=start-jupyter-server +stopasgroup=true +stdout_logfile=/dev/stdout +stdout_logfile_maxbytes=0 +stderr_logfile=/dev/stderr +stderr_logfile_maxbytes=0 diff --git 
a/template/v2/dirs/usr/local/bin/entrypoint-code-editor b/template/v2/dirs/usr/local/bin/entrypoint-code-editor new file mode 100755 index 00000000..bf55a371 --- /dev/null +++ b/template/v2/dirs/usr/local/bin/entrypoint-code-editor @@ -0,0 +1,16 @@ +#!/bin/bash + +set -e + +# Generate and execute the shell code that modifies shell variables to include +# micromamba commands (e.g. using `micromamba activate` to activate environments) +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', where supervisord is installed +micromamba activate base + +# Set up SAGEMAKER_APP_TYPE_LOWERCASE based on SAGEMAKER_APP_TYPE +export SAGEMAKER_APP_TYPE_LOWERCASE=$(echo $SAGEMAKER_APP_TYPE | tr '[:upper:]' '[:lower:]') + +mkdir -p $STUDIO_LOGGING_DIR/$SAGEMAKER_APP_TYPE_LOWERCASE/supervisord +exec supervisord -c /etc/supervisor/conf.d/supervisord-code-editor.conf -n diff --git a/template/v2/dirs/usr/local/bin/entrypoint-jupyter-server b/template/v2/dirs/usr/local/bin/entrypoint-jupyter-server new file mode 100755 index 00000000..ceda89d0 --- /dev/null +++ b/template/v2/dirs/usr/local/bin/entrypoint-jupyter-server @@ -0,0 +1,19 @@ +#!/bin/bash + +set -e + +# Generate and execute the shell code that modifies shell variables to include +# micromamba commands (e.g.
using `micromamba activate` to activate environments) +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', where supervisord is installed +micromamba activate base + +# Set up SAGEMAKER_APP_TYPE_LOWERCASE based on SAGEMAKER_APP_TYPE +export SAGEMAKER_APP_TYPE_LOWERCASE=$(echo $SAGEMAKER_APP_TYPE | tr '[:upper:]' '[:lower:]') + +# Start supervisord with supervisord configuration +# Since program 'jupyterlabserver' autostarts by default, it will be started +# automatically along with supervisord +mkdir -p $STUDIO_LOGGING_DIR/$SAGEMAKER_APP_TYPE_LOWERCASE/supervisord +exec supervisord -c /etc/supervisor/conf.d/supervisord.conf -n diff --git a/template/v2/dirs/usr/local/bin/restart-jupyter-server b/template/v2/dirs/usr/local/bin/restart-jupyter-server new file mode 100755 index 00000000..6f2af98d --- /dev/null +++ b/template/v2/dirs/usr/local/bin/restart-jupyter-server @@ -0,0 +1,6 @@ +#!/bin/bash +set -e +echo "Restarting the Jupyter server. This page should refresh in a few seconds. Note that any terminals will be closed." +echo "If this page doesn't refresh after a few seconds, try reloading your browser window." +echo "Restarting now..." +nohup supervisorctl -c /etc/supervisor/conf.d/supervisord.conf restart jupyterlabserver > /dev/null 2>&1 & diff --git a/template/v2/dirs/usr/local/bin/start-code-editor b/template/v2/dirs/usr/local/bin/start-code-editor new file mode 100755 index 00000000..bc97106c --- /dev/null +++ b/template/v2/dirs/usr/local/bin/start-code-editor @@ -0,0 +1,25 @@ +#!/bin/bash +set -e + +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', which is the default environment for sagemaker-distribution +micromamba activate base + +# Start code-editor server +if [ -n "$SAGEMAKER_APP_TYPE_LOWERCASE" ]; then + # SAGEMAKER_APP_TYPE is set, indicating the server is running within a SageMaker + # app. Configure the base url to be `//default`. 
+ sagemaker-code-editor --host 0.0.0.0 --port 8888 \ + --without-connection-token \ + --base-path "/$SAGEMAKER_APP_TYPE_LOWERCASE/default" \ + --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data \ + --extensions-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions \ + --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data +else + sagemaker-code-editor --host 0.0.0.0 --port 8888 \ + --without-connection-token \ + --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data \ + --extensions-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions \ + --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data +fi diff --git a/template/v2/dirs/usr/local/bin/start-jupyter-server b/template/v2/dirs/usr/local/bin/start-jupyter-server new file mode 100755 index 00000000..6ff4eac3 --- /dev/null +++ b/template/v2/dirs/usr/local/bin/start-jupyter-server @@ -0,0 +1,36 @@ +#!/bin/bash +set -e + +eval "$(micromamba shell hook --shell=bash)" + +# Activate conda environment 'base', which is the default environment for Cosmos +micromamba activate base + +# Start Jupyter server in rtc mode for shared spaces +if [ -n "$SAGEMAKER_APP_TYPE_LOWERCASE" ] && [ "$SAGEMAKER_SPACE_TYPE_LOWERCASE" == "shared" ]; then + jupyter labextension enable @jupyter/collaboration-extension + # SAGEMAKER_APP_TYPE is set, indicating the server is running within a SageMaker + # app. Configure the base url to be `//default`. + # SAGEMAKER_SPACE_TYPE_LOWERCASE flag is used to determine if the server should start + # in real-time-collaboration mode for a given space.
+ jupyter lab --ip 0.0.0.0 --port 8888 \ + --ServerApp.base_url="/$SAGEMAKER_APP_TYPE_LOWERCASE/default" \ + --ServerApp.token='' \ + --ServerApp.allow_origin='*' \ + --collaborative \ + --ServerApp.identity_provider_class=sagemaker_jupyterlab_extension_common.identity.SagemakerIdentityProvider \ + --YDocExtension.ystore_class=sagemaker_jupyterlab_extension_common.ydoc_override.ydoc.MySQLiteYStore + +# Start Jupyter server +elif [ -n "$SAGEMAKER_APP_TYPE_LOWERCASE" ]; then + # SAGEMAKER_APP_TYPE is set, indicating the server is running within a SageMaker + # app. Configure the base url to be `//default`. + jupyter lab --ip 0.0.0.0 --port 8888 \ + --ServerApp.base_url="/$SAGEMAKER_APP_TYPE_LOWERCASE/default" \ + --ServerApp.token='' \ + --ServerApp.allow_origin='*' +else + jupyter lab --ip 0.0.0.0 --port 8888 \ + --ServerApp.token='' \ + --ServerApp.allow_origin='*' +fi diff --git a/template/v2/gpu_cuda_version.json b/template/v2/gpu_cuda_version.json new file mode 100644 index 00000000..72f5cebb --- /dev/null +++ b/template/v2/gpu_cuda_version.json @@ -0,0 +1,4 @@ +{ + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-12.4.1", + "CUDA_MAJOR_MINOR_VERSION": "12.4" +} \ No newline at end of file diff --git a/test/test_artifacts/v2/altair.test.Dockerfile b/test/test_artifacts/v2/altair.test.Dockerfile new file mode 100644 index 00000000..3747904b --- /dev/null +++ b/test/test_artifacts/v2/altair.test.Dockerfile @@ -0,0 +1,20 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import altair"] + +RUN sudo apt-get update && \ + sudo apt-get install -y git && \ + git clone --recursive https://github.com/altair-viz/altair_notebooks.git && \ + : + +WORKDIR "altair_notebooks/notebooks" +COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_altair_example_notebooks.sh ./ +RUN chmod +x run_altair_example_notebooks.sh + +# Example notebooks' dependencies +RUN micromamba install -y --freeze-installed -c 
conda-forge papermill vega_datasets pandas matplotlib numpy + +CMD ["./run_altair_example_notebooks.sh"] diff --git a/test/test_artifacts/v2/amazon-codewhisperer-jupyterlab-ext.test.Dockerfile b/test/test_artifacts/v2/amazon-codewhisperer-jupyterlab-ext.test.Dockerfile new file mode 100644 index 00000000..1ef68a71 --- /dev/null +++ b/test/test_artifacts/v2/amazon-codewhisperer-jupyterlab-ext.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import amazon_codewhisperer_jupyterlab_ext"] diff --git a/test/test_artifacts/v2/amazon-sagemaker-sql-magic.test.Dockerfile b/test/test_artifacts/v2/amazon-sagemaker-sql-magic.test.Dockerfile new file mode 100644 index 00000000..642c9e6a --- /dev/null +++ b/test/test_artifacts/v2/amazon-sagemaker-sql-magic.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import amazon_sagemaker_sql_magic"] diff --git a/test/test_artifacts/v2/amazon_sagemaker_sql_editor.test.Dockerfile b/test/test_artifacts/v2/amazon_sagemaker_sql_editor.test.Dockerfile new file mode 100644 index 00000000..2182ba15 --- /dev/null +++ b/test/test_artifacts/v2/amazon_sagemaker_sql_editor.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import amazon_sagemaker_sql_editor"] diff --git a/test/test_artifacts/v2/autogluon.test.Dockerfile b/test/test_artifacts/v2/autogluon.test.Dockerfile new file mode 100644 index 00000000..fb02d1ab --- /dev/null +++ b/test/test_artifacts/v2/autogluon.test.Dockerfile @@ -0,0 +1,13 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE as base + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +RUN micromamba install --freeze-installed -y -c conda-forge pytest + +RUN git clone --recursive 
https://github.com/autogluon/autogluon.git + +WORKDIR "autogluon" +COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_autogluon_tests.sh . +RUN chmod +x run_autogluon_tests.sh +CMD ["./run_autogluon_tests.sh"] diff --git a/test/test_artifacts/v2/aws-glue-sessions/glue_notebook.ipynb b/test/test_artifacts/v2/aws-glue-sessions/glue_notebook.ipynb new file mode 100644 index 00000000..b491f310 --- /dev/null +++ b/test/test_artifacts/v2/aws-glue-sessions/glue_notebook.ipynb @@ -0,0 +1,91 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "1a710e7c-7ebf-477a-88b5-3d85cb08cf19", + "metadata": {}, + "outputs": [], + "source": [ + "%status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ce599e8-6dcc-42c4-b10d-8e4e898eb436", + "metadata": {}, + "outputs": [], + "source": [ + "%stop_session" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "013565d2-26dc-4710-83ca-1d00711be6c9", + "metadata": {}, + "outputs": [], + "source": [ + "%glue_ray" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e81bb7c2-bec2-4c4b-8d4d-59bf5e6a9daf", + "metadata": {}, + "outputs": [], + "source": [ + "%etl" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7a0b5de5-bf14-40f9-a944-f98e5a96e0f4", + "metadata": {}, + "outputs": [], + "source": [ + "%streaming" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cf24f505-6f26-447e-acc3-4af4556bb386", + "metadata": {}, + "outputs": [], + "source": [ + "%help" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "33149d30-420e-4ebf-b32c-ca635db7cb10", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Glue PySpark and Ray", + "language": "python", + "name": "glue_pyspark" + }, + "language_info": { + "codemirror_mode": { + "name": "python", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": 
"Python_Glue_Session", + "pygments_lexer": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test/test_artifacts/v2/aws-glue-sessions/run_glue_sessions_notebook.sh b/test/test_artifacts/v2/aws-glue-sessions/run_glue_sessions_notebook.sh new file mode 100644 index 00000000..1aa73e37 --- /dev/null +++ b/test/test_artifacts/v2/aws-glue-sessions/run_glue_sessions_notebook.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# Create an empty notebook file for papermill's output +touch nb_output.ipynb + +kernels=('glue_pyspark' 'glue_spark') +nb='script' +for kernel in ${kernels[@]}; do + papermill 'glue_notebook.ipynb' 'nb_output.ipynb' -k $kernel +done diff --git a/test/test_artifacts/v2/boto3.test.Dockerfile b/test/test_artifacts/v2/boto3.test.Dockerfile new file mode 100644 index 00000000..e195e39c --- /dev/null +++ b/test/test_artifacts/v2/boto3.test.Dockerfile @@ -0,0 +1,15 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN sudo apt-get update && sudo apt-get install -y git && \ + git clone --recursive https://github.com/boto/boto3.git && \ + : + +# For Running boto3 tests, we need pytest +RUN micromamba install -y --freeze-installed -c conda-forge pytest + +WORKDIR "boto3" +COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_boto3_tests.sh . +RUN chmod +x run_boto3_tests.sh +CMD ["./run_boto3_tests.sh"] diff --git a/test/test_artifacts/v2/glue-sessions.test.Dockerfile b/test/test_artifacts/v2/glue-sessions.test.Dockerfile new file mode 100644 index 00000000..b820533e --- /dev/null +++ b/test/test_artifacts/v2/glue-sessions.test.Dockerfile @@ -0,0 +1,13 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +COPY --chown=$MAMBA_USER:$MAMBA_USER aws-glue-sessions/run_glue_sessions_notebook.sh . +RUN chmod +x run_glue_sessions_notebook.sh +COPY --chown=$MAMBA_USER:$MAMBA_USER aws-glue-sessions/glue_notebook.ipynb . 
+RUN chmod +x glue_notebook.ipynb + +RUN micromamba install -y --freeze-installed -c conda-forge papermill + +CMD ["./run_glue_sessions_notebook.sh"] diff --git a/test/test_artifacts/v2/jupyter-ai.test.Dockerfile b/test/test_artifacts/v2/jupyter-ai.test.Dockerfile new file mode 100644 index 00000000..34ddaa3f --- /dev/null +++ b/test/test_artifacts/v2/jupyter-ai.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import jupyter_ai"] diff --git a/test/test_artifacts/v2/jupyter-collaboration.test.Dockerfile b/test/test_artifacts/v2/jupyter-collaboration.test.Dockerfile new file mode 100644 index 00000000..0b005de1 --- /dev/null +++ b/test/test_artifacts/v2/jupyter-collaboration.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import jupyter_collaboration; import jupyter_server_fileid; from jupyter_ydoc import YBlob; yblob = YBlob(); assert yblob.get() == b''; yblob.set(b'012'); assert yblob.get() == b'012'"] diff --git a/test/test_artifacts/v2/jupyter-dash.test.Dockerfile b/test/test_artifacts/v2/jupyter-dash.test.Dockerfile new file mode 100644 index 00000000..540527ad --- /dev/null +++ b/test/test_artifacts/v2/jupyter-dash.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import plotly.express as px; import sys; fig = px.bar(x=['a', 'b', 'c'], y=[1, 3, 2]); fig.write_html('first_figure.html', auto_open=False)"] diff --git a/test/test_artifacts/v2/jupyterlab-git.test.Dockerfile b/test/test_artifacts/v2/jupyterlab-git.test.Dockerfile new file mode 100644 index 00000000..7d5cbd96 --- /dev/null +++ b/test/test_artifacts/v2/jupyterlab-git.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + 
+ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import jupyterlab_git"] diff --git a/test/test_artifacts/v2/jupyterlab-lsp.test.Dockerfile b/test/test_artifacts/v2/jupyterlab-lsp.test.Dockerfile new file mode 100644 index 00000000..c13df62a --- /dev/null +++ b/test/test_artifacts/v2/jupyterlab-lsp.test.Dockerfile @@ -0,0 +1,7 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import jupyter_lsp"] +CMD ["python", "-c", "import jupyterlab_lsp"] diff --git a/test/test_artifacts/v2/keras.test.Dockerfile b/test/test_artifacts/v2/keras.test.Dockerfile new file mode 100644 index 00000000..f5cb224f --- /dev/null +++ b/test/test_artifacts/v2/keras.test.Dockerfile @@ -0,0 +1,24 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +RUN sudo apt-get update && sudo apt-get install -y git graphviz && \ + git clone --recursive https://github.com/keras-team/keras-io.git && \ + : + +# Some of the keras guides requires pydot and graphviz to be installed +RUN micromamba install -y --freeze-installed conda-forge::pydot "nvidia::cuda-nvcc>=11.8,<11.9" +ENV XLA_FLAGS=--xla_gpu_cuda_data_dir=/opt/conda + +WORKDIR "keras-io/guides" + +# Checkout a specific commit known to be compatible with the runtime's current version of TensorFlow. +# keras-io made backwards incompatible changes that broke these tests. Pinning at this commit for now +# at least until the runtime's TensorFlow dependency is upgraded to the next minor version +RUN git checkout 861b59747b43ce326bb0a12384a07d6632249901 + +COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_keras_tests.sh . 
+RUN chmod +x run_keras_tests.sh +# Run tests in run_keras_tests.sh +CMD ["./run_keras_tests.sh"] diff --git a/test/test_artifacts/v2/matplotlib.test.Dockerfile b/test/test_artifacts/v2/matplotlib.test.Dockerfile new file mode 100644 index 00000000..4d290016 --- /dev/null +++ b/test/test_artifacts/v2/matplotlib.test.Dockerfile @@ -0,0 +1,18 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +RUN sudo apt-get update && sudo apt-get install -y git && \ + git clone --recursive https://github.com/matplotlib/matplotlib.git && \ + : + +# TODO: Come up with a different way to test matplotlib installation. +# Currently we will be running all the python files in galleries/tutorials +# But this directory structure might change in the future. In the past, "galleries/tutorials" +# didn't exist. Previously the repository just had a "tutorials" folder. +WORKDIR "matplotlib/galleries/tutorials" +COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_matplotlib_tests.sh . +RUN chmod +x run_matplotlib_tests.sh +# Run tests in run_matplotlib_tests.sh +CMD ["./run_matplotlib_tests.sh"] diff --git a/test/test_artifacts/v2/notebook.test.Dockerfile b/test/test_artifacts/v2/notebook.test.Dockerfile new file mode 100644 index 00000000..9afb9e08 --- /dev/null +++ b/test/test_artifacts/v2/notebook.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import notebook"] diff --git a/test/test_artifacts/v2/numpy.test.Dockerfile b/test/test_artifacts/v2/numpy.test.Dockerfile new file mode 100644 index 00000000..bab08af3 --- /dev/null +++ b/test/test_artifacts/v2/numpy.test.Dockerfile @@ -0,0 +1,13 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Inorder to test numpy, we need pytest and hypothesis to be installed. 
+RUN micromamba install -y --freeze-installed -c conda-forge pytest hypothesis meson +# Some unit tests in numpy requires gcc to be installed. +RUN sudo apt-get update && sudo apt-get install -y gcc +# Check https://numpy.org/doc/stable/reference/testing.html +# numpy.test() returns True if tests succeed else False. +# We need to flip the result so that we exit with status code as 0 if all the tests succeeded. +CMD ["python", "-c", "import numpy,sys; tests_succeeded = numpy.test(); sys.exit(not tests_succeeded)"] diff --git a/test/test_artifacts/v2/pandas.test.Dockerfile b/test/test_artifacts/v2/pandas.test.Dockerfile new file mode 100644 index 00000000..f3c2f73e --- /dev/null +++ b/test/test_artifacts/v2/pandas.test.Dockerfile @@ -0,0 +1,8 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN micromamba install -y --freeze-installed -c conda-forge pytest hypothesis pytest-asyncio lxml + +COPY --chown=$MAMBA_USER:$MAMBA_USER run_pandas_tests.py . 
+CMD ["python", "run_pandas_tests.py"] diff --git a/test/test_artifacts/v2/python-lsp-server.test.Dockerfile b/test/test_artifacts/v2/python-lsp-server.test.Dockerfile new file mode 100644 index 00000000..eb73f0e0 --- /dev/null +++ b/test/test_artifacts/v2/python-lsp-server.test.Dockerfile @@ -0,0 +1,7 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# "Confirm that installation succeeded" by running this - https://github.com/python-lsp/python-lsp-server#installation +CMD ["pylsp", "--help"] diff --git a/test/test_artifacts/v2/pytorch.examples.Dockerfile b/test/test_artifacts/v2/pytorch.examples.Dockerfile new file mode 100644 index 00000000..f2c3231a --- /dev/null +++ b/test/test_artifacts/v2/pytorch.examples.Dockerfile @@ -0,0 +1,21 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN git clone --recursive https://github.com/pytorch/examples + +# During automation some tests fails with `libcuda.so: cannot open shared object file: No such file or directory` +# But libcuda.so.1 exists. Adding this resolves, but also adding `2>/dev/null` to ignore if not needed. +RUN sudo ln -s /usr/lib/x86_64-linux-gnu/libcuda.so.1 /usr/lib/x86_64-linux-gnu/libcuda.so 2>/dev/null + +WORKDIR "examples" + +# There is a line in run_python_examples.sh which looks like: BASE_DIR=`pwd`"/"`dirname $0` +# When we run the shell script through /usr/local/bin/_entrypoint.sh, that line above doesn't work correctly. In our +# case, we properly set `pwd` to the directory that contains all the examples, so we just modify the script to change +# the previous line to look like: BASE_DIR=`pwd` +RUN sed -i 's/^BASE_DIR=.*pwd.*dirname.*/BASE_DIR=`pwd`/' run_python_examples.sh +RUN ./run_python_examples.sh install_deps + +# We skip `imagenet` because it requires a lot of resources and so aren't a good fit for us. 
+CMD ["./run_python_examples.sh", "dcgan,fast_neural_style,distributed,mnist,mnist_forward_forward,mnist_hogwild,mnist_rnn,regression,reinforcement_learning,siamese_network,super_resolution,time_sequence_prediction,vae,word_language_model,fx"] diff --git a/test/test_artifacts/v2/run_pandas_tests.py b/test/test_artifacts/v2/run_pandas_tests.py new file mode 100644 index 00000000..2995581f --- /dev/null +++ b/test/test_artifacts/v2/run_pandas_tests.py @@ -0,0 +1,33 @@ +import os +import site +import sys + +import pandas + +# We change the working directory here because there is at least one test (`test_html_template_extends_options`) which +# expects the directory to be 'pandas'. Ideally, we would have changed directories through a `WORKDIR` in Dockerfile +# but unfortunately it doesn't accept dynamic arguments. +site_packages_dir = site.getsitepackages()[0] +os.chdir(site_packages_dir) + +# pandas.test() by default runs with `-m "not slow and not network and not db"`. However, we found a few tests in the +# test_network.py file that should have been marked as "network" but weren't, so we skip those here. We skip S3 specific +# tests for the same reason. +# We skip `test_plain_axes` too: the Pandas dev environment expects matplotlib to be ">=3.6.1, <3.7.0" but the runtime +# expectation is just ">=3.6.1". Our image contains v3.7.1, so it meets the latter requirement but not the former. This +# particular test, however, only works with the former requirement. (We verified that the test succeeds if we manually +# drop the version to v3.6.x) So, we skip it. 
+# Also skipping specific TestFrameFlexArithmetic test; failing due to known issue https://github.com/pandas-dev/pandas/issues/54546 +tests_succeeded = pandas.test( + [ + "-m", + "(not slow and not network and not db)", + "-k", + "(not test_network and not s3 and not test_plain_axes)", + "--no-strict-data-files", + "--ignore", + "pandas/tests/frame/test_arithmetic.py::TestFrameFlexArithmetic::test_floordiv_axis0_numexpr_path", + ] +) + +sys.exit(not tests_succeeded) diff --git a/test/test_artifacts/v2/sagemaker-code-editor.test.Dockerfile b/test/test_artifacts/v2/sagemaker-code-editor.test.Dockerfile new file mode 100644 index 00000000..aca7efdf --- /dev/null +++ b/test/test_artifacts/v2/sagemaker-code-editor.test.Dockerfile @@ -0,0 +1,9 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_sagemaker_code_editor_tests.sh ./ +RUN chmod +x run_sagemaker_code_editor_tests.sh + +CMD ["./run_sagemaker_code_editor_tests.sh"] diff --git a/test/test_artifacts/v2/sagemaker-headless-execution-driver.test.Dockerfile b/test/test_artifacts/v2/sagemaker-headless-execution-driver.test.Dockerfile new file mode 100644 index 00000000..5f57f316 --- /dev/null +++ b/test/test_artifacts/v2/sagemaker-headless-execution-driver.test.Dockerfile @@ -0,0 +1,7 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Execute the unit tests for sagemaker-headless-execution-driver +CMD ["python", "-c", "import sagemaker_headless_execution_driver.headless_execution as execution_driver"] diff --git a/test/test_artifacts/v2/sagemaker-studio-analytics-extension.test.Dockerfile b/test/test_artifacts/v2/sagemaker-studio-analytics-extension.test.Dockerfile new file mode 100644 index 00000000..0aba7650 --- /dev/null +++ b/test/test_artifacts/v2/sagemaker-studio-analytics-extension.test.Dockerfile @@ -0,0 +1,11 @@ +ARG 
SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +COPY --chown=$MAMBA_USER:$MAMBA_USER ./sagemaker-studio-analytics-extension . +RUN chmod +x ./sagemaker-studio-analytics-extension.sh + +RUN micromamba install -y --freeze-installed -c conda-forge papermill + +CMD ["./sagemaker-studio-analytics-extension.sh"] diff --git a/test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker-studio-analytics-extension.sh b/test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker-studio-analytics-extension.sh new file mode 100644 index 00000000..c864676b --- /dev/null +++ b/test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker-studio-analytics-extension.sh @@ -0,0 +1,2 @@ +#!/bin/bash +papermill 'sagemaker_studio_analytics_extension.ipynb' 'nb_output.ipynb' diff --git a/test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker_studio_analytics_extension.ipynb b/test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker_studio_analytics_extension.ipynb new file mode 100644 index 00000000..fb92e3a8 --- /dev/null +++ b/test/test_artifacts/v2/sagemaker-studio-analytics-extension/sagemaker_studio_analytics_extension.ipynb @@ -0,0 +1,53 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3d5383b9-e06d-42de-b7b7-3ad9603c9585", + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext sagemaker_studio_analytics_extension.magics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "feb385d0-c80e-4d65-afe7-8e6beb07e836", + "metadata": {}, + "outputs": [], + "source": [ + "%sm_analytics?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3faecce-f008-4ac9-94e7-d6bfc6f88bb1", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test/test_artifacts/v2/scipy.test.Dockerfile b/test/test_artifacts/v2/scipy.test.Dockerfile new file mode 100644 index 00000000..e117b158 --- /dev/null +++ b/test/test_artifacts/v2/scipy.test.Dockerfile @@ -0,0 +1,12 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +# Inorder to test scipy, we need pytest and hypothesis to be installed. +RUN micromamba install -y --freeze-installed -c conda-forge pytest hypothesis scipy-tests pooch +# Check https://github.com/numpy/numpy/blob/main/doc/TESTS.rst +# Note: Testing guidelines are same for numpy and scipy. +# scipy.test() returns True if tests succeed else False. +# We need to flip the result so that we exit with status code as 0 if all the tests succeeded. 
+CMD ["python", "-c", "import scipy,sys; tests_succeeded = scipy.test(); sys.exit(not tests_succeeded)"] diff --git a/test/test_artifacts/v2/scripts/run_altair_example_notebooks.sh b/test/test_artifacts/v2/scripts/run_altair_example_notebooks.sh new file mode 100644 index 00000000..1bb4f370 --- /dev/null +++ b/test/test_artifacts/v2/scripts/run_altair_example_notebooks.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +# Create an empty notebook file for papermill's output +touch nb_output.ipynb + +# List of example notebooks under the altair_notebooks' notebooks/ subdirectory, excluding examples +example_notebooks=('02-Tutorial.ipynb' + '03-ScatterCharts.ipynb' + '04-BarCharts.ipynb' + '05-LineCharts.ipynb' + '07-LayeredCharts.ipynb' + '08-CarsDataset.ipynb' +) + +for nb in ${example_notebooks[@]}; do + papermill $nb 'nb_output.ipynb' +done diff --git a/test/test_artifacts/v2/scripts/run_autogluon_tests.sh b/test/test_artifacts/v2/scripts/run_autogluon_tests.sh new file mode 100644 index 00000000..7a136c25 --- /dev/null +++ b/test/test_artifacts/v2/scripts/run_autogluon_tests.sh @@ -0,0 +1,17 @@ +#!/bin/bash + +AUTOGLUON_VERSION=$(micromamba list | grep autogluon | tr -s ' ' | head -n 1 | cut -d ' ' -f 3) +git checkout tags/v$AUTOGLUON_VERSION + +# Run autogluon quick start as end-to-end check +jupyter nbconvert --execute --to python docs/tutorials/tabular/tabular-quick-start.ipynb +jupyter nbconvert --execute --to python docs/tutorials/timeseries/forecasting-quick-start.ipynb + +# Detect gpu and run multimodal quick start if presented +python -c "import torch; exit(0) if torch.cuda.is_available() else exit(1)" +ret=$? 
+ +if [ $ret -eq 0 ] +then + jupyter nbconvert --execute --to python docs/tutorials/multimodal/multimodal_prediction/multimodal-quick-start.ipynb +fi diff --git a/test/test_artifacts/v2/scripts/run_boto3_tests.sh b/test/test_artifacts/v2/scripts/run_boto3_tests.sh new file mode 100644 index 00000000..ab0c7803 --- /dev/null +++ b/test/test_artifacts/v2/scripts/run_boto3_tests.sh @@ -0,0 +1,10 @@ +#!/bin/bash + +# We need to checkout the version of boto3 that is installed in the mamba environment. + +boto3_version=$(micromamba list | grep boto3 | tr -s ' ' | cut -d ' ' -f 3) +# Checkout the corresponding boto3 version +git checkout tags/$boto3_version + +# Run the unit and functional tests +pytest tests/unit tests/functional || exit $? diff --git a/test/test_artifacts/v2/scripts/run_keras_tests.sh b/test/test_artifacts/v2/scripts/run_keras_tests.sh new file mode 100644 index 00000000..aa8f7fdd --- /dev/null +++ b/test/test_artifacts/v2/scripts/run_keras_tests.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Ref: https://keras.io/guides/, https://github.com/keras-team/keras-io/tree/master + +for file in *.py; do + if [ "$file" != "transfer_learning.py" ]; then + # skipping transfer_learning.py because it has 20 epochs and it takes a very long time to execute + # https://github.com/keras-team/keras-io/blob/master/guides/transfer_learning.py#L562 + python "$file" || exit $? + fi +done diff --git a/test/test_artifacts/v2/scripts/run_matplotlib_tests.sh b/test/test_artifacts/v2/scripts/run_matplotlib_tests.sh new file mode 100644 index 00000000..848e7421 --- /dev/null +++ b/test/test_artifacts/v2/scripts/run_matplotlib_tests.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# Run all the tutorials +for file in *.py; do + python "$file" || exit $? 
+done diff --git a/test/test_artifacts/v2/scripts/run_pysdk_tests.sh b/test/test_artifacts/v2/scripts/run_pysdk_tests.sh new file mode 100644 index 00000000..2f49f122 --- /dev/null +++ b/test/test_artifacts/v2/scripts/run_pysdk_tests.sh @@ -0,0 +1,20 @@ +#!/bin/bash + +# We need to checkout the version of sagemaker-python-sdk that is installed in the mamba environment. + +pysdk_version=$(micromamba list | grep sagemaker-python-sdk | tr -s ' ' | cut -d ' ' -f 3) +# Checkout the corresponding sagemaker-python-sdk version +git checkout tags/v$pysdk_version + +# Install test dependencies of sagemaker-python-sdk +# Using pip as some of the packages are not available on conda-forge +pip install -r requirements/extras/test_requirements.txt + +# Run the unit tests, ignoring tests which require AWS Configuration +# TODO: Re-evaluate the ignored tests since we are setting the AWS_DEFAULT_REGION as part of the Dockerfile. +pytest tests/unit --ignore=tests/unit/sagemaker/feature_store/ --ignore=tests/unit/sagemaker/jumpstart/ --ignore=tests/unit/sagemaker/workflow/ \ + --ignore=tests/unit/sagemaker/async_inference --ignore=tests/unit/test_model_card.py --ignore=tests/unit/test_model_card.py --ignore=tests/unit/test_processing.py \ + --ignore=tests/unit/test_tensorboard.py --ignore=tests/unit/sagemaker/async_inference --ignore=tests/unit/sagemaker/experiments --ignore tests/unit/sagemaker/local \ + --ignore tests/unit/sagemaker/monitor/test_data_capture_config.py --ignore tests/unit/sagemaker/experiments --ignore tests/unit/sagemaker/remote_function \ + --ignore tests/unit/sagemaker/model/test_deploy.py --deselect tests/unit/test_estimator.py::test_insert_invalid_source_code_args \ + --deselect tests/unit/sagemaker/tensorflow/test_estimator.py::test_insert_invalid_source_code_args || exit $? 
diff --git a/test/test_artifacts/v2/scripts/run_sagemaker_code_editor_tests.sh b/test/test_artifacts/v2/scripts/run_sagemaker_code_editor_tests.sh new file mode 100644 index 00000000..0b7dda58 --- /dev/null +++ b/test/test_artifacts/v2/scripts/run_sagemaker_code_editor_tests.sh @@ -0,0 +1,70 @@ +#!/bin/bash + +# Exit immediately if a command exits with a non-zero status. +set -e + +sagemaker-code-editor --version +echo "Verified that sagemaker-code-editor is installed" + +# Verify that data dirs are created and have correct ownership +data_dirs=("/opt/amazon/sagemaker/sagemaker-code-editor-server-data" "/opt/amazon/sagemaker/sagemaker-code-editor-user-data") +data_dirs_owner="sagemaker-user" + +for dir in "${data_dirs[@]}"; do + if [[ -d "$dir" ]]; then + echo "$dir exists." + if [[ $(stat -c '%U' "$dir") == "$data_dirs_owner" ]]; then + echo "$dir is owned by $data_dirs_owner." + else + echo "Error: $dir is not owned by $data_dirs_owner." + exit 1 + fi + else + echo "Error: $dir does not exist." + exit 1 + fi +done + +# Check that extensions are installed correctly +extensions_base_dir="/opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions" +if [[ ! -d $extensions_base_dir ]]; then + echo "Extension base directory $extensions_base_dir does not exist." + exit 1 +fi + +installed_extensions=("ms-python.python" "ms-toolsai.jupyter" "amazonwebservices.aws-toolkit-vscode") +for extension in "${installed_extensions[@]}"; do + # In this pattern, we're looking for versioning to follow immediately after the extension name + # For ex - ms-toolsai.jupyter-2023.9.100 + pattern="${extension}-[0-9]*" + + # Use the find command to search for directories matching the current pattern + found_dirs=$(find "$extensions_base_dir" -maxdepth 1 -type d -name "$pattern") + + if [[ -z $found_dirs ]]; then + echo "Directory matching pattern '$pattern' does not exist in $extensions_base_dir." 
+ exit 1 + else + echo "Directory exists for pattern '$pattern':" + echo "$found_dirs" + fi +done +echo "Verified that all extension folders are present in $extensions_base_dir." + +# Check that settings file is copied +MACHINE_SETTINGS_FILE_PATH="/opt/amazon/sagemaker/sagemaker-code-editor-server-data/data/Machine/settings.json" +if [ ! -f "$MACHINE_SETTINGS_FILE_PATH" ]; then + echo "Error: Settings file does not exist at $MACHINE_SETTINGS_FILE_PATH." + exit 1 +fi + +echo "Settings file exists at $MACHINE_SETTINGS_FILE_PATH." + +# Check that code-editor artifacts folder is deleted +ARTIFACTS_DIR="/etc/code-editor" +if [ ! -d "$ARTIFACTS_DIR" ]; then + echo "Directory $ARTIFACTS_DIR has been successfully removed." +else + echo "Error: Directory $ARTIFACTS_DIR still exists." + exit 1 +fi diff --git a/test/test_artifacts/v2/serve.test.Dockerfile b/test/test_artifacts/v2/serve.test.Dockerfile new file mode 100644 index 00000000..19dd8d5d --- /dev/null +++ b/test/test_artifacts/v2/serve.test.Dockerfile @@ -0,0 +1,6 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +CMD ["python", "-c", "import fastapi, uvicorn, langchain"] diff --git a/test/test_artifacts/v2/sm-python-sdk.test.Dockerfile b/test/test_artifacts/v2/sm-python-sdk.test.Dockerfile new file mode 100644 index 00000000..623efbf6 --- /dev/null +++ b/test/test_artifacts/v2/sm-python-sdk.test.Dockerfile @@ -0,0 +1,12 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN git clone --recursive https://github.com/aws/sagemaker-python-sdk.git + +# Sagemaker Python SDK's unit tests requires AWS_DEFAULT_REGION to be set. So, using an arbitrary value of us-east-1 +ENV AWS_DEFAULT_REGION=us-east-1 +WORKDIR "sagemaker-python-sdk" +COPY --chown=$MAMBA_USER:$MAMBA_USER scripts/run_pysdk_tests.sh . 
+RUN chmod +x run_pysdk_tests.sh +CMD ["./run_pysdk_tests.sh"] diff --git a/test/test_artifacts/v2/tensorflow.examples.Dockerfile b/test/test_artifacts/v2/tensorflow.examples.Dockerfile new file mode 100644 index 00000000..7d5d6e64 --- /dev/null +++ b/test/test_artifacts/v2/tensorflow.examples.Dockerfile @@ -0,0 +1,16 @@ +ARG SAGEMAKER_DISTRIBUTION_IMAGE +FROM $SAGEMAKER_DISTRIBUTION_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 +RUN sudo apt-get update && \ + sudo apt-get install -y git && \ + git clone --recursive https://github.com/tensorflow/docs.git && \ + : + +WORKDIR "docs/site/en/guide" +COPY --chown=$MAMBA_USER:$MAMBA_USER tensorflow ./ +RUN chmod +x run_tensorflow_example_notebooks.sh + +RUN micromamba install -y --freeze-installed -c conda-forge papermill + +CMD ["./run_tensorflow_example_notebooks.sh"] diff --git a/test/test_artifacts/v2/tensorflow/run_tensorflow_example_notebooks.sh b/test/test_artifacts/v2/tensorflow/run_tensorflow_example_notebooks.sh new file mode 100644 index 00000000..22887459 --- /dev/null +++ b/test/test_artifacts/v2/tensorflow/run_tensorflow_example_notebooks.sh @@ -0,0 +1,29 @@ +#!/bin/bash + +# Create an empty notebook file for papermill's output +touch nb_output.ipynb + +# List of all referenced notebook files in Basics, Core, and In Depth sections of Tensorflow docs, excluding experimentals. 
+# https://www.tensorflow.org/guide +example_notebooks=('basics.ipynb' + 'tensor.ipynb' + 'variable.ipynb' + 'autodiff.ipynb' + 'intro_to_graphs.ipynb' + 'intro_to_modules.ipynb' + 'basic_training_loops.ipynb' + 'core/quickstart_core.ipynb' + 'core/logistic_regression_core.ipynb' + 'core/mlp_core.ipynb' + 'core/matrix_core.ipynb' + 'core/optimizers_core.ipynb' + 'tensor_slicing.ipynb' + 'advanced_autodiff.ipynb' + 'ragged_tensor.ipynb' + 'sparse_tensor.ipynb' + 'random_numbers.ipynb' +) + +for nb in ${example_notebooks[@]}; do + papermill $nb 'nb_output.ipynb' +done diff --git a/test/test_main.py b/test/test_main.py index 7fc3b372..575ca644 100644 --- a/test/test_main.py +++ b/test/test_main.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import base64 +import json import pytest @@ -98,6 +99,15 @@ def _create_prev_docker_file(file_path): """ARG TAG_FOR_BASE_MICROMAMBA_IMAGE FROM mambaorg / micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE\nprevious_dockerfile\n""" ) + +def _create_gpu_cuda_config_file(file_path): + gpu_cuda_config_context = { + "TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-test-version", + "CUDA_MAJOR_MINOR_VERSION": "test-major-minor-version" + } + + with open(file_path, "w") as gpu_cuda_config: + json.dump(gpu_cuda_config_context, gpu_cuda_config, indent=4) def _create_new_version_artifacts_helper(mocker, tmp_path, version, target_version): @@ -111,12 +121,14 @@ def mock_get_dir_for_version(base_version): input_version = get_semver(version) # Create directory for base version input_version_dir = create_and_get_semver_dir(input_version) + print('input_version_dir', input_version_dir) # Create env.in and env.out for base version _create_docker_cpu_env_in_file(input_version_dir + "/cpu.env.in") _create_docker_gpu_env_in_file(input_version_dir + "/gpu.env.in") _create_docker_cpu_env_out_file(input_version_dir + "/cpu.env.out") _create_docker_gpu_env_out_file(input_version_dir + "/gpu.env.out") _create_prev_docker_file(input_version_dir + 
"/Dockerfile") + _create_gpu_cuda_config_file(input_version_dir + "/gpu_cuda_version.json") os.makedirs(tmp_path / "template") next_version = get_semver(target_version) next_major_version = "v" + str(next_version.major) @@ -125,6 +137,7 @@ def mock_get_dir_for_version(base_version): # Create dirs directory under template os.makedirs(tmp_path / "template" / next_major_version / "dirs") _create_template_docker_file(tmp_path / "template" / next_major_version / "Dockerfile") + _create_gpu_cuda_config_file(tmp_path / "template" / next_major_version / "gpu_cuda_version.json") def _create_additional_packages_env_in_file_helper( @@ -226,9 +239,16 @@ def _create_and_assert_patch_version_upgrade( new_version_dir = new_version_dir / ("v" + str(next_version) + "-" + pre_release_identifier) next_major_version_dir_name = "v" + str(next_version.major) if next_version.major == 0: - rel_path.side_effect = [str(base_version_dir / "Dockerfile")] + rel_path.side_effect = [ + str(base_version_dir / "Dockerfile"), + str(base_version_dir / "gpu_cuda_version.json"), + ] else: - rel_path.side_effect = [str(base_version_dir, "Dockerfile"), str(base_version_dir / "dirs")] + rel_path.side_effect = [ + str(base_version_dir / "Dockerfile"), + str(base_version_dir / "gpu_cuda_version.json"), + str(base_version_dir / "dirs"), + ] _create_new_version_artifacts_helper(mocker, tmp_path, input_version, str(next_version)) _create_additional_packages_env_in_file_helper( mocker, tmp_path, str(next_version), include_additional_package, use_existing_package_as_additional_package @@ -305,10 +325,14 @@ def _create_and_assert_minor_version_upgrade( new_version_dir = new_version_dir / ("v" + str(next_version) + "-" + pre_release_identifier) next_major_version_dir_name = "v" + str(next_version.major) if next_version.major == 0: - rel_path.side_effect = [str(tmp_path / "template" / next_major_version_dir_name / "Dockerfile")] + rel_path.side_effect = [ + str(tmp_path / "template" / next_major_version_dir_name / 
"Dockerfile"), + str(tmp_path / "template" / next_major_version_dir_name / "gpu_cuda_version.json"), + ] else: rel_path.side_effect = [ str(tmp_path / "template" / next_major_version_dir_name / "Dockerfile"), + str(tmp_path / "template" / next_major_version_dir_name / "gpu_cuda_version.json"), str(tmp_path / "template" / next_major_version_dir_name / "dirs"), ] _create_new_version_artifacts_helper(mocker, tmp_path, input_version, "1.3.0") @@ -327,8 +351,11 @@ def _create_and_assert_minor_version_upgrade( assert "cpu.env.in" in new_version_dir_files assert "gpu.env.in" in new_version_dir_files assert "Dockerfile" in new_version_dir_files + assert "gpu_cuda_version.json" in new_version_dir_files with open(new_version_dir / "Dockerfile", "r") as f: assert "template_dockerfile" in f.read() + with open(new_version_dir / "gpu_cuda_version.json", "r") as f: + assert "test-major-minor-version" in f.read() if next_version.major >= 1: assert "dirs" in new_version_dir_files if include_additional_package: @@ -386,10 +413,14 @@ def _create_and_assert_major_version_upgrade( new_version_dir = new_version_dir / ("v" + str(next_version) + "-" + pre_release_identifier) next_major_version_dir_name = "v" + str(next_version.major) if next_version.major == 0: - rel_path.side_effect = [str(tmp_path / "template" / next_major_version_dir_name / "Dockerfile")] + rel_path.side_effect = [ + str(tmp_path / "template" / next_major_version_dir_name / "Dockerfile"), + str(tmp_path / "template" / next_major_version_dir_name / "gpu_cuda_version.json"), + ] else: rel_path.side_effect = [ str(tmp_path / "template" / next_major_version_dir_name / "Dockerfile"), + str(tmp_path / "template" / next_major_version_dir_name / "gpu_cuda_version.json"), str(tmp_path / "template" / next_major_version_dir_name / "dirs"), ] _create_new_version_artifacts_helper(mocker, tmp_path, input_version, str(next_version)) @@ -471,6 +502,7 @@ def mock_get_dir_for_version(base_version): 
_create_docker_cpu_env_in_file(input_version_dir + "/cpu.env.in") _create_docker_cpu_env_in_file(input_version_dir + "/gpu.env.in") _create_prev_docker_file(input_version_dir + "/Dockerfile") + _create_gpu_cuda_config_file(input_version_dir + "/gpu_cuda_version.json") # Assert env.out doesn't exist assert os.path.exists(input_version_dir + "/cpu.env.out") is False assert os.path.exists(input_version_dir + "/gpu.env.out") is False