Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

change for v2 major version release #434

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions build_artifacts/v0/v0.12/v0.12.0/gpu_cuda_version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0",
"CUDA_MAJOR_MINOR_VERSION": "11.8"
}
4 changes: 4 additions & 0 deletions build_artifacts/v1/v1.4/gpu_cuda_version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0",
"CUDA_MAJOR_MINOR_VERSION": "11.8"
}
4 changes: 4 additions & 0 deletions build_artifacts/v1/v1.5/gpu_cuda_version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0",
"CUDA_MAJOR_MINOR_VERSION": "11.8"
}
4 changes: 4 additions & 0 deletions build_artifacts/v1/v1.6/gpu_cuda_version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0",
"CUDA_MAJOR_MINOR_VERSION": "11.8"
}
4 changes: 4 additions & 0 deletions build_artifacts/v1/v1.7/gpu_cuda_version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0",
"CUDA_MAJOR_MINOR_VERSION": "11.8"
}
4 changes: 4 additions & 0 deletions build_artifacts/v1/v1.8/v1.8.0/gpu_cuda_version.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"TAG_FOR_BASE_MICROMAMBA_IMAGE": "jammy-cuda-11.8.0",
"CUDA_MAJOR_MINOR_VERSION": "11.8"
}
204 changes: 204 additions & 0 deletions build_artifacts/v2/v2.0/v2.0.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
# Tag of the mambaorg/micromamba base image. Declared before FROM so it can be used in the FROM line.
# It has no default, so it must be supplied with --build-arg.
ARG TAG_FOR_BASE_MICROMAMBA_IMAGE
FROM mambaorg/micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE

# CUDA "major.minor" version; empty by default. Used further down as the default for CONDA_OVERRIDE_CUDA.
ARG CUDA_MAJOR_MINOR_VERSION=''
# Package spec file that is copied to /tmp and passed to `micromamba install --file`.
ARG ENV_IN_FILENAME
# Optional file that is run through envsubst and appended to $ENV_IN_FILENAME.
ARG ARG_BASED_ENV_IN_FILENAME

ARG AMZN_BASE="/opt/amazon/sagemaker"
# Directory created and made world read/writable near the end of this file.
ARG DB_ROOT_DIR="/opt/db"
# Temporary location that dirs/ is copied to before being rsync'ed onto /.
ARG DIRECTORY_TREE_STAGE_DIR="${AMZN_BASE}/dir-staging"

# Login name, UID and GID that the base image's default user and group are changed to.
ARG NB_USER="sagemaker-user"
ARG NB_UID=1000
ARG NB_GID=100

# Version of the OpenSSL release whose FIPS provider is built and installed later in this file.
# https://www.openssl.org/source/
ARG FIPS_VALIDATED_SSL=3.0.8

# Log directories; both are created later in this file.
ENV SAGEMAKER_LOGGING_DIR="/var/log/sagemaker/"
ENV STUDIO_LOGGING_DIR="/var/log/studio/"
# nano is installed by the apt-get step below.
ENV EDITOR="nano"

# Rename the base image's user and group (${MAMBA_USER}) to ${NB_USER}, assign the requested UID/GID and
# move the home directory to /home/${NB_USER}. MAMBA_USER and USER are then pointed at the new name.
USER root
RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_UID}" "${MAMBA_USER}" && \
groupmod "--new-name=${NB_USER}" --non-unique "-g ${NB_GID}" "${MAMBA_USER}" && \
# Update the expected value of MAMBA_USER for the
# _entrypoint.sh consistency check.
echo "${NB_USER}" > "/etc/arg_mamba_user" && \
:
ENV MAMBA_USER=$NB_USER
ENV USER=$NB_USER

# Install OS packages, enable passwordless sudo for all users, hand the Kerberos config files to ${NB_USER},
# and install AWS CLI v2. This step runs as root (USER root above), so the AWS CLI installer is invoked
# directly rather than through sudo.
RUN apt-get update && apt-get upgrade -y && \
    apt-get install -y --no-install-recommends \
        build-essential \
        cron \
        curl \
        gettext-base \
        git \
        less \
        mandoc \
        nano \
        openssh-client \
        rsync \
        sudo \
        unzip \
        wget && \
    # We just install tzdata below but leave default time zone as UTC. This helps packages like Pandas to function correctly.
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        krb5-user \
        libkrb5-dev \
        libsasl2-dev \
        libsasl2-modules \
        tzdata && \
    chmod g+w /etc/passwd && \
    echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \
    touch /etc/krb5.conf.lock && chown ${NB_USER}:${MAMBA_USER} /etc/krb5.conf* && \
    # Note that we do NOT run `rm -rf /var/lib/apt/lists/*` here. If we did, anyone building on top of our images will
    # not be able to run any `apt-get install` commands and that would hamper customizability of the images.
    # --fail makes an HTTP error abort the build instead of saving the error page as awscliv2.zip.
    curl --fail "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o "awscliv2.zip" && \
    unzip awscliv2.zip && \
    ./aws/install && \
    rm -rf aws awscliv2.zip && \
    :
# Append a line to /etc/profile that sources /usr/local/bin/_activate_current_env.sh.
RUN echo "source /usr/local/bin/_activate_current_env.sh" | tee --append /etc/profile

# CodeEditor - create the server-data and user-data directories and give them to $MAMBA_USER
RUN server_data_dir=/opt/amazon/sagemaker/sagemaker-code-editor-server-data && \
    user_data_dir=/opt/amazon/sagemaker/sagemaker-code-editor-user-data && \
    mkdir -p "${server_data_dir}" "${user_data_dir}" && \
    chown "${MAMBA_USER}:${MAMBA_USER}" "${server_data_dir}" "${user_data_dir}"

# Directory that holds user data files, also owned by $MAMBA_USER
RUN user_files_dir=/opt/amazon/sagemaker/user-data && \
    mkdir -p "${user_files_dir}" && \
    chown "${MAMBA_USER}:${MAMBA_USER}" "${user_files_dir}"


# Merge in OS directory tree contents.
# dirs/ from the build context is copied to a staging directory, rsync'ed (archive mode) onto the root of the
# filesystem, and the staging directory is then removed.
RUN mkdir -p ${DIRECTORY_TREE_STAGE_DIR}
COPY dirs/ ${DIRECTORY_TREE_STAGE_DIR}/
RUN rsync -a ${DIRECTORY_TREE_STAGE_DIR}/ / && \
rm -rf ${DIRECTORY_TREE_STAGE_DIR}

# CodeEditor - download the extensions
# Reads /etc/code-editor/extensions.txt one URL per line (the `|| [ -n "$url" ]` also handles a last line without
# a trailing newline) and downloads each file into /etc/code-editor/extensions.
# Blank lines are skipped, and any failed download aborts the build; without the explicit `exit 1` only the
# status of the last iteration would decide whether this step succeeds.
# NOTE(review): --no-check-certificate disables TLS verification for these downloads. It is kept here because
# the CA configuration of the base image is not visible from this file; remove it once verified downloads are
# confirmed to work.
RUN mkdir -p /etc/code-editor/extensions && \
    while IFS= read -r url || [ -n "$url" ]; do \
        if [ -z "${url}" ]; then continue; fi; \
        echo "Downloading extension from ${url}..." && \
        wget --no-check-certificate -P /etc/code-editor/extensions "${url}" || exit 1; \
    done < /etc/code-editor/extensions.txt

# From here on, instructions run as $MAMBA_USER.
# Copy the package spec file and every *.in file from the build context to /tmp, owned by $MAMBA_USER.
USER $MAMBA_USER
COPY --chown=$MAMBA_USER:$MAMBA_USER $ENV_IN_FILENAME *.in /tmp/

# $ENV_IN_FILENAME must already end with a newline when `tee --append` runs; otherwise the first appended
# line is glued onto the file's last line.
# When ARG_BASED_ENV_IN_FILENAME is set, that file is expanded with envsubst and appended to the spec file.
RUN if [ -n "${ARG_BASED_ENV_IN_FILENAME}" ]; then \
        envsubst < "/tmp/${ARG_BASED_ENV_IN_FILENAME}" | tee --append "/tmp/${ENV_IN_FILENAME}"; \
    else \
        echo 'No ARG_BASED_ENV_IN_FILENAME passed'; \
    fi

# CONDA_OVERRIDE_CUDA defaults to the CUDA version passed as a build arg (empty when none is given).
# NOTE(review): presumably consumed by the solver to set the assumed CUDA version - confirm against conda docs.
ARG CONDA_OVERRIDE_CUDA=$CUDA_MAJOR_MINOR_VERSION
# Enforce dependencies are all installed from conda-forge
# Installs the spec file into the `base` environment, then clears micromamba's caches and removes the
# *.in files from /tmp in the same layer.
RUN micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \
micromamba clean --all --yes --force-pkgs-dirs && \
rm -rf /tmp/*.in


# NOTE(review): MAMBA_DOCKERFILE_ACTIVATE=1 is presumably what makes the conda environment active in the RUN
# steps below (python3, npm, conda, pip resolve to the installed packages) - confirm in the base image docs.
ARG MAMBA_DOCKERFILE_ACTIVATE=1
# Expose the python3 found on PATH as /usr/bin/python. sudo is required because this step runs as $MAMBA_USER.
RUN sudo ln -s $(which python3) /usr/bin/python

# Update npm version
RUN npm i -g npm

# Enforce to use `conda-forge` as only channel, by removing `defaults`
RUN conda config --remove channels defaults
RUN micromamba config append channels conda-forge --env

# Configure CodeEditor - Install extensions and set preferences
# Every *.vsix downloaded to /etc/code-editor/extensions is installed into the server-data extensions
# directory, then the machine-level settings file is copied into place.
# NOTE(review): the install command inside the loop is terminated with `;`, so a failing install does not stop
# the build on its own; also, if no *.vsix file exists the loop runs once with the unexpanded glob pattern.
RUN \
extensionloc=/opt/amazon/sagemaker/sagemaker-code-editor-server-data/extensions && mkdir -p "${extensionloc}" \
# Loop through all vsix files in /etc/code-editor/extensions and install them
&& for ext in /etc/code-editor/extensions/*.vsix; do \
echo "Installing extension ${ext}..."; \
sagemaker-code-editor --install-extension "${ext}" --extensions-dir "${extensionloc}" --server-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-server-data --user-data-dir /opt/amazon/sagemaker/sagemaker-code-editor-user-data; \
done \
# Copy the settings
&& cp /etc/code-editor/code_editor_machine_settings.json /opt/amazon/sagemaker/sagemaker-code-editor-server-data/data/Machine/settings.json

# Install glue kernels, and move to shared directory
# Also patching base kernel so Studio background code doesn't start session silently
#
# The kernelspecs are first installed with --user, which places them under the build user's home directory,
# and are then moved to /opt/conda/share/jupyter/kernels. The home directory is derived from NB_USER (the same
# way the licenses directory and WORKDIR are) so the step keeps working when NB_USER is overridden.
# NOTE(review): the text inserted by sed becomes Python source in BaseKernel.py and must carry the indentation
# of the surrounding code there - verify the whitespace inside the sed expression.
RUN install-glue-kernels && \
    SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \
    mv /home/${NB_USER}/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \
    mv /home/${NB_USER}/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \
    sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \
    "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py"


# Patch glue kernels to use kernel wrapper
# Each patch file from the build context replaces the kernel.json of the corresponding glue kernelspec.
# NOTE(review): unlike the earlier COPY of the *.in files, these COPY instructions carry no --chown.
COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json
COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json

# Configure RTC - disable jupyter_collaboration by default
RUN jupyter labextension disable @jupyter/collaboration-extension

# As root: download the oss_compliance archive into /home/${NB_USER}/licenses, install its testOSSCompliance
# helper into /usr/local/bin, run generate_oss_compliance.sh with the licenses directory and `python` as
# arguments, and finally delete the downloaded archive and its extracted directory.
USER root
RUN LICENSES_DIR="/home/${NB_USER}/licenses" && \
    mkdir -p "${LICENSES_DIR}" && \
    curl -o "${LICENSES_DIR}/oss_compliance.zip" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip && \
    unzip "${LICENSES_DIR}/oss_compliance.zip" -d "${LICENSES_DIR}/" && \
    cp "${LICENSES_DIR}/oss_compliance/test/testOSSCompliance" /usr/local/bin/testOSSCompliance && \
    chmod +x /usr/local/bin/testOSSCompliance "${LICENSES_DIR}/oss_compliance/generate_oss_compliance.sh" && \
    "${LICENSES_DIR}/oss_compliance/generate_oss_compliance.sh" "${LICENSES_DIR}" python && \
    rm -rf "${LICENSES_DIR}"/oss_compliance*

# Create logging directories for supervisor
# $SAGEMAKER_LOGGING_DIR is made readable/writable by everyone; $STUDIO_LOGGING_DIR is handed to ${NB_USER}.
RUN mkdir -p $SAGEMAKER_LOGGING_DIR && \
chmod a+rw $SAGEMAKER_LOGGING_DIR && \
mkdir -p ${STUDIO_LOGGING_DIR} && \
chown ${NB_USER}:${MAMBA_USER} ${STUDIO_LOGGING_DIR}

# Clean up CodeEditor artifacts
# Removes the downloaded extensions, the URL list and the settings template from the final filesystem.
# The files still exist in the earlier layers that created them, so this does not reduce image size.
RUN rm -rf /etc/code-editor

# Create supervisord runtime directory
# World read/writable so supervisord can create its pid file and socket there when run as a non-root user.
RUN mkdir -p /var/run/supervisord && \
chmod a+rw /var/run/supervisord

# Create root directory for DB
# The directory is made readable and writable by all users.
RUN mkdir -p $DB_ROOT_DIR && \
chmod a+rw $DB_ROOT_DIR

# Switch back to the unprivileged user, put the conda executables first on PATH and make the user's home
# directory the working directory for the remaining build steps and for the container.
USER $MAMBA_USER
ENV PATH="/opt/conda/bin:/opt/conda/condabin:$PATH"
WORKDIR "/home/${NB_USER}"

# Install FIPS Provider for OpenSSL, on top of existing OpenSSL installation
# v3.0.8 is latest FIPS validated provider, so this is the one we install
# But we need to run tests against the installed version.
# see https://github.com/openssl/openssl/blob/master/README-FIPS.md https://www.openssl.org/source/
#
# Sequence of the RUN step below:
#   1. read the installed openssl version from the first matching line of `micromamba list`;
#   2. download and unpack the sources of both the validated and the installed version into the current
#      working directory;
#   3. configure both with enable-fips (prefix /opt/conda, openssldir /opt/conda/ssl) and build them;
#   4. copy the validated fips.so and fipsmodule.cnf into the installed-version tree and run its tests;
#   5. run `make install_fips` from the validated tree and delete both source trees.
RUN INSTALLED_SSL=$(micromamba list | grep openssl | tr -s ' ' | cut -d ' ' -f 3 | head -n 1) && \
# download source code for installed, and FIPS validated openssl versions
curl -L https://www.openssl.org/source/openssl-$FIPS_VALIDATED_SSL.tar.gz > openssl-$FIPS_VALIDATED_SSL.tar.gz && \
curl -L https://www.openssl.org/source/openssl-$INSTALLED_SSL.tar.gz > openssl-$INSTALLED_SSL.tar.gz && \
tar -xf openssl-$FIPS_VALIDATED_SSL.tar.gz && tar -xf openssl-$INSTALLED_SSL.tar.gz && cd openssl-$FIPS_VALIDATED_SSL && \
# Configure both versions to enable FIPS and build
./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \
cd ../openssl-$INSTALLED_SSL && \
./Configure enable-fips --prefix=/opt/conda --openssldir=/opt/conda/ssl && make && \
# Copy validated provider to installed version for testing
cp ../openssl-$FIPS_VALIDATED_SSL/providers/fips.so providers/. && \
cp ../openssl-$FIPS_VALIDATED_SSL/providers/fipsmodule.cnf providers/. && \
make tests && cd ../openssl-$FIPS_VALIDATED_SSL && \
# After tests pass, install FIPS provider and remove source code
make install_fips && cd .. && rm -rf ./openssl-*
# Create new config file with fips-enabled. Then user can override OPENSSL_CONF to enable FIPS
# e.g. export OPENSSL_CONF=/opt/conda/ssl/openssl-fips.cnf
# The copy un-comments the `.include` of fipsmodule.cnf (rewritten to an absolute path) and the
# `fips = fips_sect` line; the default openssl.cnf itself is left untouched.
RUN cp /opt/conda/ssl/openssl.cnf /opt/conda/ssl/openssl-fips.cnf && \
sed -i "s:# .include fipsmodule.cnf:.include /opt/conda/ssl/fipsmodule.cnf:" /opt/conda/ssl/openssl-fips.cnf && \
sed -i 's:# fips = fips_sect:fips = fips_sect:' /opt/conda/ssl/openssl-fips.cnf
# Directory OpenSSL searches for provider modules.
# NOTE(review): presumably where `make install_fips` placed fips.so - confirm.
ENV OPENSSL_MODULES=/opt/conda/lib64/ossl-modules/

# Install Kerberos.
# Make sure no dependency is added/updated: every line of `pip show krb5` that contains "Require" must have
# nothing after its colon, otherwise the build fails.
RUN pip install "krb5>=0.5.1,<0.6" && \
    pip show krb5 | grep Require | xargs -I {} sh -c '[ $(echo {} | cut -d: -f2 | wc -w) -eq 0 ] '

# https://stackoverflow.com/questions/122327
# SYSTEM_PYTHON_PATH is the 'purelib' directory reported by sysconfig for python3. The step removes the
# sparkrkernel kernelspec, overwrites two sparkmagic modules with the patched copies shipped inside
# sagemaker_studio_analytics_extension, points the Spark/PySpark kernels at /opt/conda/bin/python and prefixes
# their display names with "SparkMagic".
RUN SYSTEM_PYTHON_PATH=$(python3 -c "from __future__ import print_function;import sysconfig; print(sysconfig.get_paths().get('purelib'))") && \
# Remove SparkRKernel as it's not supported \
jupyter-kernelspec remove -f -y sparkrkernel && \
# Patch Sparkmagic lib to support Custom Certificates \
# https://github.com/jupyter-incubator/sparkmagic/pull/435/files \
cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/configuration.py ${SYSTEM_PYTHON_PATH}/sparkmagic/utils/ && \
cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/reliablehttpclient.py ${SYSTEM_PYTHON_PATH}/sparkmagic/livyclientlib/reliablehttpclient.py && \
sed -i 's= "python"= "/opt/conda/bin/python"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
sed -i 's="Spark"="SparkMagic Spark"=g' /opt/conda/share/jupyter/kernels/sparkkernel/kernel.json && \
sed -i 's="PySpark"="SparkMagic PySpark"=g' /opt/conda/share/jupyter/kernels/pysparkkernel/kernel.json

# Default shell advertised to processes in the container.
ENV SHELL=/bin/bash
50 changes: 50 additions & 0 deletions build_artifacts/v2/v2.0/v2.0.0/cpu.env.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# This file is auto-generated.
conda-forge::jupyter-collaboration[version='>=1.1.0']
conda-forge::sagemaker-code-editor[version='>=1.1.0']
conda-forge::amazon_sagemaker_sql_editor[version='>=0.1.7']
conda-forge::amazon-sagemaker-sql-magic[version='>=0.1.1']
conda-forge::langchain[version='>=0.1.9']
conda-forge::fastapi[version='>=0.110.3']
conda-forge::uvicorn[version='>=0.29.0']
conda-forge::pytorch[version='>=2.3.0']
conda-forge::tensorflow[version='>=2.15.0']
conda-forge::python[version='>=3.10.14']
conda-forge::pip[version='>=23.3.2']
conda-forge::torchvision[version='>=0.15.2']
conda-forge::numpy[version='>=1.26.4']
conda-forge::pandas[version='>=2.1.4']
conda-forge::scikit-learn[version='>=1.4.2']
conda-forge::jinja2[version='>=3.1.4']
conda-forge::matplotlib[version='>=3.8.4']
conda-forge::sagemaker-headless-execution-driver[version='>=0.0.12']
conda-forge::ipython[version='>=8.22.2']
conda-forge::scipy[version='>=1.11.4']
conda-forge::keras[version='>=2.15.0']
conda-forge::py-xgboost-cpu[version='>=1.7.6']
conda-forge::jupyterlab[version='>=4.1.6']
conda-forge::ipywidgets[version='>=8.1.2']
conda-forge::conda[version='>=23.11.0']
conda-forge::boto3[version='>=1.34.51']
conda-forge::sagemaker-python-sdk[version='>=2.219.0']
conda-forge::supervisor[version='>=4.2.5']
conda-forge::aws-glue-sessions[version='>=1.0.5']
conda-forge::sagemaker-kernel-wrapper[version='>=0.0.2']
conda-forge::jupyter-ai[version='>=2.14.1']
conda-forge::jupyter-scheduler[version='>=2.5.2']
conda-forge::jupyter-lsp[version='>=2.2.5']
conda-forge::jupyterlab-lsp[version='>=5.0.3']
conda-forge::python-lsp-server[version='>=1.11.0']
conda-forge::notebook[version='>=7.1.3']
conda-forge::altair[version='>=5.3.0']
conda-forge::sagemaker-studio-analytics-extension[version='>=0.0.21']
conda-forge::jupyter-dash[version='>=0.4.2']
conda-forge::sagemaker-jupyterlab-extension[version='>=0.3.2']
conda-forge::sagemaker-jupyterlab-emr-extension[version='>=0.1.9']
conda-forge::amazon-sagemaker-jupyter-scheduler[version='>=3.0.11']
conda-forge::jupyter-server-proxy[version='>=4.1.2']
conda-forge::amazon-codewhisperer-jupyterlab-ext[version='>=2.0.2']
conda-forge::jupyterlab-git[version='>=0.50.0']
conda-forge::thrift_sasl[version='>=0.4.3']
conda-forge::pyhive[version='>=0.7.0']
conda-forge::python-gssapi[version='>=1.8.3']
conda-forge::langchain-aws[version='>=0.1.1,<0.2.0']
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
{
"python.terminal.activateEnvironment": false,
"python.defaultInterpreterPath": "/opt/conda/bin/python"
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
https://open-vsx.org/api/ms-toolsai/jupyter/2023.9.100/file/ms-toolsai.jupyter-2023.9.100.vsix
https://open-vsx.org/api/ms-python/python/2023.20.0/file/ms-python.python-2023.20.0.vsix
https://open-vsx.org/api/amazonwebservices/aws-toolkit-vscode/1.99.0/file/amazonwebservices.aws-toolkit-vscode-1.99.0.vsix
6 changes: 6 additions & 0 deletions build_artifacts/v2/v2.0/v2.0.0/dirs/etc/conda/.condarc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Environment directories: the per-user location is listed before the shared /opt/conda one.
envs_dirs:
- ~/.conda/envs
- /opt/conda/envs
# Package cache directories, in the same order: per-user first, then /opt/conda.
pkgs_dirs:
- ~/.conda/pkgs
- /opt/conda/pkgs
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Default Jupyter server config
# Note: those config can be overridden by user-level configs.
# `c` is not defined in this file.
# NOTE(review): presumably the config object Jupyter provides when it loads a config file - confirm.

# Terminals start /bin/bash; HTTP responses are compressed.
c.ServerApp.terminado_settings = {"shell_command": ["/bin/bash"]}
c.ServerApp.tornado_settings = {"compress_response": True}

# Do not delete files to trash. Instead, permanently delete files.
c.FileContentsManager.delete_to_trash = False

# Allow deleting non-empty directory via file browser. Related documentation:
# https://github.com/jupyter-server/jupyter_server/blob/main/jupyter_server/services/contents/filemanager.py#L125-L129
c.FileContentsManager.always_delete_dir = True

# Enable `allow_hidden` by default, so hidden files are accessible via Jupyter server
# Related documentation: https://jupyterlab.readthedocs.io/en/stable/user/files.html#displaying-hidden-files
c.ContentsManager.allow_hidden = True

# This will set the LanguageServerManager.extra_node_roots setting if amazon_sagemaker_sql_editor exists in the
# environment. Ignore otherwise, don't fail the JL server start
# Related documentation: https://jupyterlab-lsp.readthedocs.io/en/v3.4.0/Configuring.html
try:
    import os

    module = __import__("amazon_sagemaker_sql_editor")
    module_location = os.path.dirname(module.__file__)
    c.LanguageServerManager.extra_node_roots = [f"{module_location}/sql-language-server"]
except Exception:
    # Best effort by design: a missing or broken optional package must not prevent the server from starting.
    # `Exception` (rather than a bare `except`) lets SystemExit and KeyboardInterrupt propagate.
    pass
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
; supervisord configuration for the Code Editor server.
; Shared settings are pulled in from supervisord-common.conf.
[include]
files = supervisord-common.conf

; Runs `start-code-editor` from the directory in the HOME environment variable; the program is started
; automatically and restarted when it exits.
[program:codeeditorserver]
directory=%(ENV_HOME)s
command=start-code-editor
autostart=true
autorestart=true
stdout_logfile=/dev/fd/1 ; Redirect web server logs to stdout
stdout_logfile_maxbytes = 0 ; Fix: https://github.com/Supervisor/supervisor/issues/935
stderr_logfile_maxbytes = 0 ; Fix: https://github.com/Supervisor/supervisor/issues/935
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
; Settings shared by the per-application supervisord configurations that include this file.
; supervisord stays in the foreground (nodaemon) and keeps its pid file and control socket under
; /var/run/supervisord.
[supervisord]
nodaemon=true

pidfile=/var/run/supervisord/supervisord.pid
; The log path is built from the STUDIO_LOGGING_DIR and SAGEMAKER_APP_TYPE_LOWERCASE environment variables,
; so both must be set when supervisord starts.
logfile=%(ENV_STUDIO_LOGGING_DIR)s/%(ENV_SAGEMAKER_APP_TYPE_LOWERCASE)s/supervisord/supervisord.log
logfile_maxbytes=5MB
logfile_backups=10
redirect_stderr=true

; Control socket, accessible only to its owner (mode 0700).
[unix_http_server]
file=/var/run/supervisord/supervisor.sock
chmod=0700

; supervisorctl talks to the same socket.
[supervisorctl]
serverurl=unix:///var/run/supervisord/supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
; supervisord configuration for the JupyterLab server.
; Shared settings are pulled in from supervisord-common.conf.
[include]
files = supervisord-common.conf

; Runs `start-jupyter-server` from the directory in the HOME environment variable. Stop signals are sent to
; the whole process group, and stdout/stderr are forwarded unrotated (maxbytes=0) to supervisord's own
; stdout/stderr.
[program:jupyterlabserver]
directory=%(ENV_HOME)s
command=start-jupyter-server
stopasgroup=true
stdout_logfile=/dev/stdout
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
Loading
Loading