Skip to content

Commit

Permalink
Adds build artifacts for pending 1.1.0 release (#121)
Browse files Browse the repository at this point in the history
* fix: add rsync installation to template dockerfile

* chore: generate build artifacts for 1.1.0 release
  • Loading branch information
just4brown authored Nov 1, 2023
1 parent 08b4fe7 commit 2485cdc
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 1 deletion.
113 changes: 113 additions & 0 deletions build_artifacts/v1/v1.1/v1.1.0/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
# Tag of the mambaorg/micromamba base image. No default is given, so it has to
# be supplied at build time (e.g. with --build-arg).
ARG TAG_FOR_BASE_MICROMAMBA_IMAGE
FROM mambaorg/micromamba:$TAG_FOR_BASE_MICROMAMBA_IMAGE

# CUDA version; empty by default. Its value is forwarded to CONDA_OVERRIDE_CUDA
# further down in this file.
ARG CUDA_MAJOR_MINOR_VERSION=''
# Name of the package spec file that is passed to `micromamba install --file`.
ARG ENV_IN_FILENAME
# Optional template file. When set, it is rendered with `envsubst` and appended
# to $ENV_IN_FILENAME before the install step.
ARG ARG_BASED_ENV_IN_FILENAME

# Temporary location that dirs/ is copied to before it is rsync'ed onto / and
# removed again.
ARG AMZN_BASE="/opt/amazon/sagemaker"
ARG DIRECTORY_TREE_STAGE_DIR="${AMZN_BASE}/dir-staging"

# Login name, UID and GID that the base image's ${MAMBA_USER} account is
# renamed / renumbered to below.
ARG NB_USER="sagemaker-user"
ARG NB_UID=1000
ARG NB_GID=100

# Log directories. Both are created later in this file and, being ENV values,
# stay visible to processes in the running container.
ENV SAGEMAKER_LOGGING_DIR="/var/log/sagemaker/"
ENV STUDIO_LOGGING_DIR="/var/log/studio/"

# Root is needed to edit the account database.
USER root
# Rename the base image's ${MAMBA_USER} account and group to ${NB_USER}, move its
# home directory to /home/${NB_USER}, and assign the requested UID / GID.
RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_UID}" "${MAMBA_USER}" && \
groupmod "--new-name=${NB_USER}" --non-unique "-g ${NB_GID}" "${MAMBA_USER}" && \
# Update the expected value of MAMBA_USER for the
# _entrypoint.sh consistency check.
echo "${NB_USER}" > "/etc/arg_mamba_user" && \
:
# From here on MAMBA_USER and NB_USER refer to the same account name.
ENV MAMBA_USER=$NB_USER

# OS-level tooling plus a few permission tweaks for the notebook user:
#   - /etc/passwd becomes group-writable,
#   - every user gets password-less sudo,
#   - the krb5 configuration (and its lock file) is owned by ${NB_USER}.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        gettext-base \
        git \
        rsync \
        sudo \
        unzip \
        wget && \
    # tzdata is only installed here; the default time zone stays UTC. Having it
    # available helps packages like Pandas to function correctly.
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        awscli \
        krb5-user \
        libkrb5-dev \
        tzdata && \
    chmod g+w /etc/passwd && \
    echo "ALL ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers && \
    touch /etc/krb5.conf.lock && \
    chown ${NB_USER}:${MAMBA_USER} /etc/krb5.conf* && \
    # `rm -rf /var/lib/apt/lists/*` is deliberately NOT run here. Removing the
    # lists would prevent anyone building on top of this image from running
    # `apt-get install`, which would hamper customizability of the images.
    :
# Append a line to /etc/profile that sources the environment activation script,
# so shells that read /etc/profile pick it up.
RUN echo "source /usr/local/bin/_activate_current_env.sh" | tee --append /etc/profile

# The remaining environment setup runs as the (renamed) unprivileged user.
USER $MAMBA_USER
# Copy the env spec and every *.in file from the build context into /tmp,
# owned by that user.
COPY --chown=$MAMBA_USER:$MAMBA_USER $ENV_IN_FILENAME *.in /tmp/

# When an ARG_BASED_ENV_IN_FILENAME is given, render it with `envsubst` and
# append the result to $ENV_IN_FILENAME.
# $ENV_IN_FILENAME must already end with a newline before the `tee` command
# runs. Otherwise, nasty things will happen.
RUN if [[ -n $ARG_BASED_ENV_IN_FILENAME ]] ; then \
        envsubst < /tmp/$ARG_BASED_ENV_IN_FILENAME | tee --append /tmp/$ENV_IN_FILENAME ; \
    else \
        echo 'No ARG_BASED_ENV_IN_FILENAME passed' ; \
    fi

# Make the CUDA version available to the following build steps as
# CONDA_OVERRIDE_CUDA (empty unless CUDA_MAJOR_MINOR_VERSION was supplied).
ARG CONDA_OVERRIDE_CUDA=$CUDA_MAJOR_MINOR_VERSION
# Install everything listed in the env spec into the `base` environment, then
# clean micromamba's caches and delete the copied *.in files in the same layer.
RUN micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \
micromamba clean --all --yes --force-pkgs-dirs && \
rm -rf /tmp/*.in

# NOTE(review): presumably tells the micromamba base image to activate the
# environment for the RUN steps that follow - confirm against the base image docs.
ARG MAMBA_DOCKERFILE_ACTIVATE=1
# Make /usr/bin/python point at whichever python3 is first on PATH.
RUN sudo ln -s $(which python3) /usr/bin/python

# Install glue kernels, and move to shared directory
# Also patching base kernel so Studio background code doesn't start session silently
#
# `jupyter-kernelspec install --user` writes into the build user's home, which
# this file sets to /home/${NB_USER}. The `mv` sources are therefore derived
# from NB_USER instead of a hard-coded "sagemaker-user", so the step keeps
# working when NB_USER is overridden at build time.
#
# NOTE(review): the sed expression inserts Python source ahead of the
# `if not store_history and (` line in BaseKernel.py. The inserted lines must
# match that file's indentation - confirm the whitespace inside the expression.
RUN install-glue-kernels && \
    SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \
    jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \
    mv "/home/${NB_USER}/.local/share/jupyter/kernels/glue_pyspark" /opt/conda/share/jupyter/kernels && \
    mv "/home/${NB_USER}/.local/share/jupyter/kernels/glue_spark" /opt/conda/share/jupyter/kernels && \
    sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \
    "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py"


# Patch glue kernels to use kernel wrapper
# The two JSON files replace the kernel.json of the kernels moved into
# /opt/conda/share/jupyter/kernels above; both launch the Glue kernel module
# through `sagemaker_kernel_wrapper.sm_gis_wrapper`.
COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json
COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json

# Root is required to write into /usr/local/bin.
USER root
# Download the OSS compliance tooling, install its test helper as
# /usr/local/bin/testOSSCompliance, run the generator against
# /home/${NB_USER}/licenses for python, then remove the downloaded tooling.
#
# `--fail` makes curl exit non-zero on an HTTP error instead of saving the
# error response as oss_compliance.zip, so a bad download stops the build at
# the download step rather than at `unzip`. Path expansions are quoted so the
# step does not depend on NB_USER being free of whitespace or glob characters.
RUN HOME_DIR="/home/${NB_USER}/licenses" \
 && mkdir -p "${HOME_DIR}" \
 && curl --fail -o "${HOME_DIR}/oss_compliance.zip" https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
 && unzip "${HOME_DIR}/oss_compliance.zip" -d "${HOME_DIR}/" \
 && cp "${HOME_DIR}/oss_compliance/test/testOSSCompliance" /usr/local/bin/testOSSCompliance \
 && chmod +x /usr/local/bin/testOSSCompliance \
 && chmod +x "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" \
 && "${HOME_DIR}/oss_compliance/generate_oss_compliance.sh" "${HOME_DIR}" python \
 && rm -rf "${HOME_DIR}"/oss_compliance*

# Merge in OS directory tree contents.
# dirs/ from the build context mirrors the target filesystem layout: it is
# copied to a staging directory, synced onto / in archive mode (`rsync -a`),
# and the staging directory is then removed.
RUN mkdir -p ${DIRECTORY_TREE_STAGE_DIR}
COPY dirs/ ${DIRECTORY_TREE_STAGE_DIR}/
RUN rsync -a ${DIRECTORY_TREE_STAGE_DIR}/ / && \
rm -rf ${DIRECTORY_TREE_STAGE_DIR}

# Logging directories for supervisor: the SageMaker one is readable and
# writable by everyone, the Studio one is handed over to ${NB_USER}.
RUN mkdir -p $SAGEMAKER_LOGGING_DIR ${STUDIO_LOGGING_DIR} && \
    chmod a+rw $SAGEMAKER_LOGGING_DIR && \
    chown ${NB_USER}:${MAMBA_USER} ${STUDIO_LOGGING_DIR}

# Runtime directory for supervisord (holds its pid file and control socket),
# readable and writable by everyone.
RUN mkdir -p /var/run/supervisord && \
    chmod a+rw /var/run/supervisord

# Drop back to the unprivileged user; it stays the image's user from here on.
USER $MAMBA_USER
# Put the conda binaries ahead of everything else on PATH.
ENV PATH="/opt/conda/bin:/opt/conda/condabin:$PATH"
WORKDIR "/home/${NB_USER}"

# Look up the interpreter's 'purelib' directory via sysconfig and use it to
# locate the installed packages patched below.
# https://stackoverflow.com/questions/122327
RUN SYSTEM_PYTHON_PATH=$(python3 -c "from __future__ import print_function;import sysconfig; print(sysconfig.get_paths().get('purelib'))") && \
# Remove SparkRKernel as it's not supported \
jupyter-kernelspec remove -f -y sparkrkernel && \
# Patch Sparkmagic lib to support Custom Certificates \
# https://github.com/jupyter-incubator/sparkmagic/pull/435/files \
cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/configuration.py ${SYSTEM_PYTHON_PATH}/sparkmagic/utils/ && \
cp -a ${SYSTEM_PYTHON_PATH}/sagemaker_studio_analytics_extension/patches/reliablehttpclient.py ${SYSTEM_PYTHON_PATH}/sparkmagic/livyclientlib/reliablehttpclient.py

# Advertise bash as the shell to processes in the container.
ENV SHELL=/bin/bash
34 changes: 34 additions & 0 deletions build_artifacts/v1/v1.1/v1.1.0/cpu.env.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# This file is auto-generated.
conda-forge::pytorch[version='>=2.0.0,<3.0.0']
conda-forge::tensorflow[version='>=2.12.1,<3.0.0']
conda-forge::python[version='>=3.10.12,<3.11.0']
conda-forge::pip[version='>=23.3.1,<24.0.0']
conda-forge::torchvision[version='>=0.15.2,<1.0.0']
conda-forge::numpy[version='>=1.26.0,<2.0.0']
conda-forge::pandas[version='>=2.1.1,<3.0.0']
conda-forge::scikit-learn[version='>=1.3.1,<2.0.0']
conda-forge::jinja2[version='>=3.1.2,<4.0.0']
conda-forge::matplotlib[version='>=3.8.0,<4.0.0']
conda-forge::sagemaker-headless-execution-driver[version='>=0.0.9,<1.0.0']
conda-forge::ipython[version='>=8.16.1,<9.0.0']
conda-forge::scipy[version='>=1.11.3,<2.0.0']
conda-forge::keras[version='>=2.12.0,<3.0.0']
conda-forge::py-xgboost-cpu[version='>=1.7.6,<2.0.0']
conda-forge::jupyterlab[version='>=4.0.7,<5.0.0']
conda-forge::ipywidgets[version='>=8.1.1,<9.0.0']
conda-forge::conda[version='>=23.9.0,<24.0.0']
conda-forge::boto3[version='>=1.28.68,<2.0.0']
conda-forge::sagemaker-python-sdk[version='>=2.194.0,<3.0.0']
conda-forge::supervisor[version='>=4.2.5,<5.0.0']
conda-forge::autogluon[version='>=0.8.2,<1.0.0']
conda-forge::aws-glue-sessions[version='>=1.0.2,<2.0.0']
conda-forge::sagemaker-kernel-wrapper[version='>=0.0.2,<1.0.0']
conda-forge::jupyter-ai[version='>=2.3.0,<3.0.0']
conda-forge::jupyter-scheduler[version='>=2.2.0,<3.0.0']
conda-forge::jupyter_core[version='>=5.3.1,<6.0.0']
conda-forge::jupyter-lsp[version='>=2.2.0,<3.0.0']
conda-forge::python-lsp-server[version='>=1.8.2,<2.0.0']
conda-forge::notebook[version='>=7.0.6,<8.0.0']
conda-forge::altair[version='>=5.1.2,<6.0.0']
conda-forge::sagemaker-studio-analytics-extension[version='>=0.0.21,<1.0.0']
conda-forge::jupyter-dash[version='>=0.4.2,<1.0.0']
6 changes: 6 additions & 0 deletions build_artifacts/v1/v1.1/v1.1.0/dirs/etc/conda/.condarc
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# conda configuration shipped under dirs/etc/conda/.
# For both environments and package caches, a location under the user's home
# directory is listed ahead of the image-wide one under /opt/conda.
envs_dirs:
- ~/.conda/envs
- /opt/conda/envs
pkgs_dirs:
- ~/.conda/pkgs
- /opt/conda/pkgs
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Default Jupyter server config
# Note: these settings can be overridden by user-level configs.
# `c` is not defined in this file; it is expected to be provided by whatever
# loads this config.

from nb_conda_kernels import CondaKernelSpecManager

# Terminals launch /bin/bash; tornado is asked to compress responses.
c.ServerApp.terminado_settings = { 'shell_command': ['/bin/bash'] }
c.ServerApp.tornado_settings = { 'compress_response': True }

# Do not delete files to trash. Instead, permanently delete files.
c.FileContentsManager.delete_to_trash = False

# Allow deleting non-empty directory via file browser
# Related documentation: https://github.com/jupyter-server/jupyter_server/blob/main/jupyter_server/services/contents/filemanager.py#L125-L129
c.FileContentsManager.always_delete_dir = True

# Enable `allow_hidden` by default, so hidden files are accessible via Jupyter server
# Related documentation: https://jupyterlab.readthedocs.io/en/stable/user/files.html#displaying-hidden-files
c.ContentsManager.allow_hidden = True

# Kernel manager configurations
# Use Conda kernelspec managers, so that kernels installed in different conda
# environments can be detected and used in Jupyter
c.ServerApp.kernel_spec_manager_class=CondaKernelSpecManager
# NOTE(review): presumably restricts the kernel list to conda-provided kernels -
# confirm against the nb_conda_kernels documentation.
c.CondaKernelSpecManager.conda_only = True
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
; supervisord configuration. It is passed to supervisord and supervisorctl as
; /etc/supervisor/conf.d/supervisord.conf by the scripts shipped with it.
[supervisord]
; Stay in the foreground instead of daemonizing.
nodaemon=true

; /var/run/supervisord is created (world read/writable) by the Dockerfile.
pidfile=/var/run/supervisord/supervisord.pid
; SAGEMAKER_LOGGING_DIR is read from the environment; the supervisord/
; sub-directory is created by the script that launches supervisord.
logfile=%(ENV_SAGEMAKER_LOGGING_DIR)s/supervisord/supervisord.log
logfile_maxbytes=5MB
logfile_backups=10
redirect_stderr=true

; Control socket; supervisorctl below connects to the same path.
[unix_http_server]
file=/var/run/supervisord/supervisor.sock
chmod=0700

[supervisorctl]
serverurl=unix:///var/run/supervisord/supervisor.sock

[rpcinterface:supervisor]
supervisor.rpcinterface_factory = supervisor.rpcinterface:make_main_rpcinterface

; The Jupyter server: started from $HOME via the `start-jupyter-server` command,
; with its stdout / stderr written straight to /dev/stdout and /dev/stderr.
[program:jupyterlabserver]
directory=%(ENV_HOME)s
command=start-jupyter-server
; Stop signals are sent to the program's whole process group.
stopasgroup=true
stdout_logfile=/dev/stdout
; 0 turns off size-based rotation for the stream targets.
stdout_logfile_maxbytes=0
stderr_logfile=/dev/stderr
stderr_logfile_maxbytes=0
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
# Activates the 'base' environment and replaces this shell with supervisord
# running in the foreground.

# Abort on the first failing command.
set -e

# Generate and execute the shell code that modifies shell variables to include
# micromamba commands (e.g. using `micromamba activate` to activate environments)
eval "$(micromamba shell hook --shell=bash)"

# Activate conda environment 'base', where supervisord is installed
micromamba activate base

# supervisord writes its log file below $SAGEMAKER_LOGGING_DIR/supervisord, so
# the directory has to exist first. The expansion is quoted so that a path
# containing whitespace or glob characters is passed to mkdir as one argument.
mkdir -p "$SAGEMAKER_LOGGING_DIR/supervisord"

# Start supervisord with supervisord configuration
# Since program 'jupyterlabserver' autostarts by default, it will be started
# automatically along with supervisord
# `exec` makes supervisord take over this process instead of running as a child.
exec supervisord -c /etc/supervisor/conf.d/supervisord.conf -n
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
# Asks supervisord to restart the 'jupyterlabserver' program.
set -e
echo "Restarting the Jupyter server. This page should refresh in a few seconds. Note that any terminals will be closed."
echo "If this page doesn't refresh after a few seconds, try reloading your browser window."
echo "Restarting now..."
# Run the restart in the background under nohup with all output discarded, so
# this script returns immediately and the restart is not tied to the calling
# shell.
nohup supervisorctl -c /etc/supervisor/conf.d/supervisord.conf restart jupyterlabserver > /dev/null 2>&1 &
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/bin/bash
# Launches JupyterLab on 0.0.0.0:8888 from the 'base' environment.
set -e

eval "$(micromamba shell hook --shell=bash)"

# Activate conda environment 'base', which is the default environment for Cosmos
micromamba activate base

# Flags that are only added when SAGEMAKER_APP_TYPE is set, i.e. when the
# server is running within a SageMaker app. In that case the base url becomes
# `/<app-type-in-lower-case>/default`.
base_url_args=()
if [ -n "$SAGEMAKER_APP_TYPE" ]; then
    app_type_lower=$(echo $SAGEMAKER_APP_TYPE | tr '[:upper:]' '[:lower:]')
    base_url_args=(--ServerApp.base_url="/$app_type_lower/default")
fi

# Start Jupyter server (an empty array expands to no arguments at all)
jupyter lab --ip 0.0.0.0 --port 8888 \
    "${base_url_args[@]}" \
    --ServerApp.token='' \
    --ServerApp.allow_origin='*'
1 change: 1 addition & 0 deletions build_artifacts/v1/v1.1/v1.1.0/gpu.arg_based_env.in
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
conda-forge::cudatoolkit=$CUDA_MAJOR_MINOR_VERSION
34 changes: 34 additions & 0 deletions build_artifacts/v1/v1.1/v1.1.0/gpu.env.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# This file is auto-generated.
conda-forge::pytorch-gpu[version='>=2.0.0,<3.0.0']
conda-forge::tensorflow[version='>=2.12.1,<3.0.0']
conda-forge::python[version='>=3.10.12,<3.11.0']
conda-forge::pip[version='>=23.3.1,<24.0.0']
conda-forge::torchvision[version='>=0.15.2,<1.0.0']
conda-forge::numpy[version='>=1.26.0,<2.0.0']
conda-forge::pandas[version='>=2.1.1,<3.0.0']
conda-forge::scikit-learn[version='>=1.3.1,<2.0.0']
conda-forge::jinja2[version='>=3.1.2,<4.0.0']
conda-forge::matplotlib[version='>=3.8.0,<4.0.0']
conda-forge::sagemaker-headless-execution-driver[version='>=0.0.9,<1.0.0']
conda-forge::ipython[version='>=8.16.1,<9.0.0']
conda-forge::scipy[version='>=1.11.3,<2.0.0']
conda-forge::keras[version='>=2.12.0,<3.0.0']
conda-forge::py-xgboost-gpu[version='>=1.7.6,<2.0.0']
conda-forge::jupyterlab[version='>=4.0.7,<5.0.0']
conda-forge::ipywidgets[version='>=8.1.1,<9.0.0']
conda-forge::conda[version='>=23.9.0,<24.0.0']
conda-forge::boto3[version='>=1.28.68,<2.0.0']
conda-forge::sagemaker-python-sdk[version='>=2.194.0,<3.0.0']
conda-forge::supervisor[version='>=4.2.5,<5.0.0']
conda-forge::autogluon[version='>=0.8.2,<1.0.0']
conda-forge::aws-glue-sessions[version='>=1.0.2,<2.0.0']
conda-forge::sagemaker-kernel-wrapper[version='>=0.0.2,<1.0.0']
conda-forge::jupyter-ai[version='>=2.3.0,<3.0.0']
conda-forge::jupyter-scheduler[version='>=2.2.0,<3.0.0']
conda-forge::jupyter_core[version='>=5.3.1,<6.0.0']
conda-forge::jupyter-lsp[version='>=2.2.0,<3.0.0']
conda-forge::python-lsp-server[version='>=1.8.2,<2.0.0']
conda-forge::notebook[version='>=7.0.6,<8.0.0']
conda-forge::altair[version='>=5.1.2,<6.0.0']
conda-forge::sagemaker-studio-analytics-extension[version='>=0.0.21,<1.0.0']
conda-forge::jupyter-dash[version='>=0.4.2,<1.0.0']
15 changes: 15 additions & 0 deletions build_artifacts/v1/v1.1/v1.1.0/patch_glue_pyspark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"argv": [
"/opt/conda/bin/python",
"-m",
"sagemaker_kernel_wrapper.sm_gis_wrapper",
"-m",
"aws_glue_interactive_sessions_kernel.glue_pyspark.GlueKernel",
"-f",
"{connection_file}"
],
"display_name": "Glue PySpark and Ray",
"env": {"request_origin": "SageMakerStudioPySparkNotebook", "glue_version": "3.0"},
"language": "python"
}

15 changes: 15 additions & 0 deletions build_artifacts/v1/v1.1/v1.1.0/patch_glue_spark.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"argv": [
"/opt/conda/bin/python",
"-m",
"sagemaker_kernel_wrapper.sm_gis_wrapper",
"-m",
"aws_glue_interactive_sessions_kernel.glue_spark.GlueKernel",
"-f",
"{connection_file}"
],
"display_name": "Glue Spark",
"env": {"request_origin": "SageMakerStudioSparkNotebook", "glue_version": "3.0"},
"language": "python"
}

1 change: 1 addition & 0 deletions build_artifacts/v1/v1.1/v1.1.0/source-version.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.0.0
2 changes: 1 addition & 1 deletion template/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ RUN usermod "--login=${NB_USER}" "--home=/home/${NB_USER}" --move-home "-u ${NB_
ENV MAMBA_USER=$NB_USER

RUN apt-get update && \
apt-get install -y --no-install-recommends sudo gettext-base wget curl unzip git build-essential && \
apt-get install -y --no-install-recommends sudo gettext-base wget curl unzip git rsync build-essential && \
# We just install tzdata below but leave default time zone as UTC. This helps packages like Pandas to function correctly.
DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends tzdata awscli krb5-user libkrb5-dev && \
chmod g+w /etc/passwd && \
Expand Down

0 comments on commit 2485cdc

Please sign in to comment.