diff --git a/build_artifacts/v0/v0.9/v0.9.0/Dockerfile b/build_artifacts/v0/v0.9/v0.9.0/Dockerfile index f23713a6..675331b0 100644 --- a/build_artifacts/v0/v0.9/v0.9.0/Dockerfile +++ b/build_artifacts/v0/v0.9/v0.9.0/Dockerfile @@ -50,6 +50,21 @@ RUN micromamba install -y --name base --file /tmp/$ENV_IN_FILENAME && \ ARG MAMBA_DOCKERFILE_ACTIVATE=1 RUN sudo ln -s $(which python3) /usr/bin/python +# Install glue kernels, and move to shared directory +# Also patching base kernel so Studio background code doesn't start session silently +RUN install-glue-kernels && \ + SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \ + jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \ + jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \ + mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \ + mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \ + sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \ + "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py" + +# Patch glue kernels to use kernel wrapper +COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json +COPY patch_glue_spark.json /opt/conda/share/jupyter/kernels/glue_spark/kernel.json + USER root RUN HOME_DIR="/home/${NB_USER}/licenses" \ && mkdir -p ${HOME_DIR} \ diff --git a/build_artifacts/v0/v0.9/v0.9.0/cpu.env.in b/build_artifacts/v0/v0.9/v0.9.0/cpu.env.in index d0f1547c..a0ef43a4 100644 --- a/build_artifacts/v0/v0.9/v0.9.0/cpu.env.in +++ b/build_artifacts/v0/v0.9/v0.9.0/cpu.env.in @@ -19,3 +19,5 @@ conda-forge::conda[version='>=23.9.0,<24.0.0'] conda-forge::boto3[version='>=1.28.63,<2.0.0'] conda-forge::sagemaker-python-sdk[version='>=2.189.0,<3.0.0'] 
conda-forge::sagemaker-studio-analytics-extension[version='>=0.0.21,<1'] +conda-forge::aws-glue-sessions[version='>=1.0.2,<2'] +conda-forge::sagemaker-kernel-wrapper[version='>=0.0.2,<1'] diff --git a/build_artifacts/v0/v0.9/v0.9.0/gpu.env.in b/build_artifacts/v0/v0.9/v0.9.0/gpu.env.in index c7708ad5..e1dcd248 100644 --- a/build_artifacts/v0/v0.9/v0.9.0/gpu.env.in +++ b/build_artifacts/v0/v0.9/v0.9.0/gpu.env.in @@ -19,3 +19,5 @@ conda-forge::conda[version='>=23.9.0,<24.0.0'] conda-forge::boto3[version='>=1.28.63,<2.0.0'] conda-forge::sagemaker-python-sdk[version='>=2.189.0,<3.0.0'] conda-forge::sagemaker-studio-analytics-extension[version='>=0.0.21,<1'] +conda-forge::aws-glue-sessions[version='>=1.0.2,<2'] +conda-forge::sagemaker-kernel-wrapper[version='>=0.0.2,<1'] diff --git a/build_artifacts/v0/v0.9/v0.9.0/patch_glue_pyspark.json b/build_artifacts/v0/v0.9/v0.9.0/patch_glue_pyspark.json new file mode 100644 index 00000000..bbd69d7b --- /dev/null +++ b/build_artifacts/v0/v0.9/v0.9.0/patch_glue_pyspark.json @@ -0,0 +1,14 @@ +{ + "argv": [ + "/opt/conda/bin/python", + "-m", + "sagemaker_kernel_wrapper.sm_gis_wrapper", + "-m", + "aws_glue_interactive_sessions_kernel.glue_pyspark.GlueKernel", + "-f", + "{connection_file}" + ], + "display_name": "Glue PySpark and Ray", + "env": {"request_origin": "SageMakerStudioPySparkNotebook", "glue_version": "3.0"}, + "language": "python" +} diff --git a/build_artifacts/v0/v0.9/v0.9.0/patch_glue_spark.json b/build_artifacts/v0/v0.9/v0.9.0/patch_glue_spark.json new file mode 100644 index 00000000..b759fc9a --- /dev/null +++ b/build_artifacts/v0/v0.9/v0.9.0/patch_glue_spark.json @@ -0,0 +1,14 @@ +{ + "argv": [ + "/opt/conda/bin/python", + "-m", + "sagemaker_kernel_wrapper.sm_gis_wrapper", + "-m", + "aws_glue_interactive_sessions_kernel.glue_spark.GlueKernel", + "-f", + "{connection_file}" + ], + "display_name": "Glue Spark", + "env": {"request_origin": "SageMakerStudioSparkNotebook", "glue_version": "3.0"}, + "language": "python" +} 
diff --git a/template/Dockerfile b/template/Dockerfile index 93d63a3c..0e3e9b3e 100644 --- a/template/Dockerfile +++ b/template/Dockerfile @@ -55,12 +55,16 @@ ARG MAMBA_DOCKERFILE_ACTIVATE=1 RUN sudo ln -s $(which python3) /usr/bin/python # Install glue kernels, and move to shared directory +# Also patching base kernel so Studio background code doesn't start session silently RUN install-glue-kernels && \ SITE_PACKAGES=$(pip show aws-glue-sessions | grep Location | awk '{print $2}') && \ jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_pyspark --user && \ jupyter-kernelspec install $SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_spark --user && \ mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_pyspark /opt/conda/share/jupyter/kernels && \ - mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels + mv /home/sagemaker-user/.local/share/jupyter/kernels/glue_spark /opt/conda/share/jupyter/kernels && \ + sed -i '/if not store_history and (/i\ if "sm_analytics_runtime_check" in code:\n return await self._complete_cell()\n' \ + "$SITE_PACKAGES/aws_glue_interactive_sessions_kernel/glue_kernel_base/BaseKernel.py" + # Patch glue kernels to use kernel wrapper COPY patch_glue_pyspark.json /opt/conda/share/jupyter/kernels/glue_pyspark/kernel.json diff --git a/test/test_artifacts/v0/aws-glue-sessions/glue_notebook.ipynb b/test/test_artifacts/v0/aws-glue-sessions/glue_notebook.ipynb new file mode 100644 index 00000000..b491f310 --- /dev/null +++ b/test/test_artifacts/v0/aws-glue-sessions/glue_notebook.ipynb @@ -0,0 +1,91 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "1a710e7c-7ebf-477a-88b5-3d85cb08cf19", + "metadata": {}, + "outputs": [], + "source": [ + "%status" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0ce599e8-6dcc-42c4-b10d-8e4e898eb436", + "metadata": {}, + "outputs": [], + "source": [ + "%stop_session" + ] + }, + { 
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "013565d2-26dc-4710-83ca-1d00711be6c9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%glue_ray"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e81bb7c2-bec2-4c4b-8d4d-59bf5e6a9daf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%etl"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7a0b5de5-bf14-40f9-a944-f98e5a96e0f4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%streaming"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cf24f505-6f26-447e-acc3-4af4556bb386",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%help"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "33149d30-420e-4ebf-b32c-ca635db7cb10",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Glue PySpark and Ray",
+   "language": "python",
+   "name": "glue_pyspark"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "python",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "Python_Glue_Session",
+   "pygments_lexer": "python3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/test/test_artifacts/v0/aws-glue-sessions/run_glue_sessions_notebook.sh b/test/test_artifacts/v0/aws-glue-sessions/run_glue_sessions_notebook.sh
new file mode 100644
index 00000000..fbf5faf2
--- /dev/null
+++ b/test/test_artifacts/v0/aws-glue-sessions/run_glue_sessions_notebook.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+# Abort on the first error (or unset variable) so that a failing papermill run
+# for any kernel makes this script exit non-zero instead of being masked by a
+# later, successful iteration of the loop below.
+set -euo pipefail
+
+# Create an empty notebook file for papermill's output
+touch nb_output.ipynb
+
+# Execute the same notebook once per Glue kernelspec.
+kernels=('glue_pyspark' 'glue_spark')
+for kernel in "${kernels[@]}"; do
+  papermill 'glue_notebook.ipynb' 'nb_output.ipynb' -k "$kernel"
+done
+
diff --git a/test/test_artifacts/v0/glue-sessions.test.Dockerfile b/test/test_artifacts/v0/glue-sessions.test.Dockerfile
new file mode 100644
index 00000000..46606021
--- /dev/null
+++ 
b/test/test_artifacts/v0/glue-sessions.test.Dockerfile @@ -0,0 +1,11 @@ +ARG COSMOS_IMAGE +FROM $COSMOS_IMAGE + +ARG MAMBA_DOCKERFILE_ACTIVATE=1 + +COPY --chown=$MAMBA_USER:$MAMBA_USER aws-glue-sessions/ . +RUN chmod +x run_glue_sessions_notebook.sh + +RUN micromamba install -y --freeze-installed -c conda-forge papermill + +CMD ["./run_glue_sessions_notebook.sh"] diff --git a/test/test_dockerfile_based_harness.py b/test/test_dockerfile_based_harness.py index 6bce7884..2bac78b3 100644 --- a/test/test_dockerfile_based_harness.py +++ b/test/test_dockerfile_based_harness.py @@ -54,6 +54,7 @@ def test_dockerfiles_for_cpu(dockerfile_path: str, required_packages: List[str], ("sm-python-sdk.test.Dockerfile", ['sagemaker-python-sdk']), ("pytorch.examples.Dockerfile", ['pytorch']), ("tensorflow.examples.Dockerfile", ['tensorflow']), + ("glue-sessions.test.Dockerfile", ['aws-glue-sessions']), ("jupyter-ai.test.Dockerfile", ['jupyter-ai']), ("jupyter-dash.test.Dockerfile", ['jupyter-dash']), ("jupyter-lsp.test.Dockerfile", ['jupyter-lsp']),