diff --git a/custom_image_utils/shell_script_generator.py b/custom_image_utils/shell_script_generator.py index c84f7f1..89730c3 100644 --- a/custom_image_utils/shell_script_generator.py +++ b/custom_image_utils/shell_script_generator.py @@ -30,16 +30,27 @@ base_obj_type="images" +function execute_with_retries() ( + set +x + local -r cmd="$*" + + for ((i = 0; i < 3; i++)); do + if eval "$cmd"; then return 0 ; fi + sleep 5 + done + return 1 +) + function exit_handler() {{ echo 'Cleaning up before exiting.' if [[ -f /tmp/{run_id}/vm_created ]]; then echo 'Deleting VM instance.' - gcloud compute instances delete {image_name}-install \ + execute_with_retries gcloud compute instances delete {image_name}-install \ --project={project_id} --zone={zone} -q elif [[ -f /tmp/{run_id}/disk_created ]]; then echo 'Deleting disk.' - gcloud compute ${{base_obj_type}} delete {image_name}-install --project={project_id} --zone={zone} -q + execute_with_retries gcloud compute ${{base_obj_type}} delete {image_name}-install --project={project_id} --zone={zone} -q fi echo 'Uploading local logs to GCS bucket.' @@ -99,6 +110,7 @@ done local cert_args="" + local num_src_certs="0" if [[ -n '{trusted_cert}' ]] && [[ -f '{trusted_cert}' ]]; then # build tls/ directory from variables defined near the header of # the examples/secure-boot/create-key-pair.sh file @@ -124,9 +136,9 @@ local -a src_img_modulus_md5sums=() mapfile -t src_img_modulus_md5sums < <(print_img_dbs_modulus_md5sums {dataproc_base_image}) - local num_src_certs="${{#src_img_modulus_md5sums[@]}}" + num_src_certs="${{#src_img_modulus_md5sums[@]}}" echo "${{num_src_certs}} db certificates attached to source image" - if [[ ${{num_src_certs}} -eq 0 ]]; then + if [[ "${{num_src_certs}}" -eq "0" ]]; then echo "no db certificates in source image" cert_list=default_cert_list else @@ -153,7 +165,7 @@ fi date - set -x + if [[ -z "${{cert_args}}" && "${{num_src_certs}}" -ne "0" ]]; then echo 'Re-using base image' base_obj_type="reuse" @@ -163,7 +175,7 @@ echo 'Creating image.' base_obj_type="images" instance_disk_args='--image-project={project_id} --image={image_name}-install --boot-disk-size={disk_size}G --boot-disk-type=pd-ssd' - time gcloud compute images create {image_name}-install \ + time execute_with_retries gcloud compute images create {image_name}-install \ --project={project_id} \ --source-image={dataproc_base_image} \ ${{cert_args}} \ @@ -174,7 +186,7 @@ echo 'Creating disk.' base_obj_type="disks" instance_disk_args='--disk=auto-delete=yes,boot=yes,mode=rw,name={image_name}-install' - time gcloud compute disks create {image_name}-install \ + time execute_with_retries gcloud compute disks create {image_name}-install \ --project={project_id} \ --zone={zone} \ --image={dataproc_base_image} \ @@ -182,12 +194,11 @@ --size={disk_size}GB touch "/tmp/{run_id}/disk_created" fi - set +x date echo 'Creating VM instance to run customization script.' 
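A note on the retry wrapper threaded through the hunks above: it re-joins its arguments with `"$*"` and runs them through `eval`, giving each wrapped `gcloud` call three attempts with a five-second pause between them. A minimal, self-contained sketch of the same semantics, with a hypothetical `flaky` function standing in for a transiently failing `gcloud` invocation:

```bash
#!/usr/bin/env bash
# Sketch only; `flaky` is a stand-in, not part of the patch.
function execute_with_retries() (
  set +x
  local -r cmd="$*"
  for ((i = 0; i < 3; i++)); do
    if eval "$cmd"; then return 0 ; fi
    sleep 5
  done
  return 1
)

function flaky() { (( RANDOM % 2 )) ; }  # succeeds roughly half the time

if execute_with_retries flaky ; then echo "succeeded within three attempts"
else echo "gave up after three attempts" ; fi
```

Because the words are re-parsed by `eval`, arguments containing spaces or shell metacharacters need a second layer of quoting when passed through this wrapper.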
- set -x - time gcloud compute instances create {image_name}-install \ + ( set -x + time execute_with_retries gcloud compute instances create {image_name}-install \ --project={project_id} \ --zone={zone} \ {network_flag} \ @@ -199,24 +210,23 @@ {service_account_flag} \ --scopes=cloud-platform \ {metadata_flag} \ - --metadata-from-file startup-script=startup_script/run.sh - set +x + --metadata-from-file startup-script=startup_script/run.sh ) touch /tmp/{run_id}/vm_created # clean up intermediate install image if [[ "${{base_obj_type}}" == "images" ]] ; then - gcloud compute images delete -q {image_name}-install --project={project_id} + execute_with_retries gcloud compute images delete -q {image_name}-install --project={project_id} fi echo 'Waiting for customization script to finish and VM shutdown.' - gcloud compute instances tail-serial-port-output {image_name}-install \ + execute_with_retries gcloud compute instances tail-serial-port-output {image_name}-install \ --project={project_id} \ --zone={zone} \ --port=1 2>&1 \ | grep 'startup-script' \ | sed -e 's/ {image_name}-install.*startup-script://g' \ - | dd bs=64 of={log_dir}/startup-script.log \ + | dd bs=1 of={log_dir}/startup-script.log \ || true echo 'Checking customization script result.' date @@ -233,14 +243,13 @@ date echo 'Creating custom image.' - set -x - time gcloud compute images create {image_name} \ + ( set -x + time execute_with_retries gcloud compute images create {image_name} \ --project={project_id} \ --source-disk-zone={zone} \ --source-disk={image_name}-install \ {storage_location_flag} \ - --family={family} - set +x + --family={family} ) touch /tmp/{run_id}/image_created }} diff --git a/examples/secure-boot/README.md b/examples/secure-boot/README.md index 81648aa..4807eb7 100644 --- a/examples/secure-boot/README.md +++ b/examples/secure-boot/README.md @@ -52,8 +52,8 @@ in the file examples/secure-boot/env.json.sample. ```bash cp examples/secure-boot/env.json.sample env.json vi env.json -docker build -t dataproc-custom-images:latest . -docker run -it dataproc-custom-images:latest /bin/bash examples/secure-boot/cuda.sh +docker build -t dataproc-cuda-pre-init:latest . +docker run -it dataproc-cuda-pre-init:latest /bin/bash examples/secure-boot/cuda.sh ``` To do the same, but for all dataproc variants including supported @@ -64,6 +64,6 @@ script can be run in docker: ```bash cp examples/secure-boot/env.json.sample env.json vi env.json -docker build -t dataproc-custom-images:latest . -docker run -it dataproc-custom-images:latest /bin/bash examples/secure-boot/build-current-images.sh +docker build -t dataproc-dask-rapids-pre-init:latest . 
+docker run -it dataproc-dask-rapids-pre-init:latest /bin/bash examples/secure-boot/build-current-images.sh
 ```
diff --git a/examples/secure-boot/build-current-images.sh b/examples/secure-boot/build-current-images.sh
index cd7bafa..0d7846d 100644
--- a/examples/secure-boot/build-current-images.sh
+++ b/examples/secure-boot/build-current-images.sh
@@ -15,7 +15,7 @@
 #
 # This script creates a custom image pre-loaded with cuda
 
-set -e
+set -ex
 
 function configure_service_account() {
   # Create service account
@@ -84,25 +84,50 @@ configure_service_account
 # screen session name
 session_name="build-current-images"
 
-# Run all image generation scripts simultaneously
+readonly timestamp="$(date +%F-%H-%M)"
+#readonly timestamp="2024-10-24-04-21"
+export timestamp
+
+export tmpdir=/tmp/${timestamp};
+mkdir -p "${tmpdir}"
+export ZONE="$(jq -r .ZONE env.json)"
+gcloud compute instances list --zones "${ZONE}" --format json > ${tmpdir}/instances.json
+gcloud compute images list --format json > ${tmpdir}/images.json
+
+# Run generation scripts simultaneously for each dataproc image version
 screen -US "${session_name}" -c examples/secure-boot/pre-init.screenrc
 
-# tail -n 3 /tmp/custom-image-cuda-pre-init-2-*/logs/workflow.log
-# grep -A6 'Filesystem.*Avail' /tmp/custom-image-cuda-pre-init-2-*/logs/startup-script.log | perl -ne 'print $1,$/ if( m:( Filesystem.* Avail.*| /dev/.*/\s*$|^--): )'
+# tail -n 3 /tmp/custom-image-*/logs/workflow.log
+# tail -n 3 /tmp/custom-image-*/logs/startup-script.log
+# tail -n 3 /tmp/custom-image-${PURPOSE}-2-*/logs/workflow.log
+function find_disk_usage() {
+  test -f /tmp/genline.pl || cat > /tmp/genline.pl <<'EOF'
+#!/usr/bin/perl -w
+use strict;
+
+my $fn = $ARGV[0];
+my( $config ) = ( $fn =~ /custom-image-(.*-(debian|rocky|ubuntu)\d+)-\d+/ );
+
+my @raw_lines = <STDIN>;
+my( $l ) = grep { m: /dev/.*/\s*$: } @raw_lines;
+my( $stats ) = ( $l =~ m:\s*/dev/\S+\s+(.*?)\s*$: );
+
+my( $dp_version ) = ($config =~ /-pre-init-(.+)/);
+$dp_version =~ s/-/./;
+
+my($max) = map { / maximum-disk-used: (\d+)/ } @raw_lines;
+$max+=3;
+my $i_dp_version = sprintf(q{%-15s}, qq{"$dp_version"});
+
+print( qq{  $i_dp_version) disk_size_gb="$max" ;; # $stats # $config}, $/ );
+EOF
+  for f in $(grep -l 'Customization script suc' /tmp/custom-image-*/logs/workflow.log|sed -e 's/workflow.log/startup-script.log/')
+  do
+    grep -A20 'Filesystem.*Avail' $f | perl /tmp/genline.pl $f
+  done
+}
 
-revoke_bindings
+# sleep 8m ; grep 'Customization script' /tmp/custom-image-*/logs/workflow.log
+# grep maximum-disk-used /tmp/custom-image-*/logs/startup-script.log
 
-#
-# disk size - 20241009
-#
-# Filesystem      Size  Used Avail Use% Mounted on
-
-# /dev/sda1        40G   29G  9.1G  76% / # 2.0-debian10
-# /dev/sda2        33G   30G  3.4G  90% / # 2.0-rocky8
-# /dev/sda1        36G   29G  7.0G  81% / # 2.0-ubuntu18
-# /dev/sda1        40G   35G  2.7G  93% / # 2.1-debian11
-# /dev/sda2        36G   33G  3.4G  91% / # 2.1-rocky8
-# /dev/root        36G   34G  2.1G  95% / # 2.1-ubuntu20
-# /dev/sda1        40G   37G  1.1G  98% / # 2.2-debian12
-# /dev/sda2        54G   34G   21G  63% / # 2.2-rocky9
-# /dev/root        39G   37G  2.4G  94% / # 2.2-ubuntu22
+revoke_bindings
diff --git a/examples/secure-boot/dask.sh b/examples/secure-boot/dask.sh
index 946608d..e1c1229 100644
--- a/examples/secure-boot/dask.sh
+++ b/examples/secure-boot/dask.sh
@@ -22,10 +22,10 @@ set -euxo pipefail
 
 function os_id() { grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; }
-function os_version() { grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; }
-function os_codename() { grep '^VERSION_CODENAME=' /etc/os-release | cut -d= -f2 | xargs ; }
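As a sketch of what the `find_disk_usage` helper in build-current-images.sh above emits: assuming a startup-script log whose serial-console capture kept a leading space on the `df` lines and recorded `maximum-disk-used: 35`, `genline.pl` turns each successful build log into a ready-to-paste `case` branch (the path below is hypothetical):

```bash
f=/tmp/custom-image-cuda-pre-init-2-2-debian12-2024-10-24-04-21/logs/startup-script.log
grep -A20 'Filesystem.*Avail' "$f" | perl /tmp/genline.pl "$f"
# expected shape of the output, padded for alignment:
#   "2.2-debian12"  ) disk_size_gb="38" ;; # 40G 35G 3.3G 92% / # cuda-pre-init-2-2-debian12
```

The three-gigabyte margin added to `maximum-disk-used` is what feeds the `disk_size_gb` tables in `examples/secure-boot/pre-init.sh` below.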
 function is_ubuntu() { [[ "$(os_id)" == 'ubuntu' ]] ; }
 function is_ubuntu18() { is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; }
+function is_debian() { [[ "$(os_id)" == 'debian' ]] ; }
+function is_debuntu() { is_debian || is_ubuntu ; }
+function is_rocky() { [[ "$(os_id)" == 'rocky' ]] ; }
 
 function print_metadata_value() {
   local readonly tmpfile=$(mktemp)
@@ -64,52 +64,34 @@ function get_metadata_value() {
   return ${return_code}
 }
 
-function get_metadata_attribute() {
+function get_metadata_attribute() ( set +x
   local -r attribute_name="$1"
   local -r default_value="${2:-}"
   get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}"
-  set -x
-}
+)
 
-readonly DEFAULT_CUDA_VERSION="12.4"
-readonly CUDA_VERSION=$(get_metadata_attribute 'cuda-version' ${DEFAULT_CUDA_VERSION})
 function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; }
 function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; }
 
-readonly DASK_RUNTIME="$(get_metadata_attribute dask-runtime || echo 'standalone')"
-
-# Dask 'standalone' config
-readonly DASK_SERVICE=dask-cluster
-readonly DASK_WORKER_SERVICE=dask-worker
-readonly DASK_SCHEDULER_SERVICE=dask-scheduler
-
-readonly KNOX_HOME=/usr/lib/knox
-readonly KNOX_DASK_DIR="${KNOX_HOME}/data/services/dask/0.1.0"
-readonly KNOX_DASKWS_DIR="${KNOX_HOME}/data/services/daskws/0.1.0"
-
 function execute_with_retries() {
-  local -r cmd=$1
-  for ((i = 0; i < 10; i++)); do
+  local -r cmd="$*"
+  for i in {0..9} ; do
     if eval "$cmd"; then
-      return 0
-    fi
+      return 0 ; fi
    sleep 5
  done
  echo "Cmd '${cmd}' failed."
  return 1
 }
 
-DASK_CONDA_ENV="/opt/conda/miniconda3/envs/dask"
-
 function configure_dask_yarn() {
   readonly DASK_YARN_CONFIG_DIR=/etc/dask/
   readonly DASK_YARN_CONFIG_FILE=${DASK_YARN_CONFIG_DIR}/config.yaml
-  dask_python="$(realpath "${DASK_CONDA_ENV}/bin/python")"
 
   # Minimal custom configuration is required for this
   # setup. Please see https://yarn.dask.org/en/latest/quickstart.html#usage
   # for information on tuning Dask-Yarn environments.
-  mkdir -p ${DASK_YARN_CONFIG_DIR}
+  mkdir -p "${DASK_YARN_CONFIG_DIR}"
 
   cat <<EOF >"${DASK_YARN_CONFIG_FILE}"
# Config file for Dask Yarn.
@@ -118,16 +100,13 @@ function configure_dask_yarn() {
 # https://yarn.dask.org/en/latest/configuration.html#default-configuration
 
 yarn:
-  environment: python://${dask_python}
+  environment: python://${DASK_CONDA_ENV}/bin/python
 
   worker:
     count: 2
 EOF
 }
 
-enable_worker_service="0"
-ROLE="$(get_metadata_attribute dataproc-role)"
-MASTER="$(get_metadata_attribute dataproc-master)"
 function install_systemd_dask_worker() {
   echo "Installing systemd Dask Worker service..."
  local -r dask_worker_local_dir="/tmp/${DASK_WORKER_SERVICE}"
@@ -164,17 +143,18 @@ EOF
   if [[ "${ROLE}" != "Master" ]]; then
     enable_worker_service="1"
   else
-    local RUN_WORKER_ON_MASTER="$(get_metadata_attribute dask-worker-on-master || echo 'true')"
+    local RUN_WORKER_ON_MASTER="$(get_metadata_attribute dask-worker-on-master 'true')"
     # Enable service on single-node cluster (no workers)
     local worker_count="$(get_metadata_attribute dataproc-worker-count)"
-    if [[ "${worker_count}" == "0" ]]; then RUN_WORKER_ON_MASTER='true'; fi
-
-    if [[ "${RUN_WORKER_ON_MASTER}" == "true" ]]; then
+    if [[ "${worker_count}" == "0" || "${RUN_WORKER_ON_MASTER}" == "true" ]]; then
       enable_worker_service="1"
     fi
   fi
 
-  if [[ "${enable_worker_service}" == "1" ]]; then systemctl enable "${DASK_WORKER_SERVICE}" ; fi
+  if [[ "${enable_worker_service}" == "1" ]]; then
+    systemctl enable "${DASK_WORKER_SERVICE}"
+    systemctl restart "${DASK_WORKER_SERVICE}"
+  fi
 }
 
 function install_systemd_dask_scheduler() {
@@ -185,7 +165,6 @@ function install_systemd_dask_scheduler() {
 
   mkdir -p "${dask_scheduler_local_dir}"
 
-  local DASK_SCHEDULER_LAUNCHER="/usr/local/bin/${DASK_SCHEDULER_SERVICE}-launcher.sh"
 
   cat <<EOF >"${DASK_SCHEDULER_LAUNCHER}"
@@ -234,9 +213,11 @@ function configure_knox_for_dask() {
   fi
 
   local DASK_UI_PORT=8787
-  sed -i \
-    "/<\/topology>/i <service><role>DASK<\/role><url>http://localhost:${DASK_UI_PORT}<\/url><\/service> <service><role>DASKWS<\/role><url>ws:\/\/${MASTER}:${DASK_UI_PORT}<\/url><\/service>" \
-    /etc/knox/conf/topologies/default.xml
+  if [[ -f /etc/knox/conf/topologies/default.xml ]]; then
+    sed -i \
+      "/<\/topology>/i <service><role>DASK<\/role><url>http://localhost:${DASK_UI_PORT}<\/url><\/service> <service><role>DASKWS<\/role><url>ws:\/\/${MASTER}:${DASK_UI_PORT}<\/url><\/service>" \
+      /etc/knox/conf/topologies/default.xml
+  fi
 
   mkdir -p "${KNOX_DASK_DIR}"
@@ -378,10 +359,12 @@
 
   chown -R knox:knox "${KNOX_DASK_DIR}" "${KNOX_DASKWS_DIR}"
 
-  restart_knox
+  # Do not restart knox during pre-init script run
+  if [[ -n "${ROLE}" ]]; then
+    restart_knox
+  fi
 }
 
-
 function configure_fluentd_for_dask() {
   if [[ "$(hostname -s)" == "${MASTER}" ]]; then
     cat >/etc/google-fluentd/config.d/dataproc-dask.conf < /dev/null 2>&1
+    local retval=$?
+    sync
     if [[ "$retval" == "0" ]] ; then
       is_installed="1"
       break
-    else
-      "${conda}" config --set channel_priority flexible
     fi
+    "${conda}" config --set channel_priority flexible
   done
   if [[ "${is_installed}" == "0" ]]; then
     echo "failed to install dask"
     return 1
   fi
+  )
 }
 
 function main() {
+  # Install Dask
   install_dask
 
+  # In "standalone" mode, Dask relies on a systemd unit to launch.
+  # In "yarn" mode, it relies on a config.yaml file.
   if [[ "${DASK_RUNTIME}" == "yarn" ]]; then
     # Create Dask YARN config file
     configure_dask_yarn
@@ -529,7 +517,121 @@ function main() {
   echo "Dask for ${DASK_RUNTIME} successfully initialized."
 }
 
+function exit_handler() (
+  set +e
+  echo "Exit handler invoked"
 
-main
+  # Free conda cache
+  /opt/conda/miniconda3/bin/conda clean -a > /dev/null 2>&1
+
+  # Clear pip cache
+  pip cache purge || echo "unable to purge pip cache"
 
-df -h
+  # remove the tmpfs conda pkgs_dirs
+  if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi
+
+  # Clean up shared memory mounts
+  for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do
+    if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then
+      rm -rf ${shmdir}/*
+      umount -f ${shmdir}
+    fi
+  done
+
+  # Clean up OS package cache
+  if is_debuntu ; then
+    apt-get -y -qq clean
+    apt-get -y -qq autoremove
+  else
+    dnf clean all
+  fi
+
+  # print disk usage statistics
+  if is_debuntu ; then
+    # Rocky doesn't have sort -h and fails when the argument is passed
+    du --max-depth 3 -hx / | sort -h | tail -10
+  fi
+
+  # Process disk usage logs from installation period
+  rm -f /tmp/keep-running-df
+  sleep 6s
+  # compute maximum size of disk during installation
+  # Log file contains logs like the following (minus the preceding #):
+#Filesystem     Size Used Avail Use% Mounted on
+#/dev/vda2      6.8G 2.5G 4.0G  39% /
+  df --si
+  perl -e '$max=( sort
+                   map { (split)[2] =~ /^(\d+)/ }
+                  grep { m:^/: } <STDIN> )[-1];
+print( "maximum-disk-used: $max", $/ );' < /tmp/disk-usage.log
+
+  echo "exit_handler has completed"
+
+  # zero free disk space
+  if [[ -n "$(get_metadata_attribute creating-image)" ]]; then
+    dd if=/dev/zero of=/zero ; sync ; rm -f /zero
+  fi
+
+  return 0
+)
+
+trap exit_handler EXIT
+
+function prepare_to_install() {
+  readonly DEFAULT_CUDA_VERSION="12.4"
+  CUDA_VERSION=$(get_metadata_attribute 'cuda-version' ${DEFAULT_CUDA_VERSION})
+  readonly CUDA_VERSION
+
+  readonly ROLE=$(get_metadata_attribute dataproc-role)
+  readonly MASTER=$(get_metadata_attribute dataproc-master)
+
+  # Dask config
+  DASK_RUNTIME="$(get_metadata_attribute dask-runtime || echo 'standalone')"
+  readonly DASK_RUNTIME
+  readonly DASK_SERVICE=dask-cluster
+  readonly DASK_WORKER_SERVICE=dask-worker
+  readonly DASK_SCHEDULER_SERVICE=dask-scheduler
+  readonly DASK_CONDA_ENV="/opt/conda/miniconda3/envs/dask"
+
+  # Knox config
+  readonly KNOX_HOME=/usr/lib/knox
+  readonly KNOX_DASK_DIR="${KNOX_HOME}/data/services/dask/0.1.0"
+  readonly KNOX_DASKWS_DIR="${KNOX_HOME}/data/services/daskws/0.1.0"
+  enable_worker_service="0"
+
+  free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)"
+  # Write to a ramdisk instead of churning the persistent disk
+  if [[ ${free_mem} -ge 5250000 ]]; then
+    mkdir -p /mnt/shm
+    mount -t tmpfs tmpfs /mnt/shm
+
+    # Download conda packages to tmpfs
+    /opt/conda/miniconda3/bin/conda config --add pkgs_dirs /mnt/shm
+
+    # Download pip packages to tmpfs
+    pip config set global.cache-dir /mnt/shm || echo "unable to set global.cache-dir"
+
+    # Download OS packages to tmpfs
+    if is_debuntu ; then
+      mount -t tmpfs tmpfs /var/cache/apt/archives
+    else
+      mount -t tmpfs tmpfs /var/cache/dnf
+    fi
+  fi
+
+  # Monitor disk usage in a screen session
+  if is_debuntu ; then
+    apt-get install -y -qq screen
+  elif is_rocky ; then
+    dnf -y -q install screen
+  fi
+  rm -f /tmp/disk-usage.log
+  touch /tmp/keep-running-df
+  screen -d -m -US keep-running-df \
+    bash -c 'while [[ -f /tmp/keep-running-df ]] ; do df --si / | tee -a /tmp/disk-usage.log ; sleep 5s ; done'
+}
+
+prepare_to_install
+
+main
diff --git a/examples/secure-boot/install_gpu_driver.sh b/examples/secure-boot/install_gpu_driver.sh
index
d8d38d0..c0129dc 100644 --- a/examples/secure-boot/install_gpu_driver.sh +++ b/examples/secure-boot/install_gpu_driver.sh @@ -16,24 +16,26 @@ set -euxo pipefail -function os_id() { grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; } -function os_version() { grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; } -function os_codename() { grep '^VERSION_CODENAME=' /etc/os-release | cut -d= -f2 | xargs ; } -function is_rocky() { [[ "$(os_id)" == 'rocky' ]] ; } -function is_rocky8() { is_rocky && [[ "$(os_version)" == '8'* ]] ; } -function is_rocky9() { is_rocky && [[ "$(os_version)" == '9'* ]] ; } -function is_ubuntu() { [[ "$(os_id)" == 'ubuntu' ]] ; } -function is_ubuntu18() { is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; } -function is_ubuntu20() { is_ubuntu && [[ "$(os_version)" == '20.04'* ]] ; } -function is_ubuntu22() { is_ubuntu && [[ "$(os_version)" == '22.04'* ]] ; } -function is_debian() { [[ "$(os_id)" == 'debian' ]] ; } -function is_debian10() { is_debian && [[ "$(os_version)" == '10'* ]] ; } -function is_debian11() { is_debian && [[ "$(os_version)" == '11'* ]] ; } -function is_debian12() { is_debian && [[ "$(os_version)" == '12'* ]] ; } -function os_vercat() { set +x +function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_version() ( set +x ; grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_codename() ( set +x ; grep '^VERSION_CODENAME=' /etc/os-release | cut -d= -f2 | xargs ; ) +function is_rocky() ( set +x ; [[ "$(os_id)" == 'rocky' ]] ; ) +function is_rocky8() ( set +x ; is_rocky && [[ "$(os_version)" == '8'* ]] ; ) +function is_rocky9() ( set +x ; is_rocky && [[ "$(os_version)" == '9'* ]] ; ) +function is_ubuntu() ( set +x ; [[ "$(os_id)" == 'ubuntu' ]] ; ) +function is_ubuntu18() ( set +x ; is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; ) +function is_ubuntu20() ( set +x ; is_ubuntu && [[ "$(os_version)" == '20.04'* ]] ; ) +function is_ubuntu22() ( set +x ; is_ubuntu && [[ "$(os_version)" == '22.04'* ]] ; ) +function is_debian() ( set +x ; [[ "$(os_id)" == 'debian' ]] ; ) +function is_debian10() ( set +x ; is_debian && [[ "$(os_version)" == '10'* ]] ; ) +function is_debian11() ( set +x ; is_debian && [[ "$(os_version)" == '11'* ]] ; ) +function is_debian12() ( set +x ; is_debian && [[ "$(os_version)" == '12'* ]] ; ) +function is_debuntu() ( set +x ; is_debian || is_ubuntu ; ) + +function os_vercat() ( set +x if is_ubuntu ; then os_version | sed -e 's/[^0-9]//g' elif is_rocky ; then os_version | sed -e 's/[^0-9].*$//g' - else os_version ; fi ; set -x ; } + else os_version ; fi ; ) function remove_old_backports { if is_debian12 ; then return ; fi @@ -60,9 +62,9 @@ function remove_old_backports { function compare_versions_lte { [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] ; } # Return true if the first argument is less than the second argument -function compare_versions_lt() { +function compare_versions_lt() ( set +x [ "$1" = "$2" ] && return 1 || compare_versions_lte $1 $2 -} +) function print_metadata_value() { local readonly tmpfile=$(mktemp) @@ -85,7 +87,7 @@ function print_metadata_value_if_exists() { return ${return_code} } -function get_metadata_value() { +function get_metadata_value() ( set +x local readonly varname=$1 local -r MDS_PREFIX=http://metadata.google.internal/computeMetadata/v1 @@ -97,17 +99,16 @@ function get_metadata_value() { print_metadata_value_if_exists ${MDS_PREFIX}/project/${varname} return_code=$? 
 fi
-  set -x
+
   return ${return_code}
-}
+)
 
-function get_metadata_attribute() {
+function get_metadata_attribute() ( set +x
   local -r attribute_name="$1"
   local -r default_value="${2:-}"
   get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}"
-  set -x
-}
+)
 
 OS_NAME=$(lsb_release -is | tr '[:upper:]' '[:lower:]')
 distribution=$(. /etc/os-release;echo $ID$VERSION_ID)
@@ -142,8 +143,8 @@ CUDA_VERSION=$(get_metadata_attribute 'cuda-version' "${DEFAULT_CUDA_VERSION}")
 readonly CUDA_VERSION
 readonly CUDA_FULL_VERSION="${CUDA_SUBVER["${CUDA_VERSION}"]}"
 
-function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; }
-function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; }
+function is_cuda12() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "12" ]] ; )
+function is_cuda11() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "11" ]] ; )
 
 readonly DEFAULT_DRIVER=${DRIVER_FOR_CUDA["${CUDA_VERSION}"]}
 DRIVER_VERSION=$(get_metadata_attribute 'gpu-driver-version' "${DEFAULT_DRIVER}")
 if is_debian11 || is_ubuntu22 || is_ubuntu20 ; then DRIVER_VERSION="560.28.03" ; fi
@@ -155,8 +156,8 @@ readonly DRIVER=${DRIVER_VERSION%%.*}
 
 # Parameters for NVIDIA-provided CUDNN library
 readonly DEFAULT_CUDNN_VERSION=${CUDNN_FOR_CUDA["${CUDA_VERSION}"]}
 CUDNN_VERSION=$(get_metadata_attribute 'cudnn-version' "${DEFAULT_CUDNN_VERSION}")
-function is_cudnn8() { [[ "${CUDNN_VERSION%%.*}" == "8" ]] ; }
-function is_cudnn9() { [[ "${CUDNN_VERSION%%.*}" == "9" ]] ; }
+function is_cudnn8() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "8" ]] ; )
+function is_cudnn9() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "9" ]] ; )
 if is_rocky \
   && (compare_versions_lte "${CUDNN_VERSION}" "8.0.5.39") ; then
   CUDNN_VERSION="8.0.5.39"
@@ -258,16 +259,19 @@ NVIDIA_SMI_PATH='/usr/bin'
 MIG_MAJOR_CAPS=0
 IS_MIG_ENABLED=0
 
-function execute_with_retries() {
-  local -r cmd="$*"
+function execute_with_retries() ( set +x
+  local cmd="$*"
+
+  if [[ "$cmd" =~ ^apt-get\ install ]] ; then  # regex left unquoted so ^ anchors
+    cmd="apt-get -y clean && $cmd"
+  fi
   for ((i = 0; i < 3; i++)); do
-    if eval "$cmd"; then set -x ; return 0 ; fi
+    if eval "$cmd" ; then return 0 ; fi
     sleep 5
   done
-  set -x
   return 1
-}
+)
 
 CUDA_KEYRING_PKG_INSTALLED="0"
 function install_cuda_keyring_pkg() {
@@ -275,9 +279,9 @@ function install_cuda_keyring_pkg() {
   local kr_ver=1.1
   curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \
     "${NVIDIA_REPO_URL}/cuda-keyring_${kr_ver}-1_all.deb" \
-    -o /tmp/cuda-keyring.deb
-  dpkg -i "/tmp/cuda-keyring.deb"
-  rm -f "/tmp/cuda-keyring.deb"
+    -o "${download_dir}/cuda-keyring.deb"
+  dpkg -i "${download_dir}/cuda-keyring.deb"
+  rm -f "${download_dir}/cuda-keyring.deb"
 
   CUDA_KEYRING_PKG_INSTALLED="1"
 }
@@ -297,10 +301,10 @@ function install_local_cuda_repo() {
   readonly DIST_KEYRING_DIR="/var/${pkgname}"
 
   curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \
-    "${LOCAL_DEB_URL}" -o "/tmp/${LOCAL_INSTALLER_DEB}"
+    "${LOCAL_DEB_URL}" -o "${download_dir}/${LOCAL_INSTALLER_DEB}"
 
-  dpkg -i "/tmp/${LOCAL_INSTALLER_DEB}"
-  rm "/tmp/${LOCAL_INSTALLER_DEB}"
+  dpkg -i "${download_dir}/${LOCAL_INSTALLER_DEB}"
+  rm "${download_dir}/${LOCAL_INSTALLER_DEB}"
 
   cp ${DIST_KEYRING_DIR}/cuda-*-keyring.gpg /usr/share/keyrings/
 
   if is_ubuntu ; then
@@ -325,11 +329,11 @@ function install_local_cudnn_repo() {
 
   # ${NVIDIA_BASE_DL_URL}/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz
   curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \
-    "${local_deb_url}" -o /tmp/local-installer.deb
+    "${local_deb_url}" -o "${download_dir}/local-installer.deb"
 
-  dpkg -i
/tmp/local-installer.deb + dpkg -i "${download_dir}/local-installer.deb" - rm -f /tmp/local-installer.deb + rm -f "${download_dir}/local-installer.deb" cp /var/cudnn-local-repo-*-${CUDNN}*/cudnn-local-*-keyring.gpg /usr/share/keyrings @@ -356,8 +360,9 @@ function install_local_cudnn8_repo() { pkgname="cudnn-local-repo-${cudnn8_shortname}-${CUDNN_VERSION}" CUDNN8_PKG_NAME="${pkgname}" - local_deb_fn="${pkgname}_1.0-1_amd64.deb" - local_deb_url="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN}/local_installers/${CUDNN8_CUDA_VER}/${local_deb_fn}" + deb_fn="${pkgname}_1.0-1_amd64.deb" + local_deb_fn="${download_dir}/${deb_fn}" + local_deb_url="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN}/local_installers/${CUDNN8_CUDA_VER}/${deb_fn}" curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ "${local_deb_url}" -o "${local_deb_fn}" @@ -380,7 +385,9 @@ function install_nvidia_nccl() { if is_rocky ; then time execute_with_retries \ dnf -y -q install \ - "libnccl-${nccl_version}" "libnccl-devel-${nccl_version}" "libnccl-static-${nccl_version}" + "libnccl-${nccl_version}" "libnccl-devel-${nccl_version}" "libnccl-static-${nccl_version}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync elif is_ubuntu ; then install_cuda_keyring_pkg @@ -389,11 +396,15 @@ function install_nvidia_nccl() { if is_ubuntu18 ; then time execute_with_retries \ apt-get install -q -y \ - libnccl2 libnccl-dev + libnccl2 libnccl-dev \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync else time execute_with_retries \ apt-get install -q -y \ - "libnccl2=${nccl_version}" "libnccl-dev=${nccl_version}" + "libnccl2=${nccl_version}" "libnccl-dev=${nccl_version}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync fi else echo "Unsupported OS: '${OS_NAME}'" @@ -405,8 +416,8 @@ function install_nvidia_nccl() { fi } -function is_src_nvidia() { [[ "${GPU_DRIVER_PROVIDER}" == "NVIDIA" ]] ; } -function is_src_os() { [[ "${GPU_DRIVER_PROVIDER}" == "OS" ]] ; } +function is_src_nvidia() ( set +x ; [[ "${GPU_DRIVER_PROVIDER}" == "NVIDIA" ]] ; ) +function is_src_os() ( set +x ; [[ "${GPU_DRIVER_PROVIDER}" == "OS" ]] ; ) function install_nvidia_cudnn() { local major_version @@ -416,17 +427,21 @@ function install_nvidia_cudnn() { if is_rocky ; then if is_cudnn8 ; then - execute_with_retries "dnf -y -q install" \ + time execute_with_retries dnf -y -q install \ "libcudnn${major_version}" \ - "libcudnn${major_version}-devel" + "libcudnn${major_version}-devel" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync elif is_cudnn9 ; then - execute_with_retries "dnf -y -q install" \ + time execute_with_retries dnf -y -q install \ "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" + "libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync else echo "Unsupported cudnn version: '${major_version}'" fi - elif is_debian || is_ubuntu; then + elif is_debuntu; then if is_debian12 && is_src_os ; then apt-get -y install nvidia-cudnn else @@ -436,20 +451,24 @@ function install_nvidia_cudnn() { apt-get update -qq - execute_with_retries \ + time execute_with_retries \ apt-get -y install --no-install-recommends \ "libcudnn8=${cudnn_pkg_version}" \ - "libcudnn8-dev=${cudnn_pkg_version}" + "libcudnn8-dev=${cudnn_pkg_version}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync elif is_cudnn9 ; then install_cuda_keyring_pkg apt-get update -qq - 
execute_with_retries \ + time execute_with_retries \ apt-get -y install --no-install-recommends \ "libcudnn9-cuda-${CUDA_VERSION%%.*}" \ "libcudnn9-dev-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" + "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync else echo "Unsupported cudnn version: [${CUDNN_VERSION}]" fi @@ -459,8 +478,10 @@ function install_nvidia_cudnn() { packages=( "libcudnn${major_version}=${cudnn_pkg_version}" "libcudnn${major_version}-dev=${cudnn_pkg_version}") - execute_with_retries \ - "apt-get install -q -y --no-install-recommends ${packages[*]}" + time execute_with_retries \ + apt-get install -q -y --no-install-recommends "${packages[*]}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync else echo "Unsupported OS: '${OS_NAME}'" exit 1 @@ -581,7 +602,7 @@ function add_nonfree_components() { } function add_repo_nvidia_container_toolkit() { - if is_debian || is_ubuntu ; then + if is_debuntu ; then local kr_path=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg local sources_list_path=/etc/apt/sources.list.d/nvidia-container-toolkit.list # https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html @@ -597,7 +618,7 @@ function add_repo_nvidia_container_toolkit() { } function add_repo_cuda() { - if is_debian || is_ubuntu ; then + if is_debuntu ; then local kr_path=/usr/share/keyrings/cuda-archive-keyring.gpg local sources_list_path="/etc/apt/sources.list.d/cuda-${shortname}-x86_64.list" echo "deb [signed-by=${kr_path}] https://developer.download.nvidia.com/compute/cuda/repos/${shortname}/x86_64/ /" \ @@ -626,8 +647,7 @@ function build_driver_from_github() { tarball_fn="${DRIVER_VERSION}.tar.gz" curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ "https://github.com/NVIDIA/open-gpu-kernel-modules/archive/refs/tags/${tarball_fn}" \ - -o "${tarball_fn}" - tar xzf "${tarball_fn}" + | tar xz mv "open-gpu-kernel-modules-${DRIVER_VERSION}" open-gpu-kernel-modules } cd open-gpu-kernel-modules @@ -635,6 +655,7 @@ function build_driver_from_github() { time make -j$(nproc) modules \ > /var/log/open-gpu-kernel-modules-build.log \ 2> /var/log/open-gpu-kernel-modules-build_error.log + sync if [[ -n "${PSN}" ]]; then #configure_dkms_certs @@ -671,38 +692,47 @@ function build_driver_from_packages() { fi add_contrib_component apt-get update -qq - execute_with_retries "apt-get install -y -qq --no-install-recommends dkms" + execute_with_retries apt-get install -y -qq --no-install-recommends dkms \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } #configure_dkms_certs - time execute_with_retries "apt-get install -y -qq --no-install-recommends ${pkglist[@]}" + time execute_with_retries apt-get install -y -qq --no-install-recommends "${pkglist[@]}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync elif is_rocky ; then #configure_dkms_certs - if execute_with_retries dnf -y -q module install "nvidia-driver:${DRIVER}-dkms" ; then + if time execute_with_retries dnf -y -q module install "nvidia-driver:${DRIVER}-dkms" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } ; then echo "nvidia-driver:${DRIVER}-dkms installed successfully" else - time execute_with_retries dnf -y -q module install 'nvidia-driver:latest' + time execute_with_retries dnf -y -q module install 'nvidia-driver:latest' \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } fi + sync fi 
#clear_dkms_key } function install_nvidia_userspace_runfile() { - if test -d /run/nvidia-userspace ; then return ; fi + if test -f "${download_dir}/userspace-complete" ; then return ; fi curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${USERSPACE_URL}" -o userspace.run - time bash "./userspace.run" --no-kernel-modules --silent --install-libglvnd \ - > /dev/null 2>&1 - rm -f userspace.run - mkdir -p /run/nvidia-userspace + "${USERSPACE_URL}" -o "${download_dir}/userspace.run" + time bash "${download_dir}/userspace.run" --no-kernel-modules --silent --install-libglvnd \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + rm -f "${download_dir}/userspace.run" + touch "${download_dir}/userspace-complete" + sync } function install_cuda_runfile() { - if test -d /run/nvidia-cuda ; then return ; fi - curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${NVIDIA_CUDA_URL}" -o cuda.run - time bash "./cuda.run" --silent --toolkit --no-opengl-libs - rm -f cuda.run - mkdir -p /run/nvidia-cuda + if test -f "${download_dir}/cuda-complete" ; then return ; fi + time curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ + "${NVIDIA_CUDA_URL}" -o "${download_dir}/cuda.run" + time bash "${download_dir}/cuda.run" --silent --toolkit --no-opengl-libs \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + rm -f "${download_dir}/cuda.run" + touch "${download_dir}/cuda-complete" + sync } function install_cuda_toolkit() { @@ -714,11 +744,15 @@ function install_cuda_toolkit() { fi cuda_package="cuda=${CUDA_FULL_VERSION}-1" readonly cudatk_package - if is_ubuntu || is_debian ; then + if is_debuntu ; then # if is_ubuntu ; then execute_with_retries "apt-get install -y -qq --no-install-recommends cuda-drivers-${DRIVER}=${DRIVER_VERSION}-1" ; fi - time execute_with_retries "apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package}" + time execute_with_retries apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package} \ + > "${install_log}" 2>&1 || { cat "${install_log}" ; exit -4 ; } + sync elif is_rocky ; then - time execute_with_retries "dnf -y -q install ${cudatk_package}" + time execute_with_retries dnf -y -q install "${cudatk_package}" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync fi } @@ -776,7 +810,7 @@ function install_nvidia_gpu_driver() { load_kernel_module install_cuda_runfile - elif is_debian || is_ubuntu ; then + elif is_debuntu ; then install_cuda_keyring_pkg build_driver_from_packages @@ -798,7 +832,11 @@ function install_nvidia_gpu_driver() { exit 1 fi ldconfig - echo "NVIDIA GPU driver provided by NVIDIA was installed successfully" + if is_src_os ; then + echo "NVIDIA GPU driver provided by ${OS_NAME} was installed successfully" + else + echo "NVIDIA GPU driver provided by NVIDIA was installed successfully" + fi } # Collects 'gpu_utilization' and 'gpu_memory_utilization' metrics @@ -814,7 +852,9 @@ function install_gpu_agent() { "${GPU_AGENT_REPO_URL}/report_gpu_metrics.py" \ | sed -e 's/-u --format=/--format=/' \ | dd status=none of="${install_dir}/report_gpu_metrics.py" - pip install -r "${install_dir}/requirements.txt" + time execute_with_retries pip install -r "${install_dir}/requirements.txt" \ + > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + sync # Generate GPU service. 
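+  # The unit generated below keeps report_gpu_metrics.py running from boot,
+  # so the 'gpu_utilization' and 'gpu_memory_utilization' metrics collected
+  # above continue to flow once the cluster is running.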
 cat <<EOF >/lib/systemd/system/gpu-utilization-agent.service
@@ -839,7 +879,6 @@ EOF
   systemctl --no-reload --now enable gpu-utilization-agent.service
 }
 
-readonly bdcfg="/usr/local/bin/bdconfig"
 function set_hadoop_property() {
   local -r config_file=$1
   local -r property=$2
@@ -993,7 +1032,6 @@ EOF
   systemctl start dataproc-cgroup-device-permissions
 }
 
-nvsmi_works="0"
 function nvsmi() {
   local nvsmi="/usr/bin/nvidia-smi"
   if [[ "${nvsmi_works}" == "1" ]] ; then echo "nvidia-smi is working" >&2
@@ -1020,24 +1058,28 @@ function main() {
 
   remove_old_backports
 
-  if is_debian || is_ubuntu ; then
+  if is_debuntu ; then
     export DEBIAN_FRONTEND=noninteractive
-    execute_with_retries "apt-get install -y -qq pciutils linux-headers-${uname_r}"
+    time execute_with_retries apt-get install -y -qq pciutils "linux-headers-${uname_r}" > /dev/null 2>&1
   elif is_rocky ; then
-    execute_with_retries "dnf -y -q update --exclude=systemd*,kernel*"
-    execute_with_retries "dnf -y -q install pciutils gcc"
+    time execute_with_retries dnf -y -q update --exclude=systemd*,kernel* \
+      > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; }
+    time execute_with_retries dnf -y -q install pciutils gcc \
+      > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; }
 
     local dnf_cmd="dnf -y -q install kernel-devel-${uname_r}"
     local kernel_devel_pkg_out="$(eval "${dnf_cmd} 2>&1")"
     if [[ "${kernel_devel_pkg_out}" =~ 'Unable to find a match: kernel-devel-' ]] ; then
       # this kernel-devel may have been migrated to the vault
       local vault="https://download.rockylinux.org/vault/rocky/$(os_version)"
-      execute_with_retries dnf -y -q --setopt=localpkg_gpgcheck=1 install \
+      time execute_with_retries dnf -y -q --setopt=localpkg_gpgcheck=1 install \
        "${vault}/BaseOS/x86_64/os/Packages/k/kernel-${uname_r}.rpm" \
        "${vault}/BaseOS/x86_64/os/Packages/k/kernel-core-${uname_r}.rpm" \
        "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-${uname_r}.rpm" \
        "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-core-${uname_r}.rpm" \
-       "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm"
+       "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm" \
+       > "${install_log}" 2>&1 || { cat "${install_log}" ; exit -4 ; }
+      sync
     else
       execute_with_retries "${dnf_cmd}"
     fi
@@ -1215,21 +1257,139 @@ function clean_up_sources_lists() {
     sed -i -e 's:deb https:deb [signed-by=/usr/share/keyrings/mysql.gpg] https:g' /etc/apt/sources.list.d/mysql.list
   fi
 
-  if -f /etc/apt/trusted.gpg ; then mv /etc/apt/trusted.gpg /etc/apt/old-trusted.gpg ; fi
+  if [[ -f /etc/apt/trusted.gpg ]] ; then mv /etc/apt/trusted.gpg /etc/apt/old-trusted.gpg ; fi
 
 }
 
-if is_debian ; then
-  clean_up_sources_lists
-  apt-get update
-  if is_debian12 ; then
-    apt-mark unhold systemd libsystemd0 ; fi
-fi
+function exit_handler() {
+  echo "Exit handler invoked"
+  set +ex
 
+  # Purge private key material until next grant
+  clear_dkms_key
 
-configure_dkms_certs
+  # Free conda cache
+  /opt/conda/miniconda3/bin/conda clean -a
 
-main
+  # Clear pip cache
+  pip cache purge || echo "unable to purge pip cache"
+
+  # remove the tmpfs conda pkgs_dirs
+  if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi
+
+  # remove the tmpfs pip cache-dir
+  pip config unset global.cache-dir || echo "unable to unset global pip cache"
+
+  # Clean up shared memory mounts
+  for shmdir in /mnt/shm /var/cache/apt/archives /var/cache/dnf ; do
+    if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then
+      rm -rf ${shmdir}/*
+      sync
+
+      execute_with_retries umount -f ${shmdir}
+    fi
+  done
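+
+  # (The tmpfs mounts released above are established in prepare_to_install
+  #  below; staging package downloads in RAM keeps transient data off the
+  #  boot disk that is later zero-filled before the image snapshot.)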
+
+  # Clean up OS package cache ; re-hold systemd package
+  if is_debuntu ; then
+    apt-get -y -qq clean
+    apt-get -y -qq autoremove
+    if is_debian12 ; then
+    apt-mark hold systemd libsystemd0 ; fi
+  else
+    dnf clean all
+  fi
+
+  # print disk usage statistics
+  if is_debuntu ; then
+    # Rocky doesn't have sort -h and fails when the argument is passed
+    du --max-depth 3 -hx / | sort -h | tail -10
+  fi
+
+  # Process disk usage logs from installation period
+  rm -f /tmp/keep-running-df
+  sleep 6s
+  # compute maximum size of disk during installation
+  # Log file contains logs like the following (minus the preceding #):
+#Filesystem      Size  Used Avail Use% Mounted on
+#/dev/vda2       6.8G  2.5G  4.0G  39% /
+  df --si
+  perl -e '$max=( sort
+                   map { (split)[2] =~ /^(\d+)/ }
+                  grep { m:^/: } <STDIN> )[-1];
+print( "maximum-disk-used: $max", $/ );' < /tmp/disk-usage.log
+
+  echo "exit_handler has completed"
+
+  # zero free disk space
+  if [[ -n "$(get_metadata_attribute creating-image)" ]]; then
+    dd if=/dev/zero of=/zero ; sync ; rm -f /zero
+  fi
+
+  return 0
+}
+
+trap exit_handler EXIT
+
+function prepare_to_install() {
+  nvsmi_works="0"
+  readonly bdcfg="/usr/local/bin/bdconfig"
+  download_dir=/tmp/
+  free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)"
+  # Write to a ramdisk instead of churning the persistent disk
+  if [[ ${free_mem} -ge 5250000 ]]; then
+    download_dir="/mnt/shm"
+    mkdir -p "${download_dir}"
+    mount -t tmpfs tmpfs "${download_dir}"
+
+    # Download conda packages to tmpfs
+    /opt/conda/miniconda3/bin/conda config --add pkgs_dirs "${download_dir}"
+
+    # Download pip packages to tmpfs
+    pip config set global.cache-dir "${download_dir}" || echo "unable to set global.cache-dir"
+
+    # Download OS packages to tmpfs
+    if is_debuntu ; then
+      mount -t tmpfs tmpfs /var/cache/apt/archives
+    else
+      mount -t tmpfs tmpfs /var/cache/dnf
+    fi
+  fi
+  install_log="${download_dir}/install.log"
+
+  if is_debuntu ; then
+    clean_up_sources_lists
+    apt-get update -qq
+    apt-get -y clean
+    apt-get -y -qq autoremove
+    if is_debian12 ; then
+    apt-mark unhold systemd libsystemd0 ; fi
+  else
+    dnf clean all
+  fi
+
+  # Clean conda cache
+  /opt/conda/miniconda3/bin/conda clean -a
+
+  # zero free disk space
+  if [[ -n "$(get_metadata_attribute creating-image)" ]]; then
+    set +e
+    time dd if=/dev/zero of=/zero ; sync ; rm -f /zero
+    set -e
+  fi
 
-clear_dkms_key
+  configure_dkms_certs
 
-df -h
+  # Monitor disk usage in a screen session
+  if is_debuntu ; then
+    apt-get install -y -qq screen > /dev/null 2>&1
+  elif is_rocky ; then
+    dnf -y -q install screen > /dev/null 2>&1
+  fi
+  touch /tmp/keep-running-df
+  screen -d -m -US keep-running-df \
+    bash -c 'while [[ -f /tmp/keep-running-df ]] ; do df --si / | tee -a /tmp/disk-usage.log ; sleep 5s ; done'
+}
+
+prepare_to_install
+
+main
diff --git a/examples/secure-boot/pre-init.screenrc b/examples/secure-boot/pre-init.screenrc
index ef601d6..db4eafc 100644
--- a/examples/secure-boot/pre-init.screenrc
+++ b/examples/secure-boot/pre-init.screenrc
@@ -4,14 +4,14 @@
 # screen -L -t monitor 0 /bin/bash
 
-screen -L -t 2.2-debian12 1 /bin/bash -x examples/secure-boot/pre-init.sh 2.2-debian12
-screen -L -t 2.1-debian11 2 /bin/bash -x examples/secure-boot/pre-init.sh 2.1-debian11
-screen -L -t 2.0-debian10 3 /bin/bash -x examples/secure-boot/pre-init.sh 2.0-debian10
+screen -L -t 2.0-debian10 1 /bin/bash -x examples/secure-boot/pre-init.sh 2.0-debian10
+screen -L -t 2.0-rocky8 2 /bin/bash -x examples/secure-boot/pre-init.sh 2.0-rocky8
+screen -L -t 2.0-ubuntu18 3 /bin/bash -x
examples/secure-boot/pre-init.sh 2.0-ubuntu18 -screen -L -t 2.2-ubuntu22 4 /bin/bash -x examples/secure-boot/pre-init.sh 2.2-ubuntu22 -screen -L -t 2.1-ubuntu20 5 /bin/bash -x examples/secure-boot/pre-init.sh 2.1-ubuntu20 -screen -L -t 2.0-ubuntu18 6 /bin/bash -x examples/secure-boot/pre-init.sh 2.0-ubuntu18 +screen -L -t 2.1-debian11 4 /bin/bash -x examples/secure-boot/pre-init.sh 2.1-debian11 +screen -L -t 2.1-rocky8 5 /bin/bash -x examples/secure-boot/pre-init.sh 2.1-rocky8 +screen -L -t 2.1-ubuntu20 6 /bin/bash -x examples/secure-boot/pre-init.sh 2.1-ubuntu20 -screen -L -t 2.2-rocky9 7 /bin/bash -x examples/secure-boot/pre-init.sh 2.2-rocky9 -screen -L -t 2.1-rocky8 8 /bin/bash -x examples/secure-boot/pre-init.sh 2.1-rocky8 -screen -L -t 2.0-rocky8 9 /bin/bash -x examples/secure-boot/pre-init.sh 2.0-rocky8 +screen -L -t 2.2-debian12 7 /bin/bash -x examples/secure-boot/pre-init.sh 2.2-debian12 +screen -L -t 2.2-rocky9 8 /bin/bash -x examples/secure-boot/pre-init.sh 2.2-rocky9 +screen -L -t 2.2-ubuntu22 9 /bin/bash -x examples/secure-boot/pre-init.sh 2.2-ubuntu22 diff --git a/examples/secure-boot/pre-init.sh b/examples/secure-boot/pre-init.sh index 6be197a..7797b4f 100644 --- a/examples/secure-boot/pre-init.sh +++ b/examples/secure-boot/pre-init.sh @@ -17,7 +17,6 @@ # pre-init.sh set -e -readonly timestamp="$(date +%F-%H-%M)" IMAGE_VERSION="$1" if [[ -z "${IMAGE_VERSION}" ]] ; then @@ -42,8 +41,8 @@ metadata="public_secret_name=${public_secret_name}" metadata="${metadata},private_secret_name=${private_secret_name}" metadata="${metadata},secret_project=${secret_project}" metadata="${metadata},secret_version=${secret_version}" -metadata="${metadata},dask-runtime=yarn" -metadata="${metadata},rapids-runtime=SPARK" +metadata="${metadata},dask-runtime=standalone" +metadata="${metadata},rapids-runtime=DASK" metadata="${metadata},cuda-version=12.4" # If no OS family specified, default to debian @@ -57,53 +56,30 @@ else dataproc_version="${IMAGE_VERSION}" fi -# base image -> cuda -# case "${dataproc_version}" in -# "2.2-rocky9" ) disk_size_gb="54" ;; -# "2.1-rocky8" ) disk_size_gb="36" ;; -# "2.0-rocky8" ) disk_size_gb="33" ;; -# "2.2-ubuntu22" ) disk_size_gb="40" ;; -# "2.1-ubuntu20" ) disk_size_gb="37" ;; -# "2.0-ubuntu18" ) disk_size_gb="37" ;; -# "2.2-debian12" ) disk_size_gb="40" ;; -# "2.1-debian11" ) disk_size_gb="40" ;; -# "2.0-debian10" ) disk_size_gb="40" ;; -# esac - -# cuda image -> dask -# case "${dataproc_version}" in -# "2.2-rocky9" ) disk_size_gb="54" ;; -# "2.1-rocky8" ) disk_size_gb="37" ;; -# "2.0-rocky8" ) disk_size_gb="40" ;; -# "2.2-ubuntu22" ) disk_size_gb="45" ;; -# "2.1-ubuntu20" ) disk_size_gb="42" ;; -# "2.0-ubuntu18" ) disk_size_gb="37" ;; -# "2.2-debian12" ) disk_size_gb="46" ;; -# "2.1-debian11" ) disk_size_gb="40" ;; -# "2.0-debian10" ) disk_size_gb="40" ;; -# esac - -# dask image -> rapids -case "${dataproc_version}" in - "2.2-rocky9" ) disk_size_gb="54" ;; - "2.1-rocky8" ) disk_size_gb="37" ;; - "2.0-rocky8" ) disk_size_gb="40" ;; - "2.2-ubuntu22" ) disk_size_gb="45" ;; - "2.1-ubuntu20" ) disk_size_gb="42" ;; - "2.0-ubuntu18" ) disk_size_gb="37" ;; - "2.2-debian12" ) disk_size_gb="46" ;; - "2.1-debian11" ) disk_size_gb="40" ;; - "2.0-debian10" ) disk_size_gb="40" ;; -esac - - function generate() { local extra_args="$*" - set -x + local image_name="${PURPOSE}-${dataproc_version/\./-}-${timestamp}" + + local image="$(jq -r ".[] | select(.name == \"${image_name}\").name" "${tmpdir}/images.json")" + + if [[ -n "${image}" ]] ; then + echo "Image already exists" + 
return + fi + + local instance="$(jq -r ".[] | select(.name == \"${image_name}-install\").name" "${tmpdir}/instances.json")" + + if [[ -n "${instance}" ]]; then + # if previous run ended without cleanup... + echo "cleaning up instance from previous run" + gcloud -q compute instances delete "${image_name}-install" \ + --zone "${custom_image_zone}" + fi + set -xe python generate_custom_image.py \ - --machine-type "n1-standard-4" \ + --machine-type "n1-standard-8" \ --accelerator "type=nvidia-tesla-t4" \ - --image-name "${PURPOSE}-${dataproc_version/\./-}-${timestamp}" \ + --image-name "${image_name}" \ --customization-script "${customization_script}" \ --service-account "${GSA}" \ --metadata "${metadata}" \ @@ -116,32 +92,51 @@ function generate() { set +x } -function generate_from_dataproc_version() { - local dataproc_version="$1" - generate --dataproc-version "${dataproc_version}" -} +function generate_from_dataproc_version() { generate --dataproc-version "$1" ; } function generate_from_base_purpose() { - local base_purpose="$1" - generate --base-image-uri "https://www.googleapis.com/compute/v1/projects/${PROJECT_ID}/global/images/${base_purpose}-${dataproc_version/\./-}-${timestamp}" + generate --base-image-uri "https://www.googleapis.com/compute/v1/projects/${PROJECT_ID}/global/images/${1}-${dataproc_version/\./-}-${timestamp}" } +# base image -> cuda +case "${dataproc_version}" in + "2.0-debian10" ) disk_size_gb="38" ;; # 40G 31G 7.8G 80% / # cuda-pre-init-2-0-debian10 + "2.0-rocky8" ) disk_size_gb="35" ;; # 38G 32G 6.2G 84% / # cuda-pre-init-2-0-rocky8 + "2.0-ubuntu18" ) disk_size_gb="37" ;; # 39G 30G 8.5G 79% / # cuda-pre-init-2-0-ubuntu18 + "2.1-debian11" ) disk_size_gb="37" ;; # 39G 34G 4.1G 90% / # cuda-pre-init-2-1-debian11 + "2.1-rocky8" ) disk_size_gb="38" ;; # 41G 35G 6.1G 86% / # cuda-pre-init-2-1-rocky8 + "2.1-ubuntu20" ) disk_size_gb="35" ;; # 37G 32G 4.4G 88% / # cuda-pre-init-2-1-ubuntu20 + "2.2-debian12" ) disk_size_gb="38" ;; # 40G 35G 3.3G 92% / # cuda-pre-init-2-2-debian12 + "2.2-rocky9" ) disk_size_gb="40" ;; # 42G 36G 5.9G 86% / # cuda-pre-init-2-2-rocky9 + "2.2-ubuntu22" ) disk_size_gb="38" ;; # 40G 35G 4.8G 88% / # cuda-pre-init-2-2-ubuntu22 +esac + # Install GPU drivers + cuda on dataproc base image PURPOSE="cuda-pre-init" customization_script="examples/secure-boot/install_gpu_driver.sh" - time generate_from_dataproc_version "${dataproc_version}" -# Install dask on cuda base image -base_purpose="${PURPOSE}" -PURPOSE="dask-pre-init" -customization_script="examples/secure-boot/dask.sh" +# cuda image -> rapids +case "${dataproc_version}" in + "2.0-debian10" ) disk_size_gb="44" ;; # 47G 41G 4.0G 91% / # rapids-pre-init-2-0-debian10 + "2.0-rocky8" ) disk_size_gb="45" ;; # 49G 42G 7.0G 86% / # rapids-pre-init-2-0-rocky8 + "2.0-ubuntu18" ) disk_size_gb="43" ;; # 45G 40G 4.9G 90% / # rapids-pre-init-2-0-ubuntu18 + "2.1-debian11" ) disk_size_gb="46" ;; # 49G 43G 3.6G 93% / # rapids-pre-init-2-1-debian11 + "2.1-rocky8" ) disk_size_gb="48" ;; # 52G 45G 7.2G 87% / # rapids-pre-init-2-1-rocky8 + "2.1-ubuntu20" ) disk_size_gb="45" ;; # 47G 42G 5.2G 89% / # rapids-pre-init-2-1-ubuntu20 + "2.2-debian12" ) disk_size_gb="48" ;; # 51G 45G 3.8G 93% / # rapids-pre-init-2-2-debian12 + "2.2-rocky9" ) disk_size_gb="49" ;; # 53G 46G 7.2G 87% / # rapids-pre-init-2-2-rocky9 + "2.2-ubuntu22" ) disk_size_gb="48" ;; # 50G 45G 5.6G 89% / # rapids-pre-init-2-2-ubuntu22 +esac -time generate_from_base_purpose "${base_purpose}" +#disk_size_gb="50" -# Install rapids on dask base image 
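The jq lookups in `generate()` above are what make re-runs cheap: the instance and image listings are captured once per `build-current-images.sh` run, and each generation step consults them before doing any work. A sketch of the two checks against hypothetical cached listings:

```bash
tmpdir=/tmp/2024-10-24-04-21   # hypothetical run directory created by build-current-images.sh
image_name="cuda-pre-init-2-2-debian12-2024-10-24-04-21"

# Non-empty output means the image already exists, so the build is skipped.
jq -r ".[] | select(.name == \"${image_name}\").name" "${tmpdir}/images.json"

# A surviving ${image_name}-install instance marks an earlier run that died
# before cleanup; generate() deletes it before starting over.
jq -r ".[] | select(.name == \"${image_name}-install\").name" "${tmpdir}/instances.json"
```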
-base_purpose="${PURPOSE}"
 PURPOSE="rapids-pre-init"
 customization_script="examples/secure-boot/rapids.sh"
+time generate_from_base_purpose "cuda-pre-init"
 
-time generate_from_base_purpose "${base_purpose}"
+# Install dask without rapids on base image
+PURPOSE="dask-pre-init"
+customization_script="examples/secure-boot/dask.sh"
+time generate_from_base_purpose "cuda-pre-init"
diff --git a/examples/secure-boot/rapids.sh b/examples/secure-boot/rapids.sh
index 6bade61..6c5c9d4 100644
--- a/examples/secure-boot/rapids.sh
+++ b/examples/secure-boot/rapids.sh
@@ -19,69 +19,58 @@
 
 set -euxo pipefail
 
-# Detect dataproc image version from its various names
-if (! test -v DATAPROC_IMAGE_VERSION) && test -v DATAPROC_VERSION; then
-  DATAPROC_IMAGE_VERSION="${DATAPROC_VERSION}"
-fi
-
-function get_metadata_attribute() {
-  local -r attribute_name=$1
-  local -r default_value="${2:-}"
-  /usr/share/google/get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}"
+function os_id() { grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; }
+function os_version() { grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; }
+function is_ubuntu() { [[ "$(os_id)" == 'ubuntu' ]] ; }
+function is_ubuntu18() { is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; }
+function is_debian() { [[ "$(os_id)" == 'debian' ]] ; }
+function is_debuntu() { is_debian || is_ubuntu ; }
+
+function print_metadata_value() {
+  local readonly tmpfile=$(mktemp)
+  http_code=$(curl -f "${1}" -H "Metadata-Flavor: Google" -w "%{http_code}" \
+    -s -o ${tmpfile} 2>/dev/null)
+  local readonly return_code=$?
+  # If the command completed successfully, print the metadata value to stdout.
+  if [[ ${return_code} == 0 && ${http_code} == 200 ]]; then
+    cat ${tmpfile}
+  fi
+  rm -f ${tmpfile}
+  return ${return_code}
 }
 
-readonly SPARK_VERSION_ENV=$(spark-submit --version 2>&1 | sed -n 's/.*version[[:blank:]]\+\([0-9]\+\.[0-9]\).*/\1/p' | head -n1)
-if [[ "${SPARK_VERSION_ENV%%.*}" == "3" ]]; then
-  DEFAULT_CUDA_VERSION="12.4"
-  readonly DEFAULT_XGBOOST_VERSION="2.0.3"
-  readonly SPARK_VERSION="${SPARK_VERSION_ENV}"
-  readonly DEFAULT_XGBOOST_GPU_SUB_VERSION=""
-else
-  DEFAULT_CUDA_VERSION="10.1"
-  readonly DEFAULT_XGBOOST_VERSION="1.0.0"
-  readonly DEFAULT_XGBOOST_GPU_SUB_VERSION="Beta5"
-  readonly SPARK_VERSION="2.x"
-fi
-
-# RAPIDS config
-readonly RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'SPARK')
-if [[ "${RAPIDS_RUNTIME}" != "SPARK" ]]; then # to match install_gpu_driver.sh ; they should both probably be removed
-  DEFAULT_CUDA_VERSION='11.8'
-fi
-readonly DEFAULT_CUDA_VERSION
-CUDA_VERSION=$(get_metadata_attribute 'cuda-version' ${DEFAULT_CUDA_VERSION})
-
-readonly CUDA_VERSION
-function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; }
-function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; }
-
-readonly DEFAULT_DASK_RAPIDS_VERSION="24.08"
-readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION})
-
-readonly ROLE=$(/usr/share/google/get_metadata_value attributes/dataproc-role)
-readonly MASTER=$(/usr/share/google/get_metadata_value attributes/dataproc-master)
-
-readonly RUN_WORKER_ON_MASTER=$(get_metadata_attribute 'dask-cuda-worker-on-master' 'true')
-
-# SPARK config
-readonly DEFAULT_SPARK_RAPIDS_VERSION="24.08.0"
-readonly SPARK_RAPIDS_VERSION=$(get_metadata_attribute 'spark-rapids-version' ${DEFAULT_SPARK_RAPIDS_VERSION})
-readonly XGBOOST_VERSION=$(get_metadata_attribute 'xgboost-version' ${DEFAULT_XGBOOST_VERSION})
-readonly XGBOOST_GPU_SUB_VERSION=$(get_metadata_attribute 'spark-gpu-sub-version'
${DEFAULT_XGBOOST_GPU_SUB_VERSION}) +function print_metadata_value_if_exists() { + local return_code=1 + local readonly url=$1 + print_metadata_value ${url} + return_code=$? + return ${return_code} +} -# Scala config -readonly SCALA_VER="2.12" +function get_metadata_value() { + set +x + local readonly varname=$1 + local -r MDS_PREFIX=http://metadata.google.internal/computeMetadata/v1 + # Print the instance metadata value. + print_metadata_value_if_exists ${MDS_PREFIX}/instance/${varname} + return_code=$? + # If the instance doesn't have the value, try the project. + if [[ ${return_code} != 0 ]]; then + print_metadata_value_if_exists ${MDS_PREFIX}/project/${varname} + return_code=$? + fi + set -x + return ${return_code} +} -# Dask config -readonly DASK_RUNTIME="$(/usr/share/google/get_metadata_value attributes/dask-runtime || echo 'standalone')" -readonly DASK_LAUNCHER=/usr/local/bin/dask-launcher.sh -readonly DASK_SERVICE=dask-cluster -readonly DASK_WORKER_SERVICE=dask-worker -readonly DASK_SCHEDULER_SERVICE=dask-scheduler -readonly DASK_YARN_CONFIG_FILE=/etc/dask/config.yaml +function get_metadata_attribute() ( + set +x + local -r attribute_name="$1" + local -r default_value="${2:-}" + get_metadata_value "attributes/${attribute_name}" || echo -n "${default_value}" +) -# Dataproc configurations -readonly SPARK_CONF_DIR='/etc/spark/conf' +function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; } +function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; } function execute_with_retries() { local -r cmd="$*" @@ -94,112 +83,30 @@ function execute_with_retries() { return 1 } -readonly conda_env="/opt/conda/miniconda3/envs/dask-rapids" -function install_dask_rapids() { - if is_cuda12 ; then - local python_spec="python>=3.11" - local cuda_spec="cuda-version>=12,<13" - local dask_spec="dask>=2024.8" - local numba_spec="numba" - elif is_cuda11 ; then - local python_spec="python>=3.9" - local cuda_spec="cuda-version>=11,<=11.8" - local dask_spec="dask" - local numba_spec="numba" - fi - - local CONDA_PACKAGES=( - "${cuda_spec}" - "rapids=${RAPIDS_VERSION}" - "${dask_spec}" - "dask-bigquery" - "dask-ml" - "dask-sql" - "cudf" - "${numba_spec}" - ) - - # Install cuda, rapids, dask - local is_installed="0" - mamba="/opt/conda/default/bin/mamba" - conda="/opt/conda/default/bin/conda" - - for installer in "${mamba}" "${conda}" ; do - set +e - test -d "${conda_env}" || \ - time "${installer}" "create" -m -n 'dask-rapids' -y --no-channel-priority \ - -c 'conda-forge' -c 'nvidia' -c 'rapidsai' \ - ${CONDA_PACKAGES[*]} \ - "${python_spec}" - if [[ "$?" == "0" ]] ; then - is_installed="1" - break - else - "${conda}" config --set channel_priority flexible - fi - set -e - done - if [[ "${is_installed}" == "0" ]]; then - echo "failed to install dask" - return 1 - fi - set -e -} +function configure_dask_yarn() { + readonly DASK_YARN_CONFIG_DIR=/etc/dask/ + readonly DASK_YARN_CONFIG_FILE=${DASK_YARN_CONFIG_DIR}/config.yaml + # Minimal custom configuration is required for this + # setup. Please see https://yarn.dask.org/en/latest/quickstart.html#usage + # for information on tuning Dask-Yarn environments. 
+  mkdir -p "${DASK_YARN_CONFIG_DIR}"
 
-function install_spark_rapids() {
-  local -r rapids_repo_url='https://repo1.maven.org/maven2/ai/rapids'
-  local -r nvidia_repo_url='https://repo1.maven.org/maven2/com/nvidia'
-  local -r dmlc_repo_url='https://repo.maven.apache.org/maven2/ml/dmlc'
-
-  if [[ "${SPARK_VERSION}" == "3"* ]]; then
-    execute_with_retries wget -nv --timeout=30 --tries=5 --retry-connrefused \
-      "${dmlc_repo_url}/xgboost4j-spark-gpu_${SCALA_VER}/${XGBOOST_VERSION}/xgboost4j-spark-gpu_${SCALA_VER}-${XGBOOST_VERSION}.jar" \
-      -P /usr/lib/spark/jars/
-    execute_with_retries wget -nv --timeout=30 --tries=5 --retry-connrefused \
-      "${dmlc_repo_url}/xgboost4j-gpu_${SCALA_VER}/${XGBOOST_VERSION}/xgboost4j-gpu_${SCALA_VER}-${XGBOOST_VERSION}.jar" \
-      -P /usr/lib/spark/jars/
-    execute_with_retries wget -nv --timeout=30 --tries=5 --retry-connrefused \
-      "${nvidia_repo_url}/rapids-4-spark_${SCALA_VER}/${SPARK_RAPIDS_VERSION}/rapids-4-spark_${SCALA_VER}-${SPARK_RAPIDS_VERSION}.jar" \
-      -P /usr/lib/spark/jars/
-  else
-    execute_with_retries wget -nv --timeout=30 --tries=5 --retry-connrefused \
-      "${rapids_repo_url}/xgboost4j-spark_${SPARK_VERSION}/${XGBOOST_VERSION}-${XGBOOST_GPU_SUB_VERSION}/xgboost4j-spark_${SPARK_VERSION}-${XGBOOST_VERSION}-${XGBOOST_GPU_SUB_VERSION}.jar" \
-      -P /usr/lib/spark/jars/
-    execute_with_retries wget -nv --timeout=30 --tries=5 --retry-connrefused \
-      "${rapids_repo_url}/xgboost4j_${SPARK_VERSION}/${XGBOOST_VERSION}-${XGBOOST_GPU_SUB_VERSION}/xgboost4j_${SPARK_VERSION}-${XGBOOST_VERSION}-${XGBOOST_GPU_SUB_VERSION}.jar" \
-      -P /usr/lib/spark/jars/
-  fi
-}
+  cat <<EOF >"${DASK_YARN_CONFIG_FILE}"
+# Config file for Dask Yarn.
+#
+# These values are joined on top of the default config, found at
+# https://yarn.dask.org/en/latest/configuration.html#default-configuration
 
-function configure_spark() {
-  if [[ "${SPARK_VERSION}" == "3"* ]]; then
-    cat >>${SPARK_CONF_DIR}/spark-defaults.conf <<EOF
-  else
-    cat >>${SPARK_CONF_DIR}/spark-defaults.conf <<EOF
-${conda_env}/bin/dask-cuda-worker "${MASTER}:8786" --local-directory="${dask_worker_local_dir}" --memory-limit=auto >> "\${LOGFILE}" 2>&1
+${DASK_CONDA_ENV}/bin/dask-cuda-worker "${MASTER}:8786" --local-directory="${dask_worker_local_dir}" --memory-limit=auto >> "\${LOGFILE}" 2>&1
 EOF
 
 chmod 750 "${DASK_WORKER_LAUNCHER}"
@@ -237,8 +144,9 @@ EOF
 if [[ "${ROLE}" != "Master" ]]; then
   enable_worker_service="1"
 else
+  local RUN_WORKER_ON_MASTER=$(get_metadata_attribute dask-cuda-worker-on-master 'true')
   # Enable service on single-node cluster (no workers)
-  local worker_count="$(/usr/share/google/get_metadata_value attributes/dataproc-worker-count)"
+  local worker_count="$(get_metadata_attribute dataproc-worker-count)"
   if [[ "${worker_count}" == "0" || "${RUN_WORKER_ON_MASTER}" == "true" ]]; then
     enable_worker_service="1"
   fi
@@ -250,46 +158,370 @@ EOF
 fi
 }
 
-function configure_dask_yarn() {
-  # Replace config file on cluster.
-  cat <<EOF >"${DASK_YARN_CONFIG_FILE}"
-# Config file for Dask Yarn.
-#
-# These values are joined on top of the default config, found at
-# https://yarn.dask.org/en/latest/configuration.html#default-configuration
-
-yarn:
-  environment: python://${conda_env}/bin/python
-
-  worker:
-    count: 2
-    gpus: 1
-    class: "dask_cuda.CUDAWorker"
-EOF
-}
+
+function install_systemd_dask_scheduler() {
+  # only run scheduler on primary master
+  if [[ "$(hostname -s)" != "${MASTER}" ]]; then return ; fi
+  echo "Installing systemd Dask Scheduler service..."
+function install_systemd_dask_scheduler() {
+  # only run scheduler on primary master
+  if [[ "$(hostname -s)" != "${MASTER}" ]]; then return ; fi
+  echo "Installing systemd Dask Scheduler service..."
+
+  local -r dask_scheduler_local_dir="/tmp/${DASK_SCHEDULER_SERVICE}"
+  mkdir -p "${dask_scheduler_local_dir}"
+
+  local DASK_SCHEDULER_LAUNCHER="/usr/local/bin/${DASK_SCHEDULER_SERVICE}-launcher.sh"
+
+  cat <<EOF >"${DASK_SCHEDULER_LAUNCHER}"
+#!/bin/bash
+LOGFILE="/var/log/${DASK_SCHEDULER_SERVICE}.log"
+echo "dask scheduler starting, logging to \${LOGFILE}"
+${DASK_CONDA_ENV}/bin/dask scheduler >> "\${LOGFILE}" 2>&1
+EOF
+
+  chmod 750 "${DASK_SCHEDULER_LAUNCHER}"
+
+  local -r dask_service_file="/usr/lib/systemd/system/${DASK_SCHEDULER_SERVICE}.service"
+  cat <<EOF >"${dask_service_file}"
+[Unit]
+Description=Dask Scheduler Service
+[Service]
+Type=simple
+Restart=on-failure
+ExecStart=/bin/bash -c 'exec ${DASK_SCHEDULER_LAUNCHER}'
+[Install]
+WantedBy=multi-user.target
+EOF
+  chmod a+r "${dask_service_file}"
+
+  systemctl daemon-reload
+
+  # Enable the service
+  systemctl enable "${DASK_SCHEDULER_SERVICE}"
+}

+function install_systemd_dask_service() {
+  install_systemd_dask_scheduler
+  install_systemd_dask_worker
+}

+function restart_knox() {
+  systemctl stop knox
+  rm -rf "${KNOX_HOME}/data/deployments/"*
+  systemctl start knox
+}

+function configure_knox_for_dask() {
+  if [[ ! -d "${KNOX_HOME}" ]]; then
+    echo "Skip configuring Knox rules for Dask"
+    return 0
+  fi
+
+  local DASK_UI_PORT=8787
+  if [[ -f /etc/knox/conf/topologies/default.xml ]]; then
+    sed -i \
+      "/<\/topology>/i <service><role>DASK<\/role><url>http://localhost:${DASK_UI_PORT}<\/url><\/service> <service><role>DASKWS<\/role><url>ws:\/\/${MASTER}:${DASK_UI_PORT}<\/url><\/service>" \
+      /etc/knox/conf/topologies/default.xml
+  fi
+
+  mkdir -p "${KNOX_DASK_DIR}"
+
+  cat >"${KNOX_DASK_DIR}/service.xml" <<'EOF'
+<!-- Knox service definition for the DASK role: routes gateway traffic to the Dask scheduler UI -->
+EOF
+
+  cat >"${KNOX_DASK_DIR}/rewrite.xml" <<'EOF'
+<!-- Knox rewrite rules for the DASK role -->
+EOF
+
+  mkdir -p "${KNOX_DASKWS_DIR}"
+
+  cat >"${KNOX_DASKWS_DIR}/service.xml" <<'EOF'
+<!-- Knox service definition for the DASKWS role: websocket traffic for the Dask UI -->
+EOF
+
+  cat >"${KNOX_DASKWS_DIR}/rewrite.xml" <<'EOF'
+<!-- Knox rewrite rules for the DASKWS role -->
+EOF
+
+  chown -R knox:knox "${KNOX_DASK_DIR}" "${KNOX_DASKWS_DIR}"
+
+  # Do not restart knox during pre-init script run
+  if [[ -n "${ROLE}" ]]; then
+    restart_knox
+  fi
+}

+function configure_fluentd_for_dask() {
+  if [[ "$(hostname -s)" == "${MASTER}" ]]; then
+    cat >/etc/google-fluentd/config.d/dataproc-dask.conf <<EOF
+<source>
+  @type tail
+  path /var/log/dask-scheduler.log
+  pos_file /var/tmp/fluentd.dataproc.dask.scheduler.pos
+  read_from_head true
+  tag google.dataproc.dask-scheduler
+  <parse>
+    @type none
+  </parse>
+</source>
+
+<filter google.dataproc.dask-scheduler>
+  @type record_transformer
+  <record>
+    filename dask-scheduler.log
+  </record>
+</filter>
+EOF
+  fi
+
+  if [[ "${enable_worker_service}" == "1" ]]; then
+    cat >>/etc/google-fluentd/config.d/dataproc-dask.conf <<EOF
+<source>
+  @type tail
+  path /var/log/dask-worker.log
+  pos_file /var/tmp/fluentd.dataproc.dask.worker.pos
+  read_from_head true
+  tag google.dataproc.dask-worker
+  <parse>
+    @type none
+  </parse>
+</source>
+
+<filter google.dataproc.dask-worker>
+  @type record_transformer
+  <record>
+    filename dask-worker.log
+  </record>
+</filter>
+EOF
+  fi
+
+  systemctl restart google-fluentd
+}
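Once Knox restarts with the amended `default` topology, the scheduler UI should be reachable through the gateway. A quick check, assuming Knox's stock gateway port (8443) and the topology name used above:

```bash
# Hypothetical verification from the master node; -k skips certificate
# validation, since Knox typically ships with a self-signed certificate.
curl -sk "https://localhost:8443/gateway/default/dask/" | head -n 5
```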
+function install_dask_rapids() {
+  if is_cuda12 ; then
+    local python_spec="python>=3.11"
+    local cuda_spec="cuda-version>=12,<13"
+    local dask_spec="dask>=2024.7"
+    local numba_spec="numba"
+  elif is_cuda11 ; then
+    local python_spec="python>=3.9"
+    local cuda_spec="cuda-version>=11,<12.0a0"
+    local dask_spec="dask"
+    local numba_spec="numba"
+  fi
+
+  rapids_spec="rapids>=${RAPIDS_VERSION}"
+  CONDA_PACKAGES=()
+  if [[ "${DASK_RUNTIME}" == 'yarn' ]]; then
+    # Pin `distributed` and `dask` package versions to an old release
+    # because `dask-yarn` 0.9 uses skein in a way which is not compatible
+    # with `distributed` 2022.2 and newer:
+    # https://github.com/dask/dask-yarn/issues/155
+    dask_spec="dask<2022.2"
+    python_spec="python>=3.7,<3.8.0a0"
+    rapids_spec="rapids<=24.05"
+    if is_ubuntu18 ; then
+      # the libuuid.so.1 distributed with fiona 1.8.22 dumps core when calling uuid_generate_time_generic
+      CONDA_PACKAGES+=("fiona<1.8.22")
+    fi
+    CONDA_PACKAGES+=('dask-yarn=0.9' "distributed<2022.2")
+  fi
+
+  CONDA_PACKAGES+=(
+    "${cuda_spec}"
+    "${rapids_spec}"
+    "${dask_spec}"
+    "dask-bigquery"
+    "dask-ml"
+    "dask-sql"
+    "cudf"
+    "${numba_spec}"
+  )
+
+  # Install cuda, rapids, dask
+  mamba="/opt/conda/miniconda3/bin/mamba"
+  conda="/opt/conda/miniconda3/bin/conda"
+
+  "${conda}" remove -n dask --all || echo "unable to remove conda environment [dask]"
+
+  ( set +e
+    local is_installed="0"
+    for installer in "${mamba}" "${conda}" ; do
+      test -d "${DASK_CONDA_ENV}" || \
+        time "${installer}" create -m -n 'dask-rapids' -y --no-channel-priority \
+        -c 'conda-forge' -c 'nvidia' -c 'rapidsai' \
+        ${CONDA_PACKAGES[*]} \
+        "${python_spec}" \
+        > "${install_log}" 2>&1 && retval=$? || { retval=$? ; cat "${install_log}" ; }
+      sync
+      if [[ "$retval" == "0" ]] ; then
+        is_installed="1"
+        break
+      fi
+      "${conda}" config --set channel_priority flexible
+    done
+    if [[ "${is_installed}" == "0" ]]; then
+      echo "failed to install dask"
+      return 1
+    fi
+  )
+}

-function main() {
-  if [[ "${RAPIDS_RUNTIME}" == "DASK" ]]; then
-    # Install RAPIDS
-    install_dask_rapids
-
-    # In "standalone" mode, Dask relies on a shell script to launch.
-    # In "yarn" mode, it relies a config.yaml file.
-    if [[ "${DASK_RUNTIME}" == "standalone" ]]; then
-      install_systemd_dask_worker
-    elif [[ "${DASK_RUNTIME}" == "yarn" ]]; then
-      configure_dask_yarn
-    fi
-    echo "RAPIDS installed with Dask runtime"
-  elif [[ "${RAPIDS_RUNTIME}" == "SPARK" ]]; then
-    install_spark_rapids
-    configure_spark
-    echo "RAPIDS initialized with Spark runtime"
-  else
-    echo "Unsupported RAPIDS Runtime: ${RAPIDS_RUNTIME}"
-    exit 1
-  fi
+function main() {
+  # Install Dask with RAPIDS
+  install_dask_rapids
+
+  # In "standalone" mode, Dask relies on a systemd unit to launch.
+  # In "yarn" mode, it relies on a config.yaml file.
+  if [[ "${DASK_RUNTIME}" == "yarn" ]]; then
+    # Create Dask YARN config file
+    configure_dask_yarn
+  else
+    # Create Dask service
+    install_systemd_dask_service
+
+    if [[ "$(hostname -s)" == "${MASTER}" ]]; then
+      systemctl start "${DASK_SCHEDULER_SERVICE}"
+      systemctl status "${DASK_SCHEDULER_SERVICE}"
+    fi
+
+    echo "Starting Dask 'standalone' cluster..."
+    if [[ "${enable_worker_service}" == "1" ]]; then
+      systemctl start "${DASK_WORKER_SERVICE}"
+      systemctl status "${DASK_WORKER_SERVICE}"
+    fi
+
+    configure_knox_for_dask
+
+    local DASK_CLOUD_LOGGING="$(get_metadata_attribute dask-cloud-logging || echo 'false')"
+    if [[ "${DASK_CLOUD_LOGGING}" == "true" ]]; then
+      configure_fluentd_for_dask
+    fi
+  fi
+  echo "Dask RAPIDS for ${DASK_RUNTIME} successfully initialized."
   if [[ "${ROLE}" == "Master" ]]; then
     systemctl restart hadoop-yarn-resourcemanager.service
     # Restart NodeManager on Master as well if this is a single-node-cluster.
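Before relying on the new environment, it can be worth confirming that conda resolved a consistent GPU stack. A hedged post-install check (package names per the CONDA_PACKAGES list above; run on a node with an attached GPU):

```bash
# Imports fail loudly if the RAPIDS/CUDA solve went wrong.
/opt/conda/miniconda3/envs/dask-rapids/bin/python - <<'PY'
import cudf, dask_cuda
from numba import cuda
print("cudf", cudf.__version__, "| dask_cuda", dask_cuda.__version__)
print("GPUs visible:", len(cuda.gpus))
PY
```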
@@ -301,4 +533,131 @@ function main() {
   fi
 }

+function exit_handler() (
+  set +e
+  echo "Exit handler invoked"
+
+  # Free conda cache
+  /opt/conda/miniconda3/bin/conda clean -a > /dev/null 2>&1
+
+  # Clear pip cache
+  pip cache purge || echo "unable to purge pip cache"
+
+  # remove the tmpfs conda pkgs_dirs
+  if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi
+
+  # Clean up shared memory mounts
+  for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do
+    if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then
+      rm -rf "${shmdir}"/*
+      umount -f "${shmdir}"
+    fi
+  done
+
+  # Clean up OS package cache
+  if is_debuntu ; then
+    apt-get -y -qq clean
+    apt-get -y -qq autoremove
+  else
+    dnf clean all
+  fi
+
+  # Print disk usage statistics
+  if is_debuntu ; then
+    # Rocky's sort lacks the -h flag and fails when it is passed,
+    # so only do this on Debian/Ubuntu
+    du --max-depth 3 -hx / | sort -h | tail -10
+  fi
+
+  # Process disk usage logs from the installation period
+  rm -f "${tmpdir}/keep-running-df"
+  sleep 6s
+  # Compute the maximum disk usage during installation.
+  # The log file contains lines like the following (minus the preceding #):
+#Filesystem      Size  Used Avail Use% Mounted on
+#/dev/vda2       6.8G  2.5G  4.0G  39% /
+  df -h / | tee -a "${tmpdir}/disk-usage.log"
+  perl -e '$max=( sort { $a <=> $b }
+                   map { (split)[2] =~ /^(\d+)/ }
+                  grep { m:^/: } <STDIN> )[-1];
+print( "maximum-disk-used: $max", $/ );' < "${tmpdir}/disk-usage.log"
+
+  echo "exit_handler has completed"
+
+  # Zero out free disk space so the image compresses well
+  if [[ -n "$(get_metadata_attribute creating-image)" ]]; then
+    dd if=/dev/zero of=/zero ; sync ; rm -f /zero
+  fi
+
+  return 0
+)

+function prepare_to_install(){
+  readonly DEFAULT_CUDA_VERSION="12.4"
+  CUDA_VERSION=$(get_metadata_attribute 'cuda-version' ${DEFAULT_CUDA_VERSION})
+  readonly CUDA_VERSION
+
+  readonly ROLE=$(get_metadata_attribute dataproc-role)
+  readonly MASTER=$(get_metadata_attribute dataproc-master)
+
+  # RAPIDS config
+  RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'DASK')
+  readonly RAPIDS_RUNTIME
+
+  readonly DEFAULT_DASK_RAPIDS_VERSION="24.08"
+  readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION})
+
+  # Dask config
+  DASK_RUNTIME="$(get_metadata_attribute dask-runtime || echo 'standalone')"
+  readonly DASK_RUNTIME
+  readonly DASK_SERVICE=dask-cluster
+  readonly DASK_WORKER_SERVICE=dask-worker
+  readonly DASK_SCHEDULER_SERVICE=dask-scheduler
+  readonly DASK_CONDA_ENV="/opt/conda/miniconda3/envs/dask-rapids"
+
+  # Knox config
+  readonly KNOX_HOME=/usr/lib/knox
+  readonly KNOX_DASK_DIR="${KNOX_HOME}/data/services/dask/0.1.0"
+  readonly KNOX_DASKWS_DIR="${KNOX_HOME}/data/services/daskws/0.1.0"
+  enable_worker_service="0"
+
+  free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)"
+  # Write to a ramdisk instead of churning the persistent disk
+  if [[ ${free_mem} -ge 5250000 ]]; then
+    tmpdir=/mnt/shm
+    mkdir -p /mnt/shm
+    mount -t tmpfs tmpfs /mnt/shm
+
+    # Download conda packages to tmpfs
+    /opt/conda/miniconda3/bin/conda config --add pkgs_dirs /mnt/shm
+
+    # Download pip packages to tmpfs
+    pip config set global.cache-dir /mnt/shm || echo "unable to set global.cache-dir"
+
+    # Download OS packages to tmpfs
+    if is_debuntu ; then
+      mount -t tmpfs tmpfs /var/cache/apt/archives
+    else
+      mount -t tmpfs tmpfs /var/cache/dnf
+    fi
+  else
+    tmpdir=/tmp
+  fi
+  install_log="${tmpdir}/install.log"
+  trap exit_handler EXIT
+
+  # Monitor disk usage in a screen session
+  if is_debuntu ; then
+    apt-get install -y -qq screen
+  else
+    dnf -y -q install screen
+  fi
+  df -h / | tee "${tmpdir}/disk-usage.log"
+  touch "${tmpdir}/keep-running-df"
+  screen -d -m -US keep-running-df \
+    bash -c "while [[ -f ${tmpdir}/keep-running-df ]] ; do df -h / | tee -a ${tmpdir}/disk-usage.log ; sleep 5s ; done"
+}
+
+prepare_to_install
+
 main
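For reference, a hypothetical way to exercise the script end to end is to stage it in GCS and attach it as a Dataproc initialization action; the script filename, bucket, region, and accelerator types below are placeholders:

```bash
# Placeholders throughout: substitute your own bucket, region, and GPU types.
gsutil cp rapids.sh gs://my-bucket/rapids/rapids.sh
gcloud dataproc clusters create dask-rapids-test \
  --region=us-central1 \
  --master-accelerator=type=nvidia-tesla-t4 \
  --worker-accelerator=type=nvidia-tesla-t4,count=1 \
  --metadata=rapids-runtime=DASK,dask-runtime=standalone \
  --initialization-actions=gs://my-bucket/rapids/rapids.sh
```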