diff --git a/README.md b/README.md index f0a1111..e55a81b 100644 --- a/README.md +++ b/README.md @@ -133,10 +133,10 @@ python generate_custom_image.py \ default value of 300 seconds will be used. * **--dry-run**: Dry run mode which only validates input and generates workflow script without creating image. Disabled by default. -* **--trusted-cert**: a certificate in DER format to be inserted - into the custom image's EFI boot sector. Can be generated by - reading examples/secure-boot/README.md. This argument is mutually - exclusive with base-image-family + +* **--trusted-cert**: (Optional) Pass an empty string to this + argument to disable support for shielded-secure-boot. + * **--metadata**: VM metadata which can be read by the customization script with `/usr/share/google/get_metadata_value attributes/` at runtime. The value of this flag takes the form of `key1=value1,key2=value2,...`. If the diff --git a/custom_image_utils/args_parser.py b/custom_image_utils/args_parser.py index 95bf9a1..2b0542d 100644 --- a/custom_image_utils/args_parser.py +++ b/custom_image_utils/args_parser.py @@ -228,8 +228,7 @@ def parse_args(args): type=str, required=False, default="tls/db.der", - help="""(Optional) Inserts the specified DER-format certificate into - the custom image's EFI boot sector for use with secure boot.""") - + help="""(Optional) Pass an empty string to this argument to + disable support for shielded-secure-boot.""") return parser.parse_args(args) diff --git a/custom_image_utils/shell_script_generator.py b/custom_image_utils/shell_script_generator.py index 89730c3..82d44b7 100644 --- a/custom_image_utils/shell_script_generator.py +++ b/custom_image_utils/shell_script_generator.py @@ -35,7 +35,10 @@ local -r cmd="$*" for ((i = 0; i < 3; i++)); do - if eval "$cmd"; then return 0 ; fi + set -x + time eval "$cmd" > "/tmp/{run_id}/install.log" 2>&1 && retval=$? || {{ retval=$? 
; cat "/tmp/{run_id}/install.log" ; }} + set +x + if [[ $retval == 0 ]] ; then return 0 ; fi sleep 5 done return 1 @@ -44,23 +47,24 @@ function exit_handler() {{ echo 'Cleaning up before exiting.' - if [[ -f /tmp/{run_id}/vm_created ]]; then + if [[ -f /tmp/{run_id}/vm_created ]]; then ( set +e echo 'Deleting VM instance.' - execute_with_retries gcloud compute instances delete {image_name}-install \ - --project={project_id} --zone={zone} -q - elif [[ -f /tmp/{run_id}/disk_created ]]; then + execute_with_retries \ + gcloud compute instances delete {image_name}-install --project={project_id} --zone={zone} -q + ) elif [[ -f /tmp/{run_id}/disk_created ]]; then echo 'Deleting disk.' - execute_with_retries gcloud compute ${{base_obj_type}} delete {image_name}-install --project={project_id} --zone={zone} -q + execute_with_retries \ + gcloud compute ${{base_obj_type}} delete {image_name}-install --project={project_id} --zone={zone} -q fi echo 'Uploading local logs to GCS bucket.' gsutil -m rsync -r {log_dir}/ {gcs_log_dir}/ if [[ -f /tmp/{run_id}/image_created ]]; then - echo -e "${{GREEN}}Workflow succeeded, check logs at {log_dir}/ or {gcs_log_dir}/${{NC}}" + echo -e "${{GREEN}}Workflow succeeded${{NC}}, check logs at {log_dir}/ or {gcs_log_dir}/" exit 0 else - echo -e "${{RED}}Workflow failed, check logs at {log_dir}/ or {gcs_log_dir}/${{NC}}" + echo -e "${{RED}}Workflow failed${{NC}}, check logs at {log_dir}/ or {gcs_log_dir}/" exit 1 fi }} @@ -111,11 +115,13 @@ local cert_args="" local num_src_certs="0" + metadata_arg="{metadata_flag}" if [[ -n '{trusted_cert}' ]] && [[ -f '{trusted_cert}' ]]; then # build tls/ directory from variables defined near the header of # the examples/secure-boot/create-key-pair.sh file eval "$(bash examples/secure-boot/create-key-pair.sh)" + metadata_arg="${{metadata_arg}},public_secret_name=${{public_secret_name}},private_secret_name=${{private_secret_name}},secret_project=${{secret_project}},secret_version=${{secret_version}}" # by 
default, a gcloud secret with the name of efi-db-pub-key-042 is # created in the current project to store the certificate installed @@ -132,16 +138,20 @@ local -a cert_list=() - local -a default_cert_list=("{trusted_cert}" "${{MS_UEFI_CA}}") + local -a default_cert_list + default_cert_list=("{trusted_cert}" "${{MS_UEFI_CA}}") local -a src_img_modulus_md5sums=() mapfile -t src_img_modulus_md5sums < <(print_img_dbs_modulus_md5sums {dataproc_base_image}) num_src_certs="${{#src_img_modulus_md5sums[@]}}" - echo "${{num_src_certs}} db certificates attached to source image" - if [[ "${{num_src_certs}}" -eq "0" ]]; then + echo "debug - num_src_certs: [${{#src_img_modulus_md5sums[*]}}]" + echo "value of src_img_modulus_md5sums: [${{src_img_modulus_md5sums}}]" + if [[ -z "${{src_img_modulus_md5sums}}" ]]; then + num_src_certs=0 echo "no db certificates in source image" - cert_list=default_cert_list + cert_list=( "${{default_cert_list[@]}}" ) else + echo "${{num_src_certs}} db certificates attached to source image" echo "db certs exist in source image" for cert in ${{default_cert_list[*]}}; do if test_element_in_array "$(print_modulus_md5sum ${{cert}})" ${{src_img_modulus_md5sums[@]}} ; then @@ -175,7 +185,8 @@ echo 'Creating image.' base_obj_type="images" instance_disk_args='--image-project={project_id} --image={image_name}-install --boot-disk-size={disk_size}G --boot-disk-type=pd-ssd' - time execute_with_retries gcloud compute images create {image_name}-install \ + execute_with_retries \ + gcloud compute images create {image_name}-install \ --project={project_id} \ --source-image={dataproc_base_image} \ ${{cert_args}} \ @@ -186,7 +197,7 @@ echo 'Creating disk.' 
base_obj_type="disks" instance_disk_args='--disk=auto-delete=yes,boot=yes,mode=rw,name={image_name}-install' - time execute_with_retries gcloud compute disks create {image_name}-install \ + execute_with_retries gcloud compute disks create {image_name}-install \ --project={project_id} \ --zone={zone} \ --image={dataproc_base_image} \ @@ -197,8 +208,7 @@ date echo 'Creating VM instance to run customization script.' - ( set -x - time execute_with_retries gcloud compute instances create {image_name}-install \ + execute_with_retries gcloud compute instances create {image_name}-install \ --project={project_id} \ --zone={zone} \ {network_flag} \ @@ -209,15 +219,16 @@ {accelerator_flag} \ {service_account_flag} \ --scopes=cloud-platform \ - {metadata_flag} \ - --metadata-from-file startup-script=startup_script/run.sh ) + "${{metadata_arg}}" \ + --metadata-from-file startup-script=startup_script/run.sh touch /tmp/{run_id}/vm_created # clean up intermediate install image - if [[ "${{base_obj_type}}" == "images" ]] ; then - execute_with_retries gcloud compute images delete -q {image_name}-install --project={project_id} - fi + if [[ "${{base_obj_type}}" == "images" ]] ; then ( set +e + # This sometimes returns an API error but deletes the image despite the failure + gcloud compute images delete -q {image_name}-install --project={project_id} + ) fi echo 'Waiting for customization script to finish and VM shutdown.' execute_with_retries gcloud compute instances tail-serial-port-output {image_name}-install \ @@ -226,7 +237,7 @@ --port=1 2>&1 \ | grep 'startup-script' \ | sed -e 's/ {image_name}-install.*startup-script://g' \ - | dd bs=1 of={log_dir}/startup-script.log \ + | dd status=none bs=1 of={log_dir}/startup-script.log \ || true echo 'Checking customization script result.' date @@ -243,13 +254,12 @@ date echo 'Creating custom image.' 
- ( set -x - time execute_with_retries gcloud compute images create {image_name} \ + execute_with_retries gcloud compute images create {image_name} \ --project={project_id} \ --source-disk-zone={zone} \ --source-disk={image_name}-install \ {storage_location_flag} \ - --family={family} ) + --family={family} touch /tmp/{run_id}/image_created }} diff --git a/examples/secure-boot/build-current-images.sh b/examples/secure-boot/build-current-images.sh index 0d7846d..f9147d2 100644 --- a/examples/secure-boot/build-current-images.sh +++ b/examples/secure-boot/build-current-images.sh @@ -49,6 +49,15 @@ function configure_service_account() { gcloud secrets add-iam-policy-binding "${public_secret_name}" \ --member="serviceAccount:${GSA}" \ --role="roles/secretmanager.secretAccessor" > /dev/null 2>&1 + + gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/compute.instanceAdmin.v1 > /dev/null 2>&1 + + gcloud iam service-accounts add-iam-policy-binding "${GSA}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/iam.serviceAccountUser > /dev/null 2>&1 + } function revoke_bindings() { @@ -66,6 +75,15 @@ function revoke_bindings() { gcloud projects remove-iam-policy-binding "${PROJECT_ID}" \ --member="serviceAccount:${GSA}" \ --role="roles/secretmanager.viewer" > /dev/null 2>&1 + + gcloud projects remove-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/compute.instanceAdmin.v1 > /dev/null 2>&1 + + gcloud iam service-accounts remove-iam-policy-binding "${GSA}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/iam.serviceAccountUser > /dev/null 2>&1 + } export PROJECT_ID="$(jq -r .PROJECT_ID env.json)" @@ -85,49 +103,25 @@ configure_service_account session_name="build-current-images" readonly timestamp="$(date +%F-%H-%M)" -#readonly timestamp="2024-10-24-04-21" +#readonly timestamp="2024-11-27-06-47" export timestamp export tmpdir=/tmp/${timestamp}; -mkdir ${tmpdir} +mkdir -p 
${tmpdir} export ZONE="$(jq -r .ZONE env.json)" gcloud compute instances list --zones "${ZONE}" --format json > ${tmpdir}/instances.json gcloud compute images list --format json > ${tmpdir}/images.json # Run generation scripts simultaneously for each dataproc image version -screen -US "${session_name}" -c examples/secure-boot/pre-init.screenrc +screen -L -US "${session_name}" -c examples/secure-boot/pre-init.screenrc -# tail -n 3 /tmp/custom-image-*/logs/workflow.log -# tail -n 3 /tmp/custom-image-*/logs/startup-script.log -# tail -n 3 /tmp/custom-image-${PURPOSE}-2-*/logs/workflow.log function find_disk_usage() { - test -f /tmp/genline.pl || cat > /tmp/genline.pl<<'EOF' -#!/usr/bin/perl -w -use strict; - -my $fn = $ARGV[0]; -my( $config ) = ( $fn =~ /custom-image-(.*-(debian|rocky|ubuntu)\d+)-\d+/ ); - -my @raw_lines = ; -my( $l ) = grep { m: /dev/.*/\s*$: } @raw_lines; -my( $stats ) = ( $l =~ m:\s*/dev/\S+\s+(.*?)\s*$: ); - -my( $dp_version ) = ($config =~ /-pre-init-(.+)/); -$dp_version =~ s/-/./; - -my($max) = map { / maximum-disk-used: (\d+)/ } @raw_lines; -$max+=3; -my $i_dp_version = sprintf(q{%-15s}, qq{"$dp_version"}); - -print( qq{ $i_dp_version) disk_size_gb="$max" ;; # $stats # $config}, $/ ); -EOF - for f in $(grep -l 'Customization script suc' /tmp/custom-image-*/logs/workflow.log|sed -e 's/workflow.log/startup-script.log/') - do - grep -A20 'Filesystem.*Avail' $f | perl /tmp/genline.pl $f + grep 'Customization script' /tmp/custom-image-*/logs/workflow.log +# grep maximum-disk-used /tmp/custom-image-*/logs/startup-script.log + for workflow_log in $(grep -l "Customization script" /tmp/custom-image-*/logs/workflow.log) ; do + startup_log=$(echo "${workflow_log}" | sed -e 's/workflow.log/startup-script.log/') + grep -A5 'Filesystem.*1K-blocks' "${startup_log}" | perl examples/secure-boot/genline.pl "${workflow_log}" done } -# sleep 8m ; grep 'Customization script' /tmp/custom-image-*/logs/workflow.log -# grep maximum-disk-used 
/tmp/custom-image-*/logs/startup-script.log - revoke_bindings diff --git a/examples/secure-boot/create-key-pair.sh b/examples/secure-boot/create-key-pair.sh index 3039042..8f2a42a 100644 --- a/examples/secure-boot/create-key-pair.sh +++ b/examples/secure-boot/create-key-pair.sh @@ -74,7 +74,6 @@ function create_key () { fi if [[ -f "${PRIVATE_KEY}" ]]; then - echo "key already exists. Skipping generation." >&2 modulus_md5sum="$(cat tls/modulus-md5sum.txt)" return fi diff --git a/examples/secure-boot/dask.sh b/examples/secure-boot/dask.sh index e1c1229..b71b4e5 100644 --- a/examples/secure-boot/dask.sh +++ b/examples/secure-boot/dask.sh @@ -517,8 +517,8 @@ function main() { echo "Dask for ${DASK_RUNTIME} successfully initialized." } -function exit_handler() ( - set +e +function exit_handler() { + set +ex echo "Exit handler invoked" # Free conda cache @@ -527,16 +527,30 @@ function exit_handler() ( # Clear pip cache pip cache purge || echo "unable to purge pip cache" - # remove the tmpfs conda pkgs_dirs - if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi - - # Clean up shared memory mounts - for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then - rm -rf ${shmdir}/* - umount -f ${shmdir} - fi - done + # If system memory was sufficient to mount memory-backed filesystems + if [[ "${tmpdir}" == "/mnt/shm" ]] ; then + # Stop hadoop services + systemctl list-units | perl -n -e 'qx(systemctl stop $1) if /^.*? 
((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' + + # remove the tmpfs conda pkgs_dirs + /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm || echo "unable to remove pkgs_dirs conda config" + + # remove the tmpfs pip cache-dir + pip config unset global.cache-dir || echo "unable to unset global pip cache" + + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do + if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then + rm -rf ${shmdir}/* + sync + sleep 3s + execute_with_retries umount -f ${shmdir} + fi + done + + umount -f /tmp + systemctl list-units | perl -n -e 'qx(systemctl start $1) if /^.*? ((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' + fi # Clean up OS package cache ; re-hold systemd package if is_debuntu ; then @@ -546,36 +560,62 @@ function exit_handler() ( dnf clean all fi - # print disk usage statistics - if is_debuntu ; then - # Rocky doesn't have sort -h and fails when the argument is passed - du --max-depth 3 -hx / | sort -h | tail -10 + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 + elif is_debian ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas} \ + /usr/lib64/google-cloud-sdk \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? 
\ + /opt/conda/miniconda3 fi # Process disk usage logs from installation period - rm -f /tmp/keep-running-df - sleep 6s + rm -f /run/keep-running-df + sync + sleep 5.01s # compute maximum size of disk during installation # Log file contains logs like the following (minus the preceeding #): -#Filesystem Size Used Avail Use% Mounted on -#/dev/vda2 6.8G 2.5G 4.0G 39% / - df --si - perl -e '$max=( sort - map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } )[-1]; -print( "maximum-disk-used: $max", $/ );' < /tmp/disk-usage.log +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / + df / | tee -a "/run/disk-usage.log" + + perl -e '@siz=( sort { $a => $b } + map { (split)[2] =~ /^(\d+)/ } + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $inc=$max-$min; +print( " samples-taken: ", scalar @siz, $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" echo "exit_handler has completed" # zero free disk space if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero ; sync ; rm -f /zero + dd if=/dev/zero of=/zero + sync + sleep 3s + rm -f /zero fi return 0 -) - -trap exit_handler EXIT +} function prepare_to_install() { readonly DEFAULT_CUDA_VERSION="12.4" @@ -601,7 +641,8 @@ function prepare_to_install() { free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" # Write to a ramdisk instead of churning the persistent disk - if [[ ${free_mem} -ge 5250000 ]]; then + if [[ ${free_mem} -ge 10500000 ]]; then + tmpdir=/mnt/shm mkdir -p /mnt/shm mount -t tmpfs tmpfs /mnt/shm @@ -618,18 +659,22 @@ function prepare_to_install() { else mount -t tmpfs tmpfs /var/cache/dnf fi + else + tmpdir=/tmp fi + install_log="/run/install.log" + trap exit_handler EXIT # Monitor disk usage in a screen session if is_debuntu ; then apt-get install -y -qq screen - elif is_rocky ; then + else dnf -y -q install screen fi - rm -f /tmp/disk-usage.log - touch 
/tmp/keep-running-df + df / | tee "/run/disk-usage.log" + touch "/run/keep-running-df" screen -d -m -US keep-running-df \ - bash -c 'while [[ -f /tmp/keep-running-df ]] ; do df --si / | tee -a /tmp/disk-usage.log ; sleep 5s ; done' + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" } prepare_to_install diff --git a/examples/secure-boot/env.json.sample b/examples/secure-boot/env.json.sample index c8a89b5..2461fc0 100644 --- a/examples/secure-boot/env.json.sample +++ b/examples/secure-boot/env.json.sample @@ -3,5 +3,6 @@ "PURPOSE":"cuda-pre-init", "BUCKET":"my-bucket-name", "IMAGE_VERSION":"2.2-debian12", - "ZONE":"us-west4-a" + "ZONE":"us-west4-a", + "SUBNET":"my-subnet" } diff --git a/examples/secure-boot/genline.pl b/examples/secure-boot/genline.pl new file mode 100644 index 0000000..81ab752 --- /dev/null +++ b/examples/secure-boot/genline.pl @@ -0,0 +1,28 @@ +#!/usr/bin/perl -w +use strict; +use POSIX qw(ceil); + +# /tmp/custom-image-cuda-pre-init-2-0-debian10-2024-11-14-20-00-20241114-200043/logs/workflow.log +my $fn = $ARGV[0]; +my( $config, $purpose, $dp_version, $timestamp ) = + ( $fn =~ + m{custom-image- + ( + (.+)- + (\d+-\d+-(debian|rocky|ubuntu)\d+) + )- + (\d{4}(?:-\d{2}){4}) + }x + ); +$dp_version =~ s/-/./; + +my @raw_lines = ; +my( $l ) = grep { m: /dev/.*/\s*$: } @raw_lines; +my( $stats ) = ( $l =~ m:\s*/dev/\S+\s+(.*?)\s*$: ); +$stats =~ s:(\d{4,}):sprintf(q{%7s}, sprintf(q{%.2fG},($1/1024)/1024)):eg; + +my($max) = map { / maximum-disk-used: (\d+)/ } @raw_lines; +my($gbmax) = ceil((($max / 1024) / 1024) * 1.03); +$gbmax = 30 if $gbmax < 30; +my $i_dp_version = sprintf(q{%-15s}, qq{"$dp_version"}); +print( qq{ $i_dp_version) disk_size_gb="$gbmax" ;; # $stats # $purpose}, $/ ); diff --git a/examples/secure-boot/install_gpu_driver.sh b/examples/secure-boot/install_gpu_driver.sh index c0129dc..25efb2a 100644 --- a/examples/secure-boot/install_gpu_driver.sh +++ 
b/examples/secure-boot/install_gpu_driver.sh @@ -16,20 +16,35 @@ set -euxo pipefail -function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) function os_version() ( set +x ; grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; ) function os_codename() ( set +x ; grep '^VERSION_CODENAME=' /etc/os-release | cut -d= -f2 | xargs ; ) -function is_rocky() ( set +x ; [[ "$(os_id)" == 'rocky' ]] ; ) -function is_rocky8() ( set +x ; is_rocky && [[ "$(os_version)" == '8'* ]] ; ) -function is_rocky9() ( set +x ; is_rocky && [[ "$(os_version)" == '9'* ]] ; ) -function is_ubuntu() ( set +x ; [[ "$(os_id)" == 'ubuntu' ]] ; ) -function is_ubuntu18() ( set +x ; is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; ) -function is_ubuntu20() ( set +x ; is_ubuntu && [[ "$(os_version)" == '20.04'* ]] ; ) -function is_ubuntu22() ( set +x ; is_ubuntu && [[ "$(os_version)" == '22.04'* ]] ; ) -function is_debian() ( set +x ; [[ "$(os_id)" == 'debian' ]] ; ) -function is_debian10() ( set +x ; is_debian && [[ "$(os_version)" == '10'* ]] ; ) -function is_debian11() ( set +x ; is_debian && [[ "$(os_version)" == '11'* ]] ; ) -function is_debian12() ( set +x ; is_debian && [[ "$(os_version)" == '12'* ]] ; ) + +function version_ge() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | tail -n1)" ] ; ) +function version_gt() ( set +x ; [ "$1" = "$2" ] && return 1 || version_ge $1 $2 ; ) +function version_le() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] ; ) +function version_lt() ( set +x ; [ "$1" = "$2" ] && return 1 || version_le $1 $2 ; ) + +readonly -A supported_os=( + ['debian']="10 11 12" + ['rocky']="8 9" + ['ubuntu']="18.04 20.04 22.04" +) + +# dynamically define OS version test utility functions +if [[ "$(os_id)" == "rocky" ]]; +then _os_version=$(os_version | sed -e 's/[^0-9].*$//g') +else _os_version="$(os_version)"; fi +for os_id_val in 'rocky' 'ubuntu' 
'debian' ; do + eval "function is_${os_id_val}() ( set +x ; [[ \"$(os_id)\" == '${os_id_val}' ]] ; )" + + for osver in $(echo "${supported_os["${os_id_val}"]}") ; do + eval "function is_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && [[ \"${_os_version}\" == \"${osver}\" ]] ; )" + eval "function ge_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_ge \"${_os_version}\" \"${osver}\" ; )" + eval "function le_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_le \"${_os_version}\" \"${osver}\" ; )" + done +done + function is_debuntu() ( set +x ; is_debian || is_ubuntu ; ) function os_vercat() ( set +x @@ -37,8 +52,8 @@ function os_vercat() ( set +x elif is_rocky ; then os_version | sed -e 's/[^0-9].*$//g' else os_version ; fi ; ) -function remove_old_backports { - if is_debian12 ; then return ; fi +function repair_old_backports { + if ge_debian12 || ! is_debuntu ; then return ; fi # This script uses 'apt-get update' and is therefore potentially dependent on # backports repositories which have been archived. 
In order to mitigate this # problem, we will use archive.debian.org for the oldoldstable repo @@ -58,14 +73,6 @@ function remove_old_backports { done } -# Return true if the first argument is equal to or less than the second argument -function compare_versions_lte { [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] ; } - -# Return true if the first argument is less than the second argument -function compare_versions_lt() ( set +x - [ "$1" = "$2" ] && return 1 || compare_versions_lte $1 $2 -) - function print_metadata_value() { local readonly tmpfile=$(mktemp) http_code=$(curl -f "${1}" -H "Metadata-Flavor: Google" -w "%{http_code}" \ @@ -120,52 +127,93 @@ readonly ROLE # CUDA version and Driver version # https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html +# https://developer.nvidia.com/cuda-downloads +# Rocky8: 12.0: 525.147.05 readonly -A DRIVER_FOR_CUDA=( - [11.8]="525.147.05" [12.1]="530.30.02" [12.4]="550.54.14" - [12.5]="555.42.06" [12.6]="560.28.03" + ["11.8"]="560.35.03" + ["12.0"]="525.60.13" ["12.4"]="560.35.03" ["12.6"]="560.35.03" +) +# https://developer.nvidia.com/cudnn-downloads +if is_debuntu ; then +readonly -A CUDNN_FOR_CUDA=( + ["11.8"]="9.5.1.17" + ["12.0"]="9.5.1.17" ["12.4"]="9.5.1.17" ["12.6"]="9.5.1.17" ) +elif is_rocky ; then +# rocky: +# 12.0: 8.8.1.3 +# 12.1: 8.9.3.28 +# 12.2: 8.9.7.29 +# 12.3: 9.0.0.312 +# 12.4: 9.1.1.17 +# 12.5: 9.2.1.18 +# 12.6: 9.5.1.17 readonly -A CUDNN_FOR_CUDA=( - [11.8]="8.6.0.163" [12.1]="8.9.0" [12.4]="9.1.0.70" - [12.5]="9.2.1.18" + ["11.8"]="9.5.1.17" + ["12.0"]="8.8.1.3" ["12.4"]="9.1.1.17" ["12.6"]="9.5.1.17" ) +fi +# https://developer.nvidia.com/nccl/nccl-download +# 12.2: 2.19.3, 12.5: 2.21.5 readonly -A NCCL_FOR_CUDA=( - [11.8]="2.15.5" [12.1]="2.17.1" [12.4]="2.21.5" - [12.5]="2.22.3" + ["11.8"]="2.15.5" + ["12.0"]="2.16.5" ["12.4"]="2.23.4" ["12.6"]="2.23.4" ) readonly -A CUDA_SUBVER=( - [11.8]="11.8.0" [12.1]="12.1.0" [12.4]="12.4.1" - [12.5]="12.5.1" + ["11.8"]="11.8.0" + 
["12.0"]="12.0.0" ["12.4"]="12.4.1" ["12.6"]="12.6.2" ) RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'SPARK') readonly DEFAULT_CUDA_VERSION='12.4' CUDA_VERSION=$(get_metadata_attribute 'cuda-version' "${DEFAULT_CUDA_VERSION}") +if ( ( ge_debian12 || ge_rocky9 ) && version_le "${CUDA_VERSION%%.*}" "11" ) ; then + # CUDA 11 no longer supported on debian12 - 2024-11-22, rocky9 - 2024-11-27 + CUDA_VERSION="${DEFAULT_CUDA_VERSION}" +fi + +if ( version_ge "${CUDA_VERSION}" "12" && (le_debian11 || le_ubuntu18) ) ; then + # Only CUDA 12.0 supported on older debuntu + CUDA_VERSION="12.0" +fi readonly CUDA_VERSION readonly CUDA_FULL_VERSION="${CUDA_SUBVER["${CUDA_VERSION}"]}" function is_cuda12() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "12" ]] ; ) +function le_cuda12() ( set +x ; version_le "${CUDA_VERSION%%.*}" "12" ; ) +function ge_cuda12() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "12" ; ) + function is_cuda11() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "11" ]] ; ) -readonly DEFAULT_DRIVER=${DRIVER_FOR_CUDA["${CUDA_VERSION}"]} +function le_cuda11() ( set +x ; version_le "${CUDA_VERSION%%.*}" "11" ; ) +function ge_cuda11() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "11" ; ) + +DEFAULT_DRIVER="${DRIVER_FOR_CUDA[${CUDA_VERSION}]}" +if ( ge_ubuntu22 && version_le "${CUDA_VERSION}" "12.0" ) ; then + DEFAULT_DRIVER="560.28.03" ; fi +if ( is_debian11 || is_ubuntu20 ) ; then DEFAULT_DRIVER="560.28.03" ; fi +if ( is_rocky && le_cuda11 ) ; then DEFAULT_DRIVER="525.147.05" ; fi +if ( is_ubuntu20 && le_cuda11 ) ; then DEFAULT_DRIVER="535.183.06" ; fi +if ( is_rocky9 && ge_cuda12 ) ; then DEFAULT_DRIVER="565.57.01" ; fi DRIVER_VERSION=$(get_metadata_attribute 'gpu-driver-version' "${DEFAULT_DRIVER}") -if is_debian11 || is_ubuntu22 || is_ubuntu20 ; then DRIVER_VERSION="560.28.03" ; fi -if is_ubuntu20 && is_cuda11 ; then DRIVER_VERSION="535.183.06" ; fi readonly DRIVER_VERSION readonly DRIVER=${DRIVER_VERSION%%.*} -# Parameters for NVIDIA-provided CUDNN library +readonly 
DEFAULT_CUDNN8_VERSION="8.0.5.39" +readonly DEFAULT_CUDNN9_VERSION="9.1.0.70" + +# Parameters for NVIDIA-provided cuDNN library readonly DEFAULT_CUDNN_VERSION=${CUDNN_FOR_CUDA["${CUDA_VERSION}"]} CUDNN_VERSION=$(get_metadata_attribute 'cudnn-version' "${DEFAULT_CUDNN_VERSION}") function is_cudnn8() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "8" ]] ; ) function is_cudnn9() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "9" ]] ; ) -if is_rocky \ - && (compare_versions_lte "${CUDNN_VERSION}" "8.0.5.39") ; then - CUDNN_VERSION="8.0.5.39" -elif (is_ubuntu20 || is_ubuntu22 || is_debian12) && is_cudnn8 ; then +# The minimum cuDNN version supported by rocky is ${DEFAULT_CUDNN8_VERSION} +if is_rocky && (version_le "${CUDNN_VERSION}" "${DEFAULT_CUDNN8_VERSION}") ; then + CUDNN_VERSION="${DEFAULT_CUDNN8_VERSION}" +elif (ge_ubuntu20 || ge_debian12) && is_cudnn8 ; then # cuDNN v8 is not distribution for ubuntu20+, debian12 - CUDNN_VERSION="9.1.0.70" - -elif (is_ubuntu18 || is_debian10 || is_debian11) && is_cudnn9 ; then + CUDNN_VERSION="${DEFAULT_CUDNN9_VERSION}" +elif (le_ubuntu18 || le_debian11) && is_cudnn9 ; then # cuDNN v9 is not distributed for ubuntu18, debian10, debian11 ; fall back to 8 CUDNN_VERSION="8.8.0.121" fi @@ -181,14 +229,14 @@ readonly USERSPACE_URL=$(get_metadata_attribute 'gpu-driver-url' "${DEFAULT_USER # Short name for urls if is_ubuntu22 ; then - # at the time of writing 20240721 there is no ubuntu2204 in the index of repos at + # at the time of writing 20241125 there is no ubuntu2204 in the index of repos at # https://developer.download.nvidia.com/compute/machine-learning/repos/ # use packages from previous release until such time as nvidia # release ubuntu2204 builds nccl_shortname="ubuntu2004" shortname="$(os_id)$(os_vercat)" -elif is_rocky9 ; then +elif ge_rocky9 ; then # use packages from previous release until such time as nvidia # release rhel9 builds @@ -212,30 +260,53 @@ NCCL_REPO_URL=$(get_metadata_attribute 'nccl-repo-url' "${DEFAULT_NCCL_REPO_URL} 
readonly NCCL_REPO_URL readonly NCCL_REPO_KEY="${NVIDIA_BASE_DL_URL}/machine-learning/repos/${nccl_shortname}/x86_64/7fa2af80.pub" # 3bf863cc.pub -readonly -A DEFAULT_NVIDIA_CUDA_URLS=( - [11.8]="${NVIDIA_BASE_DL_URL}/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run" - [12.1]="${NVIDIA_BASE_DL_URL}/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run" - [12.4]="${NVIDIA_BASE_DL_URL}/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run" -) -readonly DEFAULT_NVIDIA_CUDA_URL=${DEFAULT_NVIDIA_CUDA_URLS["${CUDA_VERSION}"]} -NVIDIA_CUDA_URL=$(get_metadata_attribute 'cuda-url' "${DEFAULT_NVIDIA_CUDA_URL}") -readonly NVIDIA_CUDA_URL +function set_cuda_runfile_url() { + local RUNFILE_DRIVER_VERSION="${DRIVER_VERSION}" + local RUNFILE_CUDA_VERSION="${CUDA_FULL_VERSION}" + + if ge_cuda12 ; then + if ( le_debian11 || le_ubuntu18 ) ; then + RUNFILE_DRIVER_VERSION="525.60.13" + RUNFILE_CUDA_VERSION="12.0.0" + elif ( le_rocky8 && version_le "${DATAPROC_IMAGE_VERSION}" "2.0" ) ; then + RUNFILE_DRIVER_VERSION="525.147.05" + RUNFILE_CUDA_VERSION="12.0.0" + fi + else + RUNFILE_DRIVER_VERSION="520.61.05" + RUNFILE_CUDA_VERSION="11.8.0" + fi + + readonly RUNFILE_FILENAME="cuda_${RUNFILE_CUDA_VERSION}_${RUNFILE_DRIVER_VERSION}_linux.run" + CUDA_RELEASE_BASE_URL="${NVIDIA_BASE_DL_URL}/cuda/${RUNFILE_CUDA_VERSION}" + DEFAULT_NVIDIA_CUDA_URL="${CUDA_RELEASE_BASE_URL}/local_installers/${RUNFILE_FILENAME}" + readonly DEFAULT_NVIDIA_CUDA_URL + + NVIDIA_CUDA_URL=$(get_metadata_attribute 'cuda-url' "${DEFAULT_NVIDIA_CUDA_URL}") + readonly NVIDIA_CUDA_URL +} + +set_cuda_runfile_url # Parameter for NVIDIA-provided Rocky Linux GPU driver readonly NVIDIA_ROCKY_REPO_URL="${NVIDIA_REPO_URL}/cuda-${shortname}.repo" CUDNN_TARBALL="cudnn-${CUDA_VERSION}-linux-x64-v${CUDNN_VERSION}.tgz" CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/${CUDNN_TARBALL}" -if ( compare_versions_lte "8.3.1.22" "${CUDNN_VERSION}" ); then +if ( version_ge "${CUDNN_VERSION}" 
"8.3.1.22" ); then + # When version is greater than or equal to 8.3.1.22 but less than 8.4.1.50 use this format CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%.*}-archive.tar.xz" - if ( compare_versions_lte "${CUDNN_VERSION}" "8.4.1.50" ); then + if ( version_le "${CUDNN_VERSION}" "8.4.1.50" ); then + # When cuDNN version is greater than or equal to 8.4.1.50 use this format CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION}-archive.tar.xz" fi + # Use legacy url format with one of the tarball name formats depending on version as above CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/local_installers/${CUDA_VERSION}/${CUDNN_TARBALL}" fi -if ( compare_versions_lte "12.0" "${CUDA_VERSION}" ); then - # When cuda version is greater than 12.0 - CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-9.2.0.82_cuda12-archive.tar.xz" +if ( version_ge "${CUDA_VERSION}" "12.0" ); then + # Use modern url format When cuda version is greater than or equal to 12.0 + CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%%.*}-archive.tar.xz" + CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/cudnn/redist/cudnn/linux-x86_64/${CUDNN_TARBALL}" fi readonly CUDNN_TARBALL readonly CUDNN_TARBALL_URL @@ -264,10 +335,14 @@ function execute_with_retries() ( local -r cmd="$*" if [[ "$cmd" =~ "^apt-get install" ]] ; then - cmd="apt-get -y clean && $cmd" + apt-get -y clean + apt-get -y autoremove fi for ((i = 0; i < 3; i++)); do - if eval "$cmd" ; then return 0 ; fi + set -x + time eval "$cmd" > "${install_log}" 2>&1 && retval=$? || { retval=$? 
; cat "${install_log}" ; } + set +x + if [[ $retval == 0 ]] ; then return 0 ; fi sleep 5 done return 1 @@ -279,9 +354,9 @@ function install_cuda_keyring_pkg() { local kr_ver=1.1 curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ "${NVIDIA_REPO_URL}/cuda-keyring_${kr_ver}-1_all.deb" \ - -o "${download_dir}/cuda-keyring.deb" - dpkg -i "${download_dir}/cuda-keyring.deb" - rm -f "${download_dir}/cuda-keyring.deb" + -o "${tmpdir}/cuda-keyring.deb" + dpkg -i "${tmpdir}/cuda-keyring.deb" + rm -f "${tmpdir}/cuda-keyring.deb" CUDA_KEYRING_PKG_INSTALLED="1" } @@ -301,10 +376,10 @@ function install_local_cuda_repo() { readonly DIST_KEYRING_DIR="/var/${pkgname}" curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${LOCAL_DEB_URL}" -o "${download_dir}/${LOCAL_INSTALLER_DEB}" + "${LOCAL_DEB_URL}" -o "${tmpdir}/${LOCAL_INSTALLER_DEB}" - dpkg -i "${download_dir}/${LOCAL_INSTALLER_DEB}" - rm "${download_dir}/${LOCAL_INSTALLER_DEB}" + dpkg -i "${tmpdir}/${LOCAL_INSTALLER_DEB}" + rm "${tmpdir}/${LOCAL_INSTALLER_DEB}" cp ${DIST_KEYRING_DIR}/cuda-*-keyring.gpg /usr/share/keyrings/ if is_ubuntu ; then @@ -329,11 +404,11 @@ function install_local_cudnn_repo() { # ${NVIDIA_BASE_DL_URL}/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${local_deb_url}" -o "${download_dir}/local-installer.deb" + "${local_deb_url}" -o "${tmpdir}/local-installer.deb" - dpkg -i "${download_dir}/local-installer.deb" + dpkg -i "${tmpdir}/local-installer.deb" - rm -f "${download_dir}/local-installer.deb" + rm -f "${tmpdir}/local-installer.deb" cp /var/cudnn-local-repo-*-${CUDNN}*/cudnn-local-*-keyring.gpg /usr/share/keyrings @@ -361,7 +436,7 @@ function install_local_cudnn8_repo() { CUDNN8_PKG_NAME="${pkgname}" deb_fn="${pkgname}_1.0-1_amd64.deb" - local_deb_fn="${download_dir}/${deb_fn}" + local_deb_fn="${tmpdir}/${deb_fn}" 
local_deb_url="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN}/local_installers/${CUDNN8_CUDA_VER}/${deb_fn}" curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ "${local_deb_url}" -o "${local_deb_fn}" @@ -383,10 +458,9 @@ function install_nvidia_nccl() { local -r nccl_version="${NCCL_VERSION}-1+cuda${CUDA_VERSION}" if is_rocky ; then - time execute_with_retries \ + execute_with_retries \ dnf -y -q install \ - "libnccl-${nccl_version}" "libnccl-devel-${nccl_version}" "libnccl-static-${nccl_version}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libnccl-${nccl_version}" "libnccl-devel-${nccl_version}" "libnccl-static-${nccl_version}" sync elif is_ubuntu ; then install_cuda_keyring_pkg @@ -394,16 +468,14 @@ function install_nvidia_nccl() { apt-get update -qq if is_ubuntu18 ; then - time execute_with_retries \ + execute_with_retries \ apt-get install -q -y \ - libnccl2 libnccl-dev \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + libnccl2 libnccl-dev sync else - time execute_with_retries \ + execute_with_retries \ apt-get install -q -y \ - "libnccl2=${nccl_version}" "libnccl-dev=${nccl_version}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libnccl2=${nccl_version}" "libnccl-dev=${nccl_version}" sync fi else @@ -427,22 +499,20 @@ function install_nvidia_cudnn() { if is_rocky ; then if is_cudnn8 ; then - time execute_with_retries dnf -y -q install \ + execute_with_retries dnf -y -q install \ "libcudnn${major_version}" \ - "libcudnn${major_version}-devel" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn${major_version}-devel" sync elif is_cudnn9 ; then - time execute_with_retries dnf -y -q install \ + execute_with_retries dnf -y -q install \ "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" sync else echo 
"Unsupported cudnn version: '${major_version}'" fi elif is_debuntu; then - if is_debian12 && is_src_os ; then + if ge_debian12 && is_src_os ; then apt-get -y install nvidia-cudnn else local CUDNN="${CUDNN_VERSION%.*}" @@ -451,23 +521,21 @@ function install_nvidia_cudnn() { apt-get update -qq - time execute_with_retries \ + execute_with_retries \ apt-get -y install --no-install-recommends \ "libcudnn8=${cudnn_pkg_version}" \ - "libcudnn8-dev=${cudnn_pkg_version}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn8-dev=${cudnn_pkg_version}" sync elif is_cudnn9 ; then install_cuda_keyring_pkg apt-get update -qq - time execute_with_retries \ + execute_with_retries \ apt-get -y install --no-install-recommends \ "libcudnn9-cuda-${CUDA_VERSION%%.*}" \ "libcudnn9-dev-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" sync else echo "Unsupported cudnn version: [${CUDNN_VERSION}]" @@ -478,9 +546,8 @@ function install_nvidia_cudnn() { packages=( "libcudnn${major_version}=${cudnn_pkg_version}" "libcudnn${major_version}-dev=${cudnn_pkg_version}") - time execute_with_retries \ - apt-get install -q -y --no-install-recommends "${packages[*]}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries \ + apt-get install -q -y --no-install-recommends "${packages[*]}" sync else echo "Unsupported OS: '${OS_NAME}'" @@ -577,7 +644,7 @@ function clear_dkms_key { } function add_contrib_component() { - if is_debian12 ; then + if ge_debian12 ; then # Include in sources file components on which nvidia-kernel-open-dkms depends local -r debian_sources="/etc/apt/sources.list.d/debian.sources" local components="main contrib" @@ -590,7 +657,7 @@ function add_contrib_component() { function add_nonfree_components() { if is_src_nvidia ; then return; fi - if is_debian12 ; then + if ge_debian12 ; then # 
Include in sources file components on which nvidia-open-kernel-dkms depends local -r debian_sources="/etc/apt/sources.list.d/debian.sources" local components="main contrib non-free non-free-firmware" @@ -621,7 +688,7 @@ function add_repo_cuda() { if is_debuntu ; then local kr_path=/usr/share/keyrings/cuda-archive-keyring.gpg local sources_list_path="/etc/apt/sources.list.d/cuda-${shortname}-x86_64.list" -echo "deb [signed-by=${kr_path}] https://developer.download.nvidia.com/compute/cuda/repos/${shortname}/x86_64/ /" \ + echo "deb [signed-by=${kr_path}] https://developer.download.nvidia.com/compute/cuda/repos/${shortname}/x86_64/ /" \ | sudo tee "${sources_list_path}" curl "${NVIDIA_BASE_DL_URL}/cuda/repos/${shortname}/x86_64/cuda-archive-keyring.gpg" \ -o "${kr_path}" @@ -675,7 +742,7 @@ function build_driver_from_github() { } function build_driver_from_packages() { - if is_ubuntu || is_debian ; then + if is_debuntu ; then if [[ -n "$(apt-cache search -n "nvidia-driver-${DRIVER}-server-open")" ]] ; then local pkglist=("nvidia-driver-${DRIVER}-server-open") ; else local pkglist=("nvidia-driver-${DRIVER}-open") ; fi @@ -692,21 +759,17 @@ function build_driver_from_packages() { fi add_contrib_component apt-get update -qq - execute_with_retries apt-get install -y -qq --no-install-recommends dkms \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries apt-get install -y -qq --no-install-recommends dkms #configure_dkms_certs - time execute_with_retries apt-get install -y -qq --no-install-recommends "${pkglist[@]}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries apt-get install -y -qq --no-install-recommends "${pkglist[@]}" sync elif is_rocky ; then #configure_dkms_certs - if time execute_with_retries dnf -y -q module install "nvidia-driver:${DRIVER}-dkms" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } ; then + if execute_with_retries dnf -y -q module install 
"nvidia-driver:${DRIVER}-dkms" ; then echo "nvidia-driver:${DRIVER}-dkms installed successfully" else - time execute_with_retries dnf -y -q module install 'nvidia-driver:latest' \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries dnf -y -q module install 'nvidia-driver:latest' fi sync fi @@ -714,30 +777,28 @@ function build_driver_from_packages() { } function install_nvidia_userspace_runfile() { - if test -f "${download_dir}/userspace-complete" ; then return ; fi + if test -f "${tmpdir}/userspace-complete" ; then return ; fi curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${USERSPACE_URL}" -o "${download_dir}/userspace.run" - time bash "${download_dir}/userspace.run" --no-kernel-modules --silent --install-libglvnd \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } - rm -f "${download_dir}/userspace.run" - touch "${download_dir}/userspace-complete" + "${USERSPACE_URL}" -o "${tmpdir}/userspace.run" + execute_with_retries bash "${tmpdir}/userspace.run" --no-kernel-modules --silent --install-libglvnd --tmpdir="${tmpdir}" + rm -f "${tmpdir}/userspace.run" + touch "${tmpdir}/userspace-complete" sync } function install_cuda_runfile() { - if test -f "${download_dir}/cuda-complete" ; then return ; fi + if test -f "${tmpdir}/cuda-complete" ; then return ; fi time curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${NVIDIA_CUDA_URL}" -o "${download_dir}/cuda.run" - time bash "${download_dir}/cuda.run" --silent --toolkit --no-opengl-libs \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } - rm -f "${download_dir}/cuda.run" - touch "${download_dir}/cuda-complete" + "${NVIDIA_CUDA_URL}" -o "${tmpdir}/cuda.run" + execute_with_retries bash "${tmpdir}/cuda.run" --silent --toolkit --no-opengl-libs --tmpdir="${tmpdir}" + rm -f "${tmpdir}/cuda.run" + touch "${tmpdir}/cuda-complete" sync } function install_cuda_toolkit() { local cudatk_package=cuda-toolkit - if is_debian12 && 
is_src_os ; then + if ge_debian12 && is_src_os ; then cudatk_package="${cudatk_package}=${CUDA_FULL_VERSION}-1" elif [[ -n "${CUDA_VERSION}" ]]; then cudatk_package="${cudatk_package}-${CUDA_VERSION//./-}" @@ -746,47 +807,32 @@ function install_cuda_toolkit() { readonly cudatk_package if is_debuntu ; then # if is_ubuntu ; then execute_with_retries "apt-get install -y -qq --no-install-recommends cuda-drivers-${DRIVER}=${DRIVER_VERSION}-1" ; fi - time execute_with_retries apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package} \ - > "${install_log}" 2>&1 || { cat "${install_log}" ; exit -4 ; } - sync + execute_with_retries apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package} + sync elif is_rocky ; then - time execute_with_retries dnf -y -q install "${cudatk_package}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + # rocky9: cuda-11-[7,8], cuda-12-[1..6] + execute_with_retries dnf -y -q install "${cudatk_package}" sync fi } -function install_drivers_aliases() { - if is_rocky ; then return ; fi - if ! 
(is_debian12 || is_debian11) ; then return ; fi - if (is_debian12 && is_cuda11) && is_src_nvidia ; then return ; fi # don't install on debian 12 / cuda11 with drivers from nvidia - # Add a modprobe alias to prefer the open kernel modules - local conffile="/etc/modprobe.d/nvidia-aliases.conf" - echo -n "" > "${conffile}" - local prefix - if is_src_os ; then prefix="nvidia-current-open" - elif is_src_nvidia ; then prefix="nvidia-current" ; fi - local suffix - for suffix in uvm peermem modeset drm; do - echo "alias nvidia-${suffix} ${prefix}-${suffix}" >> "${conffile}" - done - echo "alias nvidia ${prefix}" >> "${conffile}" -} - function load_kernel_module() { # for some use cases, the kernel module needs to be removed before first use of nvidia-smi for module in nvidia_uvm nvidia_drm nvidia_modeset nvidia ; do rmmod ${module} > /dev/null 2>&1 || echo "unable to rmmod ${module}" done - install_drivers_aliases depmod -a modprobe nvidia + for suffix in uvm modeset drm; do + modprobe "nvidia-${suffix}" + done + # TODO: if peermem is available, also modprobe nvidia-peermem } # Install NVIDIA GPU driver provided by NVIDIA function install_nvidia_gpu_driver() { - if is_debian12 && is_src_os ; then + if ( ge_debian12 && is_src_os ) ; then add_nonfree_components add_repo_nvidia_container_toolkit apt-get update -qq @@ -800,33 +846,25 @@ function install_nvidia_gpu_driver() { libglvnd0 \ libcuda1 #clear_dkms_key - load_kernel_module - elif is_ubuntu18 || is_debian10 || (is_debian12 && is_cuda11) ; then + elif ( le_ubuntu18 || le_debian10 || (ge_debian12 && le_cuda11) ) ; then install_nvidia_userspace_runfile build_driver_from_github - load_kernel_module - install_cuda_runfile elif is_debuntu ; then install_cuda_keyring_pkg build_driver_from_packages - load_kernel_module - install_cuda_toolkit elif is_rocky ; then add_repo_cuda build_driver_from_packages - load_kernel_module - install_cuda_toolkit - else echo "Unsupported OS: '${OS_NAME}'" exit 1 @@ -852,8 +890,7 @@ function 
install_gpu_agent() { "${GPU_AGENT_REPO_URL}/report_gpu_metrics.py" \ | sed -e 's/-u --format=/--format=/' \ | dd status=none of="${install_dir}/report_gpu_metrics.py" - time execute_with_retries pip install -r "${install_dir}/requirements.txt" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries pip install -r "${install_dir}/requirements.txt" sync # Generate GPU service. @@ -957,7 +994,8 @@ function configure_gpu_script() { # need to update the getGpusResources.sh script to look for MIG devices since if multiple GPUs nvidia-smi still # lists those because we only disable the specific GIs via CGROUPs. Here we just create it based off of: # https://raw.githubusercontent.com/apache/spark/master/examples/src/main/scripts/getGpusResources.sh - cat > ${spark_gpu_script_dir}/getGpusResources.sh <<'EOF' + local -r gpus_resources_script="${spark_gpu_script_dir}/getGpusResources.sh" + cat > "${gpus_resources_script}" <<'EOF' #!/usr/bin/env bash # @@ -977,31 +1015,17 @@ function configure_gpu_script() { # limitations under the License. 
# -CACHE_FILE="/var/run/nvidia-gpu-index.txt" -if [[ -f "${CACHE_FILE}" ]]; then - cat "${CACHE_FILE}" - exit 0 -fi -NV_SMI_L_CACHE_FILE="/var/run/nvidia-smi_-L.txt" -if [[ -f "${NV_SMI_L_CACHE_FILE}" ]]; then - NVIDIA_SMI_L="$(cat "${NV_SMI_L_CACHE_FILE}")" -else - NVIDIA_SMI_L="$(nvidia-smi -L | tee "${NV_SMI_L_CACHE_FILE}")" -fi - -NUM_MIG_DEVICES=$(echo "${NVIDIA_SMI_L}" | grep -e MIG -e H100 -e A100 | wc -l || echo '0') - -if [[ "${NUM_MIG_DEVICES}" -gt "0" ]] ; then - MIG_INDEX=$(( $NUM_MIG_DEVICES - 1 )) - ADDRS="$(perl -e 'print(join(q{,},map{qq{"$_"}}(0..$ARGV[0])),$/)' "${MIG_INDEX}")" -else - ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') -fi +ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') -echo {\"name\": \"gpu\", \"addresses\":[$ADDRS]} | tee "${CACHE_FILE}" +echo {\"name\": \"gpu\", \"addresses\":[${ADDRS}]} EOF - chmod a+rwx -R ${spark_gpu_script_dir} + chmod a+rx "${gpus_resources_script}" + + local spark_defaults_conf="/etc/spark/conf.dist/spark-defaults.conf" + if ! grep spark.executor.resource.gpu.discoveryScript "${spark_defaults_conf}" ; then + echo "spark.executor.resource.gpu.discoveryScript=${gpus_resources_script}" >> "${spark_defaults_conf}" + fi } function configure_gpu_isolation() { @@ -1050,41 +1074,39 @@ function nvsmi() { "${nvsmi}" $* } -function main() { - if ! is_debian && ! is_ubuntu && ! 
is_rocky ; then - echo "Unsupported OS: '$(os_name)'" - exit 1 - fi - - remove_old_backports - +function install_dependencies() { if is_debuntu ; then - export DEBIAN_FRONTEND=noninteractive - time execute_with_retries apt-get install -y -qq pciutils "linux-headers-${uname_r}" > /dev/null 2>&1 + execute_with_retries apt-get install -y -qq pciutils "linux-headers-${uname_r}" screen elif is_rocky ; then - time execute_with_retries dnf -y -q update --exclude=systemd*,kernel* \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } - time execute_with_retries dnf -y -q install pciutils gcc \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries dnf -y -q install pciutils gcc screen local dnf_cmd="dnf -y -q install kernel-devel-${uname_r}" - local kernel_devel_pkg_out="$(eval "${dnf_cmd} 2>&1")" - if [[ "${kernel_devel_pkg_out}" =~ 'Unable to find a match: kernel-devel-' ]] ; then + local install_log="${tmpdir}/install.log" + set +e + eval "${dnf_cmd}" > "${install_log}" 2>&1 + local retval="$?" 
+ set -e + + if [[ "${retval}" == "0" ]] ; then return ; fi + + if grep -q 'Unable to find a match: kernel-devel-' "${install_log}" ; then # this kernel-devel may have been migrated to the vault - local vault="https://download.rockylinux.org/vault/rocky/$(os_version)" - time execute_with_retries dnf -y -q --setopt=localpkg_gpgcheck=1 install \ + local os_ver="$(echo $uname_r | perl -pe 's/.*el(\d+_\d+)\..*/$1/; s/_/./')" + local vault="https://download.rockylinux.org/vault/rocky/${os_ver}" + dnf_cmd="$(echo dnf -y -q --setopt=localpkg_gpgcheck=1 install \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-${uname_r}.rpm" \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-core-${uname_r}.rpm" \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-${uname_r}.rpm" \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-core-${uname_r}.rpm" \ - "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm" \ - > "${install_log}" 2>&1 || { cat "${install_log}" ; exit -4 ; } - sync - else - execute_with_retries "${dnf_cmd}" + "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm" + )" fi + + execute_with_retries "${dnf_cmd}" fi +} +function main() { # This configuration should be run on all nodes # regardless if they have attached GPUs configure_yarn @@ -1111,6 +1133,8 @@ function main() { if [[ $IS_MIG_ENABLED -eq 0 ]]; then install_nvidia_gpu_driver + load_kernel_module + if [[ -n ${CUDNN_VERSION} ]]; then install_nvidia_nccl install_nvidia_cudnn @@ -1128,7 +1152,7 @@ function main() { rmmod ${module} > /dev/null 2>&1 || echo "unable to rmmod ${module}" done - MIG_GPU_LIST="$(nvsmi -L | grep -e MIG -e H100 -e A100 || echo -n "")" + MIG_GPU_LIST="$(nvsmi -L | grep -e MIG -e P100 -e H100 -e A100 || echo -n "")" if test -n "$(nvsmi -L)" ; then # cache the result of the gpu query ADDRS=$(nvsmi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') @@ -1241,8 +1265,10 @@ function clean_up_sources_lists() { # cran-r # if [[ -f 
/etc/apt/sources.list.d/cran-r.list ]]; then + keyid="0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7" + if is_ubuntu18 ; then keyid="0x51716619E084DAB9"; fi rm -f /usr/share/keyrings/cran-r.gpg - curl 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7' | \ + curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" | \ gpg --dearmor -o /usr/share/keyrings/cran-r.gpg sed -i -e 's:deb http:deb [signed-by=/usr/share/keyrings/cran-r.gpg] http:g' /etc/apt/sources.list.d/cran-r.list fi @@ -1262,132 +1288,180 @@ function clean_up_sources_lists() { } function exit_handler() { - echo "Exit handler invoked" set +ex + echo "Exit handler invoked" + # Purge private key material until next grant clear_dkms_key - # Free conda cache - /opt/conda/miniconda3/bin/conda clean -a - # Clear pip cache pip cache purge || echo "unable to purge pip cache" - # remove the tmpfs conda pkgs_dirs - if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi + # If system memory was sufficient to mount memory-backed filesystems + if [[ "${tmpdir}" == "/mnt/shm" ]] ; then + # remove the tmpfs pip cache-dir + pip config unset global.cache-dir || echo "unable to unset global pip cache" - # remove the tmpfs pip cache-dir - pip config unset global.cache-dir || echo "unable to set global pip cache" - - # Clean up shared memory mounts - for shmdir in /mnt/shm /var/cache/apt/archives /var/cache/dnf ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then - rm -rf ${shmdir}/* - sync + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm /tmp ; do + if grep -q "^tmpfs ${shmdir}" /proc/mounts && ! grep -q "^tmpfs ${shmdir}" /etc/fstab ; then + umount -f ${shmdir} + fi + done - execute_with_retries umount -f ${shmdir} - fi - done + # restart services stopped during preparation stage + # systemctl list-units | perl -n -e 'qx(systemctl start $1) if /^.*? 
((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' + fi - # Clean up OS package cache ; re-hold systemd package if is_debuntu ; then + # Clean up OS package cache apt-get -y -qq clean apt-get -y -qq autoremove - if is_debian12 ; then + # re-hold systemd package + if ge_debian12 ; then apt-mark hold systemd libsystemd0 ; fi else dnf clean all fi - # print disk usage statistics - if is_debuntu ; then - # Rocky doesn't have sort -h and fails when the argument is passed - du --max-depth 3 -hx / | sort -h | tail -10 + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 | sort -h + elif is_debian ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 | sort -h + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas} \ + /usr/lib64/google-cloud-sdk \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? 
\ + /opt/conda/miniconda3 fi # Process disk usage logs from installation period - rm -f /tmp/keep-running-df - sleep 6s + rm -f /run/keep-running-df + sync + sleep 5.01s # compute maximum size of disk during installation # Log file contains logs like the following (minus the preceeding #): -#Filesystem Size Used Avail Use% Mounted on -#/dev/vda2 6.8G 2.5G 4.0G 39% / - df --si - perl -e '$max=( sort +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / + df / | tee -a "/run/disk-usage.log" + + perl -e '@siz=( sort { $a => $b } map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } )[-1]; -print( "maximum-disk-used: $max", $/ );' < /tmp/disk-usage.log + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $inc=$max-$min; +print( " samples-taken: ", scalar @siz, $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" echo "exit_handler has completed" # zero free disk space if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero ; sync ; rm -f /zero + dd if=/dev/zero of=/zero + sync + sleep 3s + rm -f /zero fi return 0 } -trap exit_handler EXIT +function set_proxy(){ + export METADATA_HTTP_PROXY="$(get_metadata_attribute http-proxy)" + export http_proxy="${METADATA_HTTP_PROXY}" + export https_proxy="${METADATA_HTTP_PROXY}" + export HTTP_PROXY="${METADATA_HTTP_PROXY}" + export HTTPS_PROXY="${METADATA_HTTP_PROXY}" + export no_proxy=metadata.google.internal,169.254.169.254 + export NO_PROXY=metadata.google.internal,169.254.169.254 +} -function prepare_to_install(){ - nvsmi_works="0" - readonly bdcfg="/usr/local/bin/bdconfig" - download_dir=/tmp/ +function mount_ramdisk(){ + local free_mem free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" + if [[ ${free_mem} -lt 10500000 ]]; then return 0 ; fi + # Write to a ramdisk instead of churning the persistent disk - if [[ ${free_mem} -ge 5250000 ]]; then - download_dir="/mnt/shm" - mkdir -p 
"${download_dir}" - mount -t tmpfs tmpfs "${download_dir}" - # Download conda packages to tmpfs - /opt/conda/miniconda3/bin/conda config --add pkgs_dirs "${download_dir}" + tmpdir="/mnt/shm" + mkdir -p "${tmpdir}" + mount -t tmpfs tmpfs "${tmpdir}" - # Download pip packages to tmpfs - pip config set global.cache-dir "${download_dir}" || echo "unable to set global.cache-dir" + # Clear pip cache + # TODO: make this conditional on which OSs have pip without cache purge + pip cache purge || echo "unable to purge pip cache" - # Download OS packages to tmpfs - if is_debuntu ; then - mount -t tmpfs tmpfs /var/cache/apt/archives - else - mount -t tmpfs tmpfs /var/cache/dnf - fi + # Download pip packages to tmpfs + pip config set global.cache-dir "${tmpdir}" || echo "unable to set global.cache-dir" + + # Download OS packages to tmpfs + if is_debuntu ; then + mount -t tmpfs tmpfs /var/cache/apt/archives + else + mount -t tmpfs tmpfs /var/cache/dnf + fi +} + +function prepare_to_install(){ + nvsmi_works="0" + readonly bdcfg="/usr/local/bin/bdconfig" + tmpdir=/tmp/ + if ! is_debuntu && ! 
is_rocky ; then + echo "Unsupported OS: '$(os_name)'" + exit 1 fi - install_log="${download_dir}/install.log" + + repair_old_backports + + export DEBIAN_FRONTEND=noninteractive + + trap exit_handler EXIT + mount_ramdisk + install_log="${tmpdir}/install.log" + + set_proxy if is_debuntu ; then clean_up_sources_lists apt-get update -qq apt-get -y clean + sleep 5s apt-get -y -qq autoremove - if is_debian12 ; then + if ge_debian12 ; then apt-mark unhold systemd libsystemd0 ; fi else dnf clean all fi - # Clean conda cache - /opt/conda/miniconda3/bin/conda clean -a - # zero free disk space - if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - set +e - time dd if=/dev/zero of=/zero ; sync ; rm -f /zero - set -e - fi + if [[ -n "$(get_metadata_attribute creating-image)" ]]; then ( set +e + time dd if=/dev/zero of=/zero status=none ; sync ; sleep 3s ; rm -f /zero + ) fi configure_dkms_certs + install_dependencies + # Monitor disk usage in a screen session - if is_debuntu ; then - apt-get install -y -qq screen > /dev/null 2>&1 - elif is_rocky ; then - dnf -y -q install screen > /dev/null 2>&1 - fi - touch /tmp/keep-running-df + df / > "/run/disk-usage.log" + touch "/run/keep-running-df" screen -d -m -US keep-running-df \ - bash -c 'while [[ -f /tmp/keep-running-df ]] ; do df --si / | tee -a /tmp/disk-usage.log ; sleep 5s ; done' + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" } prepare_to_install diff --git a/examples/secure-boot/pre-init.sh b/examples/secure-boot/pre-init.sh index 7797b4f..57e8e62 100644 --- a/examples/secure-boot/pre-init.sh +++ b/examples/secure-boot/pre-init.sh @@ -25,6 +25,9 @@ export PROJECT_ID="$(jq -r .PROJECT_ID env.json)" export PURPOSE="$(jq -r .PURPOSE env.json)" export BUCKET="$(jq -r .BUCKET env.json)" export ZONE="$(jq -r .ZONE env.json)" +export SUBNET="$(jq -r .SUBNET env.json)" + +export region="$(echo "${ZONE}" | perl -pe 's/-[a-z]+$//')" custom_image_zone="${ZONE}" 
disk_size_gb="30" # greater than or equal to 30 @@ -37,13 +40,12 @@ gcloud config set project ${PROJECT_ID} #gcloud auth login eval "$(bash examples/secure-boot/create-key-pair.sh)" -metadata="public_secret_name=${public_secret_name}" -metadata="${metadata},private_secret_name=${private_secret_name}" -metadata="${metadata},secret_project=${secret_project}" -metadata="${metadata},secret_version=${secret_version}" -metadata="${metadata},dask-runtime=standalone" +metadata="dask-runtime=standalone" metadata="${metadata},rapids-runtime=DASK" metadata="${metadata},cuda-version=12.4" +metadata="${metadata},creating-image=c9h" +metadata="${metadata},rapids-mirror-disk=rapids-mirror-${region}" +metadata="${metadata},rapids-mirror-host=10.42.79.42" # If no OS family specified, default to debian if [[ "${IMAGE_VERSION}" != *-* ]] ; then @@ -67,6 +69,12 @@ function generate() { return fi + local install_image="$(jq -r ".[] | select(.name == \"${image_name}-install\").name" "${tmpdir}/images.json")" + if [[ -n "${install_image}" ]] ; then + echo "Install image already exists. Cleaning up after aborted run." 
+ gcloud -q compute images delete "${image_name}-install" + fi + local instance="$(jq -r ".[] | select(.name == \"${image_name}-install\").name" "${tmpdir}/instances.json")" if [[ -n "${instance}" ]]; then @@ -77,7 +85,7 @@ function generate() { fi set -xe python generate_custom_image.py \ - --machine-type "n1-standard-8" \ + --machine-type "n1-standard-16" \ --accelerator "type=nvidia-tesla-t4" \ --image-name "${image_name}" \ --customization-script "${customization_script}" \ @@ -86,6 +94,7 @@ function generate() { --zone "${custom_image_zone}" \ --disk-size "${disk_size_gb}" \ --gcs-bucket "${BUCKET}" \ + --subnet "${SUBNET}" \ --shutdown-instance-timer-sec=30 \ --no-smoke-test \ ${extra_args} @@ -100,15 +109,15 @@ function generate_from_base_purpose() { # base image -> cuda case "${dataproc_version}" in - "2.0-debian10" ) disk_size_gb="38" ;; # 40G 31G 7.8G 80% / # cuda-pre-init-2-0-debian10 - "2.0-rocky8" ) disk_size_gb="35" ;; # 38G 32G 6.2G 84% / # cuda-pre-init-2-0-rocky8 - "2.0-ubuntu18" ) disk_size_gb="37" ;; # 39G 30G 8.5G 79% / # cuda-pre-init-2-0-ubuntu18 - "2.1-debian11" ) disk_size_gb="37" ;; # 39G 34G 4.1G 90% / # cuda-pre-init-2-1-debian11 - "2.1-rocky8" ) disk_size_gb="38" ;; # 41G 35G 6.1G 86% / # cuda-pre-init-2-1-rocky8 - "2.1-ubuntu20" ) disk_size_gb="35" ;; # 37G 32G 4.4G 88% / # cuda-pre-init-2-1-ubuntu20 - "2.2-debian12" ) disk_size_gb="38" ;; # 40G 35G 3.3G 92% / # cuda-pre-init-2-2-debian12 - "2.2-rocky9" ) disk_size_gb="40" ;; # 42G 36G 5.9G 86% / # cuda-pre-init-2-2-rocky9 - "2.2-ubuntu22" ) disk_size_gb="38" ;; # 40G 35G 4.8G 88% / # cuda-pre-init-2-2-ubuntu22 + "2.0-debian10" ) disk_size_gb="30" ;; # 29.30G 28.29G 0 100% / # cuda-pre-init-2-0-debian10 + "2.0-rocky8" ) disk_size_gb="30" ;; # 29.79G 28.94G 0.85G 98% / # cuda-pre-init-2-0-rocky8 + "2.0-ubuntu18" ) disk_size_gb="30" ;; # 28.89G 27.64G 1.24G 96% / # cuda-pre-init-2-0-ubuntu18 + "2.1-debian11" ) disk_size_gb="32" ;; # 31.26G 30.74G 0 100% / # cuda-pre-init-2-1-debian11 + 
"2.1-rocky8" ) disk_size_gb="34" ;; # 33.79G 32.00G 1.80G 95% / # cuda-pre-init-2-1-rocky8 + "2.1-ubuntu20" ) disk_size_gb="32" ;; # 30.83G 30.35G 0.46G 99% / # cuda-pre-init-2-1-ubuntu20 + "2.2-debian12" ) disk_size_gb="34" ;; # 33.23G 32.71G 0 100% / # cuda-pre-init-2-2-debian12 + "2.2-rocky9" ) disk_size_gb="35" ;; # 34.79G 33.16G 1.64G 96% / # cuda-pre-init-2-2-rocky9 + "2.2-ubuntu22" ) disk_size_gb="35" ;; # 33.74G 32.94G 0.78G 98% / # cuda-pre-init-2-2-ubuntu22 esac # Install GPU drivers + cuda on dataproc base image @@ -118,15 +127,15 @@ time generate_from_dataproc_version "${dataproc_version}" # cuda image -> rapids case "${dataproc_version}" in - "2.0-debian10" ) disk_size_gb="44" ;; # 47G 41G 4.0G 91% / # rapids-pre-init-2-0-debian10 - "2.0-rocky8" ) disk_size_gb="45" ;; # 49G 42G 7.0G 86% / # rapids-pre-init-2-0-rocky8 - "2.0-ubuntu18" ) disk_size_gb="43" ;; # 45G 40G 4.9G 90% / # rapids-pre-init-2-0-ubuntu18 - "2.1-debian11" ) disk_size_gb="46" ;; # 49G 43G 3.6G 93% / # rapids-pre-init-2-1-debian11 - "2.1-rocky8" ) disk_size_gb="48" ;; # 52G 45G 7.2G 87% / # rapids-pre-init-2-1-rocky8 - "2.1-ubuntu20" ) disk_size_gb="45" ;; # 47G 42G 5.2G 89% / # rapids-pre-init-2-1-ubuntu20 - "2.2-debian12" ) disk_size_gb="48" ;; # 51G 45G 3.8G 93% / # rapids-pre-init-2-2-debian12 - "2.2-rocky9" ) disk_size_gb="49" ;; # 53G 46G 7.2G 87% / # rapids-pre-init-2-2-rocky9 - "2.2-ubuntu22" ) disk_size_gb="48" ;; # 50G 45G 5.6G 89% / # rapids-pre-init-2-2-ubuntu22 + "2.0-debian10" ) disk_size_gb="41" ;; # 40.12G 37.51G 0.86G 98% / # rapids-pre-init-2-0-debian10 + "2.0-rocky8" ) disk_size_gb="41" ;; # 38.79G 38.04G 0.76G 99% / # rapids-pre-init-2-0-rocky8 + "2.0-ubuntu18" ) disk_size_gb="40" ;; # 37.62G 36.69G 0.91G 98% / # rapids-pre-init-2-0-ubuntu18 + "2.1-debian11" ) disk_size_gb="44" ;; # 42.09G 39.77G 0.49G 99% / # rapids-pre-init-2-1-debian11 + "2.1-rocky8" ) disk_size_gb="44" ;; # 43.79G 41.11G 2.68G 94% / # rapids-pre-init-2-1-rocky8 + "2.1-ubuntu20" ) 
disk_size_gb="45" ;; # 39.55G 39.39G 0.15G 100% / # rapids-pre-init-2-1-ubuntu20 + "2.2-debian12" ) disk_size_gb="46" ;; # 44.06G 41.73G 0.41G 100% / # rapids-pre-init-2-2-debian12 + "2.2-rocky9" ) disk_size_gb="45" ;; # 44.79G 42.29G 2.51G 95% / # rapids-pre-init-2-2-rocky9 + "2.2-ubuntu22" ) disk_size_gb="46" ;; # 42.46G 41.97G 0.48G 99% / # rapids-pre-init-2-2-ubuntu22 esac #disk_size_gb="50" @@ -134,9 +143,9 @@ esac # Install dask with rapids on base image PURPOSE="rapids-pre-init" customization_script="examples/secure-boot/rapids.sh" -time generate_from_base_purpose "cuda-pre-init" +#time generate_from_base_purpose "cuda-pre-init" -# Install dask without rapids on base image -PURPOSE="dask-pre-init" -customization_script="examples/secure-boot/dask.sh" -time generate_from_base_purpose "cuda-pre-init" +## Install dask without rapids on base image +#PURPOSE="dask-pre-init" +#customization_script="examples/secure-boot/dask.sh" +#time generate_from_base_purpose "cuda-pre-init" diff --git a/examples/secure-boot/rapids.sh b/examples/secure-boot/rapids.sh index 6c5c9d4..308003f 100644 --- a/examples/secure-boot/rapids.sh +++ b/examples/secure-boot/rapids.sh @@ -19,11 +19,12 @@ set -euxo pipefail -function os_id() { grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; } -function is_ubuntu() { [[ "$(os_id)" == 'ubuntu' ]] ; } -function is_ubuntu18() { is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; } -function is_debian() { [[ "$(os_id)" == 'debian' ]] ; } -function is_debuntu() { is_debian || is_ubuntu ; } +function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_version() ( set +x ; grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function is_ubuntu() ( set +x ; [[ "$(os_id)" == 'ubuntu' ]] ; ) +function is_ubuntu18() ( set +x ; is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; ) +function is_debian() ( set +x ; [[ "$(os_id)" == 'debian' ]] ; ) +function is_debuntu() ( set +x ; is_debian || is_ubuntu ; ) function 
print_metadata_value() { local readonly tmpfile=$(mktemp) @@ -72,17 +73,6 @@ function get_metadata_attribute() ( function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; } function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; } -function execute_with_retries() { - local -r cmd="$*" - for i in {0..9} ; do - if eval "$cmd"; then - return 0 ; fi - sleep 5 - done - echo "Cmd '${cmd}' failed." - return 1 -} - function configure_dask_yarn() { readonly DASK_YARN_CONFIG_DIR=/etc/dask/ readonly DASK_YARN_CONFIG_FILE=${DASK_YARN_CONFIG_DIR}/config.yaml @@ -98,7 +88,7 @@ function configure_dask_yarn() { # https://yarn.dask.org/en/latest/configuration.html#default-configuration yarn: - environment: python://${DASK_CONDA_ENV}/bin/python + environment: python://${RAPIDS_CONDA_ENV}/bin/python worker: count: 2 @@ -120,7 +110,7 @@ function install_systemd_dask_worker() { LOGFILE="/var/log/${DASK_WORKER_SERVICE}.log" nvidia-smi -c DEFAULT echo "dask-cuda-worker starting, logging to \${LOGFILE}" -${DASK_CONDA_ENV}/bin/dask-cuda-worker "${MASTER}:8786" --local-directory="${dask_worker_local_dir}" --memory-limit=auto >> "\${LOGFILE}" 2>&1 +${RAPIDS_CONDA_ENV}/bin/dask-cuda-worker "${MASTER}:8786" --local-directory="${dask_worker_local_dir}" --memory-limit=auto >> "\${LOGFILE}" 2>&1 EOF chmod 750 "${DASK_WORKER_LAUNCHER}" @@ -172,7 +162,7 @@ function install_systemd_dask_scheduler() { #!/bin/bash LOGFILE="/var/log/${DASK_SCHEDULER_SERVICE}.log" echo "dask scheduler starting, logging to \${LOGFILE}" -${DASK_CONDA_ENV}/bin/dask scheduler >> "\${LOGFILE}" 2>&1 +${RAPIDS_CONDA_ENV}/bin/dask scheduler >> "\${LOGFILE}" 2>&1 EOF chmod 750 "${DASK_SCHEDULER_LAUNCHER}" @@ -419,16 +409,24 @@ EOF } function install_dask_rapids() { +#To enable CUDA support, UCX requires the CUDA Runtime library (libcudart). 
+#The library can be installed with the appropriate command below: + +#* For CUDA 11, run: conda install cudatoolkit cuda-version=11 +#* For CUDA 12, run: conda install cuda-cudart cuda-version=12 + if is_cuda12 ; then local python_spec="python>=3.11" local cuda_spec="cuda-version>=12,<13" - local dask_spec="dask>=2024.7" + local dask_spec="dask" local numba_spec="numba" + local cudart_spec="cuda-cudart" elif is_cuda11 ; then local python_spec="python>=3.9" local cuda_spec="cuda-version>=11,<12.0a0" local dask_spec="dask" local numba_spec="numba" + local cudart_spec="cudatoolkit" fi rapids_spec="rapids>=${RAPIDS_VERSION}" @@ -451,36 +449,59 @@ function install_dask_rapids() { CONDA_PACKAGES+=( "${cuda_spec}" + "${cudart_spec}" "${rapids_spec}" "${dask_spec}" - "dask-bigquery" - "dask-ml" - "dask-sql" "cudf" "${numba_spec}" ) # Install cuda, rapids, dask - mamba="/opt/conda/miniconda3/bin/mamba" - conda="/opt/conda/miniconda3/bin/conda" + mamba="${CONDA_ROOT}/bin/mamba" + conda="${CONDA_ROOT}/bin/conda" + + readonly DASK_CONDA_ENV="${CONDA_ROOT}/envs/${RAPIDS_ENV_NAME}" + if test -d "${DASK_CONDA_ENV}" ; then + "${conda}" remove -n "${RAPIDS_ENV_NAME}" --all > /dev/null 2>&1 || rm -rf "${DASK_CONDA_ENV}" + fi + # Unpin conda version and upgrade +# perl -ni -e 'print unless /^conda /' "${CONDA_ROOT}/conda-meta/pinned" +# "${mamba}" install conda mamba libmamba libmambapy conda-libmamba-solver - "${conda}" remove -n dask --all || echo "unable to remove conda environment [dask]" + # This error occurs when we set channel_alias +# util_files_to_patch="$(find "${CONDA_ROOT}" -name utils.py | grep mamba/utils.py)" +# perl -pi -e 's[raise ValueError\("missing key][print("missing key]' ${util_files_to_patch} +# File "/home/zhyue/mambaforge/lib/python3.9/site-packages/mamba/utils.py", line 393, in compute_final_precs +# raise ValueError("missing key {} in channels: {}".format(key, lookup_dict)) + + CONDA_EXE="${CONDA_ROOT}/bin/conda" + 
CONDA_PYTHON_EXE="${CONDA_ROOT}/bin/python" + PATH="${CONDA_ROOT}/bin/condabin:${CONDA_ROOT}/bin:${PATH}" ( set +e local is_installed="0" for installer in "${mamba}" "${conda}" ; do - test -d "${DASK_CONDA_ENV}" || \ - time "${installer}" "create" -m -n 'dask-rapids' -y --no-channel-priority \ + echo "${installer}" "create" -q -m -n "${RAPIDS_ENV_NAME}" -y --no-channel-priority \ + -c 'conda-forge' -c 'nvidia' -c 'rapidsai' \ + ${CONDA_PACKAGES[*]} \ + "${python_spec}" +# read placeholder + # for debugging, consider -vvv + time "${installer}" "create" -q -m -n "${RAPIDS_ENV_NAME}" -y --no-channel-priority \ -c 'conda-forge' -c 'nvidia' -c 'rapidsai' \ ${CONDA_PACKAGES[*]} \ "${python_spec}" \ - > "${install_log}" 2>&1 && retval=$? || { retval=$? ; cat "${install_log}" ; } + && retval=$? || retval=$? sync if [[ "$retval" == "0" ]] ; then is_installed="1" break + else + test -d "${RAPIDS_CONDA_ENV}" && ( "${conda}" remove -n "${RAPIDS_ENV_NAME}" --all > /dev/null 2>&1 || rm -rf "${RAPIDS_CONDA_ENV}" ) + "${conda}" config --set channel_priority flexible + df -h + clean_conda_cache fi - "${conda}" config --set channel_priority flexible done if [[ "${is_installed}" == "0" ]]; then echo "failed to install dask" @@ -533,26 +554,38 @@ function main() { fi } -function exit_handler() ( +function clean_conda_cache() { + if ! 
grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then + "${CONDA}" clean -a + fi +} + +function exit_handler() { set +e + set -x echo "Exit handler invoked" - # Free conda cache - /opt/conda/miniconda3/bin/conda clean -a > /dev/null 2>&1 + unmount_rapids_mirror - # Clear pip cache - pip cache purge || echo "unable to purge pip cache" + mv ~/.condarc.default ~/.condarc + mv /root/.config/pip/pip.conf.default /root/.config/pip/pip.conf - # remove the tmpfs conda pkgs_dirs - if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi + # If system memory was sufficient to mount memory-backed filesystems + if [[ "${tmpdir}" == "/mnt/shm" ]] ; then + echo "cleaning up tmpfs mounts" - # Clean up shared memory mounts - for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then - rm -rf ${shmdir}/* - umount -f ${shmdir} - fi - done + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm /tmp ; do + if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then + sync + umount -f ${shmdir} + fi + done + else + clean_conda_cache + # Clear pip cache from non-tmpfs + pip cache purge || echo "unable to purge pip cache" + fi # Clean up OS package cache ; re-hold systemd package if is_debuntu ; then @@ -562,36 +595,129 @@ function exit_handler() ( dnf clean all fi - # print disk usage statistics - if is_debuntu ; then - # Rocky doesn't have sort -h and fails when the argument is passed - du --max-depth 3 -hx / | sort -h | tail -10 + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + ${CONDA_ROOT} + elif is_debian ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /usr/local/cuda-1?.? 
\ + ${CONDA_ROOT} + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas} \ + /usr/lib64/google-cloud-sdk \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + ${CONDA_ROOT} fi # Process disk usage logs from installation period - rm -f "${tmpdir}/keep-running-df" - sleep 6s + rm -f /run/keep-running-df + sync + sleep 5.01s # compute maximum size of disk during installation # Log file contains logs like the following (minus the preceeding #): -#Filesystem Size Used Avail Use% Mounted on -#/dev/vda2 6.8G 2.5G 4.0G 39% / - df -h / | tee -a "${tmpdir}/disk-usage.log" - perl -e '$max=( sort +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / - set +x + df / | tee -a "/run/disk-usage.log" + perl -e '@siz=( sort - { $a => $b } + { $b <=> $a } map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } )[-1]; -print( "maximum-disk-used: $max", $/ );' < "${tmpdir}/disk-usage.log" - + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $inc=$max-$min; +print( " samples-taken: ", scalar @siz, $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" + set -x echo "exit_handler has completed" # zero free disk space if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero ; sync ; rm -f /zero + eval "dd if=/dev/zero of=/zero" + sync + sleep 3s + rm -f /zero fi return 0 -) +} + +function unmount_rapids_mirror() { + if ! 
grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then return ; fi + + umount "${rapids_mirror_mountpoint}" + umount "${rapids_mirror_mountpoint}_ro" + gcloud compute instances detach-disk "$(hostname -s)" \ + --device-name "${RAPIDS_MIRROR_DISK_NAME}" \ + --zone "${ZONE}" \ + --disk-scope regional +} + +function mount_rapids_mirror() { + # use a regional mirror instead of fetching from cloudflare CDN + export RAPIDS_MIRROR_DISK_NAME="$(gcloud compute disks list | awk "/${RAPIDS_MIRROR_DISK}-/ {print \$1}" | sort | tail -1)" + export RAPIDS_DISK_FQN="projects/${PROJECT_ID}/regions/${REGION}/disks/${RAPIDS_MIRROR_DISK_NAME}" + + if [[ -z "${RAPIDS_MIRROR_DISK_NAME}" ]]; then return ; fi + + # If the service account can describe the disk, attempt to attach and mount it + eval gcloud compute disks describe "${RAPIDS_MIRROR_DISK_NAME}" --region "${REGION}" > /tmp/mirror-disk.txt + if [[ "$?" != "0" ]] ; then return ; fi + + if ! grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then + gcloud compute instances attach-disk "$(hostname -s)" \ + --disk "${RAPIDS_DISK_FQN}" \ + --device-name "${RAPIDS_MIRROR_DISK_NAME}" \ + --disk-scope "regional" \ + --zone "${ZONE}" \ + --mode=ro + + mkdir -p "${rapids_mirror_mountpoint}" "${rapids_mirror_mountpoint}_ro" "${tmpdir}/overlay" "${tmpdir}/workdir" + mount -o ro "/dev/disk/by-id/google-${RAPIDS_MIRROR_DISK_NAME}" "${rapids_mirror_mountpoint}_ro" + mount -t overlay overlay -o lowerdir="${rapids_mirror_mountpoint}_ro",upperdir="${tmpdir}/overlay",workdir="${tmpdir}/workdir" "${rapids_mirror_mountpoint}" + fi + ${CONDA} config --add pkgs_dirs "${rapids_mirror_mountpoint}/conda_cache" +# echo "${CONDA}" config --set channel_alias "file://${rapids_mirror_mountpoint}/conda.anaconda.org" +# for channel in 'rapidsai' 'nvidia' 'pkgs/main' 'pkgs/r' 'conda-forge' ; do +# echo "${CONDA}" config --set \ +# "custom_channels.${channel}" "file://${rapids_mirror_mountpoint}/conda.anaconda.org/" +# done + # patch conda to install from 
mirror +# files_to_patch=$(find ${CONDA_ROOT}/ -name 'download.py' | grep conda/gateways/connection) +# perl -i -pe 's{if "://" not in self.url:}{if "file://" in self.url or "://" not in self.url:}' \ +# ${files_to_patch} +# perl -i -pe 's{self.url = url$}{self.url = url.replace("file://","")}' \ +# ${files_to_patch} + +# time for d in dask main nvidia r rapidsai conda-forge ; do +# find "${rapids_mirror_mountpoint}/conda.anaconda.org/${d}" -name '*.conda' -o -name '*.tar.bz2' -print0 | \ +# xargs -0 ln -sf -t "${pkgs_dir}" +# done + + # Point to the cache built with the mirror +# for channel in 'rapidsai' 'nvidia' 'main' 'r' 'conda-forge' ; do +# for plat in noarch linux-64 ; do +# echo ${CONDA} config --add pkgs_dirs "/srv/mirror/conda.anaconda.org/${channel}/${plat}" +# done +# done + +# for channel in pkgs/main pkgs/r ; do +# echo ${CONDA} config --add default_channels "file://${rapids_mirror_mountpoint}/conda.anaconda.org/${channel}" +# done + +} -function prepare_to_install(){ +function prepare_to_install() { readonly DEFAULT_CUDA_VERSION="12.4" CUDA_VERSION=$(get_metadata_attribute 'cuda-version' ${DEFAULT_CUDA_VERSION}) readonly CUDA_VERSION @@ -599,12 +725,8 @@ function prepare_to_install(){ readonly ROLE=$(get_metadata_attribute dataproc-role) readonly MASTER=$(get_metadata_attribute dataproc-master) - # RAPIDS config - RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'DASK') - readonly RAPIDS_RUNTIME - - readonly DEFAULT_DASK_RAPIDS_VERSION="24.08" - readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION}) + export CONDA_ROOT=/opt/conda/miniconda3 + export CONDA="${CONDA_ROOT}/bin/conda" # Dask config DASK_RUNTIME="$(get_metadata_attribute dask-runtime || echo 'standalone')" @@ -612,7 +734,8 @@ function prepare_to_install(){ readonly DASK_SERVICE=dask-cluster readonly DASK_WORKER_SERVICE=dask-worker readonly DASK_SCHEDULER_SERVICE=dask-scheduler - readonly 
DASK_CONDA_ENV="/opt/conda/miniconda3/envs/dask-rapids" + readonly RAPIDS_ENV_NAME="dask-rapids" + readonly RAPIDS_CONDA_ENV="${CONDA_ROOT}/envs/${RAPIDS_ENV_NAME}" # Knox config readonly KNOX_HOME=/usr/lib/knox @@ -620,42 +743,84 @@ function prepare_to_install(){ readonly KNOX_DASKWS_DIR="${KNOX_HOME}/data/services/daskws/0.1.0" enable_worker_service="0" + # RAPIDS config + RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'DASK') + readonly RAPIDS_RUNTIME + + readonly DEFAULT_DASK_RAPIDS_VERSION="23.11" + readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION}) + + readonly PROJECT_ID="$(gcloud config get project)" + zone="$(/usr/share/google/get_metadata_value zone)" + export ZONE="$(echo $zone | sed -e 's:.*/::')" + export REGION="$(echo ${ZONE} | perl -pe 's/^(.+)-[^-]+$/$1/')" + + export RAPIDS_MIRROR_DISK="$(get_metadata_attribute 'rapids-mirror-disk' '')" + export RAPIDS_MIRROR_HOST="$(get_metadata_attribute 'rapids-mirror-host' '')" + + rapids_mirror_mountpoint=/srv/mirror + free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" + # With a local conda mirror mounted, use reduced ram disk size + if [[ -n "${RAPIDS_MIRROR_DISK}" ]] ; then + min_mem=18500000 + pkgs_dir= + else + min_mem=33300000 + fi # Write to a ramdisk instead of churning the persistent disk - if [[ ${free_mem} -ge 5250000 ]]; then + if [[ ${free_mem} -ge ${min_mem} ]]; then tmpdir=/mnt/shm - mkdir -p /mnt/shm - mount -t tmpfs tmpfs /mnt/shm - - # Download conda packages to tmpfs - /opt/conda/miniconda3/bin/conda config --add pkgs_dirs /mnt/shm - mount -t tmpfs tmpfs /mnt/shm + mkdir -p "${tmpdir}" + mount -t tmpfs tmpfs "${tmpdir}" - # Download pip packages to tmpfs - pip config set global.cache-dir /mnt/shm || echo "unable to set global.cache-dir" - - # Download OS packages to tmpfs - if is_debuntu ; then - mount -t tmpfs tmpfs /var/cache/apt/archives - else - mount -t tmpfs tmpfs /var/cache/dnf - fi + # Minimum of 11G of capacity required 
for rapids package install via conda + # + 5G without rapids mirror mounted + mount -t tmpfs tmpfs "${tmpdir}" else tmpdir=/tmp fi + install_log="${tmpdir}/install.log" trap exit_handler EXIT + touch ~/.condarc + cp ~/.condarc ~/.condarc.default + + #"${CONDA}" config --set verbosity 3 + # Clean conda cache + clean_conda_cache + + mount_rapids_mirror + + if [[ -n "${RAPIDS_MIRROR_HOST}" ]] && nc -vz "${RAPIDS_MIRROR_HOST}" 80 > /dev/null 2>&1 ; then + for channel in 'conda-forge' 'rapidsai' 'nvidia' 'pkgs/r' 'pkgs/main' ; do + echo "${CONDA}" config --set \ + "custom_channels.${channel}" "http://${RAPIDS_MIRROR_HOST}/conda.anaconda.org/" + done + fi + + if grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then + # if we are using the mirror disk, install exclusively from its cache + extra_conda_args="--offline" + else + pkgs_dir="${tmpdir}/pkgs_dir" + mkdir -p "${pkgs_dir}" + "${CONDA}" config --add pkgs_dirs "${pkgs_dir}" + fi + # Monitor disk usage in a screen session if is_debuntu ; then + command -v screen || \ apt-get install -y -qq screen else + command -v screen || \ dnf -y -q install screen fi - df -h / | tee "${tmpdir}/disk-usage.log" - touch "${tmpdir}/keep-running-df" + df / > "/run/disk-usage.log" + touch "/run/keep-running-df" screen -d -m -US keep-running-df \ - bash -c "while [[ -f ${tmpdir}/keep-running-df ]] ; do df -h / | tee -a ${tmpdir}/disk-usage.log ; sleep 5s ; done" + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" } prepare_to_install