diff --git a/README.md b/README.md index f0a1111..e55a81b 100644 --- a/README.md +++ b/README.md @@ -133,10 +133,10 @@ python generate_custom_image.py \ default value of 300 seconds will be used. * **--dry-run**: Dry run mode which only validates input and generates workflow script without creating image. Disabled by default. -* **--trusted-cert**: a certificate in DER format to be inserted - into the custom image's EFI boot sector. Can be generated by - reading examples/secure-boot/README.md. This argument is mutually - exclusive with base-image-family + +* **--trusted-cert**: (Optional) Pass an empty string to this + argument to disable support for shielded-secure-boot. + * **--metadata**: VM metadata which can be read by the customization script with `/usr/share/google/get_metadata_value attributes/` at runtime. The value of this flag takes the form of `key1=value1,key2=value2,...`. If the diff --git a/custom_image_utils/args_parser.py b/custom_image_utils/args_parser.py index 95bf9a1..2b0542d 100644 --- a/custom_image_utils/args_parser.py +++ b/custom_image_utils/args_parser.py @@ -228,8 +228,7 @@ def parse_args(args): type=str, required=False, default="tls/db.der", - help="""(Optional) Inserts the specified DER-format certificate into - the custom image's EFI boot sector for use with secure boot.""") - + help="""(Optional) Pass an empty string to this argument to + disable support for shielded-secure-boot.""") return parser.parse_args(args) diff --git a/custom_image_utils/shell_script_generator.py b/custom_image_utils/shell_script_generator.py index 89730c3..82d44b7 100644 --- a/custom_image_utils/shell_script_generator.py +++ b/custom_image_utils/shell_script_generator.py @@ -35,7 +35,10 @@ local -r cmd="$*" for ((i = 0; i < 3; i++)); do - if eval "$cmd"; then return 0 ; fi + set -x + time eval "$cmd" > "/tmp/{run_id}/install.log" 2>&1 && retval=$? || {{ retval=$? 
; cat "/tmp/{run_id}/install.log" ; }} + set +x + if [[ $retval == 0 ]] ; then return 0 ; fi sleep 5 done return 1 @@ -44,23 +47,24 @@ function exit_handler() {{ echo 'Cleaning up before exiting.' - if [[ -f /tmp/{run_id}/vm_created ]]; then + if [[ -f /tmp/{run_id}/vm_created ]]; then ( set +e echo 'Deleting VM instance.' - execute_with_retries gcloud compute instances delete {image_name}-install \ - --project={project_id} --zone={zone} -q - elif [[ -f /tmp/{run_id}/disk_created ]]; then + execute_with_retries \ + gcloud compute instances delete {image_name}-install --project={project_id} --zone={zone} -q + ) elif [[ -f /tmp/{run_id}/disk_created ]]; then echo 'Deleting disk.' - execute_with_retries gcloud compute ${{base_obj_type}} delete {image_name}-install --project={project_id} --zone={zone} -q + execute_with_retries \ + gcloud compute ${{base_obj_type}} delete {image_name}-install --project={project_id} --zone={zone} -q fi echo 'Uploading local logs to GCS bucket.' gsutil -m rsync -r {log_dir}/ {gcs_log_dir}/ if [[ -f /tmp/{run_id}/image_created ]]; then - echo -e "${{GREEN}}Workflow succeeded, check logs at {log_dir}/ or {gcs_log_dir}/${{NC}}" + echo -e "${{GREEN}}Workflow succeeded${{NC}}, check logs at {log_dir}/ or {gcs_log_dir}/" exit 0 else - echo -e "${{RED}}Workflow failed, check logs at {log_dir}/ or {gcs_log_dir}/${{NC}}" + echo -e "${{RED}}Workflow failed${{NC}}, check logs at {log_dir}/ or {gcs_log_dir}/" exit 1 fi }} @@ -111,11 +115,13 @@ local cert_args="" local num_src_certs="0" + metadata_arg="{metadata_flag}" if [[ -n '{trusted_cert}' ]] && [[ -f '{trusted_cert}' ]]; then # build tls/ directory from variables defined near the header of # the examples/secure-boot/create-key-pair.sh file eval "$(bash examples/secure-boot/create-key-pair.sh)" + metadata_arg="${{metadata_arg}},public_secret_name=${{public_secret_name}},private_secret_name=${{private_secret_name}},secret_project=${{secret_project}},secret_version=${{secret_version}}" # by 
default, a gcloud secret with the name of efi-db-pub-key-042 is # created in the current project to store the certificate installed @@ -132,16 +138,20 @@ local -a cert_list=() - local -a default_cert_list=("{trusted_cert}" "${{MS_UEFI_CA}}") + local -a default_cert_list + default_cert_list=("{trusted_cert}" "${{MS_UEFI_CA}}") local -a src_img_modulus_md5sums=() mapfile -t src_img_modulus_md5sums < <(print_img_dbs_modulus_md5sums {dataproc_base_image}) num_src_certs="${{#src_img_modulus_md5sums[@]}}" - echo "${{num_src_certs}} db certificates attached to source image" - if [[ "${{num_src_certs}}" -eq "0" ]]; then + echo "debug - num_src_certs: [${{#src_img_modulus_md5sums[*]}}]" + echo "value of src_img_modulus_md5sums: [${{src_img_modulus_md5sums}}]" + if [[ -z "${{src_img_modulus_md5sums}}" ]]; then + num_src_certs=0 echo "no db certificates in source image" - cert_list=default_cert_list + cert_list=( "${{default_cert_list[@]}}" ) else + echo "${{num_src_certs}} db certificates attached to source image" echo "db certs exist in source image" for cert in ${{default_cert_list[*]}}; do if test_element_in_array "$(print_modulus_md5sum ${{cert}})" ${{src_img_modulus_md5sums[@]}} ; then @@ -175,7 +185,8 @@ echo 'Creating image.' base_obj_type="images" instance_disk_args='--image-project={project_id} --image={image_name}-install --boot-disk-size={disk_size}G --boot-disk-type=pd-ssd' - time execute_with_retries gcloud compute images create {image_name}-install \ + execute_with_retries \ + gcloud compute images create {image_name}-install \ --project={project_id} \ --source-image={dataproc_base_image} \ ${{cert_args}} \ @@ -186,7 +197,7 @@ echo 'Creating disk.' 
base_obj_type="disks" instance_disk_args='--disk=auto-delete=yes,boot=yes,mode=rw,name={image_name}-install' - time execute_with_retries gcloud compute disks create {image_name}-install \ + execute_with_retries gcloud compute disks create {image_name}-install \ --project={project_id} \ --zone={zone} \ --image={dataproc_base_image} \ @@ -197,8 +208,7 @@ date echo 'Creating VM instance to run customization script.' - ( set -x - time execute_with_retries gcloud compute instances create {image_name}-install \ + execute_with_retries gcloud compute instances create {image_name}-install \ --project={project_id} \ --zone={zone} \ {network_flag} \ @@ -209,15 +219,16 @@ {accelerator_flag} \ {service_account_flag} \ --scopes=cloud-platform \ - {metadata_flag} \ - --metadata-from-file startup-script=startup_script/run.sh ) + "${{metadata_arg}}" \ + --metadata-from-file startup-script=startup_script/run.sh touch /tmp/{run_id}/vm_created # clean up intermediate install image - if [[ "${{base_obj_type}}" == "images" ]] ; then - execute_with_retries gcloud compute images delete -q {image_name}-install --project={project_id} - fi + if [[ "${{base_obj_type}}" == "images" ]] ; then ( set +e + # This sometimes returns an API error but deletes the image despite the failure + gcloud compute images delete -q {image_name}-install --project={project_id} + ) fi echo 'Waiting for customization script to finish and VM shutdown.' execute_with_retries gcloud compute instances tail-serial-port-output {image_name}-install \ @@ -226,7 +237,7 @@ --port=1 2>&1 \ | grep 'startup-script' \ | sed -e 's/ {image_name}-install.*startup-script://g' \ - | dd bs=1 of={log_dir}/startup-script.log \ + | dd status=none bs=1 of={log_dir}/startup-script.log \ || true echo 'Checking customization script result.' date @@ -243,13 +254,12 @@ date echo 'Creating custom image.' 
- ( set -x - time execute_with_retries gcloud compute images create {image_name} \ + execute_with_retries gcloud compute images create {image_name} \ --project={project_id} \ --source-disk-zone={zone} \ --source-disk={image_name}-install \ {storage_location_flag} \ - --family={family} ) + --family={family} touch /tmp/{run_id}/image_created }} diff --git a/examples/secure-boot/build-current-images.sh b/examples/secure-boot/build-current-images.sh index 0d7846d..f9147d2 100644 --- a/examples/secure-boot/build-current-images.sh +++ b/examples/secure-boot/build-current-images.sh @@ -49,6 +49,15 @@ function configure_service_account() { gcloud secrets add-iam-policy-binding "${public_secret_name}" \ --member="serviceAccount:${GSA}" \ --role="roles/secretmanager.secretAccessor" > /dev/null 2>&1 + + gcloud projects add-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/compute.instanceAdmin.v1 > /dev/null 2>&1 + + gcloud iam service-accounts add-iam-policy-binding "${GSA}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/iam.serviceAccountUser > /dev/null 2>&1 + } function revoke_bindings() { @@ -66,6 +75,15 @@ function revoke_bindings() { gcloud projects remove-iam-policy-binding "${PROJECT_ID}" \ --member="serviceAccount:${GSA}" \ --role="roles/secretmanager.viewer" > /dev/null 2>&1 + + gcloud projects remove-iam-policy-binding "${PROJECT_ID}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/compute.instanceAdmin.v1 > /dev/null 2>&1 + + gcloud iam service-accounts remove-iam-policy-binding "${GSA}" \ + --member="serviceAccount:${GSA}" \ + --role=roles/iam.serviceAccountUser > /dev/null 2>&1 + } export PROJECT_ID="$(jq -r .PROJECT_ID env.json)" @@ -85,49 +103,25 @@ configure_service_account session_name="build-current-images" readonly timestamp="$(date +%F-%H-%M)" -#readonly timestamp="2024-10-24-04-21" +#readonly timestamp="2024-11-27-06-47" export timestamp export tmpdir=/tmp/${timestamp}; -mkdir ${tmpdir} +mkdir -p 
${tmpdir} export ZONE="$(jq -r .ZONE env.json)" gcloud compute instances list --zones "${ZONE}" --format json > ${tmpdir}/instances.json gcloud compute images list --format json > ${tmpdir}/images.json # Run generation scripts simultaneously for each dataproc image version -screen -US "${session_name}" -c examples/secure-boot/pre-init.screenrc +screen -L -US "${session_name}" -c examples/secure-boot/pre-init.screenrc -# tail -n 3 /tmp/custom-image-*/logs/workflow.log -# tail -n 3 /tmp/custom-image-*/logs/startup-script.log -# tail -n 3 /tmp/custom-image-${PURPOSE}-2-*/logs/workflow.log function find_disk_usage() { - test -f /tmp/genline.pl || cat > /tmp/genline.pl<<'EOF' -#!/usr/bin/perl -w -use strict; - -my $fn = $ARGV[0]; -my( $config ) = ( $fn =~ /custom-image-(.*-(debian|rocky|ubuntu)\d+)-\d+/ ); - -my @raw_lines = ; -my( $l ) = grep { m: /dev/.*/\s*$: } @raw_lines; -my( $stats ) = ( $l =~ m:\s*/dev/\S+\s+(.*?)\s*$: ); - -my( $dp_version ) = ($config =~ /-pre-init-(.+)/); -$dp_version =~ s/-/./; - -my($max) = map { / maximum-disk-used: (\d+)/ } @raw_lines; -$max+=3; -my $i_dp_version = sprintf(q{%-15s}, qq{"$dp_version"}); - -print( qq{ $i_dp_version) disk_size_gb="$max" ;; # $stats # $config}, $/ ); -EOF - for f in $(grep -l 'Customization script suc' /tmp/custom-image-*/logs/workflow.log|sed -e 's/workflow.log/startup-script.log/') - do - grep -A20 'Filesystem.*Avail' $f | perl /tmp/genline.pl $f + grep 'Customization script' /tmp/custom-image-*/logs/workflow.log +# grep maximum-disk-used /tmp/custom-image-*/logs/startup-script.log + for workflow_log in $(grep -l "Customization script" /tmp/custom-image-*/logs/workflow.log) ; do + startup_log=$(echo "${workflow_log}" | sed -e 's/workflow.log/startup-script.log/') + grep -A5 'Filesystem.*1K-blocks' "${startup_log}" | perl examples/secure-boot/genline.pl "${workflow_log}" done } -# sleep 8m ; grep 'Customization script' /tmp/custom-image-*/logs/workflow.log -# grep maximum-disk-used 
/tmp/custom-image-*/logs/startup-script.log - revoke_bindings diff --git a/examples/secure-boot/create-key-pair.sh b/examples/secure-boot/create-key-pair.sh index 3039042..8f2a42a 100644 --- a/examples/secure-boot/create-key-pair.sh +++ b/examples/secure-boot/create-key-pair.sh @@ -74,7 +74,6 @@ function create_key () { fi if [[ -f "${PRIVATE_KEY}" ]]; then - echo "key already exists. Skipping generation." >&2 modulus_md5sum="$(cat tls/modulus-md5sum.txt)" return fi diff --git a/examples/secure-boot/dask.sh b/examples/secure-boot/dask.sh index e1c1229..b71b4e5 100644 --- a/examples/secure-boot/dask.sh +++ b/examples/secure-boot/dask.sh @@ -517,8 +517,8 @@ function main() { echo "Dask for ${DASK_RUNTIME} successfully initialized." } -function exit_handler() ( - set +e +function exit_handler() { + set +ex echo "Exit handler invoked" # Free conda cache @@ -527,16 +527,30 @@ function exit_handler() ( # Clear pip cache pip cache purge || echo "unable to purge pip cache" - # remove the tmpfs conda pkgs_dirs - if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi - - # Clean up shared memory mounts - for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then - rm -rf ${shmdir}/* - umount -f ${shmdir} - fi - done + # If system memory was sufficient to mount memory-backed filesystems + if [[ "${tmpdir}" == "/mnt/shm" ]] ; then + # Stop hadoop services + systemctl list-units | perl -n -e 'qx(systemctl stop $1) if /^.*? 
((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' + + # remove the tmpfs conda pkgs_dirs + /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm || echo "unable to remove pkgs_dirs conda config" + + # remove the tmpfs pip cache-dir + pip config unset global.cache-dir || echo "unable to unset global pip cache" + + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do + if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then + rm -rf ${shmdir}/* + sync + sleep 3s + execute_with_retries umount -f ${shmdir} + fi + done + + umount -f /tmp + systemctl list-units | perl -n -e 'qx(systemctl start $1) if /^.*? ((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' + fi # Clean up OS package cache ; re-hold systemd package if is_debuntu ; then @@ -546,36 +560,62 @@ function exit_handler() ( dnf clean all fi - # print disk usage statistics - if is_debuntu ; then - # Rocky doesn't have sort -h and fails when the argument is passed - du --max-depth 3 -hx / | sort -h | tail -10 + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 + elif is_debian ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas} \ + /usr/lib64/google-cloud-sdk \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? 
\ + /opt/conda/miniconda3 fi # Process disk usage logs from installation period - rm -f /tmp/keep-running-df - sleep 6s + rm -f /run/keep-running-df + sync + sleep 5.01s # compute maximum size of disk during installation # Log file contains logs like the following (minus the preceeding #): -#Filesystem Size Used Avail Use% Mounted on -#/dev/vda2 6.8G 2.5G 4.0G 39% / - df --si - perl -e '$max=( sort - map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } )[-1]; -print( "maximum-disk-used: $max", $/ );' < /tmp/disk-usage.log +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / + df / | tee -a "/run/disk-usage.log" + + perl -e '@siz=( sort { $a => $b } + map { (split)[2] =~ /^(\d+)/ } + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $inc=$max-$min; +print( " samples-taken: ", scalar @siz, $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" echo "exit_handler has completed" # zero free disk space if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero ; sync ; rm -f /zero + dd if=/dev/zero of=/zero + sync + sleep 3s + rm -f /zero fi return 0 -) - -trap exit_handler EXIT +} function prepare_to_install() { readonly DEFAULT_CUDA_VERSION="12.4" @@ -601,7 +641,8 @@ function prepare_to_install() { free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" # Write to a ramdisk instead of churning the persistent disk - if [[ ${free_mem} -ge 5250000 ]]; then + if [[ ${free_mem} -ge 10500000 ]]; then + tmpdir=/mnt/shm mkdir -p /mnt/shm mount -t tmpfs tmpfs /mnt/shm @@ -618,18 +659,22 @@ function prepare_to_install() { else mount -t tmpfs tmpfs /var/cache/dnf fi + else + tmpdir=/tmp fi + install_log="/run/install.log" + trap exit_handler EXIT # Monitor disk usage in a screen session if is_debuntu ; then apt-get install -y -qq screen - elif is_rocky ; then + else dnf -y -q install screen fi - rm -f /tmp/disk-usage.log - touch 
/tmp/keep-running-df + df / | tee "/run/disk-usage.log" + touch "/run/keep-running-df" screen -d -m -US keep-running-df \ - bash -c 'while [[ -f /tmp/keep-running-df ]] ; do df --si / | tee -a /tmp/disk-usage.log ; sleep 5s ; done' + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" } prepare_to_install diff --git a/examples/secure-boot/env.json.sample b/examples/secure-boot/env.json.sample index c8a89b5..2461fc0 100644 --- a/examples/secure-boot/env.json.sample +++ b/examples/secure-boot/env.json.sample @@ -3,5 +3,6 @@ "PURPOSE":"cuda-pre-init", "BUCKET":"my-bucket-name", "IMAGE_VERSION":"2.2-debian12", - "ZONE":"us-west4-a" + "ZONE":"us-west4-a", + "SUBNET":"my-subnet" } diff --git a/examples/secure-boot/genline.pl b/examples/secure-boot/genline.pl new file mode 100644 index 0000000..81ab752 --- /dev/null +++ b/examples/secure-boot/genline.pl @@ -0,0 +1,28 @@ +#!/usr/bin/perl -w +use strict; +use POSIX qw(ceil); + +# /tmp/custom-image-cuda-pre-init-2-0-debian10-2024-11-14-20-00-20241114-200043/logs/workflow.log +my $fn = $ARGV[0]; +my( $config, $purpose, $dp_version, $timestamp ) = + ( $fn =~ + m{custom-image- + ( + (.+)- + (\d+-\d+-(debian|rocky|ubuntu)\d+) + )- + (\d{4}(?:-\d{2}){4}) + }x + ); +$dp_version =~ s/-/./; + +my @raw_lines = ; +my( $l ) = grep { m: /dev/.*/\s*$: } @raw_lines; +my( $stats ) = ( $l =~ m:\s*/dev/\S+\s+(.*?)\s*$: ); +$stats =~ s:(\d{4,}):sprintf(q{%7s}, sprintf(q{%.2fG},($1/1024)/1024)):eg; + +my($max) = map { / maximum-disk-used: (\d+)/ } @raw_lines; +my($gbmax) = ceil((($max / 1024) / 1024) * 1.03); +$gbmax = 30 if $gbmax < 30; +my $i_dp_version = sprintf(q{%-15s}, qq{"$dp_version"}); +print( qq{ $i_dp_version) disk_size_gb="$gbmax" ;; # $stats # $purpose}, $/ ); diff --git a/examples/secure-boot/install_gpu_driver.sh b/examples/secure-boot/install_gpu_driver.sh index c0129dc..25efb2a 100644 --- a/examples/secure-boot/install_gpu_driver.sh +++ 
b/examples/secure-boot/install_gpu_driver.sh @@ -16,20 +16,35 @@ set -euxo pipefail -function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) function os_version() ( set +x ; grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; ) function os_codename() ( set +x ; grep '^VERSION_CODENAME=' /etc/os-release | cut -d= -f2 | xargs ; ) -function is_rocky() ( set +x ; [[ "$(os_id)" == 'rocky' ]] ; ) -function is_rocky8() ( set +x ; is_rocky && [[ "$(os_version)" == '8'* ]] ; ) -function is_rocky9() ( set +x ; is_rocky && [[ "$(os_version)" == '9'* ]] ; ) -function is_ubuntu() ( set +x ; [[ "$(os_id)" == 'ubuntu' ]] ; ) -function is_ubuntu18() ( set +x ; is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; ) -function is_ubuntu20() ( set +x ; is_ubuntu && [[ "$(os_version)" == '20.04'* ]] ; ) -function is_ubuntu22() ( set +x ; is_ubuntu && [[ "$(os_version)" == '22.04'* ]] ; ) -function is_debian() ( set +x ; [[ "$(os_id)" == 'debian' ]] ; ) -function is_debian10() ( set +x ; is_debian && [[ "$(os_version)" == '10'* ]] ; ) -function is_debian11() ( set +x ; is_debian && [[ "$(os_version)" == '11'* ]] ; ) -function is_debian12() ( set +x ; is_debian && [[ "$(os_version)" == '12'* ]] ; ) + +function version_ge() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | tail -n1)" ] ; ) +function version_gt() ( set +x ; [ "$1" = "$2" ] && return 1 || version_ge $1 $2 ; ) +function version_le() ( set +x ; [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] ; ) +function version_lt() ( set +x ; [ "$1" = "$2" ] && return 1 || version_le $1 $2 ; ) + +readonly -A supported_os=( + ['debian']="10 11 12" + ['rocky']="8 9" + ['ubuntu']="18.04 20.04 22.04" +) + +# dynamically define OS version test utility functions +if [[ "$(os_id)" == "rocky" ]]; +then _os_version=$(os_version | sed -e 's/[^0-9].*$//g') +else _os_version="$(os_version)"; fi +for os_id_val in 'rocky' 'ubuntu' 
'debian' ; do + eval "function is_${os_id_val}() ( set +x ; [[ \"$(os_id)\" == '${os_id_val}' ]] ; )" + + for osver in $(echo "${supported_os["${os_id_val}"]}") ; do + eval "function is_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && [[ \"${_os_version}\" == \"${osver}\" ]] ; )" + eval "function ge_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_ge \"${_os_version}\" \"${osver}\" ; )" + eval "function le_${os_id_val}${osver%%.*}() ( set +x ; is_${os_id_val} && version_le \"${_os_version}\" \"${osver}\" ; )" + done +done + function is_debuntu() ( set +x ; is_debian || is_ubuntu ; ) function os_vercat() ( set +x @@ -37,8 +52,8 @@ function os_vercat() ( set +x elif is_rocky ; then os_version | sed -e 's/[^0-9].*$//g' else os_version ; fi ; ) -function remove_old_backports { - if is_debian12 ; then return ; fi +function repair_old_backports { + if ge_debian12 || ! is_debuntu ; then return ; fi # This script uses 'apt-get update' and is therefore potentially dependent on # backports repositories which have been archived. 
In order to mitigate this # problem, we will use archive.debian.org for the oldoldstable repo @@ -58,14 +73,6 @@ function remove_old_backports { done } -# Return true if the first argument is equal to or less than the second argument -function compare_versions_lte { [ "$1" = "$(echo -e "$1\n$2" | sort -V | head -n1)" ] ; } - -# Return true if the first argument is less than the second argument -function compare_versions_lt() ( set +x - [ "$1" = "$2" ] && return 1 || compare_versions_lte $1 $2 -) - function print_metadata_value() { local readonly tmpfile=$(mktemp) http_code=$(curl -f "${1}" -H "Metadata-Flavor: Google" -w "%{http_code}" \ @@ -120,52 +127,93 @@ readonly ROLE # CUDA version and Driver version # https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html +# https://developer.nvidia.com/cuda-downloads +# Rocky8: 12.0: 525.147.05 readonly -A DRIVER_FOR_CUDA=( - [11.8]="525.147.05" [12.1]="530.30.02" [12.4]="550.54.14" - [12.5]="555.42.06" [12.6]="560.28.03" + ["11.8"]="560.35.03" + ["12.0"]="525.60.13" ["12.4"]="560.35.03" ["12.6"]="560.35.03" +) +# https://developer.nvidia.com/cudnn-downloads +if is_debuntu ; then +readonly -A CUDNN_FOR_CUDA=( + ["11.8"]="9.5.1.17" + ["12.0"]="9.5.1.17" ["12.4"]="9.5.1.17" ["12.6"]="9.5.1.17" ) +elif is_rocky ; then +# rocky: +# 12.0: 8.8.1.3 +# 12.1: 8.9.3.28 +# 12.2: 8.9.7.29 +# 12.3: 9.0.0.312 +# 12.4: 9.1.1.17 +# 12.5: 9.2.1.18 +# 12.6: 9.5.1.17 readonly -A CUDNN_FOR_CUDA=( - [11.8]="8.6.0.163" [12.1]="8.9.0" [12.4]="9.1.0.70" - [12.5]="9.2.1.18" + ["11.8"]="9.5.1.17" + ["12.0"]="8.8.1.3" ["12.4"]="9.1.1.17" ["12.6"]="9.5.1.17" ) +fi +# https://developer.nvidia.com/nccl/nccl-download +# 12.2: 2.19.3, 12.5: 2.21.5 readonly -A NCCL_FOR_CUDA=( - [11.8]="2.15.5" [12.1]="2.17.1" [12.4]="2.21.5" - [12.5]="2.22.3" + ["11.8"]="2.15.5" + ["12.0"]="2.16.5" ["12.4"]="2.23.4" ["12.6"]="2.23.4" ) readonly -A CUDA_SUBVER=( - [11.8]="11.8.0" [12.1]="12.1.0" [12.4]="12.4.1" - [12.5]="12.5.1" + ["11.8"]="11.8.0" + 
["12.0"]="12.0.0" ["12.4"]="12.4.1" ["12.6"]="12.6.2" ) RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'SPARK') readonly DEFAULT_CUDA_VERSION='12.4' CUDA_VERSION=$(get_metadata_attribute 'cuda-version' "${DEFAULT_CUDA_VERSION}") +if ( ( ge_debian12 || ge_rocky9 ) && version_le "${CUDA_VERSION%%.*}" "11" ) ; then + # CUDA 11 no longer supported on debian12 - 2024-11-22, rocky9 - 2024-11-27 + CUDA_VERSION="${DEFAULT_CUDA_VERSION}" +fi + +if ( version_ge "${CUDA_VERSION}" "12" && (le_debian11 || le_ubuntu18) ) ; then + # Only CUDA 12.0 supported on older debuntu + CUDA_VERSION="12.0" +fi readonly CUDA_VERSION readonly CUDA_FULL_VERSION="${CUDA_SUBVER["${CUDA_VERSION}"]}" function is_cuda12() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "12" ]] ; ) +function le_cuda12() ( set +x ; version_le "${CUDA_VERSION%%.*}" "12" ; ) +function ge_cuda12() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "12" ; ) + function is_cuda11() ( set +x ; [[ "${CUDA_VERSION%%.*}" == "11" ]] ; ) -readonly DEFAULT_DRIVER=${DRIVER_FOR_CUDA["${CUDA_VERSION}"]} +function le_cuda11() ( set +x ; version_le "${CUDA_VERSION%%.*}" "11" ; ) +function ge_cuda11() ( set +x ; version_ge "${CUDA_VERSION%%.*}" "11" ; ) + +DEFAULT_DRIVER="${DRIVER_FOR_CUDA[${CUDA_VERSION}]}" +if ( ge_ubuntu22 && version_le "${CUDA_VERSION}" "12.0" ) ; then + DEFAULT_DRIVER="560.28.03" ; fi +if ( is_debian11 || is_ubuntu20 ) ; then DEFAULT_DRIVER="560.28.03" ; fi +if ( is_rocky && le_cuda11 ) ; then DEFAULT_DRIVER="525.147.05" ; fi +if ( is_ubuntu20 && le_cuda11 ) ; then DEFAULT_DRIVER="535.183.06" ; fi +if ( is_rocky9 && ge_cuda12 ) ; then DEFAULT_DRIVER="565.57.01" ; fi DRIVER_VERSION=$(get_metadata_attribute 'gpu-driver-version' "${DEFAULT_DRIVER}") -if is_debian11 || is_ubuntu22 || is_ubuntu20 ; then DRIVER_VERSION="560.28.03" ; fi -if is_ubuntu20 && is_cuda11 ; then DRIVER_VERSION="535.183.06" ; fi readonly DRIVER_VERSION readonly DRIVER=${DRIVER_VERSION%%.*} -# Parameters for NVIDIA-provided CUDNN library +readonly 
DEFAULT_CUDNN8_VERSION="8.0.5.39" +readonly DEFAULT_CUDNN9_VERSION="9.1.0.70" + +# Parameters for NVIDIA-provided cuDNN library readonly DEFAULT_CUDNN_VERSION=${CUDNN_FOR_CUDA["${CUDA_VERSION}"]} CUDNN_VERSION=$(get_metadata_attribute 'cudnn-version' "${DEFAULT_CUDNN_VERSION}") function is_cudnn8() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "8" ]] ; ) function is_cudnn9() ( set +x ; [[ "${CUDNN_VERSION%%.*}" == "9" ]] ; ) -if is_rocky \ - && (compare_versions_lte "${CUDNN_VERSION}" "8.0.5.39") ; then - CUDNN_VERSION="8.0.5.39" -elif (is_ubuntu20 || is_ubuntu22 || is_debian12) && is_cudnn8 ; then +# The minimum cuDNN version supported by rocky is ${DEFAULT_CUDNN8_VERSION} +if is_rocky && (version_le "${CUDNN_VERSION}" "${DEFAULT_CUDNN8_VERSION}") ; then + CUDNN_VERSION="${DEFAULT_CUDNN8_VERSION}" +elif (ge_ubuntu20 || ge_debian12) && is_cudnn8 ; then # cuDNN v8 is not distribution for ubuntu20+, debian12 - CUDNN_VERSION="9.1.0.70" - -elif (is_ubuntu18 || is_debian10 || is_debian11) && is_cudnn9 ; then + CUDNN_VERSION="${DEFAULT_CUDNN9_VERSION}" +elif (le_ubuntu18 || le_debian11) && is_cudnn9 ; then # cuDNN v9 is not distributed for ubuntu18, debian10, debian11 ; fall back to 8 CUDNN_VERSION="8.8.0.121" fi @@ -181,14 +229,14 @@ readonly USERSPACE_URL=$(get_metadata_attribute 'gpu-driver-url' "${DEFAULT_USER # Short name for urls if is_ubuntu22 ; then - # at the time of writing 20240721 there is no ubuntu2204 in the index of repos at + # at the time of writing 20241125 there is no ubuntu2204 in the index of repos at # https://developer.download.nvidia.com/compute/machine-learning/repos/ # use packages from previous release until such time as nvidia # release ubuntu2204 builds nccl_shortname="ubuntu2004" shortname="$(os_id)$(os_vercat)" -elif is_rocky9 ; then +elif ge_rocky9 ; then # use packages from previous release until such time as nvidia # release rhel9 builds @@ -212,30 +260,53 @@ NCCL_REPO_URL=$(get_metadata_attribute 'nccl-repo-url' "${DEFAULT_NCCL_REPO_URL} 
readonly NCCL_REPO_URL readonly NCCL_REPO_KEY="${NVIDIA_BASE_DL_URL}/machine-learning/repos/${nccl_shortname}/x86_64/7fa2af80.pub" # 3bf863cc.pub -readonly -A DEFAULT_NVIDIA_CUDA_URLS=( - [11.8]="${NVIDIA_BASE_DL_URL}/cuda/11.8.0/local_installers/cuda_11.8.0_520.61.05_linux.run" - [12.1]="${NVIDIA_BASE_DL_URL}/cuda/12.1.0/local_installers/cuda_12.1.0_530.30.02_linux.run" - [12.4]="${NVIDIA_BASE_DL_URL}/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run" -) -readonly DEFAULT_NVIDIA_CUDA_URL=${DEFAULT_NVIDIA_CUDA_URLS["${CUDA_VERSION}"]} -NVIDIA_CUDA_URL=$(get_metadata_attribute 'cuda-url' "${DEFAULT_NVIDIA_CUDA_URL}") -readonly NVIDIA_CUDA_URL +function set_cuda_runfile_url() { + local RUNFILE_DRIVER_VERSION="${DRIVER_VERSION}" + local RUNFILE_CUDA_VERSION="${CUDA_FULL_VERSION}" + + if ge_cuda12 ; then + if ( le_debian11 || le_ubuntu18 ) ; then + RUNFILE_DRIVER_VERSION="525.60.13" + RUNFILE_CUDA_VERSION="12.0.0" + elif ( le_rocky8 && version_le "${DATAPROC_IMAGE_VERSION}" "2.0" ) ; then + RUNFILE_DRIVER_VERSION="525.147.05" + RUNFILE_CUDA_VERSION="12.0.0" + fi + else + RUNFILE_DRIVER_VERSION="520.61.05" + RUNFILE_CUDA_VERSION="11.8.0" + fi + + readonly RUNFILE_FILENAME="cuda_${RUNFILE_CUDA_VERSION}_${RUNFILE_DRIVER_VERSION}_linux.run" + CUDA_RELEASE_BASE_URL="${NVIDIA_BASE_DL_URL}/cuda/${RUNFILE_CUDA_VERSION}" + DEFAULT_NVIDIA_CUDA_URL="${CUDA_RELEASE_BASE_URL}/local_installers/${RUNFILE_FILENAME}" + readonly DEFAULT_NVIDIA_CUDA_URL + + NVIDIA_CUDA_URL=$(get_metadata_attribute 'cuda-url' "${DEFAULT_NVIDIA_CUDA_URL}") + readonly NVIDIA_CUDA_URL +} + +set_cuda_runfile_url # Parameter for NVIDIA-provided Rocky Linux GPU driver readonly NVIDIA_ROCKY_REPO_URL="${NVIDIA_REPO_URL}/cuda-${shortname}.repo" CUDNN_TARBALL="cudnn-${CUDA_VERSION}-linux-x64-v${CUDNN_VERSION}.tgz" CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/${CUDNN_TARBALL}" -if ( compare_versions_lte "8.3.1.22" "${CUDNN_VERSION}" ); then +if ( version_ge "${CUDNN_VERSION}" 
"8.3.1.22" ); then + # When version is greater than or equal to 8.3.1.22 but less than 8.4.1.50 use this format CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%.*}-archive.tar.xz" - if ( compare_versions_lte "${CUDNN_VERSION}" "8.4.1.50" ); then + if ( version_le "${CUDNN_VERSION}" "8.4.1.50" ); then + # When cuDNN version is greater than or equal to 8.4.1.50 use this format CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION}-archive.tar.xz" fi + # Use legacy url format with one of the tarball name formats depending on version as above CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN_VERSION%.*}/local_installers/${CUDA_VERSION}/${CUDNN_TARBALL}" fi -if ( compare_versions_lte "12.0" "${CUDA_VERSION}" ); then - # When cuda version is greater than 12.0 - CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-9.2.0.82_cuda12-archive.tar.xz" +if ( version_ge "${CUDA_VERSION}" "12.0" ); then + # Use modern url format When cuda version is greater than or equal to 12.0 + CUDNN_TARBALL="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%%.*}-archive.tar.xz" + CUDNN_TARBALL_URL="${NVIDIA_BASE_DL_URL}/cudnn/redist/cudnn/linux-x86_64/${CUDNN_TARBALL}" fi readonly CUDNN_TARBALL readonly CUDNN_TARBALL_URL @@ -264,10 +335,14 @@ function execute_with_retries() ( local -r cmd="$*" if [[ "$cmd" =~ "^apt-get install" ]] ; then - cmd="apt-get -y clean && $cmd" + apt-get -y clean + apt-get -y autoremove fi for ((i = 0; i < 3; i++)); do - if eval "$cmd" ; then return 0 ; fi + set -x + time eval "$cmd" > "${install_log}" 2>&1 && retval=$? || { retval=$? 
; cat "${install_log}" ; } + set +x + if [[ $retval == 0 ]] ; then return 0 ; fi sleep 5 done return 1 @@ -279,9 +354,9 @@ function install_cuda_keyring_pkg() { local kr_ver=1.1 curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ "${NVIDIA_REPO_URL}/cuda-keyring_${kr_ver}-1_all.deb" \ - -o "${download_dir}/cuda-keyring.deb" - dpkg -i "${download_dir}/cuda-keyring.deb" - rm -f "${download_dir}/cuda-keyring.deb" + -o "${tmpdir}/cuda-keyring.deb" + dpkg -i "${tmpdir}/cuda-keyring.deb" + rm -f "${tmpdir}/cuda-keyring.deb" CUDA_KEYRING_PKG_INSTALLED="1" } @@ -301,10 +376,10 @@ function install_local_cuda_repo() { readonly DIST_KEYRING_DIR="/var/${pkgname}" curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${LOCAL_DEB_URL}" -o "${download_dir}/${LOCAL_INSTALLER_DEB}" + "${LOCAL_DEB_URL}" -o "${tmpdir}/${LOCAL_INSTALLER_DEB}" - dpkg -i "${download_dir}/${LOCAL_INSTALLER_DEB}" - rm "${download_dir}/${LOCAL_INSTALLER_DEB}" + dpkg -i "${tmpdir}/${LOCAL_INSTALLER_DEB}" + rm "${tmpdir}/${LOCAL_INSTALLER_DEB}" cp ${DIST_KEYRING_DIR}/cuda-*-keyring.gpg /usr/share/keyrings/ if is_ubuntu ; then @@ -329,11 +404,11 @@ function install_local_cudnn_repo() { # ${NVIDIA_BASE_DL_URL}/redist/cudnn/v8.6.0/local_installers/11.8/cudnn-linux-x86_64-8.6.0.163_cuda11-archive.tar.xz curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ - "${local_deb_url}" -o "${download_dir}/local-installer.deb" + "${local_deb_url}" -o "${tmpdir}/local-installer.deb" - dpkg -i "${download_dir}/local-installer.deb" + dpkg -i "${tmpdir}/local-installer.deb" - rm -f "${download_dir}/local-installer.deb" + rm -f "${tmpdir}/local-installer.deb" cp /var/cudnn-local-repo-*-${CUDNN}*/cudnn-local-*-keyring.gpg /usr/share/keyrings @@ -361,7 +436,7 @@ function install_local_cudnn8_repo() { CUDNN8_PKG_NAME="${pkgname}" deb_fn="${pkgname}_1.0-1_amd64.deb" - local_deb_fn="${download_dir}/${deb_fn}" + local_deb_fn="${tmpdir}/${deb_fn}" 
local_deb_url="${NVIDIA_BASE_DL_URL}/redist/cudnn/v${CUDNN}/local_installers/${CUDNN8_CUDA_VER}/${deb_fn}" curl -fsSL --retry-connrefused --retry 3 --retry-max-time 5 \ "${local_deb_url}" -o "${local_deb_fn}" @@ -383,10 +458,9 @@ function install_nvidia_nccl() { local -r nccl_version="${NCCL_VERSION}-1+cuda${CUDA_VERSION}" if is_rocky ; then - time execute_with_retries \ + execute_with_retries \ dnf -y -q install \ - "libnccl-${nccl_version}" "libnccl-devel-${nccl_version}" "libnccl-static-${nccl_version}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libnccl-${nccl_version}" "libnccl-devel-${nccl_version}" "libnccl-static-${nccl_version}" sync elif is_ubuntu ; then install_cuda_keyring_pkg @@ -394,16 +468,14 @@ function install_nvidia_nccl() { apt-get update -qq if is_ubuntu18 ; then - time execute_with_retries \ + execute_with_retries \ apt-get install -q -y \ - libnccl2 libnccl-dev \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + libnccl2 libnccl-dev sync else - time execute_with_retries \ + execute_with_retries \ apt-get install -q -y \ - "libnccl2=${nccl_version}" "libnccl-dev=${nccl_version}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libnccl2=${nccl_version}" "libnccl-dev=${nccl_version}" sync fi else @@ -427,22 +499,20 @@ function install_nvidia_cudnn() { if is_rocky ; then if is_cudnn8 ; then - time execute_with_retries dnf -y -q install \ + execute_with_retries dnf -y -q install \ "libcudnn${major_version}" \ - "libcudnn${major_version}-devel" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn${major_version}-devel" sync elif is_cudnn9 ; then - time execute_with_retries dnf -y -q install \ + execute_with_retries dnf -y -q install \ "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn9-devel-cuda-${CUDA_VERSION%%.*}" sync else echo 
"Unsupported cudnn version: '${major_version}'" fi elif is_debuntu; then - if is_debian12 && is_src_os ; then + if ge_debian12 && is_src_os ; then apt-get -y install nvidia-cudnn else local CUDNN="${CUDNN_VERSION%.*}" @@ -451,23 +521,21 @@ function install_nvidia_cudnn() { apt-get update -qq - time execute_with_retries \ + execute_with_retries \ apt-get -y install --no-install-recommends \ "libcudnn8=${cudnn_pkg_version}" \ - "libcudnn8-dev=${cudnn_pkg_version}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn8-dev=${cudnn_pkg_version}" sync elif is_cudnn9 ; then install_cuda_keyring_pkg apt-get update -qq - time execute_with_retries \ + execute_with_retries \ apt-get -y install --no-install-recommends \ "libcudnn9-cuda-${CUDA_VERSION%%.*}" \ "libcudnn9-dev-cuda-${CUDA_VERSION%%.*}" \ - "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + "libcudnn9-static-cuda-${CUDA_VERSION%%.*}" sync else echo "Unsupported cudnn version: [${CUDNN_VERSION}]" @@ -478,9 +546,8 @@ function install_nvidia_cudnn() { packages=( "libcudnn${major_version}=${cudnn_pkg_version}" "libcudnn${major_version}-dev=${cudnn_pkg_version}") - time execute_with_retries \ - apt-get install -q -y --no-install-recommends "${packages[*]}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries \ + apt-get install -q -y --no-install-recommends "${packages[*]}" sync else echo "Unsupported OS: '${OS_NAME}'" @@ -577,7 +644,7 @@ function clear_dkms_key { } function add_contrib_component() { - if is_debian12 ; then + if ge_debian12 ; then # Include in sources file components on which nvidia-kernel-open-dkms depends local -r debian_sources="/etc/apt/sources.list.d/debian.sources" local components="main contrib" @@ -590,7 +657,7 @@ function add_contrib_component() { function add_nonfree_components() { if is_src_nvidia ; then return; fi - if is_debian12 ; then + if ge_debian12 ; then # 
Include in sources file components on which nvidia-open-kernel-dkms depends local -r debian_sources="/etc/apt/sources.list.d/debian.sources" local components="main contrib non-free non-free-firmware" @@ -621,7 +688,7 @@ function add_repo_cuda() { if is_debuntu ; then local kr_path=/usr/share/keyrings/cuda-archive-keyring.gpg local sources_list_path="/etc/apt/sources.list.d/cuda-${shortname}-x86_64.list" -echo "deb [signed-by=${kr_path}] https://developer.download.nvidia.com/compute/cuda/repos/${shortname}/x86_64/ /" \ + echo "deb [signed-by=${kr_path}] https://developer.download.nvidia.com/compute/cuda/repos/${shortname}/x86_64/ /" \ | sudo tee "${sources_list_path}" curl "${NVIDIA_BASE_DL_URL}/cuda/repos/${shortname}/x86_64/cuda-archive-keyring.gpg" \ -o "${kr_path}" @@ -675,7 +742,7 @@ function build_driver_from_github() { } function build_driver_from_packages() { - if is_ubuntu || is_debian ; then + if is_debuntu ; then if [[ -n "$(apt-cache search -n "nvidia-driver-${DRIVER}-server-open")" ]] ; then local pkglist=("nvidia-driver-${DRIVER}-server-open") ; else local pkglist=("nvidia-driver-${DRIVER}-open") ; fi @@ -692,21 +759,17 @@ function build_driver_from_packages() { fi add_contrib_component apt-get update -qq - execute_with_retries apt-get install -y -qq --no-install-recommends dkms \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries apt-get install -y -qq --no-install-recommends dkms #configure_dkms_certs - time execute_with_retries apt-get install -y -qq --no-install-recommends "${pkglist[@]}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries apt-get install -y -qq --no-install-recommends "${pkglist[@]}" sync elif is_rocky ; then #configure_dkms_certs - if time execute_with_retries dnf -y -q module install "nvidia-driver:${DRIVER}-dkms" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } ; then + if execute_with_retries dnf -y -q module install 
"nvidia-driver:${DRIVER}-dkms" ; then echo "nvidia-driver:${DRIVER}-dkms installed successfully" else - time execute_with_retries dnf -y -q module install 'nvidia-driver:latest' \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries dnf -y -q module install 'nvidia-driver:latest' fi sync fi @@ -714,30 +777,28 @@ function build_driver_from_packages() { } function install_nvidia_userspace_runfile() { - if test -f "${download_dir}/userspace-complete" ; then return ; fi + if test -f "${tmpdir}/userspace-complete" ; then return ; fi curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${USERSPACE_URL}" -o "${download_dir}/userspace.run" - time bash "${download_dir}/userspace.run" --no-kernel-modules --silent --install-libglvnd \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } - rm -f "${download_dir}/userspace.run" - touch "${download_dir}/userspace-complete" + "${USERSPACE_URL}" -o "${tmpdir}/userspace.run" + execute_with_retries bash "${tmpdir}/userspace.run" --no-kernel-modules --silent --install-libglvnd --tmpdir="${tmpdir}" + rm -f "${tmpdir}/userspace.run" + touch "${tmpdir}/userspace-complete" sync } function install_cuda_runfile() { - if test -f "${download_dir}/cuda-complete" ; then return ; fi + if test -f "${tmpdir}/cuda-complete" ; then return ; fi time curl -fsSL --retry-connrefused --retry 10 --retry-max-time 30 \ - "${NVIDIA_CUDA_URL}" -o "${download_dir}/cuda.run" - time bash "${download_dir}/cuda.run" --silent --toolkit --no-opengl-libs \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } - rm -f "${download_dir}/cuda.run" - touch "${download_dir}/cuda-complete" + "${NVIDIA_CUDA_URL}" -o "${tmpdir}/cuda.run" + execute_with_retries bash "${tmpdir}/cuda.run" --silent --toolkit --no-opengl-libs --tmpdir="${tmpdir}" + rm -f "${tmpdir}/cuda.run" + touch "${tmpdir}/cuda-complete" sync } function install_cuda_toolkit() { local cudatk_package=cuda-toolkit - if is_debian12 && 
is_src_os ; then + if ge_debian12 && is_src_os ; then cudatk_package="${cudatk_package}=${CUDA_FULL_VERSION}-1" elif [[ -n "${CUDA_VERSION}" ]]; then cudatk_package="${cudatk_package}-${CUDA_VERSION//./-}" @@ -746,47 +807,32 @@ function install_cuda_toolkit() { readonly cudatk_package if is_debuntu ; then # if is_ubuntu ; then execute_with_retries "apt-get install -y -qq --no-install-recommends cuda-drivers-${DRIVER}=${DRIVER_VERSION}-1" ; fi - time execute_with_retries apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package} \ - > "${install_log}" 2>&1 || { cat "${install_log}" ; exit -4 ; } - sync + execute_with_retries apt-get install -y -qq --no-install-recommends ${cuda_package} ${cudatk_package} + sync elif is_rocky ; then - time execute_with_retries dnf -y -q install "${cudatk_package}" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + # rocky9: cuda-11-[7,8], cuda-12-[1..6] + execute_with_retries dnf -y -q install "${cudatk_package}" sync fi } -function install_drivers_aliases() { - if is_rocky ; then return ; fi - if ! 
(is_debian12 || is_debian11) ; then return ; fi - if (is_debian12 && is_cuda11) && is_src_nvidia ; then return ; fi # don't install on debian 12 / cuda11 with drivers from nvidia - # Add a modprobe alias to prefer the open kernel modules - local conffile="/etc/modprobe.d/nvidia-aliases.conf" - echo -n "" > "${conffile}" - local prefix - if is_src_os ; then prefix="nvidia-current-open" - elif is_src_nvidia ; then prefix="nvidia-current" ; fi - local suffix - for suffix in uvm peermem modeset drm; do - echo "alias nvidia-${suffix} ${prefix}-${suffix}" >> "${conffile}" - done - echo "alias nvidia ${prefix}" >> "${conffile}" -} - function load_kernel_module() { # for some use cases, the kernel module needs to be removed before first use of nvidia-smi for module in nvidia_uvm nvidia_drm nvidia_modeset nvidia ; do rmmod ${module} > /dev/null 2>&1 || echo "unable to rmmod ${module}" done - install_drivers_aliases depmod -a modprobe nvidia + for suffix in uvm modeset drm; do + modprobe "nvidia-${suffix}" + done + # TODO: if peermem is available, also modprobe nvidia-peermem } # Install NVIDIA GPU driver provided by NVIDIA function install_nvidia_gpu_driver() { - if is_debian12 && is_src_os ; then + if ( ge_debian12 && is_src_os ) ; then add_nonfree_components add_repo_nvidia_container_toolkit apt-get update -qq @@ -800,33 +846,25 @@ function install_nvidia_gpu_driver() { libglvnd0 \ libcuda1 #clear_dkms_key - load_kernel_module - elif is_ubuntu18 || is_debian10 || (is_debian12 && is_cuda11) ; then + elif ( le_ubuntu18 || le_debian10 || (ge_debian12 && le_cuda11) ) ; then install_nvidia_userspace_runfile build_driver_from_github - load_kernel_module - install_cuda_runfile elif is_debuntu ; then install_cuda_keyring_pkg build_driver_from_packages - load_kernel_module - install_cuda_toolkit elif is_rocky ; then add_repo_cuda build_driver_from_packages - load_kernel_module - install_cuda_toolkit - else echo "Unsupported OS: '${OS_NAME}'" exit 1 @@ -852,8 +890,7 @@ function 
install_gpu_agent() { "${GPU_AGENT_REPO_URL}/report_gpu_metrics.py" \ | sed -e 's/-u --format=/--format=/' \ | dd status=none of="${install_dir}/report_gpu_metrics.py" - time execute_with_retries pip install -r "${install_dir}/requirements.txt" \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries pip install -r "${install_dir}/requirements.txt" sync # Generate GPU service. @@ -957,7 +994,8 @@ function configure_gpu_script() { # need to update the getGpusResources.sh script to look for MIG devices since if multiple GPUs nvidia-smi still # lists those because we only disable the specific GIs via CGROUPs. Here we just create it based off of: # https://raw.githubusercontent.com/apache/spark/master/examples/src/main/scripts/getGpusResources.sh - cat > ${spark_gpu_script_dir}/getGpusResources.sh <<'EOF' + local -r gpus_resources_script="${spark_gpu_script_dir}/getGpusResources.sh" + cat > "${gpus_resources_script}" <<'EOF' #!/usr/bin/env bash # @@ -977,31 +1015,17 @@ function configure_gpu_script() { # limitations under the License. 
# -CACHE_FILE="/var/run/nvidia-gpu-index.txt" -if [[ -f "${CACHE_FILE}" ]]; then - cat "${CACHE_FILE}" - exit 0 -fi -NV_SMI_L_CACHE_FILE="/var/run/nvidia-smi_-L.txt" -if [[ -f "${NV_SMI_L_CACHE_FILE}" ]]; then - NVIDIA_SMI_L="$(cat "${NV_SMI_L_CACHE_FILE}")" -else - NVIDIA_SMI_L="$(nvidia-smi -L | tee "${NV_SMI_L_CACHE_FILE}")" -fi - -NUM_MIG_DEVICES=$(echo "${NVIDIA_SMI_L}" | grep -e MIG -e H100 -e A100 | wc -l || echo '0') - -if [[ "${NUM_MIG_DEVICES}" -gt "0" ]] ; then - MIG_INDEX=$(( $NUM_MIG_DEVICES - 1 )) - ADDRS="$(perl -e 'print(join(q{,},map{qq{"$_"}}(0..$ARGV[0])),$/)' "${MIG_INDEX}")" -else - ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') -fi +ADDRS=$(nvidia-smi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') -echo {\"name\": \"gpu\", \"addresses\":[$ADDRS]} | tee "${CACHE_FILE}" +echo {\"name\": \"gpu\", \"addresses\":[${ADDRS}]} EOF - chmod a+rwx -R ${spark_gpu_script_dir} + chmod a+rx "${gpus_resources_script}" + + local spark_defaults_conf="/etc/spark/conf.dist/spark-defaults.conf" + if ! grep spark.executor.resource.gpu.discoveryScript "${spark_defaults_conf}" ; then + echo "spark.executor.resource.gpu.discoveryScript=${gpus_resources_script}" >> "${spark_defaults_conf}" + fi } function configure_gpu_isolation() { @@ -1050,41 +1074,39 @@ function nvsmi() { "${nvsmi}" $* } -function main() { - if ! is_debian && ! is_ubuntu && ! 
is_rocky ; then - echo "Unsupported OS: '$(os_name)'" - exit 1 - fi - - remove_old_backports - +function install_dependencies() { if is_debuntu ; then - export DEBIAN_FRONTEND=noninteractive - time execute_with_retries apt-get install -y -qq pciutils "linux-headers-${uname_r}" > /dev/null 2>&1 + execute_with_retries apt-get install -y -qq pciutils "linux-headers-${uname_r}" screen elif is_rocky ; then - time execute_with_retries dnf -y -q update --exclude=systemd*,kernel* \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } - time execute_with_retries dnf -y -q install pciutils gcc \ - > "${install_log}" 2>&1 || { cat "${install_log}" && exit -4 ; } + execute_with_retries dnf -y -q install pciutils gcc screen local dnf_cmd="dnf -y -q install kernel-devel-${uname_r}" - local kernel_devel_pkg_out="$(eval "${dnf_cmd} 2>&1")" - if [[ "${kernel_devel_pkg_out}" =~ 'Unable to find a match: kernel-devel-' ]] ; then + local install_log="${tmpdir}/install.log" + set +e + eval "${dnf_cmd}" > "${install_log}" 2>&1 + local retval="$?" 
+ set -e + + if [[ "${retval}" == "0" ]] ; then return ; fi + + if grep -q 'Unable to find a match: kernel-devel-' "${install_log}" ; then # this kernel-devel may have been migrated to the vault - local vault="https://download.rockylinux.org/vault/rocky/$(os_version)" - time execute_with_retries dnf -y -q --setopt=localpkg_gpgcheck=1 install \ + local os_ver="$(echo $uname_r | perl -pe 's/.*el(\d+_\d+)\..*/$1/; s/_/./')" + local vault="https://download.rockylinux.org/vault/rocky/${os_ver}" + dnf_cmd="$(echo dnf -y -q --setopt=localpkg_gpgcheck=1 install \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-${uname_r}.rpm" \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-core-${uname_r}.rpm" \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-${uname_r}.rpm" \ "${vault}/BaseOS/x86_64/os/Packages/k/kernel-modules-core-${uname_r}.rpm" \ - "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm" \ - > "${install_log}" 2>&1 || { cat "${install_log}" ; exit -4 ; } - sync - else - execute_with_retries "${dnf_cmd}" + "${vault}/AppStream/x86_64/os/Packages/k/kernel-devel-${uname_r}.rpm" + )" fi + + execute_with_retries "${dnf_cmd}" fi +} +function main() { # This configuration should be run on all nodes # regardless if they have attached GPUs configure_yarn @@ -1111,6 +1133,8 @@ function main() { if [[ $IS_MIG_ENABLED -eq 0 ]]; then install_nvidia_gpu_driver + load_kernel_module + if [[ -n ${CUDNN_VERSION} ]]; then install_nvidia_nccl install_nvidia_cudnn @@ -1128,7 +1152,7 @@ function main() { rmmod ${module} > /dev/null 2>&1 || echo "unable to rmmod ${module}" done - MIG_GPU_LIST="$(nvsmi -L | grep -e MIG -e H100 -e A100 || echo -n "")" + MIG_GPU_LIST="$(nvsmi -L | grep -e MIG -e P100 -e H100 -e A100 || echo -n "")" if test -n "$(nvsmi -L)" ; then # cache the result of the gpu query ADDRS=$(nvsmi --query-gpu=index --format=csv,noheader | perl -e 'print(join(q{,},map{chomp; qq{"$_"}}))') @@ -1241,8 +1265,10 @@ function clean_up_sources_lists() { # cran-r # if [[ -f 
/etc/apt/sources.list.d/cran-r.list ]]; then + keyid="0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7" + if is_ubuntu18 ; then keyid="0x51716619E084DAB9"; fi rm -f /usr/share/keyrings/cran-r.gpg - curl 'https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x95c0faf38db3ccad0c080a7bdc78b2ddeabc47b7' | \ + curl "https://keyserver.ubuntu.com/pks/lookup?op=get&search=${keyid}" | \ gpg --dearmor -o /usr/share/keyrings/cran-r.gpg sed -i -e 's:deb http:deb [signed-by=/usr/share/keyrings/cran-r.gpg] http:g' /etc/apt/sources.list.d/cran-r.list fi @@ -1262,132 +1288,180 @@ function clean_up_sources_lists() { } function exit_handler() { - echo "Exit handler invoked" set +ex + echo "Exit handler invoked" + # Purge private key material until next grant clear_dkms_key - # Free conda cache - /opt/conda/miniconda3/bin/conda clean -a - # Clear pip cache pip cache purge || echo "unable to purge pip cache" - # remove the tmpfs conda pkgs_dirs - if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi + # If system memory was sufficient to mount memory-backed filesystems + if [[ "${tmpdir}" == "/mnt/shm" ]] ; then + # remove the tmpfs pip cache-dir + pip config unset global.cache-dir || echo "unable to unset global pip cache" - # remove the tmpfs pip cache-dir - pip config unset global.cache-dir || echo "unable to set global pip cache" - - # Clean up shared memory mounts - for shmdir in /mnt/shm /var/cache/apt/archives /var/cache/dnf ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then - rm -rf ${shmdir}/* - sync + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm /tmp ; do + if grep -q "^tmpfs ${shmdir}" /proc/mounts && ! grep -q "^tmpfs ${shmdir}" /etc/fstab ; then + umount -f ${shmdir} + fi + done - execute_with_retries umount -f ${shmdir} - fi - done + # restart services stopped during preparation stage + # systemctl list-units | perl -n -e 'qx(systemctl start $1) if /^.*? 
((hadoop|knox|hive|mapred|yarn|hdfs)\S*).service/' + fi - # Clean up OS package cache ; re-hold systemd package if is_debuntu ; then + # Clean up OS package cache apt-get -y -qq clean apt-get -y -qq autoremove - if is_debian12 ; then + # re-hold systemd package + if ge_debian12 ; then apt-mark hold systemd libsystemd0 ; fi else dnf clean all fi - # print disk usage statistics - if is_debuntu ; then - # Rocky doesn't have sort -h and fails when the argument is passed - du --max-depth 3 -hx / | sort -h | tail -10 + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 | sort -h + elif is_debian ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /usr/local/cuda-1?.? \ + /opt/conda/miniconda3 | sort -h + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas} \ + /usr/lib64/google-cloud-sdk \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? 
\ + /opt/conda/miniconda3 fi # Process disk usage logs from installation period - rm -f /tmp/keep-running-df - sleep 6s + rm -f /run/keep-running-df + sync + sleep 5.01s # compute maximum size of disk during installation # Log file contains logs like the following (minus the preceeding #): -#Filesystem Size Used Avail Use% Mounted on -#/dev/vda2 6.8G 2.5G 4.0G 39% / - df --si - perl -e '$max=( sort +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / + df / | tee -a "/run/disk-usage.log" + + perl -e '@siz=( sort { $a => $b } map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } )[-1]; -print( "maximum-disk-used: $max", $/ );' < /tmp/disk-usage.log + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $inc=$max-$min; +print( " samples-taken: ", scalar @siz, $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" echo "exit_handler has completed" # zero free disk space if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero ; sync ; rm -f /zero + dd if=/dev/zero of=/zero + sync + sleep 3s + rm -f /zero fi return 0 } -trap exit_handler EXIT +function set_proxy(){ + export METADATA_HTTP_PROXY="$(get_metadata_attribute http-proxy)" + export http_proxy="${METADATA_HTTP_PROXY}" + export https_proxy="${METADATA_HTTP_PROXY}" + export HTTP_PROXY="${METADATA_HTTP_PROXY}" + export HTTPS_PROXY="${METADATA_HTTP_PROXY}" + export no_proxy=metadata.google.internal,169.254.169.254 + export NO_PROXY=metadata.google.internal,169.254.169.254 +} -function prepare_to_install(){ - nvsmi_works="0" - readonly bdcfg="/usr/local/bin/bdconfig" - download_dir=/tmp/ +function mount_ramdisk(){ + local free_mem free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" + if [[ ${free_mem} -lt 10500000 ]]; then return 0 ; fi + # Write to a ramdisk instead of churning the persistent disk - if [[ ${free_mem} -ge 5250000 ]]; then - download_dir="/mnt/shm" - mkdir -p 
"${download_dir}" - mount -t tmpfs tmpfs "${download_dir}" - # Download conda packages to tmpfs - /opt/conda/miniconda3/bin/conda config --add pkgs_dirs "${download_dir}" + tmpdir="/mnt/shm" + mkdir -p "${tmpdir}" + mount -t tmpfs tmpfs "${tmpdir}" - # Download pip packages to tmpfs - pip config set global.cache-dir "${download_dir}" || echo "unable to set global.cache-dir" + # Clear pip cache + # TODO: make this conditional on which OSs have pip without cache purge + pip cache purge || echo "unable to purge pip cache" - # Download OS packages to tmpfs - if is_debuntu ; then - mount -t tmpfs tmpfs /var/cache/apt/archives - else - mount -t tmpfs tmpfs /var/cache/dnf - fi + # Download pip packages to tmpfs + pip config set global.cache-dir "${tmpdir}" || echo "unable to set global.cache-dir" + + # Download OS packages to tmpfs + if is_debuntu ; then + mount -t tmpfs tmpfs /var/cache/apt/archives + else + mount -t tmpfs tmpfs /var/cache/dnf + fi +} + +function prepare_to_install(){ + nvsmi_works="0" + readonly bdcfg="/usr/local/bin/bdconfig" + tmpdir=/tmp/ + if ! is_debuntu && ! 
is_rocky ; then + echo "Unsupported OS: '$(os_name)'" + exit 1 fi - install_log="${download_dir}/install.log" + + repair_old_backports + + export DEBIAN_FRONTEND=noninteractive + + trap exit_handler EXIT + mount_ramdisk + install_log="${tmpdir}/install.log" + + set_proxy if is_debuntu ; then clean_up_sources_lists apt-get update -qq apt-get -y clean + sleep 5s apt-get -y -qq autoremove - if is_debian12 ; then + if ge_debian12 ; then apt-mark unhold systemd libsystemd0 ; fi else dnf clean all fi - # Clean conda cache - /opt/conda/miniconda3/bin/conda clean -a - # zero free disk space - if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - set +e - time dd if=/dev/zero of=/zero ; sync ; rm -f /zero - set -e - fi + if [[ -n "$(get_metadata_attribute creating-image)" ]]; then ( set +e + time dd if=/dev/zero of=/zero status=none ; sync ; sleep 3s ; rm -f /zero + ) fi configure_dkms_certs + install_dependencies + # Monitor disk usage in a screen session - if is_debuntu ; then - apt-get install -y -qq screen > /dev/null 2>&1 - elif is_rocky ; then - dnf -y -q install screen > /dev/null 2>&1 - fi - touch /tmp/keep-running-df + df / > "/run/disk-usage.log" + touch "/run/keep-running-df" screen -d -m -US keep-running-df \ - bash -c 'while [[ -f /tmp/keep-running-df ]] ; do df --si / | tee -a /tmp/disk-usage.log ; sleep 5s ; done' + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" } prepare_to_install diff --git a/examples/secure-boot/pre-init.sh b/examples/secure-boot/pre-init.sh index 7797b4f..57e8e62 100644 --- a/examples/secure-boot/pre-init.sh +++ b/examples/secure-boot/pre-init.sh @@ -25,6 +25,9 @@ export PROJECT_ID="$(jq -r .PROJECT_ID env.json)" export PURPOSE="$(jq -r .PURPOSE env.json)" export BUCKET="$(jq -r .BUCKET env.json)" export ZONE="$(jq -r .ZONE env.json)" +export SUBNET="$(jq -r .SUBNET env.json)" + +export region="$(echo "${ZONE}" | perl -pe 's/-[a-z]+$//')" custom_image_zone="${ZONE}" 
disk_size_gb="30" # greater than or equal to 30 @@ -37,13 +40,12 @@ gcloud config set project ${PROJECT_ID} #gcloud auth login eval "$(bash examples/secure-boot/create-key-pair.sh)" -metadata="public_secret_name=${public_secret_name}" -metadata="${metadata},private_secret_name=${private_secret_name}" -metadata="${metadata},secret_project=${secret_project}" -metadata="${metadata},secret_version=${secret_version}" -metadata="${metadata},dask-runtime=standalone" +metadata="dask-runtime=standalone" metadata="${metadata},rapids-runtime=DASK" metadata="${metadata},cuda-version=12.4" +metadata="${metadata},creating-image=c9h" +metadata="${metadata},rapids-mirror-disk=rapids-mirror-${region}" +metadata="${metadata},rapids-mirror-host=10.42.79.42" # If no OS family specified, default to debian if [[ "${IMAGE_VERSION}" != *-* ]] ; then @@ -67,6 +69,12 @@ function generate() { return fi + local install_image="$(jq -r ".[] | select(.name == \"${image_name}-install\").name" "${tmpdir}/images.json")" + if [[ -n "${install_image}" ]] ; then + echo "Install image already exists. Cleaning up after aborted run." 
+ gcloud -q compute images delete "${image_name}-install" + fi + local instance="$(jq -r ".[] | select(.name == \"${image_name}-install\").name" "${tmpdir}/instances.json")" if [[ -n "${instance}" ]]; then @@ -77,7 +85,7 @@ function generate() { fi set -xe python generate_custom_image.py \ - --machine-type "n1-standard-8" \ + --machine-type "n1-standard-16" \ --accelerator "type=nvidia-tesla-t4" \ --image-name "${image_name}" \ --customization-script "${customization_script}" \ @@ -86,6 +94,7 @@ function generate() { --zone "${custom_image_zone}" \ --disk-size "${disk_size_gb}" \ --gcs-bucket "${BUCKET}" \ + --subnet "${SUBNET}" \ --shutdown-instance-timer-sec=30 \ --no-smoke-test \ ${extra_args} @@ -100,15 +109,15 @@ function generate_from_base_purpose() { # base image -> cuda case "${dataproc_version}" in - "2.0-debian10" ) disk_size_gb="38" ;; # 40G 31G 7.8G 80% / # cuda-pre-init-2-0-debian10 - "2.0-rocky8" ) disk_size_gb="35" ;; # 38G 32G 6.2G 84% / # cuda-pre-init-2-0-rocky8 - "2.0-ubuntu18" ) disk_size_gb="37" ;; # 39G 30G 8.5G 79% / # cuda-pre-init-2-0-ubuntu18 - "2.1-debian11" ) disk_size_gb="37" ;; # 39G 34G 4.1G 90% / # cuda-pre-init-2-1-debian11 - "2.1-rocky8" ) disk_size_gb="38" ;; # 41G 35G 6.1G 86% / # cuda-pre-init-2-1-rocky8 - "2.1-ubuntu20" ) disk_size_gb="35" ;; # 37G 32G 4.4G 88% / # cuda-pre-init-2-1-ubuntu20 - "2.2-debian12" ) disk_size_gb="38" ;; # 40G 35G 3.3G 92% / # cuda-pre-init-2-2-debian12 - "2.2-rocky9" ) disk_size_gb="40" ;; # 42G 36G 5.9G 86% / # cuda-pre-init-2-2-rocky9 - "2.2-ubuntu22" ) disk_size_gb="38" ;; # 40G 35G 4.8G 88% / # cuda-pre-init-2-2-ubuntu22 + "2.0-debian10" ) disk_size_gb="30" ;; # 29.30G 28.29G 0 100% / # cuda-pre-init-2-0-debian10 + "2.0-rocky8" ) disk_size_gb="30" ;; # 29.79G 28.94G 0.85G 98% / # cuda-pre-init-2-0-rocky8 + "2.0-ubuntu18" ) disk_size_gb="30" ;; # 28.89G 27.64G 1.24G 96% / # cuda-pre-init-2-0-ubuntu18 + "2.1-debian11" ) disk_size_gb="32" ;; # 31.26G 30.74G 0 100% / # cuda-pre-init-2-1-debian11 + 
"2.1-rocky8" ) disk_size_gb="34" ;; # 33.79G 32.00G 1.80G 95% / # cuda-pre-init-2-1-rocky8 + "2.1-ubuntu20" ) disk_size_gb="32" ;; # 30.83G 30.35G 0.46G 99% / # cuda-pre-init-2-1-ubuntu20 + "2.2-debian12" ) disk_size_gb="34" ;; # 33.23G 32.71G 0 100% / # cuda-pre-init-2-2-debian12 + "2.2-rocky9" ) disk_size_gb="35" ;; # 34.79G 33.16G 1.64G 96% / # cuda-pre-init-2-2-rocky9 + "2.2-ubuntu22" ) disk_size_gb="35" ;; # 33.74G 32.94G 0.78G 98% / # cuda-pre-init-2-2-ubuntu22 esac # Install GPU drivers + cuda on dataproc base image @@ -118,15 +127,15 @@ time generate_from_dataproc_version "${dataproc_version}" # cuda image -> rapids case "${dataproc_version}" in - "2.0-debian10" ) disk_size_gb="44" ;; # 47G 41G 4.0G 91% / # rapids-pre-init-2-0-debian10 - "2.0-rocky8" ) disk_size_gb="45" ;; # 49G 42G 7.0G 86% / # rapids-pre-init-2-0-rocky8 - "2.0-ubuntu18" ) disk_size_gb="43" ;; # 45G 40G 4.9G 90% / # rapids-pre-init-2-0-ubuntu18 - "2.1-debian11" ) disk_size_gb="46" ;; # 49G 43G 3.6G 93% / # rapids-pre-init-2-1-debian11 - "2.1-rocky8" ) disk_size_gb="48" ;; # 52G 45G 7.2G 87% / # rapids-pre-init-2-1-rocky8 - "2.1-ubuntu20" ) disk_size_gb="45" ;; # 47G 42G 5.2G 89% / # rapids-pre-init-2-1-ubuntu20 - "2.2-debian12" ) disk_size_gb="48" ;; # 51G 45G 3.8G 93% / # rapids-pre-init-2-2-debian12 - "2.2-rocky9" ) disk_size_gb="49" ;; # 53G 46G 7.2G 87% / # rapids-pre-init-2-2-rocky9 - "2.2-ubuntu22" ) disk_size_gb="48" ;; # 50G 45G 5.6G 89% / # rapids-pre-init-2-2-ubuntu22 + "2.0-debian10" ) disk_size_gb="41" ;; # 40.12G 37.51G 0.86G 98% / # rapids-pre-init-2-0-debian10 + "2.0-rocky8" ) disk_size_gb="41" ;; # 38.79G 38.04G 0.76G 99% / # rapids-pre-init-2-0-rocky8 + "2.0-ubuntu18" ) disk_size_gb="40" ;; # 37.62G 36.69G 0.91G 98% / # rapids-pre-init-2-0-ubuntu18 + "2.1-debian11" ) disk_size_gb="44" ;; # 42.09G 39.77G 0.49G 99% / # rapids-pre-init-2-1-debian11 + "2.1-rocky8" ) disk_size_gb="44" ;; # 43.79G 41.11G 2.68G 94% / # rapids-pre-init-2-1-rocky8 + "2.1-ubuntu20" ) 
disk_size_gb="45" ;; # 39.55G 39.39G 0.15G 100% / # rapids-pre-init-2-1-ubuntu20 + "2.2-debian12" ) disk_size_gb="46" ;; # 44.06G 41.73G 0.41G 100% / # rapids-pre-init-2-2-debian12 + "2.2-rocky9" ) disk_size_gb="45" ;; # 44.79G 42.29G 2.51G 95% / # rapids-pre-init-2-2-rocky9 + "2.2-ubuntu22" ) disk_size_gb="46" ;; # 42.46G 41.97G 0.48G 99% / # rapids-pre-init-2-2-ubuntu22 esac #disk_size_gb="50" @@ -134,9 +143,9 @@ esac # Install dask with rapids on base image PURPOSE="rapids-pre-init" customization_script="examples/secure-boot/rapids.sh" -time generate_from_base_purpose "cuda-pre-init" +#time generate_from_base_purpose "cuda-pre-init" -# Install dask without rapids on base image -PURPOSE="dask-pre-init" -customization_script="examples/secure-boot/dask.sh" -time generate_from_base_purpose "cuda-pre-init" +## Install dask without rapids on base image +#PURPOSE="dask-pre-init" +#customization_script="examples/secure-boot/dask.sh" +#time generate_from_base_purpose "cuda-pre-init" diff --git a/examples/secure-boot/rapids.sh b/examples/secure-boot/rapids.sh index 6c5c9d4..308003f 100644 --- a/examples/secure-boot/rapids.sh +++ b/examples/secure-boot/rapids.sh @@ -19,11 +19,12 @@ set -euxo pipefail -function os_id() { grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; } -function is_ubuntu() { [[ "$(os_id)" == 'ubuntu' ]] ; } -function is_ubuntu18() { is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; } -function is_debian() { [[ "$(os_id)" == 'debian' ]] ; } -function is_debuntu() { is_debian || is_ubuntu ; } +function os_id() ( set +x ; grep '^ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function os_version() ( set +x ; grep '^VERSION_ID=' /etc/os-release | cut -d= -f2 | xargs ; ) +function is_ubuntu() ( set +x ; [[ "$(os_id)" == 'ubuntu' ]] ; ) +function is_ubuntu18() ( set +x ; is_ubuntu && [[ "$(os_version)" == '18.04'* ]] ; ) +function is_debian() ( set +x ; [[ "$(os_id)" == 'debian' ]] ; ) +function is_debuntu() ( set +x ; is_debian || is_ubuntu ; ) function 
print_metadata_value() { local readonly tmpfile=$(mktemp) @@ -72,17 +73,6 @@ function get_metadata_attribute() ( function is_cuda12() { [[ "${CUDA_VERSION%%.*}" == "12" ]] ; } function is_cuda11() { [[ "${CUDA_VERSION%%.*}" == "11" ]] ; } -function execute_with_retries() { - local -r cmd="$*" - for i in {0..9} ; do - if eval "$cmd"; then - return 0 ; fi - sleep 5 - done - echo "Cmd '${cmd}' failed." - return 1 -} - function configure_dask_yarn() { readonly DASK_YARN_CONFIG_DIR=/etc/dask/ readonly DASK_YARN_CONFIG_FILE=${DASK_YARN_CONFIG_DIR}/config.yaml @@ -98,7 +88,7 @@ function configure_dask_yarn() { # https://yarn.dask.org/en/latest/configuration.html#default-configuration yarn: - environment: python://${DASK_CONDA_ENV}/bin/python + environment: python://${RAPIDS_CONDA_ENV}/bin/python worker: count: 2 @@ -120,7 +110,7 @@ function install_systemd_dask_worker() { LOGFILE="/var/log/${DASK_WORKER_SERVICE}.log" nvidia-smi -c DEFAULT echo "dask-cuda-worker starting, logging to \${LOGFILE}" -${DASK_CONDA_ENV}/bin/dask-cuda-worker "${MASTER}:8786" --local-directory="${dask_worker_local_dir}" --memory-limit=auto >> "\${LOGFILE}" 2>&1 +${RAPIDS_CONDA_ENV}/bin/dask-cuda-worker "${MASTER}:8786" --local-directory="${dask_worker_local_dir}" --memory-limit=auto >> "\${LOGFILE}" 2>&1 EOF chmod 750 "${DASK_WORKER_LAUNCHER}" @@ -172,7 +162,7 @@ function install_systemd_dask_scheduler() { #!/bin/bash LOGFILE="/var/log/${DASK_SCHEDULER_SERVICE}.log" echo "dask scheduler starting, logging to \${LOGFILE}" -${DASK_CONDA_ENV}/bin/dask scheduler >> "\${LOGFILE}" 2>&1 +${RAPIDS_CONDA_ENV}/bin/dask scheduler >> "\${LOGFILE}" 2>&1 EOF chmod 750 "${DASK_SCHEDULER_LAUNCHER}" @@ -419,16 +409,24 @@ EOF } function install_dask_rapids() { +#To enable CUDA support, UCX requires the CUDA Runtime library (libcudart). 
+#The library can be installed with the appropriate command below: + +#* For CUDA 11, run: conda install cudatoolkit cuda-version=11 +#* For CUDA 12, run: conda install cuda-cudart cuda-version=12 + if is_cuda12 ; then local python_spec="python>=3.11" local cuda_spec="cuda-version>=12,<13" - local dask_spec="dask>=2024.7" + local dask_spec="dask" local numba_spec="numba" + local cudart_spec="cuda-cudart" elif is_cuda11 ; then local python_spec="python>=3.9" local cuda_spec="cuda-version>=11,<12.0a0" local dask_spec="dask" local numba_spec="numba" + local cudart_spec="cudatoolkit" fi rapids_spec="rapids>=${RAPIDS_VERSION}" @@ -451,36 +449,59 @@ function install_dask_rapids() { CONDA_PACKAGES+=( "${cuda_spec}" + "${cudart_spec}" "${rapids_spec}" "${dask_spec}" - "dask-bigquery" - "dask-ml" - "dask-sql" "cudf" "${numba_spec}" ) # Install cuda, rapids, dask - mamba="/opt/conda/miniconda3/bin/mamba" - conda="/opt/conda/miniconda3/bin/conda" + mamba="${CONDA_ROOT}/bin/mamba" + conda="${CONDA_ROOT}/bin/conda" + + readonly DASK_CONDA_ENV="${CONDA_ROOT}/envs/${RAPIDS_ENV_NAME}" + if test -d "${DASK_CONDA_ENV}" ; then + "${conda}" remove -n "${RAPIDS_ENV_NAME}" --all > /dev/null 2>&1 || rm -rf "${DASK_CONDA_ENV}" + fi + # Unpin conda version and upgrade +# perl -ni -e 'print unless /^conda /' "${CONDA_ROOT}/conda-meta/pinned" +# "${mamba}" install conda mamba libmamba libmambapy conda-libmamba-solver - "${conda}" remove -n dask --all || echo "unable to remove conda environment [dask]" + # This error occurs when we set channel_alias +# util_files_to_patch="$(find "${CONDA_ROOT}" -name utils.py | grep mamba/utils.py)" +# perl -pi -e 's[raise ValueError\("missing key][print("missing key]' ${util_files_to_patch} +# File "/home/zhyue/mambaforge/lib/python3.9/site-packages/mamba/utils.py", line 393, in compute_final_precs +# raise ValueError("missing key {} in channels: {}".format(key, lookup_dict)) + + CONDA_EXE="${CONDA_ROOT}/bin/conda" + 
CONDA_PYTHON_EXE="${CONDA_ROOT}/bin/python" + PATH="${CONDA_ROOT}/bin/condabin:${CONDA_ROOT}/bin:${PATH}" ( set +e local is_installed="0" for installer in "${mamba}" "${conda}" ; do - test -d "${DASK_CONDA_ENV}" || \ - time "${installer}" "create" -m -n 'dask-rapids' -y --no-channel-priority \ + echo "${installer}" "create" -q -m -n "${RAPIDS_ENV_NAME}" -y --no-channel-priority \ + -c 'conda-forge' -c 'nvidia' -c 'rapidsai' \ + ${CONDA_PACKAGES[*]} \ + "${python_spec}" +# read placeholder + # for debugging, consider -vvv + time "${installer}" "create" -q -m -n "${RAPIDS_ENV_NAME}" -y --no-channel-priority \ -c 'conda-forge' -c 'nvidia' -c 'rapidsai' \ ${CONDA_PACKAGES[*]} \ "${python_spec}" \ - > "${install_log}" 2>&1 && retval=$? || { retval=$? ; cat "${install_log}" ; } + && retval=$? || retval=$? sync if [[ "$retval" == "0" ]] ; then is_installed="1" break + else + test -d "${RAPIDS_CONDA_ENV}" && ( "${conda}" remove -n "${RAPIDS_ENV_NAME}" --all > /dev/null 2>&1 || rm -rf "${RAPIDS_CONDA_ENV}" ) + "${conda}" config --set channel_priority flexible + df -h + clean_conda_cache fi - "${conda}" config --set channel_priority flexible done if [[ "${is_installed}" == "0" ]]; then echo "failed to install dask" @@ -533,26 +554,38 @@ function main() { fi } -function exit_handler() ( +function clean_conda_cache() { + if ! 
grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then + "${CONDA}" clean -a + fi +} + +function exit_handler() { set +e + set -x echo "Exit handler invoked" - # Free conda cache - /opt/conda/miniconda3/bin/conda clean -a > /dev/null 2>&1 + unmount_rapids_mirror - # Clear pip cache - pip cache purge || echo "unable to purge pip cache" + mv ~/.condarc.default ~/.condarc + mv /root/.config/pip/pip.conf.default /root/.config/pip/pip.conf - # remove the tmpfs conda pkgs_dirs - if [[ -d /mnt/shm ]] ; then /opt/conda/miniconda3/bin/conda config --remove pkgs_dirs /mnt/shm ; fi + # If system memory was sufficient to mount memory-backed filesystems + if [[ "${tmpdir}" == "/mnt/shm" ]] ; then + echo "cleaning up tmpfs mounts" - # Clean up shared memory mounts - for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm ; do - if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then - rm -rf ${shmdir}/* - umount -f ${shmdir} - fi - done + # Clean up shared memory mounts + for shmdir in /var/cache/apt/archives /var/cache/dnf /mnt/shm /tmp ; do + if grep -q "^tmpfs ${shmdir}" /proc/mounts ; then + sync + umount -f ${shmdir} + fi + done + else + clean_conda_cache + # Clear pip cache from non-tmpfs + pip cache purge || echo "unable to purge pip cache" + fi # Clean up OS package cache ; re-hold systemd package if is_debuntu ; then @@ -562,36 +595,129 @@ function exit_handler() ( dnf clean all fi - # print disk usage statistics - if is_debuntu ; then - # Rocky doesn't have sort -h and fails when the argument is passed - du --max-depth 3 -hx / | sort -h | tail -10 + # print disk usage statistics for large components + if is_ubuntu ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + ${CONDA_ROOT} + elif is_debian ; then + du -hs \ + /usr/lib/{pig,hive,hadoop,jvm,spark,google-cloud-sdk,x86_64-linux-gnu} \ + /usr/lib \ + /usr/local/cuda-1?.? 
\ + ${CONDA_ROOT} + else + du -hs \ + /var/lib/docker \ + /usr/lib/{pig,hive,hadoop,firmware,jvm,spark,atlas} \ + /usr/lib64/google-cloud-sdk \ + /usr/lib \ + /opt/nvidia/* \ + /usr/local/cuda-1?.? \ + ${CONDA_ROOT} fi # Process disk usage logs from installation period - rm -f "${tmpdir}/keep-running-df" - sleep 6s + rm -f /run/keep-running-df + sync + sleep 5.01s # compute maximum size of disk during installation # Log file contains logs like the following (minus the preceeding #): -#Filesystem Size Used Avail Use% Mounted on -#/dev/vda2 6.8G 2.5G 4.0G 39% / - df -h / | tee -a "${tmpdir}/disk-usage.log" - perl -e '$max=( sort +#Filesystem 1K-blocks Used Available Use% Mounted on +#/dev/vda2 7096908 2611344 4182932 39% / - set +x + df / | tee -a "/run/disk-usage.log" + perl -e '@siz=( sort - { $a => $b } + { $b <=> $a } map { (split)[2] =~ /^(\d+)/ } - grep { m:^/: } )[-1]; -print( "maximum-disk-used: $max", $/ );' < "${tmpdir}/disk-usage.log" - + grep { m:^/: } ); +$max=$siz[0]; $min=$siz[-1]; $inc=$max-$min; +print( " samples-taken: ", scalar @siz, $/, + "maximum-disk-used: $max", $/, + "minimum-disk-used: $min", $/, + " increased-by: $inc", $/ )' < "/run/disk-usage.log" + set -x echo "exit_handler has completed" # zero free disk space if [[ -n "$(get_metadata_attribute creating-image)" ]]; then - dd if=/dev/zero of=/zero ; sync ; rm -f /zero + eval "dd if=/dev/zero of=/zero" + sync + sleep 3s + rm -f /zero fi return 0 -) +} + +function unmount_rapids_mirror() { + if ! 
grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then return ; fi + + umount "${rapids_mirror_mountpoint}" + umount "${rapids_mirror_mountpoint}_ro" + gcloud compute instances detach-disk "$(hostname -s)" \ + --device-name "${RAPIDS_MIRROR_DISK_NAME}" \ + --zone "${ZONE}" \ + --disk-scope regional +} + +function mount_rapids_mirror() { + # use a regional mirror instead of fetching from cloudflare CDN + export RAPIDS_MIRROR_DISK_NAME="$(gcloud compute disks list | awk "/${RAPIDS_MIRROR_DISK}-/ {print \$1}" | sort | tail -1)" + export RAPIDS_DISK_FQN="projects/${PROJECT_ID}/regions/${REGION}/disks/${RAPIDS_MIRROR_DISK_NAME}" + + if [[ -z "${RAPIDS_MIRROR_DISK_NAME}" ]]; then return ; fi + + # If the service account can describe the disk, attempt to attach and mount it + eval gcloud compute disks describe "${RAPIDS_MIRROR_DISK_NAME}" --region "${REGION}" > /tmp/mirror-disk.txt + if [[ "$?" != "0" ]] ; then return ; fi + + if ! grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then + gcloud compute instances attach-disk "$(hostname -s)" \ + --disk "${RAPIDS_DISK_FQN}" \ + --device-name "${RAPIDS_MIRROR_DISK_NAME}" \ + --disk-scope "regional" \ + --zone "${ZONE}" \ + --mode=ro + + mkdir -p "${rapids_mirror_mountpoint}" "${rapids_mirror_mountpoint}_ro" "${tmpdir}/overlay" "${tmpdir}/workdir" + mount -o ro "/dev/disk/by-id/google-${RAPIDS_MIRROR_DISK_NAME}" "${rapids_mirror_mountpoint}_ro" + mount -t overlay overlay -o lowerdir="${rapids_mirror_mountpoint}_ro",upperdir="${tmpdir}/overlay",workdir="${tmpdir}/workdir" "${rapids_mirror_mountpoint}" + fi + ${CONDA} config --add pkgs_dirs "${rapids_mirror_mountpoint}/conda_cache" +# echo "${CONDA}" config --set channel_alias "file://${rapids_mirror_mountpoint}/conda.anaconda.org" +# for channel in 'rapidsai' 'nvidia' 'pkgs/main' 'pkgs/r' 'conda-forge' ; do +# echo "${CONDA}" config --set \ +# "custom_channels.${channel}" "file://${rapids_mirror_mountpoint}/conda.anaconda.org/" +# done + # patch conda to install from 
mirror +# files_to_patch=$(find ${CONDA_ROOT}/ -name 'download.py' | grep conda/gateways/connection) +# perl -i -pe 's{if "://" not in self.url:}{if "file://" in self.url or "://" not in self.url:}' \ +# ${files_to_patch} +# perl -i -pe 's{self.url = url$}{self.url = url.replace("file://","")}' \ +# ${files_to_patch} + +# time for d in dask main nvidia r rapidsai conda-forge ; do +# find "${rapids_mirror_mountpoint}/conda.anaconda.org/${d}" -name '*.conda' -o -name '*.tar.bz2' -print0 | \ +# xargs -0 ln -sf -t "${pkgs_dir}" +# done + + # Point to the cache built with the mirror +# for channel in 'rapidsai' 'nvidia' 'main' 'r' 'conda-forge' ; do +# for plat in noarch linux-64 ; do +# echo ${CONDA} config --add pkgs_dirs "/srv/mirror/conda.anaconda.org/${channel}/${plat}" +# done +# done + +# for channel in pkgs/main pkgs/r ; do +# echo ${CONDA} config --add default_channels "file://${rapids_mirror_mountpoint}/conda.anaconda.org/${channel}" +# done + +} -function prepare_to_install(){ +function prepare_to_install() { readonly DEFAULT_CUDA_VERSION="12.4" CUDA_VERSION=$(get_metadata_attribute 'cuda-version' ${DEFAULT_CUDA_VERSION}) readonly CUDA_VERSION @@ -599,12 +725,8 @@ function prepare_to_install(){ readonly ROLE=$(get_metadata_attribute dataproc-role) readonly MASTER=$(get_metadata_attribute dataproc-master) - # RAPIDS config - RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'DASK') - readonly RAPIDS_RUNTIME - - readonly DEFAULT_DASK_RAPIDS_VERSION="24.08" - readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION}) + export CONDA_ROOT=/opt/conda/miniconda3 + export CONDA="${CONDA_ROOT}/bin/conda" # Dask config DASK_RUNTIME="$(get_metadata_attribute dask-runtime || echo 'standalone')" @@ -612,7 +734,8 @@ function prepare_to_install(){ readonly DASK_SERVICE=dask-cluster readonly DASK_WORKER_SERVICE=dask-worker readonly DASK_SCHEDULER_SERVICE=dask-scheduler - readonly 
DASK_CONDA_ENV="/opt/conda/miniconda3/envs/dask-rapids" + readonly RAPIDS_ENV_NAME="dask-rapids" + readonly RAPIDS_CONDA_ENV="${CONDA_ROOT}/envs/${RAPIDS_ENV_NAME}" # Knox config readonly KNOX_HOME=/usr/lib/knox @@ -620,42 +743,84 @@ function prepare_to_install(){ readonly KNOX_DASKWS_DIR="${KNOX_HOME}/data/services/daskws/0.1.0" enable_worker_service="0" + # RAPIDS config + RAPIDS_RUNTIME=$(get_metadata_attribute 'rapids-runtime' 'DASK') + readonly RAPIDS_RUNTIME + + readonly DEFAULT_DASK_RAPIDS_VERSION="23.11" + readonly RAPIDS_VERSION=$(get_metadata_attribute 'rapids-version' ${DEFAULT_DASK_RAPIDS_VERSION}) + + readonly PROJECT_ID="$(gcloud config get project)" + zone="$(/usr/share/google/get_metadata_value zone)" + export ZONE="$(echo $zone | sed -e 's:.*/::')" + export REGION="$(echo ${ZONE} | perl -pe 's/^(.+)-[^-]+$/$1/')" + + export RAPIDS_MIRROR_DISK="$(get_metadata_attribute 'rapids-mirror-disk' '')" + export RAPIDS_MIRROR_HOST="$(get_metadata_attribute 'rapids-mirror-host' '')" + + rapids_mirror_mountpoint=/srv/mirror + free_mem="$(awk '/^MemFree/ {print $2}' /proc/meminfo)" + # With a local conda mirror mounted, use reduced ram disk size + if [[ -n "${RAPIDS_MIRROR_DISK}" ]] ; then + min_mem=18500000 + pkgs_dir= + else + min_mem=33300000 + fi # Write to a ramdisk instead of churning the persistent disk - if [[ ${free_mem} -ge 5250000 ]]; then + if [[ ${free_mem} -ge ${min_mem} ]]; then tmpdir=/mnt/shm - mkdir -p /mnt/shm - mount -t tmpfs tmpfs /mnt/shm - - # Download conda packages to tmpfs - /opt/conda/miniconda3/bin/conda config --add pkgs_dirs /mnt/shm - mount -t tmpfs tmpfs /mnt/shm + mkdir -p "${tmpdir}" + mount -t tmpfs tmpfs "${tmpdir}" - # Download pip packages to tmpfs - pip config set global.cache-dir /mnt/shm || echo "unable to set global.cache-dir" - - # Download OS packages to tmpfs - if is_debuntu ; then - mount -t tmpfs tmpfs /var/cache/apt/archives - else - mount -t tmpfs tmpfs /var/cache/dnf - fi + # Minimum of 11G of capacity required 
for rapids package install via conda + # + 5G without rapids mirror mounted + mount -t tmpfs tmpfs "${tmpdir}" else tmpdir=/tmp fi + install_log="${tmpdir}/install.log" trap exit_handler EXIT + touch ~/.condarc + cp ~/.condarc ~/.condarc.default + + #"${CONDA}" config --set verbosity 3 + # Clean conda cache + clean_conda_cache + + mount_rapids_mirror + + if [[ -n "${RAPIDS_MIRROR_HOST}" ]] && nc -vz "${RAPIDS_MIRROR_HOST}" 80 > /dev/null 2>&1 ; then + for channel in 'conda-forge' 'rapidsai' 'nvidia' 'pkgs/r' 'pkgs/main' ; do + echo "${CONDA}" config --set \ + "custom_channels.${channel}" "http://${RAPIDS_MIRROR_HOST}/conda.anaconda.org/" + done + fi + + if grep -q "${rapids_mirror_mountpoint}" /proc/mounts ; then + # if we are using the mirror disk, install exclusively from its cache + extra_conda_args="--offline" + else + pkgs_dir="${tmpdir}/pkgs_dir" + mkdir -p "${pkgs_dir}" + "${CONDA}" config --add pkgs_dirs "${pkgs_dir}" + fi + # Monitor disk usage in a screen session if is_debuntu ; then + command -v screen || \ apt-get install -y -qq screen else + command -v screen || \ dnf -y -q install screen fi - df -h / | tee "${tmpdir}/disk-usage.log" - touch "${tmpdir}/keep-running-df" + df / > "/run/disk-usage.log" + touch "/run/keep-running-df" screen -d -m -US keep-running-df \ - bash -c "while [[ -f ${tmpdir}/keep-running-df ]] ; do df -h / | tee -a ${tmpdir}/disk-usage.log ; sleep 5s ; done" + bash -c "while [[ -f /run/keep-running-df ]] ; do df / | tee -a /run/disk-usage.log ; sleep 5s ; done" } prepare_to_install