diff --git a/cloud-service-providers/google-cloud/gke/README.md b/cloud-service-providers/google-cloud/gke/README.md
index b70f655..07e5292 100644
--- a/cloud-service-providers/google-cloud/gke/README.md
+++ b/cloud-service-providers/google-cloud/gke/README.md
@@ -106,10 +106,12 @@ cd nim-deploy/cloud-service-providers/google-cloud/gke
 
     | Variable | Description | Default | Need update? |
     |---|---|---|---|
-    | `registry_server` | NVIDIA Registry that hosts the images | `nvcr.io` | *No* |
+    | `ngc_registry_server` | NVIDIA Registry that hosts the images | `nvcr.io` | *No* |
     | `ngc_api_key` | NGC API Key from NVIDIA | <> | *Yes* |
-    | `repository` | NIM image | `nvcr.io/nim/meta/llama3-8b-instruct` | *No* |
-    | `tag` | Tag of image | `1.0.0` | *No* |
+    | `ngc_nim_repository` | NIM image | `nvcr.io/nim/meta/llama3-8b-instruct` | *No* |
+    | `ngc_nim_tag` | Tag of NIM image | `1.0.0` | *No* |
+    | `ngc_transfer_repository` | NGC transfer image | `nvcr.io/nim/meta/llama3-8b-instruct` | *No* |
+    | `ngc_transfer_tag` | Tag of NGC transfer image | `1.0.0` | *No* |
     | `model_name` | NIM Model name | `meta/llama3-8b-instruct` | *No* |
     | `gpu_limits` | GPU Limits | `1` | *No* |
 
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/artifacts/Dockerfile b/cloud-service-providers/google-cloud/gke/infra/3-config/artifacts/Dockerfile
new file mode 100644
index 0000000..1e661b0
--- /dev/null
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/artifacts/Dockerfile
@@ -0,0 +1,4 @@
+# Debian base image with aria2 and pigz installed (ngc_pull.sh calls aria2c).
+FROM debian:bookworm-slim
+RUN apt-get update && apt-get install -y aria2 pigz && rm -rf /var/lib/apt/lists/*
+ENTRYPOINT ["/bin/sh"]
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/fetch-ngc-url.sh b/cloud-service-providers/google-cloud/gke/infra/3-config/fetch-ngc-url.sh
new file mode 100755
index 0000000..683aace
--- /dev/null
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/fetch-ngc-url.sh
@@ -0,0 +1,19 @@
+#!/bin/sh
+#
+# Request a download URL for the NGC bundle from SERVICE_FQDN and print it on stdout.
+# Reads NIM_GCS_BUCKET, NGC_EULA_TEXT, SERVICE_FQDN and GCS_FILENAME from the
+# environment and writes the request payload to req.cred.json in the current
+# directory.
+
+# use --token-format=full for print-identity-token if using GCE VM.
+cat <<EOF > req.cred.json
+{
+  "bucket": "${NIM_GCS_BUCKET}",
+  "text": "${NGC_EULA_TEXT}",
+  "textb64": "$(echo ${NGC_EULA_TEXT} | base64 -w0)",
+  "jwt": "$(gcloud auth print-identity-token)"
+}
+EOF
+
+HTTP_URL="$(curl -s -X POST -H 'accept: application/json' -H 'Content-Type: application/json' -d @req.cred.json "https://${SERVICE_FQDN}/v1/request/${GCS_FILENAME}" | sed 's/.*\(https.*\)\\\\n.*/\1/g')"
+echo -n "$HTTP_URL"
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache-values.yaml b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache-values.yaml
index e6b4bfa..36d911f 100644
--- a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache-values.yaml
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache-values.yaml
@@ -4,7 +4,7 @@ extraVolumes:
       driver: gcsfuse.csi.storage.gke.io
       volumeAttributes:
         bucketName: "ngc-gcs-cache"
-        mountOptions: "max-conns-per-host=0"
+        mountOptions: "implicit-dirs,max-conns-per-host=0"
 extraVolumeMounts:
   cache-volume:
     mountPath: /upload-dir
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/files/ngc_pull.sh b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/files/ngc_pull.sh
new file mode 100644
index 0000000..86e82c2
--- /dev/null
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/files/ngc_pull.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Populate /upload-dir with the NIM model cache.
+# If NGC_BUNDLE_URL is set, download that tar bundle and unpack it into
+# $NIM_CACHE_PATH/cache; otherwise run download-to-cache. The contents of
+# $NIM_CACHE_PATH/cache are then copied to /upload-dir.
+
+set -euo pipefail
+
+# NGC_BUNDLE_URL is only injected when model.ngcBundleURLSecret is set; default
+# it to empty so `set -u` does not abort before the fallback branch can run.
+if [ -n "${NGC_BUNDLE_URL:-}" ]; then
+  # Fetch and extract from the provided URL, with max concurrency
+  # -p: do not fail if the directory already exists on a re-run
+  mkdir -p "$NIM_CACHE_PATH/cache"
+  MODEL_BUNDLE_FILENAME="model.tar"
+  aria2c -x 16 -s 16 -j 10 --dir "$NIM_CACHE_PATH/cache" --out="$MODEL_BUNDLE_FILENAME" "$NGC_BUNDLE_URL"
+  #pigz -dc "$NIM_CACHE_PATH/cache/$MODEL_BUNDLE_FILENAME" | tar xf "$NIM_CACHE_PATH/cache/$MODEL_BUNDLE_FILENAME" -C "$NIM_CACHE_PATH/cache"
+  tar xf "$NIM_CACHE_PATH/cache/$MODEL_BUNDLE_FILENAME" -C "$NIM_CACHE_PATH/cache"
+  #tar -xzf "$NIM_CACHE_PATH/cache/model.tar.gz" -C "$NIM_CACHE_PATH/cache"
+  rm "$NIM_CACHE_PATH/cache/$MODEL_BUNDLE_FILENAME"
+else
+  # Fetch directly from NGC to $NIM_CACHE_PATH
+  # NOTE(review): the copy below reads $NIM_CACHE_PATH/cache; confirm that
+  # download-to-cache populates that subdirectory.
+  download-to-cache
+fi
+
+find "$NIM_CACHE_PATH/cache" -type d -printf '%P\n' | xargs -P 100 -I {} mkdir -p /upload-dir/{}
+find "$NIM_CACHE_PATH/cache" -type f,l -printf '%P\n' | xargs -P 100 -I {} cp --no-dereference "$NIM_CACHE_PATH/cache/{}" /upload-dir/{}
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/configmap.yaml b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/configmap.yaml
new file mode 100644
index 0000000..744f03d
--- /dev/null
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/configmap.yaml
@@ -0,0 +1,11 @@
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: {{ .Release.Name }}-scripts-configmap
+  labels:
+    {{- include "nim-llm.labels" . | nindent 4 }}
+data:
+  ngc_pull.sh: |-
+{{ .Files.Get "files/ngc_pull.sh" | indent 4 }}
+
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/job.yaml b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/job.yaml
index a111cd1..00f4dcf 100644
--- a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/job.yaml
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/job.yaml
@@ -34,18 +34,25 @@ spec:
             {{- toYaml .Values.containerSecurityContext | nindent 12 }}
           image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}"
           imagePullPolicy: {{ .Values.image.pullPolicy }}
-          command:
-            - /bin/bash
-            - -c
-            - "download-to-cache && find $NIM_CACHE_PATH -type d -printf '%P\\n' | xargs -P 100 -I {} mkdir -p /upload-dir/{} && find $NIM_CACHE_PATH -type f,l -printf '%P\\n' | xargs -P 100 -I {} cp --no-dereference $NIM_CACHE_PATH/{} /upload-dir/{}"
+          command: ["/bin/sh", "-c"]
+          args: ["/scripts/ngc_pull.sh"]
           env:
             - name: NIM_CACHE_PATH
               value: {{ .Values.model.nimCache | quote }}
+            {{- if .Values.model.ngcAPISecret }}
             - name: NGC_API_KEY
               valueFrom:
                 secretKeyRef:
                   name: {{ .Values.model.ngcAPISecret }}
                   key: NGC_API_KEY
+            {{- end }}
+            {{- if .Values.model.ngcBundleURLSecret }}
+            - name: NGC_BUNDLE_URL
+              valueFrom:
+                secretKeyRef:
+                  name: {{ .Values.model.ngcBundleURLSecret }}
+                  key: NGC_BUNDLE_URL
+            {{- end }}
           resources:
             {{- toYaml .Values.resources | nindent 12 }}
           volumeMounts:
@@ -56,6 +63,8 @@ spec:
               {{- else }}
               mountPath: {{ .Values.model.nimCache }}
               {{- end }}
+            - mountPath: /scripts
+              name: scripts-volume
             {{- if .Values.extraVolumeMounts }}
             {{- range $k, $v := .Values.extraVolumeMounts }}
             - name: {{ $k }}
@@ -92,6 +101,10 @@ spec:
           {{- else }}
           emptyDir: {}
           {{- end }}
+        - name: scripts-volume
+          configMap:
+            name: {{ .Release.Name }}-scripts-configmap
+            defaultMode: 0555
         {{- if .Values.extraVolumes }}
         {{- range $k, $v := .Values.extraVolumes }}
         - name: {{ $k }}
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/pv.yaml b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/pv.yaml
index 62275e4..c678b0b 100644
--- a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/pv.yaml
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/templates/pv.yaml
@@ -13,7 +13,9 @@ spec:
   {{- end }}
   {{- if .Values.persistence.mountOptions }}
   mountOptions:
-    - {{ .Values.persistence.mountOptions | quote }}
+    {{- range .Values.persistence.mountOptions }}
+    - {{ . | quote }}
+    {{- end }}
   {{- end }}
   {{- if .Values.persistence.csi }}
   csi:
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/values.yaml b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/values.yaml
index 16b16b4..e63138b 100644
--- a/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/values.yaml
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/helm/ngc-cache/values.yaml
@@ -108,6 +108,7 @@ model: # most of these values only matter if not using customCommand
   nimCache: /model-store
   name: my-model # optionsl name of the model in the OpenAI API -- used in `helm test`
   ngcAPISecret: ngc-api
+  ngcBundleURLSecret: ngc-bundle-url
   openaiPort: 8000
   labels: {} # any extra labels desired on deployed pods
   jsonLogging: true
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/main.tf b/cloud-service-providers/google-cloud/gke/infra/3-config/main.tf
index 14b44e8..7954d63 100644
--- a/cloud-service-providers/google-cloud/gke/infra/3-config/main.tf
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/main.tf
@@ -48,13 +48,37 @@ resource "null_resource" "get-credentials" {
 }
 
 
+data "local_file" "ngc-eula" {
+  filename = "${path.module}/NIM_GKE_GCS_SIGNED_URL_EULA"
+}
+
+resource "null_resource" "get-signed-ngc-bundle-url" {
+  triggers = {
+    shell_hash = "${sha256(file("${path.module}/fetch-ngc-url.sh"))}"
+  }
+  provisioner "local-exec" {
+    command = "${path.module}/fetch-ngc-url.sh > ${path.module}/ngc_signed_url.txt"
+    environment = {
+      NGC_EULA_TEXT = "${data.local_file.ngc-eula.content}"
+      NIM_GCS_BUCKET = "${var.ngc_bundle_gcs_bucket}"
+      GCS_FILENAME = "${var.ngc_bundle_filename}"
+      SERVICE_FQDN = "nim-gke-gcs-signed-url-722708171432.us-central1.run.app"
+    }
+  }
+}
+
+data "local_file" "ngc-bundle-url" {
+  filename = "${path.module}/ngc_signed_url.txt"
+  depends_on = [null_resource.get-signed-ngc-bundle-url]
+}
+
 resource "kubernetes_namespace" "nim" {
   metadata {
     name = "nim"
   }
 }
 
-resource "kubernetes_secret" "registry_secret" {
+resource "kubernetes_secret" "ngc_registry_secret" {
   metadata {
     name = "registry-secret"
     namespace = "nim"
@@ -65,7 +89,7 @@ resource "kubernetes_secret" "registry_secret" {
   data = {
     ".dockerconfigjson" = jsonencode({
       "auths" = {
-        "${var.registry_server}" = {
+        "${var.ngc_registry_server}" = {
           "username" = var.ngc_username
           "password" = var.ngc_api_key
           "auth" = base64encode("${var.ngc_username}:${var.ngc_api_key}")
@@ -90,7 +114,21 @@ resource "kubernetes_secret" "ngc_api" {
   }
 
   depends_on = [kubernetes_namespace.nim]
+}
+
+resource "kubernetes_secret" "ngc_bundle_url" {
+  metadata {
+    name = "ngc-bundle-url"
+    namespace = "nim"
+  }
+
+  type = "Opaque" # Generic secret type
+
+  data = {
+    "NGC_BUNDLE_URL" = "${data.local_file.ngc-bundle-url.content}"
+  }
 
+  depends_on = [kubernetes_namespace.nim]
 }
 
 resource "kubernetes_service_account" "ngc_gcs_ksa" {
@@ -101,9 +139,12 @@ resource "kubernetes_service_account" "ngc_gcs_ksa" {
   depends_on = [kubernetes_namespace.nim]
 }
 
+resource "random_uuid" "gcs_cache_uuid" {
+}
+
 resource "google_storage_bucket" "ngc_gcs_cache" {
   project = data.google_project.current.name
-  name = "${data.google_project.current.name}-ngc-gcs-cache"
+  name = "ngc-gcs-cache-${random_uuid.gcs_cache_uuid.result}"
   location = "US"
   force_destroy = true
 
@@ -143,12 +184,12 @@ resource "helm_release" "ngc_to_gcs_transfer" {
 
   set {
     name = "image.repository"
-    value = var.repository
+    value = var.ngc_transfer_repository
   }
 
   set {
     name = "image.tag"
-    value = var.tag
+    value = var.ngc_transfer_tag
   }
 
   set {
@@ -166,9 +207,9 @@ resource "helm_release" "ngc_to_gcs_transfer" {
     value = var.gpu_limits
   }
 
-  depends_on = [kubernetes_secret.ngc_api, google_storage_bucket_iam_binding.ngc_gcs_ksa_binding]
+  depends_on = [kubernetes_secret.ngc_api, kubernetes_secret.ngc_bundle_url, google_storage_bucket_iam_binding.ngc_gcs_ksa_binding]
 
-  timeout = 900
+  timeout = 3600
   wait = true
 }
 
@@ -184,17 +225,17 @@ resource "helm_release" "my_nim" {
 
   set {
     name = "csi.volumeAttributes.bucketName"
     value = google_storage_bucket.ngc_gcs_cache.name
   }
 
   set {
     name = "image.repository"
-    value = var.repository
+    value = var.ngc_nim_repository
   }
 
   set {
     name = "image.tag"
-    value = var.tag
+    value = var.ngc_nim_tag
   }
 
   set {
diff --git a/cloud-service-providers/google-cloud/gke/infra/3-config/variables.tf b/cloud-service-providers/google-cloud/gke/infra/3-config/variables.tf
index bff1a79..cfc7763 100644
--- a/cloud-service-providers/google-cloud/gke/infra/3-config/variables.tf
+++ b/cloud-service-providers/google-cloud/gke/infra/3-config/variables.tf
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 
-variable "registry_server" {
+variable "ngc_registry_server" {
   type = string
   default = "nvcr.io"
   description = "Registry that hosts the NIM images"
@@ -33,16 +33,36 @@ variable "ngc_api_key" {
   sensitive = true
 }
 
-variable "repository" {
+variable "ngc_transfer_repository" {
+  type = string
+  description = "Docker image of NGC transfer container"
+}
+
+variable "ngc_transfer_tag" {
+  type = string
+  description = "Docker repository tag of the NGC transfer container"
+}
+
+variable "ngc_nim_repository" {
   type = string
   description = "Docker image of NIM container"
 }
 
-variable "tag" {
+variable "ngc_nim_tag" {
   type = string
   description = "Docker repository tag of NIM container"
 }
 
+variable "ngc_bundle_gcs_bucket" {
+  type = string
+  description = "GCS bucket containing NGC bucket with NIM profiles"
+}
+
+variable "ngc_bundle_filename" {
+  type = string
+  description = "Filename containing NIM profiles from NGC"
+}
+
 variable "model_name" {
   type = string
   description = "Name of the NIM model"
diff --git a/helm/nim-llm/templates/deployment.yaml b/helm/nim-llm/templates/deployment.yaml
index 52beadb..88e5e93 100644
--- a/helm/nim-llm/templates/deployment.yaml
+++ b/helm/nim-llm/templates/deployment.yaml
@@ -170,11 +170,13 @@ spec:
           env:
             - name: NIM_CACHE_PATH
               value: {{ .Values.model.nimCache | quote }}
+            {{- if .Values.model.ngcAPISecret }}
             - name: NGC_API_KEY
               valueFrom:
                 secretKeyRef:
                   name: {{ .Values.model.ngcAPISecret }}
                   key: NGC_API_KEY
+            {{- end }}
             - name: OUTLINES_CACHE_DIR
               value: /tmp/outlines
             - name: NIM_SERVER_PORT