- https://docs.google.com/document/d/1H-ddA11laPQf_1olwXRjEDbzNihxprjPr74pZ4Vdf2M/edit?usp=sharing
- mps: https://docs.nvidia.com/deploy/mps/index.html
- https://docs.google.com/document/d/1xZrrMBJV00VxW9XK8AMXU0QT01JGqn-xhGwexenhPSE/edit#heading=h.jw5js7865egx
# Install the NVIDIA Container Toolkit (adds NVIDIA's apt repo and key, then the package)
distribution=$(. /etc/os-release; echo $ID$VERSION_ID)
curl -s -L https://nvidia.github.io/libnvidia-container/gpgkey | sudo apt-key add -
curl -s -L https://nvidia.github.io/libnvidia-container/$distribution/libnvidia-container.list | sudo tee /etc/apt/sources.list.d/libnvidia-container.list
sudo apt-get update && sudo apt-get install -y nvidia-container-toolkit
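# Optional sanity check: both CLIs ship with nvidia-container-toolkit, so they
# should resolve once the install succeeds.
nvidia-ctk --version
nvidia-container-cli --version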
# Install the NVIDIA driver (Ubuntu): confirm the GPU is visible on the PCI bus,
# let ubuntu-drivers pick the recommended driver, then verify with nvidia-smi.
sudo add-apt-repository ppa:graphics-drivers/ppa && sudo apt-get update
lspci | grep -i nvidia
ubuntu-drivers devices
sudo ubuntu-drivers autoinstall
nvidia-smi
# Start from a clean slate: remove any existing k3s server/agent install.
/usr/local/bin/k3s-uninstall.sh
/usr/local/bin/k3s-agent-uninstall.sh
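# Assuming the node is being rebuilt from scratch, reinstall k3s with the
# stock installer before applying the manifests below:
curl -sfL https://get.k3s.io | sh -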
# Register a RuntimeClass so pods can opt into the NVIDIA containerd runtime.
cat <<EOF | kubectl apply -f -
apiVersion: node.k8s.io/v1
kind: RuntimeClass
metadata:
  name: nvidia
handler: nvidia
EOF
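# Optional check, assuming a default k3s install: on startup k3s regenerates
# its containerd config and should have detected the NVIDIA runtime on its own.
sudo grep -A2 nvidia /var/lib/rancher/k3s/agent/etc/containerd/config.toml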
# kubectl create -f https://raw.githubusercontent.com/NVIDIA/k8s-device-plugin/v0.16.1/nvidia-device-plugin.yml
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: nvidia-device-plugin-daemonset
  namespace: kube-system
spec:
  selector:
    matchLabels:
      name: nvidia-device-plugin-ds
  updateStrategy:
    type: RollingUpdate
  template:
    metadata:
      labels:
        name: nvidia-device-plugin-ds
    spec:
      runtimeClassName: nvidia # Added 😈
      tolerations:
        - key: nvidia.com/gpu
          operator: Exists
          effect: NoSchedule
      priorityClassName: "system-node-critical"
      containers:
        - image: nvcr.io/nvidia/k8s-device-plugin:v0.12.2
          name: nvidia-device-plugin-ctr
          env:
            - name: FAIL_ON_INIT_ERROR
              value: "false"
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
          volumeMounts:
            - name: device-plugin
              mountPath: /var/lib/kubelet/device-plugins
      volumes:
        - name: device-plugin
          hostPath:
            path: /var/lib/kubelet/device-plugins
EOF
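# Sanity check: once the plugin pod (label from the DaemonSet above) is
# Running, nvidia.com/gpu should appear under the node's Capacity/Allocatable.
kubectl -n kube-system get pods -l name=nvidia-device-plugin-ds
kubectl describe node | grep -i 'nvidia.com/gpu'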
# Test workload: a Deployment that requests one GPU and idles so we can exec in.
cat <<EOF | kubectl apply -f -
apiVersion: apps/v1
kind: Deployment
metadata:
  name: nvidia-deployment
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: nvidia
  template:
    metadata:
      labels:
        app: nvidia
    spec:
      runtimeClassName: nvidia
      restartPolicy: Always
      containers:
        - name: nvidia
          image: "nvidia/cuda:12.6.1-base-ubuntu22.04"
          command: [ "/bin/bash", "-c", "--" ]
          args: [ "while true; do sleep 30; done;" ]
          resources:
            limits:
              nvidia.com/gpu: 1
EOF
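# Smoke test: if the runtime class and device plugin are wired up correctly,
# nvidia-smi inside the pod should list the GPU.
kubectl exec -it deploy/nvidia-deployment -- nvidia-smi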
# Install Helm 3
curl -fsSL https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 | bash
helm version
# If you have installed the CRDs manually instead of with the `--set installCRDs=true` option added to your Helm install command, you should upgrade your CRD resources before upgrading the Helm chart:
kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.7.1/cert-manager.crds.yaml
# Add the Jetstack Helm repository
helm repo add jetstack https://charts.jetstack.io
# Update your local Helm chart repository cache
helm repo update
# Install the cert-manager Helm chart
helm install cert-manager jetstack/cert-manager \
--namespace cert-manager \
--create-namespace \
--version v1.7.1
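# Confirm cert-manager is up before installing Rancher:
kubectl get pods --namespace cert-manager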
helm repo add rancher-stable https://releases.rancher.com/server-charts/stable
helm repo update
# Point kubectl/helm at the k3s kubeconfig
echo "export KUBECONFIG=/etc/rancher/k3s/k3s.yaml" >> ~/.bash_profile
source ~/.bash_profile
kubectl create namespace cattle-system
helm install rancher rancher-stable/rancher \
--namespace cattle-system \
--set hostname=rancher.x.run \
--set bootstrapPassword=admin
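# Rancher can take a few minutes on first boot; wait for the rollout:
kubectl -n cattle-system rollout status deploy/rancher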
# Final check: all pods healthy, and the node reports the GPU resource.
k3s kubectl get pods -A
kubectl describe node