From 920bfbae4f015ebe52ee621de0710e32e8f8fbc2 Mon Sep 17 00:00:00 2001 From: Andre-Philippe Paquet Date: Thu, 5 Dec 2024 20:51:43 -0500 Subject: [PATCH] dynamic vfio --- gpu-old.sh | 131 ++++++++++++++++++++++++ gpu.sh | 176 +++++++++++--------------------- nixos/deskapp/configuration.nix | 53 ++-------- nixos/deskapp/gpu-switch.nix | 110 ++++++++++++++++++++ nixos/virt-gpu-passthrough.nix | 8 +- 5 files changed, 312 insertions(+), 166 deletions(-) create mode 100755 gpu-old.sh create mode 100644 nixos/deskapp/gpu-switch.nix diff --git a/gpu-old.sh b/gpu-old.sh new file mode 100755 index 0000000..f04d8b4 --- /dev/null +++ b/gpu-old.sh @@ -0,0 +1,131 @@ +#!/bin/bash +set -x + +# TODO: Move to a script written by nix +# TODO: Create a systemd service that switch to nvidia on boot +# TODO: Create a qemu hook to switch to vfio when starting a VM +# TODO: bullet proof the script (check if device already binded, PCI device identifier to bus address) + +# lspci -nn | grep '10de:2216' | awk '{print $1}' +# 01:00.0 + + + +GPU="0000:01:00.0" +GPU_PCI="pci_0000_01_00_0" +AUDIO="0000:01:00.1" +AUDIO_PCI="pci_0000_01_00_1" + +# function unbind() { +# echo "" +# } + +function nvidia() { + echo "switching to nvidia..." + + # virsh nodedev-detach $GPU_PCI + # virsh nodedev-detach $AUDIO_PCI + + echo $GPU >/sys/bus/pci/drivers/vfio-pci/unbind + echo $AUDIO >/sys/bus/pci/drivers/vfio-pci/unbind + + sleep 5 + + modprobe -a nvidia nvidia_modeset nvidia_uvm nvidia_drm + + sleep 5 + + echo $GPU >/sys/bus/pci/drivers/nvidia/bind + echo $AUDIO >/sys/bus/pci/drivers/nvidia/bind + + # modprobe -r vfio_pci vfio_iommu_type1 vfio + # modprobe -a nvidia nvidia_modeset nvidia_uvm nvidia_drm + + # virsh nodedev-reattach $GPU_PCI + # virsh nodedev-reattach $AUDIO_PCI +} + +function vfio() { + echo "switching to fvio..." 
+ + echo $GPU >/sys/bus/pci/drivers/nvidia/unbind + echo $AUDIO >/sys/bus/pci/drivers/nvidia/unbind + + echo $GPU >/sys/bus/pci/drivers/vfio-pci/bind + echo $AUDIO >/sys/bus/pci/drivers/vfio-pci/bind +} + +function attach() { + modprobe -r nvidia + modprobe -r nvidia_modeset + modprobe -r nvidia_uvm + modprobe -r nvidia_drm + + modprobe vfio_pci + modprobe vfio_iommu_type1 + modprobe vfio + + virsh nodedev-reattach $GPU_PCI + virsh nodedev-reattach $AUDIO_PCI + + # Unbind devices from VFIO + # echo $GPU >/sys/bus/pci/drivers/vfio-pci/unbind + # echo $AUDIO >/sys/bus/pci/drivers/vfio-pci/unbind + + # echo $GPU > /sys/bus/pci/devices/0000:01:00.0/driver/unbind + # echo $AUDIO > /sys/bus/pci/devices/0000:01:00.1/driver/unbind + + # 10de:2216 + # 10de:1aef + + # echo "10de 2216" > /sys/bus/pci/drivers/nvidia/new_id + # echo 0000:01:00.0 > /sys/bus/pci/drivers/nvidia/bind + + # echo $GPU >/sys/bus/pci/drivers_probe + # echo $AUDIO >/sys/bus/pci/drivers_probe + + # modprobe -r vfio_pci + # modprobe -r vfio_iommu_type1 + # modprobe -r vfio + + # virsh nodedev-reattach $GPU_PCI + # virsh nodedev-reattach $AUDIO_PCI + + # # Load NVIDIA drivers + # modprobe nvidia + # modprobe nvidia_modeset + # modprobe nvidia_uvm + # modprobe nvidia_drm + +} + +$1 + +# Bind devices to NVIDIA driver +# 01:00.0 VGA compatible controller: NVIDIA Corporation GA102 [GeForce RTX 3080 Lite Hash Rate] (rev a1) (prog-if 00 [VGA controller]) +# Subsystem: Micro-Star International Co., Ltd. [MSI] Device 389b +# Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- +# Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- +# Kernel driver in use: vfio-pci +# Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia + +# 01:00.1 Audio device: NVIDIA Corporation GA102 High Definition Audio Controller (rev a1) +# Subsystem: Micro-Star International Co., Ltd. 
[MSI] Device 389b +# Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- +# Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- +# Kernel driver in use: vfio-pci +# Kernel modules: snd_hda_intel diff --git a/gpu.sh b/gpu.sh index f04d8b4..05d6b68 100755 --- a/gpu.sh +++ b/gpu.sh @@ -1,131 +1,69 @@ -#!/bin/bash -set -x +#!/usr/bin/env bash +set -uo pipefail -# TODO: Move to a script written by nix -# TODO: Create a systemd service that switch to nvidia on boot -# TODO: Create a qemu hook to switch to vfio when starting a VM -# TODO: bullet proof the script (check if device already binded, PCI device identifier to bus address) - -# lspci -nn | grep '10de:2216' | awk '{print $1}' -# 01:00.0 +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" 1>&2 + exit 1 +fi +function get_bus() { + # takes a PCI device identifier (ex: 10de:2216) and returns the bus address (ex: 01:00.0) + lspci -nn | grep "$1" | awk '{print $1}' +} +function format_bus() { + # format bus address 01:00.0 to 0000:01:00.0 + echo "0000:${1}" +} -GPU="0000:01:00.0" -GPU_PCI="pci_0000_01_00_0" -AUDIO="0000:01:00.1" -AUDIO_PCI="pci_0000_01_00_1" +function get_bus_driver() { + # takes a bus address (ex: 0000:01:00.0) and returns the driver in use (ex: nvidia, vfio-pci) + echo $(lspci -nn -s $1 -k | grep "Kernel driver in use" | awk '{print $5}') +} -# function unbind() { -# echo "" -# } +function switch_driver() { + to_driver=$1 + + echo "Switching to $to_driver..." 
+ + gpu_bus=$(format_bus $(get_bus "10de:2216")) + audio_bus=$(format_bus $(get_bus "10de:1aef")) + + gpu_driver=$(get_bus_driver $gpu_bus) + if [ "$gpu_driver" == "$to_driver" ]; then + echo "GPU already using $to_driver driver" + exit 0 + fi + + if [ "$gpu_driver" != "" ]; then + echo "Unbinding GPU from $gpu_driver" + echo $gpu_bus >/sys/bus/pci/drivers/$gpu_driver/unbind + echo $audio_bus >/sys/bus/pci/drivers/$gpu_driver/unbind + sleep 2 + fi + + if [ "$to_driver" == "nvidia" ]; then + echo "Loading nvidia drivers..." + modprobe -a nvidia nvidia_modeset nvidia_uvm nvidia_drm + sleep 2 + elif [ "$to_driver" == "vfio-pci" ]; then + echo "Loading vfio drivers..." + modprobe -a vfio_pci vfio vfio_iommu_type1 + fi + + echo "Binding GPU to $to_driver" + echo $gpu_bus >/sys/bus/pci/drivers/$to_driver/bind + echo $audio_bus >/sys/bus/pci/drivers/$to_driver/bind +} function nvidia() { - echo "switching to nvidia..." - - # virsh nodedev-detach $GPU_PCI - # virsh nodedev-detach $AUDIO_PCI - - echo $GPU >/sys/bus/pci/drivers/vfio-pci/unbind - echo $AUDIO >/sys/bus/pci/drivers/vfio-pci/unbind - - sleep 5 - - modprobe -a nvidia nvidia_modeset nvidia_uvm nvidia_drm - - sleep 5 - - echo $GPU >/sys/bus/pci/drivers/nvidia/bind - echo $AUDIO >/sys/bus/pci/drivers/nvidia/bind - - # modprobe -r vfio_pci vfio_iommu_type1 vfio - # modprobe -a nvidia nvidia_modeset nvidia_uvm nvidia_drm - - # virsh nodedev-reattach $GPU_PCI - # virsh nodedev-reattach $AUDIO_PCI + switch_driver "nvidia" } function vfio() { - echo "switching to fvio..." 
- - echo $GPU >/sys/bus/pci/drivers/nvidia/unbind - echo $AUDIO >/sys/bus/pci/drivers/nvidia/unbind - - echo $GPU >/sys/bus/pci/drivers/vfio-pci/bind - echo $AUDIO >/sys/bus/pci/drivers/vfio-pci/bind + switch_driver "vfio-pci" } -function attach() { - modprobe -r nvidia - modprobe -r nvidia_modeset - modprobe -r nvidia_uvm - modprobe -r nvidia_drm - - modprobe vfio_pci - modprobe vfio_iommu_type1 - modprobe vfio - - virsh nodedev-reattach $GPU_PCI - virsh nodedev-reattach $AUDIO_PCI - - # Unbind devices from VFIO - # echo $GPU >/sys/bus/pci/drivers/vfio-pci/unbind - # echo $AUDIO >/sys/bus/pci/drivers/vfio-pci/unbind - - # echo $GPU > /sys/bus/pci/devices/0000:01:00.0/driver/unbind - # echo $AUDIO > /sys/bus/pci/devices/0000:01:00.1/driver/unbind - - # 10de:2216 - # 10de:1aef - - # echo "10de 2216" > /sys/bus/pci/drivers/nvidia/new_id - # echo 0000:01:00.0 > /sys/bus/pci/drivers/nvidia/bind - - # echo $GPU >/sys/bus/pci/drivers_probe - # echo $AUDIO >/sys/bus/pci/drivers_probe - - # modprobe -r vfio_pci - # modprobe -r vfio_iommu_type1 - # modprobe -r vfio - - # virsh nodedev-reattach $GPU_PCI - # virsh nodedev-reattach $AUDIO_PCI - - # # Load NVIDIA drivers - # modprobe nvidia - # modprobe nvidia_modeset - # modprobe nvidia_uvm - # modprobe nvidia_drm - -} - -$1 - -# Bind devices to NVIDIA driver -# 01:00.0 VGA compatible controller: NVIDIA Corporation GA102 [GeForce RTX 3080 Lite Hash Rate] (rev a1) (prog-if 00 [VGA controller]) -# Subsystem: Micro-Star International Co., Ltd. [MSI] Device 389b -# Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- -# Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- -# Kernel driver in use: vfio-pci -# Kernel modules: nvidiafb, nouveau, nvidia_drm, nvidia - -# 01:00.1 Audio device: NVIDIA Corporation GA102 High Definition Audio Controller (rev a1) -# Subsystem: Micro-Star International Co., Ltd. 
[MSI] Device 389b -# Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- DisINTx- -# Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- SERR- -# Kernel driver in use: vfio-pci -# Kernel modules: snd_hda_intel +CMD="$1" +shift +$CMD "$@" diff --git a/nixos/deskapp/configuration.nix b/nixos/deskapp/configuration.nix index 5162ef7..4f35029 100644 --- a/nixos/deskapp/configuration.nix +++ b/nixos/deskapp/configuration.nix @@ -1,16 +1,17 @@ -{ pkgs, config, ... }: +{ pkgs, ... }: { imports = [ - ./hardware-configuration.nix + ./gpu-switch.nix ./ha-ctrl.nix - ./virt + ./hardware-configuration.nix ./home-backup.nix + ./virt ./vms-backup.nix ../common.nix - ../network_bridge.nix ../dev.nix ../docker.nix + ../network_bridge.nix ../ups.nix ]; @@ -24,29 +25,6 @@ "pcie_aspm.policy=performance" ]; - - services.xserver.videoDrivers = [ "nvidia" "amdgpu" ]; - - # From https://nixos.wiki/wiki/Nvidia - hardware.nvidia = { - # Hinders with dynamic switching since it manages the card using KMS - # https://forums.developer.nvidia.com/t/unbinding-isolating-a-card-is-difficult-post-470/223134 - modesetting.enable = false; - powerManagement.enable = false; - powerManagement.finegrained = false; # TODO: check if can be enabled (test suspend) - open = false; - nvidiaSettings = false; # no need for settings menu - package = config.boot.kernelPackages.nvidiaPackages.stable; - }; - - # To test: docker run --rm -it --device=nvidia.com/gpu=all ubuntu:latest nvidia-smi - hardware.nvidia-container-toolkit.enable = true; - - environment.systemPackages = with pkgs; [ - nvtopPackages.nvidia - ]; - - networking.hostName = "deskapp"; # Drives (lsblk -f) @@ -78,12 +56,12 @@ services.xserver.enable = true; services.xserver.displayManager.lightdm.enable = true; services.xserver.desktopManager.xfce.enable = true; - services.xserver.displayManager.autoLogin.enable = true; - services.xserver.displayManager.autoLogin.user = "appaquet"; services.xserver = { 
- layout = "us"; - xkbVariant = ""; + xkb.layout = "us"; + xkb.variant = ""; }; + services.displayManager.autoLogin.enable = true; + services.displayManager.autoLogin.user = "appaquet"; # Enable sound with pipewire. hardware.pulseaudio.enable = false; @@ -93,18 +71,11 @@ alsa.enable = true; alsa.support32Bit = true; pulse.enable = true; - # If you want to use JACK applications, uncomment this - #jack.enable = true; - - # use the example session manager (no others are packaged yet so this is enabled by default, - # no need to redefine it in your config for now) - #media-session.enable = true; }; - # Install firefox + # Programs & services programs.firefox.enable = true; - - # Enable the OpenSSH daemon. + services.printing.enable = false; services.openssh.enable = true; # Open ports in the firewall. @@ -113,8 +84,6 @@ # Or disable the firewall altogether. networking.firewall.enable = false; - services.printing.enable = false; - # This value determines the NixOS release from which the default # settings for stateful data, like file locations and database versions # on your system were taken. It‘s perfectly fine and recommended to leave diff --git a/nixos/deskapp/gpu-switch.nix b/nixos/deskapp/gpu-switch.nix new file mode 100644 index 0000000..5d7a60d --- /dev/null +++ b/nixos/deskapp/gpu-switch.nix @@ -0,0 +1,110 @@ +{ lib, pkgs, config, ... 
}: + +let + # Keep in sync with ./virt/default.nix + gpuPci = "10de:2216"; + audioPci = "10de:1aef"; + + gpuSwitch = pkgs.writeShellScriptBin "gpu-switch" '' + #!/usr/bin/env bash + set -uo pipefail + + if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" 1>&2 + exit 1 + fi + + function get_bus() { + # takes a PCI device identifier (ex: 10de:2216) and returns the bus address (ex: 01:00.0) + lspci -nn | grep "$1" | awk '{print $1}' + } + + function format_bus() { + # format bus address 01:00.0 to 0000:01:00.0 + echo "0000:$1" + } + + function get_bus_driver() { + # takes a bus address (ex: 0000:01:00.0) and returns the driver in use (ex: nvidia, vfio-pci) + echo $(lspci -nn -s $1 -k | grep "Kernel driver in use" | awk '{print $5}') + } + + function switch_driver() { + to_driver=$1 + + echo "Switching to $to_driver..." + + gpu_bus=$(format_bus $(get_bus "${gpuPci}")) + audio_bus=$(format_bus $(get_bus "${audioPci}")) + + gpu_driver=$(get_bus_driver $gpu_bus) + if [ "$gpu_driver" == "$to_driver" ]; then + echo "GPU already using $to_driver driver" + exit 0 + fi + + if [ "$gpu_driver" != "" ]; then + echo "Unbinding GPU from $gpu_driver" + echo $gpu_bus >/sys/bus/pci/drivers/$gpu_driver/unbind + echo $audio_bus >/sys/bus/pci/drivers/$gpu_driver/unbind + sleep 5 + fi + + if [ "$to_driver" == "nvidia" ]; then + echo "Loading nvidia drivers..." + modprobe -a nvidia nvidia_modeset nvidia_uvm nvidia_drm + sleep 5 + elif [ "$to_driver" == "vfio-pci" ]; then + echo "Loading vfio drivers..." + modprobe -a vfio_pci vfio vfio_iommu_type1 + sleep 5 + fi + + sleep 2 + echo "Binding GPU to $to_driver" + echo $gpu_bus >/sys/bus/pci/drivers/$to_driver/bind + echo $audio_bus >/sys/bus/pci/drivers/$to_driver/bind + } + + function nvidia() { + switch_driver "nvidia" + } + + function vfio() { + switch_driver "vfio-pci" + } + + CMD="$1" + shift + $CMD "$@" + ''; +in +{ + # Enable both nvidia & amd drivers, even if nvidia won't be used for display. 
This allows + # installing drivers. + services.xserver.videoDrivers = [ "nvidia" "amdgpu" ]; + + # From https://nixos.wiki/wiki/Nvidia + hardware.nvidia = { + # Interferes with dynamic switching since it manages the card using KMS + # https://forums.developer.nvidia.com/t/unbinding-isolating-a-card-is-difficult-post-470/223134 + modesetting.enable = false; + + powerManagement.enable = false; + powerManagement.finegrained = false; + + open = false; + + nvidiaSettings = false; # no need for settings menu + + package = config.boot.kernelPackages.nvidiaPackages.production; + }; + + # To test: docker run --rm -it --device=nvidia.com/gpu=all ubuntu:latest nvidia-smi + hardware.nvidia-container-toolkit.enable = true; + + environment.systemPackages = with pkgs; [ + nvtopPackages.nvidia + gpuSwitch + ]; +} diff --git a/nixos/virt-gpu-passthrough.nix b/nixos/virt-gpu-passthrough.nix index d3bcc89..d0b7079 100644 --- a/nixos/virt-gpu-passthrough.nix +++ b/nixos/virt-gpu-passthrough.nix @@ -15,20 +15,18 @@ in devices = lib.mkOption { type = lib.types.listOf lib.types.str; default = [ ]; - description = "Device IDs to pass through. Use `iommulist | grep NVIDIA` to find"; + description = "PCI identifiers to passthrough"; }; }; config = lib.mkIf cfg.enable { boot = { # Load these kernel modules before everything else - # load vfio before nouveau drivers so that vfio claims the gpu first + # so that nvidia drivers don't claim the gpu kernelModules = [ "vfio_pci" "vfio" "vfio_iommu_type1" - - "nouveau" ]; # Enable kernel modules, assign gpu to vfio @@ -40,6 +38,6 @@ in ]; }; - hardware.opengl.enable = true; + hardware.graphics.enable = true; }; }