From fbc215c1b98d89d923b5d56572c848cc073920f3 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Thu, 17 Aug 2023 09:59:51 -0700 Subject: [PATCH] DLPX-87572 sdb: want live kernel tests to find kernel regressions early = Problem With our switch to the new v5.15 kernel a subset of SDB commands broke without us realizing until we actually needed them. Our regression dumps help us ensure we don't introduce regressions for older kernels when developing new features but they can't help us in detecting changes in the upstream kernel or ZFS that break our commands. = This Patch This patch attempts to provide a rudimentary mechanism for catching regressions introduced by the upstream Ubuntu kernels by running a few basic SDB commands in a Github action that's run nightly and for every PR. Specifically this patch makes it so we have such a test for each Ubuntu LTS kernel starting from 20.04 (currently the `ubuntu-latest` Github runner tag points to 22.04 so we'd test that twice but in the future that tag will point to 24.04, etc...). We also test against all the available Python versions for each Ubuntu version to further ensure SDB's compatibility with future Python versions. = Misc Notes In order to use SDB in the Github runner I had to introduce an extra script that downloads the kernel's debug info. See the `install-live-kernel-dbg.sh` script for more info. I also made sure to decouple the apt-install of the python-dev files to its own shell script too as different Ubuntu versions ship with different Python versions. See `install-python-dev.sh` for more info. = Potential Future Items In the future we may want to detect whenever our ZFS commands are getting out of date. `test_live_kernel.sh` has a way of detecting whether the ZFS module is installed and running a few ZFS commands on the live kernel. 
The idea is that we can either introduce Github Actions like the upstream openzfs that install our kernel module to the runner and run the commands there OR we can create a BlackBox test that clones the repo and runs this script. --- .github/scripts/install-libkdumpfile.sh | 1 - .github/scripts/install-live-kernel-dbg.sh | 23 ++++++ .github/scripts/install-python-dev.sh | 9 ++ .github/workflows/main.yml | 95 ++++++++++++++++------ tests/scripts/test_live_kernel.sh | 81 ++++++++++++++++++ 5 files changed, 185 insertions(+), 24 deletions(-) create mode 100755 .github/scripts/install-live-kernel-dbg.sh create mode 100755 .github/scripts/install-python-dev.sh create mode 100755 tests/scripts/test_live_kernel.sh diff --git a/.github/scripts/install-libkdumpfile.sh b/.github/scripts/install-libkdumpfile.sh index f99b9ac..2ba0e63 100755 --- a/.github/scripts/install-libkdumpfile.sh +++ b/.github/scripts/install-libkdumpfile.sh @@ -8,7 +8,6 @@ # sudo apt update sudo apt install autoconf automake liblzo2-dev libsnappy1v5 libtool pkg-config zlib1g-dev -sudo apt install python3.8-dev python3.9-dev git clone https://github.com/ptesarik/libkdumpfile.git diff --git a/.github/scripts/install-live-kernel-dbg.sh b/.github/scripts/install-live-kernel-dbg.sh new file mode 100755 index 0000000..f7d876a --- /dev/null +++ b/.github/scripts/install-live-kernel-dbg.sh @@ -0,0 +1,23 @@ +#!/bin/bash -eux + +# uname -a +# uname -r +# cat /etc/apt/sources.list +# sudo apt-get clean +# sudo apt-get update +# echo "deb http://ddebs.ubuntu.com $(lsb_release -cs) main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/ddebs.list +# echo "deb http://ddebs.ubuntu.com $(lsb_release -cs)-updates main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/ddebs.list +# echo "deb http://ddebs.ubuntu.com $(lsb_release -cs)-proposed main restricted universe multiverse" | sudo tee -a /etc/apt/sources.list.d/ddebs.list +# sudo apt install ubuntu-dbgsym-keyring +# sudo 
apt-get clean +# sudo apt-get update +# sudo apt-get install -y linux-image-$(uname -r)-dbgsym + +kvers=$(uname -r) +ddeb_file=$(curl http://ddebs.ubuntu.com/pool/main/l/linux-azure/ | + grep -Eo ">linux-image-(unsigned-)?$kvers(.*)amd64\.ddeb" | + cut -c2-) + +wget http://ddebs.ubuntu.com/pool/main/l/linux-azure/$ddeb_file +sudo dpkg -i $ddeb_file +rm $ddeb_file diff --git a/.github/scripts/install-python-dev.sh b/.github/scripts/install-python-dev.sh new file mode 100755 index 0000000..79e0d16 --- /dev/null +++ b/.github/scripts/install-python-dev.sh @@ -0,0 +1,9 @@ +#!/bin/bash -eux + +sudo apt update +sudo apt install python3.$(python3 --version | cut -d . -f 2)-dev + +# +# Debug statements +# +echo $(which python3) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 3cade6b..1aaa375 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -6,24 +6,6 @@ on: jobs: # - # Verify the build and installation of SDB. - # - install: - runs-on: ubuntu-20.04 - strategy: - matrix: - python-version: [3.8, 3.9] - steps: - - uses: actions/checkout@v2 - - uses: actions/setup-python@v1 - with: - python-version: ${{ matrix.python-version }} - - run: python3 setup.py install - # - # The statement below is used for debugging the Github job. - # - - run: python3 --version - # # Verify "pylint" runs successfully. # # Note, we need to have "drgn" installed in order to run "pylint". 
@@ -34,7 +16,7 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v4 with: python-version: '3.8' - run: ./.github/scripts/install-drgn.sh @@ -55,16 +37,17 @@ jobs: runs-on: ubuntu-20.04 strategy: matrix: - python-version: [3.8, 3.9] + python-version: ['3.8', '3.9'] dump: [dump.201912060006.tar.lzma, dump.202303131823.tar.gz] env: AWS_DEFAULT_REGION: 'us-west-2' steps: - uses: actions/checkout@v2 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} - run: python3 -m pip install aws python-config pytest pytest-cov + - run: ./.github/scripts/install-python-dev.sh - run: ./.github/scripts/install-libkdumpfile.sh - run: ./.github/scripts/install-drgn.sh - run: ./.github/scripts/download-dump-from-s3.sh ${{ matrix.dump }} @@ -73,13 +56,79 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} # + # Verify common linux SDB commands can run on vanilla ubuntu kernels: 20.04 + # + live_ubuntu_20_04: + runs-on: ubuntu-20.04 + strategy: + matrix: + python-version: ['3.8', '3.9'] + env: + AWS_DEFAULT_REGION: 'us-west-2' + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - run: python3 -m pip install aws python-config pytest pytest-cov + - run: ./.github/scripts/install-python-dev.sh + - run: sudo ./.github/scripts/install-libkdumpfile.sh + - run: sudo ./.github/scripts/install-drgn.sh + - run: sudo ./.github/scripts/install-live-kernel-dbg.sh + - run: sudo python3 ./setup.py install + - run: sudo ./tests/scripts/test_live_kernel.sh + # + # Verify common linux SDB commands can run on vanilla ubuntu kernels: 22.04 + # + live_ubuntu_22_04: + runs-on: ubuntu-22.04 + strategy: + matrix: + python-version: ['3.10' , '3.11'] + env: + AWS_DEFAULT_REGION: 'us-west-2' + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: ${{ 
matrix.python-version }} + - run: python3 -m pip install aws python-config pytest pytest-cov + - run: ./.github/scripts/install-python-dev.sh + - run: sudo ./.github/scripts/install-libkdumpfile.sh + - run: sudo ./.github/scripts/install-drgn.sh + - run: sudo ./.github/scripts/install-live-kernel-dbg.sh + - run: sudo python3 ./setup.py install + - run: sudo ./tests/scripts/test_live_kernel.sh + # + # Verify common linux SDB commands can run on the latest Github Ubuntu kernel + # + live_ubuntu_latest: + runs-on: ubuntu-latest + strategy: + matrix: + python-version: ['3.10' , '3.11'] + env: + AWS_DEFAULT_REGION: 'us-west-2' + steps: + - uses: actions/checkout@v2 + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - run: python3 -m pip install aws python-config pytest pytest-cov + - run: ./.github/scripts/install-python-dev.sh + - run: sudo ./.github/scripts/install-libkdumpfile.sh + - run: sudo ./.github/scripts/install-drgn.sh + - run: sudo ./.github/scripts/install-live-kernel-dbg.sh + - run: sudo python3 ./setup.py install + - run: sudo ./tests/scripts/test_live_kernel.sh + # # Verify "yapf" runs successfully. 
# yapf: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v4 with: python-version: '3.8' - run: python3 -m pip install yapf @@ -104,7 +153,7 @@ jobs: runs-on: ubuntu-20.04 steps: - uses: actions/checkout@v2 - - uses: actions/setup-python@v1 + - uses: actions/setup-python@v4 with: python-version: '3.8' - run: ./.github/scripts/install-drgn.sh diff --git a/tests/scripts/test_live_kernel.sh b/tests/scripts/test_live_kernel.sh new file mode 100755 index 0000000..f2331ad --- /dev/null +++ b/tests/scripts/test_live_kernel.sh @@ -0,0 +1,81 @@ +#!/bin/bash -eu + +scmds=( + # Test `stacks` + "stacks" + + # Test `dmesg` + "dmesg" + + # Test `lxlist` + "addr modules | lxlist module list | member name" + + # Test `slabs` and `percpu` + "slabs | filter \"obj.name == 'kmalloc-8'\" | member cpu_slab | percpu 0 1" + + # Test `pid` + "pid 1" + + # Test `find_task` + "find_task 1 2" + + # Test `threads` + "threads" + + # Test `walk` and `slub_cache` walker + "slabs | filter \"obj.name == 'TCP'\" | walk" + + # Test `rbtree` walker + "addr vmap_area_root | rbtree vmap_area rb_node" + + # Test `fget` + "find_task 1 | fget 1 4" +) + +for ((i = 0; i < ${#scmds[@]}; i++)); do + sudo /usr/local/bin/sdb -e "${scmds[$i]}" +done + +zfs_scmds=( + # Test `arc` + "arc" + + # Test `zfs_dbgmsg` + "zfs_dbgmsg" + + # Test `zio` + "zio" + + # Test `spa` + "spa -vmH" + + # Test `vdev` and `metaslab` + "spa | vdev | metaslab" + + # Test `vdev` and `metaslab` and `range_tree` + "spa | vdev | metaslab | head 1 | member ms_allocatable | range_tree" + + # Test `dbuf` + "dbuf" + + # Test `dbuf` and `blkptr` + "dbuf | head 1 | member db_blkptr | blkptr" + + # Test `spa` and `zhist` + "spa | member spa_normal_class.mc_histogram | zhist" + + # Test `avl` + "address spa_namespace_avl | avl" + + # Test `spl_kmem_caches` + "spl_kmem_caches" +) + +if $(lsmod | grep -q zfs); then + echo "Detected ZFS kernel module... 
testing ZFS commands:" + for ((i = 0; i < ${#zfs_scmds[@]}; i++)); do + sudo /usr/local/bin/sdb -e "${zfs_scmds[$i]}" + done +else + echo "Can't find ZFS kernel module... skipping ZFS commands" +fi