From c5ea40b3273ca53e8c1a5957fd65278afcb4d1dc Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Tue, 11 Aug 2020 14:43:45 -0700 Subject: [PATCH 01/10] Use "delphix/actions/sync-with-upstream" action --- .github/workflows/sync-with-upstream.yml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 .github/workflows/sync-with-upstream.yml diff --git a/.github/workflows/sync-with-upstream.yml b/.github/workflows/sync-with-upstream.yml new file mode 100644 index 0000000..24eeb22 --- /dev/null +++ b/.github/workflows/sync-with-upstream.yml @@ -0,0 +1,16 @@ +on: + schedule: + - cron: '0 * * * *' + +jobs: + sync: + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - uses: delphix/actions/sync-with-upstream@master + with: + upstream-repository: https://github.com/sdimitro/savedump.git + upstream-branch: master + downstream-branch: master + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 9d614769e40d95f752949acdff719b641475a7ef Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Fri, 28 Aug 2020 14:15:31 -0700 Subject: [PATCH 02/10] README: Add Github Actions Badge; re-word TLDR section --- README.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 6391729..a1d7f64 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,12 @@ # savedump -TL;DR; A Python hack I put together that does its best to archive -crash dumps and core dumps together with their required binaries -and debug info in Linux. +![](https://github.com/sdimitro/savedump/workflows/.github/workflows/main.yml/badge.svg) + +TL;DR; A Python script that creates a best-effort self-contained +archive of a kernel crash dump or userland core dump. The archive +contains the memory dump coupled together with any required +binaries and debug information that it could find at the time it +was invoked. 
### Motivation From dbf88776aa7d4b2fb961f643c937bd4555d55a11 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Fri, 28 Aug 2020 14:16:51 -0700 Subject: [PATCH 03/10] linux-pkg: remove "Suggests" from debian file --- debian/control | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/debian/control b/debian/control index a41f171..3cd633f 100644 --- a/debian/control +++ b/debian/control @@ -3,13 +3,7 @@ Section: misc Priority: optional Maintainer: Delphix Engineering Standards-Version: 4.1.2 -Build-Depends: debhelper (>= 9), - dh-python, - python3, - python3-distutils, - python3.6-dev, - zlib1g-dev -Suggests: libkdumpfile, drgn +Build-Depends: debhelper (>= 9), dh-python, python3 Package: savedump Architecture: any From f6f4a52c6ef7c773c067a0f6237332d0d45be70d Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Fri, 28 Aug 2020 14:17:39 -0700 Subject: [PATCH 04/10] README: add Installation instructions section --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index a1d7f64..8132698 100644 --- a/README.md +++ b/README.md @@ -39,6 +39,26 @@ dumps. What it does lack though is proper tooling to capture a self-contained dump from one system to analyze it in another. This is what this utility is attempting to help with. +### Installation + +Ensure you have the following dependencies: +* Python 3.6 or newer +* [libkdumpfile](https://github.com/ptesarik/libkdumpfile) +* [drgn](https://github.com/osandov/drgn/) +* [gdb](https://www.gnu.org/software/gdb/) + +Note that `libkdumpfile` and `drgn` are only needed for kernel +crash dumps. If you only need `savedump` for userland core dumps +then you only need `python3`. `gdb` is not a hard dependency +either but it is recommended for accurate archival of shared +objects in userland core dumps. 
+ +Once all dependencies are installed clone this repo and +run the following command from the root of the repo: +``` +sudo python3 setup.py install +``` + ### How do I use it? To capture a crash dump or a core dump: From 8651b37297fbdd1fbae971f8c040a011ab3a570a Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Fri, 28 Aug 2020 14:19:13 -0700 Subject: [PATCH 05/10] archive kernel modules based on srcvers --- savedump/savedump.py | 86 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 75 insertions(+), 11 deletions(-) diff --git a/savedump/savedump.py b/savedump/savedump.py index a433440..8943fd4 100644 --- a/savedump/savedump.py +++ b/savedump/savedump.py @@ -19,8 +19,6 @@ """ import argparse -import distutils -from distutils import dir_util # pylint: disable=unused-import from enum import Enum import os import pathlib @@ -133,16 +131,83 @@ def get_dump_type(path: str) -> Optional[DumpType]: """ +def get_module_paths(osrelease: str, path: str) -> List[str]: + """ + Use drgn on the crash dump specified by `path` and return list + of paths from the `osrelease` kernel modules relevant to the + crash dump. + """ + # + # Similarly to libkdumpfile we import these libraries locally + # here so people who don't have drgn can still use savedump + # for userland core dumps. + # + import drgn # pylint: disable=import-outside-toplevel + from drgn.helpers.linux.list import list_for_each_entry # pylint: disable=import-outside-toplevel + + prog = drgn.program_from_core_dump(path) + + # + # First go through all modules in the dump and create a map + # of [key: module name] -> (value: module srcversion). + # + # Note: + # It would be preferable to be able to use the binary's + # .build-id to do the matching instead of srcversion. + # Unfortunately there doesn't seem to be a straightforward + # way to get the build-id section of the ELF files recorded + # in the dump. Hopefully that changes in the future. 
+ # + mod_name_srcvers = {} + for mod in list_for_each_entry('struct module', + prog['modules'].address_of_(), 'list'): + mod_name_srcvers[str(mod.name.string_(), + encoding='utf-8')] = str(mod.srcversion.string_(), + encoding='utf-8') + + # + # Go through all modules in /usr/lib/debug/lib/modules/ + # and gather the file paths of the ones that are part of our + # module name-to-srcversion map. + # + system_modules = pathlib.Path( + f"/usr/lib/debug/lib/modules/{osrelease}/").rglob('*.ko') + mod_paths = [] + for modpath in system_modules: + modname = os.path.basename(modpath)[:-3] + if not mod_name_srcvers.get(modname): + continue + + success, output = shell_cmd( + ['modinfo', '--field=srcversion', + str(modpath)]) + if not success: + sys.exit(output) + output = output.strip() + + if output != mod_name_srcvers[modname]: + continue + + mod_paths.append(str(modpath)) + del mod_name_srcvers[modname] + + print(f"found {len(mod_paths)} relevant modules with their debug info...") + print("warning: could not find the debug info of the following modules:") + print(f" {', '.join(mod_name_srcvers.keys())}") + return mod_paths + + def archive_kernel_dump(path: str) -> None: """ Packages the dump together with its vmlinux and modules in a gzipped archive in the working directory. """ + # pylint: disable=too-many-locals # - # We import libkdumpfile specifically here and not - # in the top-level to allow users that don't have + # We import drgn and libkdumpfile specifically here and + # not in the top-level to allow users that don't have # it installed to still be able to use savedump for - # core files. + # userland core files. 
# import kdumpfile # pylint: disable=import-outside-toplevel @@ -156,18 +221,17 @@ def archive_kernel_dump(path: str) -> None: sys.exit(f"error: cannot find vmlinux at: {vmlinux_path}") print(f"vmlinux found: {vmlinux_path}") - extra_mod_path = f"/usr/lib/debug/lib/modules/{osrelease}/extra" - if not os.path.exists(extra_mod_path): - sys.exit(f"error: cannot find extra mod path: {extra_mod_path}") - print(f"using module path: {extra_mod_path}") + mod_paths = get_module_paths(osrelease, path) archive_dir = f"{nodename}.archive-{dumpname}" pathlib.Path(archive_dir).mkdir(parents=True, exist_ok=True) shutil.copy(path, archive_dir) shutil.copy(vmlinux_path, archive_dir) - archive_extra_mod_path = f"{archive_dir}{extra_mod_path}" - distutils.dir_util.copy_tree(extra_mod_path, archive_extra_mod_path) + for mod_path in mod_paths: + archive_mod_path = f"{archive_dir}{mod_path}" + os.makedirs(os.path.dirname(archive_mod_path), exist_ok=True) + shutil.copy(mod_path, archive_mod_path) # # Generate run-sdb.sh. 
From ee242100a47234c8a5b72f4833d0b9b0af16cbc0 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Tue, 1 Sep 2020 08:41:34 -0700 Subject: [PATCH 06/10] Change automation token --- .github/workflows/sync-with-upstream.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/sync-with-upstream.yml b/.github/workflows/sync-with-upstream.yml index 24eeb22..d3ec7ac 100644 --- a/.github/workflows/sync-with-upstream.yml +++ b/.github/workflows/sync-with-upstream.yml @@ -13,4 +13,4 @@ jobs: upstream-branch: master downstream-branch: master env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: ${{ secrets.DEVOPS_AUTOMATION_TOKEN }} From 38d06550a6b52e00402c23fe9d2bd39047c4abe2 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Tue, 1 Sep 2020 08:44:15 -0700 Subject: [PATCH 07/10] Use Appropriate Workflow Badge --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8132698..80472c8 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # savedump -![](https://github.com/sdimitro/savedump/workflows/.github/workflows/main.yml/badge.svg) +![](https://github.com/delphix/savedump/workflows/.github/workflows/main.yml/badge.svg) TL;DR; A Python script that creates a best-effort self-contained archive of a kernel crash dump or userland core dump. The archive From 2505576cb38b17cd0ddeae329b63bcc0227f1f50 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Tue, 1 Sep 2020 08:51:06 -0700 Subject: [PATCH 08/10] Update README with latest progress --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 8132698..5d199a1 100644 --- a/README.md +++ b/README.md @@ -123,9 +123,7 @@ As mentioned in the TL;DR; the utility is far from perfect but I do hope to add more functionality to it as cases arise. 
* [verify BuildID and/or SRCVERSION between dumps and binaries](https://github.com/sdimitro/savedump/issues/6) -* [only package the modules needed in crash dumps](https://github.com/sdimitro/savedump/issues/7) * [make the gdb dependency optional](https://github.com/sdimitro/savedump/issues/9) -* [make the libkdumpfile dependency optional](https://github.com/sdimitro/savedump/issues/8) * [support custom paths for binaries and debug info](https://github.com/sdimitro/savedump/issues/5) * [generate run-sdb.sh](https://github.com/sdimitro/savedump/issues/10) * [support for plain vmcores](https://github.com/sdimitro/savedump/issues/3) From bb384b7e8b31027461f74c45c0b1d4f9ce17f89e Mon Sep 17 00:00:00 2001 From: Prakash Surya Date: Tue, 1 Sep 2020 11:25:01 -0700 Subject: [PATCH 09/10] Modify tokens used in "sync-with-upstream" workflow This change modifies the tokens used by the "sync-with-upstream" workflow. Now we use the token that has admin access when we checkout the repository initially; this way the "sync-with-upstream" action has admin access when performing git commands. This specifically is required when the action attempts to git-push to the "sync-with-upstream" branch, in order to overcome the usual branch protections. Further, we don't use the token with admin access via the GITHUB_TOKEN environment variable; this way the pull request that is opened by the "sync-with-upstream" action is "owned" by the usual "github-actions" user, rather than the user associated with the admin token. While this isn't required, it makes it more consistent with PRs created by other actions, as those PRs are owned by the "github-actions" user. 
--- .github/workflows/sync-with-upstream.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/sync-with-upstream.yml b/.github/workflows/sync-with-upstream.yml index d3ec7ac..6ec3384 100644 --- a/.github/workflows/sync-with-upstream.yml +++ b/.github/workflows/sync-with-upstream.yml @@ -7,10 +7,12 @@ jobs: runs-on: ubuntu-18.04 steps: - uses: actions/checkout@v2 + with: + token: ${{ secrets.DEVOPS_AUTOMATION_TOKEN }} - uses: delphix/actions/sync-with-upstream@master with: upstream-repository: https://github.com/sdimitro/savedump.git upstream-branch: master downstream-branch: master env: - GITHUB_TOKEN: ${{ secrets.DEVOPS_AUTOMATION_TOKEN }} + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} From 082e3ab3a37ffffd90619a0b7cda938cfed66a43 Mon Sep 17 00:00:00 2001 From: Serapheim Dimitropoulos Date: Wed, 9 Sep 2020 15:29:03 -0700 Subject: [PATCH 10/10] workflows: sync 6.0 with master --- .github/workflows/sync-with-master.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 .github/workflows/sync-with-master.yml diff --git a/.github/workflows/sync-with-master.yml b/.github/workflows/sync-with-master.yml new file mode 100644 index 0000000..a6a7a44 --- /dev/null +++ b/.github/workflows/sync-with-master.yml @@ -0,0 +1,21 @@ +on: + push: + branches: + - master + schedule: + - cron: '0 0 * * *' + +jobs: + sync: + strategy: + matrix: + branch: + - 6.0/stage + runs-on: ubuntu-18.04 + steps: + - uses: actions/checkout@v2 + - uses: delphix/actions/sync-with-master@master + with: + branch-to-sync: ${{ matrix.branch }} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}