Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

kernfs_memcg: Add helpers to gather memcgroup related data #96

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
221 changes: 221 additions & 0 deletions drgn_tools/kernfs_memcg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
# Copyright (c) 2024, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
"""
Kernfs_memcg
--------------

The ``drgn_tools.kernfs_memcg`` module provides helpers for working with the
Linux memcg subsystem.
"""
import argparse
from typing import Iterator

from drgn import cast
from drgn import container_of
from drgn import FaultError
from drgn import Object
from drgn import Program
from drgn.helpers.common.format import decode_enum_type_flags
from drgn.helpers.linux import cgroup_path
from drgn.helpers.linux import css_for_each_descendant_pre
from drgn.helpers.linux import find_slab_cache
from drgn.helpers.linux import for_each_page
from drgn.helpers.linux import inode_path
from drgn.helpers.linux import kernfs_path
from drgn.helpers.linux import slab_cache_for_each_allocated_object

from drgn_tools.corelens import CorelensModule
from drgn_tools.dentry import dentry_path_any_mount

# cgroup subsystem id for memory cgroup, from kernel/cgroup/cgroup.c
# Used throughout this module as the index into the ``cgroup_subsys`` array.
# NOTE(review): the value is hard-coded; presumably the index of the memory
# controller depends on which controllers the kernel was built with -- confirm
# that 4 is correct for every kernel this module is expected to support.
_MEMORY_CGRP_ID = 4


def decode_css_flags(css: Object) -> str:
    """
    Render ``cgroup_subsys_state.flags`` as human-readable text.

    :param css: ``struct cgroup_subsys_state *``
    :returns: ``"ZOMBIE"`` when no flag bit is set, otherwise the string
        produced by ``decode_enum_type_flags`` for the set bits.
    """
    # CSS_DYING is only looked up to obtain the enum type that names the bits.
    dying_flag = css.prog_["CSS_DYING"]
    raw_flags = css.flags.value_()
    if raw_flags:
        return decode_enum_type_flags(raw_flags, dying_flag.type_, False)

    # No dedicated flag marks a zombie cgroup. A flags value of 0 means that
    # destruction of the cgroup has completed, but the object has not been
    # freed yet because something else still pins it.
    return "ZOMBIE"


def for_each_kernfs_node(prog: Program) -> Iterator[Object]:
    """
    Walk every allocated object of the ``kernfs_node_cache`` slab cache.

    :param prog: Program being debugged.
    :returns: Iterator of ``struct kernfs_node *`` objects.
    """
    cache = find_slab_cache(prog, "kernfs_node_cache")
    yield from slab_cache_for_each_allocated_object(
        cache, "struct kernfs_node"
    )


def dump_memcgroup_hierarchy(prog: Program) -> None:
    """
    Print the memory cgroup hierarchy, one line per cgroup.

    Starting from the css of the memory subsystem's root cgroup, every
    descendant css is visited in pre-order and printed with its cgroup path
    and decoded state flags.

    :param prog: Program being debugged.
    """
    memcg_subsys = prog["cgroup_subsys"][_MEMORY_CGRP_ID]
    root_css = memcg_subsys.root.cgrp.self.address_of_()
    subsys_name = memcg_subsys.name.string_().decode()
    print(f"dumping: {subsys_name} hierarchy")
    for css in css_for_each_descendant_pre(root_css):
        state = decode_css_flags(css)
        path = cgroup_path(css.cgroup).decode()
        print(f"path: {path} state: {state}")


def kernfs_node_of_cgroup(kn: Object) -> bool:
    """
    Tell whether a kernfs_node belongs to a cgroup.

    :param kn: ``struct kernfs_node *``
    :returns: True if ``kn.priv`` points at a ``struct cgroup`` whose ``kn``
        member points back at ``kn``, False otherwise.
    """
    # Only nodes whose low four flag bits equal 0x1 are considered.
    # NOTE(review): presumably KERNFS_TYPE_MASK and KERNFS_DIR -- confirm
    # against the kernel's kernfs header.
    if (kn.flags.value_() & 0xF) != 0x1:
        return False

    try:
        candidate = Object(
            kn.prog_, "struct cgroup", address=kn.priv.value_()
        )
        # A genuine cgroup refers back to the kernfs_node that represents it.
        return candidate.kn == kn
    except FaultError:
        # priv did not point at readable memory, so it is not a cgroup.
        return False


def kernfs_node_of_memcgroup(kn: Object) -> bool:
    """
    Tell whether a kernfs_node belongs to a memory cgroup.

    :param kn: ``struct kernfs_node *``
    :returns: True if ``kn`` represents a cgroup whose root is the root of
        the memory cgroup subsystem, False otherwise.
    """
    if not kernfs_node_of_cgroup(kn):
        return False

    prog = kn.prog_
    cgrp = Object(prog, "struct cgroup", address=kn.priv.value_())
    memcg_root = prog["cgroup_subsys"][_MEMORY_CGRP_ID].root
    return memcg_root == cgrp.root


def dump_memcg_kernfs_nodes(prog) -> None:
    """
    Print every kernfs_node that represents a memory cgroup.

    Each matching node is printed with its address and kernfs path, followed
    by a final line giving the number of matches.

    :param prog: Program being debugged.
    """
    matches = 0
    for node in for_each_kernfs_node(prog):
        if not kernfs_node_of_memcgroup(node):
            continue
        matches += 1
        node_path = kernfs_path(node).decode()
        print("kernfs_node: ", hex(node.value_()), " ", node_path)

    print("Total number of memcg kernfs_node objects: ", matches)


def get_num_active_mem_cgroups(prog: Program) -> int:
    """
    Return the number of active memory cgroups, root included.

    :param prog: Program being debugged.
    :returns: ``nr_descendants`` of the memory subsystem's root cgroup, plus
        one.
    """
    root_cgrp = prog["cgroup_subsys"][_MEMORY_CGRP_ID].root.cgrp
    # The extra 1 accounts for the root memory cgroup itself.
    return root_cgrp.nr_descendants.value_() + 1


def get_num_dying_mem_cgroups(prog: Program) -> int:
    """
    Return the number of inactive or dying memory cgroups.

    :param prog: Program being debugged.
    :returns: ``nr_dying_descendants`` of the memory subsystem's root cgroup.
    """
    root_cgrp = prog["cgroup_subsys"][_MEMORY_CGRP_ID].root.cgrp
    return root_cgrp.nr_dying_descendants.value_()


# By default every page is scanned. When a positive max_pages is given, the
# scan stops as soon as that many pages holding a memcg reference have been
# found, or when all pages have been scanned, whichever happens first.
def dump_page_cache_pages_pinning_cgroups(
    prog: Program, max_pages: int = 0
) -> None:
    """
    Dump all page-cache pages that have reference to a mem-cgroup.
    The output also contains information such as the cgroup that is pinned,
    its flags (to indicate current state of cgroup) and file cached by this
    page.

    A page is counted as found once its memcg is seen to belong to the memory
    cgroup hierarchy. It is only printed if a path can be resolved for the
    inode behind its mapping, so the number of printed lines can be lower
    than the number of pages found. A final summary line reports how many
    pages were scanned and how many were found.

    :param prog: Program being debugged.
    :param max_pages: specify how many pages to find. For the default (0),
        or any other non-positive value, all such pages are listed.
    """
    PG_slab_mask = 1 << prog.constant("PG_slab")
    mem_cgroup_root = prog["cgroup_subsys"][_MEMORY_CGRP_ID].root
    total_count = 0
    found_count = 0
    for page in for_each_page(prog):
        total_count += 1
        try:
            # Ignore slab pages
            if page.flags & PG_slab_mask:
                continue
            # Ignore non page-cache pages
            if not page.mapping:
                continue
            # The memcg reference lives in either ``mem_cgroup`` or
            # ``memcg_data`` depending on the layout of struct page.
            try:
                mem_cgroup = page.mem_cgroup
            except AttributeError:
                mem_cgroup = page.memcg_data

            memcg_value = mem_cgroup.value_()
            # Skip pages without a memcg reference and values with either of
            # the two low bits set.
            # NOTE(review): presumably the low bits tag memcg_data values
            # that are not plain memcg pointers -- confirm.
            if not memcg_value or memcg_value & 3:
                continue
            css = cast("struct cgroup_subsys_state *", mem_cgroup)
            cgrp = css.cgroup
            if cgrp.root == mem_cgroup_root:
                found_count += 1
                inode = page.mapping.host
                # Only report pages whose backing file can be named.
                if inode_path(inode) is not None:
                    dentry = container_of(
                        inode.i_dentry.first, "struct dentry", "d_u.d_alias"
                    )
                    path = dentry_path_any_mount(dentry).decode()
                    cgroup_state = decode_css_flags(cgrp.self.address_of_())
                    print(
                        f"page: 0x{page.value_():x} cgroup: {cgroup_path(cgrp).decode()} state: {cgroup_state} path: {path}\n"
                    )
        except FaultError:
            # Unreadable page metadata: give up on this page, but still fall
            # through to the limit check below.
            pass

        # Checked outside the try block and with ">=" so that the limit is
        # honoured even when the page that reached it was not printed or
        # faulted after being counted.
        if max_pages > 0 and found_count >= max_pages:
            break

    print(
        f"Scanned {total_count} pages, found {found_count} pages with memory cgroup refs."
    )


class PagesPinningMemcgroups(CorelensModule):
    """Print information related to memcgroup pinning by pages"""

    # Name under which this module is registered.
    name = "kernfs_memcg"

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        # Register the option that caps how many pages are looked for.
        max_option = {
            "type": int,
            "default": 10000,
            "help": "Maximum number of pages to show",
        }
        parser.add_argument("--max", "-m", **max_option)

    def run(self, prog: Program, args: argparse.Namespace) -> None:
        # Forward the parsed limit to the page scanner.
        page_limit = args.max
        dump_page_cache_pages_pinning_cgroups(prog, max_pages=page_limit)
33 changes: 33 additions & 0 deletions tests/test_kernfs_memcg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Copyright (c) 2024, Oracle and/or its affiliates.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
import drgn

from drgn_tools import kernfs_memcg as kernfs_memcg


def test_dump_page_cache_pages_pinning_cgroups(prog: drgn.Program) -> None:
    """Smoke test: the page scan runs to completion with a limit of 10."""
    page_limit = 10
    kernfs_memcg.dump_page_cache_pages_pinning_cgroups(prog, page_limit)


def test_dump_memcgroup_hierarchy(prog: drgn.Program) -> None:
    """Smoke test: dumping the memory cgroup hierarchy does not raise."""
    dump_hierarchy = kernfs_memcg.dump_memcgroup_hierarchy
    dump_hierarchy(prog)


def test_kernfs_node_of_memcgroup(prog: drgn.Program) -> None:
    """Smoke test: classify kernfs nodes, stopping after five memcg hits."""
    found = 0
    for node in kernfs_memcg.for_each_kernfs_node(prog):
        if not kernfs_memcg.kernfs_node_of_memcgroup(node):
            continue
        found += 1
        if found >= 5:
            print("Found 5 memcgroup, kernfs_node objects.")
            break


def test_get_num_active_mem_cgroups(prog: drgn.Program) -> None:
    """Smoke test: the active memory cgroup count can be read and printed."""
    active = kernfs_memcg.get_num_active_mem_cgroups(prog)
    print(f"number of active memcgroups: {active}\n")


def test_get_num_dying_mem_cgroups(prog: drgn.Program) -> None:
    """Smoke test: the dying memory cgroup count can be read and printed."""
    dying = kernfs_memcg.get_num_dying_mem_cgroups(prog)
    print(f"number of dying memcgroups: {dying}\n")