Skip to content

Commit

Permalink
fix pstats file generation
Browse files Browse the repository at this point in the history
  • Loading branch information
Kamilcuk committed Nov 28, 2024
1 parent 9a20fb3 commit 7179d6a
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 53 deletions.
38 changes: 38 additions & 0 deletions doc/pstats.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# pstats

My documentation of python pstats objects.

# docs

`pstats` has type `dict[tuple[str, int, str], tuple[int, int, int, int, list[tuple[int, int, int, int]]]]`.

The `pstats` object is marshal-ed dictionary object with tuple of `(filename, lineno, funcname)` of keys and tuple of `(cc, nc, tt, ct, callers)` as values.

The tuple elements of `pstats[func]` hold:
- `cc` is the number of primitive calls to the function. Primitive calls are all calls minus recursive calls.
- `nc` is the number of all calls to the function.
- `tt` is the time spent in the function excluding the time spent in subfunctions.
- If a function calls itself recursively, then this time is summed.
- `ct` is the time spent in the function.
- If a function calls itself recursively, then this time is effectively ignored.
- `callers` is a list of a nested tuple of `(cc, nc, tt, ct)`. The tuple elements `pstats[func].callers[func2]` hold:
- `cc` - the the number of calls `func2` made to `func`
- `nc` - always equal to `cc`, just ignore
- `tt` - the time spent in such `func2` calls that at least one called `func` excluding the time spent in subfunction
- `ct` - the time spent in such `func2` calls that at least one called `func`

The following properties hold:
- `pstats.keys()` holds __all__ functions that were executed in the script
- `set(c for s in pstats.values() for c in s.callers).issubset(stats.keys())`
- `all(s.nc >= s.cc for s in pstats.values())`
- `all(f.nc == sum(c.nc for s in stats.values() for c in stats.callers.values()))`

# links

- https://github.com/python/cpython/blob/main/Lib/cProfile.py#L63
- https://github.com/python/cpython/blob/main/Modules/_lsprof.c#L31
- https://github.com/python/cpython/blob/main/Lib/pstats.py#L160
- https://docs.python.org/3/library/profile.html
- https://zameermanji.com/blog/2012/6/30/undocumented-cprofile-features/
- https://stackoverflow.com/questions/15816415/in-python-cprofile-what-is-the-difference-between-calls-count-and-primitive-cal

20 changes: 8 additions & 12 deletions scripts/Makefile
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
MAKEFLAGS = -rR --warn-undefined-variables --no-show-directories
SHELL = bash
$(shell mkdir -vp build)

compare_script1: build/a build/b
paste -d'@' build/a build/b | column -t -s '@'
build/script1.py.prof: ./script.1py
@mkdir -vp build
build/script1.py.prof: ./script1.py
python -m cProfile -o $@ script1.py
build/script1.sh.txt: ../src/L_bash_profile.py ./script1.py
@mkdir -vp build
L_bash_profile profile -o $@ '. ./script1.sh'
build/script1.sh.prof: build/script1.sh.txt
@mkdir -vp build
L_bash_profile analyze --pstats $^ $@
build/a: build/script1.sh.prof
@mkdir -vp build
L_bash_profile showpstats -r $^ > $@
build/b: build/script1.py.prof
@mkdir -vp build
L_bash_profile showpstats -r $^ > $@
build/script1.sh.prof: build/script1.sh.txt
L_bash_profile analyze --pstats $@ $<
build/a: build/script1.sh.prof ../src/L_bash_profile.py
L_bash_profile showpstats -r $< > $@
build/b: build/script1.py.prof ../src/L_bash_profile.py
L_bash_profile showpstats -r $< > $@
22 changes: 18 additions & 4 deletions scripts/script1.sh
Original file line number Diff line number Diff line change
@@ -1,28 +1,42 @@
#!/usr/bin/env bash
# line numbers synchronized with script1.py




a() {
sleep 0.1
}




b() {
case "$1" in
1)
sleep 0.2
a
;;


a ;;
2)
sleep 0.2
b 1
;;



b 1 ;;
*)
sleep 0.3
;;
esac
}


c() {
a
sleep 0.5



b 2
}

Expand Down
73 changes: 36 additions & 37 deletions src/L_bash_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ class FunctionKey:
lineno: int = 0
funcname: str = ""

def __str__(self):
return f"{self.filename}:{self.lineno}({self.funcname})"


@dataclass
class Record:
Expand Down Expand Up @@ -137,35 +140,20 @@ def totaltime(self):


@dataclass
class PstatsCallers:
class Pstatsnocallers:
"""Statistics for pstats python module"""

callcount: int = 0
primitivecallcount: int = 0
inlinetime: float = 0
totaltime: float = 0

@staticmethod
def from_node(node: CallgraphNode):
return PstatsCallers(
node.callcount,
node.callcount - node.recursivecallcount,
us2s(node.inlinetime),
us2s(node.totaltime),
)

def add(self, o: PstatsCallers):
self.callcount += o.callcount
self.primitivecallcount += o.primitivecallcount
self.inlinetime += o.inlinetime
self.totaltime += o.totaltime


@dataclass
class Pstats(PstatsCallers):
class Pstats(Pstatsnocallers):
"""Statistics for pstats python module"""

callers: dict[FunctionKey, PstatsCallers] = field(default_factory=dict)
callers: dict[FunctionKey, Pstatsnocallers] = field(default_factory=dict)


@dataclass
Expand Down Expand Up @@ -488,30 +476,36 @@ def create_python_pstats_file(self, file: str):
statsroot: dict[FunctionKey, Pstats] = {}

def fillstats(node: CallgraphNode):
cur = PstatsCallers.from_node(node)
stats: Pstats = statsroot.setdefault(node.function, Pstats())
stats.add(cur)
parent = node.parent
while parent is not None:
cur = PstatsCallers.from_node(parent)
stats.callers.setdefault(parent.function, PstatsCallers()).add(cur)
parent = parent.parent
for val in node.childs.values():
fillstats(val)
nodestats = statsroot.setdefault(node.function, Pstats())
nodestats.callcount += node.callcount
nodestats.primitivecallcount += node.callcount - node.recursivecallcount
nodestats.totaltime += us2s(node.totaltime)
nodestats.inlinetime += us2s(node.inlinetime)
for child in node.childs.values():
fillstats(child)
childstats = statsroot.setdefault(
child.function, Pstats()
).callers.setdefault(node.function, Pstatsnocallers())
childstats.callcount += 1
childstats.primitivecallcount += 1
childstats.inlinetime += us2s(child.inlinetime)
childstats.totaltime += us2s(child.totaltime)
if child.function == node.function:
nodestats.totaltime -= us2s(child.totaltime)

fillstats(callgraph)

# Write pstats file
def writer(what: Pstats):
return (
what.callcount,
what.primitivecallcount,
what.callcount,
what.inlinetime,
what.totaltime,
{
astuple(key): (
val.callcount,
val.primitivecallcount,
val.callcount,
val.inlinetime,
val.totaltime,
)
Expand Down Expand Up @@ -602,15 +596,20 @@ def analyze(args: AnalyzeArgs):
@click.argument("file", type=click.File("rb", lazy=True))
def showpstats(raw: bool, file: io.FileIO):
if raw:
stats = marshal.load(file)
for key, val in stats.items():

def sortthem(x: dict):
return sorted(x.items())

def printit(prefix, key, val):
print(
f"{key[0]}:{key[1]}({key[2]}) nc={val[0]} cc={val[1]} tt={val[2]:f} ct={val[3]:f}"
f"{prefix}{key[0]}:{key[1]}({key[2]}) cc={val[0]} nc={val[1]} tt={val[2]:f} ct={val[3]:f}"
)
for key, val in (val[4] or {}).items():
print(
f" ^ {key[0]}:{key[1]}({key[2]}) nc={val[0]} cc={val[1]} tt={val[2]:f} ct={val[3]:f}"
)

stats = marshal.load(file)
for key, val in sortthem(stats):
printit("", key, val)
for key2, val2 in sortthem(val[4] or {}):
printit(" ^ ", key2, val2)
else:
ps = pstats.Stats(file.name)
sortby = "cumulative"
Expand Down

0 comments on commit 7179d6a

Please sign in to comment.