class LibraryNotFound(Exception):
    """Raised when none of the candidate shared libraries can be located."""


def find_any_library(*choices: str) -> str:
    """Return the first library among *choices* that can be resolved.

    Each name is passed to ``ctypes.util.find_library`` in order.

    Raises:
        LibraryNotFound: if none of the names resolves to a library.
    """
    for choice in choices:
        lib = ctypes.util.find_library(choice)
        if lib is not None:
            return lib
    raise LibraryNotFound('Cannot find any of libraries: {}'.format(choices))


# C++ demangling is best-effort: if the C++ runtime cannot be found or loaded,
# both names stay None and cxx_demangle() reports failure instead of the
# module failing to import.
libcxx = None
cxa_demangle = None
try:
    libcxx = ctypes.CDLL(find_any_library("stdc++", "c++"))  # On Linux
    cxa_demangle = getattr(libcxx, '__cxa_demangle')
    # Keep the result as a raw address (not an auto-converted bytes object)
    # so the buffer can be handed back to the allocator afterwards.
    cxa_demangle.restype = ctypes.c_void_p
except (LibraryNotFound, OSError, AttributeError):
    libcxx = None
    cxa_demangle = None


def cxx_demangle(s):
    """Demangle the C++ symbol name *s*.

    Returns:
        The demangled name, or ``None`` when the demangler is unavailable
        or reports a failure for *s*.
    """
    if cxa_demangle is None:
        return None

    mangled_name_p = ctypes.c_char_p(s.encode('utf-8'))
    status = ctypes.c_int()
    retval = cxa_demangle(mangled_name_p, None, None, ctypes.pointer(status))
    if not retval:
        # NULL result: nothing was allocated, nothing to release.
        return None

    try:
        if status.value != 0:
            return None
        return ctypes.c_char_p(retval).value.decode('utf-8')
    finally:
        # The buffer was allocated by the demangler; release it through
        # pyda_core.free rather than leaking it.
        pyda_core.free(retval)


def backtrace_to_str(bt, demangle=False, short=False):
    """Render a backtrace as text, one frame per line.

    Args:
        bt: iterable of frames indexed as ``(ip, modname, offset, sym_name)``.
        demangle: when true, symbols starting with ``_Z`` are demangled;
            if demangling fails the mangled name is printed unchanged.
        short: when true, demangled names longer than 100 characters are
            cut down to ``"..."`` plus their last 100 characters.

    Returns:
        The formatted backtrace; an empty string for an empty *bt*.
    """
    lines = []
    for f in bt:
        ip, modname, offset, name = f[0], f[1], f[2], f[3]

        if demangle and name.startswith("_Z"):
            sym = cxx_demangle(name)
            if sym is None:
                sym = name
            if short and len(sym) > 100:
                sym = "..." + sym[-100:]

            lines.append(f"[{modname}+{hex(offset)}] {sym}\n")
        elif offset != 0:
            lines.append(f"[{modname}+{hex(offset)}] {name}\n")
        else:
            # Offset 0 is what frames without a resolved module carry, so
            # fall back to the raw instruction pointer.
            lines.append(f"[ip={hex(ip)}]\n")

    return "".join(lines)
/*
 * One frame of a backtrace, as appended to the result vector by
 * pyda_get_backtrace().
 */
struct pyda_bt_entry {
    /* Module name taken from the module's full path; empty string when
     * no module is found for the frame's ip. */
    char modname[128];

    /* ip minus the module's start address; 0 when no module is found. */
    uint64_t offset;

    /* Procedure name reported by the unwinder; empty string when the
     * lookup fails. */
    char sym_name[512];

    /* Instruction pointer and stack pointer read from the unwind cursor. */
    uint64_t ip;
    uint64_t sp;
};
(array of tuples)"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; @@ -241,6 +244,18 @@ pyda_core_process(PyObject *self, PyObject *args, PyObject *kwargs) { return (PyObject*)result; } +static PyObject * +pyda_core_free(PyObject *self, PyObject *args, PyObject *kwargs) { + unsigned long addr; + if (!PyArg_ParseTuple(args, "K", &addr)) + return NULL; + + free((void*)addr); + + Py_INCREF(Py_None); + return Py_None; +} + static int check_valid_thread(pyda_thread *t) { if (!t) { PyErr_SetString(PyExc_RuntimeError, "Threads created with Python threading APIs cannot use Pyda APIs"); @@ -354,11 +369,31 @@ PydaProcess_backtrace(PyObject* self, PyObject *noarg) { pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); if (check_exited(t)) return NULL; - char *s = malloc(4096); - pyda_get_backtrace(t, s, 4096); - PyObject *ret = PyUnicode_FromString(s); - free(s); - return ret; + drvector_t backtrace; + + int ret = pyda_get_backtrace(t, &backtrace); + + PyObject *list = PyList_New(0); + for (int i=0; iip)); + PyTuple_SetItem(tuple, 1, PyUnicode_FromString(e->modname)); + PyTuple_SetItem(tuple, 2, PyLong_FromLong(e->offset)); + PyTuple_SetItem(tuple, 3, PyUnicode_FromString(e->sym_name)); + PyList_Append(list, tuple); + } + + drvector_delete(&backtrace); + + if (ret) { + Py_DECREF(list); + PyErr_SetString(PyExc_RuntimeError, "Could not generate backtrace"); + return NULL; + } + + return list; } static PyObject * diff --git a/pyda_core/pyda_unwind.c b/pyda_core/pyda_unwind.c index cb070e7..90c05da 100644 --- a/pyda_core/pyda_unwind.c +++ b/pyda_core/pyda_unwind.c @@ -3,7 +3,11 @@ #include #include -int pyda_get_backtrace (pyda_thread *t, char *buf, int size) { +static void free_bt_entry(void *ptr) { + dr_global_free(ptr, sizeof(struct pyda_bt_entry)); +} + +int pyda_get_backtrace (pyda_thread *t, drvector_t *res) { unw_cursor_t cursor; unw_context_t uc; unw_word_t ip, sp; @@ -38,16 +42,19 @@ int pyda_get_backtrace (pyda_thread *t, char *buf, int size) { unw_init_local(&cursor, 
&uc); - char *bufcur = buf; + drvector_init(res, 0, true, free_bt_entry); + do { - char sym[256]; + struct pyda_bt_entry *e = dr_global_alloc(sizeof(struct pyda_bt_entry)); + + char sym[512]; unw_word_t offset; unw_get_reg(&cursor, UNW_REG_IP, &ip); unw_get_reg(&cursor, UNW_REG_SP, &sp); module_data_t *mod = dr_lookup_module((void*)ip); - int res; + if (mod) { char *modname = strrchr(mod->full_path, '/'); if (modname) { @@ -56,27 +63,25 @@ int pyda_get_backtrace (pyda_thread *t, char *buf, int size) { modname = mod->full_path; } - res = snprintf(bufcur, size, "[%s+0x%lx]\t", modname, (uint64_t)ip - (uint64_t)mod->start); + snprintf(e->modname, sizeof(e->modname), "%s", modname); + e->offset = (uint64_t)ip - (uint64_t)mod->start; } else { - res = snprintf(bufcur, size, "\t"); - } - - if (res > 0 && res <= size) { - bufcur += res; - size -= res; + e->modname[0] = 0; + e->offset = 0; } if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset) == 0) { - res = snprintf(bufcur, size,"(%s+0x%lx)\tip = %lx, sp = %lx\n", sym, offset, (uint64_t) ip, (uint64_t) sp); + snprintf(e->sym_name, sizeof(e->sym_name), "%s", sym); } else { - res = snprintf(bufcur, size, "\t\tip = %lx, sp = %lx\n", (uint64_t) ip, (uint64_t) sp); + e->sym_name[0] = 0; } - if (res > 0 && res <= size) { - bufcur += res; - size -= res; - } + e->ip = ip; + e->sp = sp; + + drvector_append(res, e); } while (unw_step(&cursor) > 0); return 0; -} \ No newline at end of file +} +