From b053746fd11823c8bd0e42eb336656bf5d245bca Mon Sep 17 00:00:00 2001 From: Andrew Haberlandt Date: Mon, 8 Jul 2024 07:26:52 +0000 Subject: [PATCH] feat: add syscall pre/post hooks This also sets up a default hook that prevents stdio from being closed. --- examples/strace.py | 43 +++++++++++++ lib/pyda/base.py | 4 +- lib/pyda/process.py | 62 +++++++++++++++++- pyda_core/pyda_core.c | 131 ++++++++++++++++++++++++++++++++------- pyda_core/pyda_core.h | 7 ++- pyda_core/pyda_core_py.c | 62 +++++++++++++++++- pyda_core/tool.c | 28 +++++++++ 7 files changed, 308 insertions(+), 29 deletions(-) create mode 100644 examples/strace.py diff --git a/examples/strace.py b/examples/strace.py new file mode 100644 index 0000000..6e6e4f4 --- /dev/null +++ b/examples/strace.py @@ -0,0 +1,43 @@ +from pyda import * +from pwnlib.elf.elf import ELF +from pwnlib.util.packing import u64 +import string +import sys + +p = process() + +e = ELF(p.exe_path) +e.address = p.maps[p.exe_path].base + +plt_map = { e.plt[x]: x for x in e.plt } + +def guess_arg(x): + printable_chars = bytes(string.printable, 'ascii') + + # Is pointer? + if x > 0x100000000: + try: + data = p.read(x, 0x20) + if all([c in printable_chars for c in data[:4]]): + if 0 in data: + return str(data[:data.index(0)]) + else: + return str(data[:20]) + "..." 
+ + except Exception as e: + pass + + return hex(x) + +def syscall_pre_hook(p, num): + print(f"[syscall {num}] (" + ", ".join([ + f"rdi={guess_arg(p.regs.rdi)}", + f"rsi={guess_arg(p.regs.rsi)}", + f"rdx={guess_arg(p.regs.rdx)}", + f"rcx={guess_arg(p.regs.rcx)}", + ]) + ")") + +for snum in range(500): + p.syscall_pre(snum, syscall_pre_hook) + +p.run() \ No newline at end of file diff --git a/lib/pyda/base.py b/lib/pyda/base.py index 62a8200..339a4c1 100644 --- a/lib/pyda/base.py +++ b/lib/pyda/base.py @@ -6,11 +6,11 @@ INIT = False -def process(): +def process(**kwargs): global INIT # todo: remove the bogus argument - proc = Process(pyda_core.process("")) + proc = Process(pyda_core.process(""), **kwargs) if not INIT: # by this point, hacks/ is in pythonpath diff --git a/lib/pyda/process.py b/lib/pyda/process.py index 1779032..e355a1b 100644 --- a/lib/pyda/process.py +++ b/lib/pyda/process.py @@ -3,13 +3,46 @@ import pyda_core class Process(): - def __init__(self, handle): + def __init__(self, handle, prevent_close_stdio=True): self._p = handle self._hooks = {} + self._syscall_pre_hooks = {} + self._syscall_post_hooks = {} + self._registered_syscall_pre_hook = False + self._registered_syscall_post_hook = False + self._has_run = False + + def prevent_close(p, num): + if p.regs.rdi in [0, 1, 2]: + p.regs.rax = 0 + return False # pre-hooks that return False will prevent the syscall from executing + + return None + + if prevent_close_stdio: + self.syscall_pre(3, prevent_close) def _hook_dispatch(self, addr): for h in self._hooks[addr]: h(self) + + def _syscall_pre_hook_dispatch(self, syscall_num): + if syscall_num in self._syscall_pre_hooks: + results = [] + for h in self._syscall_pre_hooks[syscall_num]: + results.append(h(self, syscall_num)) + + if False in results and True in results: + raise RuntimeError("Cannot have mixed return values from syscall pre-hooks") + elif False in results: + return False + elif True in results: + return True + + def 
_syscall_post_hook_dispatch(self, syscall_num): + if syscall_num in self._syscall_post_hooks: + for h in self._syscall_post_hooks[syscall_num]: + h(self, syscall_num) def hook(self, addr, callback): if addr not in self._hooks: @@ -38,6 +71,32 @@ def after_call_hook(p): self.hook(addr, call_hook) + def syscall_pre(self, syscall_num, callback): + if self._has_run: + raise RuntimeError("Cannot add syscall hooks after process has started") + + if not self._registered_syscall_pre_hook: + self._p.set_syscall_pre_hook(lambda p, syscall_num: self._syscall_pre_hook_dispatch(syscall_num)) + self._registered_syscall_pre_hook = True + + if syscall_num not in self._syscall_pre_hooks: + self._syscall_pre_hooks[syscall_num] = [callback] + else: + self._syscall_pre_hooks[syscall_num].append(callback) + + def syscall_post(self, syscall_num, callback): + if self._has_run: + raise RuntimeError("Cannot add syscall hooks after process has started") + + if not self._registered_syscall_post_hook: + self._p.set_syscall_post_hook(lambda p, syscall_num: self._syscall_post_hook_dispatch(syscall_num)) + self._registered_syscall_post_hook = True + + if syscall_num not in self._syscall_post_hooks: + self._syscall_post_hooks[syscall_num] = [callback] + else: + self._syscall_post_hooks[syscall_num].append(callback) + def set_thread_entry(self, callback): self._p.set_thread_init_hook(lambda p: callback(self)) @@ -61,6 +120,7 @@ def __getattr__(self, name): raise AttributeError(f"Invalid attribute '{name}'. 
Did you mean 'regs.{name}'?") def run(self): + self._has_run = True self._p.run() @property diff --git a/pyda_core/pyda_core.c b/pyda_core/pyda_core.c index d81319c..263f0d8 100644 --- a/pyda_core/pyda_core.c +++ b/pyda_core/pyda_core.c @@ -23,6 +23,8 @@ pyda_process* pyda_mk_process() { proc->main_thread = pyda_mk_thread(proc); proc->callbacks = NULL; proc->thread_init_hook = NULL; + proc->syscall_pre_hook = NULL; + proc->syscall_post_hook = NULL; proc->py_obj = NULL; pthread_condattr_t condattr; @@ -257,6 +259,26 @@ void pyda_set_thread_init_hook(pyda_process *p, PyObject *callback) { Py_INCREF(callback); } +void pyda_set_syscall_pre_hook(pyda_process *p, PyObject *callback) { + // NOTE: GIL is held + + if (p->syscall_pre_hook) + Py_DECREF(p->syscall_pre_hook); + + p->syscall_pre_hook = callback; + Py_INCREF(callback); +} + +void pyda_set_syscall_post_hook(pyda_process *p, PyObject *callback) { + // NOTE: GIL is held + + if (p->syscall_post_hook) + Py_DECREF(p->syscall_post_hook); + + p->syscall_post_hook = callback; + Py_INCREF(callback); +} + int pyda_flush_hooks() { pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); pyda_process *p = t->proc; @@ -284,8 +306,7 @@ pyda_hook* pyda_get_callback(pyda_process *p, void* addr) { return NULL; } -void pyda_hook_cleancall(pyda_hook *cb) { - pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); +static void thread_prepare_for_python_entry(PyGILState_STATE *gstate, pyda_thread *t, void* pc) { if (t->skip_next_hook) { t->skip_next_hook = 0; return; @@ -293,19 +314,66 @@ void pyda_hook_cleancall(pyda_hook *cb) { if (t->errored) return; - PyGILState_STATE gstate; - gstate = PyGILState_Ensure(); + *gstate = PyGILState_Ensure(); void *drcontext = dr_get_current_drcontext(); t->cur_context.size = sizeof(dr_mcontext_t); t->cur_context.flags = DR_MC_ALL; // dr_redirect_execution requires it dr_get_mcontext(drcontext, &t->cur_context); - t->cur_context.pc = (app_pc)cb->addr; + + if (pc) + t->cur_context.pc = (app_pc)pc; 
+ t->rip_updated_in_cleancall = 0; +} + +static void thread_prepare_for_python_return(PyGILState_STATE *gstate, pyda_thread *t, void* hook_addr) { + void *drcontext = dr_get_current_drcontext(); + + // Syscall hooks are not allowed to modify PC + if (!hook_addr) { + if (t->rip_updated_in_cleancall) { + dr_fprintf(STDERR, "\n[Pyda] ERROR: Syscall hooks are not allowed to modify PC. Skipping future hooks.\n"); + dr_flush_file(STDERR); + t->errored = 1; + } + pyda_flush_hooks(); // There is no risk of invalidating the current block here, since we are about to do a syscall + dr_set_mcontext(drcontext, &t->cur_context); + PyGILState_Release(*gstate); + return; + } + + if (t->cur_context.pc == (app_pc)hook_addr && t->rip_updated_in_cleancall) { + if (t->rip_updated_in_cleancall) { + dr_fprintf(STDERR, "\n[Pyda] ERROR: Hook updated RIP to the same address. This is UB. Skipping future hooks.\n"); + dr_flush_file(STDERR); + t->errored = 1; + } + } + + if (pyda_flush_hooks() || t->rip_updated_in_cleancall) { + if (t->cur_context.pc == hook_addr) { + t->skip_next_hook = 1; + } + // we need to call dr_redirect_execution + PyGILState_Release(*gstate); + dr_redirect_execution(&t->cur_context); + } else { + dr_set_mcontext(drcontext, &t->cur_context); + PyGILState_Release(*gstate); + } +} + +void pyda_hook_cleancall(pyda_hook *cb) { + PyGILState_STATE gstate; + pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); + + thread_prepare_for_python_entry(&gstate, t, cb->addr); DEBUG_PRINTF("cleancall %p %p %p\n", cb, cb->py_func, t); PyObject *result = PyObject_CallFunctionObjArgs(cb->py_func, t->proc->py_obj, NULL); + if (result == NULL) { dr_fprintf(STDERR, "\n[Pyda] ERROR: Hook call failed. 
Skipping future hooks on thread %d\n", t->tid); dr_flush_file(STDERR); @@ -318,27 +386,46 @@ void pyda_hook_cleancall(pyda_hook *cb) { } DEBUG_PRINTF("cleancall ret %p %p %p\n", cb, cb->py_func, t); + thread_prepare_for_python_return(&gstate, t, cb->addr); +} - if (t->cur_context.pc == (app_pc)cb->addr && t->rip_updated_in_cleancall) { - if (t->rip_updated_in_cleancall) { - dr_fprintf(STDERR, "\n[Pyda] Hook updated RIP to the same address. This is UB. Skipping future hooks.\n"); - dr_flush_file(STDERR); - t->errored = 1; - // dr_abort(); - } - } +int pyda_hook_syscall(int syscall_num, int is_pre) { + PyGILState_STATE gstate; + pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); - if (pyda_flush_hooks() || t->rip_updated_in_cleancall) { - if (t->cur_context.pc == cb->addr) { - t->skip_next_hook = 1; - } - // we need to call dr_redirect_execution - PyGILState_Release(gstate); - dr_redirect_execution(&t->cur_context); + PyObject *hook = (is_pre ? t->proc->syscall_pre_hook : t->proc->syscall_post_hook); + if (!hook) return 1; + + thread_prepare_for_python_entry(&gstate, t, NULL); + + DEBUG_PRINTF("syscall %d pre %d\n", syscall_num, is_pre); + + int should_run = 1; + + PyObject *syscall_num_obj = PyLong_FromLong(syscall_num); + PyObject *result = PyObject_CallFunctionObjArgs(hook, t->proc->py_obj, syscall_num_obj, NULL); + + Py_DECREF(syscall_num_obj); + + if (result == NULL) { + dr_fprintf(STDERR, "\n[Pyda] ERROR: Syscall hook call failed. 
Skipping future hooks on thread %d\n", t->tid); + dr_flush_file(STDERR); + t->errored = 1; + PyErr_Print(); + dr_fprintf(STDERR, "\n"); + } else if (is_pre && PyBool_Check(result)) { + // Should run + should_run = PyObject_IsTrue(result); + DEBUG_PRINTF("syscall pre_hook returned %d\n", should_run); } else { - dr_set_mcontext(drcontext, &t->cur_context); - PyGILState_Release(gstate); + Py_DECREF(result); + DEBUG_PRINTF("syscall hook returned non-bool\n"); } + + DEBUG_PRINTF("syscall ret %d pre %d\n", syscall_num, is_pre); + thread_prepare_for_python_return(&gstate, t, NULL); + + return should_run; } #endif \ No newline at end of file diff --git a/pyda_core/pyda_core.h b/pyda_core/pyda_core.h index e28684b..dd3a909 100644 --- a/pyda_core/pyda_core.h +++ b/pyda_core/pyda_core.h @@ -36,6 +36,9 @@ struct pyda_process_s { pyda_thread *main_thread; PyObject *thread_init_hook; + PyObject *syscall_pre_hook; + PyObject *syscall_post_hook; + PyObject *py_obj; pthread_cond_t thread_exit_cond; @@ -89,12 +92,14 @@ void pyda_initial_break(pyda_thread *t); void pyda_add_hook(pyda_process *p, uint64_t addr, PyObject *callback); void pyda_remove_hook(pyda_process *p, uint64_t addr); void pyda_set_thread_init_hook(pyda_process *p, PyObject *callback); +void pyda_set_syscall_pre_hook(pyda_process *p, PyObject *callback); +void pyda_set_syscall_post_hook(pyda_process *p, PyObject *callback); pyda_hook* pyda_get_callback(pyda_process *p, void* addr); // These can only be called from application threads int pyda_flush_hooks(); void pyda_hook_cleancall(pyda_hook *cb); - +int pyda_hook_syscall(int syscall_num, int is_pre); #ifndef PYDA_DYNAMORIO_CLIENT diff --git a/pyda_core/pyda_core_py.c b/pyda_core/pyda_core_py.c index c15fa6b..747b622 100644 --- a/pyda_core/pyda_core_py.c +++ b/pyda_core/pyda_core_py.c @@ -31,6 +31,9 @@ static PyObject *PydaProcess_set_register(PyObject *self, PyObject *args); static PyObject *PydaProcess_read(PyObject *self, PyObject *args); static PyObject 
*PydaProcess_write(PyObject *self, PyObject *args); static PyObject *PydaProcess_get_main_module(PyObject *self, PyObject *args); +static PyObject *PydaProcess_set_syscall_filter(PyObject *self, PyObject *args); +static PyObject *PydaProcess_set_syscall_pre_hook(PyObject *self, PyObject *args); +static PyObject *PydaProcess_set_syscall_post_hook(PyObject *self, PyObject *args); static PyMethodDef PydaGlobalMethods[] = { {"process", (PyCFunction)pyda_core_process, METH_KEYWORDS | METH_VARARGS, @@ -84,6 +87,9 @@ static PyMethodDef PydaProcessMethods[] = { {"get_main_module", PydaProcess_get_main_module, METH_VARARGS, "Get name of main module"}, {"read", PydaProcess_read, METH_VARARGS, "Read memory"}, {"write", PydaProcess_write, METH_VARARGS, "Write memory"}, + // {"set_syscall_filter", PydaProcess_set_syscall_filter, METH_VARARGS, "Set list of syscalls to call hooks on"}, + {"set_syscall_pre_hook", PydaProcess_set_syscall_pre_hook, METH_VARARGS, "Register syscall pre hook"}, + {"set_syscall_post_hook", PydaProcess_set_syscall_post_hook, METH_VARARGS, "Register syscall post hook"}, {NULL, NULL, 0, NULL} /* Sentinel */ }; @@ -360,16 +366,66 @@ PydaProcess_set_thread_init_hook(PyObject *self, PyObject *args) { PyErr_SetString(PyExc_RuntimeError, "Callback must take one argument"); return NULL; } - #ifdef PYDA_DYNAMORIO_CLIENT DEBUG_PRINTF("set_thread_init_hook\n"); -#endif // PYDA_DYNAMORIO_CLIENT - Py_INCREF(callback); +#endif + + // note: pyda_set_thread_init_hook calls incref pyda_set_thread_init_hook(p->main_thread->proc, callback); Py_INCREF(Py_None); return Py_None; } +static PyObject * +PydaProcess_set_syscall_pre_hook(PyObject *self, PyObject *args) { + PydaProcess *p = (PydaProcess*)self; + + PyObject *callback; + + if (!PyArg_ParseTuple(args, "O!", &PyFunction_Type, &callback)) + return NULL; + + PyCodeObject *code = (PyCodeObject*)PyFunction_GetCode(callback); + if (!code || code->co_argcount != 2) { + PyErr_SetString(PyExc_RuntimeError, "Callback must take 
two arguments"); + return NULL; + } + +#ifdef PYDA_DYNAMORIO_CLIENT + DEBUG_PRINTF("set_syscall_pre_hook\n"); +#endif + + // note: pyda_set_syscall_pre_hook calls incref + pyda_set_syscall_pre_hook(p->main_thread->proc, callback); + + Py_INCREF(Py_None); + return Py_None; +} +static PyObject * +PydaProcess_set_syscall_post_hook(PyObject *self, PyObject *args) { + PydaProcess *p = (PydaProcess*)self; + + PyObject *callback; + + if (!PyArg_ParseTuple(args, "O!", &PyFunction_Type, &callback)) + return NULL; + + PyCodeObject *code = (PyCodeObject*)PyFunction_GetCode(callback); + if (!code || code->co_argcount != 2) { + PyErr_SetString(PyExc_RuntimeError, "Callback must take two arguments"); + return NULL; + } + +#ifdef PYDA_DYNAMORIO_CLIENT + DEBUG_PRINTF("set_syscall_post_hook\n"); +#endif + + // note: pyda_set_syscall_post_hook calls incref + pyda_set_syscall_post_hook(p->main_thread->proc, callback); + + Py_INCREF(Py_None); + return Py_None; +} static PyObject * PydaProcess_unregister_hook(PyObject *self, PyObject *args) { diff --git a/pyda_core/tool.c b/pyda_core/tool.c index 3ae9a01..5202e22 100644 --- a/pyda_core/tool.c +++ b/pyda_core/tool.c @@ -30,6 +30,10 @@ static dr_emit_flags_t event_insert(void *drcontext, void *tag, instrlist_t *bb, instr_t *instr, bool for_trace, bool translating, void *user_data); +static bool filter_syscall_event(void *drcontext, int sysnum); +static bool pre_syscall_event(void *drcontext, int sysnum); +static void post_syscall_event(void *drcontext, int sysnum); + extern int is_dynamorio_running; pthread_cond_t python_thread_init1; @@ -68,6 +72,10 @@ dr_client_main(client_id_t id, int argc, const char *argv[]) drmgr_register_bb_instrumentation_event(event_analysis, event_insert, NULL); + + drmgr_register_pre_syscall_event(pre_syscall_event); + drmgr_register_post_syscall_event(post_syscall_event); + dr_register_filter_syscall_event(filter_syscall_event); pthread_cond_init(&python_thread_init1, 0); @@ -220,6 +228,26 @@ event_insert(void 
*drcontext, void *tag, instrlist_t *bb, instr_t *instr, return DR_EMIT_DEFAULT; } +static bool filter_syscall_event(void *drcontext, int sysnum) { + // TODO: Check the list of desired syscalls + return true; +} + +static bool pre_syscall_event(void *drcontext, int sysnum) { + if (drcontext != dr_get_current_drcontext()) { + DEBUG_PRINTF("pre_syscall_event: drcontext mismatch\n"); + return true; + } + return pyda_hook_syscall(sysnum, 1); +} + +static void post_syscall_event(void *drcontext, int sysnum) { + if (drcontext != dr_get_current_drcontext()) { + DEBUG_PRINTF("post_syscall_event: drcontext mismatch\n"); + } + pyda_hook_syscall(sysnum, 0); +} + static void thread_entrypoint_break() { DEBUG_PRINTF("entrypoint (break)\n");