diff --git a/lib/pyda/process.py b/lib/pyda/process.py index 0c6a7af..0a7544d 100644 --- a/lib/pyda/process.py +++ b/lib/pyda/process.py @@ -1,26 +1,21 @@ from collections import namedtuple from dataclasses import dataclass +from .tube import ProcessTube import pyda_core -class Process(): +class Process(ProcessTube): def __init__(self, handle, prevent_close_stdio=True): self._p = handle + + fds = self._p.capture_io() + super().__init__(fds[0], fds[1]) + self._hooks = {} self._syscall_pre_hooks = {} self._syscall_post_hooks = {} self._registered_syscall_pre_hook = False self._registered_syscall_post_hook = False self._has_run = False - - def prevent_close(p, num): - if p.regs.rdi in [0, 1, 2]: - p.regs.rax = 0 - return False # pre-hooks that return False will prevent the syscall from executing - - return None - - if prevent_close_stdio: - self.syscall_pre(3, prevent_close) def _hook_dispatch(self, addr): for h in self._hooks[addr]: diff --git a/lib/pyda/tube.py b/lib/pyda/tube.py new file mode 100644 index 0000000..d1c694e --- /dev/null +++ b/lib/pyda/tube.py @@ -0,0 +1,118 @@ +from pwnlib.tubes.tube import tube +import os +import pyda_core +import errno + +# todo +class ProcessTube(tube): + def __init__(self, stdin_fd, stdout_fd, **kwargs): + super(ProcessTube, self).__init__(**kwargs) + + self.closed = {"recv": False, "send": False} + + self.stdin_fd = stdin_fd + self.stdout_fd = stdout_fd + + # Overwritten for better usability + def recvall(self, timeout = None): + """recvall() -> str + + Receives data until the socket is closed. + """ + # todo + raise NotImplementedError("recvall() not implemented") + + def recv_raw(self, numb, *a): + if self.closed["recv"]: + raise EOFError + + if len(a) > 0: + raise NotImplementedError("recv_raw() with flags not implemented") + + while True: + try: + data = os.read(self.stdout_fd, numb) + break + except IOError as e: + if e.errno == errno.EAGAIN: + # If we're waiting for data, let the program continue + try: + self._p.run_until_io() + continue + except Exception as e: + raise EOFError + + if e.errno == errno.ETIMEDOUT or 'timed out' in e.strerror: + return None + elif e.errno in (errno.ECONNREFUSED, errno.ECONNRESET): + self.shutdown("recv") + raise EOFError + elif e.errno == errno.EINTR: + continue + else: + raise + + if not data: + self.shutdown("recv") + raise EOFError + + return data + + # TODO: What happens when the pipe fills? This call + # will indefinitely block? + def send_raw(self, data): + if self.closed["send"]: + raise EOFError + + try: + os.write(self.stdin_fd, data) + except IOError as e: + eof_numbers = (errno.EPIPE, errno.ECONNRESET, errno.ECONNREFUSED) + if e.errno in eof_numbers or 'Socket is closed' in e.args: + self.shutdown("send") + raise EOFError + else: + raise + + def settimeout_raw(self, timeout): + raise NotImplementedError("settimeout_raw() not implemented") + + def can_recv_raw(self, timeout): + if self.closed["recv"]: + return False + + try: + if timeout is None: + return select.select([self.stdout_fd], [], []) == ([self.stdout_fd], [], []) + + return select.select([self.stdout_fd], [], [], timeout) == ([self.stdout_fd], [], []) + except ValueError: + # Not sure why this isn't caught when testing self.proc.stdout.closed, + # but it's not. + # + # File "/home/user/pwntools/pwnlib/tubes/process.py", line 112, in can_recv_raw + # return select.select([self.proc.stdout], [], [], timeout) == ([self.proc.stdout], [], []) + # ValueError: I/O operation on closed file + raise EOFError + except select.error as v: + if v.args[0] == errno.EINTR: + return False + + def connected_raw(self, direction): + return True + + def close(self): + pass + + def _close_msg(self): + self.info('Closed pyda socket') + + def fileno(self): + self.error("fileno() not implemented") + return None + + def shutdown_raw(self, direction): + pass + + def interactive(self): + self.error("interactive() is not currently supported.") diff --git a/patches/cpython-3.10.12.patch b/patches/cpython-3.10.12.patch index a7706cf..49438c2 100644 --- a/patches/cpython-3.10.12.patch +++ b/patches/cpython-3.10.12.patch @@ -11,6 +11,22 @@ index 8d2221cfd8..1372cbc291 100644 #endif #if defined(FAULTHANDLER_USE_ALT_STACK) && defined(HAVE_LINUX_AUXVEC_H) && defined(HAVE_SYS_AUXV_H) +diff --git a/Python/errors.c b/Python/errors.c +index bc1b55e440..78e704e332 100644 +--- a/Python/errors.c ++++ b/Python/errors.c +@@ -698,10 +698,10 @@ PyErr_SetFromErrnoWithFilenameObject(PyObject *exc, PyObject *filenameObject) + PyObject * + PyErr_SetFromErrnoWithFilenameObjects(PyObject *exc, PyObject *filenameObject, PyObject *filenameObject2) + { ++ int i = errno; + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *message; + PyObject *v, *args; +- int i = errno; + #ifdef MS_WINDOWS + WCHAR *s_buf = NULL; + #endif /* Unix/Windows */ diff --git a/Python/thread_pthread.h b/Python/thread_pthread.h index 35b9810aa3..130528aec3 100644 --- a/Python/thread_pthread.h diff --git a/patches/dynamorio-10.0.patch b/patches/dynamorio-10.0.patch index a19f8d3..7f0332f 100644 --- a/patches/dynamorio-10.0.patch +++ b/patches/dynamorio-10.0.patch @@ -260,7 +260,7 @@ index f5eef1e5c..91f5a16bd 100644 /* grab all_threads_synch_lock */ /* since all_threads synch doesn't give any permissions this is necessary diff --git a/core/unix/loader.c b/core/unix/loader.c -index 10c4518b0..1a07ce582 100644 +index 10c4518b0..b27284a8e 100644 --- a/core/unix/loader.c +++ b/core/unix/loader.c @@ -158,7 +158,7 @@ privload_locate_and_load(const char *impname, privmod_t *dependent, bool reachab @@ -352,6 +352,17 @@ index 10c4518b0..1a07ce582 100644 { "free", (app_pc)redirect_free }, { "realloc", (app_pc)redirect_realloc }, { "strdup", (app_pc)redirect_strdup }, +@@ -1533,8 +1540,8 @@ static const redirect_import_t redirect_imports[] = { + /* These libc routines can call pthread functions and cause hangs (i#4928) so + * we use our syscall wrappers instead. + */ +- { "read", (app_pc)os_read }, +- { "write", (app_pc)os_write }, ++ /* { "read", (app_pc)os_read }, */ ++ /* { "write", (app_pc)os_write }, */ + #if defined(LINUX) && !defined(ANDROID) + { "__tls_get_addr", (app_pc)redirect___tls_get_addr }, + { "___tls_get_addr", (app_pc)redirect____tls_get_addr }, @@ -1548,6 +1555,7 @@ static const redirect_import_t redirect_imports[] = { { "__gnu_Unwind_Find_exidx", (app_pc)redirect___gnu_Unwind_Find_exidx }, # endif @@ -360,9 +371,11 @@ index 10c4518b0..1a07ce582 100644 { "dlsym", (app_pc)redirect_dlsym }, /* We need these for clients that don't use libc (i#1747) */ { "strlen", (app_pc)strlen }, -@@ -1570,6 +1578,10 @@ static const redirect_import_t redirect_imports[] = { +@@ -1569,7 +1577,12 @@ static const redirect_import_t redirect_imports[] = { + { "memset_chk", (app_pc)memset }, { "memmove_chk", (app_pc)memmove }, { "strncpy_chk", (app_pc)strncpy }, ++ /* { "__errno_location", (app_pc)__errno_location } */ }; + +DR_API redirect_import_t *client_redirect_imports = NULL; @@ -371,7 +384,7 @@ index 10c4518b0..1a07ce582 100644 #define REDIRECT_IMPORTS_NUM (sizeof(redirect_imports) / sizeof(redirect_imports[0])) #ifdef DEBUG -@@ -1599,6 +1611,15 @@ privload_redirect_sym(os_privmod_data_t *opd, ptr_uint_t *r_addr, const char *na +@@ -1599,6 +1612,15 @@ privload_redirect_sym(os_privmod_data_t *opd, ptr_uint_t *r_addr, const char *na } } #endif diff --git a/pyda_core/pyda_core.c b/pyda_core/pyda_core.c index d4d3ce4..f78cd2c 100644 --- a/pyda_core/pyda_core.c +++ b/pyda_core/pyda_core.c @@ -2,6 +2,8 @@ #include "pyda_core.h" #include "pyda_threads.h" #include "util.h" +#include + #ifndef PYDA_DYNAMORIO_CLIENT @@ -35,6 +37,7 @@ pyda_process* pyda_mk_process() { proc->syscall_post_hook = NULL; proc->py_obj = NULL; + // Setup locks, etc. pthread_condattr_t condattr; int ret; if (ret = pthread_condattr_setpshared(&condattr, PTHREAD_PROCESS_SHARED)) { @@ -54,9 +57,50 @@ pyda_process* pyda_mk_process() { dr_fprintf(STDERR, "pthread_mutex_init failed %d\n", ret); dr_abort(); } + + // Setup I/O + proc->stdin_fd = -1; + proc->stdout_fd = -1; + proc->stderr_fd = -1; + + // TODO: also need to modify dynamorio printing functions + // as they use raw fd 0/1/2 return proc; } +extern file_t our_stderr; +void pyda_capture_io(pyda_process *proc) { + int orig_in = dup(0); + int orig_out = dup(1); + int orig_err = dup(2); + + int pipe1[2], pipe2[2], pipe3[2]; + if (pipe(pipe1) || pipe(pipe2) || pipe(pipe3)) { + dr_fprintf(STDERR, "Failed to create pipes\n"); + dr_abort(); + } + + dup2(pipe1[0], 0); + dup2(pipe2[1], 1); + dup2(pipe3[1], 2); + + stdin = fdopen(orig_in, "r"); + stdout = fdopen(orig_out, "w"); + stderr = fdopen(orig_err, "w"); + + proc->stdin_fd = pipe1[1]; + proc->stdout_fd = pipe2[0]; + proc->stderr_fd = pipe3[0]; + + our_stderr = orig_err; + + // nonblocking + if (fcntl(proc->stdout_fd, F_SETFL, O_NONBLOCK) || fcntl(proc->stderr_fd, F_SETFL, O_NONBLOCK)) { + dr_fprintf(STDERR, "Failed to set stdout to nonblocking\n"); + dr_abort(); + } +} + pyda_thread* pyda_mk_thread(pyda_process *proc) { ABORT_IF_NODYNAMORIO; @@ -100,6 +144,7 @@ pyda_thread* pyda_mk_thread(pyda_process *proc) { thread->rip_updated_in_cleancall = 0; thread->skip_next_hook = 0; thread->python_exited = 0; + thread->app_exited = 0; thread->errored = 0; thread->python_blocked_on_io = 0; diff --git a/pyda_core/pyda_core.h b/pyda_core/pyda_core.h index 83d8855..5ed81c6 100644 --- a/pyda_core/pyda_core.h +++ b/pyda_core/pyda_core.h @@ -67,6 +67,7 @@ struct pyda_thread_s { int skip_next_hook; int python_exited; + int app_exited; int errored; int yield_count; @@ -79,6 +80,8 @@ struct pyda_thread_s { pyda_process* pyda_mk_process(); pyda_thread* pyda_mk_thread(pyda_process*); +void pyda_capture_io(pyda_process *p); + void pyda_process_destroy(pyda_process *p); void pyda_thread_destroy(pyda_thread *t); void pyda_thread_destroy_last(pyda_thread *t); diff --git a/pyda_core/pyda_core_py.c b/pyda_core/pyda_core_py.c index 9ee563e..3bdc868 100644 --- a/pyda_core/pyda_core_py.c +++ b/pyda_core/pyda_core_py.c @@ -26,7 +26,8 @@ static void PydaProcess_dealloc(PydaProcess *self); static PyObject *PydaProcess_run(PyObject *self, PyObject *noarg); static PyObject *PydaProcess_run_until_io(PyObject *self, PyObject *noarg); static PyObject *PydaProcess_run_until_pc(PyObject *self, PyObject *arg); -static PyObject *PydaProcess_get_io_fds(PyObject *self, PyObject *noarg); +static PyObject *PydaProcess_exited(PyObject *self, PyObject *noarg); +static PyObject *PydaProcess_capture_io(PyObject *self, PyObject *noarg); static PyObject *PydaProcess_register_hook(PyObject *self, PyObject *args); static PyObject *PydaProcess_unregister_hook(PyObject *self, PyObject *args); static PyObject *PydaProcess_set_thread_init_hook(PyObject *self, PyObject *args); @@ -149,7 +150,7 @@ static PyMethodDef PydaProcessMethods[] = { {"run", PydaProcess_run, METH_NOARGS, "Run"}, {"run_until_pc", PydaProcess_run_until_pc, METH_VARARGS, "Run until PC is reached"}, {"run_until_io", PydaProcess_run_until_io, METH_NOARGS, "Run until IO syscall"}, - {"get_io_fds", PydaProcess_get_io_fds, METH_NOARGS, "Get IO fds"}, + {"capture_io", PydaProcess_capture_io, METH_NOARGS, "Capture IO -- returns IO fds"}, {"register_hook", PydaProcess_register_hook, METH_VARARGS, "Register a hook"}, {"unregister_hook", PydaProcess_unregister_hook, METH_VARARGS, "Un-register a hook"}, {"set_thread_init_hook", PydaProcess_set_thread_init_hook, METH_VARARGS, "Register thread init hook"}, @@ -158,6 +159,7 @@ static PyMethodDef PydaProcessMethods[] = { {"get_main_module", PydaProcess_get_main_module, METH_VARARGS, "Get name of main module"}, {"read", PydaProcess_read, METH_VARARGS, "Read memory"}, {"write", PydaProcess_write, METH_VARARGS, "Write memory"}, + {"exited", PydaProcess_exited, METH_NOARGS, "Check if thread has exited"}, // {"set_syscall_filter", PydaProcess_set_syscall_filter, METH_VARARGS, "Set list of syscalls to call hooks on"}, {"set_syscall_pre_hook", PydaProcess_set_syscall_pre_hook, METH_VARARGS, "Register syscall pre hook"}, {"set_syscall_post_hook", PydaProcess_set_syscall_post_hook, METH_VARARGS, "Register syscall post hook"}, @@ -214,9 +216,18 @@ pyda_core_process(PyObject *self, PyObject *args, PyObject *kwargs) { return (PyObject*)result; } +static int check_exited(pyda_thread *t) { + if (t->app_exited) { + PyErr_SetString(PyExc_RuntimeError, "Thread has already exited; cannot be resumed"); + return 1; + } + return 0; +} + static PyObject * PydaProcess_run(PyObject* self, PyObject *noarg) { pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); + if (check_exited(t)) return NULL; Py_BEGIN_ALLOW_THREADS pyda_yield(t); @@ -232,8 +243,11 @@ PydaProcess_run(PyObject* self, PyObject *noarg) { static PyObject * PydaProcess_run_until_io(PyObject* self, PyObject *noarg) { pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); + if (check_exited(t)) return NULL; + t->python_blocked_on_io = 1; + // todo: assert that this thread is like, actually blocked Py_BEGIN_ALLOW_THREADS @@ -248,10 +262,15 @@ PydaProcess_run_until_io(PyObject* self, PyObject *noarg) { } static PyObject * -PydaProcess_get_io_fds(PyObject* self, PyObject *noarg) { +PydaProcess_capture_io(PyObject* self, PyObject *noarg) { PydaProcess *p = (PydaProcess*)self; pyda_process *proc = p->main_thread->proc; + if (proc->stdin_fd == -1) { + PyErr_SetString(PyExc_RuntimeError, "IO is not captured"); + return NULL; + } + PyObject *list = PyList_New(0); PyList_Append(list, PyLong_FromLong(proc->stdin_fd)); PyList_Append(list, PyLong_FromLong(proc->stdout_fd)); @@ -266,6 +285,7 @@ PydaProcess_run_until_pc(PyObject* self, PyObject *args) { return NULL; pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); + if (check_exited(t)) return NULL; t->python_blocked_on_io = 1; unsigned long addr; @@ -285,6 +305,17 @@ PydaProcess_run_until_pc(PyObject* self, PyObject *args) { return Py_None; } +static PyObject * +PydaProcess_exited(PyObject* self, PyObject *noarg) { + pyda_thread *t = pyda_thread_getspecific(g_pyda_tls_idx); + if (t->app_exited) { + Py_INCREF(Py_True); + return Py_True; + } + Py_INCREF(Py_False); + return Py_False; +} + static PyObject * pyda_list_modules(PyObject* self, PyObject *noarg) { #ifdef PYDA_DYNAMORIO_CLIENT diff --git a/pyda_core/pyda_patch_python.c b/pyda_core/pyda_patch_python.c index c868220..52d95e3 100644 --- a/pyda_core/pyda_patch_python.c +++ b/pyda_core/pyda_patch_python.c @@ -17,6 +17,7 @@ static redirect_import_t python_redirect_imports[] = { { "pthread_cond_timedwait", (app_pc)pyda_cond_timedwait }, { "pthread_cond_signal", (app_pc)pyda_cond_signal }, { "pthread_mutex_init", (app_pc)pyda_mutex_init }, + { "pthread_self", (app_pc)pyda_thread_self }, { "dlopen", (app_pc)pyda_dlopen }, { "dlsym", (app_pc)pyda_dlsym }, }; diff --git a/pyda_core/pyda_threads.c b/pyda_core/pyda_threads.c index 7b3a795..0f57bc5 100644 --- a/pyda_core/pyda_threads.c +++ b/pyda_core/pyda_threads.c @@ -102,4 +102,14 @@ int pyda_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr) { } return res; +} + +int pyda_thread_self() { + // XXX: We *could* try to return our pyda-specific tid -- but there + // are technically two threads with that tid!! (Python and App). + // If we returned the same ID for two python threads, + // it seems likely it would break things. + // + // Instead, we are just going to return the dynamorio thread id + return dr_get_thread_id(dr_get_current_drcontext()); } \ No newline at end of file diff --git a/pyda_core/pyda_threads.h b/pyda_core/pyda_threads.h index 719fe0d..e30f9b8 100644 --- a/pyda_core/pyda_threads.h +++ b/pyda_core/pyda_threads.h @@ -13,3 +13,4 @@ int pyda_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr); void* pyda_dlopen(const char *filename, int flag); void* pyda_dlsym(void *handle, const char *symbol); +void* pyda_thread_self(); diff --git a/pyda_core/tool.c b/pyda_core/tool.c index c9e4ebc..beca813 100644 --- a/pyda_core/tool.c +++ b/pyda_core/tool.c @@ -93,6 +93,10 @@ void thread_init_event(void *drcontext) { pyda_thread *t; if (!global_proc) { global_proc = pyda_mk_process(); + + if (!getenv("PYDA_NO_CAPTURE") || getenv("PYDA_NO_CAPTURE")[0] != '1') + pyda_capture_io(global_proc); + t = global_proc->main_thread; } else { t = pyda_mk_thread(global_proc); @@ -138,6 +142,7 @@ void thread_exit_event(void *drcontext) { pyda_thread *t = drmgr_get_tls_field(drcontext, g_pyda_tls_idx); DEBUG_PRINTF("thread_exit_event: %p thread id %d\n", t, dr_get_thread_id(drcontext)); + t->app_exited = 1; if (t->proc->main_thread == t) { pyda_break_noblock(t); @@ -308,6 +313,11 @@ void python_main_thread(void *arg) { goto python_exit; } + // The thread will be holding the lock until + // it reaches the "initial" breakpoint + pthread_mutex_lock(&t->mutex); + pthread_mutex_unlock(&t->mutex); + if (PyRun_SimpleFile(f, script_name) == -1) { // python exception } @@ -323,7 +333,7 @@ void python_main_thread(void *arg) { DEBUG_PRINTF("After script exit, GIL status %d\n", PyGILState_Check()); PyEval_SaveThread(); // release GIL - if (t->yield_count == 0) { + if (!t->app_exited) { dr_fprintf(STDERR, "[Pyda] ERROR: Did you forget to call p.run()?\n"); pyda_yield(t); // unblock (note: blocking) DEBUG_PRINTF("Implicit pyda_yield finished\n"); diff --git a/tests/run_tests.py b/tests/run_tests.py index cd9cb16..6b1f92d 100644 --- a/tests/run_tests.py +++ b/tests/run_tests.py @@ -106,6 +106,17 @@ def no_warnings_or_errors(stdout: bytes, stderr: bytes) -> bool: no_warnings_or_errors, lambda o, e: o.count(b"success") == 1, ] + )), + + # test "blocking" I/O + ("test_io", "test_io.c", "test_io.py", ExpectedResult( + retcode=0, + checkers=[ + output_checker, + no_warnings_or_errors, + lambda o, e: o.count(b"hello") == 0, + lambda o, e: o.count(b"pass\n") == 1, + ] )) ] diff --git a/tests/test_io.c b/tests/test_io.c new file mode 100644 index 0000000..de11cdc --- /dev/null +++ b/tests/test_io.c @@ -0,0 +1,16 @@ +#include +int main() { + setvbuf(stdin, NULL, _IONBF, 0); + setvbuf(stdout, NULL, _IONBF, 0); + setvbuf(stderr, NULL, _IONBF, 0); + printf("hello there. please enter your name:\n"); + char name[100]; + scanf("%s", name); + printf("hello, %s\n", name); + + int age; + printf("please enter your age:\n"); + scanf("%d", &age); + printf("hello %s, you are %d years old\n", name, age); + printf("goodbye\n"); +} \ No newline at end of file diff --git a/tests/test_io.py b/tests/test_io.py new file mode 100644 index 0000000..ec41303 --- /dev/null +++ b/tests/test_io.py @@ -0,0 +1,25 @@ +from pyda import * +from pwnlib.elf.elf import ELF +from pwnlib.util.packing import u64 + +p = process() + +e = ELF(p.exe_path) +e.address = p.maps[p.exe_path].base + +def main_hook(p): + print(f"at main, rsp={hex(p.regs.rsp)}") + return_addr = p.read(p.regs.rsp, 8) + print(f"return address: {hex(u64(return_addr))}") + +p.hook(e.symbols["main"], main_hook) + +p.recvuntil(b"please enter your name:") +p.sendline("andrew") +p.recvuntil("please enter your age:") +p.sendline("21") +p.recvline() +line = p.recvline() +assert line == b"hello andrew, you are 21 years old\n", line +p.run() +print("pass") \ No newline at end of file