From aebfaf60b6efc42b0b5de1ab73e6f42dfaa1e221 Mon Sep 17 00:00:00 2001 From: Matthew Francis-Landau Date: Sat, 30 Jul 2016 16:09:55 -0700 Subject: [PATCH] some issue with the forward branch inlining, not able to pin down the problem at the moment but it happens when running ipython and disabling it gets past at least this issue --- crash.org | 149 +++++++++++++++++++++++++++++++++++++++++ src/config.h | 13 +++- src/jit_internal.h | 7 ++ src/manager.cc | 7 +- src/simple_compiler.cc | 9 ++- src/tracer.cc | 135 +++++++++++++------------------------ src/tracer.h | 2 + tools/biset.py | 67 ++++++++++++++++++ tools/gdb-helper.py | 15 +++-- 9 files changed, 306 insertions(+), 98 deletions(-) create mode 100644 crash.org create mode 100644 tools/biset.py diff --git a/crash.org b/crash.org new file mode 100644 index 0000000..9a2c1ca --- /dev/null +++ b/crash.org @@ -0,0 +1,149 @@ +* caused by the forward prediction +==> 0x007fffe62cc6e8 +[ 16105270 2850 0x000000004416c0] push %rbp 55 PyInt_AsSsize_t +[ 16105271 2851 0x000000004416c1] push %rbx 53 PyInt_AsSsize_t +[ 16105272 2852 0x000000004416c2] sub $0x8, %rsp 4883ec08 PyInt_AsSsize_t +[ 16105273 2853 0x000000004416c6] test %rdi, %rdi 4885ff PyInt_AsSsize_t +[ 16105274 2854 0x000000004416c9] jz 0x4417d0 0f8401010000 PyInt_AsSsize_t +[ 16105275 2855 0x000000004416cf] mov 0x8(%rdi), %rdx 488b5708 PyInt_AsSsize_t +[ 16105276 2856 0x000000004416d3] mov 0xa8(%rdx), %rax 488b82a8000000 PyInt_AsSsize_t +[ 16105277 2857 0x000000004416da] test $0x800000, %eax a900008000 PyInt_AsSsize_t +[ 16105278 2858 0x000000004416df] jnz 0x441770 0f858b000000 PyInt_AsSsize_t +[ 16105279 2859 0x000000004416e5] test $0x1000000, %eax a900000001 PyInt_AsSsize_t +[ 16105280 2860 0x000000004416ea] jnz 0x441760 7574 PyInt_AsSsize_t +[ 16105281 2861 0x000000004416ec] mov 0x60(%rdx), %rax 488b4260 PyInt_AsSsize_t +[ 16105282 2862 0x000000004416f0] test %rax, %rax 4885c0 PyInt_AsSsize_t +[ *16105283 2863 0x000000004416f3] jz 0x4417d0 0f84d7000000 
PyInt_AsSsize_t +[ 16105284 2864 0x000000004416f9] mov 0x90(%rax), %rax 488b8090000000 PyInt_AsSsize_t +[ 16105285 2865 0x00000000441700] test %rax, %rax 4885c0 PyInt_AsSsize_t +[ 16105286 2866 0x00000000441703] jz 0x4417d0 0f84c7000000 PyInt_AsSsize_t +[ 16105287 2867 0x00000000441709] call *%rax ffd0 PyInt_AsSsize_t +==> 0x007fffe62cc6f0 +[ 16105288 2868 0x000000004416cf] mov 0x8(%rdi), %rdx 488b5708 PyInt_AsSsize_t +[ 16105289 2869 0x000000004416d3] mov 0xa8(%rdx), %rax 488b82a8000000 PyInt_AsSsize_t +[ 16105290 2870 0x000000004416da] test $0x800000, %eax a900008000 PyInt_AsSsize_t +[ 16105291 2871 0x000000004416df] jnz 0x441770 (take this branch) 0f858b000000 PyInt_AsSsize_t +[ 16105292 2872 0x000000004416e5] test $0x1000000, %eax a900000001 PyInt_AsSsize_t +[ 16105293 2873 0x000000004416ea] jnz 0x441760 7574 PyInt_AsSsize_t +[ 16105294 2874 0x000000004416ec] mov 0x60(%rdx), %rax 488b4260 PyInt_AsSsize_t +[ 16105295 2875 0x000000004416f0] test %rax, %rax 4885c0 PyInt_AsSsize_t +[ 16105296 2876 0x000000004416f3] jz 0x4417d0 0f84d7000000 PyInt_AsSsize_t +[ 16105297 2877 0x000000004416f9] mov 0x90(%rax), %rax 488b8090000000 PyInt_AsSsize_t +[ 16105298 2878 0x00000000441700] test %rax, %rax 4885c0 PyInt_AsSsize_t +[ 16105299 2879 0x00000000441703] jz 0x4417d0 0f84c7000000 PyInt_AsSsize_t +[ 16105300 2880 0x00000000441709] call *%rax ffd0 PyInt_AsSsize_t +==> 0x007fffe62cc6ff +[ 16105301 2881 0x00000000441770] mov 0x10(%rdi), %rbp 488b6f10 PyInt_AsSsize_t +[ 16105302 2882 0x00000000441774] add $0x8, %rsp 4883c408 PyInt_AsSsize_t +[ 16105303 2883 0x00000000441778] pop %rbx 5b PyInt_AsSsize_t +[ 16105304 2884 0x00000000441779] mov %rbp, %rax 4889e8 PyInt_AsSsize_t +[ 16105305 2885 0x0000000044177c] pop %rbp 5d PyInt_AsSsize_t +[ 16105306 2886 0x0000000044177d] ret c3 PyInt_AsSsize_t +==> 0x007fffe62cc715 +[ 16105307 2887 0x0000000041fe38] cmp $0xffffffffffffffff, %rax 4883f8ff PyNumber_AsSsize_t +[ 16105308 2888 0x0000000041fe3c] mov %rax, %r12 4989c4 
PyNumber_AsSsize_t +[ 16105309 2889 0x0000000041fe3f] jz 0x41feb0 746f PyNumber_AsSsize_t +[ 16105310 2890 0x0000000041fe41] call 0x413b30 e8ea3cffff PyNumber_AsSsize_t +==> 0x007fffe62cc726 + +disabling the forward predictor prevents this crash from happening + + +What the branches looks like full +==> 0x007fffe62cc6e8 +[ 16105270 2850 0x000000004416c0] push %rbp 55 PyInt_AsSsize_t +[ 16105271 2851 0x000000004416c1] push %rbx 53 PyInt_AsSsize_t +[ 16105272 2852 0x000000004416c2] sub $0x8, %rsp 4883ec08 PyInt_AsSsize_t +[ 16105273 2853 0x000000004416c6] test %rdi, %rdi 4885ff PyInt_AsSsize_t +[ 16105274 2854 0x000000004416c9] jz xxxxxxxx 0f8401010000 PyInt_AsSsize_t +[ 16105288 2868 0x000000004416cf] mov 0x8(%rdi), %rdx 488b5708 PyInt_AsSsize_t +[ 16105289 2869 0x000000004416d3] mov 0xa8(%rdx), %rax 488b82a8000000 PyInt_AsSsize_t +[ 16105290 2870 0x000000004416da] test $0x800000, %eax a900008000 PyInt_AsSsize_t +[ 16105291 2871 0x000000004416df] j z xxxxxxxx (rewritten branch) 0f858b000000 PyInt_AsSsize_t +[ 16105301 2881 0x00000000441770] mov 0x10(%rdi), %rbp 488b6f10 PyInt_AsSsize_t +[ 16105302 2882 0x00000000441774] add $0x8, %rsp 4883c408 PyInt_AsSsize_t +[ 16105303 2883 0x00000000441778] pop %rbx 5b PyInt_AsSsize_t +[ 16105304 2884 0x00000000441779] mov %rbp, %rax 4889e8 PyInt_AsSsize_t +[ 16105305 2885 0x0000000044177c] pop %rbp 5d PyInt_AsSsize_t +[ 16105306 2886 0x0000000044177d] ret (add $8, %rsp) c3 PyInt_AsSsize_t + + +with the abort +[ 16105270 2850 0x000000004416c0] push %rbp 55 PyInt_AsSsize_t +[ 16105271 2851 0x000000004416c1] push %rbx 53 PyInt_AsSsize_t +[ 16105272 2852 0x000000004416c2] sub $0x8, %rsp 4883ec08 PyInt_AsSsize_t +[ 16105273 2853 0x000000004416c6] test %rdi, %rdi 4885ff PyInt_AsSsize_t + jmp xxxxxxxxxx (resume normal program) + + + +* reduced the amount that the forward prediction will go: +==> 0x007fffe62cc31a +[ 13488446 2145 0x0000000047331e] mov 0x10(%rbx), %rax 488b4310 lib=/home/matthew/developer/cpython/python +[ 13488447 2146 
0x00000000473322] cmp %rbp, %rax 4839e8 lib=/home/matthew/developer/cpython/python +[ 13488448 2147 0x00000000473325] jle 0x473490 0f8e65010000 lib=/home/matthew/developer/cpython/python +==> 0x007fffe62cc323 +[ 13488449 2148 0x0000000047332b] movzx 0x24(%rbx,%rbp), %eax 0fb6442b24 lib=/home/matthew/developer/cpython/python +[ 13488450 2149 0x00000000473330] mov %al, 0x28(%rsp) 88442428 lib=/home/matthew/developer/cpython/python +[ 13488451 2150 0x00000000473334] mov 0x7dfba0(%rax,8), %rax 488b04c5a0fb7d00 lib=/home/matthew/developer/cpython/python +[ 13488452 2151 0x0000000047333c] test %rax, %rax 4885c0 lib=/home/matthew/developer/cpython/python +[ 13488453 2152 0x0000000047333f] jz 0x4734f5 0f84b0010000 lib=/home/matthew/developer/cpython/python +==> 0x007fffe62cc330 +[ 13488454 2153 0x00000000473345] add $0x1, (%rax) 48830001 lib=/home/matthew/developer/cpython/python +[ 13488455 2154 0x00000000473349] add $0x38, %rsp 4883c438 lib=/home/matthew/developer/cpython/python +[ 13488456 2155 0x0000000047334d] pop %rbx 5b lib=/home/matthew/developer/cpython/python +[ 13488457 2156 0x0000000047334e] pop %rbp 5d lib=/home/matthew/developer/cpython/python +[ *13488458 2157 0x0000000047334f] ret c3 lib=/home/matthew/developer/cpython/python +==> 0x007fffe62cc34a +[ 13488459 2158 0x000000004b21f4] mov %rax, 0x20(%rsp) 4889442420 PyEval_EvalFrameEx +[ 13488460 2159 0x000000004b21f9] call 0x413b30 e83219f6ff PyEval_EvalFrameEx +==> 0x007fffe62cc358 +[ 13488461 2160 0x00000000413b30] jmp 0x38ea92(%rip) ff2592ea3800 lib=/home/matthew/developer/cpython/python +==> 0x007fffe62cc35d +[ 13488462 2161 0x000000004b21fe] sub $0x1, (%r14) 49832e01 PyEval_EvalFrameEx +[ 13488463 2162 0x000000004b2202] jz 0x4b6960 0f8458470000 PyEval_EvalFrameEx +==> 0x007fffe62cc35d +[ 13488464 2163 0x000000004b2208] call 0x413e80 e8731cf6ff PyEval_EvalFrameEx +[ 13488465 2164 0x00000000413e80] jmp 0x38e8ea(%rip) ff25eae83800 lib=/home/matthew/developer/cpython/python +==> 0x007fffe62cc367 +[ 13488466 
2165 0x000000004b220d] call 0x413b30 e81e19f6ff PyEval_EvalFrameEx +[ 13488467 2166 0x00000000413b30] jmp 0x38ea92(%rip) ff2592ea3800 lib=/home/matthew/developer/cpython/python +==> 0x007fffe62cc367 +[ 13488468 2167 0x000000004b2212] mov 0x10(%rsp), %rsi 488b742410 PyEval_EvalFrameEx +[ 13488469 2168 0x000000004b2217] mov (%rsi), %rax 488b06 PyEval_EvalFrameEx +[ 13488470 2169 0x000000004b221a] mov %rax, 0x18(%rsp) 4889442418 PyEval_EvalFrameEx +[ 13488471 2170 0x000000004b221f] sub $0x1, %rax 4883e801 PyEval_EvalFrameEx +[ 13488472 2171 0x000000004b2223] test %rax, %rax 4885c0 PyEval_EvalFrameEx +[ 13488473 2172 0x000000004b2226] mov %rax, (%rsi) 488906 PyEval_EvalFrameEx +[ 13488474 2173 0x000000004b2229] jz 0x4b6951 0f8422470000 PyEval_EvalFrameEx + +what full branches look like: +==> 0x007fffe62cc31a +[ 13488446 2145 0x0000000047331e] mov 0x10(%rbx), %rax 488b4310 lib=/home/matthew/developer/cpython/python +[ 13488447 2146 0x00000000473322] cmp %rbp, %rax 4839e8 lib=/home/matthew/developer/cpython/python +[ 13488448 2147 0x00000000473325] jle xxxxxxxx (not taken) 0f8e65010000 lib=/home/matthew/developer/cpython/python +[ 13488449 2148 0x0000000047332b] movzx 0x24(%rbx,%rbp), %eax 0fb6442b24 lib=/home/matthew/developer/cpython/python +[ 13488450 2149 0x00000000473330] mov %al, 0x28(%rsp) 88442428 lib=/home/matthew/developer/cpython/python +[ 13488451 2150 0x00000000473334] mov 0x7dfba0(%rax,8), %rax 488b04c5a0fb7d00 lib=/home/matthew/developer/cpython/python +[ 13488452 2151 0x0000000047333c] test %rax, %rax 4885c0 lib=/home/matthew/developer/cpython/python +[ 13488453 2152 0x0000000047333f] jnz xxxxxxxx (branch taken) 0f84b0010000 lib=/home/matthew/developer/cpython/python +[ 13488454 2153 0x00000000473345] add $0x1, (%rax) 48830001 lib=/home/matthew/developer/cpython/python +[ 13488455 2154 0x00000000473349] add $0x38, %rsp 4883c438 lib=/home/matthew/developer/cpython/python +[ 13488456 2155 0x0000000047334d] pop %rbx 5b 
lib=/home/matthew/developer/cpython/python +[ 13488457 2156 0x0000000047334e] pop %rbp 5d lib=/home/matthew/developer/cpython/python +[ *13488458 2157 0x0000000047334f] ret (add $8, %rsp) c3 lib=/home/matthew/developer/cpython/python + +with the abort +[ 13488446 2145 0x0000000047331e] mov 0x10(%rbx), %rax 488b4310 lib=/home/matthew/developer/cpython/python +[ 13488447 2146 0x00000000473322] cmp %rbp, %rax 4839e8 lib=/home/matthew/developer/cpython/python +[ 13488448 2147 0x00000000473325] jle xxxxxxxx (not taken) 0f8e65010000 lib=/home/matthew/developer/cpython/python +[ 13488449 2148 0x0000000047332b] movzx 0x24(%rbx,%rbp), %eax 0fb6442b24 lib=/home/matthew/developer/cpython/python +[ 13488450 2149 0x00000000473330] mov %al, 0x28(%rsp) 88442428 lib=/home/matthew/developer/cpython/python +[ 13488451 2150 0x00000000473334] mov 0x7dfba0(%rax,8), %rax 488b04c5a0fb7d00 lib=/home/matthew/developer/cpython/python +[ 13488452 2151 0x0000000047333c] test %rax, %rax 4885c0 lib=/home/matthew/developer/cpython/python +[ 13488453 2152 0x0000000047333f] jnz xxxxxxxx (branch taken) 0f84b0010000 lib=/home/matthew/developer/cpython/python +[ 13488454 2153 0x00000000473345] add $0x1, (%rax) 48830001 lib=/home/matthew/developer/cpython/python +[ 13488455 2154 0x00000000473349] add $0x38, %rsp 4883c438 lib=/home/matthew/developer/cpython/python +[ 13488456 2155 0x0000000047334d] pop %rbx 5b lib=/home/matthew/developer/cpython/python +[ 13488457 2156 0x0000000047334e] pop %rbp 5d lib=/home/matthew/developer/cpython/python + jmp xxxxxxxx resume program diff --git a/src/config.h b/src/config.h index 40ef9b1..5beb643 100644 --- a/src/config.h +++ b/src/config.h @@ -2,12 +2,21 @@ #define REDMAGIC_CONFIG_H_ -// using 10 will cause ipython to crash, TODO: find the bug... 
+// the number of loops that are required to occur before it traces a loop #define CONF_NUMBER_OF_JUMPS_BEFORE_TRACE 10 +// redmagic will attempt to inline forward jumps which is useful in cases like: `if(a || b || c...)` where many conditional jumps +// will merge to the same point, but it may require back tracking in a lot of cases which may be slower +//#define CONF_ATTEMPT_FORWARD_JUMP_INLINE -//#define CONF_VERBOSE +// backward jumps that are inside the same generated block will be inlined, does _not_ require back tracking as the size of the block +// is known at the time the instruction is emitted, this is useful for short loops, e.g.: `while (a != NULL) a = a->next;` +#define CONF_ATTEMPT_BACKWARDS_JUMP_INLINE +// makes it print all the instructions processed and extra info +#define CONF_VERBOSE + +// support aborting the system after some fixed number of instructions have been processed, see tools/biset.py for debugging with this #define CONF_GLOBAL_ABORT #endif // REDMAGIC_CONFIG_H_ diff --git a/src/jit_internal.h b/src/jit_internal.h index 97ce434..aacc56b 100644 --- a/src/jit_internal.h +++ b/src/jit_internal.h @@ -280,6 +280,13 @@ namespace redmagic { ::write(2, buffer, b); } + /* assembly code to read the TSC */ + static inline uint64_t RDTSC() { + unsigned int hi, lo; + __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi)); + return ((uint64_t)hi << 32) | lo; + } + } diff --git a/src/manager.cc b/src/manager.cc index 364848b..8977af6 100644 --- a/src/manager.cc +++ b/src/manager.cc @@ -379,7 +379,7 @@ void* Manager::backwards_branch(void *id, void *ret_addr) { new_head->is_compiled = true; new_head->is_traced = true; #ifdef CONF_VERBOSE - red_printf("entering trace %x\n", id); + red_printf("entering trace %#016lx\n", id); #endif return info->starting_point; } @@ -400,7 +400,7 @@ void* Manager::backwards_branch(void *id, void *ret_addr) { new_head->is_compiled = true; new_head->is_traced = true; #ifdef CONF_VERBOSE - red_printf("entering aborted trace %x\n", id); + 
red_printf("entering aborted trace %#016lx\n", id); #endif return info->starting_point; } @@ -694,7 +694,8 @@ void* Manager::is_traced_call() { } void Manager::disable_branch(void *id) { - branches[id].disabled = true; + auto info = &branches[id]; + info->disabled = true; for(int i = 0; i < threadl_tracer_stack.size(); i++) { auto b = &threadl_tracer_stack[i]; assert(b->trace_id != id || !b->is_traced); diff --git a/src/simple_compiler.cc b/src/simple_compiler.cc index c125ac4..a7f2ad6 100644 --- a/src/simple_compiler.cc +++ b/src/simple_compiler.cc @@ -423,8 +423,13 @@ size_t SimpleCompiler::_relocCode(void* _dst, asmjit::Ptr baseAddress) const noe if(_labels[i]->exId == 0xAB0ADD00) { uint8_t *target = (uint8_t*)_labels[i]->exData; // check that in the same 4gb memory block - assert(((uint64_t)target & 0xffffffff00000000) == ((uint64_t)dst & 0xffffffff00000000)); - int32_t buf_offset = target - dst; + // if(((uint64_t)target & 0xffffffff00000000) != ((uint64_t)dst & 0xffffffff00000000)) { + // red_printf("failed same region check %#016lx %#016lx\n", target, dst); + // assert(0); + // } + int64_t buf_offset_l = target - dst; + int32_t buf_offset = buf_offset_l; //target - dst; + assert(buf_offset == buf_offset_l); LabelLink *link = _labels[i]->links; // LabelLink *prev = nullptr; while(link) { diff --git a/src/tracer.cc b/src/tracer.cc index fc6a04d..2099533 100644 --- a/src/tracer.cc +++ b/src/tracer.cc @@ -65,8 +65,9 @@ Tracer::Tracer(CodeBuffer* buffer) { ud_set_input_hook(&disassm, udis_input_hook); ud_set_mode(&disassm, 64); // 64 bit ud_set_vendor(&disassm, UD_VENDOR_INTEL); +#ifdef CONF_VERBOSE ud_set_syntax(&disassm, UD_SYN_ATT); - +#endif auto written = buffer->writeToEnd(cb_interrupt_block); written.replace_stump(0xfafafafafafafafa, (uint64_t)&resume_struct); @@ -195,7 +196,7 @@ extern "C" void* red_end_trace(mem_loc_t normal_end_address) { } } #ifdef CONF_VERBOSE - red_printf("exiting trace %x\n", head.trace_id); + red_printf("exiting trace %#016lx\n", 
head.trace_id); #endif return ret; } @@ -213,54 +214,16 @@ extern "C" void* red_branch_to_sub_trace(void *resume_addr, void *sub_trace_id, protected_malloc = true; return ret; - - - /* - auto head = manager->get_tracer_head(); - assert(head->is_traced); - assert(head->tracer == nullptr || head->tracer->did_abort); - assert(head->resume_addr == nullptr); - head->resume_addr = resume_addr; - assert(sub_trace_id != head->trace_id); - - Manager::branch_info *info = &manager->branches[sub_trace_id]; - if(info->tracer != nullptr) { -#ifdef CONF_GLOBAL_ABORT - assert(info->tracer->did_abort); -#else - assert(0); // TODO: vvv -#endif - // TODO: pop this element off the manager stack and abort the trace by jumping back to normal code - // also check that the inner loop wasn't aborted? - // maybe treat this as a temp disabled inner loop - } - assert(info->starting_point != nullptr); - assert(!info->disabled); - auto new_head = manager->push_tracer_stack(); - new_head->is_traced = true; - new_head->trace_id = sub_trace_id; - - return info->starting_point; - */ } extern "C" void red_asm_start_tracing(void*, void*, void*, void*); extern "C" void red_asm_begin_block(); - -// extern "C" void _dl_runtime_resolve(); -// extern "C" void _dl_fixup(); - void* Tracer::Start(void *start_addr) { - //generation_lock.lock(); set_pc((mem_loc_t)start_addr); - //set_pc((uint64_t)&red_asm_ret_only); - - //red_asm_start_tracing(NULL, (void*)&red_begin_tracing, this, stack - sizeof(stack)); using namespace asmjit; SimpleCompiler compiler(buffer); - //compiler.mov(x86::rdx, x86::rsp); // stash the values of the register that we are about to override compiler.mov(x86::ptr(x86::rsp, static_cast(-TRACE_STACK_OFFSET - sizeof(struct user_regs_struct))), x86::rdx); compiler.mov(x86::ptr(x86::rsp, static_cast(-TRACE_STACK_OFFSET - sizeof(struct user_regs_struct) - sizeof(register_t))), x86::rsi); @@ -270,19 +233,15 @@ void* Tracer::Start(void *start_addr) { compiler.mov(x86::rdx, imm_ptr(this)); // 
argument 3 compiler.mov(x86::rsi, imm_ptr(&red_begin_tracing)); - //mem_loc_t stack_ptr = ((stack + 8*1024) & ~(4*1024 - 1)) + TRACE_STACK_SIZE; mem_loc_t stack_ptr = (((mem_loc_t)stack_) + sizeof(stack_)) & ~63; resume_struct = {0}; resume_struct.stack_pointer = (register_t)stack_ptr - sizeof(mem_loc_t); *(void**)(stack_ptr - sizeof(mem_loc_t)) = (void*)&red_asm_begin_block; - //compiler.mov(x86::rsp, imm_ptr(stack - sizeof(stack))); - //compiler.push(imm_ptr(red_begin_tracing)); compiler.jmp(imm_ptr(interrupt_block_location)); compiler.mov(x86::r15, imm_u(0xdeadbeef)); compiler.mov(x86::r15, imm_ptr(start_addr)); - //compiler.jmp(imm_ptr(start_addr)); auto written = compiler.finalize(); @@ -294,41 +253,39 @@ void* Tracer::Start(void *start_addr) { } icount = 0; - last_local_jump = 0; last_call_instruction = -1; +#ifdef CONF_ATTEMPT_FORWARD_JUMP_INLINE + last_local_jump = 0; local_jump_min_addr = 0; +#endif -#if defined(NDEBUG) && defined(CONF_GLOBAL_ABORT) - if(global_abort()) { - // disable this tracing - - SimpleCompiler compiler2(buffer); - compiler2.jmp(imm_ptr(start_addr)); - - did_abort = true; - manager->get_tracer_head()->did_abort = true; - CodeBuffer::Relase(buffer); - buffer = nullptr; - return start_addr; - } +#ifdef CONF_GLOBAL_ABORT +#ifdef NDEBUG +#error "global abort without debug?" 
+#endif + // if(global_abort()) { + // // disable this tracing + + // SimpleCompiler compiler2(buffer); + // compiler2.jmp(imm_ptr(start_addr)); + + // did_abort = true; + // manager->get_tracer_head()->did_abort = true; + // CodeBuffer::Relase(buffer); + // buffer = nullptr; + // assert(0); + // return start_addr; + // } #endif return (void*)written.getRawBuffer(); - //return (void*)&red_begin_tracing; } // abort after some number of instructions to see if there is an error with the first n instructions // useful for bisecting which instruction is failing if there is an error //#define ABORT_BEFORE 50 -//56 -// break with 16 after 10 iterations -// if we should check what the loop number is first -//#define ABORT_ENTER_ITER 10 - -// 15 works, 16 breaks with `mov (%rdx, %rax) %eax` -// 21 was breaking almost instantly after `jmp *%rax` #ifndef NDEBUG bool Tracer::debug_check_abort() { @@ -343,8 +300,6 @@ bool Tracer::debug_check_abort() { return true; #endif - // if(loop_n == 10) - // return true; return false; } #endif @@ -369,7 +324,9 @@ void Tracer::Run(struct user_regs_struct *other_stack) { assert(after_stack == 0xdeadbeef); generated_location = buffer->getRawBuffer() + buffer->getOffset(); last_location = udis_loc; +#ifdef CONF_ATTEMPT_FORWARD_JUMP_INLINE local_jump_min_addr = last_local_jump = 0; +#endif assert(current_location == last_location); assert(protected_malloc); //assert(generation_lock.owns_lock()); @@ -377,14 +334,17 @@ void Tracer::Run(struct user_regs_struct *other_stack) { // if we somehow have less then 1kb free then we might have overwritten something // which is why this is asserted as an error assert(buffer->getFree() > 1024); - if(buffer->getFree() <= 10 * 1024) { + if(buffer->getFree() <= 10 * 1024 && icount - last_call_instruction > 5) { // there is less than 10 kb of space on this buffer, so we are going to make a new one // disabling malloc protecting might be bad... 
protected_malloc = false; +#ifdef CONF_VERBOSE + red_printf("switching code generation buffer\n"); +#endif auto new_buffer = CodeBuffer::CreateBuffer(1024 * 1024); { SimpleCompiler compiler(new_buffer); - compiler.mov(asmjit::x86::r15, asmjit::imm_u(0xdeadcafe)); + compiler.mov(asmjit::x86::r15, asmjit::imm_u(0xdeadcafe1)); compiler.mov(asmjit::x86::r15, asmjit::imm_u(generated_location)); } auto new_gen_l = new_buffer->getRawBuffer() + new_buffer->getOffset(); @@ -398,7 +358,6 @@ void Tracer::Run(struct user_regs_struct *other_stack) { generated_location = new_gen_l; protected_malloc = true; - write_interrupt_block(); } processes_instructions: @@ -412,8 +371,8 @@ void Tracer::Run(struct user_regs_struct *other_stack) { #ifdef CONF_VERBOSE Dl_info dlinfo; - dladdr((void*)ud_insn_off(&disassm), &dlinfo); auto ins_loc = ud_insn_off(&disassm); + dladdr((void*)ins_loc, &dlinfo); if(dlinfo.dli_sname != nullptr) red_printf("[%10lu %8i %#016lx] \t%-38s %-20s %s\n", global_icount, icount, ins_loc, ud_insn_asm(&disassm), ud_insn_hex(&disassm), dlinfo.dli_sname); @@ -421,21 +380,26 @@ void Tracer::Run(struct user_regs_struct *other_stack) { red_printf("[%10lu %8i %#016lx] \t%-38s %-20s lib=%s\n", global_icount, icount, ins_loc, ud_insn_asm(&disassm), ud_insn_hex(&disassm), dlinfo.dli_fname); #endif - //fprintf(stderr, ); - //fflush(stderr); - jmp_info = decode_instruction(); if(jmp_info.is_jump) { if(jmp_info.is_local_jump) { // there is a chance that we can directly inline this if this is a short loop + assert(jmp_info.local_jump_offset); if(jmp_info.local_jump_offset < 0) { +#ifdef CONF_ATTEMPT_BACKWARDS_JUMP_INLINE if(udis_loc - current_location > -jmp_info.local_jump_offset) { // this is a backwards branch that is going an acceptable distance goto instruction_approved; } +#endif } else { +#ifdef CONF_ATTEMPT_FORWARD_JUMP_INLINE // this is a forward branch - if(jmp_info.local_jump_offset > 512) + // TODO: there is some bug in this part of code....not sure what it is + // 
when running with max foward distance of 512 and starting ipython it will end up crashing + // when tracing PyInt_AsSsize_t + // use the bisect tool to find where exactly it is crashing + if(jmp_info.local_jump_offset > 100) // this is too far forward, we are unlikely to actually be able to inline this, so just run it goto run_instructions; if(last_local_jump == 0) { @@ -445,8 +409,8 @@ void Tracer::Run(struct user_regs_struct *other_stack) { if(udis_loc + jmp_info.local_jump_offset > local_jump_min_addr) local_jump_min_addr = udis_loc + jmp_info.local_jump_offset; goto instruction_approved; +#endif } - } goto run_instructions; } @@ -469,22 +433,26 @@ void Tracer::Run(struct user_regs_struct *other_stack) { #endif instruction_approved: last_location = udis_loc; +#ifdef CONF_ATTEMPT_FORWARD_JUMP_INLINE if(local_jump_min_addr && udis_loc > local_jump_min_addr) { // yay, we are able to direclty inline this jump local_jump_min_addr = last_local_jump = 0; } +#endif } run_instructions: +#ifdef CONF_ATTEMPT_FORWARD_JUMP_INLINE if(local_jump_min_addr > last_location) { // we failed to get far enough in the decoding to allow these jumps to be inlined // so we revert back to the last "good" state last_location = last_local_jump; set_pc(last_local_jump); ud_disassemble(&disassm); - last_local_jump = local_jump_min_addr = 0; // these jumps can't reference registers so if that is what caused the break then set to false rip_used = false; } + //last_local_jump = local_jump_min_addr = 0; +#endif if(current_location != last_location) { { CodeBuffer ins_set(current_location, last_location - current_location); @@ -599,19 +567,14 @@ void* Tracer::TempDisableTrace() { buffer->setOffset(last_call_generated_op); SimpleCompiler compiler(buffer); auto label = compiler.newLabel(); - //compiler.mov(asmjit::x86::rdi, asmjit::imm_u(0xfafafafafafafafa)); compiler.lea(asmjit::x86::rdi, asmjit::x86::ptr(label)); compiler.call(asmjit::imm_ptr(&red_set_temp_resume)); 
compiler.jmp(asmjit::imm_ptr(last_call_ret_addr)); - compiler.mov(asmjit::x86::r15, asmjit::imm_u(0xdeadcafe)); + compiler.mov(asmjit::x86::r15, asmjit::imm_u(0xdeadcafe2)); compiler.bind(label); auto written = compiler.finalize(); - // SimpleCompiler compiler2(buffer.get()); - // compiler2.mov(asmjit::x86::rax, asmjit::x86::ptr(asmjit::x86::rsp, -8)); - // compiler2.TestRegister(RAX) write_interrupt_block(); - //temp_disable_resume = (void*)(written.getRawBuffer() + written.getOffset()); red_set_temp_resume((void*)(written.getRawBuffer() + written.getOffset())); return (void*)last_call_ret_addr; @@ -624,7 +587,6 @@ void Tracer::TempEnableTrace(void *resume_pc) { set_pc((mem_loc_t)resume_pc); SimpleCompiler compiler(buffer); // the "normal" return address will be set to ris when this returns from the temp disabled region - //compiler.mov(asmjit::x86::rax, asmjit::x86::ptr(asmjit::x86::rsp, -8)); compiler.TestRegister((mem_loc_t)&red_asm_jump_rsi, RSI, (register_t)resume_pc, &merge_block_stack.back()); auto written = compiler.finalize(); write_interrupt_block(); @@ -677,7 +639,6 @@ void* Tracer::BeginMergeBlock() { if(current_not_traced_call_addr != (mem_loc_t)&redmagic_begin_merge_block) return NULL; assert(icount - last_call_instruction < 2); - //assert(current_not_traced_call_addr == (mem_loc_t)&redmagic_begin_merge_block); buffer->setOffset(last_call_generated_op); mem_loc_t ret = buffer->getRawBuffer() + buffer->getOffset(); // there are no instructions to generate for this @@ -708,7 +669,6 @@ void* Tracer::EndMergeBlock() { } // the ending of this tracer instructions - //method_address_stack.clear(); finish_patch(); tracing_from = 0; merge_resume = 0; @@ -723,6 +683,7 @@ void* Tracer::EndMergeBlock() { auto info = &manager->branches[head->trace_id]; assert(info->tracer == this); head->tracer = info->tracer = nullptr; + head->is_compiled = true; info->traced_instruction_count += icount; if(info->longest_trace_instruction_count < icount) 
info->longest_trace_instruction_count = icount; diff --git a/src/tracer.h b/src/tracer.h index ee90a07..a6e04cb 100644 --- a/src/tracer.h +++ b/src/tracer.h @@ -172,8 +172,10 @@ namespace redmagic { bool rip_used = false; int64_t icount = 0; +#ifdef CONF_ATTEMPT_FORWARD_JUMP_INLINE mem_loc_t last_local_jump = 0; mem_loc_t local_jump_min_addr = 0; +#endif mem_loc_t interrupt_block_location; diff --git a/tools/biset.py b/tools/biset.py new file mode 100644 index 0000000..44b4097 --- /dev/null +++ b/tools/biset.py @@ -0,0 +1,67 @@ +#!/usr/bin/env python3 + +import sys +import subprocess +import os +import time +from collections import deque + + +def do_run(instruction_count, process, error_search): + env = os.environ.copy() + env['REDMAGIC_GLOBAL_ABORT'] = str(instruction_count) + + cnt = 0 + qu = deque([], maxlen=300) + + proc = subprocess.Popen(process, stderr=subprocess.PIPE, env=env) + + try: + for li in proc.stderr: + qu.append(li) + if cnt % 50000 == 0: + print(instruction_count, li) + cnt += 1 + finally: + proc.kill() + + ending = '\n'.join(map(str, qu)) + print(ending) + + if any([e in ending for e in error_search]): + print('FAILED') + return False + else: + print('SUCCESS') + return True + + +def main(): + max_i = int(sys.argv[-1]) + min_i = int(sys.argv[-2]) + + assert max_i > min_i + + error_search = ['IndexError', 'Assertion'] + #process = '/home/matthew/developer/cpython/python -m IPython -c exit()'.split() + # run under gdb since the program seems to change behavor depending on how it is run + process = ['gdb', '/home/matthew/developer/cpython/python', '--eval-command=run -m IPython -c "exit()"', '--eval-command=quit'] + + try: + while max_i - min_i > 2: + inst = (max_i + min_i) // 2 + print('>>>>>>>>>>>>>>>>>>>running bisect stopping at instruction {} ({}, {}, {})'.format(inst, min_i, max_i, max_i - min_i)) + time.sleep(5) + r = do_run(inst, process, error_search) + if r: + min_i = inst + else: + max_i = inst + finally: + print(min_i, max_i) + + + + 
+if __name__ == '__main__': + main() diff --git a/tools/gdb-helper.py b/tools/gdb-helper.py index feb4024..674af64 100644 --- a/tools/gdb-helper.py +++ b/tools/gdb-helper.py @@ -23,19 +23,26 @@ def invoke(self, args, from_tty): redmagic_start = int(redmagic_info[0], 16) redmagic_end = int(redmagic_info[1], 16) - verbose = True + verbose = False branches_taken = [] def get_rip(): return int(gdb.parse_and_eval('$rip')) + # so that we can determine where it is resuming the trace + gdb.execute('break red_asm_resume_eval_block') + current_rip = get_rip() while True: last_rip = current_rip - if not verbose or redmagic_start < last_rip < redmagic_end: - gdb.execute('n', to_string=True) + if not verbose and redmagic_start < last_rip < redmagic_end: + li = gdb.execute('x/i {}'.format(last_rip), to_string=True) + if 'red_asm_resume_eval_block' in li: + gdb.execute('si', to_string=True) + else: + gdb.execute('n', to_string=True) current_rip = get_rip() else: gdb.execute('si', to_string=True) @@ -43,7 +50,7 @@ def get_rip(): if not (0 < current_rip - last_rip < 15): # then we probably have taken a branch or something li = gdb.execute('x/i {}'.format(last_rip), to_string=True) - if verbose or '__tls_get_addr' not in li: + if verbose or ('__tls_get_addr' not in li and '_dl_addr' not in li): #branches_taken.append(li) gdb.write(li)