From 76c0167f5b5734a607ccfa355b07d913a7fc5d42 Mon Sep 17 00:00:00 2001 From: Ryan Scott Date: Tue, 13 Aug 2024 11:27:35 -0400 Subject: [PATCH] macaw-x86: Fix `call` semantics when call target involves the stack pointer Previously, the `macaw-x86` semantics for `call` would retrieve the call target *after* pushing the next instruction's address to the stack, but if the call target involves the stack pointer, then this would mean that it would get the next instruction's address when retrieving the call target. This is not what is intended! This patch fixes the issue by always retrieving the call target *before* pushing the next instruction's address to the stack. I have added a test case to the `macaw-x86-symbolic` test suite which demonstrates that this fix works as intended. Fixes #420. --- x86/src/Data/Macaw/X86/Semantics.hs | 7 ++++- x86_symbolic/tests/pass/T420.c | 42 +++++++++++++++++++++++++ x86_symbolic/tests/pass/T420.opt.exe | Bin 0 -> 9232 bytes x86_symbolic/tests/pass/T420.unopt.exe | Bin 0 -> 9272 bytes 4 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 x86_symbolic/tests/pass/T420.c create mode 100755 x86_symbolic/tests/pass/T420.opt.exe create mode 100755 x86_symbolic/tests/pass/T420.unopt.exe diff --git a/x86/src/Data/Macaw/X86/Semantics.hs b/x86/src/Data/Macaw/X86/Semantics.hs index 370567a3..6aafcc83 100644 --- a/x86/src/Data/Macaw/X86/Semantics.hs +++ b/x86/src/Data/Macaw/X86/Semantics.hs @@ -1376,11 +1376,16 @@ def_set_list = def_call :: InstructionDef def_call = defUnary "call" $ \_ v -> do + -- Get the address to branch to. Make sure to do this *before* pushing the + -- value of the next instruction. If the call target is the stack pointer, + -- then pushing a value will change the underlying value of the stack, so we + -- want to ensure that we get the call target before the stack is modified. + -- (See the T420 test case in macaw-x86-symbolic's test suite.) + tgt <- getCallTarget v -- Push value of next instruction old_pc <- getReg R.X86_IP push addrRepr old_pc -- Set IP - tgt <- getCallTarget v rip .= tgt -- | Conditional jumps diff --git a/x86_symbolic/tests/pass/T420.c b/x86_symbolic/tests/pass/T420.c new file mode 100644 index 00000000..d0c94919 --- /dev/null +++ b/x86_symbolic/tests/pass/T420.c @@ -0,0 +1,42 @@ +/** + * A regression test for #420 which ensures that macaw-x86's semantics for the + * `call` instruction are correct when the call target involves the stack + * pointer. If we get it wrong, then the `call *0x8(%rsp)` instruction below + * will call a nonsensical address. + * + * It is surprisingly difficult to get `gcc` to generate code with a + * `call *0x8(%rsp)` instruction, so we resort to inline assembly to guarantee + * that this happens. + */ + +int one(void) { + return 1; +} + +int __attribute__((noinline)) test_callit(void) { + int (*fn)(void) = &one; + int ret = 0; + __asm__( + // 1. Decrement the stack pointer in preparation for storing `fn` on the + // stack. + "sub $0x10,%%rsp;" + // 2. Store the `fn` function pointer (passed as an input) on the stack. + "mov %1,0x8(%%rsp);" + // 3. Load `fn` from the stack and call it. This is the key part of the + // test, as we want to ensure that macaw-x86's semantics for `call` will + // retrieve `fn` *before* pushing the next instruction's address onto the + // stack. + "call *0x8(%%rsp);" + // 4. Save the return value of `fn` as output. + "mov %%eax, %0;" + // 5. Increment the stack pointer back to its original position. + "add $0x10,%%rsp;" + : "=r"(ret) /* Outputs */ + : "r"(fn) /* Inputs */ + ); + return ret; +} + +void _start() { + test_callit(); +} diff --git a/x86_symbolic/tests/pass/T420.opt.exe b/x86_symbolic/tests/pass/T420.opt.exe new file mode 100755 index 0000000000000000000000000000000000000000..062abd862b6925a90223871cfceeb6a75a6661dc GIT binary patch literal 9232 zcmeHNOKTHR6h4!_QYmy4L8W4+kcA>0l2B2)kSVP;3lXZx#!Z|w(+A{Hk{f6RAJA17 zh5iH={s@z^%x@6*kJlG-qC5m z$az#3^>a*u<7vfU-nbdLj%sKf&|&q8h$kZce!4z1~rBII}e$_n~dPX`GH6dEYF~}9S!l7=OozSqS9T@a}qp1 zLHu?L{xT}beqh_Ai-U&090(|g`6 zc}H9H9&`)kl5}I-TZf|S8%G=a8e)#P|5f83MWh*xUh$^1r*~i6m-8>8a$5a{sei_a I27BxO0uPFk!T$=9`&)FJF3T-aK@! zPsR)b*kW)6C~^~aK*=Y--f$2q?gI3J4Sf&~YP(t~^lTML?2pNi5fU<{J-W$NF{u3* zZ_6@Z-AWZDjAp%d-rc?zDhAbq2T-%Oib z#fZN9a8@FmACYC@+=%L;+NfnPAdb%urY+N^gm8XDhB)ccZc`Xif2y5^x+w|zJBS+Z yAH{I*NZ~V62Ahgwjd=f4itj_DS&mLy7?w=uxp-f$KaS3R{ZCZ?9WGSZ+5a0Y%dXM@ literal 0 HcmV?d00001