diff --git a/doc/deob.md b/doc/deob.md index 7fc37cc..359fc12 100644 --- a/doc/deob.md +++ b/doc/deob.md @@ -1,15 +1,20 @@ ## Decompile obfuscated code Menu *"View/Open subviews/Decompile obfuscated code (Alt-F5)"* -This experimental feature have the intention to force decompilation a code that Hex-Rays unwilling to decompile. Such code may be full of trashy void operations, liberal stack movement, smeared excessive jumps, calls and returns used as jumps. Purpose of this feature to force decompiler to clean up trash and show a crumb of meaning of the code does. +This experimental feature is intended to force decompilation of code that Hex-Rays is unwilling to decompile. +Such code may be full of trashy void operations, liberal stack movement, smeared excessive jumps, calls and returns used as jumps. +The purpose of this feature is to force the decompiler to clean up the trash and show a crumb of meaning of what the code does. How does it work: - - At first the plugin tries to collect ranges of code spread over binary that belongs one function. If any other subroutines is met in the path - it will be destroyed. If data or unexplored bytes are met in the path - it will be converted to code automatically or ask user to deal with it. + - At first the plugin tries to collect ranges of code spread over the binary that belong to one function. + If any other subroutine is met in the path - it will be destroyed. If data or unexplored bytes are met in the path - they will be converted to code automatically, or the user will be asked to deal with them. - New disassembler's proc is created from the collected code chunks. - - Then for the Hex-Rays decompiler are installed few microcode hooks (handlers) and generated microcode. There are also few sets of microcode level custom optimizers. Part of them works always, part only in time of "Decompile obfuscated code" mode. + - Then a few microcode hooks (handlers) are installed for the Hex-Rays decompiler and microcode is generated. 
+ There are also a few sets of microcode-level custom optimizers. Some of them always work, others only in "Decompile obfuscated code" mode. - If hookers detect return statement is turned into jump, processing is above steps repeated for newly discovered code at jump target. - On final pass collected chunks are decompiled with decompile_snippet - - Then, just to open new pseudocode window, the plugin creates new segment "`[hrt]nullsub`" with `nullsub_1` function inside and open pseudocode window to show it (for a very short time) and then pseudocode view immediately switched to showing results of obfuscated code decompilation. + - Then, just to open a new pseudocode window, the plugin creates a new segment "`[hrt]nullsub`" with a `nullsub_1` function inside and opens a pseudocode window to show it + (for a very short time), and then the pseudocode view is immediately switched to showing the results of the obfuscated code decompilation. How to use it: just press "Alt-F5" or select menu entry *"View/Open subviews/Decompile obfuscated code"* at entry point of obfuscated code in disasm view. @@ -27,11 +32,20 @@ to short unconditional jump jmp short loc_403F2E ; ;patched: jle loc_403F2E ``` Original code mnemonic are left on instruction comment. If "Allow Patching" is unchecked the plugin deals with such jumps in a processor independent way at Hex-Rays microcode level (unstable in some situations). - - Fast CFG: if unchecked collect chunks of the procedure with help of decompiler only. When "Fast CFG" is checked, use additional pre-processing steps with help of disassembler and flow chart regeneration after each block adding. + - Fast CFG: if checked, use an additional pre-processing step with the help of the disassembler and flow chart regeneration after each block is added, which is faster than decompiling. + When "Fast CFG" is unchecked, chunks of the procedure are collected with the help of the decompiler only. 
+ It may be more suitable in situations when the decompiler may convert conditional jumps to unconditional ones, like in the example below: +``` + mov eax, 1 + dec eax + jz short loc_xxx +; some trashy code that breaks decompiler +``` - Func regeneration: if checked - additional step that destroy, re-create and re-analyze the function from scratch after chunks were collected and before microcode of the function generation. >📝 **Notes:** -> - Deleting whole function or function's tails that have been met during CFG creation doesn't work stable enough. For better results it may be a good idea to manually delete functions on execution path or even mark all of the code as unexplored before trying to decompile it. +> - Deleting whole functions or function tails that have been met during CFG creation does not work reliably enough. +> For better results it may be a good idea to manually delete functions on the execution path or even mark all of the code as unexplored before trying to decompile it. > - After some manual changes in pseudocode Hex-Rays loses returns-converted-to-jump and truncate results of decompilation. You may press "Alt-F5" in pseudocode view to restore lost parts. 
![Decompile obfuscated code](deob.gif) diff --git a/src/deob.cpp b/src/deob.cpp index cf238a8..b6031c1 100644 --- a/src/deob.cpp +++ b/src/deob.cpp @@ -733,6 +733,15 @@ void remove_funcs_tails(ea_t ea) if (i > 100 ) msg("[hrt] %a FIXME: remove_funcs_tails loops\n", ea); } +enum Add_BB_Stop_Reason { + eABBSR_none, + eABBSR_unreachBlocks, + eABBSR_decode_insn, + eABBSR_already_added, + eABBSR_del_items, + eABBSR_create_insn, + eABBSR_bb_end +}; static bool add_bb(ea_t eaBgn, rangeset_t &ranges) { @@ -743,30 +752,43 @@ static bool add_bb(ea_t eaBgn, rangeset_t &ranges) remove_funcs_tails(eaBgn); #endif + Add_BB_Stop_Reason ABBSR = eABBSR_none; ea_t ea = eaBgn; while (1) { - if (unreachBlocks.contains(ea)) + if (unreachBlocks.contains(ea)) { + ABBSR = eABBSR_unreachBlocks; break; + } insn_t insn; int sz = decode_insn(&insn, ea); - if (sz <= 0) + if (sz <= 0) { + ABBSR = eABBSR_decode_insn; break; + } flags64_t flg = get_flags(ea); if (!is_code(flg)) { - if (ranges.has_common(range_t(ea, insn.size))) + if (ranges.has_common(range_t(ea, insn.size))) { + ABBSR = eABBSR_already_added; break; -#if 1 - if (!del_items(ea, DELIT_SIMPLE, insn.size)) + } +#if 0 + if (!is_unknown(flg) && !del_items(ea, DELIT_SIMPLE, insn.size)) { + ABBSR = eABBSR_del_items; break; + } #else for (decltype(insn.size) i = 0; i < insn.size; i++) { - if (!is_unknown(get_flags(ea + i)) && !del_items(ea + i, DELIT_SIMPLE)) + if (!is_unknown(get_flags(ea + i)) && !del_items(ea + i, DELIT_SIMPLE)) { + ABBSR = eABBSR_del_items; break; + } } #endif - if (!create_insn(ea, &insn)) + if (!create_insn(ea, &insn)) { + ABBSR = eABBSR_create_insn; break; + } } ea += insn.size; @@ -778,6 +800,7 @@ static bool add_bb(ea_t eaBgn, rangeset_t &ranges) // I have not good alternative for is_basic_block_end except CFG creation so leave here slower variant if (is_basic_block_end(insn, false)) { disasm_dbl_jc(ea - insn.size); + ABBSR = eABBSR_bb_end; break; } } @@ -785,6 +808,27 @@ static bool add_bb(ea_t eaBgn, rangeset_t 
&ranges) MSG_DO(("[hrt] new block %a-%a\n", eaBgn, ea)); return true; } + +#if DEBUG_DO + const char* m; + switch(ABBSR) { + case eABBSR_unreachBlocks: + m = "unreachBlocks"; break; + case eABBSR_decode_insn: + m = "decode_insn"; break; + case eABBSR_already_added: + m = "already_added"; break; + case eABBSR_del_items: + m = "del_items"; break; + case eABBSR_create_insn: + m = "create_insn"; break; + case eABBSR_bb_end: + m = "bb_end"; break; + default: + m = "none"; + } + msg("[hrt] add_bb fail at %a with %s\n", ea, m); +#endif return false; } diff --git a/src/hrtng.cpp b/src/hrtng.cpp index 28c59c8..0865e44 100644 --- a/src/hrtng.cpp +++ b/src/hrtng.cpp @@ -4388,7 +4388,7 @@ plugmod_t* addon.producer = "Sergey Belov and Milan Bohacek, Rolf Rolles, Takahiro Haruyama," \ " Karthik Selvaraj, Ali Rahbar, Ali Pezeshk, Elias Bachaalany, Markus Gaasedelen"; addon.url = "https://github.com/KasperskyLab/hrtng"; - addon.version = "1.1.7"; + addon.version = "1.1.8"; register_addon(&addon); return PLUGIN_KEEP;