From afe9a4801da0e215a78a41171067efaa86bcc1b8 Mon Sep 17 00:00:00 2001 From: Arker123 Date: Thu, 9 Nov 2023 22:14:03 +0530 Subject: [PATCH 1/7] More i386 xrefs --- floss/language/rust/extract.py | 11 +++++++++-- floss/language/utils.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/floss/language/rust/extract.py b/floss/language/rust/extract.py index 3d3dc3960..79407fa92 100644 --- a/floss/language/rust/extract.py +++ b/floss/language/rust/extract.py @@ -10,7 +10,13 @@ import binary2strings as b2s from floss.results import StaticString, StringEncoding -from floss.language.utils import find_lea_xrefs, find_mov_xrefs, find_push_xrefs, get_struct_string_candidates +from floss.language.utils import ( + find_lea_xrefs, + find_mov_xrefs, + find_push_xrefs, + get_raw_xrefs_rdata_i386, + get_struct_string_candidates, +) logger = logging.getLogger(__name__) @@ -151,7 +157,8 @@ def get_string_blob_strings(pe: pefile.PE, min_length: int) -> Iterable[StaticSt xrefs_lea = find_lea_xrefs(pe) xrefs_push = find_push_xrefs(pe) xrefs_mov = find_mov_xrefs(pe) - xrefs = itertools.chain(struct_string_addrs, xrefs_lea, xrefs_push, xrefs_mov) + xrefs_raw_rdata = get_raw_xrefs_rdata_i386(pe, rdata_section.get_data()) + xrefs = itertools.chain(struct_string_addrs, xrefs_lea, xrefs_push, xrefs_mov, xrefs_raw_rdata) elif pe.FILE_HEADER.Machine == pefile.MACHINE_TYPE["IMAGE_FILE_MACHINE_AMD64"]: xrefs_lea = find_lea_xrefs(pe) diff --git a/floss/language/utils.py b/floss/language/utils.py index e97c4fa47..101ccb35c 100644 --- a/floss/language/utils.py +++ b/floss/language/utils.py @@ -465,6 +465,34 @@ def get_struct_string_candidates(pe: pefile.PE) -> Iterable[StructString]: # dozens of seconds or more (suspect many minutes). +def get_raw_xrefs_rdata_i386(pe: pefile.PE, buf: bytes) -> Iterable[VA]: + """ + scan for raw xrefs in .rdata section + """ + format = "I" + + if not buf: + return + + low, high = get_image_range(pe) + + # using array module as a high-performance way to access the data as fixed-sized words. + words = iter(array.array(format, buf)) + + last = next(words) + for current in words: + address = last + last = current + + if address == 0x0: + continue + + if not (low <= address < high): + continue + + yield address + + def get_extract_stats( pe: pefile, all_ss_strings: List[StaticString], lang_strings: List[StaticString], min_len: int, min_blob_len=0 ) -> float: From 555b60600672371ebb3fcb4afdb495155493a79f Mon Sep 17 00:00:00 2001 From: Arker123 Date: Fri, 10 Nov 2023 17:14:14 +0530 Subject: [PATCH 2/7] Added Comments for raw xrefs --- floss/language/utils.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/floss/language/utils.py b/floss/language/utils.py index 101ccb35c..3acf46290 100644 --- a/floss/language/utils.py +++ b/floss/language/utils.py @@ -467,7 +467,18 @@ def get_struct_string_candidates(pe: pefile.PE) -> Iterable[StructString]: def get_raw_xrefs_rdata_i386(pe: pefile.PE, buf: bytes) -> Iterable[VA]: """ - scan for raw xrefs in .rdata section + scan for raw xrefs in .rdata section. + raw xrefs are 32-bit absolute addresses to strings in .rdata section (i386). + They are not encoded as struct String instances. + + example: + .rdata:004D6234 dd offset unk_4C85C9 + .rdata:004D6238 dd offset unk_4C85C3 + .rdata:004D623C dd offset unk_4C85BB + .rdata:004D6240 dd offset unk_4C85B3 + + The above are not struct String instances, but are references to strings in .rdata section. + They can be used to divide the string blobs into smaller chunks. """ format = "I" From d29ee1a5fd120b1090ebc094ea3241da839992c1 Mon Sep 17 00:00:00 2001 From: Arker123 Date: Fri, 10 Nov 2023 17:17:35 +0530 Subject: [PATCH 3/7] Tweaks --- floss/language/utils.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/floss/language/utils.py b/floss/language/utils.py index 3acf46290..ceb172cdf 100644 --- a/floss/language/utils.py +++ b/floss/language/utils.py @@ -477,6 +477,9 @@ def get_raw_xrefs_rdata_i386(pe: pefile.PE, buf: bytes) -> Iterable[VA]: .rdata:004D623C dd offset unk_4C85BB .rdata:004D6240 dd offset unk_4C85B3 + From the disassembly, they are called as follows: + .text:00498E56 push ds:off_4D61E0[ecx*4] + The above are not struct String instances, but are references to strings in .rdata section. They can be used to divide the string blobs into smaller chunks. """ From ce80cd8cf53c4be6d7012eb2995f26f288540633 Mon Sep 17 00:00:00 2001 From: Arker123 Date: Sun, 24 Dec 2023 14:28:20 +0530 Subject: [PATCH 4/7] Add tests --- floss/language/rust/extract.py | 2 +- tests/test_language_extract_rust.py | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/floss/language/rust/extract.py b/floss/language/rust/extract.py index 29010626f..37f6f7b96 100644 --- a/floss/language/rust/extract.py +++ b/floss/language/rust/extract.py @@ -14,8 +14,8 @@ find_lea_xrefs, find_mov_xrefs, find_push_xrefs, - get_raw_xrefs_rdata_i386, get_rdata_section, + get_raw_xrefs_rdata_i386, get_struct_string_candidates, ) diff --git a/tests/test_language_extract_rust.py b/tests/test_language_extract_rust.py index 9d210ce0c..a7c300236 100644 --- a/tests/test_language_extract_rust.py +++ b/tests/test_language_extract_rust.py @@ -80,3 +80,21 @@ def test_push(request, string, offset, encoding, rust_strings): ) def test_mov_jmp(request, string, offset, encoding, rust_strings): assert StaticString(string=string, offset=offset, encoding=encoding) in request.getfixturevalue(rust_strings) + + +@pytest.mark.parametrize( + "string,offset,encoding,rust_strings", + [ + # .rdata:004BFA48 dd offset unk_4BA13A + # .rdata:004BFA4C dd offset unk_4BA100 + pytest.param("Invalid branch target in DWARF expression", 0xB813A, StringEncoding.UTF8, "rust_strings32"), + pytest.param( + "Expected to find an FDE pointer, but found a CIE pointer instead.", + 0xB8163, + StringEncoding.UTF8, + "rust_strings32", + ), + ], +) +def test_raw_xrefs(request, string, offset, encoding, rust_strings): + assert StaticString(string=string, offset=offset, encoding=encoding) in request.getfixturevalue(rust_strings) From c4458cb3008ccd43ddb2dbfda40e5891240deefb Mon Sep 17 00:00:00 2001 From: Arnav Kharbanda <94680887+Arker123@users.noreply.github.com> Date: Mon, 24 Jun 2024 10:20:46 +0530 Subject: [PATCH 5/7] Update floss/language/utils.py Co-authored-by: Vasco Schiavo <115561717+VascoSch92@users.noreply.github.com> --- floss/language/utils.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/floss/language/utils.py b/floss/language/utils.py index 5b815d6e7..5bf660090 100644 --- a/floss/language/utils.py +++ b/floss/language/utils.py @@ -506,13 +506,8 @@ def get_raw_xrefs_rdata_i386(pe: pefile.PE, buf: bytes) -> Iterable[VA]: address = last last = current - if address == 0x0: - continue - - if not (low <= address < high): - continue - - yield address + if address != 0x0 and low <= address < high: + yield address def get_extract_stats( From 47c79f1ec706b911b8bb9e9b90ac9d769e6d3cd9 Mon Sep 17 00:00:00 2001 From: Arker123 Date: Mon, 24 Jun 2024 10:50:41 +0530 Subject: [PATCH 6/7] Code Style --- floss/language/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/floss/language/utils.py b/floss/language/utils.py index 5bf660090..f46790bbd 100644 --- a/floss/language/utils.py +++ b/floss/language/utils.py @@ -507,7 +507,7 @@ def get_raw_xrefs_rdata_i386(pe: pefile.PE, buf: bytes) -> Iterable[VA]: last = current if address != 0x0 and low <= address < high: - yield address + yield address def get_extract_stats( From 3fdef6caaca769c84adb94749a77f1f182bfd021 Mon Sep 17 00:00:00 2001 From: Arker123 Date: Wed, 26 Jun 2024 10:31:51 +0530 Subject: [PATCH 7/7] Update Documentation --- floss/language/utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/floss/language/utils.py b/floss/language/utils.py index f46790bbd..2a56dd7f8 100644 --- a/floss/language/utils.py +++ b/floss/language/utils.py @@ -475,8 +475,7 @@ def get_struct_string_candidates(pe: pefile.PE) -> Iterable[StructString]: def get_raw_xrefs_rdata_i386(pe: pefile.PE, buf: bytes) -> Iterable[VA]: """ - scan for raw xrefs in .rdata section. - raw xrefs are 32-bit absolute addresses to strings in .rdata section (i386). + scan for raw xrefs that are 32-bit absolute addresses in the PE file (i386). They are not encoded as struct String instances. example: @@ -488,7 +487,7 @@ def get_raw_xrefs_rdata_i386(pe: pefile.PE, buf: bytes) -> Iterable[VA]: From the disassembly, they are called as follows: .text:00498E56 push ds:off_4D61E0[ecx*4] - The above are not struct String instances, but are references to strings in .rdata section. + The above are not struct String instances, but are references to strings in the PE file. They can be used to divide the string blobs into smaller chunks. """ format = "I"