-
Notifications
You must be signed in to change notification settings - Fork 568
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
adding instruction feature extractors
- Loading branch information
1 parent
3f16877
commit 25b48c4
Showing
4 changed files
with
171 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING | ||
|
||
if TYPE_CHECKING: | ||
from dnfile.mdtable import MemberRefRow | ||
from dnfile.mdtable import MethodDefRow | ||
from dnfile import dnPE | ||
|
||
import dnfile | ||
from dnfile.enums import MetadataTables | ||
from dncil.cil.body import CilMethodBody | ||
from dncil.clr.token import Token, InvalidToken | ||
from dncil.cil.body.reader import CilMethodBodyReaderBase | ||
|
||
# key indexes to dotnet metadata tables | ||
DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in MetadataTables} | ||
|
||
|
||
class DnfileMethodBodyReader(CilMethodBodyReaderBase):
    """Byte reader and token resolver that backs a dncil method-body parse with a dnfile PE.

    The reader keeps a cursor into the PE and hands out consecutive chunks of
    data via read(); get_token() maps raw metadata token values to user
    strings or metadata table rows.
    """

    def __init__(self, pe: dnfile.dnPE, row: MethodDefRow):
        """Position the cursor at the start of the method body described by *row*.

        Args:
            pe: parsed .NET PE that owns the method.
            row: MethodDef table row whose ``Rva`` field locates the body.
        """
        self.pe = pe
        # NOTE: despite its name, this attribute stores the result of
        # get_offset_from_rva(); read() converts it back with
        # get_rva_from_offset() before fetching data.
        self.rva = self.pe.get_offset_from_rva(row.Rva)

    def read(self, n):
        """Return the next *n* bytes at the cursor and advance the cursor by *n*."""
        data = self.pe.get_data(self.pe.get_rva_from_offset(self.rva), n)
        self.rva += n
        return data

    def tell(self):
        """Return the current cursor position."""
        return self.rva

    def seek(self, rva):
        """Move the cursor to *rva* (same unit as the value returned by tell())."""
        self.rva = rva

    def get_token(self, value, is_str=False):
        """Resolve a raw metadata token value.

        Args:
            value: raw token value as found in an instruction operand.
            is_str: when true, treat the token as an index into the user
                string heap and return the string value.

        Returns:
            The user string (when *is_str* is true), the referenced metadata
            table row, or an ``InvalidToken`` when the table index is unknown,
            the table is absent from this PE, or the row id is out of range.
        """
        token = Token(value)

        if is_str:
            return self.pe.net.user_strings.get_us(token.rid).value

        table_name = DOTNET_META_TABLES_BY_INDEX.get(token.table, "")
        if not table_name:
            # table index is not valid
            return InvalidToken(token.value)

        table = getattr(self.pe.net.mdtables, table_name, None)
        if table is None:
            # table index is valid but table is not present
            return InvalidToken(token.value)

        if token.rid < 1:
            # row ids are 1-based; without this guard a rid of 0 would index
            # rows[-1] and silently resolve to the table's last row
            return InvalidToken(token.value)

        try:
            return table.rows[token.rid - 1]
        except IndexError:
            # table index is valid but row index is not valid
            return InvalidToken(token.value)
|
||
|
||
def read_dotnet_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody:
    """Parse the method body referenced by the MethodDef *row* of *pe*.

    Wraps the PE and row in a DnfileMethodBodyReader and passes that reader
    to CilMethodBody.
    """
    reader = DnfileMethodBodyReader(pe, row)
    return CilMethodBody(reader)
|
||
|
||
def get_imported_class_name(row: MemberRefRow) -> str:
    """Return ``<TypeNamespace>.<TypeName>`` for the class referenced by *row*.

    Both parts are read from ``row.Class.row``.
    """
    class_row = row.Class.row
    return "{}.{}".format(class_row.TypeNamespace, class_row.TypeName)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
from __future__ import annotations | ||
|
||
from typing import TYPE_CHECKING, Generator, Union, Tuple, List, Callable | ||
|
||
if TYPE_CHECKING: | ||
from dncil.cil.instruction import Instruction | ||
from dncil.cil.body import CilMethodBody | ||
|
||
import dncil | ||
import dnfile | ||
from dncil.cil.error import MethodBodyFormatError | ||
from dncil.cil.opcode import OpCodes | ||
|
||
import capa.features.extractors.helpers | ||
import capa.features.extractors.dotnet.helpers | ||
from capa.features.insn import API, Number | ||
from capa.features.common import String | ||
|
||
|
||
def extract_insn_api_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[API, int], None, None]:
    """Yield an API feature for a call-like instruction that targets an imported method.

    Only Call, Callvirt, Jmp and Calli instructions are considered, and only
    when the operand is a MemberRef row whose Class points at a TypeRef row.

    Background (see https://www.ntcore.com/files/dotnetformat.htm):
        10 - MemberRef Table
            Each row represents an imported method.
                Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables)
        01 - TypeRef Table
            Each row represents an imported class, its namespace and the assembly which contains it.
                TypeName (index into String heap)
                TypeNamespace (index into String heap)
    """
    call_like = (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli)
    if insn.opcode not in call_like:
        return

    member_ref = insn.operand
    if not isinstance(member_ref, dnfile.mdtable.MemberRefRow):
        return

    if not isinstance(member_ref.Class.row, dnfile.mdtable.TypeRefRow):
        return

    class_name = capa.features.extractors.dotnet.helpers.get_imported_class_name(member_ref)
    method_name = member_ref.Name
    yield API(f"{class_name}::{method_name}"), insn.offset
|
||
|
||
def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Number, int], None, None]:
    """Yield a Number feature for an instruction for which ``insn.is_ldc()`` is true."""
    if not insn.is_ldc():
        return

    constant = insn.get_ldc()
    yield Number(constant), insn.offset
|
||
|
||
def extract_insn_string_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[String, int], None, None]:
    """Yield a String feature built from the operand of an instruction for which ``insn.is_ldstr()`` is true."""
    if not insn.is_ldstr():
        return

    yield String(insn.operand), insn.offset
|
||
|
||
def extract_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Union[API, String, Number], int], None, None]:
    """Run every handler in INSTRUCTION_HANDLERS on *insn* and yield each (feature, offset) pair."""
    for handler in INSTRUCTION_HANDLERS:
        yield from ((feature, offset) for feature, offset in handler(f, insn))
|
||
|
||
# Per-instruction feature extractors; each is called as handler(f, insn) and
# yields (feature, offset) pairs. They are applied in the order listed.
INSTRUCTION_HANDLERS = (extract_insn_api_features, extract_insn_number_features, extract_insn_string_features)
|
||
|
||
def main(args):
    """Print the instruction features of every IL method in the .NET PE at ``args.path``.

    Args:
        args: object with a ``path`` attribute naming the .NET PE to parse
            (e.g. an argparse namespace).

    Method bodies that fail to parse raise MethodBodyFormatError; the error is
    printed and the method is skipped.
    """
    import pprint

    dn = dnfile.dnPE(args.path)

    features = []
    for row in dn.net.mdtables.MethodDef:
        if not row.ImplFlags.miIL:
            continue

        try:
            # resolved through the module imported at file level so that
            # main() does not depend on a name that is only bound when this
            # file is executed as a script
            body = capa.features.extractors.dotnet.helpers.read_dotnet_method_body(dn, row)
        except MethodBodyFormatError as e:
            print(e)
            continue

        for insn in body.instructions:
            features.extend(extract_features(body, insn))

    pprint.pprint(features)
|
||
|
||
if __name__ == "__main__":
    import argparse

    # kept so that `read_dotnet_method_body` is bound at module level when run as a script
    from capa.features.extractors.dotnet.helpers import read_dotnet_method_body

    # the sentence describes what the tool does, so it belongs in `description`;
    # `prog` is the program name that argparse prints in the usage line
    parser = argparse.ArgumentParser(description="parse instruction features from .NET PE")
    parser.add_argument("path", type=str, help="full path to .NET PE")

    main(parser.parse_args())