diff --git a/.github/mypy/mypy.ini b/.github/mypy/mypy.ini
index 6d177d40aa..3d22b05f72 100644
--- a/.github/mypy/mypy.ini
+++ b/.github/mypy/mypy.ini
@@ -74,3 +74,6 @@ ignore_missing_imports = True
 
 [mypy-elftools.*]
 ignore_missing_imports = True
+
+[mypy-dncil.*]
+ignore_missing_imports = True
\ No newline at end of file
diff --git a/capa/features/extractors/dotnet/__init__.py b/capa/features/extractors/dotnet/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/capa/features/extractors/dotnet/helpers.py b/capa/features/extractors/dotnet/helpers.py
new file mode 100644
index 0000000000..7d2f290013
--- /dev/null
+++ b/capa/features/extractors/dotnet/helpers.py
@@ -0,0 +1,95 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from dnfile.mdtable import MemberRefRow
+    from dnfile.mdtable import MethodDefRow
+    from dnfile import dnPE
+
+import dnfile
+from dnfile.enums import MetadataTables
+from dncil.cil.body import CilMethodBody
+from dncil.clr.token import Token, InvalidToken
+from dncil.cil.body.reader import CilMethodBodyReaderBase
+
+# map a metadata table index to its table name
+DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in MetadataTables}
+
+
+class DnfileMethodBodyReader(CilMethodBodyReaderBase):
+    """method body reader for dncil backed by a dnfile-parsed PE"""
+
+    def __init__(self, pe: dnfile.dnPE, row: MethodDefRow):
+        """start reading at the method body referenced by `row.Rva`
+
+        NOTE: despite its name, `self.rva` stores a file offset (converted from the
+        method RVA here); the attribute name is kept for backward compatibility.
+        """
+        self.pe = pe
+        self.rva = self.pe.get_offset_from_rva(row.Rva)
+
+    def read(self, n):
+        """return `n` bytes from the current position and advance past them"""
+        # get_data() takes an RVA, so convert the stored file offset back first
+        data = self.pe.get_data(self.pe.get_rva_from_offset(self.rva), n)
+        self.rva += n
+        return data
+
+    def tell(self):
+        """return the current position (a file offset, see __init__)"""
+        return self.rva
+
+    def seek(self, rva):
+        """set the current position; expects the same kind of value tell() returns"""
+        self.rva = rva
+
+    def get_token(self, value, is_str=False):
+        """resolve a raw token to a user string or a metadata table row
+
+        args:
+          value: raw token value
+          is_str: True if the token indexes the user strings heap
+
+        returns: the resolved user string or table row; InvalidToken if the token
+          cannot be resolved
+        """
+        token = Token(value)
+
+        if is_str:
+            user_string = self.pe.net.user_strings.get_us(token.rid)
+            if user_string is None:
+                # no user string could be read for this index
+                return InvalidToken(token.value)
+            return user_string.value
+
+        table_name = DOTNET_META_TABLES_BY_INDEX.get(token.table, "")
+        if not table_name:
+            # table index is not valid
+            return InvalidToken(token.value)
+
+        table = getattr(self.pe.net.mdtables, table_name, None)
+        if table is None:
+            # table index is valid but table is not present
+            return InvalidToken(token.value)
+
+        if token.rid < 1:
+            # row ids are 1-based; without this check a rid of 0 would index
+            # rows[-1] and silently resolve to the last row of the table
+            return InvalidToken(token.value)
+
+        try:
+            return table.rows[token.rid - 1]
+        except IndexError:
+            # table index is valid but row index is not valid
+            return InvalidToken(token.value)
+
+
+def read_dotnet_method_body(dn: dnPE, row: MethodDefRow) -> CilMethodBody:
+    """parse the CIL method body of the method described by `row`"""
+    return CilMethodBody(DnfileMethodBodyReader(dn, row))
+
+
+def get_imported_class_name(row: MemberRefRow) -> str:
+    """return the namespace-qualified name of the class referenced by `row`"""
+    return f"{row.Class.row.TypeNamespace}.{row.Class.row.TypeName}"
diff --git a/capa/features/extractors/dotnet/insn.py b/capa/features/extractors/dotnet/insn.py
new file mode 100644
index 0000000000..d50ec98614
--- /dev/null
+++ b/capa/features/extractors/dotnet/insn.py
@@ -0,0 +1,106 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Generator, Union, Tuple, List, Callable
+
+if TYPE_CHECKING:
+    from dncil.cil.instruction import Instruction
+    from dncil.cil.body import CilMethodBody
+
+import dncil
+import dnfile
+from dncil.cil.error import MethodBodyFormatError
+from dncil.cil.opcode import OpCodes
+
+import capa.features.extractors.helpers
+import capa.features.extractors.dotnet.helpers
+from capa.features.insn import API, Number
+from capa.features.common import String
+
+
+def extract_insn_api_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[API, int], None, None]:
+    """parse instruction API features
+
+    see https://www.ntcore.com/files/dotnetformat.htm
+
+    10 - MemberRef Table
+        Each row represents an imported method.
+            Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables)
+    01 - TypeRef Table
+        Each row represents an imported class, its namespace and the assembly which contains it.
+            TypeName (index into String heap)
+            TypeNamespace (index into String heap)
+
+    yields: (API("<namespace>.<class>::<method>"), instruction offset)
+    """
+    if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
+        return
+
+    # only MemberRef operands whose parent class is a TypeRef carry an imported name
+    if not isinstance(insn.operand, dnfile.mdtable.MemberRefRow):
+        return
+    if not isinstance(insn.operand.Class.row, dnfile.mdtable.TypeRefRow):
+        return
+
+    class_name = capa.features.extractors.dotnet.helpers.get_imported_class_name(insn.operand)
+    yield API(f"{class_name}::{insn.operand.Name}"), insn.offset
+
+
+def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Number, int], None, None]:
+    """parse instruction number features"""
+    if insn.is_ldc():
+        yield Number(insn.get_ldc()), insn.offset
+
+
+def extract_insn_string_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[String, int], None, None]:
+    """parse instruction string features"""
+    # the operand is not a string when the token could not be resolved
+    if insn.is_ldstr() and isinstance(insn.operand, str):
+        yield String(insn.operand), insn.offset
+
+
+def extract_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Union[API, String, Number], int], None, None]:
+    """extract instruction features"""
+    for inst_handler in INSTRUCTION_HANDLERS:
+        yield from inst_handler(f, insn)
+
+
+INSTRUCTION_HANDLERS = (
+    extract_insn_api_features,
+    extract_insn_number_features,
+    extract_insn_string_features,
+)
+
+
+def main(args):
+    """print the instruction features of every IL method in the .NET PE at `args.path`"""
+    import pprint
+
+    dn = dnfile.dnPE(args.path)
+
+    features = []
+    for row in dn.net.mdtables.MethodDef:
+        if not row.ImplFlags.miIL:
+            continue
+
+        try:
+            # resolved via the module-level import so main() also works when this
+            # module is imported rather than run as a script
+            body = capa.features.extractors.dotnet.helpers.read_dotnet_method_body(dn, row)
+        except MethodBodyFormatError as e:
+            print(e)
+            continue
+
+        for insn in body.instructions:
+            features.extend(extract_features(body, insn))
+
+    pprint.pprint(features)
+
+
+if __name__ == "__main__":
+    import argparse
+
+    # the text describes the tool, so it belongs in `description`, not `prog`
+    parser = argparse.ArgumentParser(description="parse instruction features from .NET PE")
+    parser.add_argument("path", type=str, help="full path to .NET PE")
+
+    main(parser.parse_args())