Skip to content

Commit

Permalink
adding support to parse imports
Browse files Browse the repository at this point in the history
  • Loading branch information
mike-hunhoff committed Apr 5, 2022
1 parent 574a30d commit 6947497
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 62 deletions.
139 changes: 107 additions & 32 deletions capa/features/extractors/dotnet/helpers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

from typing import TYPE_CHECKING
from typing import TYPE_CHECKING, Tuple, Generator, Any, Dict
from itertools import chain

if TYPE_CHECKING:
from dnfile.mdtable import MemberRefRow
Expand All @@ -10,62 +11,136 @@
import dnfile
from dnfile.enums import MetadataTables
from dncil.cil.body import CilMethodBody
from dncil.clr.token import Token, InvalidToken
from dncil.clr.token import Token, StringToken, InvalidToken
from dncil.cil.body.reader import CilMethodBodyReaderBase

# key indexes to dotnet metadata tables
DOTNET_META_TABLES_BY_INDEX = {table.value: table.name for table in MetadataTables}


class DnfileMethodBodyReader(CilMethodBodyReaderBase):
def __init__(self, pe: dnfile.dnPE, row: MethodDefRow):
def __init__(self, pe: dnPE, row: MethodDefRow):
""" """
self.pe = pe
self.rva = self.pe.get_offset_from_rva(row.Rva)
self.pe: dnPE = pe
self.offset: int = self.pe.get_offset_from_rva(row.Rva)

def read(self, n):
def read(self, n: int) -> bytes:
""" """
data = self.pe.get_data(self.pe.get_rva_from_offset(self.rva), n)
self.rva += n
data: bytes = self.pe.get_data(self.pe.get_rva_from_offset(self.offset), n)
self.offset += n
return data

def tell(self):
def tell(self) -> int:
""" """
return self.rva
return self.offset

def seek(self, rva):
def seek(self, offset: int) -> int:
""" """
self.rva = rva
self.offset = offset
return self.offset

def get_token(self, value, is_str=False):
""" """
token = Token(value)

if is_str:
return self.pe.net.user_strings.get_us(token.rid).value
def make_token(table: int, rid: int) -> int:
    """Pack a metadata table index and a row id into a raw token value.

    The low byte of `table` is shifted left by `Token.TABLE_SHIFT` and OR-ed with
    `rid` masked by `Token.RID_MASK`.
    """
    table_bits: int = (table & 0xFF) << Token.TABLE_SHIFT
    rid_bits: int = rid & Token.RID_MASK
    return table_bits | rid_bits


table_name = DOTNET_META_TABLES_BY_INDEX.get(token.table, "")
if not table_name:
# table_index is not valid
return InvalidToken(token.value)
def resolve_token(pe: dnPE, token: Token) -> Any:
""" """
if isinstance(token, StringToken):
return pe.net.user_strings.get_us(token.rid).value

table = getattr(self.pe.net.mdtables, table_name, None)
if table is None:
# table index is valid but table is not present
return InvalidToken(token.value)
table_name: str = DOTNET_META_TABLES_BY_INDEX.get(token.table, "")
if not table_name:
# table_index is not valid
return InvalidToken(token.value)

try:
return table.rows[token.rid - 1]
except IndexError:
# table index is valid but row index is not valid
return InvalidToken(token.value)
table: Any = getattr(pe.net.mdtables, table_name, None)
if table is None:
# table index is valid but table is not present
return InvalidToken(token.value)

try:
return table.rows[token.rid - 1]
except IndexError:
# table index is valid but row index is not valid
return InvalidToken(token.value)

def read_dotnet_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody:

def get_method_body(pe: dnPE, row: MethodDefRow) -> CilMethodBody:
""" """
return CilMethodBody(DnfileMethodBodyReader(pe, row))


def get_imported_class_name(row: MemberRefRow) -> str:
def get_class_import_name(row: MemberRefRow) -> str:
    """Return the qualified name of the class referenced by a MemberRef row.

    Reads `TypeNamespace` and `TypeName` from `row.Class.row` and joins them with
    a dot. When the namespace is empty only the type name is returned, so the
    result never starts with a stray ".".
    """
    namespace = row.Class.row.TypeNamespace
    type_name = row.Class.row.TypeName

    if not namespace:
        # like Program (no namespace), instead of .Program
        return f"{type_name}"

    # like System.IO.File
    return f"{namespace}.{type_name}"


def get_class_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]:
    """parse class imports

    see https://www.ntcore.com/files/dotnetformat.htm

    10 - MemberRef Table
        Each row represents an imported method
            Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables)
            Name (index into String heap)
    01 - TypeRef Table
        Each row represents an imported class, its namespace and the assembly which contains it
            TypeName (index into String heap)
            TypeNamespace (index into String heap)

    yields:
        (token, name): MemberRef token built with make_token and a name like System.IO.File::Delete
    """
    member_refs = getattr(pe.net.mdtables, "MemberRef", None)
    if member_refs is None:
        # table is not present (attribute missing or set to None)
        return

    # row ids are 1-based, so start counting at 1
    for rid, row in enumerate(member_refs, start=1):
        # only MemberRef rows whose parent class is a TypeRef are reported
        if not isinstance(row.Class.row, dnfile.mdtable.TypeRefRow):
            continue

        class_imp = f"{get_class_import_name(row)}::{row.Name}"
        token = make_token(MetadataTables.MemberRef.value, rid)

        yield token, class_imp


def get_native_imports(pe: dnPE) -> Generator[Tuple[int, str], None, None]:
    """parse native imports

    see https://www.ntcore.com/files/dotnetformat.htm

    28 - ImplMap Table
        ImplMap table holds information about unmanaged methods that can be reached from managed code,
        using PInvoke dispatch
            MemberForwarded (index into the Field or MethodDef table; more precisely, a MemberForwarded coded index)
            ImportName (index into the String heap)
            ImportScope (index into the ModuleRef table)

    yields:
        (token, name): token of the MemberForwarded row built with make_token and a name like
        kernel32.CreateFileA
    """
    impl_map = getattr(pe.net.mdtables, "ImplMap", None)
    if impl_map is None:
        # table is not present (attribute missing or set to None)
        return

    for row in impl_map:
        dll: str = row.ImportScope.row.Name
        symbol: str = row.ImportName

        # like Kernel32.dll or Kernel32; keep the part before the first "." and always
        # lower-case it so both spellings produce the same import name
        if dll:
            dll = dll.split(".")[0].lower()

        # like kernel32.CreateFileA
        native_imp: str = f"{dll}.{symbol}"

        # ECMA says "Each row of the ImplMap table associates a row in the MethodDef table (MemberForwarded) with the
        # name of a routine (ImportName) in some unmanaged DLL (ImportScope)"; so we calculate and map the
        # MemberForwarded MethodDef table token to help us later record native import method calls made from CIL
        member_forwarded_token = make_token(row.MemberForwarded.table.number, row.MemberForwarded.row_index)

        yield member_forwarded_token, native_imp


def get_imports(pe: dnPE) -> Dict[int, str]:
    """Collect class imports and native imports of `pe` into one token -> name mapping.

    Class imports are consumed first, then native imports; if the same token is
    yielded more than once the last name wins.
    """
    return dict(chain(get_class_imports(pe), get_native_imports(pe)))
73 changes: 43 additions & 30 deletions capa/features/extractors/dotnet/insn.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from __future__ import annotations

from typing import TYPE_CHECKING, List, Tuple, Union, Callable, Generator
from typing import TYPE_CHECKING, Dict, List, Tuple, Union, Callable, Generator, Any

if TYPE_CHECKING:
from dncil.cil.instruction import Instruction
Expand All @@ -17,25 +17,28 @@
from capa.features.common import String


def get_imports(ctx):
    """Return the imports mapping for the workspace, computing it on first use.

    The result of helpers.get_imports(ctx["pe"]) is stored in ctx under
    "imports_cache" and returned from there on later calls.
    """
    if "imports_cache" in ctx:
        return ctx["imports_cache"]

    imports = capa.features.extractors.dotnet.helpers.get_imports(ctx["pe"])
    ctx["imports_cache"] = imports
    return imports


def extract_insn_api_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[API, int], None, None]:
"""parse instruction API features
see https://www.ntcore.com/files/dotnetformat.htm
10 - MemberRef Table
Each row represents an imported method.
Class (index into the TypeRef, ModuleRef, MethodDef, TypeSpec or TypeDef tables)
01 - TypeRef Table
Each row represents an imported class, its namespace and the assembly which contains it.
TypeName (index into String heap)
TypeNamespace (index into String heap)
"""
if insn.opcode in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
if isinstance(insn.operand, dnfile.mdtable.MemberRefRow):
if isinstance(insn.operand.Class.row, (dnfile.mdtable.TypeRefRow,)):
class_name = capa.features.extractors.dotnet.helpers.get_imported_class_name(insn.operand)
method_name = insn.operand.Name
yield API(f"{class_name}::{method_name}"), insn.offset
"""parse instruction API features"""
if insn.opcode not in (OpCodes.Call, OpCodes.Callvirt, OpCodes.Jmp, OpCodes.Calli):
return

name = get_imports(f.ctx).get(insn.operand.value, "")
if not name:
return

if "::" in name:
yield API(name), insn.offset
else:
dll, _, symbol = name.rpartition(".")
for name_variant in capa.features.extractors.helpers.generate_symbols(dll, symbol):
yield API(name_variant), insn.offset


def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[Number, int], None, None]:
Expand All @@ -47,7 +50,8 @@ def extract_insn_number_features(f: CilMethodBody, insn: Instruction) -> Generat
def extract_insn_string_features(f: CilMethodBody, insn: Instruction) -> Generator[Tuple[String, int], None, None]:
"""parse instruction string features"""
if insn.is_ldstr():
yield String(insn.operand), insn.offset
user_string = capa.features.extractors.dotnet.helpers.resolve_token(f.ctx["pe"], insn.operand)
yield String(user_string), insn.offset


def extract_features(
Expand All @@ -68,16 +72,25 @@ def extract_features(

def main(args):
""" """
dn = dnfile.dnPE(args.path)
pe: dnPE = dnfile.dnPE(args.path)

# data structure shared across functions yielded here.
# useful for caching analysis relevant across a single workspace.
ctx = {}
ctx["pe"] = pe

features: List[Any] = []
for row in pe.net.mdtables.MethodDef:
if not row.ImplFlags.miIL or any((row.Flags.mdAbstract, row.Flags.mdPinvokeImpl)):
continue

try:
body: CilMethodBody = get_method_body(pe, row)
except MethodBodyFormatError as e:
print(e)
continue

features = []
for row in dn.net.mdtables.MethodDef:
if row.ImplFlags.miIL:
try:
body = read_dotnet_method_body(dn, row)
except MethodBodyFormatError as e:
print(e)
continue
setattr(body, "ctx", ctx)

for insn in body.instructions:
features.extend(list(extract_features(body, insn)))
Expand All @@ -91,7 +104,7 @@ def main(args):
""" """
import argparse

from capa.features.extractors.dotnet.helpers import read_dotnet_method_body
from capa.features.extractors.dotnet.helpers import get_method_body

parser = argparse.ArgumentParser(prog="parse instruction features from .NET PE")
parser.add_argument("path", type=str, help="full path to .NET PE")
Expand Down

0 comments on commit 6947497

Please sign in to comment.