Skip to content

Commit

Permalink
Merge branch 'dotnet-main' into dotnet-extract
Browse files Browse the repository at this point in the history
  • Loading branch information
mike-hunhoff committed Apr 6, 2022
2 parents efd8b30 + 97e76a8 commit 0499f9e
Show file tree
Hide file tree
Showing 32 changed files with 1,138 additions and 338 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ name: build

on:
push:
branches: [master]
branches: [master, dotnet-main]
release:
types: [edited, published]

Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ jobs:
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.6'
python-version: '3.7'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@ name: CI

on:
push:
branches: [ master ]
branches: [ master, dotnet-main ]
pull_request:
branches: [ master ]
branches: [ master, dotnet-main ]

# save workspaces to speed up testing
env:
Expand Down Expand Up @@ -37,6 +37,8 @@ jobs:
run: isort --profile black --length-sort --line-width 120 -c .
- name: Lint with black
run: black -l 120 --check .
- name: Lint with pycodestyle
run: pycodestyle --show-source capa/ scripts/ tests/
- name: Check types with mypy
run: mypy --config-file .github/mypy/mypy.ini capa/ scripts/ tests/

Expand Down Expand Up @@ -65,11 +67,9 @@ jobs:
matrix:
os: [ubuntu-20.04, windows-2019, macos-10.15]
# across all operating systems
python-version: ["3.6", "3.10"]
python-version: ["3.7", "3.10"]
include:
# on Ubuntu run these as well
- os: ubuntu-20.04
python-version: "3.7"
- os: ubuntu-20.04
python-version: "3.8"
- os: ubuntu-20.04
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -118,3 +118,7 @@ rule-linter-output.log
scripts/perf/*.txt
scripts/perf/*.svg
scripts/perf/*.zip
.direnv
.envrc
.DS_Store
*/.DS_Store
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,16 @@

### New Features

- add new scope "instruction" for matching mnemonics and operands #767 @williballenthin
- add new feature "operand[{0, 1, 2}].number" for matching instruction operand immediate values #767 @williballenthin
- add new feature "operand[{0, 1, 2}].offset" for matching instruction operand offsets #767 @williballenthin
- main: detect dotnet binaries #955 @mr-tz

### Breaking Changes

- the instruction scope and operand features are new and are not backward compatible with older versions of capa
- Python 3.7 is now the minimum supported Python version #866 @williballenthin

### New Rules (4)

- data-manipulation/encryption/aes/manually-build-aes-constants [email protected]
Expand Down
4 changes: 2 additions & 2 deletions capa/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,8 +235,8 @@ class Subscope(Statement):
the engine should preprocess rules to extract subscope statements into their own rules.
"""

def __init__(self, scope, child):
super(Subscope, self).__init__()
def __init__(self, scope, child, description=None):
super(Subscope, self).__init__(description=description)
self.scope = scope
self.child = child

Expand Down
14 changes: 14 additions & 0 deletions capa/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
class UnsupportedRuntimeError(RuntimeError):
    """
    Error type signalling an unsupported runtime.

    Subclasses RuntimeError, so handlers written as ``except RuntimeError`` also catch it.
    NOTE(review): the raise sites are not visible in this file - confirm the exact conditions there.
    """


class UnsupportedFormatError(ValueError):
    """
    Error type signalling an unsupported file format.

    Subclasses ValueError, so handlers written as ``except ValueError`` also catch it.
    NOTE(review): the raise sites are not visible in this file - confirm the exact conditions there.
    """


class UnsupportedArchError(ValueError):
    """
    Error type signalling an unsupported architecture.

    Subclasses ValueError, so handlers written as ``except ValueError`` also catch it.
    NOTE(review): the raise sites are not visible in this file - confirm the exact conditions there.
    """


class UnsupportedOSError(ValueError):
    """
    Error type signalling an unsupported operating system.

    Subclasses ValueError (not the builtin OSError), so handlers written as
    ``except ValueError`` also catch it.
    NOTE(review): the raise sites are not visible in this file - confirm the exact conditions there.
    """
23 changes: 18 additions & 5 deletions capa/features/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
# See the License for the specific language governing permissions and limitations under the License.

import re
import abc
import codecs
import logging
import collections
Expand Down Expand Up @@ -96,7 +97,7 @@ def __nonzero__(self):
return self.success


class Feature:
class Feature(abc.ABC):
def __init__(self, value: Union[str, int, bytes], bitness=None, description=None):
"""
Args:
Expand Down Expand Up @@ -168,6 +169,8 @@ def freeze_deserialize(cls, args):
kwargs = args[-1]
args = args[:-1]
return cls(*args, **kwargs)
else:
return cls(*args)


class MatchedRule(Feature):
Expand All @@ -178,7 +181,6 @@ def __init__(self, value: str, description=None):

class Characteristic(Feature):
def __init__(self, value: str, description=None):

super(Characteristic, self).__init__(value, description=description)


Expand Down Expand Up @@ -408,7 +410,9 @@ def freeze_deserialize(cls, args):
# other candidates here: https://docs.microsoft.com/en-us/windows/win32/debug/pe-format#machine-types
ARCH_I386 = "i386"
ARCH_AMD64 = "amd64"
VALID_ARCH = (ARCH_I386, ARCH_AMD64)
# dotnet
ARCH_ANY = "any"
VALID_ARCH = (ARCH_I386, ARCH_AMD64, ARCH_ANY)


class Arch(Feature):
Expand All @@ -420,8 +424,10 @@ def __init__(self, value: str, description=None):
OS_WINDOWS = "windows"
OS_LINUX = "linux"
OS_MACOS = "macos"
# dotnet
OS_ANY = "any"
VALID_OS = {os.value for os in capa.features.extractors.elf.OS}
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS})
VALID_OS.update({OS_WINDOWS, OS_LINUX, OS_MACOS, OS_ANY})


class OS(Feature):
Expand All @@ -432,7 +438,14 @@ def __init__(self, value: str, description=None):

FORMAT_PE = "pe"
FORMAT_ELF = "elf"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF)
FORMAT_DOTNET = "dotnet"
VALID_FORMAT = (FORMAT_PE, FORMAT_ELF, FORMAT_DOTNET)
# internal only, not to be used in rules
FORMAT_AUTO = "auto"
FORMAT_SC32 = "sc32"
FORMAT_SC64 = "sc64"
FORMAT_FREEZE = "freeze"
FORMAT_UNKNOWN = "unknown"


class Format(Feature):
Expand Down
5 changes: 4 additions & 1 deletion capa/features/extractors/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import capa.features
import capa.features.extractors.elf
import capa.features.extractors.pefile
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, Arch, Format, String
from capa.features.common import OS, FORMAT_PE, FORMAT_ELF, OS_WINDOWS, FORMAT_FREEZE, Arch, Format, String
from capa.features.freeze import is_freeze

logger = logging.getLogger(__name__)

Expand All @@ -29,6 +30,8 @@ def extract_format(buf):
yield Format(FORMAT_PE), 0x0
elif buf.startswith(b"\x7fELF"):
yield Format(FORMAT_ELF), 0x0
elif is_freeze(buf):
yield Format(FORMAT_FREEZE), 0x0
else:
# we likely end up here:
# 1. handling a file format (e.g. macho)
Expand Down
109 changes: 109 additions & 0 deletions capa/features/extractors/dnfile_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
import logging
from typing import Tuple, Iterator

import dnfile
import pefile

from capa.features.common import OS, OS_ANY, ARCH_ANY, ARCH_I386, ARCH_AMD64, FORMAT_DOTNET, Arch, Format, Feature
from capa.features.extractors.base_extractor import FeatureExtractor

logger = logging.getLogger(__name__)


def extract_file_format(**kwargs):
    """
    Yield the single file-format feature, ``Format(FORMAT_DOTNET)``, at address 0x0.

    Keyword arguments are accepted and ignored, so the handler can be invoked the same way
    as the other handlers (``handler(pe=pe)``).
    """
    dotnet_format = Format(FORMAT_DOTNET)
    yield dotnet_format, 0x0


def extract_file_os(**kwargs):
    """
    Yield the single OS feature, ``OS(OS_ANY)``, at address 0x0.

    Keyword arguments are accepted and ignored, so the handler can be invoked the same way
    as the other handlers (``handler(pe=pe)``).
    """
    any_os = OS(OS_ANY)
    yield any_os, 0x0


def extract_file_arch(pe, **kwargs):
    """
    Yield exactly one Arch feature, at address 0x0, derived from the CLR flags and PE type.

    - i386:  CLR_32BITREQUIRED is set and the optional header magic is PE
    - amd64: CLR_32BITREQUIRED is not set and the optional header magic is PE_PLUS
    - any:   every other combination

    Args:
      pe: parsed file exposing ``net.Flags.CLR_32BITREQUIRED`` and ``PE_TYPE``.
      **kwargs: ignored; accepted so all handlers share one calling convention.
    """
    # to distinguish in more detail, see https://stackoverflow.com/a/23614024/10548020
    # .NET 4.5 added option: any CPU, 32-bit preferred
    requires_32bit = pe.net.Flags.CLR_32BITREQUIRED
    pe_type = pe.PE_TYPE

    if requires_32bit and pe_type == pefile.OPTIONAL_HEADER_MAGIC_PE:
        arch = ARCH_I386
    elif not requires_32bit and pe_type == pefile.OPTIONAL_HEADER_MAGIC_PE_PLUS:
        arch = ARCH_AMD64
    else:
        arch = ARCH_ANY

    yield Arch(arch), 0x0


def extract_file_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """
    Yield every (feature, address) pair produced by the handlers in FILE_HANDLERS.

    Handlers run in tuple order and each is called with the keyword argument ``pe``.

    Args:
      pe: the parsed dnfile object handed to each handler.
    """
    for file_handler in FILE_HANDLERS:
        # delegate directly to the handler's generator instead of re-yielding pair by pair
        yield from file_handler(pe=pe)  # type: ignore


# Handlers run, in this order, by extract_file_features(); each is called as handler(pe=pe)
# and yields (feature, address) pairs.
# TODO(review): the commented-out names look like placeholders for handlers that are not
# implemented in this module yet - confirm before enabling or deleting them.
FILE_HANDLERS = (
    # extract_file_export_names,
    # extract_file_import_names,
    # extract_file_section_names,
    # extract_file_strings,
    # extract_file_function_names,
    extract_file_format,
)


def extract_global_features(pe: dnfile.dnPE) -> Iterator[Tuple[Feature, int]]:
    """
    Yield every (feature, address) pair produced by the handlers in GLOBAL_HANDLERS.

    Handlers run in tuple order and each is called with the keyword argument ``pe``.

    Args:
      pe: the parsed dnfile object handed to each handler.
    """
    for handler in GLOBAL_HANDLERS:
        # delegate directly to the handler's generator instead of re-yielding pair by pair
        yield from handler(pe=pe)  # type: ignore


# Handlers run, in this order, by extract_global_features(); each is called as handler(pe=pe)
# and yields (feature, address) pairs. Currently these emit the OS and Arch features.
GLOBAL_HANDLERS = (
    extract_file_os,
    extract_file_arch,
)


class DnfileFeatureExtractor(FeatureExtractor):
    """
    Feature extractor backed by dnfile that provides file-level and global features only.

    Every function-, basic-block- and instruction-level method raises NotImplementedError.
    """

    # single definition of the message raised by every unsupported method below
    _FILE_FEATURES_ONLY = "DnfileFeatureExtractor can only be used to extract file features"

    def __init__(self, path: str):
        """
        Args:
          path: path of the file to parse; it is passed to ``dnfile.dnPE`` immediately.
        """
        super(DnfileFeatureExtractor, self).__init__()
        self.path: str = path
        self.pe: dnfile.dnPE = dnfile.dnPE(path)

    def get_base_address(self) -> int:
        """Return the base address, which is always 0x0 for this extractor."""
        return 0x0

    def get_entry_point(self) -> int:
        """Return the ``EntryPointTokenOrRva`` field of the parsed .NET header structure."""
        return self.pe.net.struct.EntryPointTokenOrRva

    def extract_global_features(self):
        """Yield the (feature, address) pairs from the module-level extract_global_features()."""
        yield from extract_global_features(self.pe)

    def extract_file_features(self):
        """Yield the (feature, address) pairs from the module-level extract_file_features()."""
        yield from extract_file_features(self.pe)

    def is_dotnet_file(self) -> bool:
        """Return True when the parsed file has a truthy ``net`` attribute."""
        return bool(self.pe.net)

    def get_runtime_version(self) -> Tuple[int, int]:
        """Return ``(MajorRuntimeVersion, MinorRuntimeVersion)`` from the .NET header structure."""
        net_header = self.pe.net.struct
        return net_header.MajorRuntimeVersion, net_header.MinorRuntimeVersion

    def get_meta_version_string(self) -> str:
        """Return the metadata version with trailing NUL bytes stripped, decoded as UTF-8."""
        return self.pe.net.metadata.struct.Version.rstrip(b"\x00").decode("utf-8")

    def get_functions(self):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)

    def extract_function_features(self, f):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)

    def get_basic_blocks(self, f):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)

    def extract_basic_block_features(self, f, bb):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)

    def get_instructions(self, f, bb):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)

    def extract_insn_features(self, f, bb, insn):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)

    def is_library_function(self, va):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)

    def get_function_name(self, va):
        """Not supported; always raises NotImplementedError."""
        raise NotImplementedError(self._FILE_FEATURES_ONLY)
17 changes: 14 additions & 3 deletions capa/features/extractors/ida/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import capa.features.extractors.helpers
import capa.features.extractors.ida.helpers
from capa.features.insn import API, Number, Offset, Mnemonic
from capa.features.insn import API, Number, Offset, Mnemonic, OperandNumber, OperandOffset
from capa.features.common import (
BITNESS_X32,
BITNESS_X64,
Expand Down Expand Up @@ -143,7 +143,11 @@ def extract_insn_number_features(f, bb, insn):
# .text:00401145 add esp, 0Ch
return

for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_imm, idaapi.o_mem)):
for i, op in enumerate(insn.ops):
if op.type == idaapi.o_void:
break
if op.type not in (idaapi.o_imm, idaapi.o_mem):
continue
# skip things like:
# .text:00401100 shr eax, offset loc_C
if capa.features.extractors.ida.helpers.is_op_offset(insn, op):
Expand All @@ -156,6 +160,7 @@ def extract_insn_number_features(f, bb, insn):

yield Number(const), insn.ea
yield Number(const, bitness=get_bitness(f.ctx)), insn.ea
yield OperandNumber(i, const), insn.ea


def extract_insn_bytes_features(f, bb, insn):
Expand Down Expand Up @@ -208,9 +213,14 @@ def extract_insn_offset_features(f, bb, insn):
example:
.text:0040112F cmp [esi+4], ebx
"""
for op in capa.features.extractors.ida.helpers.get_insn_ops(insn, target_ops=(idaapi.o_phrase, idaapi.o_displ)):
for i, op in enumerate(insn.ops):
if op.type == idaapi.o_void:
break
if op.type not in (idaapi.o_phrase, idaapi.o_displ):
continue
if capa.features.extractors.ida.helpers.is_op_stack_var(insn.ea, op.n):
continue

p_info = capa.features.extractors.ida.helpers.get_op_phrase_info(op)
op_off = p_info.get("offset", 0)
if idaapi.is_mapped(op_off):
Expand All @@ -225,6 +235,7 @@ def extract_insn_offset_features(f, bb, insn):

yield Offset(op_off), insn.ea
yield Offset(op_off, bitness=get_bitness(f.ctx)), insn.ea
yield OperandOffset(i, op_off), insn.ea


def contains_stack_cookie_keywords(s):
Expand Down
2 changes: 1 addition & 1 deletion capa/features/extractors/smda/insn.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ def extract_insn_offset_features(f, bb, insn):
# mov eax, [esi + ecx + 16384]
operands = [o.strip() for o in insn.operands.split(",")]
for operand in operands:
if not "ptr" in operand:
if "ptr" not in operand:
continue
if "esp" in operand or "ebp" in operand or "rbp" in operand:
continue
Expand Down
Loading

0 comments on commit 0499f9e

Please sign in to comment.