Skip to content

Commit

Permalink
Merge pull request #1017 from onekey-sec/report_carve_dir
Browse files Browse the repository at this point in the history
Report carve dir
  • Loading branch information
e3krisztian authored Dec 3, 2024
2 parents 0c9d34c + 4f1bd85 commit 7565a38
Show file tree
Hide file tree
Showing 53 changed files with 346 additions and 76 deletions.
14 changes: 10 additions & 4 deletions .envrc
Original file line number Diff line number Diff line change
@@ -1,4 +1,10 @@
watch_file flake.nix flake.lock devenv.nix
use nix \
--option extra-substituters "https://unblob.cachix.org" \
--option extra-trusted-public-keys "unblob.cachix.org-1:5kWA6DwOg176rSqU8TOTBXWxsDB4LoCMfGfTgL5qCAE="
# Source further custom features from .envrc.user if it exists
# Also allows users to disable/override features in this file
source_env_if_exists .envrc.user

if ${UNBLOB_USE_DEVENV:-true}; then
watch_file flake.nix flake.lock devenv.nix
use nix \
--option extra-substituters "https://unblob.cachix.org" \
--option extra-trusted-public-keys "unblob.cachix.org-1:5kWA6DwOg176rSqU8TOTBXWxsDB4LoCMfGfTgL5qCAE="
fi
1 change: 1 addition & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
tests/integration/** filter=lfs diff=lfs merge=lfs -text
tests/files/** filter=lfs diff=lfs merge=lfs -text
3 changes: 3 additions & 0 deletions tests/files/suffixes/__input__/chunks
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/files/suffixes/__input__/collisions.zip
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
Git LFS file not shown
3 changes: 3 additions & 0 deletions tests/files/suffixes/chunks
Git LFS file not shown
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
2 changes: 1 addition & 1 deletion tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def test_keep_extracted_chunks(
[
pytest.param([], 5, id="skip-extension-empty"),
pytest.param([""], 5, id="skip-zip-extension-empty-suffix"),
pytest.param([".zip"], 1, id="skip-extension-zip"),
pytest.param([".zip"], 0, id="skip-extension-zip"),
pytest.param([".rlib"], 5, id="skip-extension-rlib"),
],
)
Expand Down
6 changes: 3 additions & 3 deletions tests/test_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@
)
from unblob.report import (
ChunkReport,
ExtractDirectoryExistsReport,
FileMagicReport,
HashReport,
MultiFileCollisionReport,
MultiFileReport,
OutputDirectoryExistsReport,
RandomnessMeasurements,
RandomnessReport,
StatReport,
Expand Down Expand Up @@ -350,7 +350,7 @@ def test_process_file_prevents_double_extracts(tmp_path: Path, fw: Path):

# we expect exactly 1 problem reported, related to the extraction of "internal.zip"
[report] = process_result.errors
assert isinstance(report, ExtractDirectoryExistsReport)
assert isinstance(report, OutputDirectoryExistsReport)
assert report.path.name == "internal.zip_extract"

# the rest should be the same, except that the extraction is shifted with one extra directory
Expand Down Expand Up @@ -819,7 +819,7 @@ def test_multi_file_extract_dir(
multi_file_reports = task_result_by_path[directory].filter_reports(MultiFileReport)
assert multi_file_reports
assert any(
isinstance(report, ExtractDirectoryExistsReport)
isinstance(report, OutputDirectoryExistsReport)
for report in multi_file_reports[0].extraction_reports
)

Expand Down
100 changes: 100 additions & 0 deletions tests/test_processing_suffixes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
from pathlib import Path

import pytest

from unblob.processing import ExtractionConfig, process_file
from unblob.report import OutputDirectoryExistsReport
from unblob.testing import check_output_is_the_same

# Root of the fixtures for these tests: inputs live under "__input__" and the
# expected output trees under "__outputs__".
TEST_DATA_PATH = Path(__file__).parent.joinpath("files/suffixes")


def _patch(
    extraction_config: "ExtractionConfig", carve_suffix: str, extract_suffix: str
) -> None:
    """Configure *extraction_config* in place for a suffix test.

    Turns ``keep_extracted_chunks`` off and stores the given carve and
    extract directory suffixes on the config object.
    """
    extraction_config.keep_extracted_chunks = False
    extraction_config.carve_suffix = carve_suffix
    extraction_config.extract_suffix = extract_suffix


@pytest.mark.parametrize(
    "carve_suffix,extract_suffix,output_root_dir_name",
    [
        ("_extract", "_extract", "defaults"),
        ("_c", "_e", "_c_e"),
        ("_carve", "_extract", "_carve_extract"),
    ],
)
def test_top_level_carve(
    carve_suffix: str,
    extract_suffix: str,
    output_root_dir_name: str,
    extraction_config: ExtractionConfig,
):
    """Process the ``chunks`` input and compare the result with the fixture.

    Asserts that processing reports no errors, that no directory named with
    the extract suffix exists in the extraction root (unless both suffixes
    yield the same directory name), and that the extraction root matches the
    expected output tree for the given suffix pair.
    """
    _patch(extraction_config, carve_suffix, extract_suffix)
    source = TEST_DATA_PATH / "__input__/chunks"
    expected_tree = TEST_DATA_PATH / "__outputs__/chunks" / output_root_dir_name
    carve_name = source.name + extraction_config.carve_suffix
    extract_name = source.name + extraction_config.extract_suffix

    result = process_file(extraction_config, source)

    assert result.errors == []

    # With distinct suffixes, the extract-suffixed directory must not appear
    # for the top-level input.
    if carve_name != extract_name:
        assert not (extraction_config.extract_root / extract_name).exists()
    check_output_is_the_same(expected_tree, extraction_config.extract_root)


# Output directories expected to be reported as already existing, keyed by
# (carve_suffix, extract_suffix). Paths are POSIX-style and relative to the
# extraction root; suffix pairs without an entry are expected to produce no
# collision reports.
EXPECTED_COLLISION_PATHS: "dict[tuple[str, str], set[str]]" = {
    ("_extract", "_extract"): {
        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
    },
    ("_carve", "_extract"): {
        "collisions.zip_extract/chunks_carve",
        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
    },
}


@pytest.mark.parametrize(
    "carve_suffix,extract_suffix,output_root_dir_name",
    [
        ("_extract", "_extract", "defaults"),
        ("_c", "_e", "_c_e"),
        ("_carve", "_extract", "_carve_extract"),
    ],
)
def test_top_level_extract_and_collisions(
    carve_suffix: str,
    extract_suffix: str,
    output_root_dir_name: str,
    extraction_config: ExtractionConfig,
):
    """Process ``collisions.zip`` and verify the reported directory collisions.

    Asserts that the set of paths carried by ``OutputDirectoryExistsReport``
    errors equals the entry of ``EXPECTED_COLLISION_PATHS`` for the suffix
    pair (empty when there is none), that the error count equals the number
    of those paths, that no directory named with the carve suffix exists in
    the extraction root (unless both suffixes yield the same directory name),
    and that the extraction root matches the expected output tree.
    """
    _patch(extraction_config, carve_suffix, extract_suffix)
    source = TEST_DATA_PATH / "__input__/collisions.zip"
    outputs_root = TEST_DATA_PATH / "__outputs__/collisions.zip"
    expected_tree = outputs_root / output_root_dir_name
    carve_name = source.name + extraction_config.carve_suffix
    extract_name = source.name + extraction_config.extract_suffix

    result = process_file(extraction_config, source)

    # check collision problems - the input was prepared to have collisions
    # during both the carving and extracting phases
    collision_reports = [
        error
        for error in result.errors
        if isinstance(error, OutputDirectoryExistsReport)
    ]
    problem_paths = {
        report.path.relative_to(extraction_config.extract_root).as_posix()
        for report in collision_reports
    }
    expected_paths = EXPECTED_COLLISION_PATHS.get(
        (carve_suffix, extract_suffix), set()
    )
    assert problem_paths == expected_paths
    # we expect only OutputDirectoryExistsReport-s
    assert len(result.errors) == len(problem_paths)

    # With distinct suffixes, the carve-suffixed directory must not appear
    # for the top-level input.
    if carve_name != extract_name:
        assert not (extraction_config.extract_root / carve_name).exists()
    check_output_is_the_same(expected_tree, extraction_config.extract_root)
2 changes: 2 additions & 0 deletions tests/test_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from unblob.models import ProcessResult, Task, TaskResult
from unblob.processing import ExtractionConfig, process_file
from unblob.report import (
CarveDirectoryReport,
ChunkReport,
FileMagicReport,
HashReport,
Expand Down Expand Up @@ -120,6 +121,7 @@ def hello_kitty_task_results(
sha1="febca6ed75dc02e0def065e7b08f1cca87b57c74",
sha256="144d8b2c949cb4943128aa0081153bcba4f38eb0ba26119cc06ca1563c4999e1",
),
CarveDirectoryReport(carve_dir=extract_root / "hello_kitty_extract"),
UnknownChunkReport(
id=ANY,
start_offset=0,
Expand Down
Loading

0 comments on commit 7565a38

Please sign in to comment.