Skip to content

Commit

Permalink
fix(cli): add ability to extend default skip magic rather than overwr…
Browse files Browse the repository at this point in the history
…ite it.

unblob has a decent default skip magic list, but it gets overwritten when a
user provides their own values. This means unblob users need to redefine all
of unblob's default skip magics through the CLI whenever they provide their
own.

Changed the logic so that user-provided skip magic values are simply
appended to unblob's default list, unless the user explicitly provides the
"--clear-skip-magics" flag.

Co-authored-by: Krisztián Fekete <[email protected]>
  • Loading branch information
qkaiser and e3krisztian committed Jan 4, 2024
1 parent a3d406f commit 3c3cafb
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 24 deletions.
57 changes: 55 additions & 2 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from pathlib import Path
from typing import List, Optional, Type
from typing import Iterable, List, Optional, Type
from unittest import mock

import pytest
Expand All @@ -10,7 +10,12 @@
from unblob.extractors.command import MultiFileCommand
from unblob.handlers import BUILTIN_HANDLERS
from unblob.models import DirectoryHandler, Glob, Handler, HexString, MultiFile
from unblob.processing import DEFAULT_DEPTH, DEFAULT_PROCESS_NUM, ExtractionConfig
from unblob.processing import (
DEFAULT_DEPTH,
DEFAULT_PROCESS_NUM,
DEFAULT_SKIP_MAGIC,
ExtractionConfig,
)
from unblob.ui import (
NullProgressReporter,
ProgressReporter,
Expand Down Expand Up @@ -367,3 +372,51 @@ def test_skip_extraction(
assert (
process_file_mock.call_args.args[0].skip_extraction == skip_extraction
), fail_message


@pytest.mark.parametrize(
    "args, skip_magic, fail_message",
    [
        ([], DEFAULT_SKIP_MAGIC, "Should have kept default skip magics"),
        (
            ["--skip-magic", "SUPERMAGIC"],
            (*DEFAULT_SKIP_MAGIC, "SUPERMAGIC"),
            "Should have kept default skip magics",
        ),
        (["--clear-skip-magics"], [], "Should have cleared default skip magics"),
        (
            ["--clear-skip-magics", "--skip-magic", "SUPERMAGIC"],
            ["SUPERMAGIC"],
            "Should have cleared default skip magics",
        ),
        (
            ["--clear-skip-magics", "--skip-magic", DEFAULT_SKIP_MAGIC[1]],
            [DEFAULT_SKIP_MAGIC[1]],
            "Should allow user specified and remove the rest",
        ),
    ],
)
def test_clear_skip_magics(
    args: List[str], skip_magic: Iterable[str], fail_message: str, tmp_path: Path
):
    """Check which skip magics the CLI hands over to ``process_file``.

    Each case invokes the CLI with ``args`` followed by an extraction
    directory and a sample zip file, with ``unblob.cli.process_file``
    replaced by a mock. The ``skip_magic`` attribute of the first positional
    argument passed to that mock must contain the same values as the
    expected ``skip_magic``, compared order-insensitively.
    """
    sample_zip = Path(__file__).parent.joinpath(
        "integration", "archive", "zip", "regular", "__input__", "apple.zip"
    )
    cli_params = [*args, "--extract-dir", str(tmp_path), str(sample_zip)]

    # patch.object hands back the replacement object when one is supplied,
    # so the mock can be bound directly by the ``with`` statement.
    with mock.patch.object(
        unblob.cli, "process_file", mock.MagicMock()
    ) as fake_process_file:
        outcome = CliRunner().invoke(unblob.cli.cli, cli_params)

    assert outcome.exit_code == 0
    fake_process_file.assert_called_once()

    # Sort both sides: only the set of values matters here, not their order.
    passed_config = fake_process_file.call_args.args[0]
    received = sorted(passed_config.skip_magic)
    expected = sorted(skip_magic)
    assert received == expected, fail_message
20 changes: 17 additions & 3 deletions unblob/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,9 +168,11 @@ def __init__(
"--skip-magic",
"skip_magic",
type=click.STRING,
default=DEFAULT_SKIP_MAGIC,
help="Skip processing files with given magic prefix",
show_default=True,
help=f"""Skip processing files with given magic prefix.
The provided values are appended to unblob's own skip magic list unless
--clear-skip-magics is provided.
[default: {', '.join(DEFAULT_SKIP_MAGIC)}]
""",
multiple=True,
)
@click.option(
Expand All @@ -182,6 +184,14 @@ def __init__(
show_default=True,
multiple=True,
)
@click.option(
"--clear-skip-magics",
"clear_skip_magics",
is_flag=True,
show_default=True,
default=False,
help="Clear unblob's own skip magic list.",
)
@click.option(
"-p",
"--process-num",
Expand Down Expand Up @@ -246,6 +256,7 @@ def cli(
entropy_depth: int,
skip_magic: Iterable[str],
skip_extension: Iterable[str],
clear_skip_magics: bool, # noqa: FBT001
skip_extraction: bool, # noqa: FBT001
keep_extracted_chunks: bool, # noqa: FBT001
handlers: Handlers,
Expand All @@ -263,6 +274,9 @@ def cli(
extra_dir_handlers = plugin_manager.load_dir_handlers_from_plugins()
dir_handlers += tuple(extra_dir_handlers)

extra_magics_to_skip = () if clear_skip_magics else DEFAULT_SKIP_MAGIC
skip_magic = tuple(sorted(set(skip_magic).union(extra_magics_to_skip)))

config = ExtractionConfig(
extract_root=extract_root,
force_extract=force,
Expand Down
38 changes: 19 additions & 19 deletions unblob/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,31 @@
# Default process count: the CPU count reported by multiprocessing.
DEFAULT_PROCESS_NUM = multiprocessing.cpu_count()
# Magic prefixes skipped by default; files whose magic starts with one of
# these are not processed.
# NOTE(review): entries appear twice below (once in the previous order, once
# alphabetized) - this looks like removed and added diff lines shown
# together; confirm the final list against the repository.
DEFAULT_SKIP_MAGIC = (
"BFLT",
"JPEG",
"Composite Document File V2 Document",
"Erlang BEAM file",
"GIF",
"PNG",
"SQLite",
"compiled Java class",
"TrueType Font data",
"PDF document",
"magic binary file",
"MS Windows icon resource",
"Web Open Font Format",
"GNU message catalog",
"Xilinx BIT data",
"HP Printer Job Language",
"Intel serial flash for PCH ROM",
"JPEG",
"MPEG",
"MS Windows icon resource",
"Macromedia Flash data",
"Microsoft Excel",
"Microsoft Word",
"Microsoft PowerPoint",
"Microsoft OOXML",
"Microsoft PowerPoint",
"Microsoft Word",
"OpenDocument",
"Macromedia Flash data",
"MPEG",
"HP Printer Job Language",
"Erlang BEAM file",
"python", # (e.g. python 2.7 byte-compiled)
"Composite Document File V2 Document",
"PDF document",
"PNG",
"SQLite",
"TrueType Font data",
"Web Open Font Format",
"Windows Embedded CE binary image",
"Intel serial flash for PCH ROM",
"Xilinx BIT data",
"compiled Java class",
"magic binary file",
"python", # (e.g. python 2.7 byte-compiled)
)
# File extensions skipped by default - presumably the default for the
# --skip-extension option; TODO confirm against the CLI definition.
DEFAULT_SKIP_EXTENSION = (".rlib",)

Expand Down

0 comments on commit 3c3cafb

Please sign in to comment.