Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ability to skip files based on extension. #695

Merged
merged 1 commit into from
Jan 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,3 +302,34 @@ def test_keep_extracted_chunks(
process_file_mock.call_args.args[0].keep_extracted_chunks
== keep_extracted_chunks
), fail_message


@pytest.mark.parametrize(
    "skip_extension, extracted_files_count",
    [
        pytest.param([], 5, id="skip-extension-empty"),
        pytest.param([""], 5, id="skip-zip-extension-empty-suffix"),
        pytest.param([".zip"], 1, id="skip-extension-zip"),
        pytest.param([".rlib"], 5, id="skip-extension-rlib"),
    ],
)
def test_skip_extension(
    skip_extension: List[str], extracted_files_count: int, tmp_path: Path
):
    """Check that ``--skip-extension`` controls which files get processed.

    The CLI is run against the ``apple.zip`` integration fixture, with each
    entry of ``skip_extension`` passed as its own ``--skip-extension`` option,
    extracting into ``tmp_path``. The number of entries found (recursively)
    under ``tmp_path`` afterwards must equal ``extracted_files_count``.
    """
    runner = CliRunner()
    in_path = (
        Path(__file__).parent
        / "integration"
        / "archive"
        / "zip"
        / "regular"
        / "__input__"
        / "apple.zip"
    )
    # The option is declared with ``multiple=True``: repeat the flag per suffix.
    args = [
        arg for suffix in skip_extension for arg in ("--skip-extension", suffix)
    ]
    params = [*args, "--extract-dir", str(tmp_path), str(in_path)]
    result = runner.invoke(unblob.cli.cli, params)

    # Check the exit code first and surface the CLI output: if the command
    # itself failed, a file-count mismatch would only be a confusing symptom.
    assert result.exit_code == 0, result.output

    extracted = list(tmp_path.rglob("*"))
    assert extracted_files_count == len(extracted), (
        f"expected {extracted_files_count} extracted entries, "
        f"found {len(extracted)}: {sorted(str(p) for p in extracted)}"
    )
12 changes: 12 additions & 0 deletions unblob/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
from .processing import (
DEFAULT_DEPTH,
DEFAULT_PROCESS_NUM,
DEFAULT_SKIP_EXTENSION,
DEFAULT_SKIP_MAGIC,
ExtractionConfig,
process_file,
Expand Down Expand Up @@ -166,6 +167,15 @@ def __init__(
show_default=True,
multiple=True,
)
@click.option(
"--skip-extension",
"skip_extension",
type=click.STRING,
default=DEFAULT_SKIP_EXTENSION,
help="Skip processing files with given extension",
show_default=True,
multiple=True,
)
@click.option(
"-p",
"--process-num",
Expand Down Expand Up @@ -229,6 +239,7 @@ def cli(
depth: int,
entropy_depth: int,
skip_magic: Iterable[str],
skip_extension: Iterable[str],
skip_extraction: bool, # noqa: FBT001
keep_extracted_chunks: bool, # noqa: FBT001
handlers: Handlers,
Expand All @@ -254,6 +265,7 @@ def cli(
entropy_plot=bool(verbose >= 3),
skip_extraction=skip_extraction,
skip_magic=skip_magic,
skip_extension=skip_extension,
process_num=process_num,
handlers=handlers,
dir_handlers=dir_handlers,
Expand Down
9 changes: 8 additions & 1 deletion unblob/processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@
"Windows Embedded CE binary image",
"Intel serial flash for PCH ROM",
)
# File suffixes skipped by default. Entries are matched exactly against the
# task path's ``suffix`` attribute, so they must include the leading dot.
DEFAULT_SKIP_EXTENSION = (".rlib",)


@attr.define(kw_only=True)
Expand All @@ -87,6 +88,7 @@ class ExtractionConfig:
entropy_plot: bool = False
max_depth: int = DEFAULT_DEPTH
skip_magic: Iterable[str] = DEFAULT_SKIP_MAGIC
skip_extension: Iterable[str] = DEFAULT_SKIP_EXTENSION
skip_extraction: bool = False
process_num: int = DEFAULT_PROCESS_NUM
keep_extracted_chunks: bool = False
Expand Down Expand Up @@ -292,9 +294,14 @@ def _process_task(self, result: TaskResult, task: Task):
should_skip_file = any(
magic.startswith(pattern) for pattern in self._config.skip_magic
)
should_skip_file |= task.path.suffix in self._config.skip_extension

if should_skip_file:
log.debug("Ignoring file based on magic", magic=magic)
log.debug(
"Ignoring file based on magic or extension.",
magic=magic,
extension=task.path.suffix,
)
return

_FileTask(self._config, task, stat_report.size, result).process()
Expand Down
Loading