diff --git a/.gitignore b/.gitignore index e2dd25dcaf..0ed3a2437b 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ build/ *.so .idea .coverage* +/.venv/ diff --git a/docs/development.md b/docs/development.md index 178100f81c..e6ace9958e 100644 --- a/docs/development.md +++ b/docs/development.md @@ -515,7 +515,7 @@ class Extractor(abc.ABC): return [] @abc.abstractmethod - def extract(self, inpath: Path, outdir: Path): + def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]: """Extract the carved out chunk. Raises ExtractError on failure.""" ``` @@ -526,6 +526,15 @@ Two methods are exposed by this class: - `extract()`: you must override this function. This is where you'll perform the extraction of `inpath` content into `outdir` extraction directory +!!! Recommendation + + Although it is possible to implement `extract()` by manipulating paths, + checking for path traversals, and performing I/O with Python libraries + (`os`, `pathlib.Path`), doing so turns out to be somewhat tedious. + Instead, we recommend removing the boilerplate by using the helper class `FileSystem` from + [unblob/file_utils.py](https://github.com/onekey-sec/unblob/blob/main/unblob/file_utils.py) + which ensures that all file objects are created under its root. + ### DirectoryExtractor class The `DirectoryExtractor` interface is defined in @@ -538,7 +547,7 @@ class DirectoryExtractor(abc.ABC): return [] @abc.abstractmethod - def extract(self, paths: List[Path], outdir: Path): + def extract(self, paths: List[Path], outdir: Path) -> Optional[ExtractResult]: """Extract from a multi file path list. Raises ExtractError on failure. @@ -552,6 +561,11 @@ Two methods are exposed by this class: - `extract()`: you must override this function. This is where you'll perform the extraction of `paths` files into `outdir` extraction directory +!!! Recommendation + + Similarly to `Extractor`, it is recommended to use the `FileSystem` helper class to + implement `extract`. 
+ ### Example Extractor Extractors are quite complex beasts, so rather than trying to come up with a diff --git a/tests/handlers/filesystem/test_romfs.py b/tests/handlers/filesystem/test_romfs.py index 984469c512..55e240bb99 100644 --- a/tests/handlers/filesystem/test_romfs.py +++ b/tests/handlers/filesystem/test_romfs.py @@ -1,9 +1,7 @@ -from pathlib import Path - import pytest from unblob.file_utils import File -from unblob.handlers.filesystem.romfs import get_string, is_safe_path, valid_checksum +from unblob.handlers.filesystem.romfs import get_string, valid_checksum @pytest.mark.parametrize( @@ -44,23 +42,3 @@ def test_get_string(content, expected): ) def test_valid_checksum(content, valid): assert valid_checksum(content) == valid - - -@pytest.mark.parametrize( - "basedir, path, expected", - [ - ("/lib/out", "/lib/out/file", True), - ("/lib/out", "file", True), - ("/lib/out", "dir/file", True), - ("/lib/out", "some/dir/file", True), - ("/lib/out", "some/dir/../file", True), - ("/lib/out", "some/dir/../../file", True), - ("/lib/out", "some/dir/../../../file", False), - ("/lib/out", "some/dir/../../../", False), - ("/lib/out", "some/dir/../../..", False), - ("/lib/out", "../file", False), - ("/lib/out", "/lib/out/../file", False), - ], -) -def test_is_safe_path(basedir, path, expected): - assert is_safe_path(Path(basedir), Path(path)) is expected diff --git a/tests/integration/filesystem/yaffs/__output__/malformed.2048.16.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/malformed.2048.16.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/malformed.2048.16.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/malformed.2048.16.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/malformed.2048.16.ecc.le.yaffs2_extract/passwd new file 
mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/malformed.2048.16.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.128.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.128.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.128.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.128.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.128.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.128.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.128.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.128.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.128.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.16.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.16.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.16.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.16.ecc.le.yaffs2_extract/passwd 
b/tests/integration/filesystem/yaffs/__output__/sample.16384.16.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.16.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.16.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.16.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.16.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.256.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.256.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.256.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.256.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.256.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.256.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.256.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.256.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.256.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git 
a/tests/integration/filesystem/yaffs/__output__/sample.16384.32.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.32.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.32.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.32.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.32.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.32.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.32.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.32.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.32.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.512.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.512.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.512.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.512.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.512.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.512.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 
@@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.512.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.512.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.512.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.64.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.64.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.64.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.64.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.64.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.64.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.16384.64.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.16384.64.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.16384.64.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.128.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.128.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ 
b/tests/integration/filesystem/yaffs/__output__/sample.2048.128.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.128.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.128.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.128.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.128.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.128.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.128.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.16.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.16.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.16.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.16.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.16.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.16.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.16.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.16.le.yaffs2_extract/passwd new file mode 120000 index 
0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.16.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.256.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.256.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.256.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.256.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.256.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.256.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.256.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.256.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.256.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.32.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.32.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.32.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.32.ecc.le.yaffs2_extract/passwd 
b/tests/integration/filesystem/yaffs/__output__/sample.2048.32.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.32.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.32.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.32.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.32.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.512.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.512.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.512.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.512.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.512.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.512.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.512.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.512.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.512.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git 
a/tests/integration/filesystem/yaffs/__output__/sample.2048.64.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.64.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.64.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.64.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.64.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.64.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.2048.64.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.2048.64.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.2048.64.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.128.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.128.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.128.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.128.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.128.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.128.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd 
\ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.128.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.128.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.128.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.16.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.16.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.16.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.16.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.16.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.16.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.16.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.16.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.16.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.256.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.256.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ 
b/tests/integration/filesystem/yaffs/__output__/sample.4096.256.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.256.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.256.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.256.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.256.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.256.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.256.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.32.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.32.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.32.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.32.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.32.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.32.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.32.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.32.le.yaffs2_extract/passwd new file mode 120000 index 
0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.32.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.512.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.512.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.512.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.512.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.512.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.512.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.512.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.512.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.512.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.64.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.64.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.64.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.64.ecc.le.yaffs2_extract/passwd 
b/tests/integration/filesystem/yaffs/__output__/sample.4096.64.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.64.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.4096.64.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.4096.64.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.4096.64.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.128.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.128.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.128.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.128.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.128.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.128.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.128.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.128.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.128.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git 
a/tests/integration/filesystem/yaffs/__output__/sample.8192.16.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.16.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.16.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.16.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.16.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.16.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.16.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.16.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.16.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.256.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.256.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.256.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.256.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.256.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.256.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd 
\ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.256.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.256.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.256.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.32.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.32.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.32.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.32.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.32.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.32.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.32.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.32.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.32.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.512.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.512.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ 
b/tests/integration/filesystem/yaffs/__output__/sample.8192.512.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.512.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.512.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.512.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.512.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.512.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.512.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.64.ecc.be.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.64.ecc.be.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.64.ecc.be.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.64.ecc.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.64.ecc.le.yaffs2_extract/passwd new file mode 120000 index 0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.64.ecc.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/integration/filesystem/yaffs/__output__/sample.8192.64.le.yaffs2_extract/passwd b/tests/integration/filesystem/yaffs/__output__/sample.8192.64.le.yaffs2_extract/passwd new file mode 120000 index 
0000000000..ba193d2b4c --- /dev/null +++ b/tests/integration/filesystem/yaffs/__output__/sample.8192.64.le.yaffs2_extract/passwd @@ -0,0 +1 @@ +etc/passwd \ No newline at end of file diff --git a/tests/test_file_utils.py b/tests/test_file_utils.py index 9d7d0698e0..3aa3d52956 100644 --- a/tests/test_file_utils.py +++ b/tests/test_file_utils.py @@ -1,4 +1,6 @@ import io +import os +from pathlib import Path from typing import List import pytest @@ -6,14 +8,17 @@ from unblob.file_utils import ( Endian, File, + FileSystem, InvalidInputFormat, StructParser, + chop_root, convert_int8, convert_int16, convert_int32, convert_int64, decode_multibyte_integer, get_endian, + is_safe_path, iterate_file, iterate_patterns, round_down, @@ -21,6 +26,26 @@ ) +@pytest.mark.parametrize( + "basedir, path, expected", + [ + ("/lib/out", "/lib/out/file", True), + ("/lib/out", "file", True), + ("/lib/out", "dir/file", True), + ("/lib/out", "some/dir/file", True), + ("/lib/out", "some/dir/../file", True), + ("/lib/out", "some/dir/../../file", True), + ("/lib/out", "some/dir/../../../file", False), + ("/lib/out", "some/dir/../../../", False), + ("/lib/out", "some/dir/../../..", False), + ("/lib/out", "../file", False), + ("/lib/out", "/lib/out/../file", False), + ], +) +def test_is_safe_path(basedir, path, expected): + assert is_safe_path(Path(basedir), Path(path)) is expected + + @pytest.mark.parametrize( "size, alignment, result", [ @@ -338,3 +363,198 @@ def test_get_endian_resets_the_file_pointer(self): with pytest.raises(InvalidInputFormat): get_endian(file, 0xFFFF_0000) assert file.tell() == pos + + +@pytest.mark.parametrize( + "input_path, expected", + [ + pytest.param("/", ".", id="absolute-root"), + pytest.param("/path/to/file", "path/to/file", id="absolute-path"), + pytest.param(".", ".", id="current-directory"), + pytest.param("path/to/file", "path/to/file", id="relative-path"), + ], +) +def test_chop_root(input_path: str, expected: str): + assert chop_root(Path(input_path)) == 
Path(expected) + + +class TestFileSystem: + @pytest.mark.parametrize( + "path", + [ + "/etc/passwd", + "file", + "some/dir/file", + "some/dir/../file", + "some/dir/../../file", + ], + ) + def test_get_checked_path_success(self, path): + fs = FileSystem(Path("/unblob/sandbox")) + checked_path = fs.get_checked_path(Path(path), "test") + assert checked_path + assert fs.problems == [] + assert checked_path.relative_to(fs.root) + + @pytest.mark.parametrize( + "path", + [ + "../file", + "some/dir/../../../file", + "some/dir/../../../", + "some/dir/../../..", + ], + ) + def test_get_checked_path_path_traversal_is_reported(self, path): + fs = FileSystem(Path("/unblob/sandbox")) + assert not fs.get_checked_path(Path(path), "test") + assert fs.problems + + def test_get_checked_path_path_traversal_reports(self): + fs = FileSystem(Path("/unblob/sandbox")) + op1 = f"test1-{object()}" + op2 = f"test2-{object()}" + assert op1 != op2 + assert not fs.get_checked_path(Path("../file"), op1) + assert not fs.get_checked_path(Path("../etc/passwd"), op2) + + report1, report2 = fs.problems + + assert "path traversal" in report1.problem + assert op1 in report1.problem + assert report1.path == "../file" + + assert "path traversal" in report2.problem + assert op2 in report2.problem + assert report2.path == "../etc/passwd" + + @pytest.fixture + def sandbox_parent(self, tmp_path: Path): + return tmp_path + + @pytest.fixture + def sandbox_root(self, sandbox_parent: Path): + return sandbox_parent / "sandbox" + + @pytest.fixture + def sandbox(self, sandbox_root: Path): + sandbox_root.mkdir(parents=True, exist_ok=True) + return FileSystem(sandbox_root) + + def test_carve(self, sandbox: FileSystem): + file = File.from_bytes(b"0123456789") + sandbox.carve(Path("carved"), file, 1, 2) + + assert (sandbox.root / "carved").read_bytes() == b"12" + assert sandbox.problems == [] + + def test_carve_outside_sandbox(self, sandbox: FileSystem): + file = File.from_bytes(b"0123456789") + 
sandbox.carve(Path("../carved"), file, 1, 2) + + assert not (sandbox.root / "../carved").exists() + assert sandbox.problems + + def test_mkdir(self, sandbox: FileSystem): + sandbox.mkdir(Path("directory")) + + assert (sandbox.root / "directory").is_dir() + assert sandbox.problems == [] + + def test_mkdir_outside_sandbox(self, sandbox: FileSystem): + sandbox.mkdir(Path("../directory")) + + assert not (sandbox.root / "../directory").exists() + assert sandbox.problems + + def test_mkfifo(self, sandbox: FileSystem): + sandbox.mkfifo(Path("named_pipe")) + + assert (sandbox.root / "named_pipe").is_fifo() + assert sandbox.problems == [] + + def test_mkfifo_outside_sandbox(self, sandbox: FileSystem): + sandbox.mkfifo(Path("../named_pipe")) + + assert not (sandbox.root / "../named_pipe").exists() + assert sandbox.problems + + def test_create_symlink(self, sandbox: FileSystem): + sandbox.create_symlink(Path("target file"), Path("symlink")) + + output_path = sandbox.root / "symlink" + assert not output_path.exists() + assert os.readlink(output_path) == "target file" + assert sandbox.problems == [] + + def test_create_symlink_absolute_paths(self, sandbox: FileSystem): + sandbox.write_bytes(Path("target file"), b"test content") + sandbox.create_symlink(Path("/target file"), Path("/symlink")) + + output_path = sandbox.root / "symlink" + assert output_path.exists() + assert os.readlink(output_path) == "target file" + assert sandbox.problems == [] + + def test_create_symlink_absolute_paths_self_referenced(self, sandbox: FileSystem): + sandbox.mkdir(Path("/etc")) + sandbox.create_symlink(Path("/etc/passwd"), Path("/etc/passwd")) + + output_path = sandbox.root / "etc/passwd" + assert not output_path.exists() + assert os.readlink(output_path) == "../etc/passwd" + assert sandbox.problems == [] + + def test_create_symlink_outside_sandbox(self, sandbox: FileSystem): + sandbox.create_symlink(Path("target file"), Path("../symlink")) + + output_path = sandbox.root / "../symlink" + assert 
not os.path.lexists(output_path) + assert sandbox.problems + + def test_create_symlink_path_traversal( + self, sandbox: FileSystem, sandbox_parent: Path + ): + """Document a remaining path traversal scenario through a symlink chain. + + unblob.extractor.fix_symlinks() exists to cover up cases like this. + """ + (sandbox_parent / "outer-secret").write_text("private key") + + # The path traversal is possible because at the creation of "secret", "future" does not exist yet, + # so it is not yet possible to determine if it will be a symlink to be allowed or not. + # When the order of the 2 lines below is swapped, the path traversal is recognized and prevented. + sandbox.create_symlink(Path("future/../outer-secret"), Path("secret")) + sandbox.create_symlink(Path("."), Path("future")) + + assert sandbox.problems == [] + assert (sandbox.root / "secret").read_text() == "private key" + + def test_create_hardlink(self, sandbox: FileSystem): + output_path = sandbox.root / "hardlink" + linked_file = sandbox.root / "file" + linked_file.write_bytes(b"") + sandbox.create_hardlink(Path("file"), Path("hardlink")) + + assert output_path.stat().st_nlink == 2 + assert output_path.stat().st_ino == linked_file.stat().st_ino + assert sandbox.problems == [] + + def test_create_hardlink_absolute_paths(self, sandbox: FileSystem): + output_path = sandbox.root / "hardlink" + linked_file = sandbox.root / "file" + linked_file.write_bytes(b"") + sandbox.create_hardlink(Path("/file"), Path("/hardlink")) + + assert output_path.stat().st_nlink == 2 + assert output_path.stat().st_ino == linked_file.stat().st_ino + assert sandbox.problems == [] + + def test_create_hardlink_outside_sandbox(self, sandbox: FileSystem): + output_path = sandbox.root / "../hardlink" + linked_file = sandbox.root / "file" + linked_file.write_bytes(b"") + sandbox.create_hardlink(Path("file"), Path("../hardlink")) + + assert not os.path.lexists(output_path) + assert sandbox.problems diff --git a/unblob/extractor.py
b/unblob/extractor.py index 31dd34dd9c..d88ea31f35 100644 --- a/unblob/extractor.py +++ b/unblob/extractor.py @@ -5,7 +5,7 @@ from structlog import get_logger -from .file_utils import iterate_file +from .file_utils import carve, is_safe_path from .models import Chunk, File, TaskResult, UnknownChunk, ValidChunk from .report import MaliciousSymlinkRemoved @@ -14,12 +14,8 @@ def carve_chunk_to_file(carve_path: Path, file: File, chunk: Chunk): """Extract valid chunk to a file, which we then pass to another tool to extract it.""" - carve_path.parent.mkdir(parents=True, exist_ok=True) logger.debug("Carving chunk", path=carve_path) - - with carve_path.open("xb") as f: - for data in iterate_file(file, chunk.start_offset, chunk.size): - f.write(data) + carve(carve_path, file, chunk.start_offset, chunk.size) def fix_permission(path: Path): @@ -32,14 +28,6 @@ def fix_permission(path: Path): path.chmod(0o775) -def is_safe_path(basedir: Path, path: Path) -> bool: - try: - basedir.joinpath(path).resolve().relative_to(basedir.resolve()) - except ValueError: - return False - return True - - def is_recursive_link(path: Path) -> bool: try: path.resolve() diff --git a/unblob/file_utils.py b/unblob/file_utils.py index 587e7f0728..b525c1b1f4 100644 --- a/unblob/file_utils.py +++ b/unblob/file_utils.py @@ -1,4 +1,5 @@ import enum +import functools import io import math import mmap @@ -6,13 +7,28 @@ import shutil import struct from pathlib import Path -from typing import Iterator, List, Tuple, Union +from typing import Iterable, Iterator, List, Optional, Tuple, Union from dissect.cstruct import Instance, cstruct +from structlog import get_logger from .logging import format_hex +from .report import ( + ExtractionProblem, + LinkExtractionProblem, + SpecialFileExtractionProblem, +) DEFAULT_BUFSIZE = shutil.COPY_BUFSIZE # type: ignore +logger = get_logger() + + +def is_safe_path(basedir: Path, path: Path) -> bool: + try: + basedir.joinpath(path).resolve().relative_to(basedir.resolve()) + 
except ValueError: + return False + return True class SeekError(ValueError): @@ -252,6 +268,15 @@ def iterate_file( yield data +def carve(carve_path: Path, file: File, start_offset: int, size: int): + """Extract part of a file.""" + carve_path.parent.mkdir(parents=True, exist_ok=True) + + with carve_path.open("xb") as f: + for data in iterate_file(file, start_offset, size): + f.write(data) + + def stream_scan(scanner, file: File): """Scan the whole file by increment of DEFAULT_BUFSIZE using Hyperscan's streaming mode.""" scanner.scan(file, DEFAULT_BUFSIZE) @@ -332,3 +357,204 @@ def read_until_past(file: File, pattern: bytes): return file.tell() if next_byte not in pattern: return file.tell() - 1 + + +def chop_root(path: Path): + """Make absolute paths relative by chopping off the root.""" + if not path.is_absolute(): + return path + + relative_parts = list(path.parts[1:]) + return Path("/".join(relative_parts)) + + +class _FSPath: + def __init__(self, *, root: Path, path: Path) -> None: + self.root = root + self.relative_path = chop_root(path) + self.absolute_path = root / self.relative_path + self.is_safe = is_safe_path(self.root, self.absolute_path) + + def format_path(self) -> str: + return str(self.relative_path) + + +class _FSLink: + def __init__(self, *, root: Path, src: Path, dst: Path) -> None: + self.dst = _FSPath(root=root, path=dst) + self.src = _FSPath(root=root, path=src) + self.is_safe = self.dst.is_safe and self.src.is_safe + + def format_report( + self, description, resolution="Skipped." + ) -> LinkExtractionProblem: + return LinkExtractionProblem( + problem=description, + resolution=resolution, + path=str(self.dst.relative_path), + link_path=str(self.src.relative_path), + ) + + +class FileSystem: + """Restricts file system operations to a directory. + + Path traversal violations are collected as a list of :ExtractionProblem:-s + and not reported immediately - violating operations look successful to the caller.
+ + All input paths are interpreted as relative to the root directory. + Absolute paths are converted to relative paths by dropping the root /. + There is one exception to this universal base: symlink targets, + which are relative to the directory containing the symbolic link, because + this is how symlinks work. + """ + + problems: List[ExtractionProblem] + + def __init__(self, root: Path): + self.root = root.resolve() + self.problems = [] + + def record_problem(self, problem: ExtractionProblem): + self.problems.append(problem) + problem.log_with(logger) + + @functools.cached_property + def has_root_permissions(self): + return os.geteuid() == 0 + + def _fs_path(self, path: Path) -> _FSPath: + return _FSPath(root=self.root, path=path) + + def get_checked_path(self, path: Path, path_use_description: str) -> Optional[Path]: + fs_path = self._fs_path(path) + if fs_path.is_safe: + return fs_path.absolute_path + + report = ExtractionProblem( + path=fs_path.format_path(), + problem=f"Potential path traversal through {path_use_description}", + resolution="Skipped.", + ) + self.record_problem(report) + return None + + def write_bytes(self, path: Path, content: bytes): + logger.debug("creating file", file_path=path, _verbosity=3) + safe_path = self.get_checked_path(path, "write_bytes") + + if safe_path: + safe_path.write_bytes(content) + + def write_chunks(self, path: Path, chunks: Iterable[bytes]): + logger.debug("creating file", file_path=path, _verbosity=3) + safe_path = self.get_checked_path(path, "write_chunks") + + if safe_path: + with safe_path.open("wb") as f: + for chunk in chunks: + f.write(chunk) + + def carve(self, path: Path, file: File, start_offset: int, size: int): + logger.debug("carving file", path=path, _verbosity=3) + safe_path = self.get_checked_path(path, "carve") + + if safe_path: + carve(safe_path, file, start_offset, size) + + def mkdir(self, path: Path, *, mode=0o777, parents=False, exist_ok=False): + logger.debug("creating directory", 
dir_path=path, _verbosity=3) + safe_path = self.get_checked_path(path, "mkdir") + + if safe_path: + safe_path.mkdir(mode=mode, parents=parents, exist_ok=exist_ok) + + def mkfifo(self, path: Path, mode=0o666): + logger.debug("creating fifo", path=path, _verbosity=3) + safe_path = self.get_checked_path(path, "mkfifo") + + if safe_path: + os.mkfifo(safe_path, mode=mode) + + def mknod(self, path: Path, mode=0o600, device=0): + logger.debug("creating special file", special_path=path, _verbosity=3) + safe_path = self.get_checked_path(path, "mknod") + + if safe_path: + if self.has_root_permissions: + os.mknod(safe_path, mode=mode, device=device) + else: + problem = SpecialFileExtractionProblem( + problem="Root privileges are required to create block and char devices.", + resolution="Skipped.", + path=str(path), + mode=mode, + device=device, + ) + self.record_problem(problem) + + def _get_checked_link(self, src: Path, dst: Path) -> Optional[_FSLink]: + link = _FSLink(root=self.root, src=src, dst=dst) + if link.is_safe: + return link + + self.record_problem(link.format_report("Potential path traversal through link")) + return None + + def _path_to_root(self, from_dir: Path) -> Path: + # This version does not look at the existing symlinks, so while it looks cleaner it is also + # somewhat less precise: + # + # os.path.relpath(self.root, start=self.root / chop_root(from_dir)) + # + # In contrast, the below version looks like a kludge, but using .resolve() actually + # calculates the correct path in more cases, even if it can still give a bad result due + # to ordering of symlink creation and resolve defaulting to non-strict checking. + # Calculation unfortunately might fall back to the potentially wrong string interpretation, + # which is the same as os.path.relpath, sharing the same failure case. 
+ # Ultimately we can not easily catch all symlink based path traversals here, so there + # still remains work for `unblob.extractor.fix_symlink()` + # + absolute_from_dir = (self.root / chop_root(from_dir)).resolve() + ups = len(absolute_from_dir.parts) - len(self.root.parts) + return Path("/".join(["."] + [".."] * ups)) + + def create_symlink(self, src: Path, dst: Path): + """Create a symlink dst with the link/content/target src.""" + logger.debug("creating symlink", file_path=dst, link_target=src, _verbosity=3) + + if src.is_absolute(): + # convert absolute paths to dst relative paths + # these would point to the same path if self.root would be the real root "/" + # but they are relocatable + src = self._path_to_root(dst.parent) / chop_root(src) + + safe_link = self._get_checked_link(src=dst.parent / src, dst=dst) + + if safe_link: + dst = safe_link.dst.absolute_path + dst.symlink_to(src) + + def create_hardlink(self, src: Path, dst: Path): + """Create a new hardlink dst to the existing file src.""" + logger.debug("creating hardlink", file_path=dst, link_target=src, _verbosity=3) + safe_link = self._get_checked_link(src=src, dst=dst) + + if safe_link: + try: + src = safe_link.src.absolute_path + dst = safe_link.dst.absolute_path + os.link(src, dst) + # FIXME: from python 3.10 change the above to + # dst.hardlink_to(src) + # so as to make it consistent with create_symlink + # (see Path.link_to vs Path.hardlink_to parameter order mess up) + except FileNotFoundError: + self.record_problem( + safe_link.format_report("Hard link target does not exist.") + ) + except PermissionError: + not_enough_privileges = ( + "Not enough privileges to create hardlink to block/char device." 
+ ) + self.record_problem(safe_link.format_report(not_enough_privileges)) diff --git a/unblob/handlers/archive/_safe_tarfile.py b/unblob/handlers/archive/_safe_tarfile.py index f421a86aef..0ecc2e081e 100644 --- a/unblob/handlers/archive/_safe_tarfile.py +++ b/unblob/handlers/archive/_safe_tarfile.py @@ -2,10 +2,10 @@ import tarfile from pathlib import Path -import attrs from structlog import get_logger from unblob.extractor import is_safe_path +from unblob.report import ExtractionProblem logger = get_logger() @@ -13,17 +13,10 @@ MAX_PATH_LEN = 255 -@attrs.define -class ProblematicTarMember: - tarinfo: tarfile.TarInfo - problem: str - resolution: str - - class SafeTarFile: def __init__(self, inpath: Path): self.inpath = inpath - self.problems = [] + self.reports = [] self.tarfile = tarfile.open(inpath) self.directories = {} @@ -143,4 +136,10 @@ def fix_directories(self, extract_root): def record_problem(self, tarinfo, problem, resolution): logger.warning(f"{problem} {resolution}", path=tarinfo.name) # noqa: G004 - self.problems.append(ProblematicTarMember(tarinfo, problem, resolution)) + self.reports.append( + ExtractionProblem( + path=tarinfo.name, + problem=problem, + resolution=resolution, + ) + ) diff --git a/unblob/handlers/archive/hp/ipkg.py b/unblob/handlers/archive/hp/ipkg.py index 520b999d55..07769f5356 100644 --- a/unblob/handlers/archive/hp/ipkg.py +++ b/unblob/handlers/archive/hp/ipkg.py @@ -5,9 +5,21 @@ from dissect.cstruct import Instance from structlog import get_logger -from unblob.extractor import carve_chunk_to_file, is_safe_path -from unblob.file_utils import Endian, File, InvalidInputFormat, StructParser, snull -from unblob.models import Chunk, Extractor, HexString, StructHandler, ValidChunk +from unblob.file_utils import ( + Endian, + File, + FileSystem, + InvalidInputFormat, + StructParser, + snull, +) +from unblob.models import ( + Extractor, + ExtractResult, + HexString, + StructHandler, + ValidChunk, +) logger = get_logger() @@ -54,6 +66,7 
@@ def __init__(self): def extract(self, inpath: Path, outdir: Path): entries = [] + fs = FileSystem(outdir) with File.from_path(inpath) as file: header = self._struct_parser.parse("ipkg_header_t", file, Endian.LITTLE) file.seek(header.toc_offset, io.SEEK_SET) @@ -64,28 +77,18 @@ def extract(self, inpath: Path, outdir: Path): entry_path = Path(snull(entry.name).decode("utf-8")) if entry_path.parent.name: raise InvalidInputFormat("Entry name contains directories.") - if not is_safe_path(outdir, entry_path): - logger.warning( - "Path traversal attempt, discarding.", - outdir=outdir, - ) - continue entries.append( ( - outdir.joinpath(outdir / entry_path.name), - Chunk( - start_offset=entry.offset, - end_offset=entry.offset + entry.size, - ), + Path(entry_path.name), + entry.offset, + entry.size, ) ) - for carve_path, chunk in entries: - carve_chunk_to_file( - file=file, - chunk=chunk, - carve_path=carve_path, - ) + for carve_path, start_offset, size in entries: + fs.carve(carve_path, file, start_offset, size) + + return ExtractResult(reports=list(fs.problems)) class HPIPKGHandler(StructHandler): diff --git a/unblob/handlers/archive/tar.py b/unblob/handlers/archive/tar.py index 77ffb6f052..92d3a76309 100644 --- a/unblob/handlers/archive/tar.py +++ b/unblob/handlers/archive/tar.py @@ -7,7 +7,14 @@ from structlog import get_logger from ...file_utils import OffsetFile, SeekError, decode_int, round_up, snull -from ...models import Extractor, File, HexString, StructHandler, ValidChunk +from ...models import ( + Extractor, + ExtractResult, + File, + HexString, + StructHandler, + ValidChunk, +) from ._safe_tarfile import SafeTarFile logger = get_logger() @@ -88,6 +95,7 @@ class TarExtractor(Extractor): def extract(self, inpath: Path, outdir: Path): with contextlib.closing(SafeTarFile(inpath)) as tarfile: tarfile.extractall(outdir) + return ExtractResult(reports=tarfile.reports) class TarHandler(StructHandler): diff --git a/unblob/handlers/archive/xiaomi/hdr.py 
b/unblob/handlers/archive/xiaomi/hdr.py index 9c1c4832ce..75b8a3554b 100644 --- a/unblob/handlers/archive/xiaomi/hdr.py +++ b/unblob/handlers/archive/xiaomi/hdr.py @@ -6,12 +6,17 @@ from dissect.cstruct import Instance from structlog import get_logger -from unblob.extractor import carve_chunk_to_file, is_safe_path -from unblob.file_utils import File, InvalidInputFormat, iterate_file, snull +from unblob.file_utils import ( + File, + FileSystem, + InvalidInputFormat, + iterate_file, + snull, +) from unblob.models import ( - Chunk, Endian, Extractor, + ExtractResult, HexString, StructHandler, StructParser, @@ -95,16 +100,13 @@ def __init__(self, header_struct: str): self._struct_parser = StructParser(C_DEFINITIONS) def extract(self, inpath: Path, outdir: Path): + fs = FileSystem(outdir) with File.from_path(inpath) as file: - for output_path, chunk in self.parse(file): - if not is_safe_path(outdir, output_path): - logger.warning( - "Path traversal attempt, discarding.", output_path=output_path - ) - return - carve_chunk_to_file(outdir.joinpath(output_path), file, chunk) - - def parse(self, file: File) -> Iterable[Tuple[Path, Chunk]]: + for output_path, start_offset, size in self.parse(file): + fs.carve(output_path, file, start_offset, size) + return ExtractResult(reports=list(fs.problems)) + + def parse(self, file: File) -> Iterable[Tuple[Path, int, int]]: header = self._struct_parser.parse(self.header_struct, file, Endian.LITTLE) for offset in cast(Iterable, header.blob_offsets): if not offset: @@ -118,14 +120,12 @@ def parse(self, file: File) -> Iterable[Tuple[Path, Chunk]]: if not is_valid_blob_header(blob_header): raise InvalidInputFormat("Invalid HDR blob header.") - # file.tell() points to right after the blob_header yield ( ( Path(snull(blob_header.name).decode("utf-8")), - Chunk( - start_offset=file.tell(), - end_offset=file.tell() + blob_header.size, - ), + # file.tell() points to right after the blob_header == start_offset + file.tell(), + blob_header.size, ) 
) diff --git a/unblob/handlers/filesystem/romfs.py b/unblob/handlers/filesystem/romfs.py index 1b3564c064..8e62a217fe 100644 --- a/unblob/handlers/filesystem/romfs.py +++ b/unblob/handlers/filesystem/romfs.py @@ -8,10 +8,21 @@ from structlog import get_logger -from unblob.extractor import is_safe_path - -from ...file_utils import Endian, InvalidInputFormat, read_until_past, round_up -from ...models import Extractor, File, HexString, StructHandler, ValidChunk +from ...file_utils import ( + Endian, + FileSystem, + InvalidInputFormat, + read_until_past, + round_up, +) +from ...models import ( + Extractor, + ExtractResult, + File, + HexString, + StructHandler, + ValidChunk, +) logger = get_logger() @@ -164,12 +175,12 @@ class RomFSHeader: file: File end_offset: int inodes: Dict[int, "FileHeader"] - extract_root: Path + fs: FileSystem def __init__( self, file: File, - extract_root: Path, + fs: FileSystem, ): self.file = file self.file.seek(0, io.SEEK_END) @@ -186,7 +197,7 @@ def __init__( self.header_end_offset = self.file.tell() self.inodes = {} - self.extract_root = extract_root + self.fs = fs def valid_checksum(self) -> bool: current_position = self.file.tell() @@ -242,101 +253,50 @@ def walk_dir(self, addr: int, parent: Optional[FileHeader] = None): self.inodes[addr] = file_header return file_header.next_filehdr - def create_symlink(self, extract_root: Path, output_path: Path, inode: FileHeader): - target = inode.content.decode("utf-8").lstrip("/") - - if target.startswith(".."): - target_path = extract_root.joinpath(output_path.parent, target).resolve() - else: - target_path = extract_root.joinpath(target).resolve() + def create_symlink(self, output_path: Path, inode: FileHeader): + target_path = Path(inode.content.decode("utf-8")) + self.fs.create_symlink(src=target_path, dst=output_path) - if not is_safe_path(extract_root, target_path): - logger.warning( - "Path traversal attempt through symlink.", target_path=target_path - ) - return - # we create relative paths 
to make the output directory portable - output_path.symlink_to(os.path.relpath(target_path, start=output_path.parent)) - - def create_hardlink(self, extract_root: Path, link_path: Path, inode: FileHeader): + def create_hardlink(self, output_path: Path, inode: FileHeader): if inode.spec_info in self.inodes: - target = str(self.inodes[inode.spec_info].path).lstrip("/") - target_path = extract_root.joinpath(target).resolve() - - # we don't need to check for potential traversal given that, if the inode - # is in self.inodes, it already got verified in create_inode. - try: - os.link(target_path, link_path) - except FileNotFoundError: - logger.warning( - "Hard link target does not exist, discarding.", - target_path=target_path, - link_path=link_path, - ) - except PermissionError: - logger.warning( - "Not enough privileges to create hardlink to block/char device, discarding.", - target_path=target_path, - link_path=link_path, - ) + target_path = self.inodes[inode.spec_info].path + self.fs.create_hardlink(dst=output_path, src=target_path) else: logger.warning("Invalid hard link target", inode_key=inode.spec_info) - def create_inode(self, extract_root: Path, inode: FileHeader): - output_path = extract_root.joinpath(inode.path).resolve() - if not is_safe_path(extract_root, inode.path): - logger.warning( - "Path traversal attempt, discarding.", output_path=output_path - ) - return + def create_inode(self, inode: FileHeader): + output_path = inode.path logger.info("dumping inode", inode=inode, output_path=str(output_path)) if inode.fs_type == FSType.HARD_LINK: - self.create_hardlink(extract_root, output_path, inode) + self.create_hardlink(output_path, inode) elif inode.fs_type == FSType.SYMLINK: - self.create_symlink(extract_root, output_path, inode) + self.create_symlink(output_path, inode) elif inode.fs_type == FSType.DIRECTORY: - output_path.mkdir(mode=inode.mode, exist_ok=True) + self.fs.mkdir(output_path, mode=inode.mode, exist_ok=True) elif inode.fs_type == FSType.FILE: 
- with output_path.open("wb") as f: - f.write(inode.content) + self.fs.write_bytes(output_path, inode.content) elif inode.fs_type in [FSType.BLOCK_DEV, FSType.CHAR_DEV]: - os.mknod(inode.path, inode.mode, inode.dev) + self.fs.mknod(output_path, mode=inode.mode, device=inode.dev) elif inode.fs_type == FSType.FIFO: - os.mkfifo(output_path, inode.mode) + self.fs.mkfifo(output_path, mode=inode.mode) def dump_fs(self): - # first we create files and directories - fd_inodes = { - k: v - for k, v in self.inodes.items() - if v.fs_type in [FSType.FILE, FSType.DIRECTORY, FSType.FIFO, FSType.SOCKET] - } - for inode in sorted(fd_inodes.values(), key=lambda inode: inode.path): - self.create_inode(self.extract_root, inode) - - if os.geteuid() != 0: - logger.warning( - "root privileges are required to create block and char devices, skipping." + def inodes(*inode_types): + return sorted( + (v for v in self.inodes.values() if v.fs_type in inode_types), + key=lambda inode: inode.path, ) - else: - # then we create devices if we have enough privileges - dev_inodes = { - k: v - for k, v in self.inodes.items() - if v.fs_type in [FSType.BLOCK_DEV, FSType.CHAR_DEV] - } - for inode in sorted(dev_inodes.values(), key=lambda inode: inode.path): - self.create_inode(self.extract_root, inode) - - # then we create links - links_inodes = { - k: v - for k, v in self.inodes.items() - if v.fs_type in [FSType.SYMLINK, FSType.HARD_LINK] - } - for inode in sorted(links_inodes.values(), key=lambda inode: inode.path): - self.create_inode(self.extract_root, inode) + + # order of file object creation is important + sorted_inodes = ( + inodes(FSType.FILE, FSType.DIRECTORY, FSType.FIFO, FSType.SOCKET) + + inodes(FSType.BLOCK_DEV, FSType.CHAR_DEV) + + inodes(FSType.SYMLINK, FSType.HARD_LINK) + ) + + for inode in sorted_inodes: + self.create_inode(inode) def __str__(self): return f"signature: {self.signature}\nfull_size: {self.full_size}\nchecksum: {self.checksum}\nvolume_name: {self.volume_name}" @@ -344,11 
+304,13 @@ def __str__(self): class RomfsExtractor(Extractor): def extract(self, inpath: Path, outdir: Path): + fs = FileSystem(outdir) with File.from_path(inpath) as f: - header = RomFSHeader(f, outdir) + header = RomFSHeader(f, fs) header.validate() header.recursive_walk(header.header_end_offset, None) header.dump_fs() + return ExtractResult(reports=list(fs.problems)) class RomFSFSHandler(StructHandler): diff --git a/unblob/handlers/filesystem/yaffs.py b/unblob/handlers/filesystem/yaffs.py index 175d341b53..f69383c972 100644 --- a/unblob/handlers/filesystem/yaffs.py +++ b/unblob/handlers/filesystem/yaffs.py @@ -1,6 +1,5 @@ import io import itertools -import os from collections import defaultdict from enum import IntEnum from pathlib import Path @@ -12,17 +11,17 @@ from treelib import Tree from treelib.exceptions import NodeIDAbsentError -from unblob.extractor import is_safe_path from unblob.file_utils import ( Endian, File, + FileSystem, InvalidInputFormat, StructParser, get_endian_multi, read_until_past, snull, ) -from unblob.models import Extractor, Handler, HexString, ValidChunk +from unblob.models import Extractor, ExtractResult, Handler, HexString, ValidChunk logger = get_logger() @@ -470,84 +469,42 @@ def resolve_path(self, entry: YAFFSEntry) -> Path: return self.resolve_path(parent_entry).joinpath(resolved_path) return resolved_path - def get_file_bytes(self, entry: YAFFSEntry) -> Iterable[bytes]: + def get_file_chunks(self, entry: YAFFSEntry) -> Iterable[bytes]: for chunk in self.get_chunks(entry.object_id): yield self.file[chunk.offset : chunk.offset + chunk.byte_count] - def extract(self, outdir: Path): + def extract(self, fs: FileSystem): for entry in [ self.file_entries.get_node(node) for node in self.file_entries.expand_tree(mode=Tree.DEPTH) ]: if entry is None or entry.data is None: continue - self.extract_entry(entry.data, outdir) + self.extract_entry(entry.data, fs) - def extract_entry(self, entry: YAFFSEntry, outdir: Path): # noqa: C901 + def 
extract_entry(self, entry: YAFFSEntry, fs: FileSystem): if entry.object_type == YaffsObjectType.UNKNOWN: - logger.warning("unknown type entry", entry=entry) + logger.warning("unknown entry type", entry=entry) return - entry_path = self.resolve_path(entry) - - if not is_safe_path(outdir, entry_path): - logger.warning( - "Potential path traversal attempt", outdir=outdir, path=entry_path - ) - return - - out_path = outdir.joinpath(entry_path) + out_path = self.resolve_path(entry) if entry.object_type == YaffsObjectType.SPECIAL: if not isinstance(entry, YAFFS2Entry): logger.warning("non YAFFS2 special object", entry=entry) return - if os.geteuid() == 0: - logger.debug( - "creating special file", special_path=out_path, _verbosity=3 - ) - os.mknod(out_path.as_posix(), entry.st_mode, entry.st_rdev) - else: - logger.warning( - "creating special files requires elevated privileges, skipping.", - path=out_path, - st_mode=entry.st_mode, - st_rdev=entry.st_rdev, - ) - return - - if entry.object_type == YaffsObjectType.DIRECTORY: - logger.debug("creating directory", dir_path=out_path, _verbosity=3) - out_path.mkdir(exist_ok=True) + fs.mknod(out_path, entry.st_mode, entry.st_rdev) + elif entry.object_type == YaffsObjectType.DIRECTORY: + fs.mkdir(out_path, exist_ok=True) elif entry.object_type == YaffsObjectType.FILE: - logger.debug("creating file", file_path=out_path, _verbosity=3) - with out_path.open("wb") as f: - for chunk in self.get_file_bytes(entry): - f.write(chunk) + fs.write_chunks(out_path, self.get_file_chunks(entry)) elif entry.object_type == YaffsObjectType.SYMLINK: - if not is_safe_path(outdir, out_path.parent / Path(entry.alias)): - logger.warning( - "Potential path traversal attempt through symlink", - outdir=outdir, - path=entry.alias, - ) - return - logger.debug("creating symlink", file_path=out_path, _verbosity=3) - out_path.symlink_to(Path(entry.alias)) + fs.create_symlink(src=Path(entry.alias), dst=out_path) elif entry.object_type == YaffsObjectType.HARDLINK: 
- logger.debug("creating hardlink", file_path=out_path, _verbosity=3) dst_entry = self.file_entries[entry.equiv_id].data dst_path = self.resolve_path(dst_entry) - if not is_safe_path(outdir, dst_path): - logger.warning( - "Potential path traversal attempt through hardlink", - outdir=outdir, - path=dst_path, - ) - return - dst_full_path = outdir / dst_path - dst_full_path.link_to(out_path) + fs.create_hardlink(src=dst_path, dst=out_path) class YAFFS2Parser(YAFFSParser): @@ -765,7 +722,9 @@ def extract(self, inpath: Path, outdir: Path): infile = File.from_path(inpath) parser = instantiate_parser(infile) parser.parse(store=True) - parser.extract(outdir) + fs = FileSystem(outdir) + parser.extract(fs) + return ExtractResult(reports=list(fs.problems)) class YAFFSHandler(Handler): diff --git a/unblob/models.py b/unblob/models.py index 32e2e7dcac..ecf218c1d9 100644 --- a/unblob/models.py +++ b/unblob/models.py @@ -103,7 +103,7 @@ class ValidChunk(Chunk): handler: "Handler" = attr.ib(init=False, eq=False) is_encrypted: bool = attr.ib(default=False) - def extract(self, inpath: Path, outdir: Path): + def extract(self, inpath: Path, outdir: Path) -> Optional["ExtractResult"]: if self.is_encrypted: logger.warning( "Encrypted file is not extracted", @@ -112,7 +112,7 @@ def extract(self, inpath: Path, outdir: Path): ) raise ExtractError - self.handler.extract(inpath, outdir) + return self.handler.extract(inpath, outdir) def as_report(self, extraction_reports: List[Report]) -> ChunkReport: return ChunkReport( @@ -130,7 +130,7 @@ def as_report(self, extraction_reports: List[Report]) -> ChunkReport: class UnknownChunk(Chunk): r"""Gaps between valid chunks or otherwise unknown chunks. - Important for manual analysis, and analytical certanity: for example + Important for manual analysis, and analytical certainty: for example entropy, other chunks inside it, metadata, etc. 
These are not extracted, just logged for information purposes and further analysis, @@ -154,8 +154,8 @@ class MultiFile(Blob): handler: "DirectoryHandler" = attr.ib(init=False, eq=False) - def extract(self, outdir: Path): - self.handler.extract(self.paths, outdir) + def extract(self, outdir: Path) -> Optional["ExtractResult"]: + return self.handler.extract(self.paths, outdir) def as_report(self, extraction_reports: List[Report]) -> MultiFileReport: return MultiFileReport( @@ -253,13 +253,18 @@ def __init__(self, *reports: Report): self.reports: Tuple[Report, ...] = reports +@attr.define(kw_only=True) +class ExtractResult: + reports: List[Report] + + class Extractor(abc.ABC): def get_dependencies(self) -> List[str]: """Return the external command dependencies.""" return [] @abc.abstractmethod - def extract(self, inpath: Path, outdir: Path): + def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]: """Extract the carved out chunk. Raises ExtractError on failure. @@ -272,7 +277,7 @@ def get_dependencies(self) -> List[str]: return [] @abc.abstractmethod - def extract(self, paths: List[Path], outdir: Path): + def extract(self, paths: List[Path], outdir: Path) -> Optional[ExtractResult]: """Extract from a multi file path list. Raises ExtractError on failure. 
@@ -381,7 +386,7 @@ def get_dependencies(cls): def calculate_multifile(self, file: Path) -> Optional[MultiFile]: """Calculate the MultiFile in a directory, using a file matched by the pattern as a starting point.""" - def extract(self, paths: List[Path], outdir: Path): + def extract(self, paths: List[Path], outdir: Path) -> Optional[ExtractResult]: if self.EXTRACTOR is None: logger.debug("Skipping file: no extractor.", paths=paths) raise ExtractError @@ -389,7 +394,7 @@ def extract(self, paths: List[Path], outdir: Path): # We only extract every blob once, it's a mistake to extract the same blob again outdir.mkdir(parents=True, exist_ok=False) - self.EXTRACTOR.extract(paths, outdir) + return self.EXTRACTOR.extract(paths, outdir) class Handler(abc.ABC): @@ -414,7 +419,7 @@ def get_dependencies(cls): def calculate_chunk(self, file: File, start_offset: int) -> Optional[ValidChunk]: """Calculate the Chunk offsets from the File and the file type headers.""" - def extract(self, inpath: Path, outdir: Path): + def extract(self, inpath: Path, outdir: Path) -> Optional[ExtractResult]: if self.EXTRACTOR is None: logger.debug("Skipping file: no extractor.", path=inpath) raise ExtractError @@ -422,7 +427,7 @@ def extract(self, inpath: Path, outdir: Path): # We only extract every blob once, it's a mistake to extract the same blob again outdir.mkdir(parents=True, exist_ok=False) - self.EXTRACTOR.extract(inpath, outdir) + return self.EXTRACTOR.extract(inpath, outdir) class StructHandler(Handler): diff --git a/unblob/processing.py b/unblob/processing.py index 9776e96c69..209b4c5d37 100644 --- a/unblob/processing.py +++ b/unblob/processing.py @@ -403,7 +403,8 @@ def _extract_multi_file(self, multi_file: MultiFile) -> Path: extraction_reports = [] try: - multi_file.extract(extract_dir) + if result := multi_file.extract(extract_dir): + extraction_reports.extend(result.reports) except ExtractError as e: extraction_reports.extend(e.reports) except Exception as exc: @@ -522,7 +523,7 @@ 
def _calculate_entropy(self, path: Path) -> Optional[EntropyReport]: return report return None - def _extract_chunk(self, file, chunk: ValidChunk): + def _extract_chunk(self, file, chunk: ValidChunk): # noqa: C901 skip_carving = chunk.is_whole_file if skip_carving: inpath = self.task.path @@ -554,7 +555,8 @@ def _extract_chunk(self, file, chunk: ValidChunk): extraction_reports = [] try: - chunk.extract(inpath, extract_dir) + if result := chunk.extract(inpath, extract_dir): + extraction_reports.extend(result.reports) if carved_path and not self.config.keep_extracted_chunks: logger.debug("Removing extracted chunk", path=carved_path) diff --git a/unblob/report.py b/unblob/report.py index a0cbed9004..599ec62ed5 100644 --- a/unblob/report.py +++ b/unblob/report.py @@ -244,3 +244,47 @@ class MultiFileReport(Report): name: str paths: List[Path] extraction_reports: List[Report] + + +@attr.define(kw_only=True, frozen=True) +class ExtractionProblem(Report): + """A non-fatal problem discovered during extraction. + + A report like this still means that the extraction was successful, + but there were problems that got resolved. + The output is expected to be complete, with the exception of + the reported path. 
+ + Examples + -------- + - duplicate entries for certain archive formats (tar, zip) + - unsafe symlinks pointing outside of extraction directory + """ + + problem: str + resolution: str + path: Optional[str] = None + + @property + def log_msg(self): + return f"{self.problem} {self.resolution}" + + def log_with(self, logger): + logger.warning(self.log_msg, path=self.path) + + +@attr.define(kw_only=True, frozen=True) +class LinkExtractionProblem(ExtractionProblem): + link_path: str + + def log_with(self, logger): + logger.warning(self.log_msg, path=self.path, link_path=self.link_path) + + +@attr.define(kw_only=True, frozen=True) +class SpecialFileExtractionProblem(ExtractionProblem): + mode: int + device: int + + def log_with(self, logger): + logger.warning(self.log_msg, path=self.path, mode=self.mode, device=self.device)