diff --git a/.envrc b/.envrc
index 64d3275a96..9ed06114c2 100644
--- a/.envrc
+++ b/.envrc
@@ -1,4 +1,10 @@
-watch_file flake.nix flake.lock devenv.nix
-use nix \
-    --option extra-substituters "https://unblob.cachix.org" \
-    --option extra-trusted-public-keys "unblob.cachix.org-1:5kWA6DwOg176rSqU8TOTBXWxsDB4LoCMfGfTgL5qCAE="
+# Source further custom features from .env.user if exists
+# Also allows users to disable/override features in this file
+source_env_if_exists .envrc.user
+
+if ${UNBLOB_USE_DEVENV:-true}; then
+  watch_file flake.nix flake.lock devenv.nix
+  use nix \
+    --option extra-substituters "https://unblob.cachix.org" \
+    --option extra-trusted-public-keys "unblob.cachix.org-1:5kWA6DwOg176rSqU8TOTBXWxsDB4LoCMfGfTgL5qCAE="
+fi
diff --git a/.gitattributes b/.gitattributes
index 0fb9031300..894545dc5d 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,2 @@
 tests/integration/** filter=lfs diff=lfs merge=lfs -text
+tests/files/** filter=lfs diff=lfs merge=lfs -text
diff --git a/tests/files/suffixes/__input__/chunks b/tests/files/suffixes/__input__/chunks
new file mode 100644
index 0000000000..28099296da
--- /dev/null
+++ b/tests/files/suffixes/__input__/chunks
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e33039682514a23548f4c6f9b7ed2af6dfbfca8d921fdf7845b0ef7255725b4
+size 512
diff --git a/tests/files/suffixes/__input__/collisions.zip b/tests/files/suffixes/__input__/collisions.zip
new file mode 100644
index 0000000000..31d9f68784
--- /dev/null
+++ b/tests/files/suffixes/__input__/collisions.zip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33fc006557dbaa40deb732ecfd1ac7921b8397636303ac2d153fa2ab477b21b3
+size 1119
diff --git a/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/0-160.gzip_e/gzip.uncompressed b/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/0-160.gzip_e/gzip.uncompressed
new file mode 100644
index 0000000000..f7cd2c92fb
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/0-160.gzip_e/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d4e5c77838e0aa5cb6647c385c810a7c2782bf769029e6c420052048ab22bb
+size 292
diff --git a/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/160-375.gzip_e/gzip.uncompressed b/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/160-375.gzip_e/gzip.uncompressed
new file mode 100644
index 0000000000..a92657ea1c
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/160-375.gzip_e/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a91b12613458d22a3fbf6d66ff2fba42124b46ea0e032f68c3564cc0c506ec43
+size 404
diff --git a/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/375-512.padding b/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/375-512.padding
new file mode 100644
index 0000000000..dd4e08a3fc
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/_c_e/chunks_c/375-512.padding
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a93894f08d98d707cd9a0274f4c9a51bcfa27e701359e12befcc78ffb488817
+size 137
diff --git a/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..f7cd2c92fb
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d4e5c77838e0aa5cb6647c385c810a7c2782bf769029e6c420052048ab22bb
+size 292
diff --git a/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/160-375.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/160-375.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..a92657ea1c
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/160-375.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a91b12613458d22a3fbf6d66ff2fba42124b46ea0e032f68c3564cc0c506ec43
+size 404
diff --git a/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/375-512.padding b/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/375-512.padding
new file mode 100644
index 0000000000..dd4e08a3fc
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/_carve_extract/chunks_carve/375-512.padding
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a93894f08d98d707cd9a0274f4c9a51bcfa27e701359e12befcc78ffb488817
+size 137
diff --git a/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/0-160.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/0-160.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..f7cd2c92fb
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/0-160.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d4e5c77838e0aa5cb6647c385c810a7c2782bf769029e6c420052048ab22bb
+size 292
diff --git a/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/160-375.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/160-375.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..a92657ea1c
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/160-375.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a91b12613458d22a3fbf6d66ff2fba42124b46ea0e032f68c3564cc0c506ec43
+size 404
diff --git a/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/375-512.padding b/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/375-512.padding
new file mode 100644
index 0000000000..dd4e08a3fc
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/chunks/defaults/chunks_extract/375-512.padding
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a93894f08d98d707cd9a0274f4c9a51bcfa27e701359e12befcc78ffb488817
+size 137
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks
new file mode 100644
index 0000000000..28099296da
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e33039682514a23548f4c6f9b7ed2af6dfbfca8d921fdf7845b0ef7255725b4
+size 512
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/0-160.gzip_e/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/0-160.gzip_e/gzip.uncompressed
new file mode 100644
index 0000000000..f7cd2c92fb
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/0-160.gzip_e/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d4e5c77838e0aa5cb6647c385c810a7c2782bf769029e6c420052048ab22bb
+size 292
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/160-375.gzip_e/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/160-375.gzip_e/gzip.uncompressed
new file mode 100644
index 0000000000..a92657ea1c
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/160-375.gzip_e/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a91b12613458d22a3fbf6d66ff2fba42124b46ea0e032f68c3564cc0c506ec43
+size 404
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/375-512.padding b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/375-512.padding
new file mode 100644
index 0000000000..dd4e08a3fc
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_c/375-512.padding
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a93894f08d98d707cd9a0274f4c9a51bcfa27e701359e12befcc78ffb488817
+size 137
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip
new file mode 100644
index 0000000000..4dfcc2ba5a
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04c4ab621ccdb9267ceabad58d0f801f20071533c2f01220711e86ef5e13ac40
+size 160
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip_e/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip_e/gzip.uncompressed
new file mode 100644
index 0000000000..f7cd2c92fb
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip_e/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d4e5c77838e0aa5cb6647c385c810a7c2782bf769029e6c420052048ab22bb
+size 292
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..e246ce0a08
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_c_e/collisions.zip_e/chunks_carve/0-160.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1621be95040239ee14362c16e20510ddc20f527d772d823b2a1679b33f5cd74
+size 15
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks b/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks
new file mode 100644
index 0000000000..28099296da
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e33039682514a23548f4c6f9b7ed2af6dfbfca8d921fdf7845b0ef7255725b4
+size 512
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks_carve/0-160.gzip b/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks_carve/0-160.gzip
new file mode 100644
index 0000000000..4dfcc2ba5a
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks_carve/0-160.gzip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04c4ab621ccdb9267ceabad58d0f801f20071533c2f01220711e86ef5e13ac40
+size 160
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..e246ce0a08
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/_carve_extract/collisions.zip_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1621be95040239ee14362c16e20510ddc20f527d772d823b2a1679b33f5cd74
+size 15
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks
new file mode 100644
index 0000000000..28099296da
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e33039682514a23548f4c6f9b7ed2af6dfbfca8d921fdf7845b0ef7255725b4
+size 512
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_carve/0-160.gzip b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_carve/0-160.gzip
new file mode 100644
index 0000000000..4dfcc2ba5a
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_carve/0-160.gzip
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04c4ab621ccdb9267ceabad58d0f801f20071533c2f01220711e86ef5e13ac40
+size 160
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..e246ce0a08
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_carve/0-160.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1621be95040239ee14362c16e20510ddc20f527d772d823b2a1679b33f5cd74
+size 15
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/0-160.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/0-160.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..f7cd2c92fb
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/0-160.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d4e5c77838e0aa5cb6647c385c810a7c2782bf769029e6c420052048ab22bb
+size 292
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/160-375.gzip_extract/gzip.uncompressed b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/160-375.gzip_extract/gzip.uncompressed
new file mode 100644
index 0000000000..a92657ea1c
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/160-375.gzip_extract/gzip.uncompressed
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a91b12613458d22a3fbf6d66ff2fba42124b46ea0e032f68c3564cc0c506ec43
+size 404
diff --git a/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/375-512.padding b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/375-512.padding
new file mode 100644
index 0000000000..dd4e08a3fc
--- /dev/null
+++ b/tests/files/suffixes/__outputs__/collisions.zip/defaults/collisions.zip_extract/chunks_extract/375-512.padding
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a93894f08d98d707cd9a0274f4c9a51bcfa27e701359e12befcc78ffb488817
+size 137
diff --git a/tests/files/suffixes/chunks b/tests/files/suffixes/chunks
new file mode 100644
index 0000000000..28099296da
--- /dev/null
+++ b/tests/files/suffixes/chunks
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e33039682514a23548f4c6f9b7ed2af6dfbfca8d921fdf7845b0ef7255725b4
+size 512
diff --git a/tests/integration/archive/ar/__output__/offset_malformed.ar_extract/.gitkeep b/tests/integration/archive/rar/password/__output__/.gitkeep
similarity index 100%
rename from tests/integration/archive/ar/__output__/offset_malformed.ar_extract/.gitkeep
rename to tests/integration/archive/rar/password/__output__/.gitkeep
diff --git a/tests/integration/archive/rar/password/__output__/cherry_password.rar_extract/.gitkeep b/tests/integration/archive/zip/encrypted/__output__/.gitkeep
similarity index 100%
rename from tests/integration/archive/rar/password/__output__/cherry_password.rar_extract/.gitkeep
rename to tests/integration/archive/zip/encrypted/__output__/.gitkeep
diff --git a/tests/integration/archive/zip/encrypted/__output__/apple_encrypted.zip_extract/.gitkeep b/tests/integration/archive/zip/encrypted/__output__/apple_encrypted.zip_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/archive/tar/__output__/truncated-header.tar_extract/.gitkeep b/tests/integration/archive/zip/partly_encrypted/__output__/.gitkeep
similarity index 100%
rename from tests/integration/archive/tar/__output__/truncated-header.tar_extract/.gitkeep
rename to tests/integration/archive/zip/partly_encrypted/__output__/.gitkeep
diff --git a/tests/integration/archive/zip/partly_encrypted/__output__/kaki1_aes.zip_extract/.gitkeep b/tests/integration/archive/zip/partly_encrypted/__output__/kaki1_aes.zip_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/compression/lzma/__output__/lorem.txt.small_size.lzma_extract/.gitkeep b/tests/integration/compression/lzma/__output__/lorem.txt.small_size.lzma_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/compression/lzma/__output__/lorem.txt.smaller_than_compressed.lzma_extract/.gitkeep b/tests/integration/compression/lzma/__output__/lorem.txt.smaller_than_compressed.lzma_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/compression/zstd/__output__/lorem.truncated.zstd_extract/.gitkeep b/tests/integration/compression/zstd/__output__/lorem.truncated.zstd_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/compression/zstd/__output__/truncated.dos.zstd_extract/.gitkeep b/tests/integration/compression/zstd/__output__/truncated.dos.zstd_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/executable/elf/elf32/__output__/sample_32_prg_end.elf_extract/.gitkeep b/tests/integration/executable/elf/elf32/__output__/sample_32_prg_end.elf_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/executable/elf/elf64/__output__/hellomod.sha1.ko_extract/.gitkeep b/tests/integration/executable/elf/elf64/__output__/hellomod.sha1.ko_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/executable/elf/elf64/__output__/hellomod.sha1.nonsigned.ko_extract/.gitkeep b/tests/integration/executable/elf/elf64/__output__/hellomod.sha1.nonsigned.ko_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/executable/elf/elf64/__output__/hellomod.sha256.ko_extract/.gitkeep b/tests/integration/executable/elf/elf64/__output__/hellomod.sha256.ko_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/executable/elf/elf64/__output__/hellomod.sha384.ko_extract/.gitkeep b/tests/integration/executable/elf/elf64/__output__/hellomod.sha384.ko_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/integration/executable/elf/elf64/__output__/hellomod.sha512.ko_extract/.gitkeep b/tests/integration/executable/elf/elf64/__output__/hellomod.sha512.ko_extract/.gitkeep
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 9f11d3bbf4..44f0ebdf79 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -314,7 +314,7 @@ def test_keep_extracted_chunks(
     [
         pytest.param([], 5, id="skip-extension-empty"),
         pytest.param([""], 5, id="skip-zip-extension-empty-suffix"),
-        pytest.param([".zip"], 1, id="skip-extension-zip"),
+        pytest.param([".zip"], 0, id="skip-extension-zip"),
         pytest.param([".rlib"], 5, id="skip-extension-rlib"),
     ],
 )
diff --git a/tests/test_processing.py b/tests/test_processing.py
index d1534a8db3..bd1c82e2c6 100644
--- a/tests/test_processing.py
+++ b/tests/test_processing.py
@@ -34,11 +34,11 @@
 )
 from unblob.report import (
     ChunkReport,
-    ExtractDirectoryExistsReport,
     FileMagicReport,
     HashReport,
     MultiFileCollisionReport,
     MultiFileReport,
+    OutputDirectoryExistsReport,
     RandomnessMeasurements,
     RandomnessReport,
     StatReport,
@@ -350,7 +350,7 @@ def test_process_file_prevents_double_extracts(tmp_path: Path, fw: Path):
 
     # we expect exactly 1 problem reported, related to the extraction of "internal.zip"
     [report] = process_result.errors
-    assert isinstance(report, ExtractDirectoryExistsReport)
+    assert isinstance(report, OutputDirectoryExistsReport)
     assert report.path.name == "internal.zip_extract"
 
     # the rest should be the same, except that the extraction is shifted with one extra directory
@@ -819,7 +819,7 @@ def test_multi_file_extract_dir(
     multi_file_reports = task_result_by_path[directory].filter_reports(MultiFileReport)
     assert multi_file_reports
     assert any(
-        isinstance(report, ExtractDirectoryExistsReport)
+        isinstance(report, OutputDirectoryExistsReport)
         for report in multi_file_reports[0].extraction_reports
     )
 
diff --git a/tests/test_processing_suffixes.py b/tests/test_processing_suffixes.py
new file mode 100644
index 0000000000..d3a0e017fa
--- /dev/null
+++ b/tests/test_processing_suffixes.py
@@ -0,0 +1,100 @@
+from pathlib import Path
+
+import pytest
+
+from unblob.processing import ExtractionConfig, process_file
+from unblob.report import OutputDirectoryExistsReport
+from unblob.testing import check_output_is_the_same
+
+TEST_DATA_PATH = Path(__file__).parent / "files/suffixes"
+
+
+def _patch(extraction_config: ExtractionConfig, carve_suffix: str, extract_suffix: str):
+    extraction_config.keep_extracted_chunks = False
+    extraction_config.carve_suffix = carve_suffix
+    extraction_config.extract_suffix = extract_suffix
+
+
+@pytest.mark.parametrize(
+    "carve_suffix,extract_suffix,output_root_dir_name",
+    [
+        ("_extract", "_extract", "defaults"),
+        ("_c", "_e", "_c_e"),
+        ("_carve", "_extract", "_carve_extract"),
+    ],
+)
+def test_top_level_carve(
+    carve_suffix: str,
+    extract_suffix: str,
+    output_root_dir_name: str,
+    extraction_config: ExtractionConfig,
+):
+    _patch(extraction_config, carve_suffix, extract_suffix)
+    input_file = TEST_DATA_PATH / "__input__/chunks"
+    carve_dir_name = input_file.name + extraction_config.carve_suffix
+    extract_dir_name = input_file.name + extraction_config.extract_suffix
+    expected_output_dir = TEST_DATA_PATH / "__outputs__/chunks" / output_root_dir_name
+
+    reports = process_file(extraction_config, input_file)
+
+    assert reports.errors == []
+
+    assert (
+        carve_dir_name == extract_dir_name
+        or not (extraction_config.extract_root / extract_dir_name).exists()
+    )
+    check_output_is_the_same(expected_output_dir, extraction_config.extract_root)
+
+
+EXPECTED_COLLISION_PATHS: "dict[tuple[str, str], set]" = {
+    ("_extract", "_extract"): {
+        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
+    },
+    ("_carve", "_extract"): {
+        "collisions.zip_extract/chunks_carve",
+        "collisions.zip_extract/chunks_carve/0-160.gzip_extract",
+    },
+}
+
+
+@pytest.mark.parametrize(
+    "carve_suffix,extract_suffix,output_root_dir_name",
+    [
+        ("_extract", "_extract", "defaults"),
+        ("_c", "_e", "_c_e"),
+        ("_carve", "_extract", "_carve_extract"),
+    ],
+)
+def test_top_level_extract_and_collisions(
+    carve_suffix: str,
+    extract_suffix: str,
+    output_root_dir_name: str,
+    extraction_config: ExtractionConfig,
+):
+    _patch(extraction_config, carve_suffix, extract_suffix)
+    input_file = TEST_DATA_PATH / "__input__/collisions.zip"
+    carve_dir_name = input_file.name + extraction_config.carve_suffix
+    extract_dir_name = input_file.name + extraction_config.extract_suffix
+    expected_output_dir = (
+        TEST_DATA_PATH / "__outputs__/collisions.zip" / output_root_dir_name
+    )
+
+    reports = process_file(extraction_config, input_file)
+
+    # check collision problems - the input was prepared to have collisions
+    # during both the carving and extracting phases
+    problem_paths = {
+        e.path.relative_to(extraction_config.extract_root).as_posix()
+        for e in reports.errors
+        if isinstance(e, OutputDirectoryExistsReport)
+    }
+    key = (carve_suffix, extract_suffix)
+    assert problem_paths == EXPECTED_COLLISION_PATHS.get(key, set())
+    # we expect only OutputDirectoryExistsReport-s
+    assert len(reports.errors) == len(problem_paths)
+
+    assert (
+        carve_dir_name == extract_dir_name
+        or not (extraction_config.extract_root / carve_dir_name).exists()
+    )
+    check_output_is_the_same(expected_output_dir, extraction_config.extract_root)
diff --git a/tests/test_report.py b/tests/test_report.py
index 8cc16565f1..f7f2772839 100644
--- a/tests/test_report.py
+++ b/tests/test_report.py
@@ -10,6 +10,7 @@
 from unblob.models import ProcessResult, Task, TaskResult
 from unblob.processing import ExtractionConfig, process_file
 from unblob.report import (
+    CarveDirectoryReport,
     ChunkReport,
     FileMagicReport,
     HashReport,
@@ -120,6 +121,7 @@ def hello_kitty_task_results(
             sha1="febca6ed75dc02e0def065e7b08f1cca87b57c74",
             sha256="144d8b2c949cb4943128aa0081153bcba4f38eb0ba26119cc06ca1563c4999e1",
         ),
+        CarveDirectoryReport(carve_dir=extract_root / "hello_kitty_extract"),
         UnknownChunkReport(
             id=ANY,
             start_offset=0,
diff --git a/unblob/cli.py b/unblob/cli.py
index 72c7d81fd6..fb48356327 100755
--- a/unblob/cli.py
+++ b/unblob/cli.py
@@ -234,6 +234,21 @@ def __init__(
     show_default=True,
     help="Keep extracted chunks",
 )
+@click.option(
+    "--carve-suffix",
+    "carve_suffix",
+    default="_extract",
+    show_default=True,
+    help="""Carve directory name is source file + this suffix.
+    NOTE: carving is skipped when the whole file is of a known type""",
+)
+@click.option(
+    "--extract-suffix",
+    "extract_suffix",
+    default="_extract",
+    show_default=True,
+    help="Extraction directory name is source file + this suffix",
+)
 @verbosity_option
 @click.option(
     "--show-external-dependencies",
@@ -250,19 +265,22 @@ def __init__(
     expose_value=False,
 )
 def cli(
+    *,
     file: Path,
     extract_root: Path,
     report_file: Optional[Path],
     log_path: Path,
-    force: bool,  # noqa: FBT001
+    force: bool,
     process_num: int,
     depth: int,
    randomness_depth: int,
     skip_magic: Iterable[str],
     skip_extension: Iterable[str],
-    clear_skip_magics: bool,  # noqa: FBT001
-    skip_extraction: bool,  # noqa: FBT001
-    keep_extracted_chunks: bool,  # noqa: FBT001
+    clear_skip_magics: bool,
+    skip_extraction: bool,
+    keep_extracted_chunks: bool,
+    carve_suffix: str,
+    extract_suffix: str,
     handlers: Handlers,
     dir_handlers: DirectoryHandlers,
     plugins_path: Optional[Path],
@@ -294,6 +312,8 @@ def cli(
         handlers=handlers,
         dir_handlers=dir_handlers,
         keep_extracted_chunks=keep_extracted_chunks,
+        extract_suffix=extract_suffix,
+        carve_suffix=carve_suffix,
         verbose=verbose,
         progress_reporter=NullProgressReporter
         if verbose
@@ -424,24 +444,14 @@ def print_report(reports: ProcessResult):
     total_files, total_dirs, total_links, extracted_size = get_size_report(
         reports.results
     )
-    chunks_distribution = get_chunks_distribution(reports.results)
-
-    valid_size = 0
-    total_size = 0
-    for handler, size in chunks_distribution.items():
-        if handler != "unknown":
-            valid_size += size
-        total_size += size
-
-    if total_size == 0:
-        return
 
     summary = Panel(
-        f"""Extracted files: [#00FFC8]{total_files}[/#00FFC8]
+        f"""\
+Output path: [#00FFC8]{reports.get_output_dir()}[/#00FFC8]
+Extracted files: [#00FFC8]{total_files}[/#00FFC8]
 Extracted directories: [#00FFC8]{total_dirs}[/#00FFC8]
 Extracted links: [#00FFC8]{total_links}[/#00FFC8]
-Extraction directory size: [#00FFC8]{human_size(extracted_size)}[/#00FFC8]
-Chunks identification ratio: [#00FFC8]{(valid_size/total_size) * 100:0.2f}%[/#00FFC8]""",
+Extraction directory size: [#00FFC8]{human_size(extracted_size)}[/#00FFC8]""",
         subtitle="Summary",
         title=f"unblob ({get_version()})",
     )
@@ -449,20 +459,33 @@ def print_report(reports: ProcessResult):
     console = Console()
     console.print(summary)
 
+    chunks_distribution = get_chunks_distribution(reports.results)
+
+    valid_size = 0
+    total_size = 0
+    for handler, size in chunks_distribution.items():
+        if handler != "unknown":
+            valid_size += size
+        total_size += size
+
     chunks_table = Table(title="Chunks distribution")
     chunks_table.add_column("Chunk type", justify="left", style="#00FFC8", no_wrap=True)
     chunks_table.add_column("Size", justify="center", style="#00FFC8", no_wrap=True)
     chunks_table.add_column("Ratio", justify="center", style="#00FFC8", no_wrap=True)
-    for handler, size in sorted(
-        chunks_distribution.items(), key=lambda item: item[1], reverse=True
-    ):
-        chunks_table.add_row(
-            handler.upper(), human_size(size), f"{(size/total_size) * 100:0.2f}%"
+    if total_size:
+        for handler, size in sorted(
+            chunks_distribution.items(), key=lambda item: item[1], reverse=True
+        ):
+            chunks_table.add_row(
+                handler.upper(), human_size(size), f"{(size/total_size) * 100:0.2f}%"
+            )
+
+        console.print(chunks_table)
+        console.print(
+            f"Chunk identification ratio: [#00FFC8]{(valid_size/total_size) * 100:0.2f}%[/#00FFC8]"
         )
-    console.print(chunks_table)
-
     if len(reports.errors):
         errors_table = Table(title="Encountered errors")
         errors_table.add_column("Severity", justify="left", style="cyan", no_wrap=True)
diff --git a/unblob/models.py b/unblob/models.py
index 52e5afcc8e..ec54728b07 100644
--- a/unblob/models.py
+++ b/unblob/models.py
@@ -13,6 +13,7 @@
 from .identifiers import new_id
 from .parser import hexstring2regex
 from .report import (
+    CarveDirectoryReport,
     ChunkReport,
     ErrorReport,
     MultiFileReport,
@@ -238,6 +239,20 @@ def register(self, result: TaskResult):
     def to_json(self, indent="    "):
         return to_json(self.results, indent=indent)
 
+    def get_output_dir(self) -> Optional[Path]:
+        try:
+            top_result = self.results[0]
+            if carves := top_result.filter_reports(CarveDirectoryReport):
+                # we have a top level carve
+                return carves[0].carve_dir
+
+            # we either have an extraction,
+            # and the extract directory registered as subtask
+            return top_result.subtasks[0].path
+        except IndexError:
+            # or no extraction
+            return None
+
 
 class _JSONEncoder(json.JSONEncoder):
     def default(self, obj):
diff --git a/unblob/processing.py b/unblob/processing.py
index 393807842d..b759b55159 100644
--- a/unblob/processing.py
+++ b/unblob/processing.py
@@ -34,10 +34,11 @@
 from .pool import make_pool
 from .report import (
     CalculateMultiFileExceptionReport,
-    ExtractDirectoryExistsReport,
+    CarveDirectoryReport,
     FileMagicReport,
     HashReport,
     MultiFileCollisionReport,
+    OutputDirectoryExistsReport,
     RandomnessMeasurements,
     RandomnessReport,
     Report,
@@ -95,21 +96,26 @@ class ExtractionConfig:
     process_num: int = DEFAULT_PROCESS_NUM
     keep_extracted_chunks: bool = False
     extract_suffix: str = "_extract"
+    carve_suffix: str = "_extract"
     handlers: Handlers = BUILTIN_HANDLERS
     dir_handlers: DirectoryHandlers = BUILTIN_DIR_HANDLERS
     verbose: int = 1
     progress_reporter: Type[ProgressReporter] = NullProgressReporter
 
-    def get_extract_dir_for(self, path: Path) -> Path:
-        """Return extraction dir under root with the name of path."""
+    def _get_output_path(self, path: Path) -> Path:
+        """Return path under extract root."""
         try:
             relative_path = path.relative_to(self.extract_root)
         except ValueError:
             # path is not inside root, i.e. it is an input file
             relative_path = Path(path.name)
-        extract_name = path.name + self.extract_suffix
-        extract_dir = self.extract_root / relative_path.with_name(extract_name)
-        return extract_dir.expanduser().resolve()
+        return (self.extract_root / relative_path).expanduser().resolve()
+
+    def get_extract_dir_for(self, path: Path) -> Path:
+        return self._get_output_path(path.with_name(path.name + self.extract_suffix))
+
+    def get_carve_dir_for(self, path: Path) -> Path:
+        return self._get_output_path(path.with_name(path.name + self.carve_suffix))
 
 
 @terminate_gracefully
@@ -130,6 +136,11 @@ def process_file(
         logger.info("Removing extract dir", path=extract_dir)
         shutil.rmtree(extract_dir)
 
+    carve_dir = config.get_carve_dir_for(input_path)
+    if config.force_extract and carve_dir.exists():
+        logger.info("Removing carve dir", path=carve_dir)
+        shutil.rmtree(carve_dir)
+
     if not prepare_report_file(config, report_file):
         logger.error(
             "File not processed, as report could not be written", file=input_path
@@ -138,10 +149,6 @@ def process_file(
 
     process_result = _process_task(config, task)
 
-    if not config.skip_extraction:
-        # ensure that the root extraction directory is created even for empty extractions
-        extract_dir.mkdir(parents=True, exist_ok=True)
-
     if report_file:
         write_json_report(report_file, process_result)
 
@@ -415,7 +422,7 @@ def _extract_multi_file(self, multi_file: MultiFile) -> Path:
             raise DirectoryProcessingError(
                 "Skipped: extraction directory exists",
                 report=multi_file.as_report(
-                    [ExtractDirectoryExistsReport(path=extract_dir)]
+                    [OutputDirectoryExistsReport(path=extract_dir)]
                 ),
             )
 
@@ -496,24 +503,9 @@ def __init__(
         self.size = size
         self.result = result
 
-        self.carve_dir = config.get_extract_dir_for(self.task.path)
-
     def process(self):
         logger.debug("Processing file", path=self.task.path, size=self.size)
 
-        if self.carve_dir.exists() and not self.config.skip_extraction:
-            # Extraction directory is not supposed to exist, it is usually a simple mistake of running
-            # unblob again without cleaning up or using --force.
-            # It would cause problems continuing, as it would mix up original and extracted files,
-            # and it would just introduce weird, non-deterministic problems due to interference on paths
-            # by multiple workers (parallel processing, modifying content (fix_symlink),
-            # and `mmap` + open for write with O_TRUNC).
-            logger.error(
-                "Skipped: extraction directory exists", extract_dir=self.carve_dir
-            )
-            self.result.add_report(ExtractDirectoryExistsReport(path=self.carve_dir))
-            return
-
         with File.from_path(self.task.path) as file:
             all_chunks = search_chunks(
                 file, self.size, self.config.handlers, self.result
@@ -549,13 +541,58 @@ def _process_chunks(
             self.result.add_report(chunk.as_report(extraction_reports=[]))
             return
 
+        is_whole_file_chunk = len(outer_chunks) + len(unknown_chunks) == 1
+        if is_whole_file_chunk:
+            # skip carving, extract directly the whole file (chunk)
+            carved_path = self.task.path
+            for chunk in outer_chunks:
+                self._extract_chunk(
+                    carved_path,
+                    chunk,
+                    self.config.get_extract_dir_for(carved_path),
+                    # since we do not carve, we want to keep the input around
+                    remove_extracted_input=False,
+                )
+        else:
+            self._carve_then_extract_chunks(file, outer_chunks, unknown_chunks)
+
+    def _carve_then_extract_chunks(self, file, outer_chunks, unknown_chunks):
+        assert not self.config.skip_extraction
+
+        carve_dir = self.config.get_carve_dir_for(self.task.path)
+
+        # report the technical carve directory explicitly
+        self.result.add_report(CarveDirectoryReport(carve_dir=carve_dir))
+
+        if carve_dir.exists():
+            # Carve directory is not supposed to exist, it is usually a simple mistake of running
+            # unblob again without cleaning up or using --force.
+            # It would cause problems continuing, as it would mix up original and extracted files,
+            # and it would just introduce weird, non-deterministic problems due to interference on paths
+            # by multiple workers (parallel processing, modifying content (fix_symlink),
+            # and `mmap` + open for write with O_TRUNC).
+            logger.error("Skipped: carve directory exists", carve_dir=carve_dir)
+            self.result.add_report(OutputDirectoryExistsReport(path=carve_dir))
+            return
+
         for chunk in unknown_chunks:
-            carved_unknown_path = carve_unknown_chunk(self.carve_dir, file, chunk)
+            carved_unknown_path = carve_unknown_chunk(carve_dir, file, chunk)
             randomness = self._calculate_randomness(carved_unknown_path)
             self.result.add_report(chunk.as_report(randomness=randomness))
 
         for chunk in outer_chunks:
-            self._extract_chunk(file, chunk)
+            carved_path = carve_valid_chunk(carve_dir, file, chunk)
+
+            self._extract_chunk(
+                carved_path,
+                chunk,
+                self.config.get_extract_dir_for(carved_path),
+                # when a carved chunk is successfully extracted, usually
+                # we want to get rid of it, as its data is available in
+                # extracted format, and the raw data is still part of
+                # the file the chunk belongs to
+                remove_extracted_input=not self.config.keep_extracted_chunks,
+            )
 
     def _calculate_randomness(self, path: Path) -> Optional[RandomnessReport]:
         if self.task.depth < self.config.randomness_depth:
@@ -571,17 +608,14 @@ def _calculate_randomness(self, path: Path) -> Optional[RandomnessReport]:
             return report
         return None
 
-    def _extract_chunk(self, file, chunk: ValidChunk):  # noqa: C901
-        skip_carving = chunk.is_whole_file
-        if skip_carving:
-            inpath = self.task.path
-            extract_dir = self.carve_dir
-            carved_path = None
-        else:
-            inpath = carve_valid_chunk(self.carve_dir, file, chunk)
-            extract_dir = self.carve_dir / (inpath.name + self.config.extract_suffix)
-            carved_path = inpath
-
+    def _extract_chunk(
+        self,
+        carved_path: Path,
+        chunk: ValidChunk,
+        extract_dir: Path,
+        *,
+        remove_extracted_input: bool,
+    ):
         if extract_dir.exists():
             # Extraction directory is not supposed to exist, it mixes up original and extracted files,
             # and it would just introduce weird, non-deterministic problems due to interference on paths
@@ -593,7 +627,7 @@ def _extract_chunk(self, file, chunk: ValidChunk):  # noqa: C901
                 chunk=chunk,
             )
             self.result.add_report(
-                chunk.as_report([ExtractDirectoryExistsReport(path=extract_dir)])
+                chunk.as_report([OutputDirectoryExistsReport(path=extract_dir)])
            )
             return
 
@@ -603,10 +637,10 @@ def _extract_chunk(self, file, chunk: ValidChunk):  # noqa: C901
 
         extraction_reports = []
         try:
-            if result := chunk.extract(inpath, extract_dir):
+            if result := chunk.extract(carved_path, extract_dir):
                 extraction_reports.extend(result.reports)
 
-            if carved_path and not self.config.keep_extracted_chunks:
+            if remove_extracted_input:
                 logger.debug("Removing extracted chunk", path=carved_path)
                 carved_path.unlink()
diff --git a/unblob/report.py b/unblob/report.py
index 4c38abee4a..6459fe7788 100644
--- a/unblob/report.py
+++ b/unblob/report.py
@@ -90,7 +90,7 @@ class ExtractCommandFailedReport(ErrorReport):
 
 
 @attr.define(kw_only=True, frozen=True)
-class ExtractDirectoryExistsReport(ErrorReport):
+class OutputDirectoryExistsReport(ErrorReport):
     severity: Severity = Severity.ERROR
     path: Path
 
@@ -233,6 +233,11 @@ class UnknownChunkReport(Report):
     randomness: Optional[RandomnessReport]
 
 
+@attr.define(kw_only=True, frozen=True)
+class CarveDirectoryReport(Report):
+    carve_dir: Path
+
+
 @final
 @attr.define(kw_only=True, frozen=True)
 class MultiFileReport(Report):