From 4a238ee3ff9998cd29ce270fc3fb12648425ac31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Igor=20Anic=CC=81?= Date: Sat, 10 Feb 2024 17:09:18 +0100 Subject: [PATCH] fix two big bugs exposed by fuzzing tests 1. Reading distance code while parsing fixed block as number insted as code. Codes has to be read MSB first, so with @bitReverse. 2. Reporting EndOfStream in bit reader by checking wrong variable. Big thanks to @squeek502 for finding this. Fixes: #8 --- bin/roundtrip.zig | 34 +++++++++++++++ build.zig | 1 + src/bit_reader.zig | 3 +- src/deflate.zig | 8 ++++ src/inflate.zig | 17 ++++---- src/root.zig | 73 +++++++++++++++++++++++--------- src/testdata/fuzzing/roundtrip1 | Bin 0 -> 370 bytes src/testdata/fuzzing/roundtrip2 | Bin 0 -> 371 bytes 8 files changed, 105 insertions(+), 31 deletions(-) create mode 100644 bin/roundtrip.zig create mode 100644 src/testdata/fuzzing/roundtrip1 create mode 100644 src/testdata/fuzzing/roundtrip2 diff --git a/bin/roundtrip.zig b/bin/roundtrip.zig new file mode 100644 index 0000000..74da2d3 --- /dev/null +++ b/bin/roundtrip.zig @@ -0,0 +1,34 @@ +const std = @import("std"); +const flate = @import("flate"); + +pub fn main() !void { + var gpa = std.heap.GeneralPurposeAllocator(.{}){}; + defer std.debug.assert(gpa.deinit() == .ok); + const allocator = gpa.allocator(); + + // Read the data from stdin + const stdin = std.io.getStdIn(); + const data = try stdin.readToEndAlloc(allocator, std.math.maxInt(usize)); + defer allocator.free(data); + + var fbs = std.io.fixedBufferStream(data); + const reader = fbs.reader(); + + // Compress the data + var buf = std.ArrayList(u8).init(allocator); + defer buf.deinit(); + // TODO: vary the level + try flate.raw.compress(reader, buf.writer(), .default); + + // Now try to decompress it + var buf_fbs = std.io.fixedBufferStream(buf.items); + var inflate = flate.raw.decompressor(buf_fbs.reader()); + const inflated = inflate.reader().readAllAlloc(allocator, std.math.maxInt(usize)) catch |err| { + std.debug.print("{}\n", .{err}); + return; + }; + defer allocator.free(inflated); + + std.debug.print("OK {d}\n", .{data.len}); + try std.testing.expectEqualSlices(u8, data, inflated); +} diff --git a/build.zig b/build.zig index 690872f..43f5dba 100644 --- a/build.zig +++ b/build.zig @@ -50,6 +50,7 @@ pub fn build(b: *std.Build) void { .{ .name = "gzip", .src = "bin/gzip.zig" }, .{ .name = "gunzip", .src = "bin/gunzip.zig" }, .{ .name = "decompress", .src = "bin/decompress.zig" }, + .{ .name = "roundtrip", .src = "bin/roundtrip.zig" }, }; for (binaries) |i| { const bin = b.addExecutable(.{ diff --git a/src/bit_reader.zig b/src/bit_reader.zig index 81f058d..91d01a1 100644 --- a/src/bit_reader.zig +++ b/src/bit_reader.zig @@ -63,9 +63,8 @@ pub fn BitReader(comptime ReaderType: type) type { return; } - if (self.bits == 0) { + if (self.nbits == 0) return error.EndOfStream; - } } // Read exactly buf.len bytes into buf. diff --git a/src/deflate.zig b/src/deflate.zig index ff0a4df..e36e95a 100644 --- a/src/deflate.zig +++ b/src/deflate.zig @@ -620,6 +620,14 @@ test "deflate file tokenization" { .data = @embedFile("testdata/huffman-text.input"), .tokens_count = .{ 235, 234, 234, 234, 234, 234 }, }, + .{ + .data = @embedFile("testdata/fuzzing/roundtrip1"), + .tokens_count = .{ 333, 331, 331, 331, 331, 331 }, + }, + .{ + .data = @embedFile("testdata/fuzzing/roundtrip2"), + .tokens_count = .{ 334, 334, 334, 334, 334, 334 }, + }, }; for (cases) |case| { // for each case diff --git a/src/inflate.zig b/src/inflate.zig index 1267716..7374e48 100644 --- a/src/inflate.zig +++ b/src/inflate.zig @@ -118,7 +118,7 @@ pub fn Inflate(comptime container: Container, comptime ReaderType: type) type { fn fixedDistanceCode(self: *Self, code: u8) !void { try self.bits.fill(5 + 5 + 13); const length = try self.decodeLength(code); - const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered)); + const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered | F.reverse)); try self.hist.writeMatch(length, distance); } @@ -481,13 +481,13 @@ test "fuzzing tests" { err: ?anyerror = null, }{ .{ .input = "deflate-stream", .out = @embedFile("testdata/fuzzing/deflate-stream-out") }, - .{ .input = "empty-distance-alphabet01", .err = error.EndOfStream }, - .{ .input = "empty-distance-alphabet02", .out = "" }, + .{ .input = "empty-distance-alphabet01" }, + .{ .input = "empty-distance-alphabet02" }, .{ .input = "end-of-stream", .err = error.EndOfStream }, - .{ .input = "invalid-distance", .err = error.EndOfStream }, + .{ .input = "invalid-distance", .err = error.InvalidMatch }, .{ .input = "invalid-tree01", .err = error.EndOfStream }, - .{ .input = "invalid-tree02", .err = error.EndOfStream }, - .{ .input = "invalid-tree03", .err = error.EndOfStream }, + .{ .input = "invalid-tree02" }, + .{ .input = "invalid-tree03" }, .{ .input = "lengths-overflow", .err = error.BadDecoderState }, .{ .input = "out-of-codes", .err = error.InvalidCode }, .{ .input = "puff01", .err = error.WrongStoredBlockNlen }, @@ -499,7 +499,7 @@ test "fuzzing tests" { .{ .input = "puff08", .err = error.InvalidCode }, .{ .input = "puff09", .out = "P" }, .{ .input = "puff10", .err = error.InvalidCode }, - .{ .input = "puff11", .err = error.EndOfStream }, + .{ .input = "puff11", .err = error.InvalidMatch }, .{ .input = "puff12", .err = error.EndOfStream }, .{ .input = "puff13", .err = error.InvalidCode }, .{ .input = "puff14", .err = error.EndOfStream }, @@ -512,10 +512,11 @@ test "fuzzing tests" { .{ .input = "fuzz4", .err = error.InvalidCode }, }; - inline for (cases) |c| { + inline for (cases, 0..) |c, case_no| { var in = std.io.fixedBufferStream(@embedFile("testdata/fuzzing/" ++ c.input)); var out = std.ArrayList(u8).init(testing.allocator); defer out.deinit(); + errdefer std.debug.print("test case failed {}\n", .{case_no}); if (c.err) |expected_err| { try testing.expectError(expected_err, decompress(.raw, in.reader(), out.writer())); diff --git a/src/root.zig b/src/root.zig index 3ffd68f..2becdea 100644 --- a/src/root.zig +++ b/src/root.zig @@ -88,6 +88,7 @@ test { const std = @import("std"); const testing = std.testing; +const print = std.debug.print; test "decompress" { const deflate_block = [_]u8{ @@ -176,9 +177,10 @@ test "compress/decompress" { const levels = [_]deflate.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 }; const cases = [_]struct { data: []const u8, // uncompressed content - gzip_sizes: [levels.len]usize, // compressed data sizes per level 4-9 - huffman_only_size: usize, - store_size: usize, + // compressed data sizes per level 4-9 + gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len, + huffman_only_size: usize = 0, + store_size: usize = 0, }{ .{ .data = @embedFile("testdata/rfc1951.txt"), @@ -186,35 +188,47 @@ test "compress/decompress" { .huffman_only_size = 20287, .store_size = 36967, }, + .{ + .data = @embedFile("testdata/fuzzing/roundtrip1"), + .gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 }, + .huffman_only_size = 393, + .store_size = 393, + }, + .{ + .data = @embedFile("testdata/fuzzing/roundtrip2"), + .gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 }, + .huffman_only_size = 394, + .store_size = 394, + }, + .{ + .data = @embedFile("testdata/fuzzing/deflate-stream-out"), + .gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 }, + .huffman_only_size = 498, + .store_size = 747, + }, }; - // helper for printing sizes - // for (cases, 0..) |case, i| { - // const data = case.data; - // std.debug.print("\ncase[{d}]: ", .{i}); - // for (4..10) |ilevel| { - // var original = fixedBufferStream(data); - // var compressed = fixedBufferStream(&cmp_buf); - // try compress(.gzip, original.reader(), compressed.writer(), .{ .level = @enumFromInt(ilevel) }); - // std.debug.print("{d}, ", .{compressed.pos}); - // } - // } - // std.debug.print("\n", .{}); - - for (cases) |case| { // for each case + for (cases, 0..) |case, case_no| { // for each case const data = case.data; for (levels, 0..) |level, i| { // for each compression level - const gzip_size = case.gzip_sizes[i]; inline for (Container.list) |container| { // for each wrapping - const compressed_size = gzip_size - Container.gzip.size() + container.size(); + var compressed_size: usize = if (case.gzip_sizes[i] > 0) + case.gzip_sizes[i] - Container.gzip.size() + container.size() + else + 0; // compress original stream to compressed stream { var original = fixedBufferStream(data); var compressed = fixedBufferStream(&cmp_buf); try deflate.compress(container, original.reader(), compressed.writer(), level); + if (compressed_size == 0) { + if (container == .gzip) + print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos }); + compressed_size = compressed.pos; + } try testing.expectEqual(compressed_size, compressed.pos); } // decompress compressed stream to decompressed stream @@ -249,7 +263,10 @@ test "compress/decompress" { // huffman only compression { inline for (Container.list) |container| { // for each wrapping - const compressed_size = case.huffman_only_size - Container.gzip.size() + container.size(); + var compressed_size: usize = if (case.huffman_only_size > 0) + case.huffman_only_size - Container.gzip.size() + container.size() + else + 0; // compress original stream to compressed stream { @@ -258,6 +275,11 @@ test "compress/decompress" { var cmp = try deflate.huffmanCompressor(container, compressed.writer()); try cmp.compress(original.reader()); try cmp.close(); + if (compressed_size == 0) { + if (container == .gzip) + print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos }); + compressed_size = compressed.pos; + } try testing.expectEqual(compressed_size, compressed.pos); } // decompress compressed stream to decompressed stream @@ -273,7 +295,10 @@ test "compress/decompress" { // store only { inline for (Container.list) |container| { // for each wrapping - const compressed_size = case.store_size - Container.gzip.size() + container.size(); + var compressed_size: usize = if (case.store_size > 0) + case.store_size - Container.gzip.size() + container.size() + else + 0; // compress original stream to compressed stream { @@ -282,6 +307,12 @@ test "compress/decompress" { var cmp = try deflate.storeCompressor(container, compressed.writer()); try cmp.compress(original.reader()); try cmp.close(); + if (compressed_size == 0) { + if (container == .gzip) + print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos }); + compressed_size = compressed.pos; + } + try testing.expectEqual(compressed_size, compressed.pos); } // decompress compressed stream to decompressed stream diff --git a/src/testdata/fuzzing/roundtrip1 b/src/testdata/fuzzing/roundtrip1 new file mode 100644 index 0000000000000000000000000000000000000000..4e3353d0fa1310e59fc8407c907a440e7238840f GIT binary patch literal 370 zcmbO_!8ZSpfk4~yPuhNG#5T@tIdb#X+MwNAy#Q8ST${r z&GXN9Kc90GI452aec;BHc`;vEA24z<0D(Zv{^i@1*X_wH{r&OQ%+KqZZ-<7v1it)z z^p?+tNVoU*NKhy;0(Qk;ug6Ueem_iI6W^A2`E?wLoT(B?z$C-w z7DtemfI#6?!YPI13C00DEd1eT>ZNw=Q{DTeG}7UI{mU#r*?H|>q+?Y?n7DLoZnA!P t@6%&qQ8ST${r z&GXN9Kc90GI452aecWJvQ;u#Fm-fCPYQW9Jz{u~M$X(19 zUAF4_m*y77QwgUOk|!7k@UZZQpQ)d^e~b0P`jb)sICI`PCMy`o( zOT7F#jzrE>iJXShMGN&LJlG83rZPZ)fYh#is(ZhbMmpSIcO%PBc3%4z=~xvJCN3SD wo2*~n`}CODcrWQFPHZu{l)1WCd1`@S)TOWwCa1R^oISJUFPrF%zGsW`0a!b)CIA2c literal 0 HcmV?d00001