Skip to content

Commit

Permalink
fix two big bugs exposed by fuzzing tests
Browse files Browse the repository at this point in the history
1. Reading distance code while parsing fixed block as number insted as
code. Codes has to be read MSB first, so with @bitreverse.

2. Reporting EndOfStream in bit reader by checking wrong variable.

Big thanks to @squeek502 for finding this.

Fixes: #8
  • Loading branch information
ianic committed Feb 10, 2024
1 parent 553763e commit 4a238ee
Show file tree
Hide file tree
Showing 8 changed files with 105 additions and 31 deletions.
34 changes: 34 additions & 0 deletions bin/roundtrip.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
const std = @import("std");
const flate = @import("flate");

pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
defer std.debug.assert(gpa.deinit() == .ok);
const allocator = gpa.allocator();

// Read the data from stdin
const stdin = std.io.getStdIn();
const data = try stdin.readToEndAlloc(allocator, std.math.maxInt(usize));
defer allocator.free(data);

var fbs = std.io.fixedBufferStream(data);
const reader = fbs.reader();

// Compress the data
var buf = std.ArrayList(u8).init(allocator);
defer buf.deinit();
// TODO: vary the level
try flate.raw.compress(reader, buf.writer(), .default);

// Now try to decompress it
var buf_fbs = std.io.fixedBufferStream(buf.items);
var inflate = flate.raw.decompressor(buf_fbs.reader());
const inflated = inflate.reader().readAllAlloc(allocator, std.math.maxInt(usize)) catch |err| {
std.debug.print("{}\n", .{err});
return;
};
defer allocator.free(inflated);

std.debug.print("OK {d}\n", .{data.len});
try std.testing.expectEqualSlices(u8, data, inflated);
}
1 change: 1 addition & 0 deletions build.zig
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ pub fn build(b: *std.Build) void {
.{ .name = "gzip", .src = "bin/gzip.zig" },
.{ .name = "gunzip", .src = "bin/gunzip.zig" },
.{ .name = "decompress", .src = "bin/decompress.zig" },
.{ .name = "roundtrip", .src = "bin/roundtrip.zig" },
};
for (binaries) |i| {
const bin = b.addExecutable(.{
Expand Down
3 changes: 1 addition & 2 deletions src/bit_reader.zig
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,8 @@ pub fn BitReader(comptime ReaderType: type) type {
return;
}

if (self.bits == 0) {
if (self.nbits == 0)
return error.EndOfStream;
}
}

// Read exactly buf.len bytes into buf.
Expand Down
8 changes: 8 additions & 0 deletions src/deflate.zig
Original file line number Diff line number Diff line change
Expand Up @@ -620,6 +620,14 @@ test "deflate file tokenization" {
.data = @embedFile("testdata/huffman-text.input"),
.tokens_count = .{ 235, 234, 234, 234, 234, 234 },
},
.{
.data = @embedFile("testdata/fuzzing/roundtrip1"),
.tokens_count = .{ 333, 331, 331, 331, 331, 331 },
},
.{
.data = @embedFile("testdata/fuzzing/roundtrip2"),
.tokens_count = .{ 334, 334, 334, 334, 334, 334 },
},
};

for (cases) |case| { // for each case
Expand Down
17 changes: 9 additions & 8 deletions src/inflate.zig
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ pub fn Inflate(comptime container: Container, comptime ReaderType: type) type {
fn fixedDistanceCode(self: *Self, code: u8) !void {
try self.bits.fill(5 + 5 + 13);
const length = try self.decodeLength(code);
const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered));
const distance = try self.decodeDistance(try self.bits.readF(u5, F.buffered | F.reverse));
try self.hist.writeMatch(length, distance);
}

Expand Down Expand Up @@ -481,13 +481,13 @@ test "fuzzing tests" {
err: ?anyerror = null,
}{
.{ .input = "deflate-stream", .out = @embedFile("testdata/fuzzing/deflate-stream-out") },
.{ .input = "empty-distance-alphabet01", .err = error.EndOfStream },
.{ .input = "empty-distance-alphabet02", .out = "" },
.{ .input = "empty-distance-alphabet01" },
.{ .input = "empty-distance-alphabet02" },
.{ .input = "end-of-stream", .err = error.EndOfStream },
.{ .input = "invalid-distance", .err = error.EndOfStream },
.{ .input = "invalid-distance", .err = error.InvalidMatch },
.{ .input = "invalid-tree01", .err = error.EndOfStream },
.{ .input = "invalid-tree02", .err = error.EndOfStream },
.{ .input = "invalid-tree03", .err = error.EndOfStream },
.{ .input = "invalid-tree02" },
.{ .input = "invalid-tree03" },
.{ .input = "lengths-overflow", .err = error.BadDecoderState },
.{ .input = "out-of-codes", .err = error.InvalidCode },
.{ .input = "puff01", .err = error.WrongStoredBlockNlen },
Expand All @@ -499,7 +499,7 @@ test "fuzzing tests" {
.{ .input = "puff08", .err = error.InvalidCode },
.{ .input = "puff09", .out = "P" },
.{ .input = "puff10", .err = error.InvalidCode },
.{ .input = "puff11", .err = error.EndOfStream },
.{ .input = "puff11", .err = error.InvalidMatch },
.{ .input = "puff12", .err = error.EndOfStream },
.{ .input = "puff13", .err = error.InvalidCode },
.{ .input = "puff14", .err = error.EndOfStream },
Expand All @@ -512,10 +512,11 @@ test "fuzzing tests" {
.{ .input = "fuzz4", .err = error.InvalidCode },
};

inline for (cases) |c| {
inline for (cases, 0..) |c, case_no| {
var in = std.io.fixedBufferStream(@embedFile("testdata/fuzzing/" ++ c.input));
var out = std.ArrayList(u8).init(testing.allocator);
defer out.deinit();
errdefer std.debug.print("test case failed {}\n", .{case_no});

if (c.err) |expected_err| {
try testing.expectError(expected_err, decompress(.raw, in.reader(), out.writer()));
Expand Down
73 changes: 52 additions & 21 deletions src/root.zig
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,7 @@ test {

const std = @import("std");
const testing = std.testing;
const print = std.debug.print;

test "decompress" {
const deflate_block = [_]u8{
Expand Down Expand Up @@ -176,45 +177,58 @@ test "compress/decompress" {
const levels = [_]deflate.Level{ .level_4, .level_5, .level_6, .level_7, .level_8, .level_9 };
const cases = [_]struct {
data: []const u8, // uncompressed content
gzip_sizes: [levels.len]usize, // compressed data sizes per level 4-9
huffman_only_size: usize,
store_size: usize,
// compressed data sizes per level 4-9
gzip_sizes: [levels.len]usize = [_]usize{0} ** levels.len,
huffman_only_size: usize = 0,
store_size: usize = 0,
}{
.{
.data = @embedFile("testdata/rfc1951.txt"),
.gzip_sizes = [_]usize{ 11513, 11217, 11139, 11126, 11122, 11119 },
.huffman_only_size = 20287,
.store_size = 36967,
},
.{
.data = @embedFile("testdata/fuzzing/roundtrip1"),
.gzip_sizes = [_]usize{ 373, 370, 370, 370, 370, 370 },
.huffman_only_size = 393,
.store_size = 393,
},
.{
.data = @embedFile("testdata/fuzzing/roundtrip2"),
.gzip_sizes = [_]usize{ 373, 373, 373, 373, 373, 373 },
.huffman_only_size = 394,
.store_size = 394,
},
.{
.data = @embedFile("testdata/fuzzing/deflate-stream-out"),
.gzip_sizes = [_]usize{ 351, 347, 347, 347, 347, 347 },
.huffman_only_size = 498,
.store_size = 747,
},
};

// helper for printing sizes
// for (cases, 0..) |case, i| {
// const data = case.data;
// std.debug.print("\ncase[{d}]: ", .{i});
// for (4..10) |ilevel| {
// var original = fixedBufferStream(data);
// var compressed = fixedBufferStream(&cmp_buf);
// try compress(.gzip, original.reader(), compressed.writer(), .{ .level = @enumFromInt(ilevel) });
// std.debug.print("{d}, ", .{compressed.pos});
// }
// }
// std.debug.print("\n", .{});

for (cases) |case| { // for each case
for (cases, 0..) |case, case_no| { // for each case
const data = case.data;

for (levels, 0..) |level, i| { // for each compression level
const gzip_size = case.gzip_sizes[i];

inline for (Container.list) |container| { // for each wrapping
const compressed_size = gzip_size - Container.gzip.size() + container.size();
var compressed_size: usize = if (case.gzip_sizes[i] > 0)
case.gzip_sizes[i] - Container.gzip.size() + container.size()
else
0;

// compress original stream to compressed stream
{
var original = fixedBufferStream(data);
var compressed = fixedBufferStream(&cmp_buf);
try deflate.compress(container, original.reader(), compressed.writer(), level);
if (compressed_size == 0) {
if (container == .gzip)
print("case {d} gzip level {} compressed size: {d}\n", .{ case_no, level, compressed.pos });
compressed_size = compressed.pos;
}
try testing.expectEqual(compressed_size, compressed.pos);
}
// decompress compressed stream to decompressed stream
Expand Down Expand Up @@ -249,7 +263,10 @@ test "compress/decompress" {
// huffman only compression
{
inline for (Container.list) |container| { // for each wrapping
const compressed_size = case.huffman_only_size - Container.gzip.size() + container.size();
var compressed_size: usize = if (case.huffman_only_size > 0)
case.huffman_only_size - Container.gzip.size() + container.size()
else
0;

// compress original stream to compressed stream
{
Expand All @@ -258,6 +275,11 @@ test "compress/decompress" {
var cmp = try deflate.huffmanCompressor(container, compressed.writer());
try cmp.compress(original.reader());
try cmp.close();
if (compressed_size == 0) {
if (container == .gzip)
print("case {d} huffman only compressed size: {d}\n", .{ case_no, compressed.pos });
compressed_size = compressed.pos;
}
try testing.expectEqual(compressed_size, compressed.pos);
}
// decompress compressed stream to decompressed stream
Expand All @@ -273,7 +295,10 @@ test "compress/decompress" {
// store only
{
inline for (Container.list) |container| { // for each wrapping
const compressed_size = case.store_size - Container.gzip.size() + container.size();
var compressed_size: usize = if (case.store_size > 0)
case.store_size - Container.gzip.size() + container.size()
else
0;

// compress original stream to compressed stream
{
Expand All @@ -282,6 +307,12 @@ test "compress/decompress" {
var cmp = try deflate.storeCompressor(container, compressed.writer());
try cmp.compress(original.reader());
try cmp.close();
if (compressed_size == 0) {
if (container == .gzip)
print("case {d} store only compressed size: {d}\n", .{ case_no, compressed.pos });
compressed_size = compressed.pos;
}

try testing.expectEqual(compressed_size, compressed.pos);
}
// decompress compressed stream to decompressed stream
Expand Down
Binary file added src/testdata/fuzzing/roundtrip1
Binary file not shown.
Binary file added src/testdata/fuzzing/roundtrip2
Binary file not shown.

0 comments on commit 4a238ee

Please sign in to comment.