implement compression levels
ianic committed Jan 19, 2024
1 parent cb567b5 commit 6ba1248
Showing 1 changed file with 55 additions and 55 deletions.
src/deflate.zig: 110 changes (55 additions & 55 deletions)
@@ -18,63 +18,40 @@ pub fn deflate(reader: anytype, writer: anytype) !void {
try df.close();
}

+const Compression = enum {
+default,
+best,
+};
+
+const Level = struct {
+good: u16,
+nice: u16,
+lazy: u16,
+chain: u16,
+};

pub fn Deflate(comptime WriterType: type) type {
+const compression: Compression = .default;
+const level: Level = switch (compression) {
+.default => .{ .good = 8, .lazy = 16, .nice = 128, .chain = 128 },
+.best => .{ .good = 32, .lazy = 258, .nice = 258, .chain = 4096 },
+};
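+// Assumed reading of these knobs, matching how they are used below and
+// zlib's analogous good_length/max_lazy/nice_length/max_chain settings:
+// chain: max hash-chain positions probed per match search,
+// good: once the best match is at least this long, probe only chain/4 positions,
+// nice: stop searching as soon as a match reaches this length,
+// lazy: emit a match of this length immediately, skipping lazy deferral.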
return struct {
hasher: Hasher = .{},
win: StreamWindow = .{},
tokens: Tokens = .{},
token_writer: WriterType,

+prev_match: ?Token = null,
+prev_literal: ?u8 = null,

const Self = @This();
pub fn init(w: WriterType) Self {
return .{ .token_writer = w };
}

-fn nextToken(self: *Self, min_lookahead: usize) ?Token {
-const lh = self.win.lookahead();
-if (lh.len <= min_lookahead) return null;
-
-var token = Token.initLiteral(lh[0]);
-var length: usize = 1;
-
-const curr_pos = self.win.pos();
-var match_pos = self.hasher.add(lh, @intCast(curr_pos)); // TODO: rethink intCast
-
-var tries: usize = 128; // TODO: this is just hack
-while (match_pos != Hasher.not_found and tries > 0) : (tries -= 1) {
-const distance = curr_pos - match_pos;
-if (distance > consts.match.max_distance or
-match_pos < self.win.offset) break;
-const match_length = self.win.match(match_pos, curr_pos);
-if (match_length > length) {
-token = Token.initMatch(@intCast(distance), match_length);
-length = match_length;
-}
-match_pos = self.hasher.prev(match_pos);
-}
-
-self.win.advance(length);
-if (length > 1)
-self.hasher.bulkAdd(lh[1..], length - 1, @intCast(curr_pos + 1));
-
-return token;
-}

const ProcessOption = enum { none, flush, final };

-// Process data in window and create tokens.
-// If token buffer is full flush tokens to the token writer.
-fn processWindowGreedy(self: *Self, opt: ProcessOption) !void {
-const min_lookahead: usize = if (opt == .none) consts.match.max_length else 0;
-
-while (self.nextToken(min_lookahead)) |token| {
-self.tokens.add(token);
-if (self.tokens.full()) try self.flushTokens(false);
-}
-
-if (opt != .none) try self.flushTokens(opt == .final);
-}

// Process data in window and create tokens. If token buffer is full
// flush tokens to the token writer. In the case of `flush` or `final`
// option it will process all data from the window. In the `none` case
@@ -83,22 +60,30 @@ pub fn Deflate(comptime WriterType: type) type {
// flush - process all data from window
const flsh = (opt != .none);

-var match: ?Token = null;
-var literal: ?u8 = null;
+var match: ?Token = self.prev_match;
+var literal: ?u8 = self.prev_literal;
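+// (the match/literal restored above carry lazy-matching state that was
+// deferred at the end of the previous process() call)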

// While there is data in active lookahead buffer.
while (self.win.activeLookahead(flsh)) |lh| {
var step: usize = 1; // 1 in the case of literal, match length otherwise
const pos: usize = self.win.pos();
const min_len: u16 = if (match) |m| m.length() else 4;

-// Try to find match at leat min_len long.
+// Try to find match at least min_len long.
if (self.findMatch(pos, lh, min_len)) |token| {
// Found better match than previous.
// Write previous literal (if any) and store this match.
_ = try self.addMatchOrLiteral(null, literal);
-literal = lh[0];
-match = token;

+if (false or token.length() >= level.lazy) {
+// Don't try to lazily find a better match, use this one.
+step = try self.addMatchOrLiteral(token, null) + 1;
+literal = null;
+match = null;
+} else {
+literal = lh[0];
+match = token;
+}
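+// With the .default level (lazy = 16), a 20-byte match is emitted
+// right away; an 8-byte match is deferred while the next position
+// is probed for something longer.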
} else {
// There is no better match at the current position than the previous one.
// Write previous match or literal.
@@ -111,7 +96,16 @@ pub fn Deflate(comptime WriterType: type) type {
self.hasher.bulkAdd(lh[1..], step - 1, @intCast(pos + 1));
self.win.advance(step);
}
-_ = try self.addMatchOrLiteral(match, literal);

+if (flsh) {
+// When flushing, the last few lookahead buffers were smaller than the
+// min match length, so only the last literal can still be unwritten.
+assert(match == null);
+_ = try self.addMatchOrLiteral(null, literal);
+literal = null;
+}
+self.prev_literal = literal;
+self.prev_match = match;

if (flsh) try self.flushTokens(opt == .final);
}
@@ -139,14 +133,22 @@ pub fn Deflate(comptime WriterType: type) type {

var token: ?Token = null;

-var tries: usize = 256; // TODO: this is just hack
+var tries: usize = level.chain;
+if (min_len >= level.good) {
+// If we've got a match that's good enough, only look in 1/4 the chain.
+tries >>= 2;
+}
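+// e.g. with .default and a best match already >= good = 8 bytes,
+// only chain >> 2 = 128 >> 2 = 32 chain positions are probed.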
while (match_pos != Hasher.not_found and tries > 0) : (tries -= 1) {
const distance = pos - match_pos;
if (distance > consts.match.max_distance or
match_pos < self.win.offset) break;
const match_length = self.win.match(match_pos, pos);
if (match_length > length) {
token = Token.initMatch(@intCast(distance), match_length);
+if (length >= level.nice) {
+// The match is good enough that we don't try to find a better one.
+return token;
+}
length = match_length;
}
match_pos = self.hasher.prev(match_pos);
@@ -487,14 +489,12 @@ test "Hasher add/prev" {
}

test "Token size" {
-// TODO: remove this
-// print("size of Tokens {d}, bit_offset: {d} {d} {d}\n", .{
+// // TODO: remove this
+// print("size of Tokens {d}\n", .{
// @sizeOf(Tokens),
-// @bitOffsetOf(Token, "kind"),
-// @bitOffsetOf(Token, "lc_sym"),
-// @bitOffsetOf(Token, "dc"),
// });
try expect(@sizeOf(Token) == 4);
try expect(@sizeOf(Tokens) == 131_080);
//try expect(@bitSizeOf(Token) == 26);
// print("size of Hasher {d}\n", .{@sizeOf(Hasher)});
try expect(@sizeOf(Hasher) == 655_360);
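
A minimal usage sketch, not part of the commit: it assumes src/deflate.zig exposes the deflate entry point shown in the first hunk and that the compressed stream goes straight to the passed writer; the import path and buffer size are illustrative.

const std = @import("std");
const deflate = @import("deflate.zig").deflate; // path assumed

test "deflate a small buffer" {
    const input = "a quick brown fox jumps over the lazy dog " ** 8;
    var in = std.io.fixedBufferStream(input);

    var out_buf: [4096]u8 = undefined;
    var out = std.io.fixedBufferStream(&out_buf);

    // reader and writer are anytype, so any std.io reader/writer pair works.
    try deflate(in.reader(), out.writer());

    // If the assumptions above hold, the compressed bytes are in getWritten().
    try std.testing.expect(out.getWritten().len > 0);
}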