Skip to content

Commit

Permalink
add comments
Browse files Browse the repository at this point in the history
  • Loading branch information
ianic committed Feb 6, 2024
1 parent bbf5e27 commit 46e4120
Show file tree
Hide file tree
Showing 7 changed files with 196 additions and 146 deletions.
33 changes: 21 additions & 12 deletions src/CircularBuffer.zig
Original file line number Diff line number Diff line change
@@ -1,19 +1,22 @@
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;

/// Sliding window of decoded data. Or maybe better described as circular buffer.
/// Contains 64K bytes. Deflate limits:
/// 64K buffer of uncompressed data created in inflate (decompression). Has enough
/// history to support writing match<length, distance>; copying length of bytes
/// from the position distance backward from current.
///
/// Reads can return less than the available bytes if they are spread across
/// different circles. So reads should be repeated until the required number of
/// bytes is returned, or until the returned slice has zero length.
///
/// Note on deflate limits:
/// * non-compressible block is limited to 65,535 bytes.
/// * backward pointer is limited in distance to 32K bytes and in length to 258 bytes.
///
/// Whole non-compressed block can be written without overlap. We always keep a
/// history of up to 64K, more than the 32K needed.
///
/// Reads can return less than the available bytes if they are spread across
/// different circles. So reads should be repeated until the required number of
/// bytes is returned, or until the returned slice has zero length.
///
const std = @import("std");
const assert = std.debug.assert;
const testing = std.testing;

const mask = 0xffff; // 64K - 1
const buffer_len = mask + 1; // 64K buffer

Expand All @@ -34,7 +37,7 @@ pub inline fn write(self: *Self, b: u8) void {
self.wp += 1;
}

// Write match (backreference to the same data slice) starting at `distance`
// Write match (back-reference to the same data slice) starting at `distance`
// back from current write position, and `length` of bytes.
pub fn writeMatch(self: *Self, length: u16, distance: u16) void {
assert(self.wp - self.rp < mask);
Expand Down Expand Up @@ -64,7 +67,7 @@ pub fn writeMatch(self: *Self, length: u16, distance: u16) void {
}
}

// Retruns writable part of the internal buffer of size `n` at most. Advanjces
// Returns writable part of the internal buffer of size `n` at most. Advances
// write pointer, assumes that returned buffer will be filled with data.
pub fn getWritable(self: *Self, n: usize) []u8 {
const wp = self.wp & mask;
Expand Down Expand Up @@ -113,6 +116,12 @@ pub inline fn free(self: *Self) usize {
return buffer_len - (self.wp - self.rp);
}

// Reports whether the buffer is too full to accept the largest possible match.
// 258 is the largest match length; that many bytes can be produced in a single
// decode step, so we require room for 258 + 1 bytes.
pub inline fn full(self: *Self) bool {
    const max_match_len: usize = 258;
    return self.free() <= max_match_len;
}

// example from: https://youtu.be/SJPvNi4HrWQ?t=3558
test "CircularBuffer copy" {
var sw: Self = .{};
Expand Down
49 changes: 31 additions & 18 deletions src/SlidingWindow.zig
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
/// Used in deflate (compression), holds uncompressed data from which Tokens are
/// produced. In combination with Lookup it is used to find matches in history data.
///
const std = @import("std");
const consts = @import("consts.zig");

const expect = testing.expect;
const assert = std.debug.assert;
const testing = std.testing;

// Buffer of history data.

const hist_len = consts.history.len;
const buffer_len = 2 * hist_len;
const min_lookahead = consts.match.min_length + consts.match.max_length;
Expand All @@ -17,7 +18,7 @@ const Self = @This();
buffer: [buffer_len]u8 = undefined,
wp: usize = 0, // write position
rp: usize = 0, // read position
fp: isize = 0, // flush position, tokens are build from fp..rp
fp: isize = 0, // last flush position, tokens are built from fp..rp

// Returns number of bytes written, or 0 if buffer is full and need to slide.
pub fn write(self: *Self, buf: []const u8) usize {
Expand All @@ -30,7 +31,7 @@ pub fn write(self: *Self, buf: []const u8) usize {
}

// Slide buffer for hist_len.
// Drops old history, preserves bwtween hist_len and hist_len - min_lookahead.
// Drops old history, preserves between hist_len and hist_len - min_lookahead.
// Returns number of bytes removed.
pub fn slide(self: *Self) u16 {
assert(self.rp >= max_rp and self.wp >= self.rp);
Expand All @@ -42,34 +43,41 @@ pub fn slide(self: *Self) u16 {
return @intCast(n);
}

// flush - process all data from window
// If not flush preserve enough data for the loghest match.
// Returns null if there is not enough data.
pub fn activeLookahead(self: *Self, flush: bool) ?[]const u8 {
const min: usize = if (flush) 0 else min_lookahead;
// Data from the current (read) position up to the write position: the part of
// the buffer that has not been converted to tokens yet.
inline fn lookahead(self: *Self) []const u8 {
    const from = self.rp;
    const to = self.wp;
    assert(to >= from);
    return self.buffer[from..to];
}

// Returns part of the lookahead buffer, or null if there is not enough data.
// When should_flush is set, no lookahead is held back; otherwise enough data
// for the longest match is preserved.
pub fn activeLookahead(self: *Self, should_flush: bool) ?[]const u8 {
    const lh = self.lookahead();
    const required: usize = if (should_flush) 0 else min_lookahead;
    if (lh.len > required) return lh;
    return null;
}

pub inline fn lookahead(self: *Self) []const u8 {
assert(self.wp >= self.rp);
return self.buffer[self.rp..self.wp];
// Advances the read position by n bytes, shrinking the lookahead.
pub fn advance(self: *Self, n: u16) void {
    const new_rp = self.rp + n;
    assert(new_rp <= self.wp);
    self.rp = new_rp;
}

// Writable tail of the buffer, where new uncompressed data can be written.
pub fn writable(self: *Self) []u8 {
    const filled = self.wp;
    return self.buffer[filled..];
}

// Notification that n bytes of the writable buffer have been filled with data;
// advances the write position accordingly.
pub fn written(self: *Self, n: usize) void {
    self.wp = self.wp + n;
}

pub fn advance(self: *Self, n: u16) void {
assert(self.wp >= self.rp + n);
self.rp += n;
}

// Finds match length between previous and current position.
// Used in hot path!
pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 {
const max_len: usize = @min(self.wp - curr_pos, consts.match.max_length);
// lookahead buffers from previous and current positions
Expand All @@ -95,14 +103,19 @@ pub fn match(self: *Self, prev_pos: u16, curr_pos: u16, min_len: u16) u16 {
return if (i >= consts.match.min_length) @intCast(i) else 0;
}

// Current position of non-compressed data. Data before rp has already been
// converted to tokens.
pub fn pos(self: *Self) u16 {
    const rp: u16 = @intCast(self.rp);
    return rp;
}

pub fn flushed(self: *Self) void {
// Notification that the token list has been cleared: records the current read
// position as the last flush position.
pub fn flush(self: *Self) void {
    const rp: isize = @intCast(self.rp);
    self.fp = rp;
}

// Part of the buffer since last flush or null if there was slide in between (so
// fp becomes negative).
pub fn tokensBuffer(self: *Self) ?[]const u8 {
assert(self.fp <= self.rp);
if (self.fp < 0) return null;
Expand Down
Loading

0 comments on commit 46e4120

Please sign in to comment.