Skip to content

Commit

Permalink
move slicer to its own package (#390)
Browse files Browse the repository at this point in the history
This commit cleans up the slicer logic by moving the reader into
its own package (pkg/slicer).  It also moves LoadIndex from pcap/slicer.go
to pcap/index.go, and simplifies the reader's constructor to take an
io.ReadSeeker instead of an *os.File (which implements io.ReadSeeker).

This is the first refactor toward adding support for pcap-ng.
  • Loading branch information
mccanne authored Mar 5, 2020
1 parent 69f41da commit c95c755
Show file tree
Hide file tree
Showing 5 changed files with 116 additions and 77 deletions.
17 changes: 15 additions & 2 deletions pcap/index.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
package pcap

import (
"encoding/json"
"errors"
"io"
"io/ioutil"
"sync"

"github.com/brimsec/zq/pkg/nano"
"github.com/brimsec/zq/pkg/ranger"
"github.com/brimsec/zq/pkg/slicer"
)

type Index struct {
Expand All @@ -17,7 +20,7 @@ type Index struct {
// there is just one section at the beginning of the file. For nextgen pcaps,
// there can be multiple sections.
type Section struct {
Blocks []Slice
Blocks []slicer.Slice
Index ranger.Envelope
}

Expand Down Expand Up @@ -47,7 +50,7 @@ func CreateIndex(r io.Reader, limit int) (*Index, error) {
return nil, errors.New("no packets found")
}
// legacy pcap file has just the file header at the start of the file
blocks := []Slice{{0, fileHeaderLen}}
blocks := []slicer.Slice{{0, fileHeaderLen}}
return &Index{
Sections: []Section{{
Blocks: blocks,
Expand Down Expand Up @@ -84,3 +87,13 @@ func (w *IndexWriter) Close() (*Index, error) {
w.wg.Wait()
return w.idx, w.err
}

// LoadIndex reads the file at path and decodes its JSON contents into an
// Index.
//
// On any failure (the file cannot be read, or its contents do not decode
// cleanly) the returned index is nil, so callers never see a partially
// populated Index alongside an error. A file holding the JSON value null
// decodes to an empty, non-nil Index.
func LoadIndex(path string) (*Index, error) {
	b, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, err
	}
	// Decode into a value rather than a nil pointer so that success always
	// yields a non-nil result.
	var index Index
	if err := json.Unmarshal(b, &index); err != nil {
		return nil, err
	}
	return &index, nil
}
83 changes: 8 additions & 75 deletions pcap/slicer.go
Original file line number Diff line number Diff line change
@@ -1,86 +1,29 @@
package pcap

import (
"encoding/json"
"errors"
"io"
"io/ioutil"
"os"

"github.com/brimsec/zq/pkg/nano"
"github.com/brimsec/zq/pkg/ranger"
"github.com/brimsec/zq/pkg/slicer"
)

// Slicer implements io.Reader reading the sliced regions provided to it from
// the underlying file thus extracting subsets of an underlying file without
// modifying or copying the file.
type Slicer struct {
slices []Slice
slice Slice
file *os.File
eof bool
}

func NewSlicer(file *os.File, index *Index, span nano.Span) (*Slicer, error) {
func NewSlicer(file *os.File, index *Index, span nano.Span) (*slicer.Reader, error) {
slices, err := GenerateSlices(index, span)
if err != nil {
return nil, err
}
s := &Slicer{
slices: slices,
file: file,
}
return s, s.next()
}

func (s *Slicer) next() error {
if len(s.slices) == 0 {
s.eof = true
return nil
}
s.slice = s.slices[0]
s.slices = s.slices[1:]
_, err := s.file.Seek(int64(s.slice.Offset), 0)
return err
}

func (s *Slicer) Read(b []byte) (int, error) {
if s.eof {
return 0, io.EOF
}
p := b
if uint64(len(p)) > s.slice.Length {
p = p[:s.slice.Length]
}
n, err := s.file.Read(p)
if n != 0 {
if err == io.EOF {
err = nil
}
s.slice.Length -= uint64(n)
if s.slice.Length == 0 {
err = s.next()
}
}
return n, err
}

type Slice struct {
Offset uint64
Length uint64
}

func (s Slice) Overlaps(x Slice) bool {
return x.Offset >= s.Offset && x.Offset < s.Offset+x.Length
return slicer.NewReader(file, slices)
}

// GenerateSlices takes an index and time span and generates a list of
// slices that should be read to enumerate the relevant chunks of an
// underlying pcap file. Extra packets may appear in the resulting stream
// but all packets that fall within the time range will be produced, i.e.,
// another layering of time filtering should be applied to resulting packets.
func GenerateSlices(index *Index, span nano.Span) ([]Slice, error) {
var slices []Slice
func GenerateSlices(index *Index, span nano.Span) ([]slicer.Slice, error) {
var slices []slicer.Slice
for _, section := range index.Sections {
pslice, err := FindPacketSlice(section.Index, span)
if err != nil {
Expand All @@ -96,21 +39,11 @@ func GenerateSlices(index *Index, span nano.Span) ([]Slice, error) {
return slices, nil
}

func FindPacketSlice(e ranger.Envelope, span nano.Span) (Slice, error) {
func FindPacketSlice(e ranger.Envelope, span nano.Span) (slicer.Slice, error) {
if len(e) == 0 {
return Slice{}, errors.New("no packets")
return slicer.Slice{}, errors.New("no packets")
}
d := e.FindSmallestDomain(ranger.Range{uint64(span.Ts), uint64(span.End())})
//XXX check for empty domain.. though seems like this will do the right thing
return Slice{d.X0, d.X1 - d.X0}, nil
}

func LoadIndex(path string) (*Index, error) {
b, err := ioutil.ReadFile(path)
if err != nil {
return nil, err
}
var index *Index
err = json.Unmarshal(b, &index)
return index, err
return slicer.Slice{d.X0, d.X1 - d.X0}, nil
}
56 changes: 56 additions & 0 deletions pkg/slicer/reader.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Package slicer provides an io.Reader that returns subsets of a file.
package slicer

import (
"io"
)

// Reader implements io.Reader by returning, in order, the bytes of each slice
// of an underlying io.ReadSeeker, thus extracting subsets of the underlying
// data without modifying or copying it.
type Reader struct {
	// slices holds the slices that have not been started yet, in read order.
	slices []Slice
	// slice is the slice currently being read; its Length is decremented as
	// bytes are consumed and so counts the bytes still to be returned from it.
	slice Slice
	// seeker is the underlying data source.
	seeker io.ReadSeeker
	// eof is set once every slice has been consumed.
	eof bool
}

// NewReader returns a Reader that yields the regions of seeker described by
// slices, in the order given. The seeker is positioned at the start of the
// first non-empty slice before NewReader returns; if that seek fails the
// error is returned and the Reader is nil.
func NewReader(seeker io.ReadSeeker, slices []Slice) (*Reader, error) {
	r := &Reader{
		slices: slices,
		seeker: seeker,
	}
	if err := r.next(); err != nil {
		return nil, err
	}
	return r, nil
}

// next advances to the next non-empty slice and seeks the underlying source
// to its offset. When no slices remain it marks the Reader as exhausted.
func (r *Reader) next() error {
	for len(r.slices) > 0 {
		r.slice, r.slices = r.slices[0], r.slices[1:]
		if r.slice.Length == 0 {
			// An empty slice contributes no bytes. Making it current would
			// cause Read to issue zero-length reads that never advance.
			continue
		}
		_, err := r.seeker.Seek(int64(r.slice.Offset), io.SeekStart)
		return err
	}
	r.eof = true
	return nil
}

// Read reads up to len(b) bytes from the current slice into b and returns the
// number of bytes read. A single call never crosses a slice boundary. Once
// all slices have been consumed, Read returns 0, io.EOF.
func (r *Reader) Read(b []byte) (int, error) {
	if r.eof {
		return 0, io.EOF
	}
	// Never read past the end of the current slice.
	p := b
	if uint64(len(p)) > r.slice.Length {
		p = p[:r.slice.Length]
	}
	n, err := r.seeker.Read(p)
	if n == 0 {
		return 0, err
	}
	if err == io.EOF {
		// Bytes were delivered, so defer end-of-data reporting to a later
		// call; remaining slices may still be readable after a seek.
		err = nil
	}
	r.slice.Length -= uint64(n)
	if r.slice.Length == 0 {
		// Always move on from a finished slice, but do not let the outcome
		// of the seek hide an error reported by the read itself.
		if nextErr := r.next(); err == nil {
			err = nextErr
		}
	}
	return n, err
}
10 changes: 10 additions & 0 deletions pkg/slicer/slice.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package slicer

// Slice describes a contiguous region of a file as the half-open byte range
// [Offset, Offset+Length).
type Slice struct {
	// Offset is the position of the first byte of the region.
	Offset uint64
	// Length is the number of bytes in the region.
	Length uint64
}

// Overlaps reports whether s and x have at least one byte in common. The
// relation is symmetric, and an empty slice overlaps nothing.
func (s Slice) Overlaps(x Slice) bool {
	if s.Length == 0 || x.Length == 0 {
		return false
	}
	// Compare distances between the starts rather than computing
	// Offset+Length, which could wrap around for very large values.
	if x.Offset >= s.Offset {
		return x.Offset-s.Offset < s.Length
	}
	return s.Offset-x.Offset < x.Length
}
27 changes: 27 additions & 0 deletions pkg/slicer/slicer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package slicer_test

import (
"bytes"
"io/ioutil"
"testing"

"github.com/brimsec/zq/pkg/slicer"
"github.com/stretchr/testify/assert"
)

// TestSlicer checks that a Reader returns the bytes of each requested slice
// back to back, in the order the slices are listed.
func TestSlicer(t *testing.T) {
	alphabet := []byte("abcdefghijklmnopqrstuvwxyz")
	regions := []slicer.Slice{
		{Offset: 0, Length: 2},  // "ab"
		{Offset: 0, Length: 26}, // the whole input
		{Offset: 3, Length: 4},  // "defg"
		{Offset: 25, Length: 1}, // "z"
		{Offset: 25, Length: 2}, // asks for one byte more than the input holds
	}
	want := []byte("ababcdefghijklmnopqrstuvwxyzdefgzz")

	reader, err := slicer.NewReader(bytes.NewReader(alphabet), regions)
	assert.NoError(t, err)

	got, err := ioutil.ReadAll(reader)
	assert.NoError(t, err)
	assert.Exactly(t, want, got)
}

0 comments on commit c95c755

Please sign in to comment.