// armAlignment is the size in bytes of one instruction word. Convert only
// ever examines whole words, starting at the beginning of the slice.
const armAlignment = 4

// arm is the branch converter for ARM code.
//
// ip is the stream offset of the next word Convert will process, so the zero
// value is ready for use at the start of a stream. Keeping the plain offset
// here (rather than a pre-biased value guarded by an "ip == 0" check) means
// the counter can wrap around 32 bits without the bias being applied twice.
type arm struct {
	ip uint32
}

// Size returns the minimum number of bytes Convert needs to make progress.
func (c *arm) Size() int { return armAlignment }

// Convert rewrites, in place, every 4-byte little-endian word in b whose most
// significant byte is 0xeb (presumably the ARM BL opcode). The low 24 bits of
// such a word are treated as a word displacement: when encoding is true the
// position of the word plus 8 is added to it, otherwise it is subtracted.
// The result is truncated back to 24 bits and the 0xeb byte is restored.
//
// It returns the number of bytes processed, which is len(b) rounded down to a
// multiple of four, or 0 if b holds fewer than Size bytes. Any trailing bytes
// are left untouched. Position is tracked across calls, so consecutive calls
// must be handed consecutive parts of the stream.
func (c *arm) Convert(b []byte, encoding bool) int {
	if len(b) < c.Size() {
		return 0
	}

	end := len(b) &^ (armAlignment - 1)

	for i := 0; i < end; i += armAlignment {
		// The displacement is taken relative to the position two words past
		// the word being converted; all arithmetic wraps modulo 2^32.
		pc := c.ip + 2*armAlignment
		c.ip += armAlignment

		if b[i+3] != 0xeb {
			continue
		}

		// Shifting left by two turns the word displacement into a byte
		// displacement and discards the opcode byte's top bits.
		v := binary.LittleEndian.Uint32(b[i:]) << 2
		if encoding {
			v += pc
		} else {
			v -= pc
		}

		binary.LittleEndian.PutUint32(b[i:], (v>>2)&0x00ffffff|0xeb000000)
	}

	return end
}
// bcjLookAhead is the number of bytes that must follow a candidate opcode
// byte before Convert will examine it: the four operand bytes.
const bcjLookAhead = 4

// bcj is the BCJ branch converter. It carries two values between Convert
// calls: ip, the running count of bytes already consumed, and state, whose
// low three bits seed the mask for the next call.
type bcj struct {
	ip, state uint32
}

// Size returns the minimum number of bytes Convert needs to make progress:
// one opcode byte plus bcjLookAhead operand bytes.
func (c *bcj) Size() int { return bcjLookAhead + 1 }

// test86MSByte reports whether b is 0x00 or 0xff.
func test86MSByte(b byte) bool {
	return (b+1)&0xfe == 0
}

// Convert scans b for bytes equal to 0xe8 or 0xe9 (presumably the x86 CALL
// and JMP rel32 opcodes) and, where the fourth byte after one is 0x00 or
// 0xff, rewrites the following 32-bit little-endian operand in place. When
// encoding is true the stream position just past the 5-byte sequence is added
// to the operand, otherwise it is subtracted.
//
// It returns the number of leading bytes of b that were consumed, or 0 if b
// holds fewer than Size bytes. The final bcjLookAhead bytes are never
// examined as opcode candidates, so the return value can be less than len(b).
//
// NOTE(review): mask appears to record recently skipped opcode candidates, as
// in the x86 filter of the 7-Zip/LZMA SDK — confirm against that reference
// before changing any of the mask handling.
//
//nolint:cyclop,funlen,gocognit
func (c *bcj) Convert(b []byte, encoding bool) int {
	if len(b) < c.Size() {
		return 0
	}

	var (
		pos  int
		mask = c.state & 7 // resume with the mask saved by the previous call
	)

	for {
		// Find the next 0xe8/0xe9 byte that still has a full operand after it.
		p := pos
		for ; p < len(b)-bcjLookAhead; p++ {
			if b[p]&0xfe == 0xe8 {
				break
			}
		}

		// d is the number of non-candidate bytes just skipped.
		d := p - pos
		pos = p

		if p >= len(b)-bcjLookAhead {
			// No more candidates can be examined: save the mask, aged by the
			// skipped distance, and account for the consumed bytes.
			if d > 2 {
				c.state = 0
			} else {
				c.state = mask >> d
			}

			c.ip += uint32(pos)

			return pos
		}

		if d > 2 {
			mask = 0
		} else {
			mask >>= d
			if mask != 0 && (mask > 4 || mask == 3 || test86MSByte(b[p+int(mask>>1)+1])) {
				// Leave this candidate unconverted, but remember it in the mask.
				mask = (mask >> 1) | 4
				pos++

				continue
			}
		}

		//nolint:nestif
		if test86MSByte(b[p+4]) {
			v := binary.LittleEndian.Uint32(b[p+1:])
			// cur is the stream position of the byte after this 5-byte sequence.
			cur := c.ip + uint32(c.Size()+pos)
			pos += c.Size()

			if encoding {
				v += cur
			} else {
				v -= cur
			}

			if mask != 0 {
				// sh is 0, 8, 16 or 24, selecting one byte of v to re-test.
				sh := mask & 6 << 2
				if test86MSByte(byte(v >> sh)) {
					// Flip every bit from that byte downwards and apply the
					// offset a second time.
					v ^= (uint32(0x100) << sh) - 1
					if encoding {
						v += cur
					} else {
						v -= cur
					}
				}

				mask = 0
			}

			binary.LittleEndian.PutUint32(b[p+1:], v)
			// Force the top operand byte to 0x00 or 0xff according to its low bit.
			b[p+4] = 0 - b[p+4]&1
		} else {
			mask = (mask >> 1) | 4
			pos++
		}
	}
}
// ppcAlignment is the size in bytes of one instruction word.
const ppcAlignment = 4

// ppc is the branch converter for PowerPC code. ip is the stream offset of
// the next word Convert will look at.
type ppc struct {
	ip uint32
}

// Size returns the minimum number of bytes Convert needs to make progress.
func (c *ppc) Size() int { return ppcAlignment }

// Convert walks b one big-endian 32-bit word at a time. A word whose top six
// bits are 0x12 and whose bottom two bits are 01 has the word's stream offset
// added to it (encoding) or subtracted from it (decoding); the low 26 bits of
// the result are kept and the top six bits are restored.
//
// It returns the number of bytes processed, which is len(b) rounded down to a
// multiple of four, or 0 if b holds fewer than Size bytes.
func (c *ppc) Convert(b []byte, encoding bool) int {
	if len(b) < c.Size() {
		return 0
	}

	end := len(b) - len(b)%ppcAlignment

	// The post statement runs on continue as well, so ip advances for every
	// word whether or not it was rewritten.
	for off := 0; off < end; off, c.ip = off+ppcAlignment, c.ip+ppcAlignment {
		word := b[off : off+ppcAlignment]

		if word[0]>>2 != 0x12 || word[3]&3 != 1 {
			continue
		}

		addr := binary.BigEndian.Uint32(word)
		if encoding {
			addr += c.ip
		} else {
			addr -= c.ip
		}

		binary.BigEndian.PutUint32(word, addr&0x03ffffff|0x48000000)
	}

	return end
}
+func NewPPCReader(_ []byte, _ uint64, readers []io.ReadCloser) (io.ReadCloser, error) { + return newReader(readers, new(ppc)) +} diff --git a/internal/bra/reader.go b/internal/bra/reader.go new file mode 100644 index 0000000..274fe1d --- /dev/null +++ b/internal/bra/reader.go @@ -0,0 +1,51 @@ +package bra + +import ( + "bytes" + "errors" + "io" +) + +type readCloser struct { + rc io.ReadCloser + buf bytes.Buffer + conv converter +} + +func (rc *readCloser) Close() (err error) { + if rc.rc != nil { + err = rc.rc.Close() + rc.rc = nil + } + + return +} + +func (rc *readCloser) Read(p []byte) (int, error) { + if rc.rc == nil { + return 0, errors.New("bra: Read after Close") + } + + if _, err := io.CopyN(&rc.buf, rc.rc, int64(max(len(p), rc.conv.Size())-rc.buf.Len())); err != nil { + if !errors.Is(err, io.EOF) { + return 0, err + } + } + + if n := rc.conv.Convert(rc.buf.Bytes(), false); n > 0 { + return rc.buf.Read(p[:n]) + } + + return rc.buf.Read(p) +} + +func newReader(readers []io.ReadCloser, conv converter) (io.ReadCloser, error) { + if len(readers) != 1 { + return nil, errors.New("bra: need exactly one reader") + } + + return &readCloser{ + rc: readers[0], + conv: conv, + }, nil +} diff --git a/internal/bra/sparc.go b/internal/bra/sparc.go new file mode 100644 index 0000000..8aa4553 --- /dev/null +++ b/internal/bra/sparc.go @@ -0,0 +1,53 @@ +package bra + +import ( + "encoding/binary" + "io" +) + +const sparcAlignment = 4 + +type sparc struct { + ip uint32 +} + +func (c *sparc) Size() int { return sparcAlignment } + +func (c *sparc) Convert(b []byte, encoding bool) int { + if len(b) < c.Size() { + return 0 + } + + var i int + + for i = 0; i < len(b) & ^(sparcAlignment-1); i += sparcAlignment { + v := binary.BigEndian.Uint32(b[i:]) + + if (b[i+0] == 0x40 && b[i+1]&0xc0 == 0) || (b[i+0] == 0x7f && b[i+1] >= 0xc0) { + v <<= 2 + + if encoding { + v += c.ip + } else { + v -= c.ip + } + + v &= 0x01ffffff + v -= uint32(1) << 24 + v ^= 0xff000000 + v >>= 2 + v |= 
// NewSPARCReader returns an io.ReadCloser that reads from the single reader in
// readers and passes the data through a fresh SPARC converter in decoding
// mode. The first two arguments are ignored. An error is returned unless
// readers contains exactly one element.
func NewSPARCReader(_ []byte, _ uint64, readers []io.ReadCloser) (io.ReadCloser, error) {
	return newReader(readers, new(sparc))
}
b/testdata/arm.7z differ diff --git a/testdata/bcj.7z b/testdata/bcj.7z new file mode 100644 index 0000000..cd2a090 Binary files /dev/null and b/testdata/bcj.7z differ diff --git a/testdata/ppc.7z b/testdata/ppc.7z new file mode 100644 index 0000000..444c743 Binary files /dev/null and b/testdata/ppc.7z differ diff --git a/testdata/sparc.7z b/testdata/sparc.7z new file mode 100644 index 0000000..9b357f0 Binary files /dev/null and b/testdata/sparc.7z differ