Skip to content

Commit

Permalink
Merge pull request #10 from tomtwinkle/fix/cannot-transform-larger-strings
Browse files Browse the repository at this point in the history

fix: Support use with bufio for strings larger than 4096 bytes.
  • Loading branch information
tomtwinkle authored Jan 23, 2024
2 parents 4a5af67 + f4c7648 commit c3c945d
Show file tree
Hide file tree
Showing 3 changed files with 85 additions and 31 deletions.
5 changes: 5 additions & 0 deletions .github/linters/.jscpd.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"threshold": 0,
"ignore": ["**/*_test.go"],
"absolute": true
}
28 changes: 19 additions & 9 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,15 +32,25 @@ func (t *replacer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e
if len(_src) == 0 && atEOF {
return
}
if !utf8.Valid(_src) {
// If not a string, do not process
err = ErrInvalidUTF8
return
}

for len(_src) > 0 {
_, n := utf8.DecodeRune(_src)
buf := _src[:n]
r, size := utf8.DecodeRune(_src)
if r < utf8.RuneSelf {
size = 1
} else if size == 1 {
// All valid runes of size 1 (those below utf8.RuneSelf) were
// handled above. We have invalid UTF-8, or we haven't seen the
// full character yet.
if !atEOF && !utf8.FullRune(_src) {
err = transform.ErrShortSrc
break
}
// At EOF, if the remaining bytes do not form a complete rune, stop without replacing them.
if atEOF && !utf8.FullRune(_src) {
break
}
}
buf := _src[:size]
if _, encErr := t.enc.Bytes(buf); encErr != nil {
// Replace strings that cannot be converted
buf = []byte(string(t.replaceRune))
Expand All @@ -54,9 +64,9 @@ func (t *replacer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e
if dstN <= 0 {
break
}
nSrc += n
nSrc += size
nDst += dstN
_src = _src[n:]
_src = _src[size:]
}
return
}
83 changes: 61 additions & 22 deletions main_test.go
Original file line number Diff line number Diff line change
@@ -1,16 +1,18 @@
package garbledreplacer_test

import (
"bufio"
"bytes"
"errors"
"strings"
"testing"
"unicode/utf8"

"github.com/tomtwinkle/garbledreplacer"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/transform"
"strings"
"testing"
"unicode/utf8"
)

func TestNewTransformer(t *testing.T) {
Expand Down Expand Up @@ -46,10 +48,10 @@ func TestNewTransformer(t *testing.T) {
want: "?",
},
"UTF-8->ShiftJIS:Invalid UTF-8 character": {
encoding: japanese.ShiftJIS,
in: "\xe4",
replace: '?',
wantError: garbledreplacer.ErrInvalidUTF8,
encoding: japanese.ShiftJIS,
in: "\xe4",
replace: '?',
want: "",
},
"UTF-8->EUCJP:with garbled text": {
encoding: japanese.EUCJP,
Expand All @@ -63,40 +65,77 @@ func TestNewTransformer(t *testing.T) {
replace: '?',
want: strings.Repeat("咖呸咕咀呻?呷咄咒咆呼咐?呱呶和咚呢", 3000),
},
"UTF-8->ShiftJIS:with garbled text:larger than 4096bytes": {
encoding: japanese.ShiftJIS,
in: strings.Repeat("一二三四🍣五六七八九🍺十拾壱", 4000),
replace: '?',
want: strings.Repeat("一二三四?五六七八九?十拾壱", 4000),
},
"UTF-8->ShiftJIS:all garbled text:larger than 4096bytes": {
encoding: japanese.ShiftJIS,
in: strings.Repeat("🍣🍣🍣🍺🍣🍣🍣", 4000),
replace: '?',
want: strings.Repeat("???????", 4000),
},
}

// assertFunc writes the bytes held in actual through decoder and compares the
// decoded text with want, reporting any mismatch or write/close error on t.
assertFunc := func(t *testing.T, want string, actual bytes.Buffer, decoder *encoding.Decoder) {
var assertBuf bytes.Buffer
// Decode the produced bytes into assertBuf via a transform writer.
aw := transform.NewWriter(&assertBuf, decoder)
if _, err := aw.Write(actual.Bytes()); err != nil {
t.Error(err)
}
// Close the writer before inspecting assertBuf; a Close error fails the test.
if err := aw.Close(); err != nil {
t.Error(err)
}

// Compare rune counts first so a length mismatch is reported with both counts.
if len([]rune(want)) != len([]rune(assertBuf.String())) {
t.Errorf("string length does not match %d=%d", len([]rune(want)), len([]rune(assertBuf.String())))
}
// Then require an exact match, printing the decoded text on failure.
if want != assertBuf.String() {
t.Errorf("string does not match\n%s", assertBuf.String())
}
}

for n, v := range tests {
name := n
tt := v

t.Run(name, func(t *testing.T) {
t.Run("[transform.NewWriter]"+name, func(t *testing.T) {
var buf bytes.Buffer
w := transform.NewWriter(&buf, garbledreplacer.NewTransformer(tt.encoding, tt.replace))
if _, err := w.Write([]byte(tt.in)); err != nil {
if tt.wantError != nil && errors.Is(err, tt.wantError) {
_, err := w.Write([]byte(tt.in))
if tt.wantError != nil {
if err == nil {
t.Errorf("want error %v, got nil", tt.wantError)
}
if errors.Is(err, tt.wantError) {
return
}
t.Error(err)
}
if err := w.Close(); err != nil {
t.Error(err)
}

var actual bytes.Buffer
aw := transform.NewWriter(&actual, tt.encoding.NewDecoder())
if _, err := aw.Write(buf.Bytes()); err != nil {
assertFunc(t, tt.want, buf, tt.encoding.NewDecoder())
})
t.Run("[transform.NewWriter with bufio.NewWriter]"+name, func(t *testing.T) {
var buf bytes.Buffer
w := bufio.NewWriter(transform.NewWriter(&buf, garbledreplacer.NewTransformer(tt.encoding, tt.replace)))
_, err := w.WriteString(tt.in)
if tt.wantError != nil {
if err == nil {
t.Errorf("want error %v, got nil", tt.wantError)
}
if errors.Is(err, tt.wantError) {
return
}
t.Error(err)
}
if err := aw.Close(); err != nil {
if err := w.Flush(); err != nil {
t.Error(err)
}

if len([]rune(tt.want)) != len([]rune(actual.String())) {
t.Errorf("string length does not match %d=%d", len([]rune(tt.want)), len([]rune(actual.String())))
}
if tt.want != actual.String() {
t.Errorf("string does not match\n%s", actual.String())
}
assertFunc(t, tt.want, buf, tt.encoding.NewDecoder())
})
}
}
Expand Down

0 comments on commit c3c945d

Please sign in to comment.