From 3a6fa2ae18f5bf07f2a10346b2ef81db52d54039 Mon Sep 17 00:00:00 2001 From: tom twinkle Date: Tue, 23 Jan 2024 14:04:08 +0900 Subject: [PATCH 1/3] fix: Support use with bufio for strings larger than 4096 bytes. fix lint error --- main.go | 28 ++++++++++++------- main_test.go | 76 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 76 insertions(+), 28 deletions(-) diff --git a/main.go b/main.go index c84a484..9905fc7 100644 --- a/main.go +++ b/main.go @@ -32,15 +32,25 @@ func (t *replacer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e if len(_src) == 0 && atEOF { return } - if !utf8.Valid(_src) { - // If not a string, do not process - err = ErrInvalidUTF8 - return - } for len(_src) > 0 { - _, n := utf8.DecodeRune(_src) - buf := _src[:n] + r, size := utf8.DecodeRune(_src) + if r < utf8.RuneSelf { + size = 1 + } else if size == 1 { + // All valid runes of size 1 (those below utf8.RuneSelf) were + // handled above. We have invalid UTF-8, or we haven't seen the + // full character yet. + if !atEOF && !utf8.FullRune(_src) { + err = transform.ErrShortSrc + break + } + // If the trailing bytes at EOF do not form a complete rune, they are left unconsumed rather than replaced. 
+ if atEOF && !utf8.FullRune(_src) { + break + } + } + buf := _src[:size] if _, encErr := t.enc.Bytes(buf); encErr != nil { // Replace strings that cannot be converted buf = []byte(string(t.replaceRune)) @@ -54,9 +64,9 @@ func (t *replacer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err e if dstN <= 0 { break } - nSrc += n + nSrc += size nDst += dstN - _src = _src[n:] + _src = _src[size:] } return } diff --git a/main_test.go b/main_test.go index 8324af2..794ddf6 100644 --- a/main_test.go +++ b/main_test.go @@ -1,6 +1,7 @@ package garbledreplacer_test import ( + "bufio" "bytes" "errors" "github.com/tomtwinkle/garbledreplacer" @@ -46,10 +47,10 @@ func TestNewTransformer(t *testing.T) { want: "?", }, "UTF-8->ShiftJIS:Invalid UTF-8 character": { - encoding: japanese.ShiftJIS, - in: "\xe4", - replace: '?', - wantError: garbledreplacer.ErrInvalidUTF8, + encoding: japanese.ShiftJIS, + in: "\xe4", + replace: '?', + want: "", }, "UTF-8->EUCJP:with garbled text": { encoding: japanese.EUCJP, @@ -63,17 +64,51 @@ func TestNewTransformer(t *testing.T) { replace: '?', want: strings.Repeat("咖呸咕咀呻?呷咄咒咆呼咐?呱呶和咚呢", 3000), }, + "UTF-8->ShiftJIS:with garbled text:larger than 4096bytes": { + encoding: japanese.ShiftJIS, + in: strings.Repeat("一二三四🍣五六七八九🍺十拾壱", 4000), + replace: '?', + want: strings.Repeat("一二三四?五六七八九?十拾壱", 4000), + }, + "UTF-8->ShiftJIS:all garbled text:larger than 4096bytes": { + encoding: japanese.ShiftJIS, + in: strings.Repeat("🍣🍣🍣🍺🍣🍣🍣", 4000), + replace: '?', + want: strings.Repeat("???????", 4000), + }, + } + + assertFunc := func(t *testing.T, want string, actual bytes.Buffer, decoder *encoding.Decoder) { + var assertBuf bytes.Buffer + aw := transform.NewWriter(&assertBuf, decoder) + if _, err := aw.Write(actual.Bytes()); err != nil { + t.Error(err) + } + if err := aw.Close(); err != nil { + t.Error(err) + } + + if len([]rune(want)) != len([]rune(assertBuf.String())) { + t.Errorf("string length does not match %d=%d", len([]rune(want)), 
len([]rune(assertBuf.String()))) + } + if want != assertBuf.String() { + t.Errorf("string does not match\n%s", assertBuf.String()) + } } for n, v := range tests { name := n tt := v - t.Run(name, func(t *testing.T) { + t.Run("[transform.NewWriter]"+name, func(t *testing.T) { var buf bytes.Buffer w := transform.NewWriter(&buf, garbledreplacer.NewTransformer(tt.encoding, tt.replace)) - if _, err := w.Write([]byte(tt.in)); err != nil { - if tt.wantError != nil && errors.Is(err, tt.wantError) { + _, err := w.Write([]byte(tt.in)) + if tt.wantError != nil { + if err == nil { + t.Errorf("want error %v, got nil", tt.wantError) + } + if errors.Is(err, tt.wantError) { return } t.Error(err) @@ -81,22 +116,25 @@ func TestNewTransformer(t *testing.T) { if err := w.Close(); err != nil { t.Error(err) } - - var actual bytes.Buffer - aw := transform.NewWriter(&actual, tt.encoding.NewDecoder()) - if _, err := aw.Write(buf.Bytes()); err != nil { + assertFunc(t, tt.want, buf, tt.encoding.NewDecoder()) + }) + t.Run("[transform.NewWriter with bufio.NewWriter]"+name, func(t *testing.T) { + var buf bytes.Buffer + w := bufio.NewWriter(transform.NewWriter(&buf, garbledreplacer.NewTransformer(tt.encoding, tt.replace))) + _, err := w.WriteString(tt.in) + if tt.wantError != nil { + if err == nil { + t.Errorf("want error %v, got nil", tt.wantError) + } + if errors.Is(err, tt.wantError) { + return + } t.Error(err) } - if err := aw.Close(); err != nil { + if err := w.Flush(); err != nil { t.Error(err) } - - if len([]rune(tt.want)) != len([]rune(actual.String())) { - t.Errorf("string length does not match %d=%d", len([]rune(tt.want)), len([]rune(actual.String()))) - } - if tt.want != actual.String() { - t.Errorf("string does not match\n%s", actual.String()) - } + assertFunc(t, tt.want, buf, tt.encoding.NewDecoder()) }) } } From 777a593035ac4da28564cbe3dd093a5fb99cec58 Mon Sep 17 00:00:00 2001 From: tom twinkle Date: Tue, 23 Jan 2024 14:22:12 +0900 Subject: [PATCH 2/3] ignore lint jscpd --- 
main_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/main_test.go b/main_test.go index 794ddf6..c35d4cd 100644 --- a/main_test.go +++ b/main_test.go @@ -14,6 +14,7 @@ import ( "unicode/utf8" ) +//nolint:jscpd func TestNewTransformer(t *testing.T) { tests := map[string]struct { encoding encoding.Encoding From f4c76488d0f5a9f06c2a79bcfe90be34c8f694ee Mon Sep 17 00:00:00 2001 From: tom twinkle Date: Tue, 23 Jan 2024 14:36:16 +0900 Subject: [PATCH 3/3] ignore lint jscpd fix lint --- .github/linters/.jscpd.json | 5 +++++ main_test.go | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 .github/linters/.jscpd.json diff --git a/.github/linters/.jscpd.json b/.github/linters/.jscpd.json new file mode 100644 index 0000000..fa08b20 --- /dev/null +++ b/.github/linters/.jscpd.json @@ -0,0 +1,5 @@ +{ + "threshold": 0, + "ignore": ["**/*_test.go"], + "absolute": true +} \ No newline at end of file diff --git a/main_test.go b/main_test.go index c35d4cd..9f4210e 100644 --- a/main_test.go +++ b/main_test.go @@ -4,17 +4,17 @@ import ( "bufio" "bytes" "errors" + "strings" + "testing" + "unicode/utf8" + "github.com/tomtwinkle/garbledreplacer" "golang.org/x/text/encoding" "golang.org/x/text/encoding/japanese" "golang.org/x/text/encoding/traditionalchinese" "golang.org/x/text/transform" - "strings" - "testing" - "unicode/utf8" ) -//nolint:jscpd func TestNewTransformer(t *testing.T) { tests := map[string]struct { encoding encoding.Encoding