From a3af31d241f177af9f886db0113c9e544b7c8cdb Mon Sep 17 00:00:00 2001
From: mtsang
Date: Thu, 2 Nov 2023 15:04:58 +0000
Subject: [PATCH] fix encoding with non-utf8 bytes

---
 field/string.go      | 32 ++++++++++++++++---------
 field/string_test.go | 56 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 77 insertions(+), 11 deletions(-)

diff --git a/field/string.go b/field/string.go
index c8ce11c..b07e3fc 100644
--- a/field/string.go
+++ b/field/string.go
@@ -70,18 +70,30 @@ func (f *String) SetValue(v string) {
 }
 
 func (f *String) Pack() ([]byte, error) {
-	data := []byte(f.value)
+	utf8Data := []byte(f.value)
 
 	if f.spec.Pad != nil {
-		data = f.spec.Pad.Pad(data, f.spec.Length)
+		// Padding is applied to the UTF-8 bytes, but spec.Length must hold
+		// for the encoded bytes, and the two lengths can differ (e.g. "ü" is
+		// two bytes in UTF-8 and one in EBCDIC). Offset the pad target by the
+		// signed difference so that a shrinking encoding pads more and an
+		// expanding encoding pads less.
+		// NOTE(review): assumes Pad counts bytes and every pad character
+		// encodes to exactly one byte - confirm for multi-byte padders.
+		unpaddedPacked, err := f.spec.Enc.Encode(utf8Data)
+		if err != nil {
+			return nil, fmt.Errorf("failed to encode content: %w", err)
+		}
+		padTo := f.spec.Length + len(utf8Data) - len(unpaddedPacked)
+		utf8Data = f.spec.Pad.Pad(utf8Data, padTo)
 	}
 
-	packed, err := f.spec.Enc.Encode(data)
+	packed, err := f.spec.Enc.Encode(utf8Data)
 	if err != nil {
 		return nil, fmt.Errorf("failed to encode content: %w", err)
 	}
 
-	packedLength, err := f.spec.Pref.EncodeLength(f.spec.Length, len(data))
+	packedLength, err := f.spec.Pref.EncodeLength(f.spec.Length, len(packed))
 	if err != nil {
 		return nil, fmt.Errorf("failed to encode length: %w", err)
 	}
@@ -95,18 +107,15 @@ func (f *String) Unpack(data []byte) (int, error) {
 		return 0, fmt.Errorf("failed to decode length: %w", err)
 	}
 
-	raw, read, err := f.spec.Enc.Decode(data[prefBytes:], dataLen)
+	utf8Bytes, read, err := f.spec.Enc.Decode(data[prefBytes:], dataLen)
 	if err != nil {
 		return 0, fmt.Errorf("failed to decode content: %w", err)
 	}
 
 	if f.spec.Pad != nil {
-		raw = f.spec.Pad.Unpad(raw)
-	}
-
-	if err := f.SetBytes(raw); err != nil {
-		return 0, fmt.Errorf("failed to set bytes: %w", err)
+		utf8Bytes = f.spec.Pad.Unpad(utf8Bytes)
 	}
+	f.value = string(utf8Bytes)
 
 	return read + prefBytes, nil
 }
@@ -214,5 +223,6 @@ func (f *String) UnmarshalJSON(b []byte) error {
 	if err != nil {
 		return utils.NewSafeError(err, "failed to JSON unmarshal bytes to string")
 	}
-	return f.SetBytes([]byte(v))
+	f.value = v
+	return nil
 }
diff --git a/field/string_test.go b/field/string_test.go
index 01d1b09..cca7105 100644
--- a/field/string_test.go
+++ b/field/string_test.go
@@ -59,6 +59,62 @@ func TestStringField(t *testing.T) {
 	require.Equal(t, "hello", str.Value())
 }
 
+// TestStringWithNonUTF8Encoding checks that a value whose UTF-8 and EBCDIC
+// byte lengths differ ("ü") packs to and unpacks from exactly spec.Length bytes.
+func TestStringWithNonUTF8Encoding(t *testing.T) {
+	spec := &Spec{
+		Length:      10,
+		Description: "Field",
+		Enc:         encoding.EBCDIC1047,
+		Pref:        prefix.EBCDIC1047.Fixed,
+		Pad:         padding.Left(' '),
+	}
+	str := NewString(spec)
+
+	hullo := []byte{0x88, 0xDC, 0x93, 0x93, 0x96}
+	olluh := []byte{0x96, 0x93, 0x93, 0xDC, 0x88}
+
+	// SetBytes takes UTF-8 encoded bytes
+	require.NoError(t, str.SetBytes([]byte("hüllo")))
+	require.Equal(t, "hüllo", str.Value())
+
+	packed, err := str.Pack()
+	require.NoError(t, err)
+	require.Equal(t, append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, hullo...), packed)
+
+	length, err := str.Unpack(append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, olluh...))
+	require.NoError(t, err)
+	require.Equal(t, 10, length)
+
+	// Bytes returns the UTF-8 encoding of the value
+	b, err := str.Bytes()
+	require.NoError(t, err)
+	require.Equal(t, []byte("ollüh"), b)
+
+	require.Equal(t, "ollüh", str.Value())
+
+	str = NewString(spec)
+	str.Marshal(NewStringValue("hüllo"))
+	packed, err = str.Pack()
+	require.NoError(t, err)
+	require.Equal(t, append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, hullo...), packed)
+
+	str = NewString(spec)
+	length, err = str.Unpack(append([]byte{0x40, 0x40, 0x40, 0x40, 0x40}, olluh...))
+	require.NoError(t, err)
+	require.Equal(t, 10, length)
+	require.Equal(t, "ollüh", str.Value())
+
+	str = NewString(spec)
+	err = str.SetBytes([]byte("hüllo"))
+	require.NoError(t, err)
+	require.Equal(t, "hüllo", str.Value())
+
+	str = NewString(spec)
+	str.SetValue("hüllo")
+	require.Equal(t, "hüllo", str.Value())
+}
+
 func TestStringNil(t *testing.T) {
 	var str *String = nil
 