From c88fb2b8e299460e695911b485f10f5c6a229e49 Mon Sep 17 00:00:00 2001 From: Brian Quinlan Date: Fri, 3 Nov 2023 16:20:12 -0700 Subject: [PATCH 1/5] Add chunked decoding support to CodePage Add chunked decoding support (`startChunkedConversion`) for `CodePage` encodings --- CHANGELOG.md | 2 + lib/src/codepage.dart | 27 ++++++++++ test/codepage_test.dart | 116 +++++++++++++++++++++++++++++++--------- 3 files changed, 119 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 415ade5..180f3dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,8 @@ ## 3.1.2-dev - Require Dart 2.19 +- Add chunked decoding support (`startChunkedConversion`) for `CodePage` + encodings. ## 3.1.1 diff --git a/lib/src/codepage.dart b/lib/src/codepage.dart index 50941b4..e541297 100644 --- a/lib/src/codepage.dart +++ b/lib/src/codepage.dart @@ -277,6 +277,25 @@ CodePageDecoder _createDecoder(String characters) { return _NonBmpCodePageDecoder._(result); } +/// An input Sink for decoders where each input byte can be be considered +/// independantly. +class _CodePageSink implements Sink> { + final Sink _output; + final String Function(List input) _convert; + + _CodePageSink(this._output, this._convert); + + @override + void add(List chunk) { + _output.add(_convert(chunk)); + } + + @override + void close() { + _output.close(); + } +} + /// Code page with non-BMP characters. class _NonBmpCodePageDecoder extends Converter, String> implements CodePageDecoder { @@ -326,6 +345,10 @@ class _NonBmpCodePageDecoder extends Converter, String> } return String.fromCharCodes(buffer); } + + @override + Sink> startChunkedConversion(Sink sink) => + _CodePageSink(sink, convert); } class _BmpCodePageDecoder extends Converter, String> @@ -360,6 +383,10 @@ class _BmpCodePageDecoder extends Converter, String> return String.fromCharCodes(codeUnits); } + @override + Sink> startChunkedConversion(Sink sink) => + _CodePageSink(sink, convert); + String _convertAllowInvalid(List bytes) { var count = bytes.length; var codeUnits = Uint16List(count); diff --git a/test/codepage_test.dart b/test/codepage_test.dart index c0fa45f..e729dc0 100644 --- a/test/codepage_test.dart +++ b/test/codepage_test.dart @@ -2,6 +2,8 @@ // for details. All rights reserved. Use of this source code is governed by a // BSD-style license that can be found in the LICENSE file. +import 'dart:convert'; +import 'dart:core'; import 'dart:typed_data'; import 'package:convert/convert.dart'; @@ -25,24 +27,52 @@ void main() { latinThai, latinArabic ]) { - test('${cp.name} codepage', () { - // All ASCII compatible. - for (var byte = 0x20; byte < 0x7f; byte++) { - expect(cp[byte], byte); - } - // Maps both directions. - for (var byte = 0; byte < 256; byte++) { - var char = cp[byte]; - if (char != 0xFFFD) { - var string = String.fromCharCode(char); - expect(cp.encode(string), [byte]); - expect(cp.decode([byte]), string); + group('${cp.name} codepage', () { + test('ascii compatible', () { + for (var byte = 0x20; byte < 0x7f; byte++) { + expect(cp[byte], byte); } - } - expect(() => cp.decode([0xfffd]), throwsA(isA())); - // Decode works like operator[]. - expect(cp.decode(bytes, allowInvalid: true), - String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]])); + }); + + test('bidirectional mapping', () { + // Maps both directions. + for (var byte = 0; byte < 256; byte++) { + var char = cp[byte]; + if (char != 0xFFFD) { + var string = String.fromCharCode(char); + expect(cp.encode(string), [byte]); + expect(cp.decode([byte]), string); + } + } + }); + + test('decode invalid characters not allowed', () { + expect(() => cp.decode([0xfffd]), throwsA(isA())); + }); + + test('decode invalid characters allowed', () { + // Decode works like operator[]. + expect(cp.decode(bytes, allowInvalid: true), + String.fromCharCodes([for (var i = 0; i < 256; i++) cp[i]])); + }); + + test('chunked conversion', () { + late final String decodedString; + final outputSink = StringConversionSink.withCallback( + (accumulated) => decodedString = accumulated); + final inputSink = cp.decoder.startChunkedConversion(outputSink); + final expected = StringBuffer(); + + for (var byte = 0; byte < 256; byte++) { + var char = cp[byte]; + if (char != 0xFFFD) { + inputSink.add([byte]); + expected.writeCharCode(char); + } + } + inputSink.close(); + expect(decodedString, expected.toString()); + }); }); } test('latin-2 roundtrip', () { @@ -62,14 +92,48 @@ void main() { expect(decoded, latin2text); }); - test('Custom code page', () { - var cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}"); - var result = cp.encode('BADCAFE'); - expect(result, [1, 0, 3, 2, 0, 5, 4]); - expect(() => cp.encode('GAD'), throwsFormatException); - expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]); - expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE'); - expect(() => cp.decode([6, 1, 255]), throwsFormatException); - expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}'); + group('Custom code page', () { + late final CodePage cp; + + setUpAll(() => cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}")); + + test('simple encode', () { + var result = cp.encode('BADCAFE'); + expect(result, [1, 0, 3, 2, 0, 5, 4]); + }); + + test('unencodable character', () { + expect(() => cp.encode('GAD'), throwsFormatException); + }); + + test('unencodable character with invalidCharacter', () { + expect(cp.encode('GAD', invalidCharacter: 0x3F), [0x3F, 0, 3]); + }); + + test('simple decode', () { + expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE'); + }); + + test('undecode byte', () { + expect(() => cp.decode([6, 1, 255]), throwsFormatException); + }); + + test('undecode byte with allowInvalid', () { + expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}'); + }); + + test('chunked conversion', () { + late final String decodedString; + final outputSink = StringConversionSink.withCallback( + (accumulated) => decodedString = accumulated); + final inputSink = cp.decoder.startChunkedConversion(outputSink); + inputSink + ..add([1]) + ..add([0]) + ..add([3]); + + inputSink.close(); + expect(decodedString, 'BAD'); + }); }); } From 97d0da31f2eb0d5c4d2714f0a8212916df46062d Mon Sep 17 00:00:00 2001 From: Brian Quinlan Date: Fri, 3 Nov 2023 16:22:25 -0700 Subject: [PATCH 2/5] Typo --- test/codepage_test.dart | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/codepage_test.dart b/test/codepage_test.dart index e729dc0..fcc2830 100644 --- a/test/codepage_test.dart +++ b/test/codepage_test.dart @@ -114,11 +114,11 @@ void main() { expect(cp.decode([1, 0, 3, 2, 0, 5, 4]), 'BADCAFE'); }); - test('undecode byte', () { + test('undecodable byte', () { expect(() => cp.decode([6, 1, 255]), throwsFormatException); }); - test('undecode byte with allowInvalid', () { + test('undecodable byte with allowInvalid', () { expect(cp.decode([6, 1, 255], allowInvalid: true), '\u{FFFD}B\u{FFFD}'); }); From eafd169d4fb9f64581202e9ee1e2ffffa578a061 Mon Sep 17 00:00:00 2001 From: Brian Quinlan Date: Sun, 5 Nov 2023 13:42:39 -0800 Subject: [PATCH 3/5] Review fixes --- lib/src/codepage.dart | 18 ++++++++++++------ test/codepage_test.dart | 23 ++++++++++++++++++++--- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/lib/src/codepage.dart b/lib/src/codepage.dart index e541297..03c4194 100644 --- a/lib/src/codepage.dart +++ b/lib/src/codepage.dart @@ -277,19 +277,25 @@ CodePageDecoder _createDecoder(String characters) { return _NonBmpCodePageDecoder._(result); } -/// An input Sink for decoders where each input byte can be be considered -/// independantly. -class _CodePageSink implements Sink> { +/// An input [ByteConversionSink] for decoders where each input byte can be be +/// considered independantly. +class _CodePageDecoderSink implements ByteConversionSink { final Sink _output; final String Function(List input) _convert; - _CodePageSink(this._output, this._convert); + _CodePageDecoderSink(this._output, this._convert); @override void add(List chunk) { _output.add(_convert(chunk)); } + @override + void addSlice(List chunk, int start, int end, bool isLast) { + add(chunk.sublist(start, end)); + if (isLast) close(); + } + @override void close() { _output.close(); @@ -348,7 +354,7 @@ class _NonBmpCodePageDecoder extends Converter, String> @override Sink> startChunkedConversion(Sink sink) => - _CodePageSink(sink, convert); + _CodePageDecoderSink(sink, convert); } class _BmpCodePageDecoder extends Converter, String> @@ -385,7 +391,7 @@ class _BmpCodePageDecoder extends Converter, String> @override Sink> startChunkedConversion(Sink sink) => - _CodePageSink(sink, convert); + _CodePageDecoderSink(sink, convert); String _convertAllowInvalid(List bytes) { var count = bytes.length; diff --git a/test/codepage_test.dart b/test/codepage_test.dart index fcc2830..13c1563 100644 --- a/test/codepage_test.dart +++ b/test/codepage_test.dart @@ -127,13 +127,30 @@ void main() { final outputSink = StringConversionSink.withCallback( (accumulated) => decodedString = accumulated); final inputSink = cp.decoder.startChunkedConversion(outputSink); + inputSink ..add([1]) ..add([0]) - ..add([3]); - - inputSink.close(); + ..add([3]) + ..close(); expect(decodedString, 'BAD'); }); + + test('chunked conversion - byte conversion sink', () { + late final String decodedString; + final outputSink = StringConversionSink.withCallback( + (accumulated) => decodedString = accumulated); + final bytes = [1, 0, 3, 2, 0, 5, 4]; + + final inputSink = cp.decoder.startChunkedConversion(outputSink); + expect(inputSink, isA()); + + (inputSink as ByteConversionSink) + ..addSlice(bytes, 1, 3, false) + ..addSlice(bytes, 4, 5, false) + ..addSlice(bytes, 6, 6, true); + + expect(decodedString, 'ADA'); + }); }); } From 79b110391284faad6eb3bdee156ce1d00d73175c Mon Sep 17 00:00:00 2001 From: Brian Quinlan Date: Mon, 6 Nov 2023 03:41:42 -0800 Subject: [PATCH 4/5] Code review fixes #2 --- lib/src/codepage.dart | 18 ++++++------------ test/codepage_test.dart | 4 +--- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/lib/src/codepage.dart b/lib/src/codepage.dart index 03c4194..c298ff5 100644 --- a/lib/src/codepage.dart +++ b/lib/src/codepage.dart @@ -279,21 +279,15 @@ CodePageDecoder _createDecoder(String characters) { /// An input [ByteConversionSink] for decoders where each input byte can be be /// considered independantly. -class _CodePageDecoderSink implements ByteConversionSink { +class _CodePageDecoderSink extends ByteConversionSink { final Sink _output; - final String Function(List input) _convert; + final Converter, String> _decoder; - _CodePageDecoderSink(this._output, this._convert); + _CodePageDecoderSink(this._output, this._decoder); @override void add(List chunk) { - _output.add(_convert(chunk)); - } - - @override - void addSlice(List chunk, int start, int end, bool isLast) { - add(chunk.sublist(start, end)); - if (isLast) close(); + _output.add(_decoder.convert(chunk)); } @override @@ -354,7 +348,7 @@ class _NonBmpCodePageDecoder extends Converter, String> @override Sink> startChunkedConversion(Sink sink) => - _CodePageDecoderSink(sink, convert); + _CodePageDecoderSink(sink, this); } class _BmpCodePageDecoder extends Converter, String> @@ -391,7 +385,7 @@ class _BmpCodePageDecoder extends Converter, String> @override Sink> startChunkedConversion(Sink sink) => - _CodePageDecoderSink(sink, convert); + _CodePageDecoderSink(sink, this); String _convertAllowInvalid(List bytes) { var count = bytes.length; diff --git a/test/codepage_test.dart b/test/codepage_test.dart index 13c1563..cca75e7 100644 --- a/test/codepage_test.dart +++ b/test/codepage_test.dart @@ -93,9 +93,7 @@ void main() { }); group('Custom code page', () { - late final CodePage cp; - - setUpAll(() => cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}")); + late final cp = CodePage('custom', "ABCDEF${"\uFFFD" * 250}"); test('simple encode', () { var result = cp.encode('BADCAFE'); From 4d85997927ee0ea8ca394b1ad836a7af73aced17 Mon Sep 17 00:00:00 2001 From: Brian Quinlan Date: Mon, 6 Nov 2023 06:13:29 -0800 Subject: [PATCH 5/5] Change minimum supported SDK version to 3.0 --- .github/workflows/test-package.yml | 2 +- CHANGELOG.md | 2 +- pubspec.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-package.yml b/.github/workflows/test-package.yml index f464e92..081bfb7 100644 --- a/.github/workflows/test-package.yml +++ b/.github/workflows/test-package.yml @@ -47,7 +47,7 @@ jobs: matrix: # Add macos-latest and/or windows-latest if relevant for this package. os: [ubuntu-latest] - sdk: [2.19.0, dev] + sdk: [3.0.0, dev] steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 - uses: dart-lang/setup-dart@b64355ae6ca0b5d484f0106a033dd1388965d06d diff --git a/CHANGELOG.md b/CHANGELOG.md index 180f3dd..925b1d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ ## 3.1.2-dev -- Require Dart 2.19 +- Require Dart 3.0 - Add chunked decoding support (`startChunkedConversion`) for `CodePage` encodings. diff --git a/pubspec.yaml b/pubspec.yaml index 5756232..77a7edb 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -6,7 +6,7 @@ description: >- repository: https://github.com/dart-lang/convert environment: - sdk: '>=2.19.0 <3.0.0' + sdk: '^3.0.0' dependencies: typed_data: ^1.3.0