From 58b20532fbdac0e54ed0d4d4e27602c1fdeb2e1e Mon Sep 17 00:00:00 2001 From: HavenDV Date: Mon, 19 Feb 2024 01:06:37 +0400 Subject: [PATCH] feat: Released 1.2.0. --- README.md | 70 +++++++++---------- benchmarks/1.2.0.0_encode.md | 41 +++++++++++ src/Directory.Packages.props | 24 ++++--- .../Tiktoken.Benchmarks/Benchmarks.cs | 12 ++-- .../Tiktoken.Benchmarks.csproj | 2 +- src/libs/Directory.Build.props | 2 +- src/libs/Tiktoken/CoreBPE.cs | 10 +-- src/libs/Tiktoken/Tiktoken.csproj | 2 +- src/libs/Tiktoken/UtfToken.cs | 26 ++++--- .../Tiktoken/Utilities/ByteArrayComparer.cs | 13 +--- .../Tiktoken.UnitTests.csproj | 2 +- 11 files changed, 126 insertions(+), 78 deletions(-) create mode 100644 benchmarks/1.2.0.0_encode.md diff --git a/README.md b/README.md index 668950c..b520e8a 100644 --- a/README.md +++ b/README.md @@ -34,45 +34,45 @@ You can view the reports for each version [here](benchmarks) ``` -BenchmarkDotNet v0.13.7, macOS Ventura 13.5.1 (22G90) [Darwin 22.6.0] +BenchmarkDotNet v0.13.12, macOS Sonoma 14.2.1 (23C71) [Darwin 23.2.0] Apple M1 Pro, 1 CPU, 10 logical and 10 physical cores -.NET SDK 7.0.400 - [Host] : .NET 7.0.10 (7.0.1023.36312), Arm64 RyuJIT AdvSIMD - DefaultJob : .NET 7.0.10 (7.0.1023.36312), Arm64 RyuJIT AdvSIMD +.NET SDK 8.0.100 + [Host] : .NET 8.0.0 (8.0.23.53103), Arm64 RyuJIT AdvSIMD + DefaultJob : .NET 8.0.0 (8.0.23.53103), Arm64 RyuJIT AdvSIMD ``` -| Method | Categories | Data | Mean | Ratio | Gen0 | Gen1 | Allocated | Alloc Ratio | -|--------------------------- |------------ |-------------------- |---------------:|------:|---------:|---------:|----------:|------------:| -| **SharpTokenV1_2_8_** | **CountTokens** | **1. (...)57. [19866]** | **1,450,007.0 ns** | **1.00** | **292.9688** | **146.4844** | **1846187 B** | **1.00** | -| TiktokenSharpV1_0_6_ | CountTokens | 1. (...)57. [19866] | 977,818.9 ns | 0.67 | 250.0000 | 125.0000 | 1571155 B | 0.85 | -| TokenizerLibV1_3_2_ | CountTokens | 1. (...)57. [19866] | 854,357.2 ns | 0.59 | 246.0938 | 85.9375 | 1547673 B | 0.84 | -| Tiktoken_ | CountTokens | 1. (...)57. [19866] | 355,029.1 ns | 0.24 | 49.3164 | - | 309449 B | 0.17 | -| | | | | | | | | | -| **SharpTokenV1_2_8_** | **CountTokens** | **Hello, World!** | **1,722.2 ns** | **1.00** | **0.5264** | **-** | **3304 B** | **1.00** | -| TiktokenSharpV1_0_6_ | CountTokens | Hello, World! | 6,291.2 ns | 3.65 | 2.1820 | 0.0305 | 13728 B | 4.15 | -| TokenizerLibV1_3_2_ | CountTokens | Hello, World! | 604.0 ns | 0.35 | 0.2356 | - | 1480 B | 0.45 | -| Tiktoken_ | CountTokens | Hello, World! | 247.0 ns | 0.14 | 0.0420 | - | 264 B | 0.08 | -| | | | | | | | | | -| **SharpTokenV1_2_8_** | **CountTokens** | **King(...)edy. [275]** | **15,377.1 ns** | **1.00** | **4.1199** | **0.1526** | **26008 B** | **1.00** | -| TiktokenSharpV1_0_6_ | CountTokens | King(...)edy. [275] | 14,758.1 ns | 0.96 | 5.1117 | 0.1526 | 32096 B | 1.23 | -| TokenizerLibV1_3_2_ | CountTokens | King(...)edy. [275] | 8,366.9 ns | 0.54 | 3.0823 | 0.1373 | 19344 B | 0.74 | -| Tiktoken_ | CountTokens | King(...)edy. [275] | 3,838.6 ns | 0.25 | 0.6409 | - | 4032 B | 0.16 | -| | | | | | | | | | -| **SharpTokenV1_2_8_Encode** | **Encode** | **1. (...)57. [19866]** | **1,393,026.6 ns** | **1.00** | **292.9688** | **146.4844** | **1846187 B** | **1.00** | -| TiktokenSharpV1_0_6_Encode | Encode | 1. (...)57. [19866] | 1,246,776.8 ns | 0.90 | 250.0000 | 125.0000 | 1571155 B | 0.85 | -| TokenizerLibV1_3_2_Encode | Encode | 1. (...)57. [19866] | 852,519.6 ns | 0.61 | 246.0938 | 85.9375 | 1547673 B | 0.84 | -| Tiktoken_Encode | Encode | 1. (...)57. [19866] | 378,546.7 ns | 0.27 | 59.5703 | 2.4414 | 375665 B | 0.20 | -| | | | | | | | | | -| **SharpTokenV1_2_8_Encode** | **Encode** | **Hello, World!** | **1,719.3 ns** | **1.00** | **0.5264** | **-** | **3304 B** | **1.00** | -| TiktokenSharpV1_0_6_Encode | Encode | Hello, World! | 6,293.3 ns | 3.66 | 2.1820 | 0.0305 | 13728 B | 4.15 | -| TokenizerLibV1_3_2_Encode | Encode | Hello, World! | 607.6 ns | 0.35 | 0.2356 | - | 1480 B | 0.45 | -| Tiktoken_Encode | Encode | Hello, World! | 320.6 ns | 0.19 | 0.1135 | - | 712 B | 0.22 | -| | | | | | | | | | -| **SharpTokenV1_2_8_Encode** | **Encode** | **King(...)edy. [275]** | **15,444.0 ns** | **1.00** | **4.1199** | **0.1526** | **26008 B** | **1.00** | -| TiktokenSharpV1_0_6_Encode | Encode | King(...)edy. [275] | 14,704.0 ns | 0.95 | 5.1117 | 0.1526 | 32096 B | 1.23 | -| TokenizerLibV1_3_2_Encode | Encode | King(...)edy. [275] | 8,556.8 ns | 0.55 | 3.0823 | 0.1373 | 19344 B | 0.74 | -| Tiktoken_Encode | Encode | King(...)edy. [275] | 4,136.4 ns | 0.27 | 0.8011 | - | 5056 B | 0.19 | +| Method | Categories | Data | Mean | Median | Ratio | Gen0 | Gen1 | Gen2 | Allocated | Alloc Ratio | +|--------------------------- |------------ |-------------------- |---------------:|---------------:|------:|---------:|---------:|-------:|----------:|------------:| +| **SharpTokenV1_2_16_** | **CountTokens** | **1. (...)57. [19866]** | **1,554,552.0 ns** | **1,552,769.4 ns** | **1.00** | **292.9688** | **146.4844** | **-** | **1846147 B** | **1.00** | +| TiktokenSharpV1_0_9_ | CountTokens | 1. (...)57. [19866] | 1,242,157.7 ns | 1,241,657.7 ns | 0.80 | 253.9063 | 117.1875 | 3.9063 | 1570786 B | 0.85 | +| TokenizerLibV1_3_3_ | CountTokens | 1. (...)57. [19866] | 815,490.5 ns | 806,761.4 ns | 0.52 | 247.0703 | 98.6328 | 0.9766 | 1547678 B | 0.84 | +| Tiktoken_ | CountTokens | 1. (...)57. [19866] | 311,744.2 ns | 311,591.0 ns | 0.20 | 49.3164 | - | - | 309449 B | 0.17 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_** | **CountTokens** | **Hello, World!** | **1,585.8 ns** | **1,586.5 ns** | **1.00** | **0.5188** | **0.0019** | **-** | **3264 B** | **1.00** | +| TiktokenSharpV1_0_9_ | CountTokens | Hello, World! | 5,806.8 ns | 5,805.7 ns | 3.66 | 2.1286 | 0.0381 | 0.0076 | 13344 B | 4.09 | +| TokenizerLibV1_3_3_ | CountTokens | Hello, World! | 766.2 ns | 766.7 ns | 0.48 | 0.2356 | - | - | 1480 B | 0.45 | +| Tiktoken_ | CountTokens | Hello, World! | 210.9 ns | 210.2 ns | 0.13 | 0.0420 | - | - | 264 B | 0.08 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_** | **CountTokens** | **King(...)edy. [275]** | **13,851.9 ns** | **13,808.5 ns** | **1.00** | **4.1351** | **0.0153** | **-** | **25968 B** | **1.00** | +| TiktokenSharpV1_0_9_ | CountTokens | King(...)edy. [275] | 13,387.6 ns | 13,395.3 ns | 0.97 | 5.0659 | 0.1984 | 0.0153 | 31712 B | 1.22 | +| TokenizerLibV1_3_3_ | CountTokens | King(...)edy. [275] | 10,861.4 ns | 10,865.2 ns | 0.78 | 3.0975 | 0.1526 | 0.0153 | 19344 B | 0.74 | +| Tiktoken_ | CountTokens | King(...)edy. [275] | 3,162.3 ns | 3,162.0 ns | 0.23 | 0.6447 | - | - | 4064 B | 0.16 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_Encode** | **Encode** | **1. (...)57. [19866]** | **1,327,775.1 ns** | **1,330,166.1 ns** | **1.00** | **294.9219** | **142.5781** | **1.9531** | **1846151 B** | **1.00** | +| TiktokenSharpV1_0_9_Encode | Encode | 1. (...)57. [19866] | 1,016,985.4 ns | 994,095.3 ns | 0.80 | 250.0000 | 125.0000 | - | 1570772 B | 0.85 | +| TokenizerLibV1_3_3_Encode | Encode | 1. (...)57. [19866] | 804,657.4 ns | 803,549.7 ns | 0.61 | 247.0703 | 108.3984 | 0.9766 | 1547678 B | 0.84 | +| Tiktoken_Encode | Encode | 1. (...)57. [19866] | 331,107.8 ns | 331,142.1 ns | 0.25 | 59.5703 | 2.4414 | - | 375601 B | 0.20 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_Encode** | **Encode** | **Hello, World!** | **1,891.1 ns** | **1,894.6 ns** | **1.00** | **0.5188** | **0.0019** | **-** | **3264 B** | **1.00** | +| TiktokenSharpV1_0_9_Encode | Encode | Hello, World! | 5,816.9 ns | 5,824.0 ns | 3.08 | 2.1210 | 0.0381 | - | 13344 B | 4.09 | +| TokenizerLibV1_3_3_Encode | Encode | Hello, World! | 496.7 ns | 496.8 ns | 0.26 | 0.2356 | - | - | 1480 B | 0.45 | +| Tiktoken_Encode | Encode | Hello, World! | 265.3 ns | 264.7 ns | 0.14 | 0.1030 | - | - | 648 B | 0.20 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_Encode** | **Encode** | **King(...)edy. [275]** | **17,497.7 ns** | **17,480.3 ns** | **1.00** | **4.1199** | **0.0305** | **-** | **25968 B** | **1.00** | +| TiktokenSharpV1_0_9_Encode | Encode | King(...)edy. [275] | 13,374.0 ns | 13,348.4 ns | 0.76 | 5.0659 | 0.1984 | 0.0153 | 31712 B | 1.22 | +| TokenizerLibV1_3_3_Encode | Encode | King(...)edy. [275] | 7,333.9 ns | 7,338.7 ns | 0.42 | 3.0899 | 0.1450 | 0.0076 | 19344 B | 0.74 | +| Tiktoken_Encode | Encode | King(...)edy. [275] | 3,450.2 ns | 3,452.9 ns | 0.20 | 0.7973 | - | - | 5024 B | 0.19 | diff --git a/benchmarks/1.2.0.0_encode.md b/benchmarks/1.2.0.0_encode.md new file mode 100644 index 0000000..8d45e5d --- /dev/null +++ b/benchmarks/1.2.0.0_encode.md @@ -0,0 +1,41 @@ +``` + +BenchmarkDotNet v0.13.12, macOS Sonoma 14.2.1 (23C71) [Darwin 23.2.0] +Apple M1 Pro, 1 CPU, 10 logical and 10 physical cores +.NET SDK 8.0.100 + [Host] : .NET 8.0.0 (8.0.23.53103), Arm64 RyuJIT AdvSIMD + DefaultJob : .NET 8.0.0 (8.0.23.53103), Arm64 RyuJIT AdvSIMD + + +``` +| Method | Categories | Data | Mean | Median | Ratio | Gen0 | Gen1 | Gen2 | Allocated | Alloc Ratio | +|--------------------------- |------------ |-------------------- |---------------:|---------------:|------:|---------:|---------:|-------:|----------:|------------:| +| **SharpTokenV1_2_16_** | **CountTokens** | **1. (...)57. [19866]** | **1,554,552.0 ns** | **1,552,769.4 ns** | **1.00** | **292.9688** | **146.4844** | **-** | **1846147 B** | **1.00** | +| TiktokenSharpV1_0_9_ | CountTokens | 1. (...)57. [19866] | 1,242,157.7 ns | 1,241,657.7 ns | 0.80 | 253.9063 | 117.1875 | 3.9063 | 1570786 B | 0.85 | +| TokenizerLibV1_3_3_ | CountTokens | 1. (...)57. [19866] | 815,490.5 ns | 806,761.4 ns | 0.52 | 247.0703 | 98.6328 | 0.9766 | 1547678 B | 0.84 | +| Tiktoken_ | CountTokens | 1. (...)57. [19866] | 311,744.2 ns | 311,591.0 ns | 0.20 | 49.3164 | - | - | 309449 B | 0.17 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_** | **CountTokens** | **Hello, World!** | **1,585.8 ns** | **1,586.5 ns** | **1.00** | **0.5188** | **0.0019** | **-** | **3264 B** | **1.00** | +| TiktokenSharpV1_0_9_ | CountTokens | Hello, World! | 5,806.8 ns | 5,805.7 ns | 3.66 | 2.1286 | 0.0381 | 0.0076 | 13344 B | 4.09 | +| TokenizerLibV1_3_3_ | CountTokens | Hello, World! | 766.2 ns | 766.7 ns | 0.48 | 0.2356 | - | - | 1480 B | 0.45 | +| Tiktoken_ | CountTokens | Hello, World! | 210.9 ns | 210.2 ns | 0.13 | 0.0420 | - | - | 264 B | 0.08 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_** | **CountTokens** | **King(...)edy. [275]** | **13,851.9 ns** | **13,808.5 ns** | **1.00** | **4.1351** | **0.0153** | **-** | **25968 B** | **1.00** | +| TiktokenSharpV1_0_9_ | CountTokens | King(...)edy. [275] | 13,387.6 ns | 13,395.3 ns | 0.97 | 5.0659 | 0.1984 | 0.0153 | 31712 B | 1.22 | +| TokenizerLibV1_3_3_ | CountTokens | King(...)edy. [275] | 10,861.4 ns | 10,865.2 ns | 0.78 | 3.0975 | 0.1526 | 0.0153 | 19344 B | 0.74 | +| Tiktoken_ | CountTokens | King(...)edy. [275] | 3,162.3 ns | 3,162.0 ns | 0.23 | 0.6447 | - | - | 4064 B | 0.16 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_Encode** | **Encode** | **1. (...)57. [19866]** | **1,327,775.1 ns** | **1,330,166.1 ns** | **1.00** | **294.9219** | **142.5781** | **1.9531** | **1846151 B** | **1.00** | +| TiktokenSharpV1_0_9_Encode | Encode | 1. (...)57. [19866] | 1,016,985.4 ns | 994,095.3 ns | 0.80 | 250.0000 | 125.0000 | - | 1570772 B | 0.85 | +| TokenizerLibV1_3_3_Encode | Encode | 1. (...)57. [19866] | 804,657.4 ns | 803,549.7 ns | 0.61 | 247.0703 | 108.3984 | 0.9766 | 1547678 B | 0.84 | +| Tiktoken_Encode | Encode | 1. (...)57. [19866] | 331,107.8 ns | 331,142.1 ns | 0.25 | 59.5703 | 2.4414 | - | 375601 B | 0.20 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_Encode** | **Encode** | **Hello, World!** | **1,891.1 ns** | **1,894.6 ns** | **1.00** | **0.5188** | **0.0019** | **-** | **3264 B** | **1.00** | +| TiktokenSharpV1_0_9_Encode | Encode | Hello, World! | 5,816.9 ns | 5,824.0 ns | 3.08 | 2.1210 | 0.0381 | - | 13344 B | 4.09 | +| TokenizerLibV1_3_3_Encode | Encode | Hello, World! | 496.7 ns | 496.8 ns | 0.26 | 0.2356 | - | - | 1480 B | 0.45 | +| Tiktoken_Encode | Encode | Hello, World! | 265.3 ns | 264.7 ns | 0.14 | 0.1030 | - | - | 648 B | 0.20 | +| | | | | | | | | | | | +| **SharpTokenV1_2_16_Encode** | **Encode** | **King(...)edy. [275]** | **17,497.7 ns** | **17,480.3 ns** | **1.00** | **4.1199** | **0.0305** | **-** | **25968 B** | **1.00** | +| TiktokenSharpV1_0_9_Encode | Encode | King(...)edy. [275] | 13,374.0 ns | 13,348.4 ns | 0.76 | 5.0659 | 0.1984 | 0.0153 | 31712 B | 1.22 | +| TokenizerLibV1_3_3_Encode | Encode | King(...)edy. [275] | 7,333.9 ns | 7,338.7 ns | 0.42 | 3.0899 | 0.1450 | 0.0076 | 19344 B | 0.74 | +| Tiktoken_Encode | Encode | King(...)edy. [275] | 3,450.2 ns | 3,452.9 ns | 0.20 | 0.7973 | - | - | 5024 B | 0.19 | diff --git a/src/Directory.Packages.props b/src/Directory.Packages.props index d1019f8..00fbc61 100644 --- a/src/Directory.Packages.props +++ b/src/Directory.Packages.props @@ -3,19 +3,25 @@ true - + - - - - - - - + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + + + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + - + \ No newline at end of file diff --git a/src/benchmarks/Tiktoken.Benchmarks/Benchmarks.cs b/src/benchmarks/Tiktoken.Benchmarks/Benchmarks.cs index ec4d35e..515ba5c 100644 --- a/src/benchmarks/Tiktoken.Benchmarks/Benchmarks.cs +++ b/src/benchmarks/Tiktoken.Benchmarks/Benchmarks.cs @@ -30,15 +30,15 @@ public async Task GlobalSetup() [Benchmark(Baseline = true)] [BenchmarkCategory("Encode")] - public List SharpTokenV1_2_8_Encode() => _sharpToken.Encode(Data); + public List SharpTokenV1_2_16_Encode() => _sharpToken.Encode(Data); [Benchmark] [BenchmarkCategory("Encode")] - public List TiktokenSharpV1_0_6_Encode() => _tiktokenSharp.Encode(Data); + public List TiktokenSharpV1_0_9_Encode() => _tiktokenSharp.Encode(Data); [Benchmark] [BenchmarkCategory("Encode")] - public IReadOnlyCollection TokenizerLibV1_3_2_Encode() => _tokenizerLib!.Encode(Data, ArraySegment.Empty); + public IReadOnlyCollection TokenizerLibV1_3_3_Encode() => _tokenizerLib!.Encode(Data, ArraySegment.Empty); [Benchmark] [BenchmarkCategory("Encode")] @@ -47,15 +47,15 @@ public async Task GlobalSetup() [Benchmark(Baseline = true)] [BenchmarkCategory("CountTokens")] - public int SharpTokenV1_2_8_() => _sharpToken.Encode(Data).Count; + public int SharpTokenV1_2_16_() => _sharpToken.Encode(Data).Count; [Benchmark] [BenchmarkCategory("CountTokens")] - public int TiktokenSharpV1_0_6_() => _tiktokenSharp.Encode(Data).Count; + public int TiktokenSharpV1_0_9_() => _tiktokenSharp.Encode(Data).Count; [Benchmark] [BenchmarkCategory("CountTokens")] - public int TokenizerLibV1_3_2_() => _tokenizerLib!.Encode(Data, ArraySegment.Empty).Count; + public int TokenizerLibV1_3_3_() => _tokenizerLib!.Encode(Data, ArraySegment.Empty).Count; [Benchmark] [BenchmarkCategory("CountTokens")] diff --git a/src/benchmarks/Tiktoken.Benchmarks/Tiktoken.Benchmarks.csproj b/src/benchmarks/Tiktoken.Benchmarks/Tiktoken.Benchmarks.csproj index 110a673..2d18b20 100644 --- a/src/benchmarks/Tiktoken.Benchmarks/Tiktoken.Benchmarks.csproj +++ b/src/benchmarks/Tiktoken.Benchmarks/Tiktoken.Benchmarks.csproj @@ -2,7 +2,7 @@ Exe - net7.0 + net8.0 $(NoWarn);CS8002 diff --git a/src/libs/Directory.Build.props b/src/libs/Directory.Build.props index a766c68..1b61b37 100644 --- a/src/libs/Directory.Build.props +++ b/src/libs/Directory.Build.props @@ -9,7 +9,7 @@ - 1.1.3 + 1.2.0 true true tryAGI, aiqinxuancai and contributors diff --git a/src/libs/Tiktoken/CoreBPE.cs b/src/libs/Tiktoken/CoreBPE.cs index a0b3c2f..aee6df1 100644 --- a/src/libs/Tiktoken/CoreBPE.cs +++ b/src/libs/Tiktoken/CoreBPE.cs @@ -15,19 +15,19 @@ public class CoreBpe { private IReadOnlyDictionary SpecialTokensEncoder { get; set; } private IReadOnlyDictionary Encoder { get; set; } - private IReadOnlyDictionary FastEncoder { get; set; } + private Dictionary FastEncoder { get; set; } internal bool EnableCache { get; set; } = true; - private IDictionary> FastCache { get; set; } = + private ConcurrentDictionary> FastCache { get; set; } = new ConcurrentDictionary>(); - private IDictionary FastCacheCounts { get; set; } = + private ConcurrentDictionary FastCacheCounts { get; set; } = new ConcurrentDictionary(); private Regex SpecialRegex { get; set; } private Regex Regex { get; set; } - private IReadOnlyDictionary Decoder { get; set; } - private IReadOnlyDictionary SpecialTokensDecoder { get; set; } + private Dictionary Decoder { get; set; } + private Dictionary SpecialTokensDecoder { get; set; } /// /// diff --git a/src/libs/Tiktoken/Tiktoken.csproj b/src/libs/Tiktoken/Tiktoken.csproj index 0c8211a..b5f3b4b 100644 --- a/src/libs/Tiktoken/Tiktoken.csproj +++ b/src/libs/Tiktoken/Tiktoken.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0 + net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 true $(NoWarn);CA1724 diff --git a/src/libs/Tiktoken/UtfToken.cs b/src/libs/Tiktoken/UtfToken.cs index e716133..08009a1 100644 --- a/src/libs/Tiktoken/UtfToken.cs +++ b/src/libs/Tiktoken/UtfToken.cs @@ -1,13 +1,21 @@ namespace Tiktoken; -public class UtfToken +/// +/// +/// +/// +/// +public class UtfToken( + string token, + int encodedTokens) { - public string Token { get; private set; } - public int EncodedTokens { get; internal set; } - - public UtfToken(string token, int encodedTokens) - { - Token = token; - EncodedTokens = encodedTokens; - } + /// + /// + /// + public string Token { get; private set; } = token; + + /// + /// + /// + public int EncodedTokens { get; internal set; } = encodedTokens; } \ No newline at end of file diff --git a/src/libs/Tiktoken/Utilities/ByteArrayComparer.cs b/src/libs/Tiktoken/Utilities/ByteArrayComparer.cs index 06fa7f2..8274c27 100644 --- a/src/libs/Tiktoken/Utilities/ByteArrayComparer.cs +++ b/src/libs/Tiktoken/Utilities/ByteArrayComparer.cs @@ -39,15 +39,8 @@ public bool Equals(byte[]? x, byte[]? y) /// public int GetHashCode(byte[] obj) { - if (obj == null) - { - throw new ArgumentNullException(nameof(obj)); - } - int hash = 17; - foreach (byte b in obj) - { - hash = hash * 31 + b; - } - return hash; + obj = obj ?? throw new ArgumentNullException(nameof(obj)); + + return obj.Aggregate(17, (current, b) => current * 31 + b); } } \ No newline at end of file diff --git a/src/tests/Tiktoken.UnitTests/Tiktoken.UnitTests.csproj b/src/tests/Tiktoken.UnitTests/Tiktoken.UnitTests.csproj index 3523169..05b7463 100644 --- a/src/tests/Tiktoken.UnitTests/Tiktoken.UnitTests.csproj +++ b/src/tests/Tiktoken.UnitTests/Tiktoken.UnitTests.csproj @@ -1,7 +1,7 @@ - net7.0 + net8.0