diff --git a/global.json b/global.json new file mode 100644 index 0000000..eafb435 --- /dev/null +++ b/global.json @@ -0,0 +1,6 @@ +{ + "sdk": { + "rollForward": "latestMajor", + "allowPrerelease": false + } +} \ No newline at end of file diff --git a/src/libs/Directory.Build.props b/src/libs/Directory.Build.props index ab53f26..1678381 100644 --- a/src/libs/Directory.Build.props +++ b/src/libs/Directory.Build.props @@ -9,7 +9,7 @@ - 2.0.2 + 2.0.3 The fastest tokenizer for GPT-3.5 and GPT-4 inspired by Tiktoken. chatgpt;openai;tiktoken;tokens;gpt-4;gpt-3.5-turbo;cl100k_base;p50k_base true diff --git a/src/libs/Tiktoken.Core/Tiktoken.Core.csproj b/src/libs/Tiktoken.Core/Tiktoken.Core.csproj index 100834e..bc62e6c 100644 --- a/src/libs/Tiktoken.Core/Tiktoken.Core.csproj +++ b/src/libs/Tiktoken.Core/Tiktoken.Core.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 + net4.6.2;netstandard2.0;netstandard2.1;net6.0;net8.0 true $(NoWarn);CA1724 Tiktoken @@ -20,7 +20,7 @@ - + diff --git a/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs b/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs index 1556e56..86ed49f 100644 --- a/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs +++ b/src/libs/Tiktoken.Encodings.Abstractions/EncodingLoader.cs @@ -31,8 +31,31 @@ public static Dictionary LoadEncodingFromManifestResource( throw new InvalidOperationException("Resource not found."); using var reader = new StreamReader(stream); - var dictionary = new Dictionary(new ByteArrayComparer()); + var lines = new List(); while (reader.ReadLine() is { } line) + { + lines.Add(line); + } + + return LoadEncodingFromLines(lines, name); + } + + /// + /// + /// + /// + /// + /// + /// + /// + public static Dictionary LoadEncodingFromLines( + this IReadOnlyList lines, + string name) + { + lines = lines ?? throw new ArgumentNullException(nameof(lines)); + + var dictionary = new Dictionary(new ByteArrayComparer()); + foreach (var line in lines) { if (string.IsNullOrWhiteSpace(line)) { diff --git a/src/libs/Tiktoken.Encodings.Abstractions/Tiktoken.Encodings.Abstractions.csproj b/src/libs/Tiktoken.Encodings.Abstractions/Tiktoken.Encodings.Abstractions.csproj index 9525091..5cc2c60 100644 --- a/src/libs/Tiktoken.Encodings.Abstractions/Tiktoken.Encodings.Abstractions.csproj +++ b/src/libs/Tiktoken.Encodings.Abstractions/Tiktoken.Encodings.Abstractions.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 + net4.6.2;netstandard2.0;netstandard2.1;net6.0;net8.0 Tiktoken.Encodings diff --git a/src/libs/Tiktoken.Encodings.cl100k/Tiktoken.Encodings.cl100k.csproj b/src/libs/Tiktoken.Encodings.cl100k/Tiktoken.Encodings.cl100k.csproj index eb940e8..50297c5 100644 --- a/src/libs/Tiktoken.Encodings.cl100k/Tiktoken.Encodings.cl100k.csproj +++ b/src/libs/Tiktoken.Encodings.cl100k/Tiktoken.Encodings.cl100k.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 + net4.6.2;netstandard2.0;netstandard2.1;net6.0;net8.0 Tiktoken.Encodings diff --git a/src/libs/Tiktoken.Encodings.o200k/Tiktoken.Encodings.o200k.csproj b/src/libs/Tiktoken.Encodings.o200k/Tiktoken.Encodings.o200k.csproj index 87661b5..fc4db45 100644 --- a/src/libs/Tiktoken.Encodings.o200k/Tiktoken.Encodings.o200k.csproj +++ b/src/libs/Tiktoken.Encodings.o200k/Tiktoken.Encodings.o200k.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 + net4.6.2;netstandard2.0;netstandard2.1;net6.0;net8.0 Tiktoken.Encodings diff --git a/src/libs/Tiktoken.Encodings.p50k/Tiktoken.Encodings.p50k.csproj b/src/libs/Tiktoken.Encodings.p50k/Tiktoken.Encodings.p50k.csproj index 87661b5..fc4db45 100644 --- a/src/libs/Tiktoken.Encodings.p50k/Tiktoken.Encodings.p50k.csproj +++ b/src/libs/Tiktoken.Encodings.p50k/Tiktoken.Encodings.p50k.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 + net4.6.2;netstandard2.0;netstandard2.1;net6.0;net8.0 Tiktoken.Encodings diff --git a/src/libs/Tiktoken.Encodings.r50k/Tiktoken.Encodings.r50k.csproj b/src/libs/Tiktoken.Encodings.r50k/Tiktoken.Encodings.r50k.csproj index 87661b5..fc4db45 100644 --- a/src/libs/Tiktoken.Encodings.r50k/Tiktoken.Encodings.r50k.csproj +++ b/src/libs/Tiktoken.Encodings.r50k/Tiktoken.Encodings.r50k.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 + net4.6.2;netstandard2.0;netstandard2.1;net6.0;net8.0 Tiktoken.Encodings diff --git a/src/libs/Tiktoken/Tiktoken.csproj b/src/libs/Tiktoken/Tiktoken.csproj index 4938622..abf4181 100644 --- a/src/libs/Tiktoken/Tiktoken.csproj +++ b/src/libs/Tiktoken/Tiktoken.csproj @@ -1,7 +1,7 @@ - net4.6.1;netstandard2.0;netstandard2.1;net6.0;net7.0;net8.0 + net4.6.2;netstandard2.0;netstandard2.1;net6.0;net8.0