diff --git a/README.md b/README.md index e7a58dc..86f9298 100644 --- a/README.md +++ b/README.md @@ -18,10 +18,9 @@ We will be happy to accept any PR. ### Usage ```csharp -using Tiktoken.Encodings; using Tiktoken; -var encoder = Encoders.ForModel("gpt-4o"); // or explicitly new Encoder(new O200KBase()) +var encoder = ModelToEncoder.For("gpt-4o"); // or explicitly using new Encoder(new O200KBase()) var tokens = encoder.Encode("hello world"); // [15339, 1917] var text = encoder.Decode(tokens); // hello world var numberOfTokens = encoder.CountTokens(text); // 2 diff --git a/src/libs/Directory.Build.props b/src/libs/Directory.Build.props index a882e96..ab53f26 100644 --- a/src/libs/Directory.Build.props +++ b/src/libs/Directory.Build.props @@ -9,7 +9,7 @@ - 2.0.1 + 2.0.2 The fastest tokenizer for GPT-3.5 and GPT-4 inspired by Tiktoken. chatgpt;openai;tiktoken;tokens;gpt-4;gpt-3.5-turbo;cl100k_base;p50k_base true diff --git a/src/libs/Tiktoken/ModelToEncoder.cs b/src/libs/Tiktoken/ModelToEncoder.cs new file mode 100644 index 0000000..654c4fd --- /dev/null +++ b/src/libs/Tiktoken/ModelToEncoder.cs @@ -0,0 +1,31 @@ +namespace Tiktoken; + +/// +/// +/// +public static class ModelToEncoder +{ + /// + /// Returns encoder by model name. + /// + /// gpt-3.5-turbo + /// + public static Encoder For(string modelName) + { + return new Encoder(ModelToEncoding.For(modelName)); + } + + /// + /// Returns encoder by model name or null. + /// + /// gpt-3.5-turbo + /// + public static Encoder? TryFor(string modelName) + { + var encoding = ModelToEncoding.TryFor(modelName); + + return encoding == null + ? null + : new Encoder(encoding); + } +} \ No newline at end of file diff --git a/src/libs/Tiktoken/Encoders.cs b/src/libs/Tiktoken/ModelToEncoding.cs similarity index 54% rename from src/libs/Tiktoken/Encoders.cs rename to src/libs/Tiktoken/ModelToEncoding.cs index 9662248..e3aee22 100644 --- a/src/libs/Tiktoken/Encoders.cs +++ b/src/libs/Tiktoken/ModelToEncoding.cs @@ -5,33 +5,9 @@ namespace Tiktoken; /// /// /// -public static class Encoders +public static class ModelToEncoding { - /// - /// Returns encoder by model name. - /// - /// gpt-3.5-turbo - /// - public static Encoder ForModel(string modelName) - { - return new Encoder(GetEncodingByModel(modelName)); - } - - /// - /// Returns encoder by model name or null. - /// - /// gpt-3.5-turbo - /// - public static Encoder? TryForModel(string modelName) - { - var encoding = TryGetEncodingByModel(modelName); - - return encoding == null - ? null - : new Encoder(encoding); - } - - private static Dictionary ModelToEncoding { get; } = new() + private static Dictionary Dictionary { get; } = new() { // chat { "gpt-4o", new O200KBase() }, @@ -51,9 +27,9 @@ public static Encoder ForModel(string modelName) /// gpt-4 gpt-3.5-turbo ... /// /// - public static Encoding? TryGetEncodingByModel(string modelName) + public static Encoding? TryFor(string modelName) { - return ModelToEncoding + return Dictionary .FirstOrDefault(a => modelName.StartsWith(a.Key, StringComparison.Ordinal)).Value; } @@ -63,9 +39,9 @@ public static Encoder ForModel(string modelName) /// gpt-4 gpt-3.5-turbo ... /// /// - public static Encoding GetEncodingByModel(string modelName) + public static Encoding For(string modelName) { - return TryGetEncodingByModel(modelName) ?? + return TryFor(modelName) ?? throw new ArgumentException($"Model name {modelName} is not supported."); } } \ No newline at end of file