diff --git a/crates/bpe-openai/Cargo.toml b/crates/bpe-openai/Cargo.toml
new file mode 100644
index 0000000..2975731
--- /dev/null
+++ b/crates/bpe-openai/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "bpe-openai"
+version = "0.1.0"
+edition = "2021"
+description = "Prebuilt fast byte-pair encoders for OpenAI."
+repository = "https://github.com/github/rust-gems"
+license = "MIT"
+keywords = ["tokenizer", "algorithm", "encoding", "bpe"]
+categories = ["algorithms", "data-structures", "encoding", "science"]
+
+[lib]
+crate-type = ["lib", "staticlib"]
+bench = false
+
+[dependencies]
+bpe = { version = "0.1.0", path = "../bpe" }
+rmp-serde = "1"
+serde = { version = "1" }
+
+[build-dependencies]
+bpe = { version = "0.1.0", path = "../bpe", features = ["tiktoken-rs"] }
+rmp-serde = "1"
+tiktoken-rs = { version = "0.5" }
+serde = { version = "1" }
diff --git a/crates/bpe-openai/README.md b/crates/bpe-openai/README.md
new file mode 100644
index 0000000..e06d488
--- /dev/null
+++ b/crates/bpe-openai/README.md
@@ -0,0 +1,55 @@
+# OpenAI Byte Pair Encoders
+
+Fast tokenizers for OpenAI token sets based on the [bpe](https://crates.io/crates/bpe) crate.
+Serialized BPE instances are generated during build and lazily loaded at runtime as static values.
+The overhead of loading the tokenizers is small because it happens only once per process and only requires deserialization (as opposed to actually building the internal data structures).
+For convenience it re-exports the `bpe` crate so that depending on this crate is enough to use these tokenizers.
+
+Supported token sets:
+
+- r50k
+- p50k
+- cl100k
+- o200k
+
+## Usage
+
+Add a dependency by running
+
+```sh
+cargo add bpe-openai
+```
+
+or by adding the following to `Cargo.toml`
+
+```toml
+[dependencies]
+bpe-openai = "0.1"
+```
+
+Counting tokens is as simple as:
+
+```rust
+use bpe_openai::cl100k;
+
+fn main() {
+    let bpe = cl100k();
+    let count = bpe.count("Hello, world!".as_bytes());
+    println!("{count}");
+}
+```
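+
+If you need the token ids rather than just their count, the underlying encoder can produce them as well. A minimal sketch, assuming the `encode_via_backtracking` method exposed by the re-exported `bpe` crate:
+
+```rust
+use bpe_openai::cl100k;
+
+fn main() {
+    let bpe = cl100k();
+    // Assumption: `encode_via_backtracking` comes from the re-exported `bpe` crate.
+    let tokens = bpe.encode_via_backtracking("Hello, world!".as_bytes());
+    println!("{tokens:?}");
+}
+```
+
+For more detailed documentation we refer to the [bpe](https://crates.io/crates/bpe) crate.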
diff --git a/crates/bpe-openai/build.rs b/crates/bpe-openai/build.rs
new file mode 100644
index 0000000..b4f3837
--- /dev/null
+++ b/crates/bpe-openai/build.rs
@@ -0,0 +1,47 @@
+use std::env;
+use std::fs::File;
+use std::path::PathBuf;
+
+use bpe::byte_pair_encoding::BytePairEncoding;
+use serde::Serialize;
+use tiktoken_rs::CoreBPE;
+
+fn main() {
+    serialize_tokens(
+        "r50k",
+        &tiktoken_rs::r50k_base().expect("tiktoken initialization must not fail!"),
+        50256,
+        1,
+    );
+    serialize_tokens(
+        "p50k",
+        &tiktoken_rs::p50k_base().expect("tiktoken initialization must not fail!"),
+        50280,
+        1,
+    );
+    // A suitable hash factor is needed to prevent hash collisions; values like
+    // this one can be found with the bpe crate's `find_hash_factor_for_tiktoken`.
+    serialize_tokens(
+        "cl100k",
+        &tiktoken_rs::cl100k_base().expect("tiktoken initialization must not fail!"),
+        100256,
+        17846336922010275747,
+    );
+    serialize_tokens(
+        "o200k",
+        &tiktoken_rs::o200k_base().expect("tiktoken initialization must not fail!"),
+        199998,
+        17846336922010275747,
+    );
+    println!("cargo::rerun-if-changed=build.rs");
+}
+
+fn serialize_tokens(name: &str, bpe: &CoreBPE, num_tokens: usize, hash_factor: u64) {
+    let mut path = PathBuf::from(env::var("OUT_DIR").expect("OUT_DIR is set during build"));
+    path.push(format!("bpe_{name}.dict"));
+    let file = File::create(path).expect("can create output file");
+    let mut serializer = rmp_serde::Serializer::new(file);
+    let bpe = BytePairEncoding::from_tiktoken(bpe, num_tokens, Some(hash_factor));
+    bpe.serialize(&mut serializer)
+        .expect("serialization succeeds");
+}
diff --git a/crates/bpe-openai/src/lib.rs b/crates/bpe-openai/src/lib.rs
new file mode 100644
index 0000000..65c3619
--- /dev/null
+++ b/crates/bpe-openai/src/lib.rs
@@ -0,0 +1,66 @@
+use std::sync::LazyLock;
+
+use bpe::byte_pair_encoding::BytePairEncoding;
+
+static BPE_R50K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
+    let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_r50k.dict"));
+    rmp_serde::from_slice(bytes).expect("valid bpe data")
+});
+
+static BPE_P50K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
+    let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_p50k.dict"));
+    rmp_serde::from_slice(bytes).expect("valid bpe data")
+});
+
+static BPE_CL100K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
+    let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_cl100k.dict"));
+    rmp_serde::from_slice(bytes).expect("valid bpe data")
+});
+
+static BPE_O200K: LazyLock<BytePairEncoding> = LazyLock::new(|| {
+    let bytes = include_bytes!(concat!(env!("OUT_DIR"), "/bpe_o200k.dict"));
+    rmp_serde::from_slice(bytes).expect("valid bpe data")
+});
+
+pub use bpe::*;
+
+pub fn r50k() -> &'static BytePairEncoding {
+    &BPE_R50K
+}
+
+pub fn p50k() -> &'static BytePairEncoding {
+    &BPE_P50K
+}
+
+pub fn cl100k() -> &'static BytePairEncoding {
+    &BPE_CL100K
+}
+
+pub fn o200k() -> &'static BytePairEncoding {
+    &BPE_O200K
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn can_load_r50k() {
+        r50k().count("".as_bytes());
+    }
+
+    #[test]
+    fn can_load_p50k() {
+        p50k().count("".as_bytes());
+    }
+
+    #[test]
+    fn can_load_cl100k() {
+        cl100k().count("".as_bytes());
+    }
+
+    #[test]
+    fn can_load_o200k() {
+        o200k().count("".as_bytes());
+    }
+}
diff --git a/crates/bpe/Cargo.toml b/crates/bpe/Cargo.toml
index 3e2e190..f48ad10 100644
--- a/crates/bpe/Cargo.toml
+++ b/crates/bpe/Cargo.toml
@@ -2,6 +2,11 @@
 name = "bpe"
 version = "0.1.0"
 edition = "2021"
+description = "Fast byte-pair encoding implementation."
+repository = "https://github.com/github/rust-gems"
+license = "MIT"
+keywords = ["tokenizer", "algorithm", "encoding", "bpe"]
+categories = ["algorithms", "data-structures", "encoding", "science"]
 
 [lib]
 crate-type = ["lib", "staticlib"]
@@ -11,6 +16,7 @@ bench = false
 name = "performance"
 path = "benches/performance.rs"
 harness = false
+test = false
 
 [features]
 rand = ["dep:rand"]
+repository = "https://github.com/github/rust-gems" +license = "MIT" +keywords = ["tokenizer", "algorithm", "encoding", "bpe"] +categories = ["algorithms", "data-structures", "encoding", "science"] [lib] crate-type = ["lib", "staticlib"] @@ -11,6 +16,7 @@ bench = false name = "performance" path = "benches/performance.rs" harness = false +test = false [features] rand = ["dep:rand"] diff --git a/crates/bpe/benches/performance.rs b/crates/bpe/benches/performance.rs index 9fe2704..b4f1acc 100644 --- a/crates/bpe/benches/performance.rs +++ b/crates/bpe/benches/performance.rs @@ -10,21 +10,28 @@ use criterion::{ use rand::{thread_rng, Rng}; use tiktoken_rs::CoreBPE; -static TOKENIZERS: LazyLock<[(&'static str, &'static BytePairEncoding, CoreBPE); 2]> = - LazyLock::new(|| { - [ - ( - "cl100k", - BytePairEncoding::cl100k(), - tiktoken_rs::cl100k_base().unwrap(), +static TOKENIZERS: LazyLock<[(&'static str, BytePairEncoding, CoreBPE); 2]> = LazyLock::new(|| { + [ + ( + "cl100k", + BytePairEncoding::from_tiktoken( + &tiktoken_rs::cl100k_base_singleton().lock(), + 100256, + Some(17846336922010275747), ), - ( - "o200k", - BytePairEncoding::o200k(), - tiktoken_rs::o200k_base().unwrap(), + tiktoken_rs::cl100k_base().unwrap(), + ), + ( + "o200k", + BytePairEncoding::from_tiktoken( + &tiktoken_rs::o200k_base_singleton().lock(), + 199998, + Some(17846336922010275747), ), - ] - }); + tiktoken_rs::o200k_base().unwrap(), + ), + ] +}); fn counting_benchmark(c: &mut Criterion) { for (name, bpe, _) in TOKENIZERS.iter() { diff --git a/crates/bpe/src/appendable_encoder.rs b/crates/bpe/src/appendable_encoder.rs index f75fde8..b0752b5 100644 --- a/crates/bpe/src/appendable_encoder.rs +++ b/crates/bpe/src/appendable_encoder.rs @@ -90,13 +90,13 @@ impl<'a> AppendableEncoder<'a> { #[cfg(test)] mod tests { - use crate::byte_pair_encoding::{create_test_bytes, BytePairEncoding}; + use crate::byte_pair_encoding::{create_test_bytes, BPE_CL100K}; use super::AppendableEncoder; #[test] fn test_appendable_encoder() { - let bpe = BytePairEncoding::cl100k(); + let bpe = &BPE_CL100K; let mut enc = AppendableEncoder::new(bpe); let input_string = create_test_bytes(bpe, 100); for (i, c) in input_string.iter().enumerate() { diff --git a/crates/bpe/src/byte_pair_encoding.rs b/crates/bpe/src/byte_pair_encoding.rs index 1f278bf..f18468e 100644 --- a/crates/bpe/src/byte_pair_encoding.rs +++ b/crates/bpe/src/byte_pair_encoding.rs @@ -2,7 +2,6 @@ use std::cmp::Reverse; use std::collections::BinaryHeap; use std::hash::{Hash, Hasher}; use std::ops::Range; -use std::sync::LazyLock; use aneubeck_daachorse::{DoubleArrayAhoCorasick, DoubleArrayAhoCorasickBuilder}; use fnv::{FnvHashMap, FnvHasher}; @@ -13,15 +12,25 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use crate::backtrack_encoder::BacktrackEncoder; use crate::bitfield::BitField; -static BPE_CL100K: LazyLock = LazyLock::new(|| { - let bytes = include_bytes!("data/bpe_cl100k.dict"); - rmp_serde::from_slice(bytes).expect("") -}); +#[cfg(test)] +pub(crate) static BPE_CL100K: std::sync::LazyLock = + std::sync::LazyLock::new(|| { + BytePairEncoding::from_tiktoken( + &tiktoken_rs::cl100k_base_singleton().lock(), + 100256, + Some(17846336922010275747), + ) + }); -static BPE_O200K: LazyLock = LazyLock::new(|| { - let bytes = include_bytes!("data/bpe_o200k.dict"); - rmp_serde::from_slice(bytes).expect("") -}); +#[cfg(test)] +pub(crate) static BPE_O200K: std::sync::LazyLock = + std::sync::LazyLock::new(|| { + BytePairEncoding::from_tiktoken( + 
&tiktoken_rs::o200k_base_singleton().lock(), + 199998, + Some(17846336922010275747), + ) + }); /// Representation of the byte pair dictionary. /// This struct provides various conversions. @@ -212,14 +221,6 @@ fn find_token_by_bytes( } impl BytePairEncoding { - pub fn cl100k() -> &'static Self { - &BPE_CL100K - } - - pub fn o200k() -> &'static Self { - &BPE_O200K - } - /// Construct a BytePairEncoding instance from a tiktoken dictionary. /// A suitable hash factor may be necessary to prevent hash collisions, /// which can by found using [`find_hash_factor_for_tiktoken`]. @@ -569,7 +570,7 @@ mod tests { use itertools::Itertools; use tiktoken_rs::{cl100k_base_singleton, o200k_base_singleton}; - use crate::byte_pair_encoding::{create_test_bytes, BytePairEncoding}; + use crate::byte_pair_encoding::{create_test_bytes, BPE_CL100K, BPE_O200K}; #[test] fn test_correctness_cl100k() { @@ -582,9 +583,9 @@ mod tests { ]) .unwrap(); let time = Instant::now(); - let bpe = BytePairEncoding::o200k(); + let bpe = &BPE_CL100K; println!("{:?}", time.elapsed()); - let encoded1 = o200k_base_singleton() + let encoded1 = cl100k_base_singleton() .lock() .encode_ordinary(test_string) .into_iter() @@ -609,9 +610,9 @@ mod tests { ]) .unwrap(); let time = Instant::now(); - let bpe = BytePairEncoding::cl100k(); + let bpe = &BPE_O200K; println!("{:?}", time.elapsed()); - let encoded1 = cl100k_base_singleton() + let encoded1 = o200k_base_singleton() .lock() .encode_ordinary(test_string) .into_iter() @@ -627,7 +628,7 @@ mod tests { #[test] fn test_bpe_equivalence() { - let bpe = BytePairEncoding::cl100k(); + let bpe = &BPE_CL100K; for tokens in [10, 1000, 10000] { for _ in 0..5 { let test_input = create_test_bytes(bpe, tokens); @@ -638,48 +639,3 @@ mod tests { } } } - -#[cfg(test)] -mod data { - use std::fs::File; - use std::path::PathBuf; - - use serde::Serialize; - - use crate::byte_pair_encoding::BytePairEncoding; - - #[test] - fn update_token_dicts() { - serialize_tokens( - "cl100k", - &tiktoken_rs::cl100k_base().expect("tiktoken initialization must not fail!"), - 100256, - 17846336922010275747, - ); - serialize_tokens( - "o200k", - &tiktoken_rs::o200k_base().expect("tiktoken initialization must not fail!"), - 199998, - 17846336922010275747, - ); - } - - #[track_caller] - fn serialize_tokens( - name: &str, - dict: &tiktoken_rs::CoreBPE, - num_tokens: usize, - hash_factor: u64, - ) { - let path = PathBuf::from(file!()); - let dir = path.parent().unwrap(); - let data_file = dir.join(format!("data/bpe_{name}.dict")); - let current_dir = std::env::current_dir().unwrap(); - let abs_path = current_dir.parent().unwrap().parent().unwrap(); - let file = File::create(abs_path.join(data_file)).unwrap(); - let mut serializer = rmp_serde::Serializer::new(file); - BytePairEncoding::from_tiktoken(dict, num_tokens, Some(hash_factor)) - .serialize(&mut serializer) - .unwrap(); - } -} diff --git a/crates/bpe/src/data/bpe_cl100k.dict b/crates/bpe/src/data/bpe_cl100k.dict deleted file mode 100644 index bdcfbc5..0000000 Binary files a/crates/bpe/src/data/bpe_cl100k.dict and /dev/null differ diff --git a/crates/bpe/src/data/bpe_o200k.dict b/crates/bpe/src/data/bpe_o200k.dict deleted file mode 100644 index 330589f..0000000 Binary files a/crates/bpe/src/data/bpe_o200k.dict and /dev/null differ diff --git a/crates/bpe/src/interval_encoding.rs b/crates/bpe/src/interval_encoding.rs index 5c2f248..05bf79f 100644 --- a/crates/bpe/src/interval_encoding.rs +++ b/crates/bpe/src/interval_encoding.rs @@ -86,13 +86,13 @@ impl<'a> IntervalEncoding<'a> 
 mod tests {
     use rand::{thread_rng, Rng};
 
-    use crate::byte_pair_encoding::{create_test_bytes, BytePairEncoding};
+    use crate::byte_pair_encoding::{create_test_bytes, BPE_CL100K};
 
     use super::IntervalEncoding;
 
     #[test]
     fn test_interval_count() {
-        let bpe = BytePairEncoding::cl100k();
+        let bpe = &BPE_CL100K;
         let text = create_test_bytes(bpe, 10000);
         let intervals = IntervalEncoding::new(bpe, &text);
         for _ in 0..1000 {
diff --git a/crates/bpe/src/prependable_encoder.rs b/crates/bpe/src/prependable_encoder.rs
index f229d32..ce13e40 100644
--- a/crates/bpe/src/prependable_encoder.rs
+++ b/crates/bpe/src/prependable_encoder.rs
@@ -90,13 +90,13 @@ impl<'a> PrependableEncoder<'a> {
 
 #[cfg(test)]
 mod tests {
-    use crate::byte_pair_encoding::{create_test_bytes, BytePairEncoding};
+    use crate::byte_pair_encoding::{create_test_bytes, BPE_CL100K};
 
     use super::PrependableEncoder;
 
     #[test]
     fn test_prependable_encoder() {
-        let bpe = BytePairEncoding::cl100k();
+        let bpe = &BPE_CL100K;
         let mut enc = PrependableEncoder::new(bpe);
         let input_string = create_test_bytes(bpe, 100);
         for (i, c) in input_string.iter().enumerate().rev() {