diff --git a/crates/bpe/.gitignore b/crates/bpe/.gitignore
new file mode 100644
index 0000000..da6881e
--- /dev/null
+++ b/crates/bpe/.gitignore
@@ -0,0 +1,10 @@
+# Ignore benchmark results except the figures referenced in the README.
+# Negated ignore patterns do not work for files inside a directory that is itself ignored.
+# Therefore, ignore using `**` and then negate the nested directories (but not the files inside them).
+/benches/result/**
+!/benches/result/*/
+!/benches/result/*/*/
+# Negate the actual figures we want to keep.
+!/benches/result/reports/counting-o200k/lines.svg
+!/benches/result/reports/encoding-o200k/lines.svg
+!/benches/result/reports/appending-o200k/lines.svg
diff --git a/crates/bpe/README.md b/crates/bpe/README.md
index 3fc5e8d..b8349a2 100644
--- a/crates/bpe/README.md
+++ b/crates/bpe/README.md
@@ -198,3 +198,35 @@ As can be seen, our Backtracking implementation beats the TikToken Rust implemen
 And even the fully dynamic programming solution is faster with a more consistent runtime.
 The tuned heap implementation is still quite competitive to TikToken (especially for smaller inputs).
 If the requirement of correct BPE output can be relaxed, then the Greedy approach or the minimal encoding approach are the clear winners.
+
+### Counting results
+
+Results for counting o200k tokens in random 10000-byte slices. The setup time of the interval encoder is comparable to that of the backtracking encoder. After setup, counting tokens in slices of the original data takes approximately constant time.
+
+![Counting o200k tokens for random 10000-byte slices](./benches/result/reports/counting-o200k/lines.svg)
+
+### Encoding results
+
+Results for encoding o200k tokens for 10000 random bytes. The backtracking encoder consistently outperforms tiktoken by a constant factor.
+
+![Encoding o200k tokens for 10000 random bytes](./benches/result/reports/encoding-o200k/lines.svg)
+
+### Incremental encoding results
+
+Results for incrementally encoding o200k tokens by appending 10000 random bytes. The appending encoder is slower by a constant factor, but overall its performance curve is similar to that of the backtracking encoder encoding all the data at once.
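+
+A minimal sketch of the incremental usage this benchmark measures, based on the `AppendableEncoder` as exercised in `benches/performance.rs` (the module paths and the `token_count` accessor are assumptions, not verified API):
+
+```rust
+use bpe::appendable_encoder::AppendableEncoder;
+use bpe::byte_pair_encoding::BytePairEncoding;
+
+/// Counts tokens while the input arrives byte by byte.
+fn count_incrementally(bpe: &BytePairEncoding, bytes: impl IntoIterator<Item = u8>) -> usize {
+    let mut enc = AppendableEncoder::new(bpe);
+    enc.extend(bytes); // appending updates the token sequence incrementally
+    enc.token_count() // assumed accessor for the number of tokens encoded so far
+}
+```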
+
+![Incrementally encoding o200k tokens by appending 10000 random bytes](./benches/result/reports/appending-o200k/lines.svg)
diff --git a/crates/bpe/benches/performance.rs b/crates/bpe/benches/performance.rs
index 9bb48ec..ef428e1 100644
--- a/crates/bpe/benches/performance.rs
+++ b/crates/bpe/benches/performance.rs
@@ -140,9 +140,7 @@ fn appending_benchmark(c: &mut Criterion) {
                     AppendableEncoder::new(bpe),
                 )
             },
-            |(start, mut enc)| {
-                enc.extend(input[start..start + bytes].into_iter().copied())
-            },
+            |(start, mut enc)| enc.extend(input[start..start + bytes].into_iter().copied()),
             criterion::BatchSize::SmallInput,
         )
     });
diff --git a/crates/bpe/benches/result/reports/appending-o200k/lines.svg b/crates/bpe/benches/result/reports/appending-o200k/lines.svg
new file mode 100644
index 0000000..e69de29
diff --git a/crates/bpe/benches/result/reports/counting-o200k/lines.svg b/crates/bpe/benches/result/reports/counting-o200k/lines.svg
new file mode 100644
index 0000000..e69de29
diff --git a/crates/bpe/benches/result/reports/encoding-o200k/lines.svg b/crates/bpe/benches/result/reports/encoding-o200k/lines.svg
new file mode 100644
index 0000000..e69de29
diff --git a/crates/bpe/criterion.toml b/crates/bpe/criterion.toml
index c0f42f2..ada40f9 100644
--- a/crates/bpe/criterion.toml
+++ b/crates/bpe/criterion.toml
@@ -1,2 +1,2 @@
 # save report in this directory, even if a custom target directory is set
-criterion_home = "./target/criterion"
+criterion_home = "./benches/result"