diff --git a/crates/bpe/.gitignore b/crates/bpe/.gitignore deleted file mode 100644 index da6881e..0000000 --- a/crates/bpe/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -# Ignore benchmark results except figures references in the README. -# Negated ignore patterns do not work for files inside a directory that is itself ignored. -# Therefore ignore using `**` and then negate the nested directories (but not the files inside). -/benches/result/** -!/benches/result/*/ -!/benches/result/*/*/ -# Negate the actual figures we want to keep. -!/benches/result/reports/counting-o200k/lines.svg -!/benches/result/reports/encoding-o200k/lines.svg -!/benches/result/reports/appending-o200k/lines.svg diff --git a/crates/bpe/README.md b/crates/bpe/README.md index e74695b..ab624be 100644 --- a/crates/bpe/README.md +++ b/crates/bpe/README.md @@ -203,16 +203,16 @@ If the requirement of correct BPE output can be relaxed, then the Greedy approac Results for counting o200k tokens for random 10000 byte slices. The setup time of the interval encoder is comparable to backtracking. After setup counting of slices of the original data are approximately constant time. - +![counting runtime comparison](./benches/result/counting-o200k.svg) ### Encoding results Results for encoding o200k tokens for random 1000 bytes. The backtracking encoder consistently outperforms tiktoken by a constant factor. - +![encoding runtime comparison](./benches/result/encoding-o200k.svg) ### Incremental encoding results -Results for incrementally encoding o200k tokens by appending 10000 random bytes. The appending encoder is slower by a constant factor but overall has similar performance curve as the backtracking encoder encoding all data at once. +Results for incrementally encoding o200k tokens by appending 10000 random bytes. The appending encoder is slower by a constant factor but overall has similar performance curve as the backtracking encoder encoding all data at once. - +![appending runtime comparison](./benches/result/appending-o200k.svg) diff --git a/crates/bpe/benches/result/appending-o200k.svg b/crates/bpe/benches/result/appending-o200k.svg new file mode 100644 index 0000000..a7cadf9 --- /dev/null +++ b/crates/bpe/benches/result/appending-o200k.svg @@ -0,0 +1,52 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/crates/bpe/benches/result/counting-o200k.svg b/crates/bpe/benches/result/counting-o200k.svg new file mode 100644 index 0000000..b84d4c9 --- /dev/null +++ b/crates/bpe/benches/result/counting-o200k.svg @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/crates/bpe/benches/result/encoding-o200k.svg b/crates/bpe/benches/result/encoding-o200k.svg new file mode 100644 index 0000000..8a8259b --- /dev/null +++ b/crates/bpe/benches/result/encoding-o200k.svg @@ -0,0 +1,76 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/crates/bpe/benches/result/reports/appending-o200k/lines.svg b/crates/bpe/benches/result/reports/appending-o200k/lines.svg deleted file mode 100644 index c114d21..0000000 --- a/crates/bpe/benches/result/reports/appending-o200k/lines.svg +++ /dev/null @@ -1,232 +0,0 @@ - - - -Gnuplot -Produced by GNUPLOT 6.0 patchlevel 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.0001 - - - - - - - - - - - - - 0.001 - - - - - - - - - - - - - 0.01 - - - - - - - - - - - - - 0.1 - - - - - - - - - - - - - 1 - - - - - - - - - - - - - 10 - - - - - 10 - - - - - 100 - - - - - 1000 - - - - - 10000 - - - - - - - - - appending - - - - - appending - - - - - - gnuplot_plot_2 - - - - - - - - - - backtracking - - - - - backtracking - - - - - - gnuplot_plot_4 - - - - - - - - - - - - - - - - - - Average time (ms) - - - - - Input Size (Bytes) - - - - - - - appending-o200k: Comparison - - - - - - - diff --git a/crates/bpe/benches/result/reports/counting-o200k/lines.svg b/crates/bpe/benches/result/reports/counting-o200k/lines.svg deleted file mode 100644 index 396969a..0000000 --- a/crates/bpe/benches/result/reports/counting-o200k/lines.svg +++ /dev/null @@ -1,217 +0,0 @@ - - - -Gnuplot -Produced by GNUPLOT 6.0 patchlevel 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.1 - - - - - - - - - - - - - 1 - - - - - - - - - - - - - 10 - - - - - - - - - - - - - 100 - - - - - - - - - - - - - 1000 - - - - - 10 - - - - - 100 - - - - - 1000 - - - - - 10000 - - - - - - - - - interval - - - - - interval - - - - - - gnuplot_plot_2 - - - - - - - - - - backtracking - - - - - backtracking - - - - - - gnuplot_plot_4 - - - - - - - - - - - - - - - - - - Average time (µs) - - - - - Input Size (Bytes) - - - - - - - counting-o200k: Comparison - - - - - - - diff --git a/crates/bpe/benches/result/reports/encoding-o200k/lines.svg b/crates/bpe/benches/result/reports/encoding-o200k/lines.svg deleted file mode 100644 index a54143e..0000000 --- a/crates/bpe/benches/result/reports/encoding-o200k/lines.svg +++ /dev/null @@ -1,316 +0,0 @@ - - - -Gnuplot -Produced by GNUPLOT 6.0 patchlevel 1 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - 0.0001 - - - - - - - - - - - - - 0.001 - - - - - - - - - - - - - 0.01 - - - - - - - - - - - - - 0.1 - - - - - - - - - - - - - 1 - - - - - - - - - - - - - 10 - - - - - 10 - - - - - 100 - - - - - 1000 - - - - - 10000 - - - - - - - - - backtracking - - - - - backtracking - - - - - - gnuplot_plot_2 - - - - - - - - - - heap - - - - - heap - - - - - - gnuplot_plot_4 - - - - - - - - - - table - - - - - table - - - - - - gnuplot_plot_6 - - - - - - - - - - greedy - - - - - greedy - - - - - - gnuplot_plot_8 - - - - - - - - - - minimal - - - - - minimal - - - - - - gnuplot_plot_10 - - - - - - - - - - tiktoken - - - - - tiktoken - - - - - - gnuplot_plot_12 - - - - - - - - - - - - - - - - - - Average time (ms) - - - - - Input Size (Bytes) - - - - - - - encoding-o200k: Comparison - - - - - - - diff --git a/crates/bpe/criterion.toml b/crates/bpe/criterion.toml index ada40f9..c0f42f2 100644 --- a/crates/bpe/criterion.toml +++ b/crates/bpe/criterion.toml @@ -1,2 +1,2 @@ # save report in this directory, even if a custom target directory is set -criterion_home = "./benches/result" +criterion_home = "./target/criterion" diff --git a/crates/bpe/script/copy-benchmark-results b/crates/bpe/script/copy-benchmark-results new file mode 100755 index 0000000..df9e97f --- /dev/null +++ b/crates/bpe/script/copy-benchmark-results @@ -0,0 +1,11 @@ +#!/usr/bin/env bash + +set -eu + +result_dir="benches/result" + +mkdir -p "$result_dir" + +for i in {counting,encoding,appending}-o200k; do + rsvg-convert --format svg --output "$result_dir/$i.svg" --background-color white "target/criterion/reports/$i/lines.svg" +done