Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update tokenizers requirement from 0.20 to 0.21 #41

Merged
merged 2 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions crates/bpe-openai/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ impl Tokenizer {

/// Returns an iterator with the text pieces resulting from pre-tokenization. If this
/// tokenizer does not have pre-tokenization, the iterator returns the full text.
pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator<Item = &str> + 'a {
pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator<Item = &'a str> + 'a {
match &self.pre {
Some(pre) => Either::Left(pre.split(text)),
None => Either::Right(std::iter::once(text)),
Expand Down Expand Up @@ -144,7 +144,7 @@ impl Pretokenizer {
}

/// Returns an iterator with the text pieces after splitting with the regular expression.
pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator<Item = &str> + 'a {
pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator<Item = &'a str> + 'a {
Splits {
pat: &self.pat,
lookahead: &self.lookahead,
Expand Down
2 changes: 1 addition & 1 deletion crates/bpe/benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,4 @@ bpe-openai = { path = "../../bpe-openai" }
criterion = "0.5"
rand = "0.8"
tiktoken-rs = "0.6"
tokenizers = { version = "0.20", features = ["http"] }
tokenizers = { version = "0.21", features = ["http"] }
2 changes: 1 addition & 1 deletion crates/bpe/src/byte_pair_encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ fn serialize_daac<S: Serializer>(
}

struct DaacVisitor;
impl<'de> Visitor<'de> for DaacVisitor {
impl Visitor<'_> for DaacVisitor {
type Value = DoubleArrayAhoCorasick<u32>;

fn expecting(&self, _formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
Expand Down
2 changes: 1 addition & 1 deletion crates/geo_filters/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ pub(crate) fn nth_one(mut value: u64, mut n: u32) -> u32 {
/// Take a number of elements from an iterator without consuming it.
pub(crate) fn take_ref<I: Iterator>(iter: &mut I, n: usize) -> impl Iterator<Item = I::Item> + '_ {
struct TakeRef<'a, I: Iterator>(usize, &'a mut I);
impl<'a, I: Iterator> Iterator for TakeRef<'a, I> {
impl<I: Iterator> Iterator for TakeRef<'_, I> {
type Item = I::Item;
fn next(&mut self) -> Option<Self::Item> {
if self.0 > 0 {
Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/diff_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ pub struct GeoDiffCount<'a, C: GeoConfig<Diff>> {
lsb: BitVec<'a>,
}

impl<'a, C: GeoConfig<Diff>> std::fmt::Debug for GeoDiffCount<'a, C> {
impl<C: GeoConfig<Diff>> std::fmt::Debug for GeoDiffCount<'_, C> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
Expand All @@ -76,7 +76,7 @@ impl<'a, C: GeoConfig<Diff>> std::fmt::Debug for GeoDiffCount<'a, C> {
}
}

impl<'a, C: GeoConfig<Diff>> GeoDiffCount<'a, C> {
impl<C: GeoConfig<Diff>> GeoDiffCount<'_, C> {
pub fn new(config: C) -> Self {
Self {
config,
Expand Down Expand Up @@ -550,7 +550,7 @@ mod tests {
assert_eq!(vec![17, 11, 7], a.msb.iter().copied().collect_vec());
}

impl<'a, C: GeoConfig<Diff>> GeoDiffCount<'a, C> {
impl<C: GeoConfig<Diff>> GeoDiffCount<'_, C> {
fn from_ones(config: C, ones: impl IntoIterator<Item = C::BucketType>) -> Self {
let mut result = Self::new(config);
for one in ones {
Expand Down
8 changes: 4 additions & 4 deletions crates/geo_filters/src/diff_count/bitvec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ pub(crate) struct BitVec<'a> {
blocks: Cow<'a, [u64]>,
}

impl<'a> Ord for BitVec<'a> {
impl Ord for BitVec<'_> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
match self.num_bits.cmp(&other.num_bits) {
Ordering::Equal => self.blocks.iter().rev().cmp(other.blocks.iter().rev()),
Expand All @@ -27,13 +27,13 @@ impl<'a> Ord for BitVec<'a> {
}
}

impl<'a> PartialOrd for BitVec<'a> {
impl PartialOrd for BitVec<'_> {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}

impl<'a> BitVec<'a> {
impl BitVec<'_> {
/// Takes an iterator of `BitChunk` items as input and returns the corresponding `BitVec`.
/// The order of `BitChunk`s doesn't matter for this function and `BitChunk` may be hitting
/// the same block. In this case, the function will simply xor them together.
Expand Down Expand Up @@ -144,7 +144,7 @@ impl<'a> BitVec<'a> {
}
}

impl<'a> Index<usize> for BitVec<'a> {
impl Index<usize> for BitVec<'_> {
type Output = bool;

/// Returns the value of the bit corresponding to the provided zero-based bit position.
Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/diff_count/sim_hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ impl<'a, C: GeoConfig<Diff>> SimHashIterator<'a, C> {
}
}

impl<'a, C: GeoConfig<Diff>> Iterator for SimHashIterator<'a, C> {
impl<C: GeoConfig<Diff>> Iterator for SimHashIterator<'_, C> {
type Item = (BucketId, SimHash);

fn next(&mut self) -> Option<Self::Item> {
Expand All @@ -148,7 +148,7 @@ impl<'a, C: GeoConfig<Diff>> Iterator for SimHashIterator<'a, C> {
}
}

impl<'a, C: GeoConfig<Diff>> GeoDiffCount<'a, C> {
impl<C: GeoConfig<Diff>> GeoDiffCount<'_, C> {
/// n specifies the desired zero-based index of the most significant one.
/// The zero-based index of the desired one bit is returned.
fn nth_most_significant_one(&self, mut n: usize) -> Option<C::BucketType> {
Expand Down Expand Up @@ -179,7 +179,7 @@ impl<'a, C: GeoConfig<Diff>> GeoDiffCount<'a, C> {
}
}

impl<'a> BitVec<'a> {
impl BitVec<'_> {
/// n specifies the desired zero-based index of the most significant one.
/// The zero-based index of the desired one bit is returned.
pub fn nth_most_significant_one(&self, mut n: usize) -> Option<usize> {
Expand Down
12 changes: 6 additions & 6 deletions crates/geo_filters/src/distinct_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ pub struct GeoDistinctCount<'a, C: GeoConfig<Distinct>> {
lsb: BitDeque<'a>,
}

impl<'a, C: GeoConfig<Distinct> + Default> Default for GeoDistinctCount<'a, C> {
impl<C: GeoConfig<Distinct> + Default> Default for GeoDistinctCount<'_, C> {
fn default() -> Self {
Self::new(C::default())
}
}

impl<'a, C: GeoConfig<Distinct>> std::fmt::Debug for GeoDistinctCount<'a, C> {
impl<C: GeoConfig<Distinct>> std::fmt::Debug for GeoDistinctCount<'_, C> {
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(
f,
Expand All @@ -52,7 +52,7 @@ impl<'a, C: GeoConfig<Distinct>> std::fmt::Debug for GeoDistinctCount<'a, C> {
}
}

impl<'a, C: GeoConfig<Distinct>> GeoDistinctCount<'a, C> {
impl<C: GeoConfig<Distinct>> GeoDistinctCount<'_, C> {
pub fn new(config: C) -> Self {
let msb = Default::default();
let lsb = BitDeque::new(max_lsb_bytes::<C::BucketType>(
Expand Down Expand Up @@ -128,7 +128,7 @@ impl<'a, C: GeoConfig<Distinct>> GeoDistinctCount<'a, C> {
}
}

impl<'a, C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'a, C> {
impl<C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'_, C> {
fn push_hash(&mut self, hash: u64) {
self.set_bit(self.config.hash_to_bucket(hash));
}
Expand Down Expand Up @@ -179,7 +179,7 @@ impl<'a, C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'a, C> {
}
}

impl<'a, C: GeoConfig<Distinct>> GeoDistinctCount<'a, C> {
impl<C: GeoConfig<Distinct>> GeoDistinctCount<'_, C> {
fn insert_into_lsb(&mut self, bucket: usize) {
if !self.lsb.test_bit(bucket) {
self.lsb.set_bit(bucket);
Expand Down Expand Up @@ -395,7 +395,7 @@ mod tests {
assert_eq!(vec![17, 11, 7], a.msb.iter().copied().collect_vec());
}

impl<'a, C: GeoConfig<Distinct>> GeoDistinctCount<'a, C> {
impl<C: GeoConfig<Distinct>> GeoDistinctCount<'_, C> {
fn from_ones(config: C, ones: impl IntoIterator<Item = C::BucketType>) -> Self {
let mut result = Self::new(config);
for one in ones {
Expand Down
24 changes: 12 additions & 12 deletions crates/geo_filters/src/distinct_count/bitdeque.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ enum DequeCow<'a> {
Borrowed(&'a [u64]),
}

impl<'a> Debug for DequeCow<'a> {
impl Debug for DequeCow<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::Owned(b) => Debug::fmt(b, f),
Expand All @@ -21,13 +21,13 @@ impl<'a> Debug for DequeCow<'a> {
}
}

impl<'a> Default for DequeCow<'a> {
impl Default for DequeCow<'_> {
fn default() -> Self {
Self::Borrowed(&[])
}
}

impl<'a> PartialEq for DequeCow<'a> {
impl PartialEq for DequeCow<'_> {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Owned(l0), Self::Owned(r0)) => l0 == r0,
Expand All @@ -39,9 +39,9 @@ impl<'a> PartialEq for DequeCow<'a> {
}
}

impl<'a> Eq for DequeCow<'a> {}
impl Eq for DequeCow<'_> {}

impl<'a> DequeCow<'a> {
impl DequeCow<'_> {
fn to_mut(&mut self) -> &mut VecDeque<u64> {
match self {
DequeCow::Owned(o) => o,
Expand All @@ -53,7 +53,7 @@ impl<'a> DequeCow<'a> {
}
}

impl<'a> Index<usize> for DequeCow<'a> {
impl Index<usize> for DequeCow<'_> {
type Output = u64;

fn index(&self, index: usize) -> &Self::Output {
Expand All @@ -64,7 +64,7 @@ impl<'a> Index<usize> for DequeCow<'a> {
}
}

impl<'a> DequeCow<'a> {
impl DequeCow<'_> {
fn len(&self) -> usize {
match self {
DequeCow::Owned(o) => o.len(),
Expand All @@ -88,7 +88,7 @@ impl<'a> Iterator for DequeIter<'a> {
}
}

impl<'a> DoubleEndedIterator for DequeIter<'a> {
impl DoubleEndedIterator for DequeIter<'_> {
fn next_back(&mut self) -> Option<Self::Item> {
match self {
Self::Slice(i) => i.next_back(),
Expand All @@ -97,7 +97,7 @@ impl<'a> DoubleEndedIterator for DequeIter<'a> {
}
}

impl<'a> ExactSizeIterator for DequeIter<'a> {
impl ExactSizeIterator for DequeIter<'_> {
fn len(&self) -> usize {
match self {
Self::Slice(i) => i.len(),
Expand All @@ -106,7 +106,7 @@ impl<'a> ExactSizeIterator for DequeIter<'a> {
}
}

impl<'a> DequeCow<'a> {
impl DequeCow<'_> {
fn iter(&self) -> DequeIter<'_> {
match self {
DequeCow::Owned(o) => DequeIter::VecDeque(o.iter()),
Expand All @@ -126,7 +126,7 @@ pub struct BitDeque<'a> {
max_blocks: usize,
}

impl<'a> BitDeque<'a> {
impl BitDeque<'_> {
pub fn new(max_bytes: usize) -> Self {
assert!(max_bytes >= BYTES_PER_BLOCK);
Self {
Expand Down Expand Up @@ -229,7 +229,7 @@ impl<'a> BitDeque<'a> {
}
}

impl<'a> Index<usize> for BitDeque<'a> {
impl Index<usize> for BitDeque<'_> {
type Output = bool;

/// Returns the value of the bit corresponding to the provided zero-based bit position.
Expand Down
Loading