From 0af044c03e9f3420f5c0029bac673d261fe0a059 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 01:23:13 +0000 Subject: [PATCH 1/2] Update tokenizers requirement from 0.20 to 0.21 Updates the requirements on [tokenizers](https://github.com/huggingface/tokenizers) to permit the latest version. - [Release notes](https://github.com/huggingface/tokenizers/releases) - [Changelog](https://github.com/huggingface/tokenizers/blob/main/RELEASE.md) - [Commits](https://github.com/huggingface/tokenizers/compare/v0.20.0...v0.21.0) --- updated-dependencies: - dependency-name: tokenizers dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- crates/bpe/benchmarks/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/bpe/benchmarks/Cargo.toml b/crates/bpe/benchmarks/Cargo.toml index e6a4b64..87f35c2 100644 --- a/crates/bpe/benchmarks/Cargo.toml +++ b/crates/bpe/benchmarks/Cargo.toml @@ -23,4 +23,4 @@ bpe-openai = { path = "../../bpe-openai" } criterion = "0.5" rand = "0.8" tiktoken-rs = "0.6" -tokenizers = { version = "0.20", features = ["http"] } +tokenizers = { version = "0.21", features = ["http"] } From 59aea99bc65bb613cb5fa9ea13b9900c83ee59ea Mon Sep 17 00:00:00 2001 From: Hendrik van Antwerpen Date: Tue, 3 Dec 2024 14:12:58 +0100 Subject: [PATCH 2/2] new Rust version got stricter about unnecessary lifetimes --- crates/bpe-openai/src/lib.rs | 4 ++-- crates/bpe/src/byte_pair_encoding.rs | 2 +- crates/geo_filters/src/config.rs | 2 +- crates/geo_filters/src/diff_count.rs | 6 ++--- crates/geo_filters/src/diff_count/bitvec.rs | 8 +++---- crates/geo_filters/src/diff_count/sim_hash.rs | 6 ++--- crates/geo_filters/src/distinct_count.rs | 12 +++++----- .../src/distinct_count/bitdeque.rs | 24 +++++++++---------- 8 files changed, 32 insertions(+), 32 deletions(-) diff --git a/crates/bpe-openai/src/lib.rs b/crates/bpe-openai/src/lib.rs index 76d3eab..fba90aa 100644 --- a/crates/bpe-openai/src/lib.rs +++ b/crates/bpe-openai/src/lib.rs @@ -115,7 +115,7 @@ impl Tokenizer { /// Returns an iterator with the text pieces resulting from pre-tokenization. If this /// tokenizer does not have pre-tokenization, the iterator returns the full text. - pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator + 'a { + pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator + 'a { match &self.pre { Some(pre) => Either::Left(pre.split(text)), None => Either::Right(std::iter::once(text)), @@ -144,7 +144,7 @@ impl Pretokenizer { } /// Returns an iterator with the text pieces after splitting with the regular expression. - pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator + 'a { + pub fn split<'a>(&'a self, text: &'a str) -> impl Iterator + 'a { Splits { pat: &self.pat, lookahead: &self.lookahead, diff --git a/crates/bpe/src/byte_pair_encoding.rs b/crates/bpe/src/byte_pair_encoding.rs index 09a156d..9c5a014 100644 --- a/crates/bpe/src/byte_pair_encoding.rs +++ b/crates/bpe/src/byte_pair_encoding.rs @@ -64,7 +64,7 @@ fn serialize_daac( } struct DaacVisitor; -impl<'de> Visitor<'de> for DaacVisitor { +impl Visitor<'_> for DaacVisitor { type Value = DoubleArrayAhoCorasick; fn expecting(&self, _formatter: &mut std::fmt::Formatter) -> std::fmt::Result { diff --git a/crates/geo_filters/src/config.rs b/crates/geo_filters/src/config.rs index 2de0d2f..b0e63bf 100644 --- a/crates/geo_filters/src/config.rs +++ b/crates/geo_filters/src/config.rs @@ -290,7 +290,7 @@ pub(crate) fn nth_one(mut value: u64, mut n: u32) -> u32 { /// Take a number of elements from an iterator without consuming it. pub(crate) fn take_ref(iter: &mut I, n: usize) -> impl Iterator + '_ { struct TakeRef<'a, I: Iterator>(usize, &'a mut I); - impl<'a, I: Iterator> Iterator for TakeRef<'a, I> { + impl Iterator for TakeRef<'_, I> { type Item = I::Item; fn next(&mut self) -> Option { if self.0 > 0 { diff --git a/crates/geo_filters/src/diff_count.rs b/crates/geo_filters/src/diff_count.rs index 107c103..2f1ccb2 100644 --- a/crates/geo_filters/src/diff_count.rs +++ b/crates/geo_filters/src/diff_count.rs @@ -64,7 +64,7 @@ pub struct GeoDiffCount<'a, C: GeoConfig> { lsb: BitVec<'a>, } -impl<'a, C: GeoConfig> std::fmt::Debug for GeoDiffCount<'a, C> { +impl> std::fmt::Debug for GeoDiffCount<'_, C> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, @@ -76,7 +76,7 @@ impl<'a, C: GeoConfig> std::fmt::Debug for GeoDiffCount<'a, C> { } } -impl<'a, C: GeoConfig> GeoDiffCount<'a, C> { +impl> GeoDiffCount<'_, C> { pub fn new(config: C) -> Self { Self { config, @@ -550,7 +550,7 @@ mod tests { assert_eq!(vec![17, 11, 7], a.msb.iter().copied().collect_vec()); } - impl<'a, C: GeoConfig> GeoDiffCount<'a, C> { + impl> GeoDiffCount<'_, C> { fn from_ones(config: C, ones: impl IntoIterator) -> Self { let mut result = Self::new(config); for one in ones { diff --git a/crates/geo_filters/src/diff_count/bitvec.rs b/crates/geo_filters/src/diff_count/bitvec.rs index 5bded96..c94a041 100644 --- a/crates/geo_filters/src/diff_count/bitvec.rs +++ b/crates/geo_filters/src/diff_count/bitvec.rs @@ -18,7 +18,7 @@ pub(crate) struct BitVec<'a> { blocks: Cow<'a, [u64]>, } -impl<'a> Ord for BitVec<'a> { +impl Ord for BitVec<'_> { fn cmp(&self, other: &Self) -> std::cmp::Ordering { match self.num_bits.cmp(&other.num_bits) { Ordering::Equal => self.blocks.iter().rev().cmp(other.blocks.iter().rev()), @@ -27,13 +27,13 @@ impl<'a> Ord for BitVec<'a> { } } -impl<'a> PartialOrd for BitVec<'a> { +impl PartialOrd for BitVec<'_> { fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } } -impl<'a> BitVec<'a> { +impl BitVec<'_> { /// Takes an iterator of `BitChunk` items as input and returns the corresponding `BitVec`. /// The order of `BitChunk`s doesn't matter for this function and `BitChunk` may be hitting /// the same block. In this case, the function will simply xor them together. @@ -144,7 +144,7 @@ impl<'a> BitVec<'a> { } } -impl<'a> Index for BitVec<'a> { +impl Index for BitVec<'_> { type Output = bool; /// Returns the value of the bit corresponding to the provided zero-based bit position. diff --git a/crates/geo_filters/src/diff_count/sim_hash.rs b/crates/geo_filters/src/diff_count/sim_hash.rs index 3337105..cee2370 100644 --- a/crates/geo_filters/src/diff_count/sim_hash.rs +++ b/crates/geo_filters/src/diff_count/sim_hash.rs @@ -126,7 +126,7 @@ impl<'a, C: GeoConfig> SimHashIterator<'a, C> { } } -impl<'a, C: GeoConfig> Iterator for SimHashIterator<'a, C> { +impl> Iterator for SimHashIterator<'_, C> { type Item = (BucketId, SimHash); fn next(&mut self) -> Option { @@ -148,7 +148,7 @@ impl<'a, C: GeoConfig> Iterator for SimHashIterator<'a, C> { } } -impl<'a, C: GeoConfig> GeoDiffCount<'a, C> { +impl> GeoDiffCount<'_, C> { /// n specifies the desired zero-based index of the most significant one. /// The zero-based index of the desired one bit is returned. fn nth_most_significant_one(&self, mut n: usize) -> Option { @@ -179,7 +179,7 @@ impl<'a, C: GeoConfig> GeoDiffCount<'a, C> { } } -impl<'a> BitVec<'a> { +impl BitVec<'_> { /// n specifies the desired zero-based index of the most significant one. /// The zero-based index of the desired one bit is returned. pub fn nth_most_significant_one(&self, mut n: usize) -> Option { diff --git a/crates/geo_filters/src/distinct_count.rs b/crates/geo_filters/src/distinct_count.rs index 6e9221e..ccd8bc8 100644 --- a/crates/geo_filters/src/distinct_count.rs +++ b/crates/geo_filters/src/distinct_count.rs @@ -34,13 +34,13 @@ pub struct GeoDistinctCount<'a, C: GeoConfig> { lsb: BitDeque<'a>, } -impl<'a, C: GeoConfig + Default> Default for GeoDistinctCount<'a, C> { +impl + Default> Default for GeoDistinctCount<'_, C> { fn default() -> Self { Self::new(C::default()) } } -impl<'a, C: GeoConfig> std::fmt::Debug for GeoDistinctCount<'a, C> { +impl> std::fmt::Debug for GeoDistinctCount<'_, C> { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, @@ -52,7 +52,7 @@ impl<'a, C: GeoConfig> std::fmt::Debug for GeoDistinctCount<'a, C> { } } -impl<'a, C: GeoConfig> GeoDistinctCount<'a, C> { +impl> GeoDistinctCount<'_, C> { pub fn new(config: C) -> Self { let msb = Default::default(); let lsb = BitDeque::new(max_lsb_bytes::( @@ -128,7 +128,7 @@ impl<'a, C: GeoConfig> GeoDistinctCount<'a, C> { } } -impl<'a, C: GeoConfig> Count for GeoDistinctCount<'a, C> { +impl> Count for GeoDistinctCount<'_, C> { fn push_hash(&mut self, hash: u64) { self.set_bit(self.config.hash_to_bucket(hash)); } @@ -179,7 +179,7 @@ impl<'a, C: GeoConfig> Count for GeoDistinctCount<'a, C> { } } -impl<'a, C: GeoConfig> GeoDistinctCount<'a, C> { +impl> GeoDistinctCount<'_, C> { fn insert_into_lsb(&mut self, bucket: usize) { if !self.lsb.test_bit(bucket) { self.lsb.set_bit(bucket); @@ -395,7 +395,7 @@ mod tests { assert_eq!(vec![17, 11, 7], a.msb.iter().copied().collect_vec()); } - impl<'a, C: GeoConfig> GeoDistinctCount<'a, C> { + impl> GeoDistinctCount<'_, C> { fn from_ones(config: C, ones: impl IntoIterator) -> Self { let mut result = Self::new(config); for one in ones { diff --git a/crates/geo_filters/src/distinct_count/bitdeque.rs b/crates/geo_filters/src/distinct_count/bitdeque.rs index b0c01cf..9422a5d 100644 --- a/crates/geo_filters/src/distinct_count/bitdeque.rs +++ b/crates/geo_filters/src/distinct_count/bitdeque.rs @@ -12,7 +12,7 @@ enum DequeCow<'a> { Borrowed(&'a [u64]), } -impl<'a> Debug for DequeCow<'a> { +impl Debug for DequeCow<'_> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Owned(b) => Debug::fmt(b, f), @@ -21,13 +21,13 @@ impl<'a> Debug for DequeCow<'a> { } } -impl<'a> Default for DequeCow<'a> { +impl Default for DequeCow<'_> { fn default() -> Self { Self::Borrowed(&[]) } } -impl<'a> PartialEq for DequeCow<'a> { +impl PartialEq for DequeCow<'_> { fn eq(&self, other: &Self) -> bool { match (self, other) { (Self::Owned(l0), Self::Owned(r0)) => l0 == r0, @@ -39,9 +39,9 @@ impl<'a> PartialEq for DequeCow<'a> { } } -impl<'a> Eq for DequeCow<'a> {} +impl Eq for DequeCow<'_> {} -impl<'a> DequeCow<'a> { +impl DequeCow<'_> { fn to_mut(&mut self) -> &mut VecDeque { match self { DequeCow::Owned(o) => o, @@ -53,7 +53,7 @@ impl<'a> DequeCow<'a> { } } -impl<'a> Index for DequeCow<'a> { +impl Index for DequeCow<'_> { type Output = u64; fn index(&self, index: usize) -> &Self::Output { @@ -64,7 +64,7 @@ impl<'a> Index for DequeCow<'a> { } } -impl<'a> DequeCow<'a> { +impl DequeCow<'_> { fn len(&self) -> usize { match self { DequeCow::Owned(o) => o.len(), @@ -88,7 +88,7 @@ impl<'a> Iterator for DequeIter<'a> { } } -impl<'a> DoubleEndedIterator for DequeIter<'a> { +impl DoubleEndedIterator for DequeIter<'_> { fn next_back(&mut self) -> Option { match self { Self::Slice(i) => i.next_back(), @@ -97,7 +97,7 @@ impl<'a> DoubleEndedIterator for DequeIter<'a> { } } -impl<'a> ExactSizeIterator for DequeIter<'a> { +impl ExactSizeIterator for DequeIter<'_> { fn len(&self) -> usize { match self { Self::Slice(i) => i.len(), @@ -106,7 +106,7 @@ impl<'a> ExactSizeIterator for DequeIter<'a> { } } -impl<'a> DequeCow<'a> { +impl DequeCow<'_> { fn iter(&self) -> DequeIter<'_> { match self { DequeCow::Owned(o) => DequeIter::VecDeque(o.iter()), @@ -126,7 +126,7 @@ pub struct BitDeque<'a> { max_blocks: usize, } -impl<'a> BitDeque<'a> { +impl BitDeque<'_> { pub fn new(max_bytes: usize) -> Self { assert!(max_bytes >= BYTES_PER_BLOCK); Self { @@ -229,7 +229,7 @@ impl<'a> BitDeque<'a> { } } -impl<'a> Index for BitDeque<'a> { +impl Index for BitDeque<'_> { type Output = bool; /// Returns the value of the bit corresponding to the provided zero-based bit position.