Skip to content

Commit

Permalink
fix build warnings
Browse files Browse the repository at this point in the history
  • Loading branch information
tmm1 committed Nov 9, 2024
1 parent a076df4 commit d7cef90
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 8 deletions.
3 changes: 2 additions & 1 deletion src/corebpe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ pub fn byte_pair_encode(piece: &[u8], ranks: &HashMap<Vec<u8>, Rank>) -> Vec<Ran
.collect()
}

+#[cfg(test)]
pub fn byte_pair_split<'a>(piece: &'a [u8], ranks: &HashMap<Vec<u8>, Rank>) -> Vec<&'a [u8]> {
assert!(piece.len() > 1);
_byte_pair_merge(ranks, piece)
Expand Down Expand Up @@ -230,7 +231,7 @@ impl CoreBPE {

if !piece.is_empty() {
last_piece_token_len = match self.encoder.get(piece) {
-Some(token) => 1,
+Some(_token) => 1,
None => byte_pair_encode(piece, &self.encoder).len(),
};
};
Expand Down
11 changes: 4 additions & 7 deletions src/encoding.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,6 @@ pub struct Encoding {
prefixes_of_mergeable_ranks: HashTableOwned<PrefixConfig>,
/// The map from special token strings to their values.
special_tokens: HashMap<String, Rank>,
-/// The maximum token value in the encoding.
-max_token_value: Rank,
/// The core BPE logic implemented in Rust.
core_bpe: Arc<CoreBPE>,
}
Expand Down Expand Up @@ -118,7 +116,6 @@ impl Encoding {
mergeable_ranks_max_key_len,
prefixes_of_mergeable_ranks,
special_tokens,
-max_token_value,
core_bpe: Arc::new(core_bpe),
})
}
Expand Down Expand Up @@ -294,7 +291,7 @@ impl Encoding {
}

/// Encodes a list of strings into tokens, in parallel, ignoring special tokens.
-pub fn encode_ordinary_batch(&self, text: Vec<String>) -> Vec<Vec<usize>> {
+pub fn encode_ordinary_batch(&self, _text: Vec<String>) -> Vec<Vec<usize>> {
// encoder = functools.partial(self.encode_ordinary)
// with ThreadPoolExecutor(num_threads) as e:
// return list(e.map(encoder, text))
Expand All @@ -305,8 +302,8 @@ impl Encoding {
/// Encodes a list of strings into tokens, in parallel.
pub fn encode_batch(
&self,
-text: Vec<String>,
-special_token_handling: &SpecialTokenHandling,
+_text: Vec<String>,
+_special_token_handling: &SpecialTokenHandling,
) -> Vec<Vec<usize>> {
// with ThreadPoolExecutor(num_threads) as e:
// return list(e.map(encoder, text))
Expand Down Expand Up @@ -460,7 +457,7 @@ impl Encoding {
/// Encodes text corresponding to bytes without a regex split.
///
/// NOTE: this will not encode any special tokens.
-fn _encode_single_piece(&self, text: &str) -> Vec<Rank> {
+pub fn _encode_single_piece(&self, text: &str) -> Vec<Rank> {
let text_or_bytes = text.as_bytes();
self.core_bpe.encode_single_piece(text_or_bytes)
}
Expand Down

0 comments on commit d7cef90

Please sign in to comment.