Skip to content

Commit

Permalink
fix stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
shreyashnigam committed Jan 17, 2025
1 parent 163ff40 commit 2ed85c9
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 8 deletions.
2 changes: 1 addition & 1 deletion src/chonkie/chunker/late.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,5 +436,5 @@ def __repr__(self):
f"min_sentences_per_chunk={self.min_sentences_per_chunk}, "
f"min_characters_per_sentence={self.min_characters_per_sentence}, "
f"approximate={self.approximate}, "
- f"delim={self.delim}"
+ f"delim={self.delim})"
)
4 changes: 2 additions & 2 deletions src/chonkie/chunker/semantic.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,8 +556,8 @@ def __repr__(self) -> str:
f"threshold={self.threshold}, "
f"similarity_window={self.similarity_window}, "
f"min_sentences={self.min_sentences}, "
- f"min_chunk_size={self.min_chunk_size})",
+ f"min_chunk_size={self.min_chunk_size}",
f"min_characters_per_sentence={self.min_characters_per_sentence}, ",
f"threshold_step={self.threshold_step}, ",
- f"delim={self.delim}",
+ f"delim={self.delim})",
)
8 changes: 4 additions & 4 deletions src/chonkie/chunker/sentence.py
Original file line number Diff line number Diff line change
Expand Up @@ -402,8 +402,8 @@ def __repr__(self) -> str:
f"SentenceChunker(tokenizer={self.tokenizer}, "
f"chunk_size={self.chunk_size}, "
f"chunk_overlap={self.chunk_overlap}, "
- f"min_sentences_per_chunk={self.min_sentences_per_chunk})",
- f"min_characters_per_sentence={self.min_characters_per_sentence}",
- f"approximate={self.approximate}",
- f"delim={self.delim}",
+ f"min_sentences_per_chunk={self.min_sentences_per_chunk}, "
+ f"min_characters_per_sentence={self.min_characters_per_sentence}, "
+ f"approximate={self.approximate}, "
+ f"delim={self.delim})"
)
2 changes: 1 addition & 1 deletion tests/chunker/test_word_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ def test_word_chunker_repr(tokenizer):
"""Test that the WordChunker has a string representation."""
chunker = WordChunker(tokenizer=tokenizer, chunk_size=512, chunk_overlap=128)

- assert repr(chunker) == "WordChunker(chunk_size=512, chunk_overlap=128)"
+ assert repr(chunker) == '''WordChunker(tokenizer=Tokenizer(version="1.0", truncation=None, padding=None, added_tokens=[{"id":50256, "content":"<|endoftext|>", "single_word":False, "lstrip":False, "rstrip":False, ...}], normalizer=None, pre_tokenizer=ByteLevel(add_prefix_space=False, trim_offsets=True, use_regex=True), post_processor=ByteLevel(add_prefix_space=True, trim_offsets=False, use_regex=True), decoder=ByteLevel(add_prefix_space=True, trim_offsets=True, use_regex=True), model=BPE(dropout=None, unk_token=None, continuing_subword_prefix="", end_of_word_suffix="", fuse_unk=False, byte_fallback=False, ignore_merges=False, vocab={"!":0, """:1, "#":2, "$":3, "%":4, ...}, merges=[("Ġ", "t"), ("Ġ", "a"), ("h", "e"), ("i", "n"), ("r", "e"), ...])), chunk_size=512, chunk_overlap=128)'''


def test_word_chunker_call(tokenizer, sample_text):
Expand Down

0 comments on commit 2ed85c9

Please sign in to comment.