Skip to content

Commit

Permalink
remove max token count for asyncio task embed_one_async
Browse files — browse the repository at this point in the history
  • Loading branch information
dayesouza committed Nov 11, 2024
1 parent 3091dfe commit 082979e
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions intelligence_toolkit/AI/base_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,19 +53,21 @@ async def embed_one_async(
self,
data: VectorData,
has_callback=False,
check_token_count=True,
) -> Any | list[float]:
async with self.semaphore:
if not data["hash"]:
text_hashed = hash_text(data["text"])
data["hash"] = text_hashed
try:
tokens = get_token_count(data["text"])
if tokens > self.max_tokens:
text = data["text"][: self.max_tokens]
data["text"] = text
logger.info("Truncated text to max tokens")
except Exception:
pass
if check_token_count:
try:
tokens = get_token_count(data["text"])
if tokens > self.max_tokens:
text = data["text"][: self.max_tokens]
data["text"] = text
logger.info("Truncated text to max tokens")
except Exception:
pass
try:
embedding = await asyncio.wait_for(
self._generate_embedding_async(data["text"]), timeout=90
Expand Down Expand Up @@ -158,7 +160,7 @@ async def embed_store_many(
]
if len(new_items) > 0:
tasks = [
asyncio.create_task(self.embed_one_async(item, callbacks))
asyncio.create_task(self.embed_one_async(item, callbacks, False))
for item in new_items
]
if callbacks:
Expand Down

0 comments on commit 082979e

Please sign in to comment.