Skip to content

Commit

Permalink
Fixed OpenAI empty-string bug
Browse files: browse the repository at this point in the history
  • Loading branch information
Filimoa committed Nov 13, 2024
1 parent fdb8e6b commit f8d9716
Showing 1 changed file with 13 additions and 14 deletions.
27 changes: 13 additions & 14 deletions src/openparse/processing/semantic_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,27 +37,26 @@ def __init__(
self.batch_size = batch_size
self.client = self._create_client()

def embed_many(self, texts: list[str]) -> list[list[float]]:
    """
    Generate embeddings for a list of texts in batches.

    Empty strings are never sent to the embeddings client (presumably
    because the API rejects empty inputs -- confirm against the provider
    docs). Each empty input is represented by an all-zero vector instead,
    so the result always lines up one-to-one with ``texts``.

    Args:
        texts (list[str]): The list of texts to embed. May contain empty
            strings.

    Returns:
        list[list[float]]: One embedding per input text, in input order.
            Zero vectors have the same length as the embeddings returned
            by the client; if every text is empty no request is made and
            each zero vector has length 1.

    Raises:
        IndexError: If the client returns fewer embeddings than the number
            of non-empty texts submitted.
    """
    non_empty_texts = [text for text in texts if text]

    res: list[list[float]] = []
    for start in range(0, len(non_empty_texts), self.batch_size):
        batch_texts = non_empty_texts[start : start + self.batch_size]
        api_resp = self.client.embeddings.create(
            input=batch_texts, model=self.model
        )
        res.extend(val.embedding for val in api_resp.data)

    # With no real embedding to measure, fall back to a 1-dimensional
    # placeholder (the model's true dimensionality is unknown here).
    embedding_size = len(res[0]) if res else 1

    # Map results back to original indices, adding zero embeddings for
    # empty texts. An index cursor keeps this linear; popping from the
    # front of ``res`` would shift the whole list on every call.
    final_res: list[list[float]] = []
    cursor = 0
    for text in texts:
        if text:
            final_res.append(res[cursor])
            cursor += 1
        else:
            # A fresh list per slot so callers can mutate one safely.
            final_res.append([0.0] * embedding_size)

    return final_res

def _create_client(self):
try:
Expand Down

0 comments on commit f8d9716

Please sign in to comment.