From cb351c94c962584e1adb6a4af411dc21b69dee6e Mon Sep 17 00:00:00 2001 From: Thomas Wood Date: Mon, 16 Dec 2024 12:45:38 +0000 Subject: [PATCH] set default batch size to 1000 everywhere --- src/harmony/matching/default_matcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/harmony/matching/default_matcher.py b/src/harmony/matching/default_matcher.py index 902f931..c2620b4 100644 --- a/src/harmony/matching/default_matcher.py +++ b/src/harmony/matching/default_matcher.py @@ -46,7 +46,7 @@ model = SentenceTransformer(sentence_transformer_path) -def convert_texts_to_vector(texts: List, batch_size=50, max_batches=2000) -> ndarray: +def convert_texts_to_vector(texts: List, batch_size=1000, max_batches=2000) -> ndarray: if batch_size == 0: embeddings = model.encode(sentences=texts, convert_to_numpy=True) @@ -74,7 +74,7 @@ def match_instruments( mhc_questions: List = [], mhc_all_metadatas: List = [], mhc_embeddings: np.ndarray = np.zeros((0, 0)), - texts_cached_vectors: dict[str, List[float]] = {}, batch_size: int = 50, max_batches: int = 2000, + texts_cached_vectors: dict[str, List[float]] = {}, batch_size: int = 1000, max_batches: int = 2000, ) -> tuple: return match_instruments_with_function(