From 43bc7e7de31565f702c8ca4fa04c321ce3097d55 Mon Sep 17 00:00:00 2001 From: Ravi Theja Date: Fri, 27 Sep 2024 21:00:57 +0530 Subject: [PATCH] Update BM25 retriever to use metadata --- .../llama_index/retrievers/bm25/base.py | 10 ++++++++-- .../llama-index-retrievers-bm25/pyproject.toml | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/llama-index-integrations/retrievers/llama-index-retrievers-bm25/llama_index/retrievers/bm25/base.py b/llama-index-integrations/retrievers/llama-index-retrievers-bm25/llama_index/retrievers/bm25/base.py index 1e04d70c4dd96..d204bebb975ea 100644 --- a/llama-index-integrations/retrievers/llama-index-retrievers-bm25/llama_index/retrievers/bm25/base.py +++ b/llama-index-integrations/retrievers/llama-index-retrievers-bm25/llama_index/retrievers/bm25/base.py @@ -8,7 +8,13 @@ from llama_index.core.callbacks.base import CallbackManager from llama_index.core.constants import DEFAULT_SIMILARITY_TOP_K from llama_index.core.indices.vector_store.base import VectorStoreIndex -from llama_index.core.schema import BaseNode, IndexNode, NodeWithScore, QueryBundle +from llama_index.core.schema import ( + BaseNode, + IndexNode, + NodeWithScore, + QueryBundle, + MetadataMode, +) from llama_index.core.storage.docstore.types import BaseDocumentStore from llama_index.core.vector_stores.utils import ( node_to_metadata_dict, @@ -75,7 +81,7 @@ def __init__( self.corpus = [node_to_metadata_dict(node) for node in nodes] corpus_tokens = bm25s.tokenize( - [node.get_content() for node in nodes], + [node.get_content(metadata_mode=MetadataMode.EMBED) for node in nodes], stopwords=language, stemmer=self.stemmer, show_progress=verbose, diff --git a/llama-index-integrations/retrievers/llama-index-retrievers-bm25/pyproject.toml b/llama-index-integrations/retrievers/llama-index-retrievers-bm25/pyproject.toml index 80b8155095fca..dbd71ae457bec 100644 --- a/llama-index-integrations/retrievers/llama-index-retrievers-bm25/pyproject.toml +++ 
b/llama-index-integrations/retrievers/llama-index-retrievers-bm25/pyproject.toml @@ -27,7 +27,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-retrievers-bm25" readme = "README.md" -version = "0.3.0" +version = "0.3.1" [tool.poetry.dependencies] python = ">=3.8.1,<4.0"