From baec2b8924c85b1954e76f567f2cc3eb601993ee Mon Sep 17 00:00:00 2001 From: Jason Kang Date: Thu, 5 Sep 2024 14:53:13 +0900 Subject: [PATCH] feat: add page to vector factory metadata attribute --- api/core/rag/datasource/vdb/vector_factory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 3e9ca8e1fe7f4a..627d7c3aeb5a18 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -30,7 +30,7 @@ def gen_index_struct_dict(vector_type: VectorType, collection_name: str) -> dict class Vector: def __init__(self, dataset: Dataset, attributes: list = None): if attributes is None: - attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash'] + attributes = ['doc_id', 'dataset_id', 'document_id', 'doc_hash', 'page'] self._dataset = dataset self._embeddings = self._get_embeddings() self._attributes = attributes @@ -107,6 +107,7 @@ def create(self, texts: list = None, **kwargs): def add_texts(self, documents: list[Document], **kwargs): if kwargs.get('duplicate_check', False): documents = self._filter_duplicate_texts(documents) + embeddings = self._embeddings.embed_documents([document.page_content for document in documents]) self._vector_processor.create( texts=documents,