From e366c16db5bf9aede607888bfda000988dec4d1b Mon Sep 17 00:00:00 2001 From: Jyong <76649700+JohnJyong@users.noreply.github.com> Date: Tue, 10 Sep 2024 18:21:22 +0800 Subject: [PATCH] revert page column (#8217) --- api/core/rag/datasource/vdb/vector_factory.py | 2 +- .../nodes/knowledge_retrieval/knowledge_retrieval_node.py | 4 ---- api/core/workflow/nodes/llm/llm_node.py | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) diff --git a/api/core/rag/datasource/vdb/vector_factory.py b/api/core/rag/datasource/vdb/vector_factory.py index 7d2db140df2ad1..bb24143a41ac8d 100644 --- a/api/core/rag/datasource/vdb/vector_factory.py +++ b/api/core/rag/datasource/vdb/vector_factory.py @@ -27,7 +27,7 @@ def gen_index_struct_dict(vector_type: VectorType, collection_name: str) -> dict class Vector: def __init__(self, dataset: Dataset, attributes: list = None): if attributes is None: - attributes = ["doc_id", "dataset_id", "document_id", "doc_hash", "page"] + attributes = ["doc_id", "dataset_id", "document_id", "doc_hash"] self._dataset = dataset self._embeddings = self._get_embeddings() self._attributes = attributes diff --git a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py index 19deca162aed24..53e8be64153d99 100644 --- a/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py +++ b/api/core/workflow/nodes/knowledge_retrieval/knowledge_retrieval_node.py @@ -163,9 +163,6 @@ def _fetch_dataset_retriever(self, node_data: KnowledgeRetrievalNodeData, query: for item in all_documents: if item.metadata.get("score"): document_score_list[item.metadata["doc_id"]] = item.metadata["score"] - # both 'page' and 'score' are metadata fields - if item.metadata.get("page"): - page_number_list[item.metadata["doc_id"]] = item.metadata["page"] index_node_ids = [document.metadata["doc_id"] for document in all_documents] segments = DocumentSegment.query.filter( @@ -200,7 +197,6 @@ def _fetch_dataset_retriever(self, node_data: KnowledgeRetrievalNodeData, query: "document_id": document.id, "document_name": document.name, "document_data_source_type": document.data_source_type, - "page": page_number_list.get(segment.index_node_id, None), "segment_id": segment.id, "retriever_from": "workflow", "score": document_score_list.get(segment.index_node_id, None), diff --git a/api/core/workflow/nodes/llm/llm_node.py b/api/core/workflow/nodes/llm/llm_node.py index 6dfd27861eefc4..049c2114882830 100644 --- a/api/core/workflow/nodes/llm/llm_node.py +++ b/api/core/workflow/nodes/llm/llm_node.py @@ -451,7 +451,6 @@ def _convert_to_original_retriever_resource(self, context_dict: dict) -> Optiona "segment_position": metadata.get("segment_position"), "index_node_hash": metadata.get("segment_index_node_hash"), "content": context_dict.get("content"), - "page": metadata.get("page"), } return source