From 4fa60037ddcc9cc7a9b58b40ffa9b780e5e05af0 Mon Sep 17 00:00:00 2001 From: Aries-ckt <916701291@qq.com> Date: Mon, 2 Dec 2024 20:56:23 +0800 Subject: [PATCH] fix: VectorStore can not be attached to EmbeddingAssemblerOperator bug (#2170) --- dbgpt/rag/knowledge/pdf.py | 2 + dbgpt/rag/operators/embedding.py | 2 +- dbgpt/rag/operators/knowledge.py | 8 ++-- dbgpt/serve/rag/connector.py | 42 ------------------- .../knowledge_graph/community_summary.py | 1 - dbgpt/storage/vector_store/chroma_store.py | 22 ++++++++-- dbgpt/storage/vector_store/elastic_store.py | 22 ++++++++-- dbgpt/storage/vector_store/milvus_store.py | 22 ++++++++-- dbgpt/storage/vector_store/oceanbase_store.py | 22 ++++++++-- dbgpt/storage/vector_store/pgvector_store.py | 22 ++++++++-- dbgpt/storage/vector_store/weaviate_store.py | 22 ++++++++-- 11 files changed, 121 insertions(+), 66 deletions(-) diff --git a/dbgpt/rag/knowledge/pdf.py b/dbgpt/rag/knowledge/pdf.py index 66e82bbd9..021bb4545 100644 --- a/dbgpt/rag/knowledge/pdf.py +++ b/dbgpt/rag/knowledge/pdf.py @@ -232,6 +232,7 @@ def _load(self) -> List[Document]: "page": page, "type": "excel", "title": file_title, + "source": self.file_path, } page_documents.append( Document( @@ -244,6 +245,7 @@ def _load(self) -> List[Document]: "page": page, "type": "text", "title": file_title, + "source": self.file_path, } page_documents.append( Document(content=inside_content, metadata=content_metadata) diff --git a/dbgpt/rag/operators/embedding.py b/dbgpt/rag/operators/embedding.py index da0b1cfd3..128bed56f 100644 --- a/dbgpt/rag/operators/embedding.py +++ b/dbgpt/rag/operators/embedding.py @@ -155,7 +155,7 @@ class EmbeddingAssemblerOperator(AssemblerOperator[Knowledge, List[Chunk]]): IOField.build_from( _("Chunks"), "chunks", - Chunk, + List[Chunk], description=_( "The assembled chunks, it has been persisted to vector " "store." ), diff --git a/dbgpt/rag/operators/knowledge.py b/dbgpt/rag/operators/knowledge.py index fab6a014e..47cf2d3ab 100644 --- a/dbgpt/rag/operators/knowledge.py +++ b/dbgpt/rag/operators/knowledge.py @@ -20,7 +20,7 @@ class KnowledgeOperator(MapOperator[str, Knowledge]): """Knowledge Factory Operator.""" metadata = ViewMetadata( - label=_("Knowledge Operator"), + label=_("Knowledge Loader Operator"), name="knowledge_operator", category=OperatorCategory.RAG, description=_( @@ -30,7 +30,7 @@ class KnowledgeOperator(MapOperator[str, Knowledge]): IOField.build_from( _("knowledge datasource"), "knowledge datasource", - str, + dict, _("knowledge datasource, which can be a document, url, or text."), ) ], @@ -89,7 +89,7 @@ def __init__( self._datasource = datasource self._knowledge_type = KnowledgeType.get_by_value(knowledge_type) - async def map(self, datasource: str) -> Knowledge: + async def map(self, datasource: dict) -> Knowledge: """Create knowledge from datasource.""" if self._datasource: datasource = self._datasource @@ -120,7 +120,7 @@ class ChunksToStringOperator(MapOperator[List[Chunk], str]): IOField.build_from( _("Chunks"), "chunks", - Chunk, + List[Chunk], description=_("The input chunks."), is_list=True, ) diff --git a/dbgpt/serve/rag/connector.py b/dbgpt/serve/rag/connector.py index ae7cf1773..6eb266015 100644 --- a/dbgpt/serve/rag/connector.py +++ b/dbgpt/serve/rag/connector.py @@ -8,17 +8,9 @@ from dbgpt.app.component_configs import CFG from dbgpt.core import Chunk, Embeddings -from dbgpt.core.awel.flow import ( - FunctionDynamicOptions, - OptionValue, - Parameter, - ResourceCategory, - register_resource, -) from dbgpt.rag.index.base import IndexStoreBase, IndexStoreConfig from dbgpt.storage.vector_store.base import VectorStoreConfig from dbgpt.storage.vector_store.filters import MetadataFilters -from dbgpt.util.i18n_utils import _ logger = logging.getLogger(__name__) @@ -26,40 +18,6 @@ pools: DefaultDict[str, Dict] = defaultdict(dict) -def _load_vector_options() -> List[OptionValue]: - from dbgpt.storage import vector_store - - return [ - OptionValue(label=cls, name=cls, value=cls) - for cls in vector_store.__all__ - if issubclass(getattr(vector_store, cls)[0], IndexStoreBase) - ] - - -@register_resource( - _("Vector Store Connector"), - "vector_store_connector", - category=ResourceCategory.VECTOR_STORE, - parameters=[ - Parameter.build_from( - _("Vector Store Type"), - "vector_store_type", - str, - description=_("The type of vector store."), - options=FunctionDynamicOptions(func=_load_vector_options), - ), - Parameter.build_from( - _("Vector Store Implementation"), - "vector_store_config", - VectorStoreConfig, - description=_("The vector store implementation."), - optional=True, - default=None, - ), - ], - # Compatible with the old version - alias=["dbgpt.storage.vector_store.connector.VectorStoreConnector"], -) class VectorStoreConnector: """The connector for vector store. diff --git a/dbgpt/storage/knowledge_graph/community_summary.py b/dbgpt/storage/knowledge_graph/community_summary.py index 1815c26be..6a1027012 100644 --- a/dbgpt/storage/knowledge_graph/community_summary.py +++ b/dbgpt/storage/knowledge_graph/community_summary.py @@ -197,7 +197,6 @@ async def aload_document(self, chunks: List[Chunk]) -> List[str]: async def _aload_document_graph(self, chunks: List[Chunk]) -> None: """Load the knowledge graph from the chunks. - The chunks include the doc structure. """ if not self._document_graph_enabled: diff --git a/dbgpt/storage/vector_store/chroma_store.py b/dbgpt/storage/vector_store/chroma_store.py index 4e377a372..93191c715 100644 --- a/dbgpt/storage/vector_store/chroma_store.py +++ b/dbgpt/storage/vector_store/chroma_store.py @@ -21,10 +21,10 @@ @register_resource( - _("Chroma Vector Store"), - "chroma_vector_store", + _("Chroma Config"), + "chroma_vector_config", category=ResourceCategory.VECTOR_STORE, - description=_("Chroma vector store."), + description=_("Chroma vector store config."), parameters=[ *_COMMON_PARAMETERS, Parameter.build_from( @@ -53,6 +53,22 @@ class ChromaVectorConfig(VectorStoreConfig): ) +@register_resource( + _("Chroma Vector Store"), + "chroma_vector_store", + category=ResourceCategory.VECTOR_STORE, + description=_("Chroma vector store."), + parameters=[ + Parameter.build_from( + _("Chroma Config"), + "vector_store_config", + ChromaVectorConfig, + description=_("the chroma config of vector store."), + optional=True, + default=None, + ), + ], +) class ChromaStore(VectorStoreBase): """Chroma vector store.""" diff --git a/dbgpt/storage/vector_store/elastic_store.py b/dbgpt/storage/vector_store/elastic_store.py index f91048d5f..4098a38c0 100644 --- a/dbgpt/storage/vector_store/elastic_store.py +++ b/dbgpt/storage/vector_store/elastic_store.py @@ -22,8 +22,8 @@ @register_resource( - _("ElasticSearch Vector Store"), - "elasticsearch_vector_store", + _("Elastic Vector Config"), + "elasticsearch_vector_config", category=ResourceCategory.VECTOR_STORE, parameters=[ *_COMMON_PARAMETERS, @@ -72,7 +72,7 @@ default="index_name_test", ), ], - description=_("Elasticsearch vector store."), + description=_("Elasticsearch vector config."), ) class ElasticsearchVectorConfig(VectorStoreConfig): """Elasticsearch vector store config.""" @@ -116,6 +116,22 @@ class Config: ) +@register_resource( + _("Elastic Vector Store"), + "elastic_vector_store", + category=ResourceCategory.VECTOR_STORE, + description=_("Elastic vector store."), + parameters=[ + Parameter.build_from( + _("Elastic Config"), + "vector_store_config", + ElasticsearchVectorConfig, + description=_("the elastic config of vector store."), + optional=True, + default=None, + ), + ], +) class ElasticStore(VectorStoreBase): """Elasticsearch vector store.""" diff --git a/dbgpt/storage/vector_store/milvus_store.py b/dbgpt/storage/vector_store/milvus_store.py index b71832268..0032fa89a 100644 --- a/dbgpt/storage/vector_store/milvus_store.py +++ b/dbgpt/storage/vector_store/milvus_store.py @@ -22,8 +22,8 @@ @register_resource( - _("Milvus Vector Store"), - "milvus_vector_store", + _("Milvus Config"), + "milvus_vector_config", category=ResourceCategory.VECTOR_STORE, parameters=[ *_COMMON_PARAMETERS, @@ -91,7 +91,7 @@ default="vector", ), ], - description=_("Milvus vector store."), + description=_("Milvus vector config."), ) class MilvusVectorConfig(VectorStoreConfig): """Milvus vector store config.""" @@ -139,6 +139,22 @@ class MilvusVectorConfig(VectorStoreConfig): ) +@register_resource( + _("Milvus Vector Store"), + "milvus_vector_store", + category=ResourceCategory.VECTOR_STORE, + description=_("Milvus vector store."), + parameters=[ + Parameter.build_from( + _("Milvus Config"), + "vector_store_config", + MilvusVectorConfig, + description=_("the milvus config of vector store."), + optional=True, + default=None, + ), + ], +) class MilvusStore(VectorStoreBase): """Milvus vector store.""" diff --git a/dbgpt/storage/vector_store/oceanbase_store.py b/dbgpt/storage/vector_store/oceanbase_store.py index 6f6a1afbd..42153744c 100644 --- a/dbgpt/storage/vector_store/oceanbase_store.py +++ b/dbgpt/storage/vector_store/oceanbase_store.py @@ -73,8 +73,8 @@ def _normalize(vector: List[float]) -> List[float]: @register_resource( - _("OceanBase Vector Store"), - "oceanbase_vector_store", + _("OceanBase Config"), + "oceanbase_vector_config", category=ResourceCategory.VECTOR_STORE, parameters=[ *_COMMON_PARAMETERS, @@ -119,7 +119,7 @@ def _normalize(vector: List[float]) -> List[float]: default=None, ), ], - description="OceanBase vector store.", + description="OceanBase vector store config.", ) class OceanBaseConfig(VectorStoreConfig): """OceanBase vector store config.""" @@ -152,6 +152,22 @@ class Config: ) +@register_resource( + _("OceanBase Vector Store"), + "ob_vector_store", + category=ResourceCategory.VECTOR_STORE, + description=_("OceanBase vector store."), + parameters=[ + Parameter.build_from( + _("OceanBase Config"), + "vector_store_config", + OceanBaseConfig, + description=_("the ob config of vector store."), + optional=True, + default=None, + ), + ], +) class OceanBaseStore(VectorStoreBase): """OceanBase vector store.""" diff --git a/dbgpt/storage/vector_store/pgvector_store.py b/dbgpt/storage/vector_store/pgvector_store.py index 5db414723..950469301 100644 --- a/dbgpt/storage/vector_store/pgvector_store.py +++ b/dbgpt/storage/vector_store/pgvector_store.py @@ -18,8 +18,8 @@ @register_resource( - _("PG Vector Store"), - "pg_vector_store", + _("PGVector Config"), + "pg_vector_config", category=ResourceCategory.VECTOR_STORE, parameters=[ *_COMMON_PARAMETERS, @@ -35,7 +35,7 @@ default=None, ), ], - description="PG vector store.", + description="PG vector config.", ) class PGVectorConfig(VectorStoreConfig): """PG vector store config.""" @@ -49,6 +49,22 @@ class PGVectorConfig(VectorStoreConfig): ) +@register_resource( + _("PG Vector Store"), + "pg_vector_store", + category=ResourceCategory.VECTOR_STORE, + description=_("PG vector store."), + parameters=[ + Parameter.build_from( + _("PG Config"), + "vector_store_config", + PGVectorConfig, + description=_("the pg config of vector store."), + optional=True, + default=None, + ), + ], +) class PGVectorStore(VectorStoreBase): """PG vector store. diff --git a/dbgpt/storage/vector_store/weaviate_store.py b/dbgpt/storage/vector_store/weaviate_store.py index e78cbc4f4..5355200d7 100644 --- a/dbgpt/storage/vector_store/weaviate_store.py +++ b/dbgpt/storage/vector_store/weaviate_store.py @@ -15,10 +15,10 @@ @register_resource( - _("Weaviate Vector Store"), - "weaviate_vector_store", + _("Weaviate Config"), + "weaviate_vector_config", category=ResourceCategory.VECTOR_STORE, - description=_("Weaviate vector store."), + description=_("Weaviate vector config."), parameters=[ *_COMMON_PARAMETERS, Parameter.build_from( @@ -56,6 +56,22 @@ class WeaviateVectorConfig(VectorStoreConfig): ) +@register_resource( + _("Weaviate Vector Store"), + "weaviate_vector_store", + category=ResourceCategory.VECTOR_STORE, + description=_("Weaviate vector store."), + parameters=[ + Parameter.build_from( + _("Weaviate Config"), + "vector_store_config", + WeaviateVectorConfig, + description=_("the weaviate config of vector store."), + optional=True, + default=None, + ), + ], +) class WeaviateStore(VectorStoreBase): """Weaviate database."""