Merge pull request #8 from zebra-uestc/pr/Jant1L/2200-2

Pr/jant1 l/2200 2
eosphoros-ai · Jan 13, 2025 · eb932e1 · eb932e1
2 parents 7acd751 + 368b037
commit eb932e1
Show file tree

Hide file tree

Showing 6 changed files with 28 additions and 12 deletions.
diff --git a/dbgpt/datasource/conn_tugraph.py b/dbgpt/datasource/conn_tugraph.py
@@ -74,7 +74,7 @@ def from_uri_db(
                 "neo4j package is not installed, please install it with "
                 "`pip install neo4j`"
             ) from err
-    
+
     def get_system_info(self) -> Dict:
         """Get system info from the TuGraph."""
         with self._driver.session(database="default") as session:

diff --git a/dbgpt/rag/index/base.py b/dbgpt/rag/index/base.py
@@ -177,7 +177,7 @@ async def aload_document_with_limit(
             List[str]: Chunk ids.
         """
         chunk_groups = [
-            chunks[i: i + max_chunks_once_load]
+            chunks[i : i + max_chunks_once_load]
             for i in range(0, len(chunks), max_chunks_once_load)
         ]
         logger.info(
@@ -189,6 +189,7 @@ async def aload_document_with_limit(
             tasks.append(self.aload_document(chunk_group))
 
         import asyncio
+
         results = await asyncio.gather(*tasks)
 
         ids = []

diff --git a/dbgpt/rag/transformer/base.py b/dbgpt/rag/transformer/base.py
@@ -26,8 +26,10 @@ def drop(self):
 class EmbedderBase(TransformerBase, ABC):
     """Embedder base class."""
 
-    def __init__(self, embedding_fn: Embeddings):
+    def __init__(self, embedding_fn: Optional[Embeddings]):
         """Initialize the Embedder."""
+        if not embedding_fn:
+            raise ValueError("Embedding sevice is required.")
         self._embedding_fn = embedding_fn
 
     @retry(stop=stop_after_attempt(3), wait=wait_fixed(2))

diff --git a/dbgpt/rag/transformer/text_embedder.py b/dbgpt/rag/transformer/text_embedder.py
@@ -18,7 +18,7 @@ async def batch_embed(
         batch_size: int = 1,
     ) -> List[List[float]]:
         """Embed texts from graphs in batches."""
-        vectors = []
+        vectors: List[List[float]] = []
         n_texts = len(inputs)
 
         # Batch embedding
@@ -32,7 +32,7 @@ async def batch_embed(
 
             # Process embedding in parallel
             batch_results = await asyncio.gather(
-                *(task for task in embedding_tasks), return_exceptions=True
+                *(task for task in embedding_tasks), return_exceptions=False
             )
 
             # Place results in the correct positions

diff --git a/dbgpt/storage/knowledge_graph/community/tugraph_store_adapter.py b/dbgpt/storage/knowledge_graph/community/tugraph_store_adapter.py
@@ -3,6 +3,7 @@
 import json
 import logging
 from typing import Any, AsyncGenerator, Dict, Iterator, List, Optional, Tuple, Union
+
 from packaging.version import Version
 
 from dbgpt.storage.graph_store.graph import (
@@ -199,12 +200,16 @@ def upsert_edge(
     def upsert_chunks(self, chunks: Iterator[Union[Vertex, ParagraphChunk]]) -> None:
         """Upsert chunks."""
         enable_similarity_search = self.graph_store.enable_similarity_search
-        chunk_list = [
+        chunk_list: List[Dict[str, Union[str, List[float]]]] = [
             {
                 "id": self._escape_quotes(chunk.chunk_id),
                 "name": self._escape_quotes(chunk.chunk_name),
                 "content": self._escape_quotes(chunk.content),
-                **({"_embedding": chunk.embedding} if enable_similarity_search else {}),
+                **(
+                    {"_embedding": chunk.embedding}
+                    if enable_similarity_search and chunk.embedding
+                    else {}
+                ),
             }
             if isinstance(chunk, ParagraphChunk)
             else {
@@ -413,9 +418,11 @@ def create_graph(self, graph_name: str):
         dbms_system_info = self.graph_store.conn.get_system_info()
         lgraph_version = dbms_system_info["lgraph_version"]
         similarity_search_compatible = Version(lgraph_version) >= Version("4.5.1")
-        
+
         if enable_similarity_search and not similarity_search_compatible:
-            raise Exception("TuGraph 4.5.0 and below does not support similarity search.")
+            raise Exception(
+                "TuGraph 4.5.0 and below does not support similarity search."
+            )
 
         # Create the graph schema
         def _format_graph_property_schema(
@@ -468,7 +475,9 @@ def _format_graph_property_schema(
             _format_graph_property_schema("content", "STRING", True, True),
         ]
         if enable_similarity_search:
-            chunk_proerties.append(_format_graph_property_schema("_embedding", "FLOAT_VECTOR", True, False))
+            chunk_proerties.append(
+                _format_graph_property_schema("_embedding", "FLOAT_VECTOR", True, False)
+            )
         self.create_graph_label(
             graph_elem_type=GraphElemType.CHUNK, graph_properties=chunk_proerties
         )
@@ -481,7 +490,11 @@ def _format_graph_property_schema(
             _format_graph_property_schema("description", "STRING", True, True),
         ]
         if enable_similarity_search:
-            vertex_proerties.append(_format_graph_property_schema("_embedding", "FLOAT_VECTOR", True, False),)
+            vertex_proerties.append(
+                _format_graph_property_schema(
+                    "_embedding", "FLOAT_VECTOR", True, False
+                ),
+            )
         self.create_graph_label(
             graph_elem_type=GraphElemType.ENTITY, graph_properties=vertex_proerties
         )

diff --git a/docs/docs/cookbook/rag/graph_rag_app_develop.md b/docs/docs/cookbook/rag/graph_rag_app_develop.md
@@ -17,7 +17,7 @@ pip install "dbgpt[graph_rag]>=0.6.1"
 
 To store the knowledge in graph, we need an graph database, [TuGraph](https://github.com/TuGraph-family/tugraph-db) is the first graph database supported by DB-GPT.
 
-Visit github repository of TuGraph to view [Quick Start](https://tugraph-db.readthedocs.io/zh-cn/latest/3.quick-start/1.preparation.html#id5) document, follow the instructions to pull the TuGraph database docker image (latest / version >= 4.5.0) and launch it.
+Visit the GitHub repository of TuGraph to view the [Quick Start](https://tugraph-db.readthedocs.io/zh-cn/latest/3.quick-start/1.preparation.html#id5) document, then follow its instructions to pull the TuGraph database Docker image (latest / version >= 4.5.1) and launch it.
 
 ```
 docker pull tugraph/tugraph-runtime-centos7:4.5.1