Skip to content

Commit

Permalink
Merge pull request #8 from zebra-uestc/pr/Jant1L/2200-2
Browse files Browse the repository at this point in the history
Pr/jant1 l/2200 2
  • Loading branch information
SonglinLyu authored Jan 13, 2025
2 parents 7acd751 + 368b037 commit eb932e1
Show file tree
Hide file tree
Showing 6 changed files with 28 additions and 12 deletions.
2 changes: 1 addition & 1 deletion dbgpt/datasource/conn_tugraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def from_uri_db(
"neo4j package is not installed, please install it with "
"`pip install neo4j`"
) from err

def get_system_info(self) -> Dict:
"""Get system info from the TuGraph."""
with self._driver.session(database="default") as session:
Expand Down
3 changes: 2 additions & 1 deletion dbgpt/rag/index/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ async def aload_document_with_limit(
List[str]: Chunk ids.
"""
chunk_groups = [
chunks[i: i + max_chunks_once_load]
chunks[i : i + max_chunks_once_load]
for i in range(0, len(chunks), max_chunks_once_load)
]
logger.info(
Expand All @@ -189,6 +189,7 @@ async def aload_document_with_limit(
tasks.append(self.aload_document(chunk_group))

import asyncio

results = await asyncio.gather(*tasks)

ids = []
Expand Down
4 changes: 3 additions & 1 deletion dbgpt/rag/transformer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,10 @@ def drop(self):
class EmbedderBase(TransformerBase, ABC):
"""Embedder base class."""

def __init__(self, embedding_fn: Embeddings):
def __init__(self, embedding_fn: Optional[Embeddings]):
"""Initialize the Embedder."""
if not embedding_fn:
raise ValueError("Embedding sevice is required.")
self._embedding_fn = embedding_fn

@retry(stop=stop_after_attempt(3), wait=wait_fixed(2))
Expand Down
4 changes: 2 additions & 2 deletions dbgpt/rag/transformer/text_embedder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ async def batch_embed(
batch_size: int = 1,
) -> List[List[float]]:
"""Embed texts from graphs in batches."""
vectors = []
vectors: List[List[float]] = []
n_texts = len(inputs)

# Batch embedding
Expand All @@ -32,7 +32,7 @@ async def batch_embed(

# Process embedding in parallel
batch_results = await asyncio.gather(
*(task for task in embedding_tasks), return_exceptions=True
*(task for task in embedding_tasks), return_exceptions=False
)

# Place results in the correct positions
Expand Down
25 changes: 19 additions & 6 deletions dbgpt/storage/knowledge_graph/community/tugraph_store_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
import logging
from typing import Any, AsyncGenerator, Dict, Iterator, List, Optional, Tuple, Union

from packaging.version import Version

from dbgpt.storage.graph_store.graph import (
Expand Down Expand Up @@ -199,12 +200,16 @@ def upsert_edge(
def upsert_chunks(self, chunks: Iterator[Union[Vertex, ParagraphChunk]]) -> None:
"""Upsert chunks."""
enable_similarity_search = self.graph_store.enable_similarity_search
chunk_list = [
chunk_list: List[Dict[str, Union[str, List[float]]]] = [
{
"id": self._escape_quotes(chunk.chunk_id),
"name": self._escape_quotes(chunk.chunk_name),
"content": self._escape_quotes(chunk.content),
**({"_embedding": chunk.embedding} if enable_similarity_search else {}),
**(
{"_embedding": chunk.embedding}
if enable_similarity_search and chunk.embedding
else {}
),
}
if isinstance(chunk, ParagraphChunk)
else {
Expand Down Expand Up @@ -413,9 +418,11 @@ def create_graph(self, graph_name: str):
dbms_system_info = self.graph_store.conn.get_system_info()
lgraph_version = dbms_system_info["lgraph_version"]
similarity_search_compatible = Version(lgraph_version) >= Version("4.5.1")

if enable_similarity_search and not similarity_search_compatible:
raise Exception("TuGraph 4.5.0 and below does not support similarity search.")
raise Exception(
"TuGraph 4.5.0 and below does not support similarity search."
)

# Create the graph schema
def _format_graph_property_schema(
Expand Down Expand Up @@ -468,7 +475,9 @@ def _format_graph_property_schema(
_format_graph_property_schema("content", "STRING", True, True),
]
if enable_similarity_search:
chunk_proerties.append(_format_graph_property_schema("_embedding", "FLOAT_VECTOR", True, False))
chunk_proerties.append(
_format_graph_property_schema("_embedding", "FLOAT_VECTOR", True, False)
)
self.create_graph_label(
graph_elem_type=GraphElemType.CHUNK, graph_properties=chunk_proerties
)
Expand All @@ -481,7 +490,11 @@ def _format_graph_property_schema(
_format_graph_property_schema("description", "STRING", True, True),
]
if enable_similarity_search:
vertex_proerties.append(_format_graph_property_schema("_embedding", "FLOAT_VECTOR", True, False),)
vertex_proerties.append(
_format_graph_property_schema(
"_embedding", "FLOAT_VECTOR", True, False
),
)
self.create_graph_label(
graph_elem_type=GraphElemType.ENTITY, graph_properties=vertex_proerties
)
Expand Down
2 changes: 1 addition & 1 deletion docs/docs/cookbook/rag/graph_rag_app_develop.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ pip install "dbgpt[graph_rag]>=0.6.1"

To store the knowledge in a graph, we need a graph database. [TuGraph](https://github.com/TuGraph-family/tugraph-db) is the first graph database supported by DB-GPT.

Visit github repository of TuGraph to view [Quick Start](https://tugraph-db.readthedocs.io/zh-cn/latest/3.quick-start/1.preparation.html#id5) document, follow the instructions to pull the TuGraph database docker image (latest / version >= 4.5.0) and launch it.
Visit the GitHub repository of TuGraph to view the [Quick Start](https://tugraph-db.readthedocs.io/zh-cn/latest/3.quick-start/1.preparation.html#id5) document, then follow the instructions to pull the TuGraph database Docker image (latest / version >= 4.5.1) and launch it.

```
docker pull tugraph/tugraph-runtime-centos7:4.5.1
Expand Down

0 comments on commit eb932e1

Please sign in to comment.