Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Neo4j Graph Rag Integration #139

Merged
merged 15 commits into from
Dec 6, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ repos:
args: ["-L", "ans,linar,nam,tread,ot,assertIn,dependin,socio-economic"]
exclude: |
(?x)^(
test/agentchat/contrib/graph_rag/paul_graham_essay.txt |
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved
pyproject.toml |
website/static/img/ag.svg |
website/static/img/ag2.svg |
Expand Down
156 changes: 156 additions & 0 deletions autogen/agentchat/contrib/graph_rag/neo4j_graph_query_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
#
# SPDX-License-Identifier: Apache-2.0
import os
from typing import Dict, List, Optional, TypeAlias, Union

from llama_index.core import PropertyGraphIndex, SimpleDirectoryReader
from llama_index.core.indices.property_graph import SchemaLLMPathExtractor
from llama_index.core.indices.property_graph.transformations.schema_llm import Triple
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
from llama_index.llms.openai import OpenAI

from .document import Document
from .graph_query_engine import GraphQueryEngine, GraphStoreQueryResult


class Neo4jGraphQueryEngine(GraphQueryEngine):
"""
This is a wrapper for Neo4j KnowledgeGraph.
"""

def __init__(
self,
host: str = "bolt://localhost",
port: int = 7687,
database: str = "neo4j",
username: str = "neo4j",
password: str = "neo4j",
model: str = "gpt-3.5-turbo",
temperature: float = 0.0,
embed_model: str = "text-embedding-3-small",
entities: Optional[TypeAlias] = None,
relations: Optional[TypeAlias] = None,
validation_schema: Optional[Union[Dict[str, str], List[Triple]]] = None,
strict: Optional[bool] = True,
):
"""
Initialize a Neo4j knowledge graph.
Please also refer to https://neo4j.com/docs/
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved

Args:
name (str): Knowledge graph name.
host (str): Neo4j hostname.
port (int): Neo4j port number.
database (str): Neo4j database name.
username (str): Neo4j username.
password (str): Neo4j password.
model (str): LLM model to use for Neo4j to build and retrieve from the graph, default to use OAI gpt-3.5-turbo.
temperature (float): LLM temperature.
include_embeddings (bool): Whether to include embeddings in the graph.
entities (Optional[TypeAlias]): Custom possible entities to include in the graph.
relations (Optional[TypeAlias]): Custom poissble relations to include in the graph.
validation_schema (Optional[Union[Dict[str, str], List[Triple]]): Custom schema to validate the extracted triplets
strict (Optional[bool]): If false, allows for values outside of the schema, useful for using the schema as a suggestion.
"""
self.host = host
self.port = port
self.database = database
self.username = username
self.password = password
self.model = model
self.temperature = temperature
self.embed_model = embed_model
self.entities = entities
self.relations = relations
self.validation_schema = validation_schema
self.strict = strict

def init_db(self, input_doc: List[Document] | None = None):
"""
Build the knowledge graph with input documents.
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved
"""
self.input_files = []
for doc in input_doc:
if os.path.exists(doc.path_or_url):
self.input_files.append(doc.path_or_url)
else:
raise ValueError(f"Document file not found: {doc.path_or_url}")

self.graph_store = Neo4jPropertyGraphStore(
username=self.username,
password=self.password,
url=self.host + ":" + str(self.port),
database=self.database,
)
self.documents = SimpleDirectoryReader(input_files=self.input_files).load_data()

# Extract paths following a strict schema of allowed entities, relationships, and which entities can be connected to which relationships.
# To add more extractors, please refer to https://docs.llamaindex.ai/en/latest/module_guides/indexing/lpg_index_guide/#construction
self.kg_extractors = [
SchemaLLMPathExtractor(
llm=OpenAI(model=self.model, temperature=0.0),
possible_entities=self.entities,
possible_relations=self.relations,
kg_validation_schema=self.validation_schema,
strict=self.strict,
)
]

self.index = PropertyGraphIndex.from_documents(
self.documents,
embed_model=OpenAIEmbedding(model_name="text-embedding-3-small"),
kg_extractors=self.kg_extractors,
property_graph_store=self.graph_store,
show_progress=True,
)

def add_records(self, new_records: List) -> bool:
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved
"""
Add new records to the knowledge graph.

Args:
new_records (List[Document]): List of new documents to add.

Returns:
bool: True if successful, False otherwise.
"""
if self.graph_store is None:
raise ValueError("Knowledge graph is not initialized. Please call init_db first.")

try:
# Load new documents
new_documents = SimpleDirectoryReader(input_files=[doc.path_or_url for doc in new_records]).load_data()

for doc in new_documents:
self.index.insert(doc)

return True
except Exception as e:
print(f"Error adding records: {e}")
return False

def query(self, question: str, n_results: int = 1, **kwargs) -> GraphStoreQueryResult:
"""
Query the knowledge graph with a question and optional message history.
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved

Args:
question: a human input question.
n_results: number of results to return.

Returns:
Neo4j GrapStorehQueryResult
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved
"""
if self.graph_store is None:
raise ValueError("Knowledge graph is not created.")

# query the graph to get the answer
query_engine = self.index.as_query_engine(include_text=True)
response = str(query_engine.query(question))

# retrieve source chunks that are semantically related to the question
retriever = self.index.as_retriever(include_text=False)
nodes = retriever.retrieve(question)

return GraphStoreQueryResult(answer=response, results=nodes)
82 changes: 82 additions & 0 deletions autogen/agentchat/contrib/graph_rag/neo4j_graph_rag_capability.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
# Copyright (c) 2023 - 2024, Owners of https://github.com/ag2ai
#
# SPDX-License-Identifier: Apache-2.0

from typing import Any, Dict, List, Optional, Tuple, Union

from autogen import Agent, ConversableAgent, UserProxyAgent

from .graph_query_engine import GraphStoreQueryResult
from .graph_rag_capability import GraphRagCapability
from .neo4j_graph_query_engine import Neo4jGraphQueryEngine


class Neo4jGraphCapability(GraphRagCapability):
"""
The Neo4j graph capability integrates Neo4j Property graph
Ref: https://neo4j.com/labs/genai-ecosystem/llamaindex/?utm_source=GSearch&utm_medium=PaidSearch&utm_campaign=Evergreen&utm_content=AMS-Search-SEMCE-DSA-None-SEM-SEM-NonABM&utm_term=&utm_adgroup=DSA&gad_source=1&gclid=Cj0KCQiAr7C6BhDRARIsAOUKifhzCrn5py9WlgkJP5sT3ABlD-qb2-FPSWCcO5GDcrkNuCYpOQjxh5AaAvdYEALw_wcB#_property_graph_constructing_modules
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved

For usage, please refer to example notebook/agentchat_graph_rag_neo4j.ipynb
"""

def __init__(self, query_engine: Neo4jGraphQueryEngine):
"""
initialize GraphRAG capability with a graph query engine
"""
self.query_engine = query_engine

def add_to_agent(self, agent: UserProxyAgent):
"""
Add Neo4j GraphRAG capability to a UserProxyAgent.
The restriction to a UserProxyAgent to make sure the returned message only contains information retrieved from the graph DB instead of any LLMs.
"""

self.graph_rag_agent = agent

# Validate the agent config
if agent.llm_config not in (None, False):
raise Exception(
"Agents with GraphRAG capabilities do not use an LLM configuration. Please set your llm_config to None or False."
)

# Register method to generate the reply using a Neo4j query
# All other reply methods will be removed
agent.register_reply(
[ConversableAgent, None], self._reply_using_neo4j_query, position=0, remove_other_reply_funcs=True
)

def _reply_using_neo4j_query(
self,
recipient: ConversableAgent,
messages: Optional[List[Dict]] = None,
sender: Optional[Agent] = None,
config: Optional[Any] = None,
) -> Tuple[bool, Union[str, Dict, None]]:
"""
Query neo4j and return the message. Internally, it utilises OpenAI to generate a reply based on the given messages.
Eric-Shang marked this conversation as resolved.
Show resolved Hide resolved

If no results are found, a default message is returned: "I'm sorry, I don't have an answer for that."

Args:
recipient: The agent instance that will receive the message.
messages: A list of messages in the conversation history with the sender.
sender: The agent instance that sent the message.
config: Optional configuration for message processing.

Returns:
A tuple containing a boolean indicating success and the assistant's reply.
"""
question = self._get_last_question(messages[-1])

result: GraphStoreQueryResult = self.query_engine.query(question)

return True, result.answer

def _get_last_question(self, message: Union[Dict, str]):
"""Retrieves the last message from the conversation history."""
if isinstance(message, str):
return message
if isinstance(message, Dict):
if "content" in message:
return message["content"]
return None
Loading
Loading