Add Memgraph integration (#16345)

run-llama · Oct 14, 2024 · 3c5ac94 · 3c5ac94
1 parent d1dbb9d
commit 3c5ac94
Show file tree

Hide file tree

Showing 13 changed files with 1,748 additions and 0 deletions.
diff --git a/docs/docs/examples/property_graph/property_graph_memgraph.ipynb b/docs/docs/examples/property_graph/property_graph_memgraph.ipynb
@@ -0,0 +1,284 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Memgraph Property Graph Index\n",
+    "\n",
+    "[Memgraph](https://memgraph.com/) is an open source graph database built real-time streaming and fast analysis of your stored data.\n",
+    "\n",
+    "Before running Memgraph, ensure you have Docker running in the background. The quickest way to try out [Memgraph Platform](https://memgraph.com/docs/getting-started#install-memgraph-platform) (Memgraph database + MAGE library + Memgraph Lab) for the first time is running the following command:\n",
+    "\n",
+    "For Linux/macOS:\n",
+    "```shell\n",
+    "curl https://install.memgraph.com | sh\n",
+    "```\n",
+    "\n",
+    "For Windows:\n",
+    "```shell\n",
+    "iwr https://windows.memgraph.com | iex\n",
+    "```\n",
+    "\n",
+    "From here, you can check Memgraph's visual tool, Memgraph Lab on the [http://localhost:3000/](http://localhost:3000/) or the [desktop version](https://memgraph.com/download) of the app."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install llama-index llama-index-graph-stores-memgraph"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Environment setup "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "\n",
+    "os.environ[\n",
+    "    \"OPENAI_API_KEY\"\n",
+    "] = \"sk-proj-...\"  # Replace with your OpenAI API key"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Create the data directory and download the Paul Graham essay we'll be using as the input data for this example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import urllib.request\n",
+    "\n",
+    "os.makedirs(\"data/paul_graham/\", exist_ok=True)\n",
+    "\n",
+    "url = \"https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt\"\n",
+    "output_path = \"data/paul_graham/paul_graham_essay.txt\"\n",
+    "urllib.request.urlretrieve(url, output_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import nest_asyncio\n",
+    "\n",
+    "nest_asyncio.apply()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Read the file, replace single quotes, save the modified content and load the document data using the `SimpleDirectoryReader`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core import SimpleDirectoryReader\n",
+    "\n",
+    "with open(output_path, \"r\", encoding=\"utf-8\") as file:\n",
+    "    content = file.read()\n",
+    "\n",
+    "modified_content = content.replace(\"'\", \"\\\\'\")\n",
+    "\n",
+    "with open(output_path, \"w\", encoding=\"utf-8\") as file:\n",
+    "    file.write(modified_content)\n",
+    "\n",
+    "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Setup Memgraph connection"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Set up your graph store class by providing the database credentials."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.graph_stores.memgraph import MemgraphPropertyGraphStore\n",
+    "\n",
+    "username = \"\"  # Enter your Memgraph username (default \"\")\n",
+    "password = \"\"  # Enter your Memgraph password (default \"\")\n",
+    "url = \"\"  # Specify the connection URL, e.g., 'bolt://localhost:7687'\n",
+    "\n",
+    "graph_store = MemgraphPropertyGraphStore(\n",
+    "    username=username,\n",
+    "    password=password,\n",
+    "    url=url,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Index Construction"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.core import PropertyGraphIndex\n",
+    "from llama_index.embeddings.openai import OpenAIEmbedding\n",
+    "from llama_index.llms.openai import OpenAI\n",
+    "from llama_index.core.indices.property_graph import SchemaLLMPathExtractor\n",
+    "\n",
+    "index = PropertyGraphIndex.from_documents(\n",
+    "    documents,\n",
+    "    embed_model=OpenAIEmbedding(model_name=\"text-embedding-ada-002\"),\n",
+    "    kg_extractors=[\n",
+    "        SchemaLLMPathExtractor(\n",
+    "            llm=OpenAI(model=\"gpt-3.5-turbo\", temperature=0.0)\n",
+    "        )\n",
+    "    ],\n",
+    "    property_graph_store=graph_store,\n",
+    "    show_progress=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Now that the graph is created, we can explore it in the UI by visiting [http://localhost:3000/](http://localhost:3000/).\n",
+    "\n",
+    "The easiest way to visualize the entire graph is by running a Cypher command similar to this:\n",
+    "\n",
+    "```shell\n",
+    "MATCH p=()-[]-() RETURN p;\n",
+    "```\n",
+    "\n",
+    "This command matches all of the possible paths in the graph and returns entire graph. \n",
+    "\n",
+    "To visualize the schema of the graph, visit the Graph schema tab and generate the new schema based on the newly created graph.\n",
+    "\n",
+    "To delete an entire graph, use:\n",
+    "\n",
+    "```shell\n",
+    "MATCH (n) DETACH DELETE n;\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Querying and retrieval"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "retriever = index.as_retriever(include_text=False)\n",
+    "\n",
+    "# Example query: \"What happened at Interleaf and Viaweb?\"\n",
+    "nodes = retriever.retrieve(\"What happened at Interleaf and Viaweb?\")\n",
+    "\n",
+    "# Output results\n",
+    "print(\"Query Results:\")\n",
+    "for node in nodes:\n",
+    "    print(node.text)\n",
+    "\n",
+    "# Alternatively, using a query engine\n",
+    "query_engine = index.as_query_engine(include_text=True)\n",
+    "\n",
+    "# Perform a query and print the detailed response\n",
+    "response = query_engine.query(\"What happened at Interleaf and Viaweb?\")\n",
+    "print(\"\\nDetailed Query Response:\")\n",
+    "print(str(response))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Loading from an existing graph"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If you have an existing graph (either created with LlamaIndex or otherwise), we can connect to and use it!\n",
+    "\n",
+    "**NOTE:** If your graph was created outside of LlamaIndex, the most useful retrievers will be [text to cypher](../../module_guides/indexing/lpg_index_guide.md#texttocypherretriever) or [cypher templates](../../module_guides/indexing/lpg_index_guide.md#cyphertemplateretriever). Other retrievers rely on properties that LlamaIndex inserts."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "llm = OpenAI(model=\"gpt-4\", temperature=0.0)\n",
+    "kg_extractors = [SchemaLLMPathExtractor(llm=llm)]\n",
+    "\n",
+    "index = PropertyGraphIndex.from_existing(\n",
+    "    property_graph_store=graph_store,\n",
+    "    kg_extractors=kg_extractors,\n",
+    "    embed_model=OpenAIEmbedding(model_name=\"text-embedding-ada-002\"),\n",
+    "    show_progress=True,\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3.9.13 64-bit (microsoft store)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "289d8ae9ac585fcc15d0d9333c941ae27bdf80d3e799883224b20975f2046730"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-memgraph/BUILD b/llama-index-integrations/graph_stores/llama-index-graph-stores-memgraph/BUILD
@@ -0,0 +1,3 @@
+poetry_requirements(
+    name="poetry",
+)
diff --git a/llama-index-integrations/graph_stores/llama-index-graph-stores-memgraph/Makefile b/llama-index-integrations/graph_stores/llama-index-graph-stores-memgraph/Makefile
@@ -0,0 +1,17 @@
+GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
+
+help:	## Show all Makefile targets.
+	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
+
+format:	## Run code autoformatters (black).
+	pre-commit install
+	git ls-files | xargs pre-commit run black --files
+
+lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
+	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
+
+test:	## Run tests via pytest.
+	pytest tests
+
+watch-docs:	## Build and watch documentation.
+	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/