From 42aa07c67b29974cbb43e25fd29babfe27e880c8 Mon Sep 17 00:00:00 2001 From: willtai Date: Tue, 3 Sep 2024 14:58:48 +0100 Subject: [PATCH] Prepare release for experimental kg creation (#112) * Update CHANGELOG * Update docs * Update docs * Update CHANGELOG --------- Co-authored-by: Alex Thomas --- CHANGELOG.md | 11 ++++++----- docs/source/user_guide_kg_builder.rst | 2 +- docs/source/user_guide_pipeline.rst | 4 ++-- src/neo4j_genai/experimental/components/embedder.py | 2 +- .../components/entity_relation_extractor.py | 2 +- src/neo4j_genai/experimental/components/kg_writer.py | 2 +- src/neo4j_genai/experimental/components/schema.py | 2 +- .../components/text_splitters/langchain.py | 2 +- .../components/text_splitters/llamaindex.py | 2 +- 9 files changed, 15 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0be4ecce..d0a3b87b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,15 @@ ## Next +### Added +- PDF-to-graph pipeline for knowledge graph construction in experimental mode +- Introduced support for Component/Pipeline flexible architecture. +- Added new components for knowledge graph construction, including text splitters, schema builders, entity-relation extractors, and Neo4j writers. +- Implemented end-to-end tests for the new knowledge graph builder pipeline. + ### Changed - When saving the lexical graph in a KG creation pipeline, the document is also saved as a specific node, together with relationships between each chunk and the document they were created from. -## 0.5.0 - ### Fixed - Corrected the hybrid retriever query to ensure proper normalization of scores in vector search results. @@ -14,9 +18,6 @@ ### Added - Add optional custom_prompt arg to the Text2CypherRetriever class. -- Introduced support for Component/Pipeline flexible architecture. -- Added new components for knowledge graph construction, including text splitters, schema builders, entity-relation extractors, and Neo4j writers. -- Implemented end-to-end tests for the new knowledge graph builder pipeline. ### Changed - `GraphRAG.search` method first parameter has been renamed `query_text` (was `query`) for consistency with the retrievers interface. diff --git a/docs/source/user_guide_kg_builder.rst b/docs/source/user_guide_kg_builder.rst index 182f0323..a868c24f 100644 --- a/docs/source/user_guide_kg_builder.rst +++ b/docs/source/user_guide_kg_builder.rst @@ -60,7 +60,7 @@ They can also be used within a pipeline: from neo4j_genai.experimental.components.pdf_loader import PdfLoader pipeline = Pipeline() my_component = PdfLoader() - pipeline.add("component_name", my_component) + pipeline.add_component(my_component, "component_name") Document Parser diff --git a/docs/source/user_guide_pipeline.rst b/docs/source/user_guide_pipeline.rst index f8c258ab..08276d4c 100644 --- a/docs/source/user_guide_pipeline.rst +++ b/docs/source/user_guide_pipeline.rst @@ -57,8 +57,8 @@ Here's how to create a simple pipeline and propagate results from one component from neo4j_genai.experimental.pipeline import Pipeline pipe = Pipeline() - pipe.add_component("a", ComponentAdd()) - pipe.add_component("b", ComponentAdd()) + pipe.add_component(ComponentAdd(), "a") + pipe.add_component(ComponentAdd(), "b") pipe.connect("a", "b", {"number2": "a.result"}) asyncio.run(pipe.run({"a": {"number1": 10, "number2": 1}, "b": {"number1": 4})) diff --git a/src/neo4j_genai/experimental/components/embedder.py b/src/neo4j_genai/experimental/components/embedder.py index 3878f912..7bee819c 100644 --- a/src/neo4j_genai/experimental/components/embedder.py +++ b/src/neo4j_genai/experimental/components/embedder.py @@ -36,7 +36,7 @@ class TextChunkEmbedder(Component): embedder = OpenAIEmbeddings(model="text-embedding-3-large") chunk_embedder = TextChunkEmbedder(embedder) pipeline = Pipeline() - pipeline.add_component("chunk_embedder", chunk_embedder) + pipeline.add_component(chunk_embedder, "chunk_embedder") """ diff --git a/src/neo4j_genai/experimental/components/entity_relation_extractor.py b/src/neo4j_genai/experimental/components/entity_relation_extractor.py index 242fd4df..904d57d1 100644 --- a/src/neo4j_genai/experimental/components/entity_relation_extractor.py +++ b/src/neo4j_genai/experimental/components/entity_relation_extractor.py @@ -297,7 +297,7 @@ class LLMEntityRelationExtractor(EntityRelationExtractor): extractor = LLMEntityRelationExtractor(llm=llm) pipe = Pipeline() - pipe.add_component("extractor", extractor) + pipe.add_component(extractor, "extractor") """ diff --git a/src/neo4j_genai/experimental/components/kg_writer.py b/src/neo4j_genai/experimental/components/kg_writer.py index 8d833b78..6dc4fd6c 100644 --- a/src/neo4j_genai/experimental/components/kg_writer.py +++ b/src/neo4j_genai/experimental/components/kg_writer.py @@ -88,7 +88,7 @@ class Neo4jWriter(KGWriter): writer = Neo4jWriter(driver=driver, neo4j_database=DATABASE) pipeline = Pipeline() - pipeline.add_component("writer", writer) + pipeline.add_component(writer, "writer") """ diff --git a/src/neo4j_genai/experimental/components/schema.py b/src/neo4j_genai/experimental/components/schema.py index eed586d3..a32d13c7 100644 --- a/src/neo4j_genai/experimental/components/schema.py +++ b/src/neo4j_genai/experimental/components/schema.py @@ -145,7 +145,7 @@ class SchemaBuilder(Component): ] pipe = Pipeline() schema_builder = SchemaBuilder() - pipe.add_component("schema_builder", schema_builder) + pipe.add_component(schema_builder, "schema_builder") pipe_inputs = { "schema": { "entities": entities, diff --git a/src/neo4j_genai/experimental/components/text_splitters/langchain.py b/src/neo4j_genai/experimental/components/text_splitters/langchain.py index 6788b392..979004f1 100644 --- a/src/neo4j_genai/experimental/components/text_splitters/langchain.py +++ b/src/neo4j_genai/experimental/components/text_splitters/langchain.py @@ -37,7 +37,7 @@ class LangChainTextSplitterAdapter(TextSplitter): pipeline = Pipeline() text_splitter = LangChainTextSplitterAdapter(RecursiveCharacterTextSplitter()) - pipeline.add_component("text_splitter", text_splitter) + pipeline.add_component(text_splitter, "text_splitter") """ diff --git a/src/neo4j_genai/experimental/components/text_splitters/llamaindex.py b/src/neo4j_genai/experimental/components/text_splitters/llamaindex.py index 08cf45b8..b1920055 100644 --- a/src/neo4j_genai/experimental/components/text_splitters/llamaindex.py +++ b/src/neo4j_genai/experimental/components/text_splitters/llamaindex.py @@ -37,7 +37,7 @@ class LlamaIndexTextSplitterAdapter(TextSplitter): pipeline = Pipeline() text_splitter = LlamaIndexTextSplitterAdapter(SentenceSplitter()) - pipeline.add_component("text_splitter", text_splitter) + pipeline.add_component(text_splitter, "text_splitter") """