From 294a67e426a45307658b4112928213e8b331f6b1 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Wed, 30 Oct 2024 16:03:41 +0100 Subject: [PATCH] feat: Adding StringJoiner (#8357) * Adding StringJoiner * Release notes * Remove typing * Remove unused import * Try to fix header * Fix one test * Add to docs, move test to behavioral pipeline test * Undo changes * Fix test * Update haystack/components/joiners/string_joiner.py Co-authored-by: Stefano Fiorucci * Update haystack/components/joiners/string_joiner.py Co-authored-by: Stefano Fiorucci * Provide usage example * Apply suggestions from code review Co-authored-by: Stefano Fiorucci --------- Co-authored-by: Stefano Fiorucci Co-authored-by: Silvano Cerza <3314350+silvanocerza@users.noreply.github.com> --- README.md | 2 +- docs/pydoc/config/joiners_api.yml | 2 +- haystack/components/joiners/__init__.py | 3 +- haystack/components/joiners/string_joiner.py | 59 +++++++++++++++++++ .../add-string-joiner-a7754e6bff9332ea.yaml | 4 ++ test/components/joiners/test_string_joiner.py | 37 ++++++++++++ .../pipeline/features/pipeline_run.feature | 1 + test/core/pipeline/features/test_run.py | 31 +++++++++- 8 files changed, 135 insertions(+), 4 deletions(-) create mode 100644 haystack/components/joiners/string_joiner.py create mode 100644 releasenotes/notes/add-string-joiner-a7754e6bff9332ea.yaml create mode 100644 test/components/joiners/test_string_joiner.py diff --git a/README.md b/README.md index cb0559bc03..10566904e2 100644 --- a/README.md +++ b/README.md @@ -24,7 +24,7 @@ pip install haystack-ai Install from the `main` branch to try the newest features: ```sh -pip install git+https://github.com/deepset-ai/haystack.git@main +pip install git+https://github.com/deepset-ai/haystack.git@main ``` Haystack supports multiple installation methods including Docker images. For a comprehensive guide please refer diff --git a/docs/pydoc/config/joiners_api.yml b/docs/pydoc/config/joiners_api.yml index 6b7d422166..72708f4400 100644 --- a/docs/pydoc/config/joiners_api.yml +++ b/docs/pydoc/config/joiners_api.yml @@ -1,7 +1,7 @@ loaders: - type: haystack_pydoc_tools.loaders.CustomPythonLoader search_path: [../../../haystack/components/joiners] - modules: ["document_joiner", "branch", "answer_joiner"] + modules: ["document_joiner", "branch", "answer_joiner", "string_joiner"] ignore_when_discovered: ["__init__"] processors: - type: filter diff --git a/haystack/components/joiners/__init__.py b/haystack/components/joiners/__init__.py index 57878c209c..ea9082ba4d 100644 --- a/haystack/components/joiners/__init__.py +++ b/haystack/components/joiners/__init__.py @@ -5,5 +5,6 @@ from .answer_joiner import AnswerJoiner from .branch import BranchJoiner from .document_joiner import DocumentJoiner +from .string_joiner import StringJoiner -__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner"] +__all__ = ["DocumentJoiner", "BranchJoiner", "AnswerJoiner", "StringJoiner"] diff --git a/haystack/components/joiners/string_joiner.py b/haystack/components/joiners/string_joiner.py new file mode 100644 index 0000000000..42d42fe4cf --- /dev/null +++ b/haystack/components/joiners/string_joiner.py @@ -0,0 +1,59 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from typing import List + +from haystack import component, logging +from haystack.core.component.types import Variadic + +logger = logging.getLogger(__name__) + + +@component +class StringJoiner: + """ + Component to join strings from different components to a list of strings. + + ### Usage example + + ```python + from haystack.components.joiners import StringJoiner + from haystack.components.builders import PromptBuilder + from haystack.core.pipeline import Pipeline + + from haystack.components.generators.chat import OpenAIChatGenerator + from haystack.dataclasses import ChatMessage + + string_1 = "What's Natural Language Processing?" + string_2 = "What is life?" + + pipeline = Pipeline() + pipeline.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}")) + pipeline.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}")) + pipeline.add_component("string_joiner", StringJoiner()) + + pipeline.connect("prompt_builder_1.prompt", "string_joiner.strings") + pipeline.connect("prompt_builder_2.prompt", "string_joiner.strings") + + print(pipeline.run(data={"prompt_builder_1": {"query": string_1}, "prompt_builder_2": {"query": string_2}})) + + >> {"string_joiner": {"strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What is life?"]}} + ``` + """ + + @component.output_types(strings=List[str]) + def run(self, strings: Variadic[str]): + """ + Joins strings into a list of strings + + :param strings: + strings from different components + + :returns: + A dictionary with the following keys: + - `strings`: Merged list of strings + """ + + out_strings = list(strings) + return {"strings": out_strings} diff --git a/releasenotes/notes/add-string-joiner-a7754e6bff9332ea.yaml b/releasenotes/notes/add-string-joiner-a7754e6bff9332ea.yaml new file mode 100644 index 0000000000..3cbf8f554d --- /dev/null +++ b/releasenotes/notes/add-string-joiner-a7754e6bff9332ea.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + Added component StringJoiner to join strings from different components to a list of strings. diff --git a/test/components/joiners/test_string_joiner.py b/test/components/joiners/test_string_joiner.py new file mode 100644 index 0000000000..9939ae96a7 --- /dev/null +++ b/test/components/joiners/test_string_joiner.py @@ -0,0 +1,37 @@ +# SPDX-FileCopyrightText: 2022-present deepset GmbH +# +# SPDX-License-Identifier: Apache-2.0 + +from haystack.core.serialization import component_from_dict, component_to_dict +from haystack.components.joiners.string_joiner import StringJoiner + + +class TestStringJoiner: + def test_init(self): + joiner = StringJoiner() + assert isinstance(joiner, StringJoiner) + + def test_to_dict(self): + joiner = StringJoiner() + data = component_to_dict(joiner, name="string_joiner") + assert data == {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}} + + def test_from_dict(self): + data = {"type": "haystack.components.joiners.string_joiner.StringJoiner", "init_parameters": {}} + string_joiner = component_from_dict(StringJoiner, data=data, name="string_joiner") + assert isinstance(string_joiner, StringJoiner) + + def test_empty_list(self): + joiner = StringJoiner() + result = joiner.run([]) + assert result == {"strings": []} + + def test_single_string(self): + joiner = StringJoiner() + result = joiner.run("a") + assert result == {"strings": ["a"]} + + def test_two_strings(self): + joiner = StringJoiner() + result = joiner.run(["a", "b"]) + assert result == {"strings": ["a", "b"]} diff --git a/test/core/pipeline/features/pipeline_run.feature b/test/core/pipeline/features/pipeline_run.feature index e05f16b570..db064ea2b1 100644 --- a/test/core/pipeline/features/pipeline_run.feature +++ b/test/core/pipeline/features/pipeline_run.feature @@ -43,6 +43,7 @@ Feature: Pipeline running | that is linear and a component in the middle receives optional input from other components and input from the user | | that has a loop in the middle | | that has variadic component that receives a conditional input | + | that has a string variadic component | Scenario Outline: Running a bad Pipeline Given a pipeline diff --git a/test/core/pipeline/features/test_run.py b/test/core/pipeline/features/test_run.py index f5739aa690..6a82cf4dbd 100644 --- a/test/core/pipeline/features/test_run.py +++ b/test/core/pipeline/features/test_run.py @@ -13,7 +13,7 @@ from haystack.components.preprocessors import DocumentCleaner, DocumentSplitter from haystack.components.retrievers.in_memory import InMemoryBM25Retriever from haystack.document_stores.in_memory import InMemoryDocumentStore -from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner +from haystack.components.joiners import BranchJoiner, DocumentJoiner, AnswerJoiner, StringJoiner from haystack.testing.sample_components import ( Accumulate, AddFixedValue, @@ -2195,3 +2195,32 @@ def run(self, documents: List[Document]): ], ), ] + + +@given("a pipeline that has a string variadic component", target_fixture="pipeline_data") +def that_has_a_string_variadic_component(): + string_1 = "What's Natural Language Processing?" + string_2 = "What's is life?" + + pipeline = Pipeline() + pipeline.add_component("prompt_builder_1", PromptBuilder("Builder 1: {{query}}")) + pipeline.add_component("prompt_builder_2", PromptBuilder("Builder 2: {{query}}")) + pipeline.add_component("string_joiner", StringJoiner()) + + pipeline.connect("prompt_builder_1.prompt", "string_joiner.strings") + pipeline.connect("prompt_builder_2.prompt", "string_joiner.strings") + + return ( + pipeline, + [ + PipelineRunData( + inputs={"prompt_builder_1": {"query": string_1}, "prompt_builder_2": {"query": string_2}}, + expected_outputs={ + "string_joiner": { + "strings": ["Builder 1: What's Natural Language Processing?", "Builder 2: What's is life?"] + } + }, + expected_run_order=["prompt_builder_1", "prompt_builder_2", "string_joiner"], + ) + ], + )