From 2b2c6529a79c0815cc11fbca44a1b529a48c7dd9 Mon Sep 17 00:00:00 2001
From: danc
Date: Thu, 21 Sep 2023 00:16:15 +0100
Subject: [PATCH] added render_template test.

---
 .../generator/llm_command_generator.py       |   2 +-
 .../generator/rendered_prompt.txt            |   1 +
 .../generator/test_llm_command_generator.py  | 165 ++++++++++++------
 3 files changed, 115 insertions(+), 53 deletions(-)
 create mode 100644 tests/dialogue_understanding/generator/rendered_prompt.txt

diff --git a/rasa/dialogue_understanding/generator/llm_command_generator.py b/rasa/dialogue_understanding/generator/llm_command_generator.py
index c6533a4a341e..c143f7d61451 100644
--- a/rasa/dialogue_understanding/generator/llm_command_generator.py
+++ b/rasa/dialogue_understanding/generator/llm_command_generator.py
@@ -239,7 +239,7 @@ def parse_commands(
         cancel_flow_re = re.compile(r"CancelFlow\(\)")
         chitchat_re = re.compile(r"ChitChat\(\)")
         knowledge_re = re.compile(r"SearchAndReply\(\)")
-        humand_handoff_re = re.compile(r"HumandHandoff\(\)")
+        humand_handoff_re = re.compile(r"HumanHandoff\(\)")
         clarify_re = re.compile(r"Clarify\(([a-zA-Z0-9_, ]+)\)")
 
         for action in actions.strip().splitlines():
diff --git a/tests/dialogue_understanding/generator/rendered_prompt.txt b/tests/dialogue_understanding/generator/rendered_prompt.txt
new file mode 100644
index 000000000000..ea92e3f93387
--- /dev/null
+++ b/tests/dialogue_understanding/generator/rendered_prompt.txt
@@ -0,0 +1 @@
+Your task is to analyze the current conversation context and generate a list of actions to start new business processes that we call flows, to extract slots, or respond to small talk and knowledge requests.\n\nThese are the flows that can be started, with their description and slots:\n\ntest_flow: some description\n slot: test_slot\n \n\n===\nHere is what happened previously in the conversation:\nUSER: Hello\nAI: Hi\nUSER: some message\n\n===\n\nYou are currently not in any flow and so there are no active slots.\nThis means you can only set a slot if you first start a flow that requires that slot.\n\nIf you start a flow, first start the flow and then optionally fill that flow\'s slots with information the user provided in their message.\n\nThe user just said """some message""".\n\n===\nBased on this information generate a list of actions you want to take. Your job is to start flows and to fill slots where appropriate. Any logic of what happens afterwards is handled by the flow engine. These are your available actions:\n* Slot setting, described by "SetSlot(slot_name, slot_value)". An example would be "SetSlot(recipient, Freddy)"\n* Starting another flow, described by "StartFlow(flow_name)". An example would be "StartFlow(transfer_money)"\n* Cancelling the current flow, described by "CancelFlow()"\n* Clarifying which flow should be started. An example would be Clarify(list_contacts, add_contact, remove_contact) if the user just wrote "contacts" and there are multiple potential candidates. It also works with a single flow name to confirm you understood correctly, as in Clarify(transfer_money).\n* Responding to knowledge-oriented user messages, described by "SearchAndReply()"\n* Responding to a casual, non-task-oriented user message, described by "ChitChat()".\n* Handing off to a human, in case the user seems frustrated or explicitly asks to speak to one, described by "HumanHandoff()".\n\n===\nWrite out the actions you want to take, one per line, in the order they should take place.\nDo not fill slots with abstract values or placeholders.\nOnly use information provided by the user.\nOnly start a flow if it\'s completely clear what the user wants. Imagine you were a person reading this message. If it\'s not 100% clear, clarify the next step.\nDon\'t be overly confident. Take a conservative approach and clarify before proceeding.\nIf the user asks for two things which seem contradictory, clarify before starting a flow.\nStrictly adhere to the provided action types listed above.\nFocus on the last message and take it one step at a time.\nUse the previous conversation steps only to aid understanding.\n\nYour action list:
\ No newline at end of file
diff --git a/tests/dialogue_understanding/generator/test_llm_command_generator.py b/tests/dialogue_understanding/generator/test_llm_command_generator.py
index ee807f82c6e9..02d13ecd049e 100644
--- a/tests/dialogue_understanding/generator/test_llm_command_generator.py
+++ b/tests/dialogue_understanding/generator/test_llm_command_generator.py
@@ -16,13 +16,19 @@
     KnowledgeAnswerCommand,
     ClarifyCommand,
 )
-from rasa.engine.graph import ExecutionContext
-from rasa.engine.storage.resource import Resource
-from rasa.engine.storage.storage import ModelStorage
+# from rasa.engine.graph import ExecutionContext
+# from rasa.engine.storage.resource import Resource
+# from rasa.engine.storage.storage import ModelStorage
+from rasa.shared.core.events import BotUttered, UserUttered
+from rasa.shared.core.flows.flow import FlowsList
 from rasa.shared.core.slots import BooleanSlot, FloatSlot, TextSlot
 from rasa.shared.core.trackers import DialogueStateTracker
+from rasa.shared.nlu.training_data.message import Message
+from tests.utilities import flows_from_str
 
+TEST_PROMPT_PATH = "./tests/dialogue_understanding/generator/rendered_prompt.txt"
+
 
 class TestLLMCommandGenerator:
     """Tests for the LLMCommandGenerator."""
 
@@ -32,64 +38,109 @@ def command_generator(self):
         return LLMCommandGenerator.create(
             config={}, resource=Mock(), model_storage=Mock(), execution_context=Mock())
 
+    # @pytest.fixture
+    # def mock_command_generator(
+    #     self,
+    #     default_model_storage: ModelStorage,
+    #     default_execution_context: ExecutionContext,
+    # ) -> LLMCommandGenerator:
+    #     """Create a patched LLMCommandGenerator."""
+    #     with patch(
+    #         "rasa.shared.utils.llm.llm_factory",
+    #         Mock(return_value=FakeListLLM(responses=["StartFlow(check_balance)"])),
+    #     ) as mock_llm:
+    #         return LLMCommandGenerator.create(
+    #             config=LLMCommandGenerator.get_default_config(),
+    #             model_storage=default_model_storage,
+    #             resource=Resource("llmcommandgenerator"),
+    #             execution_context=default_execution_context)
+
     @pytest.fixture
-    def mock_command_generator(
-        self,
-        default_model_storage: ModelStorage,
-        default_execution_context: ExecutionContext,
-    ) -> LLMCommandGenerator:
-        """Create a patched LLMCommandGenerator."""
-        with patch(
-            "rasa.dialogue_understanding.generator.llm_command_generator.llm_factory",
-            Mock(return_value=FakeListLLM(responses=["StartFlow(check_balance)"])),
-        ) as mock_llm:
-            return LLMCommandGenerator.create(
-                config=LLMCommandGenerator.get_default_config(),
-                model_storage=default_model_storage,
-                resource=Resource("llmcommandgenerator"),
-                execution_context=default_execution_context)
-
-    def test_predict_commands_with_no_flows(self, mock_command_generator: LLMCommandGenerator):
+    def test_flows(self) -> FlowsList:
+        """Create a FlowsList."""
+        return flows_from_str(
+            """
+            flows:
+              test_flow:
+                steps:
+                - id: first_step
+                  action: action_listen
+            """
+        )
+
+
+    def test_predict_commands_with_no_flows(
+        self,
+        mock_command_generator: LLMCommandGenerator
+    ):
         """Test that predict_commands returns an empty list when flows is None."""
+        # Given
+        empty_flows = FlowsList([])
         # When
-        predicted_commands = mock_command_generator.predict_commands(Mock(), flows=None, tracker=Mock())
+        predicted_commands = mock_command_generator.predict_commands(
+            Mock(),
+            flows=empty_flows,
+            tracker=Mock()
+        )
         # Then
         assert not predicted_commands
 
-    def test_predict_commands_with_no_tracker(self, mock_command_generator: LLMCommandGenerator):
+    def test_predict_commands_with_no_tracker(
+        self,
+        mock_command_generator: LLMCommandGenerator
+    ):
         """Test that predict_commands returns an empty list when tracker is None."""
         # When
-        predicted_commands = mock_command_generator.predict_commands(Mock(), flows=Mock(), tracker=None)
+        predicted_commands = mock_command_generator.predict_commands(
+            Mock(),
+            flows=Mock(),
+            tracker=None
+        )
         # Then
         assert not predicted_commands
 
-    @patch.object(LLMCommandGenerator, "render_template", Mock(return_value="some prompt"))
+    @patch.object(
+        LLMCommandGenerator,
+        "render_template",
+        Mock(return_value="some prompt")
+    )
     @patch.object(LLMCommandGenerator, "parse_commands", Mock())
-    def test_predict_commands_calls_llm_correctly(self, command_generator: LLMCommandGenerator):
+    def test_predict_commands_calls_llm_correctly(
+        self,
+        command_generator: LLMCommandGenerator,
+        test_flows: FlowsList
+    ):
         """Test that predict_commands calls llm correctly."""
         # When
         mock_llm = Mock()
         with patch(
-            "rasa.dialogue_understanding.generator.llm_command_generator.llm_factory",
-            Mock(return_value=mock_llm),
+            "rasa.shared.utils.llm.llm_factory",
+            Mock(return_value=mock_llm)
        ):
-            command_generator.predict_commands(Mock(), flows=Mock(), tracker=Mock())
+            command_generator.predict_commands(Mock(), flows=test_flows, tracker=Mock())
         # Then
         mock_llm.assert_called_once_with("some prompt")
 
-    @patch.object(LLMCommandGenerator, "render_template", Mock(return_value="some prompt"))
+
+    @patch.object(
+        LLMCommandGenerator,
+        "render_template",
+        Mock(return_value="some prompt")
+    )
     @patch.object(LLMCommandGenerator, "parse_commands", Mock())
-    def test_generate_action_list_catches_llm_exception(self, command_generator: LLMCommandGenerator):
-        """Test that predict_commands calls llm correctly."""
+    def test_generate_action_list_catches_llm_exception(self,
+        command_generator: LLMCommandGenerator,
+        test_flows: FlowsList):
+        """Test that predict_commands catches llm exceptions."""
         # Given
         mock_llm = Mock(side_effect=Exception("some exception"))
         with patch(
-            "rasa.dialogue_understanding.generator.llm_command_generator.llm_factory",
+            "rasa.shared.utils.llm.llm_factory",
             Mock(return_value=mock_llm),
         ):
             # When
             with capture_logs() as logs:
-                command_generator.predict_commands(Mock(), flows=Mock(), tracker=Mock())
+                command_generator.predict_commands(Mock(), flows=test_flows, tracker=Mock())
             # Then
             print(logs)
             assert len(logs) == 4
@@ -97,30 +148,40 @@ def test_generate_action_list_catches_llm_exception(self, command_generator: LLM
 
 
-    def test_render_template(self, mock_command_generator: LLMCommandGenerator):
+    def test_render_template(self, command_generator: LLMCommandGenerator):
         """Test that render_template renders a template."""
-        pass
-        # # Given
-        # message = Mock()
-
-        # tracker = Mock()
-
-        # flows = Mock()
+        # Given
+        test_message = Message.build(text="some message")
+        test_slot = TextSlot(
+            name="test_slot", mappings=[{}], initial_value=None, influence_conversation=False
+        )
+        test_tracker = DialogueStateTracker.from_events(
+            sender_id="test",
+            evts=[UserUttered("Hello"), BotUttered("Hi")],
+            slots=[test_slot]
+        )
+        test_flows = flows_from_str(
+            """
+            flows:
+              test_flow:
+                description: some description
+                steps:
+                - id: first_step
+                  collect_information: test_slot
+            """
+        )
+        with open(TEST_PROMPT_PATH, "r", encoding='unicode_escape') as f:
+            expected_template = f.read()
 
         # # When
-        # rendered_template = command_generator.render_template()
+        rendered_template = command_generator.render_template(
+            message=test_message,
+            tracker=test_tracker,
+            flows=test_flows
+        )
 
         # # Then
-        # assert rendered_template == "template"
+        assert rendered_template == expected_template
 
-    # def test_generate_action_list_calls_llm_with_correct_promt(self):
-    #     # Given
-    #     prompt = "some prompt"
-    #     with patch(
-    #         "rasa.rasa.shared.utils.llm.llm_factory",
-    #         Mock(return_value=FakeListLLM(responses=["hello"]))
-    #     ) as mock_llm:
-    #         LLMCommandGenerator._generate_action_list(prompt)
-    #         mock_llm.assert_called_once_with(prompt)
 
     @pytest.mark.parametrize(
         "input_action, expected_command",