From 54de7572145428c9d8ab4a0cdf128a51f334a782 Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Wed, 9 Aug 2023 18:11:35 +0200 Subject: [PATCH 1/5] Fix OSS-413: Proper intents in interactive training In OSS-413 a user reports that since Rasa 3.1 the interactive training dialogue suggests malformed/shortened intent names, e.g.: - a - b - c This is due to a bug in the parsing of intent names which assumes that every intent is a dictionary (which it is ONLY when a property such as `use_entities` is set). While OSS-413 states that this is solely cosmetic it is an actual bug that caused severe problems that cost several hours to debug: When using the regex matcher it is checked whether the parsed intent is in the domain or not. When it is not, it will fail and attempt to revert the user utterance. The user utterance is then written to the tracker but `SlotSet` events are not repeated - therefore any form validator will fail for inexplicable reasons. This also means that there is a bug in the `_correct_wrong_nlu` for not copying enough or re-starting the `ActionExtractSlots` action which is not addressed here. --- rasa/core/training/interactive.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index 860dae007145..78c1b71f6822 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -1477,7 +1477,11 @@ async def record_messages( domain_intents = domain.get("intents", []) if domain is not None else [] - intents = [next(iter(i)) for i in domain_intents] + intents = [ + next(iter(i)) if isinstance(i, dict) # intent property + else i # plain intent name + for i in domain_intents + ] num_messages = 0 From 3975fa76b6313ce115b11be67ad0c219c27c535c Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Wed, 9 Aug 2023 18:24:55 +0200 Subject: [PATCH 2/5] Formatting --- rasa/core/training/interactive.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index 78c1b71f6822..7f16b59819cc 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -1478,8 +1478,9 @@ async def record_messages( domain_intents = domain.get("intents", []) if domain is not None else [] intents = [ - next(iter(i)) if isinstance(i, dict) # intent property - else i # plain intent name + next(iter(i)) + if isinstance(i, dict) # intent property + else i # plain intent name for i in domain_intents ] From ba5f66d93247b8f642c4b29737b255e6f849b0a2 Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Fri, 18 Aug 2023 13:07:23 +0200 Subject: [PATCH 3/5] Address review comment: add comment --- rasa/core/training/interactive.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index 7f16b59819cc..dae4aad17516 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -1477,10 +1477,13 @@ async def record_messages( domain_intents = domain.get("intents", []) if domain is not None else [] + # intents with properties such as `use_entities` or `ignore_entities` + # are a dictionary which needs unpacking. Other intents are strings + # and can be used as-is. intents = [ next(iter(i)) - if isinstance(i, dict) # intent property - else i # plain intent name + if isinstance(i, dict) + else i for i in domain_intents ] From b0a7c6d3c1dd6bd3a2c041dab0239ae3e1186926 Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Fri, 18 Aug 2023 13:08:57 +0200 Subject: [PATCH 4/5] Review: add changelog --- changelog/12722.bugfix.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelog/12722.bugfix.md diff --git a/changelog/12722.bugfix.md b/changelog/12722.bugfix.md new file mode 100644 index 000000000000..a2d8a3c4a106 --- /dev/null +++ b/changelog/12722.bugfix.md @@ -0,0 +1,4 @@ +Intent names will not be falsely abbreviated in interactive training (fixes OSS-413). + +This will also fix a bug where forced user utterances (using the regex matcher) will +be reverted even though they are present in the domain. From ee1dd884c3f7fb8223913bb0cda413c17a728ea2 Mon Sep 17 00:00:00 2001 From: Marian Tietz Date: Tue, 26 Sep 2023 19:07:51 +0200 Subject: [PATCH 5/5] Refactor and test function to retrieve intent names Since the main problem of bug OSS-413 is that intents with attributes are not retrieved well the test was implemented to use domains with and without intent definitions using attributes. --- rasa/core/training/interactive.py | 26 +++++++++++--------- tests/core/training/test_interactive.py | 32 +++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 11 deletions(-) diff --git a/rasa/core/training/interactive.py b/rasa/core/training/interactive.py index dae4aad17516..2978b94099af 100644 --- a/rasa/core/training/interactive.py +++ b/rasa/core/training/interactive.py @@ -1457,6 +1457,20 @@ def _print_help(skip_visualization: bool) -> None: ) +def intent_names_from_domain(domain: Any) -> List[Text]: + """Get a list of the possible intents names from the domain specification. + + This is its own function as intents are non-trivial to unpack and this + warrants testing. + """ + domain_intents = domain.get("intents", []) if domain is not None else [] + + # intents with properties such as `use_entities` or `ignore_entities` + # are a dictionary which needs unpacking. Other intents are strings + # and can be used as-is. + return [next(iter(i)) if isinstance(i, dict) else i for i in domain_intents] + + async def record_messages( endpoint: EndpointConfig, file_importer: TrainingDataImporter, @@ -1475,17 +1489,7 @@ async def record_messages( ) return - domain_intents = domain.get("intents", []) if domain is not None else [] - - # intents with properties such as `use_entities` or `ignore_entities` - # are a dictionary which needs unpacking. Other intents are strings - # and can be used as-is. - intents = [ - next(iter(i)) - if isinstance(i, dict) - else i - for i in domain_intents - ] + intents = intent_names_from_domain(domain) num_messages = 0 diff --git a/tests/core/training/test_interactive.py b/tests/core/training/test_interactive.py index 215bd09a8f1a..172322842b8b 100644 --- a/tests/core/training/test_interactive.py +++ b/tests/core/training/test_interactive.py @@ -54,6 +54,38 @@ def mock_file_importer( ) +@pytest.mark.parametrize( + "domain_file, expected_intents", + [ + ( + "data/test_domains/default_unfeaturized_entities.yml", + [ + "greet", + "default", + "goodbye", + "thank", + "ask", + "why", + "pure_intent", + ], + ), + ( + "data/test_domains/default.yml", + [ + "greet", + "default", + "goodbye", + ], + ), + ], +) +def test_intent_names_from_domain(domain_file, expected_intents): + test_domain = Domain.load(domain_file) + + intents = interactive.intent_names_from_domain(test_domain.as_dict()) + assert set(intents) == set(expected_intents) + + async def test_send_message(mock_endpoint: EndpointConfig): sender_id = uuid.uuid4().hex