Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement rasa data validate flows #12866

Merged
merged 11 commits into from
Sep 29, 2023
16 changes: 16 additions & 0 deletions rasa/cli/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,22 @@ def _add_data_validate_parsers(
)
arguments.set_validator_arguments(story_structure_parser)

flows_structure_parser = validate_subparsers.add_parser(
"flows",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
parents=parents,
help="Checks for inconsistencies in the flows files.",
)
flows_structure_parser.set_defaults(
func=lambda args: rasa.cli.utils.validate_files(
args.fail_on_warnings,
args.max_history,
_build_training_data_importer(args),
flows_only=True,
)
)
arguments.set_validator_arguments(flows_structure_parser)


def _build_training_data_importer(args: argparse.Namespace) -> "TrainingDataImporter":
config = rasa.cli.utils.get_validated_path(
Expand Down
7 changes: 6 additions & 1 deletion rasa/cli/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ def validate_files(
max_history: Optional[int],
importer: TrainingDataImporter,
stories_only: bool = False,
flows_only: bool = False,
) -> None:
"""Validates either the story structure or the entire project.

Expand All @@ -225,13 +226,16 @@ def validate_files(
max_history: The max history to use when validating the story structure.
importer: The `TrainingDataImporter` to use to load the training data.
stories_only: If `True`, only the story structure is validated.
flows_only: If `True`, only the flows are validated.
"""
from rasa.validator import Validator

validator = Validator.from_importer(importer)

if stories_only:
all_good = _validate_story_structure(validator, max_history, fail_on_warnings)
elif flows_only:
all_good = validator.verify_flows()
ancalita marked this conversation as resolved.
Show resolved Hide resolved
else:
if importer.get_domain().is_empty():
rasa.shared.utils.cli.print_error_and_exit(
Expand All @@ -243,8 +247,9 @@ def validate_files(
valid_stories = _validate_story_structure(
validator, max_history, fail_on_warnings
)
valid_flows = validator.verify_flows()

all_good = valid_domain and valid_nlu and valid_stories
all_good = valid_domain and valid_nlu and valid_stories and valid_flows

validator.warn_if_config_mandatory_keys_are_not_set()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,16 @@ slots:

flows:
pattern_continue_interrupted:
description: A flow that should will be started to continue an interrupted flow.
description: A flow that should will be started to continue an interrupted flow.
name: pattern continue interrupted

steps:
- id: "0"
action: utter_flow_continue_interrupted

pattern_correction:
description: Handle a correction of a slot value.
name: pattern correction

steps:
- id: "check_if_reset_only"
Expand All @@ -66,6 +68,7 @@ flows:

pattern_cancel_flow:
description: A meta flow that's started when a flow is cancelled.
name: pattern_cancel_flow

steps:
- id: "cancel_flow"
Expand All @@ -76,18 +79,21 @@ flows:

pattern_internal_error:
description: internal error
name: pattern internal error
steps:
- id: "0"
action: utter_internal_error_rasa

pattern_completed:
description: a flow has been completed and there is nothing else to be done
name: pattern completed
steps:
- id: "0"
action: utter_can_do_something_else

pattern_chitchat:
description: handle interactions with the user that are not task-oriented
name: pattern chitchat
steps:
- id: "0"
generation_prompt: |
Expand All @@ -99,6 +105,7 @@ flows:

pattern_clarification:
description: handle clarifications with the user
name: pattern clarification
steps:
- id: "0"
action: action_clarify_flows
Expand All @@ -108,6 +115,7 @@ flows:

pattern_collect_information:
description: flow used to fill a slot
name: pattern collect information
steps:
- id: "start"
action: action_run_slot_rejections
Expand Down
2 changes: 1 addition & 1 deletion rasa/shared/core/flows/flow.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def __str__(self) -> Text:


class UnresolvedFlowStepIdException(RasaException):
"""Raised when a flow step is referenced but it's id can not be resolved."""
"""Raised when a flow step is referenced, but its id can not be resolved."""

def __init__(
self, step_id: Text, flow: Flow, referenced_from: Optional[FlowStep]
Expand Down
183 changes: 183 additions & 0 deletions rasa/validator.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
import logging
import re
import string
from collections import defaultdict
from typing import Set, Text, Optional, Dict, Any, List

from pypred import Predicate

import rasa.core.training.story_conflict
from rasa.shared.core.flows.flow import (
ActionFlowStep,
BranchFlowStep,
CollectInformationFlowStep,
FlowsList,
IfFlowLink,
SetSlotsFlowStep,
)
import rasa.shared.nlu.constants
from rasa.shared.constants import (
Expand All @@ -27,7 +34,9 @@
from rasa.shared.core.domain import Domain
from rasa.shared.core.generator import TrainingDataGenerator
from rasa.shared.core.constants import SlotMappingType, MAPPING_TYPE
from rasa.shared.core.slots import ListSlot, Slot
from rasa.shared.core.training_data.structures import StoryGraph
from rasa.shared.exceptions import RasaException
from rasa.shared.importers.importer import TrainingDataImporter
from rasa.shared.nlu.training_data.training_data import TrainingData
import rasa.shared.utils.io
Expand Down Expand Up @@ -483,3 +492,177 @@ def warn_if_config_mandatory_keys_are_not_set(self) -> None:
f"'{ASSISTANT_ID_KEY}' mandatory key. Please replace the default "
f"placeholder value with a unique identifier."
)

@staticmethod
def _raise_exception_if_slot_not_in_domain(
slot_name: str, domain_slots: Dict[Text, Slot], step_id: str, flow_id: str
) -> None:
if slot_name not in domain_slots:
raise RasaException(
f"The slot '{slot_name}' is used in the "
f"step '{step_id}' of flow id '{flow_id}', but it "
f"is not listed in the domain slots. "
f"You should add it to your domain file!",
)

@staticmethod
def _raise_exception_if_list_slot(slot: Slot, step_id: str, flow_id: str) -> None:
if isinstance(slot, ListSlot):
raise RasaException(
f"The slot '{slot.name}' is used in the "
f"step '{step_id}' of flow id '{flow_id}', but it "
f"is a list slot. List slots are currently not "
f"supported in flows. You should change it to a "
f"text, boolean or float slot in your domain file!",
)

@staticmethod
def _raise_exception_if_dialogue_stack_slot(
slot: Slot, step_id: str, flow_id: str
) -> None:
if slot.name == constants.DIALOGUE_STACK_SLOT:
raise RasaException(
f"The slot '{constants.DIALOGUE_STACK_SLOT}' is used in the "
f"step '{step_id}' of flow id '{flow_id}', but it "
f"is a reserved slot. You must not use reserved slots in "
f"your flows.",
)

def verify_flows_steps_against_domain(self) -> bool:
"""Checks flows steps' references against the domain file."""
domain_slots = {slot.name: slot for slot in self.domain.slots}
for flow in self.flows.underlying_flows:
for step in flow.steps:
if isinstance(step, CollectInformationFlowStep):
self._raise_exception_if_slot_not_in_domain(
step.collect, domain_slots, step.id, flow.id
)

current_slot = domain_slots[step.collect]
self._raise_exception_if_list_slot(current_slot, step.id, flow.id)
self._raise_exception_if_dialogue_stack_slot(
current_slot, step.id, flow.id
)

elif isinstance(step, SetSlotsFlowStep):
for slot in step.slots:
slot_name = slot["key"]
self._raise_exception_if_slot_not_in_domain(
slot_name, domain_slots, step.id, flow.id
)

current_slot = domain_slots[slot_name]
self._raise_exception_if_list_slot(
current_slot, step.id, flow.id
)
self._raise_exception_if_dialogue_stack_slot(
current_slot, step.id, flow.id
)

elif isinstance(step, ActionFlowStep):
regex = r"{context\..+?}"
matches = re.findall(regex, step.action)
if matches:
logger.warning(
f"An interpolated action name was found at step "
f"'{step.id}' of flow id '{flow.id}'. "
f"Skipping validation for this step. "
f"Please make sure that the action name is "
f"listed in your domain responses or actions."
)
elif step.action not in self.domain.action_names_or_texts:
raise RasaException(
f"The action '{step.action}' is used in the step "
f"'{step.id}' of flow id '{flow.id}', but it "
f"is not listed in the domain file. "
f"You should add it to your domain file!",
)
return True

def verify_unique_flows(self) -> bool:
"""Checks if all flows have unique names and descriptions."""
flows_mapping: Dict[str, str] = {}
punctuation_table = str.maketrans({i: "" for i in string.punctuation})

for flow in self.flows.underlying_flows:
flow_description = flow.description
cleaned_description = flow_description.translate(punctuation_table) # type: ignore[union-attr] # noqa: E501
if cleaned_description in flows_mapping.values():
raise RasaException(
f"Detected duplicate flow description for flow id '{flow.id}'. "
f"Flow descriptions must be unique. "
f"Please make sure that all flows have different descriptions."
)

if flow.name in flows_mapping:
raise RasaException(
f"Detected duplicate flow name '{flow.name}' for flow "
f"id '{flow.id}'. Flow names must be unique. "
f"Please make sure that all flows have different names."
)

flows_mapping[flow.name] = cleaned_description

return True

@staticmethod
def _construct_predicate(predicate: Optional[str], step_id: str) -> Predicate:
try:
pred = Predicate(predicate)
except (TypeError, Exception) as exception:
raise RasaException(
f"Could not initialize the predicate found under step "
f"'{step_id}'. Please make sure that all predicates "
f"are strings."
) from exception

return pred

def verify_predicates(self) -> bool:
"""Checks that predicates used in branch flow steps or `collect` steps are valid.""" # noqa: E501
all_good = True
for flow in self.flows.underlying_flows:
for step in flow.steps:
if isinstance(step, BranchFlowStep):
for link in step.next.links:
if isinstance(link, IfFlowLink):
predicate = Validator._construct_predicate(
link.condition, step.id
)
if not predicate.is_valid():
raise RasaException(
f"Detected invalid condition '{link.condition}' "
f"at step '{step.id}' for flow id '{flow.id}'. "
f"Please make sure that all conditions are valid."
)
elif isinstance(step, CollectInformationFlowStep):
predicates = [predicate.if_ for predicate in step.rejections]
for predicate in predicates:
pred = Validator._construct_predicate(predicate, step.id)
if not pred.is_valid():
raise RasaException(
f"Detected invalid rejection '{predicate}' "
f"at `collect` step '{step.id}' "
f"for flow id '{flow.id}'. "
f"Please make sure that all conditions are valid."
)
return all_good
ancalita marked this conversation as resolved.
Show resolved Hide resolved

def verify_flows(self) -> bool:
"""Checks for inconsistencies across flows."""
logger.info("Validating flows...")

if self.flows.is_empty():
logger.warning(
"No flows were found in the data files. "
"Will not proceed with flow validation.",
)
return True

all_good = (
self.verify_flows_steps_against_domain()
and self.verify_unique_flows()
and self.verify_predicates()
)
ancalita marked this conversation as resolved.
Show resolved Hide resolved

return all_good
42 changes: 41 additions & 1 deletion tests/cli/test_rasa_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

from _pytest.fixtures import FixtureRequest
from _pytest.pytester import RunResult

from rasa.shared.constants import LATEST_TRAINING_DATA_FORMAT_VERSION
from rasa.shared.nlu.training_data.formats import RasaYAMLReader
import rasa.shared.utils.io

Expand Down Expand Up @@ -139,7 +141,7 @@ def test_data_validate_help(run: Callable[..., RunResult]):
[--max-history MAX_HISTORY] [-c CONFIG]
[--fail-on-warnings] [-d DOMAIN]
[--data DATA [DATA ...]]
{{stories}} ..."""
{{stories,flows}} ..."""

lines = help_text.split("\n")
# expected help text lines should appear somewhere in the output
Expand Down Expand Up @@ -255,3 +257,41 @@ def test_data_split_stories(run_in_simple_project: Callable[..., RunResult]):
test_data = rasa.shared.utils.io.read_yaml_file(test_file)
assert len(test_data.get("stories", [])) == 1
assert test_data["stories"][0].get("story") == "story 2"


def test_rasa_data_validate_flows_success(
run_in_simple_project: Callable[..., RunResult]
) -> None:
flows_yaml = f"""
version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
flows:
transfer_money:
description: This flow lets users send money.
name: transfer money
steps:
- id: "ask_recipient"
collect_information: transfer_recipient
next: "ask_amount"
- id: "ask_amount"
collect_information: transfer_amount
next: "execute_transfer"
- id: "execute_transfer"
action: action_transfer_money"""

Path("data/flows.yml").write_text(flows_yaml)

domain_yaml = """
actions:
- action_transfer_money
intents:
- transfer_money
slots:
transfer_recipient:
type: text
mappings: []
transfer_amount:
type: float
mappings: []"""
Path("domain.yml").write_text(domain_yaml)
result = run_in_simple_project("data", "validate", "flows")
assert result.ret == 0
Loading