Skip to content

Commit

Permalink
Make validation more strict (home-assistant#130)
Browse files Browse the repository at this point in the history
  • Loading branch information
balloob authored Dec 25, 2022
1 parent c2c9580 commit c31f41f
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 45 deletions.
64 changes: 27 additions & 37 deletions script/intentfest/validate.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,19 @@ def validate(value):
]
}
},
}
# Fields from SENTENCE_COMMON_SCHEMA are allowed by the parser,
# but we do not accept them in our repository.
)

SENTENCE_COMMON_SCHEMA = vol.Schema(
{
vol.Required("language"): str,
vol.Optional("responses"): {
vol.Optional("errors"): {
vol.In(INTENT_ERRORS): str,
}
},
vol.Optional("lists"): {
str: single_key_dict_validator(
{
Expand All @@ -122,11 +135,6 @@ def validate(value):
},
vol.Optional("expansion_rules"): {str: str},
vol.Optional("skip_words"): [str],
vol.Optional("responses"): {
vol.Optional("errors"): {
vol.In(INTENT_ERRORS): str,
}
},
}
)

Expand Down Expand Up @@ -236,8 +244,8 @@ def validate_language(intent_schemas, language, errors):
response_dir: Path = RESPONSE_DIR / language

sentence_files = {}
lists = {}
expansion_rules = {}
lists = None
expansion_rules = None

for sentence_file in sentence_dir.iterdir():
path = str(sentence_dir.relative_to(ROOT) / sentence_file.name)
Expand All @@ -247,11 +255,17 @@ def validate_language(intent_schemas, language, errors):
errors[language].append(f"{path}: invalid YAML: {err}")
continue

if sentence_file.name != "_common.yaml":
if sentence_file.name == "_common.yaml":
schema = SENTENCE_COMMON_SCHEMA
lists = content.get("lists", {})
expansion_rules = content.get("expansion_rules", {})

else:
schema = SENTENCE_SCHEMA
sentence_files[sentence_file.name] = content

try:
validate_with_humanized_errors(content, SENTENCE_SCHEMA)
validate_with_humanized_errors(content, schema)
except vol.Error as err:
errors[language].append(f"{path}: invalid format: {err}")
continue
Expand All @@ -261,16 +275,7 @@ def validate_language(intent_schemas, language, errors):
f"{path}: references incorrect language {content['language']}"
)

if "lists" in content:
lists.update(content["lists"])

if "expansion_rules" in content:
expansion_rules.update(content["expansion_rules"])

for sentence_file, content in sentence_files.items():
if sentence_file.startswith("_"):
continue

domain, intent = sentence_file.split(".")[0].split("_")

if intent not in intent_schemas:
Expand Down Expand Up @@ -331,8 +336,6 @@ def validate_language(intent_schemas, language, errors):
f"got {slots.get('domain')}"
)

seen_sentences = set()

if not test_dir.exists():
errors[language].append(f"Missing tests directory {test_dir}")
return
Expand Down Expand Up @@ -378,21 +381,6 @@ def validate_language(intent_schemas, language, errors):

domain, intent = test_file.stem.split("_")

tested_intents = set(i["intent"]["name"] for i in content["tests"])

if intent not in tested_intents:
errors[language].append(
f"{path}: does not contain test for intent {intent}"
)

if extra_intents := tested_intents - {intent}:
errors[language].append(
f"{path}: tests extra intents {', '.join(sorted(extra_intents))}. Only {intent} allowed"
)

if tested_intents != {intent}:
return

test_count = sum(len(test["sentences"]) for test in content["tests"])
sentence_count = sum(
len(data["sentences"]) for data in sentence_file["intents"][intent]["data"]
Expand All @@ -401,11 +389,13 @@ def validate_language(intent_schemas, language, errors):
if sentence_count > test_count:
errors[language].append(f"{path}: not all sentences have tests")

test_sentences = set()

for test in content["tests"]:
for sentence in test["sentences"]:
if sentence in seen_sentences:
if sentence in test_sentences:
errors[language].append(f"{path}: duplicate sentence {sentence}")
seen_sentences.add(sentence)
test_sentences.add(sentence)

if sentence_files:
for sentence_file in sentence_files:
Expand Down
8 changes: 0 additions & 8 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,3 @@ def load_test(language: str, test_name: str):
return yaml.safe_load(
(TEST_SENTENCES_DIR / language / f"{test_name}.yaml").read_text()
)
# lang_dir = TEST_SENTENCES_DIR / language
# files: Dict[str, Any] = {}

# for yaml_path in lang_dir.rglob("*.yaml"):
# with open(yaml_path, "r", encoding="utf-8") as yaml_file:
# files[yaml_path.name] = yaml.safe_load(yaml_file)

# return files
6 changes: 6 additions & 0 deletions tests/test_language_sentences.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,14 @@ def do_test_language_sentences_file(
language, test_file, slot_lists, language_sentences
):
"""Tests recognition of all of the test sentences for a language"""
_testing_domain, testing_intent = test_file.split("_", 1)

for test in load_test(language, test_file)["tests"]:
intent = test["intent"]
assert (
intent["name"] == testing_intent
), f"File {test_file} should only test for intent {testing_intent}"

for sentence in test["sentences"]:
result = recognize(sentence, language_sentences, slot_lists=slot_lists)
assert result is not None, f"Recognition failed for '{sentence}'"
Expand Down

0 comments on commit c31f41f

Please sign in to comment.