Split language intents tests by file (home-assistant#131)
* Split language intents by file

* isort

* cleanup

* remove stale comment

* Re-use parsed common intents

* Remove validate logic that is also checked in tests

* Document individual file testing

* typo

* Verify duplicate sentence being tested in tests instead of validate

* Extract common validate logic into helper function
balloob authored Dec 26, 2022
1 parent 69ecabd commit 5149647
Showing 6 changed files with 133 additions and 134 deletions.
6 changes: 3 additions & 3 deletions README.md
@@ -13,7 +13,7 @@ Repository layout:
* `tests/<language>`
* YAML files for `<language>` with test sentences and corresponding intents
* [File format](tests/README.md#file-format)

See the [documentation](docs/README.md) for more information.


@@ -75,10 +75,10 @@ Validate the data is correctly formatted.
python3 -m script.intentfest validate
```

Run the tests. Leave `--language` off to run all tests.
Run the tests. Specify a language with `--language` and a single file with `-k`. Leave both off to run the full test suite.

```
pytest tests --language nl
pytest tests --language nl -k fan_HassTurnOn
```
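
For reference, `-k` accepts pytest's usual keyword expressions, so several files can be selected in one run. A hypothetical combined selection (the file names are examples):

```
pytest tests --language nl -k "fan_HassTurnOn or cover_HassOpenCover"
```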

## Test parsing sentences
169 changes: 55 additions & 114 deletions script/intentfest/validate.py
@@ -2,7 +2,6 @@
from __future__ import annotations

import argparse
import re
from pathlib import Path
from typing import Any

@@ -13,10 +12,6 @@
from .const import INTENTS_FILE, LANGUAGES, RESPONSE_DIR, ROOT, SENTENCE_DIR, TESTS_DIR
from .util import get_base_arg_parser, require_sentence_domain_slot

# Slots can be {name} or {name:new_name}
PATTERN_SLOTS = re.compile(r"\{(?P<name>[a-z_]+)(?:|:\w+)\}")
PATTERN_EXPANSION_RULES = re.compile(r"\<(?P<name>[a-z_]+)\>")


def match_anything(value):
"""Validator that matches everything"""
@@ -238,45 +233,56 @@ def run() -> int:
return 0


def _load_yaml_file(
errors: list, language: str, file_path: Path, schema: vol.Schema
) -> dict | None:
"""Load a YAML file."""
path = str(file_path.relative_to(ROOT))
try:
content = yaml.safe_load(file_path.read_text())
except yaml.YAMLError as err:
errors.append(f"{path}: invalid YAML: {err}")
return None

try:
validate_with_humanized_errors(content, schema)
except vol.Error as err:
errors.append(f"{path}: invalid format: {err}")
return None

if content["language"] != language:
errors.append(f"{path}: references incorrect language {content['language']}")
return None

return content
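
For readers skimming the diff: `_load_yaml_file` folds the three previously duplicated load/validate/language-check blocks into a single error-collecting helper. A condensed, self-contained sketch of the same pattern (the sample schema and temp file below are hypothetical, not the repository's real ones):

```python
import tempfile
from pathlib import Path

import voluptuous as vol
import yaml
from voluptuous.humanize import validate_with_humanized_errors


def load_yaml_file(errors: list, language: str, file_path: Path, schema: vol.Schema):
    """Parse and validate a YAML file; on any problem, record it and return None."""
    try:
        content = yaml.safe_load(file_path.read_text())
    except yaml.YAMLError as err:
        errors.append(f"{file_path.name}: invalid YAML: {err}")
        return None

    try:
        validate_with_humanized_errors(content, schema)
    except vol.Error as err:
        errors.append(f"{file_path.name}: invalid format: {err}")
        return None

    if content["language"] != language:
        errors.append(f"{file_path.name}: references incorrect language {content['language']}")
        return None

    return content


schema = vol.Schema({vol.Required("language"): str})  # hypothetical sample schema
with tempfile.TemporaryDirectory() as tmp:
    path = Path(tmp) / "demo.yaml"
    path.write_text("language: en\n")
    errors: list = []
    assert load_yaml_file(errors, "nl", path, schema) is None  # wrong language
    assert errors == ["demo.yaml: references incorrect language en"]
```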


def validate_language(intent_schemas, language, errors):
sentence_dir: Path = SENTENCE_DIR / language
test_dir: Path = TESTS_DIR / language
response_dir: Path = RESPONSE_DIR / language

sentence_files = {}
lists = None
expansion_rules = None

for sentence_file in sentence_dir.iterdir():
path = str(sentence_dir.relative_to(ROOT) / sentence_file.name)
try:
content = yaml.safe_load(sentence_file.read_text())
except yaml.YAMLError as err:
errors[language].append(f"{path}: invalid YAML: {err}")
continue
path = str(sentence_file.relative_to(ROOT))

if sentence_file.name == "_common.yaml":
schema = SENTENCE_COMMON_SCHEMA
lists = content.get("lists", {})
expansion_rules = content.get("expansion_rules", {})

else:
schema = SENTENCE_SCHEMA
sentence_files[sentence_file.name] = content

try:
validate_with_humanized_errors(content, schema)
except vol.Error as err:
errors[language].append(f"{path}: invalid format: {err}")
content = _load_yaml_file(errors[language], language, sentence_file, schema)

if sentence_file.name == "_common.yaml":
continue

if content["language"] != language:
errors[language].append(
f"{path}: references incorrect language {content['language']}"
)
sentence_files[sentence_file.name] = content

for sentence_file, content in sentence_files.items():
domain, intent = sentence_file.split(".")[0].split("_")
if content is None:
continue

domain, intent = sentence_file.stem.split("_")

if intent not in intent_schemas:
errors[language].append(
@@ -291,43 +297,10 @@ def validate_language(intent_schemas, language, errors):
)
continue

intent_slots = intent_schemas[intent].get("slots", {})
intent_slots_plus_lists = {*intent_slots, *lists}

for idx, sentence_info in enumerate(intent_info["data"]):
prefix = f"{path} sentence block {idx + 1}:"
slots = sentence_info.get("slots", {})

for sentence in sentence_info["sentences"]:
for slot in PATTERN_SLOTS.findall(sentence):
if slot not in intent_slots_plus_lists:
errors[language].append(
f"{prefix} sentence '{sentence}' references unknown slot '{slot}'"
)
continue

for expansion_rule in PATTERN_EXPANSION_RULES.findall(sentence):
if expansion_rule not in expansion_rules:
errors[language].append(
f"{prefix} sentence '{sentence}' references unknown expansion rule '{expansion_rule}'"
)
continue

for slot in PATTERN_SLOTS.findall(
expansion_rules[expansion_rule]
):
if slot not in intent_slots_plus_lists:
errors[language].append(
f"{prefix} sentence '{sentence}' references expansion rule '{expansion_rule}' which references unknown slot '{slot}'"
)
continue

for slot in slots:
if slot not in intent_slots:
errors[language].append(
f"{prefix} references unknown slot {slot}"
)

if require_sentence_domain_slot(intent, domain) and domain != slots.get(
"domain"
):
@@ -341,68 +314,46 @@ def validate_language(intent_schemas, language, errors):
return

for test_file in test_dir.iterdir():
path = str(test_dir.relative_to(ROOT) / test_file.name)
try:
content = yaml.safe_load(test_file.read_text())
except yaml.YAMLError as err:
errors[language].append(f"{path}: invalid YAML: {err}")
continue
path = str(test_file.relative_to(ROOT))

if test_file.name == "_fixtures.yaml":
try:
validate_with_humanized_errors(content, TESTS_FIXTURES)
except vol.Error as err:
errors[language].append(f"{path}: invalid format: {err}")
continue
schema = TESTS_FIXTURES
else:
schema = TESTS_SCHEMA

if content["language"] != language:
errors[language].append(
f"{path}: references incorrect language {content['language']}"
)
content = _load_yaml_file(errors[language], language, test_file, schema)

if content is None or test_file.name == "_fixtures.yaml":
continue

if test_file.name not in sentence_files:
errors[language].append(f"{path}: has no matching sentence file")
continue

sentence_file = sentence_files.pop(test_file.name)

try:
validate_with_humanized_errors(content, TESTS_SCHEMA)
except vol.Error as err:
errors[language].append(f"{path}: invalid format: {err}")
continue

if content["language"] != language:
errors[language].append(
f"{path}: references incorrect language {content['language']}"
)

sentence_content = sentence_files.pop(test_file.name)
domain, intent = test_file.stem.split("_")

test_count = sum(len(test["sentences"]) for test in content["tests"])
sentence_count = sum(
len(data["sentences"]) for data in sentence_file["intents"][intent]["data"]
)

if sentence_count > test_count:
errors[language].append(f"{path}: not all sentences have tests")
# Happens if the sentence file is invalid
if sentence_content is None:
continue

test_sentences = set()
if intent in sentence_content["intents"]:
sentence_count = sum(
len(data["sentences"])
for data in sentence_content["intents"][intent]["data"]
)

for test in content["tests"]:
for sentence in test["sentences"]:
if sentence in test_sentences:
errors[language].append(f"{path}: duplicate sentence {sentence}")
test_sentences.add(sentence)
if sentence_count > test_count:
errors[language].append(f"{path}: not all sentences have tests")

if sentence_files:
for sentence_file in sentence_files:
errors[language].append(f"{sentence_file} has no tests")

for response_file in response_dir.iterdir():
path = str(response_dir.relative_to(ROOT) / response_file.name)
path = str(response_file.relative_to(ROOT))
intent = response_file.stem

if intent not in intent_schemas:
@@ -411,23 +362,13 @@ def validate_language(intent_schemas, language, errors):
)
continue

try:
content = yaml.safe_load(response_file.read_text())
except yaml.YAMLError as err:
errors[language].append(f"{path}: invalid YAML: {err}")
continue
content = _load_yaml_file(
errors[language], language, response_file, RESPONSE_SCHEMA
)

try:
validate_with_humanized_errors(content, RESPONSE_SCHEMA)
except vol.Error as err:
errors[language].append(f"{path}: invalid format: {err}")
if content is None:
continue

if content["language"] != language:
errors[language].append(
f"{path}: references incorrect language {content['language']}"
)

for intent_name, intent_info in content["responses"]["intents"].items():
if intent != intent_name:
errors[language].append(
12 changes: 5 additions & 7 deletions tests/__init__.py
@@ -7,15 +7,15 @@
_DIR = Path(__file__).parent
_BASE_DIR = _DIR.parent
INTENTS_FILE = _BASE_DIR / "intents.yaml"
USER_SENTENCES_DIR = _BASE_DIR / "sentences"
TEST_SENTENCES_DIR = _BASE_DIR / "tests"
SENTENCES_DIR = _BASE_DIR / "sentences"
TESTS_DIR = _BASE_DIR / "tests"

LANGUAGES = [p.name for p in USER_SENTENCES_DIR.iterdir() if p.is_dir()]
LANGUAGES = [p.name for p in SENTENCES_DIR.iterdir() if p.is_dir()]


def load_sentences(language: str):
"""Load sentences from sentences/ for a language"""
lang_dir = USER_SENTENCES_DIR / language
lang_dir = SENTENCES_DIR / language
files: Dict[str, Any] = {}

for yaml_path in lang_dir.glob("*.yaml"):
@@ -32,6 +32,4 @@ def load_sentences(language: str):

def load_test(language: str, test_name: str):
"""Load test sentences from tests/ for a language"""
return yaml.safe_load(
(TEST_SENTENCES_DIR / language / f"{test_name}.yaml").read_text()
)
return yaml.safe_load((TESTS_DIR / language / f"{test_name}.yaml").read_text())
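
Together these helpers let each test case load exactly one sentence/test file pair. A hypothetical usage sketch (the language and file name are examples only):

```python
from tests import load_sentences, load_test

# Parsed YAML for every sentence file of a language.
sentences = load_sentences("nl")

# Parsed YAML for one per-file test suite, e.g. tests/nl/fan_HassTurnOn.yaml.
test_yaml = load_test("nl", "fan_HassTurnOn")
print(test_yaml["language"], len(test_yaml["tests"]))
```
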
19 changes: 16 additions & 3 deletions tests/conftest.py
@@ -1,6 +1,8 @@
"""Test configuration and fixtures."""
from __future__ import annotations

import dataclasses

import pytest
import yaml
from hassil import Intents
@@ -37,11 +39,22 @@ def language_sentences_yaml_fixture(language: str):
return load_sentences(language)


@pytest.fixture(name="language_sentences_common", scope="session")
def language_sentences_common_fixture(language, language_sentences_yaml):
"""Loads the common language intents."""
language_sentences_yaml["_common.yaml"].setdefault("intents", {})
return Intents.from_dict(language_sentences_yaml["_common.yaml"])


@pytest.fixture(scope="session")
def language_sentences(language_sentences_yaml: dict):
def language_sentences(language_sentences_yaml: dict, language_sentences_common):
"""Parse language sentences."""
merged: dict = {}
for intents_dict in language_sentences_yaml.values():
for file_name, intents_dict in language_sentences_yaml.items():
if file_name == "_common.yaml":
continue
merge_dict(merged, intents_dict)

return Intents.from_dict(merged)
intents = Intents.from_dict(merged)

return dataclasses.replace(language_sentences_common, intents=intents.intents)
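
The closing `dataclasses.replace` call copies the already-parsed common `Intents` object and swaps in only the merged per-file intents, which is how every test re-uses the parsed `_common.yaml` data. A toy illustration of that stdlib call (the dataclass below is a hypothetical stand-in for hassil's `Intents`):

```python
import dataclasses


@dataclasses.dataclass(frozen=True)
class Bundle:  # hypothetical stand-in for hassil's Intents container
    language: str
    intents: dict


common = Bundle(language="nl", intents={})
merged = Bundle(language="nl", intents={"HassTurnOn": {}})

# Copy `common`, replacing only the `intents` field; `common` itself is untouched.
combined = dataclasses.replace(common, intents=merged.intents)
assert combined.language == "nl" and "HassTurnOn" in combined.intents
```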