Validate the YAML files against schema (home-assistant#9)

* Validate the YAML files against schema
* Fix imports
* Cleanup
* Fix typing
* Single dict key for lists
* Add type to range example
* Address comments
* remove validation checks from tests
* Validate tests/common
* Remove unused vol.All
* Add markers around all dict keys in schemas
cvladan · Dec 16, 2022 · 94a0af1 · 94a0af1
1 parent 21b60b3
commit 94a0af1
Show file tree

Hide file tree

Showing 6 changed files with 183 additions and 23 deletions.
diff --git a/mypy.ini b/mypy.ini
@@ -1,4 +1,5 @@
 [mypy]
+ignore_missing_imports = true
 
 [mypy-setuptools.*]
 ignore_missing_imports = True

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,6 @@
 hassil~=0.0.1
 PyYAML==6.0
+voluptuous==0.13.1
 
 black==22.10.0
 flake8==6.0.0

diff --git a/script/intentfest/validate.py b/script/intentfest/validate.py
@@ -2,13 +2,147 @@
 from __future__ import annotations
 
 import argparse
+from typing import Any
 
+import voluptuous as vol
 import yaml
+from voluptuous.humanize import validate_with_humanized_errors
 
 from .const import INTENTS_FILE, LANGUAGES, SENTENCE_DIR, TESTS_DIR
 from .util import get_base_arg_parser, require_sentence_domain_slot
 
 
+def match_anything(value):
+    """Validator that matches everything"""
+    return value
+
+
+def single_key_dict_validator(schemas: dict[str, Any]) -> vol.Schema:
+    """Create a validator for a single key dict."""
+
+    def validate(value):
+        if not isinstance(value, dict):
+            raise vol.Invalid("Expected a dict")
+
+        if len(value) != 1:
+            raise vol.Invalid("Expected a single key dict")
+
+        key = next(iter(value))
+
+        if key not in schemas:
+            raise vol.Invalid(f"Expected a key in {', '.join(schemas)}")
+
+        if not isinstance(schemas[key], vol.Schema):
+            schemas[key] = vol.Schema(schemas[key])
+
+        return schemas[key](value[key])
+
+    return validate
+
+
+INTENTS_SCHEMA = vol.Schema(
+    {
+        str: {
+            vol.Required("description"): str,
+            vol.Optional("slots"): {
+                str: {
+                    vol.Required("description"): str,
+                    vol.Optional("required"): bool,
+                }
+            },
+            vol.Optional("slot_combinations"): {str: [str]},
+        }
+    }
+)
+
+INTENT_ERRORS = {
+    "no_intent",
+    "no_area",
+    "no_domain",
+    "no_device_class",
+    "no_entity",
+    "handle_error",
+}
+
+SENTENCE_SCHEMA = vol.Schema(
+    {
+        vol.Required("language"): str,
+        vol.Optional("intents"): {
+            str: {
+                vol.Required("data"): [
+                    {
+                        vol.Required("sentences"): [str],
+                        vol.Optional("slots"): {str: match_anything},
+                    }
+                ]
+            }
+        },
+        vol.Optional("lists"): {
+            str: single_key_dict_validator(
+                {
+                    "values": [
+                        vol.Any(
+                            str,
+                            {"in": str, "out": match_anything},
+                        )
+                    ],
+                    "range": {
+                        vol.Required("type", default="number"): str,
+                        vol.Required("from"): int,
+                        vol.Required("to"): int,
+                        vol.Optional("step", default=1): int,
+                    },
+                }
+            )
+        },
+        vol.Optional("expansion_rules"): {str: str},
+        vol.Optional("skip_words"): [str],
+        vol.Optional("responses"): {
+            vol.Optional("errors"): {
+                vol.In(INTENT_ERRORS): str,
+            }
+        },
+    }
+)
+
+TESTS_SCHEMA = vol.Schema(
+    {
+        vol.Required("language"): str,
+        vol.Required("tests"): [
+            {
+                vol.Required("sentences"): [str],
+                vol.Required("intent"): {
+                    vol.Required("name"): str,
+                    vol.Optional("slots"): {
+                        str: {vol.Required("value"): match_anything}
+                    },
+                },
+            }
+        ],
+    }
+)
+
+TESTS_COMMON = vol.Schema(
+    {
+        vol.Required("language"): str,
+        vol.Optional("areas"): [
+            {
+                vol.Required("name"): str,
+                vol.Required("id"): str,
+            }
+        ],
+        vol.Optional("entities"): [
+            {
+                vol.Required("name"): str,
+                vol.Required("id"): str,
+                vol.Required("area"): str,
+                vol.Required("domain"): str,
+            }
+        ],
+    }
+)
+
+
 def get_arguments() -> argparse.Namespace:
     """Get parsed passed in arguments."""
     parser = get_base_arg_parser()
@@ -28,6 +162,12 @@ def run() -> int:
 
     intent_schemas = yaml.safe_load(INTENTS_FILE.read_text())
 
+    try:
+        validate_with_humanized_errors(intent_schemas, INTENTS_SCHEMA)
+    except vol.Error as err:
+        print(f"File intents.yaml has invalid format: {err}")
+        return 1
+
     errors: dict[str, list[str]] = {}
 
     for language in languages:
@@ -38,11 +178,14 @@ def run() -> int:
             errors.pop(language)
 
     if errors:
-        print("Validation failed:")
+        print("Validation failed")
+        print()
+
         for language, language_errors in errors.items():
             print(f"Language: {language}")
             for error in language_errors:
                 print(f" - {error}")
+            print()
         return 1
 
     print("All good!")
@@ -58,12 +201,22 @@ def validate_language(intent_schemas, language, errors):
     for language_file in language_dir.iterdir():
         language_files.add(language_file.name)
 
-        if language_file.name == "_common.yaml":
-            info = yaml.safe_load(language_file.read_text())
-            if info["language"] != language:
-                errors[language].append(
-                    f"File {language_file.name} references incorrect language {info['language']}"
-                )
+        content = yaml.safe_load(language_file.read_text())
+
+        try:
+            validate_with_humanized_errors(content, SENTENCE_SCHEMA)
+        except vol.Error as err:
+            errors[language].append(
+                f"File {language_file.name} has invalid format: {err}"
+            )
+            continue
+
+        if content["language"] != language:
+            errors[language].append(
+                f"File {language_file.name} references incorrect language {content['language']}"
+            )
+
+        if language_file.name.startswith("_"):
             continue
 
         domain, intent = language_file.stem.split("_")
@@ -74,13 +227,12 @@ def validate_language(intent_schemas, language, errors):
             )
             continue
 
-        sentences = yaml.safe_load(language_file.read_text())
-        if sentences["language"] != language:
+        if content["language"] != language:
             errors[language].append(
-                f"File {language_file.name} references incorrect language {sentences['language']}"
+                f"File {language_file.name} references incorrect language {content['language']}"
             )
 
-        for intent_name, intent_info in sentences["intents"].items():
+        for intent_name, intent_info in content["intents"].items():
             if intent != intent_name:
                 errors[language].append(
                     f"File {language_file.name} references incorrect intent {intent_name}. Only {intent} allowed"
@@ -107,19 +259,30 @@ def validate_language(intent_schemas, language, errors):
 
         language_files.discard(test_file.name)
 
-        info = yaml.safe_load(test_file.read_text())
+        content = yaml.safe_load(test_file.read_text())
+
+        if test_file.name == "_common.yaml":
+            schema = TESTS_COMMON
+        else:
+            schema = TESTS_SCHEMA
+
+        try:
+            validate_with_humanized_errors(content, schema)
+        except vol.Error as err:
+            errors[language].append(f"File {test_file.name} has invalid format: {err}")
+            continue
 
-        if info["language"] != language:
+        if content["language"] != language:
             errors[language].append(
-                f"Test {test_file.name} references incorrect language {info['language']}"
+                f"Test {test_file.name} references incorrect language {content['language']}"
             )
 
         if test_file.name == "_common.yaml":
             continue
 
         domain, intent = test_file.stem.split("_")
 
-        tested_intents = set(i["intent"]["name"] for i in info["tests"])
+        tested_intents = set(i["intent"]["name"] for i in content["tests"])
 
         if intent not in tested_intents:
             errors[language].append(

diff --git a/sentences/README.md b/sentences/README.md
@@ -28,6 +28,7 @@ lists:
       - "the list"
   <range_name>
     range:
+      type: "number"
       from: 0
       to: 100
 

diff --git a/sentences/en/_common.yaml b/sentences/en/_common.yaml
@@ -20,13 +20,13 @@ lists:
       - "purple"
       - "brown"
   brightness:
-    type: "percentage"
     range:
+      type: "percentage"
       from: 0
       to: 100
   temperature:
-    type: "temperature"
     range:
+      type: "temperature"
       from: 0
       to: 100
   temperature_unit:

diff --git a/tests/test_language_intents.py b/tests/test_language_intents.py
@@ -8,12 +8,6 @@
 
 def test_language_intents(language_intents, intent_schemas):
     """Ensure all language sentences contain valid slots, lists, rules, etc."""
-    # Ensure all intents names are present
-    from pprint import pprint
-
-    pprint(language_intents)
-    assert sorted(language_intents.intents) == sorted(intent_schemas)
-
     # Add placeholder slots that HA will generate
     language_intents.slot_lists["area"] = TextSlotList(values=[])
     language_intents.slot_lists["name"] = TextSlotList(values=[])
-Original file line number
+Diff line change
@@ Expand Up / @@ -28,6 +28,7 @@ lists: @@
           - "the list"
       <range_name>
         range:
+          type: "number"
           from: 0
           to: 100
@@ Expand Down @@