diff --git a/README.md b/README.md
index d25a288..ec0d053 100644
--- a/README.md
+++ b/README.md
@@ -30,3 +30,4 @@ Please refer to test files for detailed usage.
 - [PEP 484](https://peps.python.org/pep-0484/) type annotations
 - Native integration with [Sphinx](https://github.com/sphinx-doc/sphinx), [DP-GUI](https://github.com/deepmodeling/dpgui), and [Jupyter Notebook](https://jupyter.org/)
 - JSON encoder for `Argument` and `Variant` classes
+- Generate [JSON schema](https://json-schema.org/) from an `Argument`, which can be further integrated with JSON editors such as [Visual Studio Code](https://code.visualstudio.com/)
diff --git a/dargs/dargs.py b/dargs/dargs.py
index 290f9fd..2529560 100644
--- a/dargs/dargs.py
+++ b/dargs/dargs.py
@@ -460,11 +460,15 @@ def _check_data(self, value: Any, path=None):
             )
 
     def _check_strict(self, value: dict, path=None):
-        allowed_keys = self.flatten_sub(value, path).keys()
+        allowed_keys = set(self.flatten_sub(value, path).keys())
         # curpath = [*path, self.name]
         if not len(allowed_keys):
             # no allowed keys defined, allow any keys
             return
+        # Special case: always accept "$schema" so that files remain compatible with
+        # VS Code's in-file JSON schema mapping; such a key is rarely a user typo.
+        # https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json
+        allowed_keys.add("$schema")
         for name in value.keys():
             if name not in allowed_keys:
                 dym_message = did_you_mean(name, allowed_keys)
diff --git a/dargs/json_schema.py b/dargs/json_schema.py
new file mode 100644
index 0000000..221dd3b
--- /dev/null
+++ b/dargs/json_schema.py
@@ -0,0 +1,165 @@
+"""Generate JSON schema from a given dargs.Argument."""
+
+from __future__ import annotations
+
+from typing import Any
+
+from dargs.dargs import Argument, _Flags
+
+try:
+    from typing import get_origin
+except ImportError:
+    from typing_extensions import get_origin
+
+
+def generate_json_schema(argument: Argument, id: str = "") -> dict:
+    """Generate JSON schema from a given dargs.Argument.
+
+    Parameters
+    ----------
+    argument : Argument
+        The argument to generate JSON schema.
+    id : str, optional
+        The URL of the schema, by default "".
+
+    Returns
+    -------
+    dict
+        The JSON schema. Use :func:`json.dump` to save it to a file
+        or :func:`json.dumps` to get a string.
+
+    Examples
+    --------
+    Dump the JSON schema of DeePMD-kit to a file:
+
+    >>> from dargs.json_schema import generate_json_schema
+    >>> from deepmd.utils.argcheck import gen_args
+    >>> import json
+    >>> from dargs import Argument
+    >>> a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args())
+    >>> schema = generate_json_schema(a)
+    >>> with open("deepmd.json", "w") as f:
+    ...     json.dump(schema, f, indent=2)
+    """
+    schema = {
+        "$schema": "https://json-schema.org/draft/2020-12/schema",
+        "$id": id,
+        "title": argument.name,
+        **_convert_single_argument(argument),
+    }
+    return schema
+
+
+def _convert_single_argument(argument: Argument) -> dict:
+    """Convert a single argument to JSON schema.
+
+    Parameters
+    ----------
+    argument : Argument
+        The argument to convert.
+
+    Returns
+    -------
+    dict
+        The JSON schema of the argument.
+    """
+    data = {
+        "description": argument.doc,
+        # Deduplicate the JSON types (int and float both map to "number") and
+        # sort them so the generated schema is identical across runs.
+        "type": sorted({_convert_types(tt) for tt in argument.dtype}),
+    }
+    if argument.default is not _Flags.NONE:
+        data["default"] = argument.default
+    properties = {
+        **{
+            nn: _convert_single_argument(aa)
+            for aa in argument.sub_fields.values()
+            for nn in (aa.name, *aa.alias)
+        },
+        **{
+            vv.flag_name: {
+                "type": "string",
+                "enum": list(vv.choice_dict.keys()) + list(vv.choice_alias.keys()),
+                "default": vv.default_tag,
+                "description": vv.doc,
+            }
+            for vv in argument.sub_variants.values()
+        },
+    }
+    required = [
+        aa.name
+        for aa in argument.sub_fields.values()
+        if not aa.optional and not aa.alias
+    ] + [vv.flag_name for vv in argument.sub_variants.values() if not vv.optional]
+    allof = [
+        {
+            "if": {
+                "oneOf": [
+                    {
+                        "properties": {vv.flag_name: {"const": kkaa}},
+                    }
+                    for kkaa in (kk, *aa.alias)
+                ],
+                "required": [vv.flag_name]
+                if not (vv.optional and vv.default_tag == kk)
+                else [],
+            },
+            "then": _convert_single_argument(aa),
+        }
+        for vv in argument.sub_variants.values()
+        for kk, aa in vv.choice_dict.items()
+    ]
+    allof += [
+        {"oneOf": [{"required": [nn]} for nn in (aa.name, *aa.alias)]}
+        for aa in argument.sub_fields.values()
+        if not aa.optional and aa.alias
+    ]
+    if not argument.repeat:
+        data["properties"] = properties
+        data["required"] = required
+        if allof:
+            data["allOf"] = allof
+    else:
+        data["items"] = {
+            "type": "object",
+            "properties": properties,
+            "required": required,
+        }
+        if allof:
+            data["items"]["allOf"] = allof
+    return data
+
+
+def _convert_types(T: type | Any | None) -> str:
+    """Convert a type to JSON schema type.
+
+    Parameters
+    ----------
+    T : type | Any | None
+        The type to convert.
+
+    Returns
+    -------
+    str
+        The JSON schema type.
+
+    Raises
+    ------
+    ValueError
+        If the type has no JSON schema counterpart.
+    """
+    # string, number, integer, object, array, boolean, null
+    if T is None or T is type(None):
+        return "null"
+    elif T is str:
+        return "string"
+    elif T in (int, float):
+        return "number"
+    elif T is bool:
+        return "boolean"
+    elif T is list or get_origin(T) is list:
+        return "array"
+    elif T is dict or get_origin(T) is dict:
+        return "object"
+    raise ValueError(f"Unknown type: {T}")
diff --git a/docs/json_schema.md b/docs/json_schema.md
new file mode 100644
index 0000000..e7c1803
--- /dev/null
+++ b/docs/json_schema.md
@@ -0,0 +1,42 @@
+## Generate JSON schema from an argument
+
+One can use {func}`dargs.json_schema.generate_json_schema` to generate [JSON schema](https://json-schema.org/).
+
+```py
+import json
+
+from dargs import Argument
+from dargs.json_schema import generate_json_schema
+from deepmd.utils.argcheck import gen_args
+
+
+a = Argument("DeePMD-kit", dtype=dict, sub_fields=gen_args())
+schema = generate_json_schema(a)
+with open("deepmd.json", "w") as f:
+    json.dump(schema, f, indent=2)
+```
+
+JSON schema can be used in several JSON editors. For example, in [Visual Studio Code](https://code.visualstudio.com/), you can [configure JSON schema](https://code.visualstudio.com/docs/languages/json#_json-schemas-and-settings) in the project `settings.json`:
+
+```json
+{
+    "json.schemas": [
+        {
+            "fileMatch": [
+                "/**/*.json"
+            ],
+            "url": "./deepmd.json"
+        }
+    ]
+}
+```
+
+VS Code also allows one to [specify the JSON schema in a JSON file](https://code.visualstudio.com/docs/languages/json#_mapping-in-the-json) with the `$schema` key.
+To be compatible, dargs will not throw an error for `$schema` in the strict mode even if `$schema` is not defined in the argument.
+
+```json
+{
+    "$schema": "./deepmd.json",
+    "model": {}
+}
+```
diff --git a/pyproject.toml b/pyproject.toml
index 8e5b0e2..c4b0900 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,6 +30,7 @@ repository = "https://github.com/deepmodeling/dargs"
 [project.optional-dependencies]
 test = [
     "ipython",
+    "jsonschema",
 ]
 typecheck = [
     "basedpyright==1.12.2",
diff --git a/tests/dpmdargs.py b/tests/dpmdargs.py
index 8241083..72b2519 100644
--- a/tests/dpmdargs.py
+++ b/tests/dpmdargs.py
@@ -216,7 +216,7 @@ def descrpt_hybrid_args():
                     "type",
                     [
                         Argument("loc_frame", dict, descrpt_local_frame_args()),
-                        Argument("se_a", dict, descrpt_se_a_args()),
+                        Argument("se_e2_a", dict, descrpt_se_a_args(), alias=["se_a"]),
                         Argument("se_r", dict, descrpt_se_r_args()),
                         Argument(
                             "se_a_3be", dict, descrpt_se_a_3be_args(), alias=["se_at"]
@@ -764,8 +764,19 @@ def normalize(data):
     return data
 
 
+def gen_args() -> Argument:
+    ma = model_args()
+    lra = learning_rate_args()
+    la = loss_args()
+    ta = training_args()
+
+    base = Argument("base", dict, [ma, lra, la, ta])
+    return base
+
+
 example_json_str = """
 {
+    "$schema": "this should be ignored by dargs",
     "_comment": " model parameters",
     "model": {
         "type_map": ["O", "H"],
diff --git a/tests/test_json_schema.py b/tests/test_json_schema.py
new file mode 100644
index 0000000..48dba41
--- /dev/null
+++ b/tests/test_json_schema.py
@@ -0,0 +1,30 @@
+from __future__ import annotations
+
+import json
+import unittest
+
+from jsonschema import validate
+
+from dargs.json_schema import _convert_types, generate_json_schema
+
+from .dpmdargs import example_json_str, gen_args
+
+
+class TestJsonSchema(unittest.TestCase):
+    def test_json_schema(self):
+        args = gen_args()
+        schema = generate_json_schema(args)
+        data = json.loads(example_json_str)
+        validate(data, schema)
+
+    def test_convert_types(self):
+        self.assertEqual(_convert_types(int), "number")
+        self.assertEqual(_convert_types(str), "string")
+        self.assertEqual(_convert_types(float), "number")
+        self.assertEqual(_convert_types(bool), "boolean")
+        self.assertEqual(_convert_types(None), "null")
+        self.assertEqual(_convert_types(type(None)), "null")
+        self.assertEqual(_convert_types(list), "array")
+        self.assertEqual(_convert_types(dict), "object")
+        with self.assertRaises(ValueError):
+            _convert_types(set)