diff --git a/Makefile b/Makefile index 78bb952d69a6c..c062f8adc4b49 100644 --- a/Makefile +++ b/Makefile @@ -92,3 +92,6 @@ check_manifest: check-manifest python_modules/dagster-webserver check-manifest python_modules/dagster-graphql ls python_modules/libraries | xargs -n 1 -Ipkg check-manifest python_modules/libraries/pkg + +externals_json_schema: + python scripts/generate_externals_json_schema.py diff --git a/python_modules/dagster-externals/dagster_externals/_util.py b/python_modules/dagster-externals/dagster_externals/_util.py index d1cc877690724..88f11cccd5c10 100644 --- a/python_modules/dagster-externals/dagster_externals/_util.py +++ b/python_modules/dagster-externals/dagster_externals/_util.py @@ -1,5 +1,18 @@ +import collections.abc import json -from typing import Any, Optional, Sequence, TypeVar +from typing import ( + Any, + Optional, + Sequence, + Type, + TypeVar, + Union, + cast, + get_args, + get_origin, +) + +from typing_extensions import TypedDict, get_type_hints from ._protocol import ExternalExecutionContextData, ExternalExecutionExtras @@ -77,3 +90,59 @@ def assert_param_json_serializable(value: T, method: str, param: str) -> T: f" type, got `{type(value)}`." 
# ########################
# ##### JSON SCHEMA
# ########################

# Python scalar types that map directly onto a JSON Schema primitive. Matching is done by
# identity, so `bool` (a subclass of `int`) is never reported as "integer".
_JSON_SCALAR_TYPES = (
    (int, "integer"),
    (float, "number"),
    (str, "string"),
    (bool, "boolean"),
)


def typed_dict_to_json_schema(typed_dict_cls: Type[TypedDict]):
    """Build a JSON Schema ``object`` definition from a ``TypedDict`` class.

    Args:
        typed_dict_cls: The ``TypedDict`` class to describe. Its resolved type hints are
            converted field by field with ``process_field_type``.

    Returns:
        A dict with the keys ``"type"`` (always ``"object"``), ``"properties"`` (field name
        to field schema) and ``"required"`` (names of the keys that must be present, in
        declaration order).

    Raises:
        ValueError: If any field uses a type that ``process_field_type`` does not support.
    """
    hints = get_type_hints(typed_dict_cls)

    # Keys declared under `total=False` (or marked NotRequired) may legitimately be absent,
    # so they must not be listed as required. Fall back to `__total__` for TypedDict
    # implementations that do not expose `__required_keys__`.
    required_keys = getattr(typed_dict_cls, "__required_keys__", None)
    if required_keys is None:
        required_keys = hints if getattr(typed_dict_cls, "__total__", True) else ()

    properties = {field: process_field_type(field_type) for field, field_type in hints.items()}
    required = [field for field in hints if field in required_keys]

    return {"type": "object", "properties": properties, "required": required}


def process_field_type(field_type: Type[Any]):
    """Convert a single type annotation into its JSON Schema fragment.

    Supported annotations: ``int``, ``float``, ``str``, ``bool``, ``Optional[X]``, nested
    ``TypedDict`` classes, ``Sequence[X]`` and ``Mapping[str, X]``.

    Args:
        field_type: The (already resolved) annotation to convert.

    Returns:
        A dict holding the JSON Schema for ``field_type``. ``Optional[X]`` becomes
        ``{"anyOf": [<schema of X>, {"type": "null"}]}`` so that ``None`` values validate.

    Raises:
        ValueError: If the annotation is not one of the supported forms, or if a ``Mapping``
            is keyed by anything other than ``str``.
    """
    # Handle basic types
    for python_type, json_type in _JSON_SCALAR_TYPES:
        if field_type is python_type:
            return {"type": json_type}

    origin = get_origin(field_type)
    args = get_args(field_type)

    # Handle optional types: the value may be null, so the schema must say so explicitly.
    if origin is Union and len(args) == 2 and type(None) in args:
        inner_type = args[0] if args[1] is type(None) else args[1]
        return {"anyOf": [process_field_type(inner_type), {"type": "null"}]}

    # Handle nested TypedDict (at runtime a TypedDict class is a plain `dict` subclass that
    # carries type hints).
    if (
        isinstance(field_type, type)
        and issubclass(field_type, dict)
        and get_type_hints(field_type)
    ):
        return typed_dict_to_json_schema(cast(Type[TypedDict], field_type))

    # Handle sequences
    # at runtime the origin will come from collections.abc instead of typing
    if origin is collections.abc.Sequence:
        item_type = args[0]
        return {"type": "array", "items": process_field_type(item_type)}

    # Handle mappings
    # at runtime the origin will come from collections.abc instead of typing
    if origin is collections.abc.Mapping:
        key_type, value_type = args
        # JSON object keys are always strings.
        if key_type is not str:
            raise ValueError(f"Unsupported key type {key_type} for Mapping")
        additional_properties = True if value_type is Any else process_field_type(value_type)
        return {"type": "object", "additionalProperties": additional_properties}

    raise ValueError(f"Unsupported type {field_type}")
b/scripts/generate_externals_json_schema.py new file mode 100644 index 0000000000000..e69de29bb2d1d