diff --git a/ogc/bblocks/examples-schema.yaml b/ogc/bblocks/examples-schema.yaml index 0325544..c59deb6 100644 --- a/ogc/bblocks/examples-schema.yaml +++ b/ogc/bblocks/examples-schema.yaml @@ -88,3 +88,43 @@ items: - code - required: - ref + transforms: + description: | + List of transforms for this example + type: array + items: + type: object + required: + - input-language + - output-language + - type + oneOf: + - required: + - code + - required: + - ref + properties: + input-language: + description: | + Input language of the code snippet from this example that will be transformed. It can correspond to a + manually-provided snippet, or to an uplifted one. + type: string + output-language: + description: | + Output language of the transformed snippet. + type: string + type: + description: | + The type of this transform. "jq" or "shacl" are examples of automatically processed ones. + type: string + description: + description: Textual description of this transformation. Markdown is accepted. + type: string + code: + description: Code contents of this transformation (e.g., jq script or SHACL rules file). + type: string + ref: + description: | + Location of a file with the code contents of this transformation (instead of + providing them inline through the "code" property). + type: string diff --git a/ogc/bblocks/mimetypes.py b/ogc/bblocks/mimetypes.py index b0c6774..1774e86 100644 --- a/ogc/bblocks/mimetypes.py +++ b/ogc/bblocks/mimetypes.py @@ -20,3 +20,9 @@ def lookup(t: str) -> dict | None: if 'aliases' in entry and entry['aliases'] and t in entry['aliases']: return entry + +def normalize(t: str) -> str: + n = lookup(t) + if n: + return n['mime-type'] + return t diff --git a/ogc/bblocks/transform.py b/ogc/bblocks/transform.py index b0e84c0..6d364e1 100644 --- a/ogc/bblocks/transform.py +++ b/ogc/bblocks/transform.py @@ -15,56 +15,57 @@ def apply_transforms(bblock: BuildingBlock, outputs_path: str | Path, output_subpath='transforms'): - if not bblock.examples or not bblock.transforms: + if not bblock.examples: return output_dir = Path(outputs_path) / bblock.subdirs / output_subpath shutil.rmtree(output_dir, ignore_errors=True) output_dir.mkdir(parents=True, exist_ok=True) - transforms_per_type = {} - for transform in bblock.transforms: - for i, mt in enumerate(transform['mime-types']['source']): - source_mime_type = mimetypes.lookup(mt) - if source_mime_type: - mt = source_mime_type['mime-type'] - transform['mime-types']['source'][i] = mt - transforms_per_type.setdefault(mt, []).append(transform) - target_mime_type = mimetypes.lookup(transform['mime-types']['target']) - if target_mime_type: - transform['mime-types']['target'] = target_mime_type['mime-type'] - output_ext = '.' + target_mime_type['extensions'][0] - output_mime_type = target_mime_type['mime-type'] - else: - output_ext = '' - output_mime_type = transform['mime-types']['target'] + for example_id, example in enumerate(bblock.examples): + transforms = example.get('transforms') + snippets = example.get('snippets') + if not transforms or not snippets: + continue - for example_id, example in enumerate(bblock.examples): - snippets = example.get('snippets', ()) - for snippet_id, snippet in enumerate(snippets): - found_mime_type = mimetypes.lookup(snippet.get('language')) - mime_type = found_mime_type['mime-type'] if found_mime_type else snippet.get('language') + transforms_by_input_lang = {} + for idx, transform in enumerate(transforms): + transform['input-language'] = mimetypes.normalize(transform['input-language']) + output_lang = mimetypes.lookup(transform['output-language']) + if output_lang: + transform['output-extension'] = output_lang['extensions'][0] + transform['output-language'] = output_lang['mime-type'] + else: + transform['output-extension'] = transform['output-language'] - if mime_type not in transform['mime-types']['source']: - continue + transform['idx'] = idx + transforms_by_input_lang.setdefault(transform['input-language'], []).append(transform) - output_fn = output_dir / f"example_{example_id + 1}_{snippet_id + 1}-{transform['type']}{output_ext}" + for snippet_id, snippet in enumerate(snippets): + snippet_lang = snippet.get('language') + if not snippet_lang: + continue + snippet_mime_type = mimetypes.normalize(snippet_lang) + + for transform in transforms_by_input_lang.get(snippet_mime_type, ()): + output_ext = transform['output-extension'] + output_fn = output_dir / (f"example_{example_id + 1}_{snippet_id + 1}" + f"-{transform['idx'] + 1}.{output_ext}") - ref = transform['ref'] if is_url(transform['ref']) else bblock.files_path / transform['ref'] transform_metadata = TransformMetadata(type=transform['type'], - source_mime_type=mime_type, - target_mime_type=output_mime_type, - source_ref=ref, + source_mime_type=transform['input-language'], + target_mime_type=transform['output-language'], transform_content=transform['code'], metadata=transform.get('metadata'), input_data=snippet['code']) + try: transform_result = transformers.transform(transform_metadata) if transform_result: with open(output_fn, 'w') as f: f.write(transform_result) - except Exception: + except: with open(output_fn.with_stem(output_fn.name + '.error'), 'w') as f: f.write('Error generating transformed file:\n') f.write(traceback.format_exc()) diff --git a/ogc/bblocks/transforms-schema.yaml b/ogc/bblocks/transforms-schema.yaml deleted file mode 100644 index fab9029..0000000 --- a/ogc/bblocks/transforms-schema.yaml +++ /dev/null @@ -1,79 +0,0 @@ -"$schema": https://json-schema.org/draft/2020-12/schema -title: OGC Building Blocks Register transforms schema -type: object -properties: - transforms: - description: List of transforms available for the building block - type: array - items: - type: object - required: - - title - - mime-types - - output-types - - ref - - type - properties: - title: - description: Title or label for this transform - type: string - mime-types: - type: object - required: - - source - - target - properties: - source: - description: Single or list of MIME types (or aliases thereof) that this transformation can be applied to - oneOf: - - type: array - minItems: 1 - items: - type: string - examples: - - [application/json, jsonld] - - type: string - examples: - - application/json - - jsonld - target: - description: Target MIME type (or alias thereof) for this transformation - type: string - examples: - - text/turtle - - ttl - output-types: - description: List of target specifications or formats that this transform can generate - type: array - minItems: 1 - items: - type: object - required: - - label - - identifier - properties: - label: - description: Label for this target - type: string - examples: - - GeoDCAT - identifier: - description: Keyword or identifier (or, preferably, URI) for this target type (e.g., "ttl") - type: string - examples: - - geodcat - ref: - description: URL or file path to the contents of this transform (e.g., SHACL file, Python script, etc.) - type: string - type: - description: URI or identifier for the type of this transform - type: string - examples: - - shacl - - ogc.na.ingest_json - - jq - - python - metadata: - description: | - Additional metadata needed for the transform (e.g., required Python dependencies). The specific - format and content will depend on the transform type. diff --git a/ogc/bblocks/util.py b/ogc/bblocks/util.py index ffddf04..639bf14 100644 --- a/ogc/bblocks/util.py +++ b/ogc/bblocks/util.py @@ -108,9 +108,6 @@ def __init__(self, identifier: str, metadata_file: Path, shacl_rules.append('rules.shacl') self.shacl_rules = [r if is_url(r) else fp / r for r in shacl_rules] - self.transforms_file = fp / 'transforms.yaml' - self.transforms = self._load_transforms() - def _load_examples(self): examples = None if self.examples_file.is_file(): @@ -126,25 +123,13 @@ def _load_examples(self): # Load snippet code from "ref" ref = snippet['ref'] if is_url(snippet['ref']) else self.files_path / snippet['ref'] snippet['code'] = load_file(ref) + for transform in example.get('transforms', ()): + if 'ref' in transform: + # Load transform code from "ref" + ref = transform['ref'] if is_url(transform['ref']) else self.files_path / transform['ref'] + transform['code'] = load_file(ref) return examples - def _load_transforms(self) -> list: - transforms = None - if self.transforms_file.is_file(): - transforms = load_yaml(self.transforms_file) - try: - jsonschema.validate(transforms, get_schema('transforms')) - except Exception as e: - raise BuildingBlockError('Error validating building block transforms') from e - - transforms = transforms.get('transforms', []) - for transform in transforms: - ref = transform['ref'] if is_url(transform['ref']) else self.files_path / transform['ref'] - transform['code'] = load_file(ref) - if isinstance(transform['mime-types']['source'], str): - transform['mime-types']['source'] = [transform['mime-types']['source']] - return transforms - @property def schema_contents(self): if 'schema_contents' not in self._lazy_properties: @@ -354,7 +339,6 @@ class TransformMetadata: type: str source_mime_type: str target_mime_type: str - source_ref: str | Path transform_content: AnyStr input_data: AnyStr metadata: Any | None = None