Skip to content

Commit

Permalink
Per-example transforms
Browse files Browse the repository at this point in the history
  • Loading branch information
avillar committed Oct 2, 2023
1 parent 30ce63e commit ed928e0
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 130 deletions.
40 changes: 40 additions & 0 deletions ogc/bblocks/examples-schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -88,3 +88,43 @@ items:
- code
- required:
- ref
transforms:
description: |
List of transforms for this example
type: array
items:
type: object
required:
- input-language
- output-language
- type
oneOf:
- required:
- code
- required:
- ref
properties:
input-language:
description: |
Input language of the code snippet from this example that will be transformed. It can correspond to a
manually-provided snippet, or to an uplifted one.
type: string
output-language:
description: |
Output language of the transformed snippet.
type: string
type:
description: |
The type of this transform. "jq" or "shacl" are examples of automatically processed ones.
type: string
description:
description: Textual description of this transformation. Markdown is accepted.
type: string
code:
description: Code contents of this transformation (e.g., jq script or SHACL rules file).
type: string
ref:
description: |
Location of a file with the code contents of this transformation (instead of
providing them inline through the "code" property).
type: string
6 changes: 6 additions & 0 deletions ogc/bblocks/mimetypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,9 @@ def lookup(t: str) -> dict | None:
if 'aliases' in entry and entry['aliases'] and t in entry['aliases']:
return entry


def normalize(t: str) -> str:
    """Return the canonical MIME type for ``t``, or ``t`` itself when unknown.

    ``t`` is resolved through ``lookup``; if that returns a truthy entry, the
    entry's ``'mime-type'`` value is returned, otherwise ``t`` is handed back
    unchanged.
    """
    entry = lookup(t)
    return entry['mime-type'] if entry else t
61 changes: 31 additions & 30 deletions ogc/bblocks/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,56 +15,57 @@
def apply_transforms(bblock: BuildingBlock,
                     outputs_path: str | Path,
                     output_subpath='transforms'):
    """Apply the transforms declared on each example to that example's snippets.

    For every example in ``bblock.examples`` that has both ``transforms`` and
    ``snippets``, each snippet whose normalized ``language`` equals the
    normalized ``input-language`` of a transform is passed to
    ``transformers.transform``. A truthy result is written to
    ``example_<E>_<S>-<T>.<ext>`` inside the output directory, where ``E``,
    ``S`` and ``T`` are the 1-based positions of the example, the snippet and
    the transform. If the transform (or writing its result) raises, a file
    containing the traceback is written instead and processing continues.

    The output directory, ``outputs_path / bblock.subdirs / output_subpath``,
    is removed and recreated on every call that gets past the "no examples"
    check.

    Side effect: every transform dict is updated in place. Its
    ``input-language`` and ``output-language`` are replaced by their normalized
    MIME types when known, and the keys ``output-extension`` and ``idx``
    (0-based position within the example) are added.

    :param bblock: building block whose ``examples`` and ``subdirs`` are read
    :param outputs_path: base directory for generated outputs
    :param output_subpath: subdirectory (below the building block's own
        subdirectories) that receives the transform results
    :return: ``None``
    """
    if not bblock.examples:
        return

    # Remove whatever a previous run left behind and start with an empty directory
    output_dir = Path(outputs_path) / bblock.subdirs / output_subpath
    shutil.rmtree(output_dir, ignore_errors=True)
    output_dir.mkdir(parents=True, exist_ok=True)

    for example_id, example in enumerate(bblock.examples):
        transforms = example.get('transforms')
        snippets = example.get('snippets')
        if not transforms or not snippets:
            continue

        # Group this example's transforms by the (normalized) language they accept
        transforms_by_input_lang = {}
        for idx, transform in enumerate(transforms):
            transform['input-language'] = mimetypes.normalize(transform['input-language'])
            output_lang = mimetypes.lookup(transform['output-language'])
            if output_lang:
                transform['output-extension'] = output_lang['extensions'][0]
                transform['output-language'] = output_lang['mime-type']
            else:
                # Unknown output language: reuse its name as the file extension.
                # Path separators are replaced because a MIME-type-like value
                # (e.g. "text/foo") would otherwise turn the output file name
                # into a path inside a directory that does not exist.
                transform['output-extension'] = (transform['output-language']
                                                 .replace('/', '_')
                                                 .replace('\\', '_'))

            transform['idx'] = idx
            transforms_by_input_lang.setdefault(transform['input-language'], []).append(transform)

        for snippet_id, snippet in enumerate(snippets):
            snippet_lang = snippet.get('language')
            if not snippet_lang:
                continue
            snippet_mime_type = mimetypes.normalize(snippet_lang)

            for transform in transforms_by_input_lang.get(snippet_mime_type, ()):
                output_ext = transform['output-extension']
                output_fn = output_dir / (f"example_{example_id + 1}_{snippet_id + 1}"
                                          f"-{transform['idx'] + 1}.{output_ext}")

                transform_metadata = TransformMetadata(type=transform['type'],
                                                       source_mime_type=transform['input-language'],
                                                       target_mime_type=transform['output-language'],
                                                       transform_content=transform['code'],
                                                       metadata=transform.get('metadata'),
                                                       input_data=snippet['code'])

                try:
                    transform_result = transformers.transform(transform_metadata)
                    if transform_result:
                        with open(output_fn, 'w') as f:
                            f.write(transform_result)

                # Deliberately best-effort: one failing transform must not stop
                # the others. "except Exception" (rather than a bare "except")
                # still lets KeyboardInterrupt and SystemExit propagate.
                except Exception:
                    # with_stem() keeps the original suffix, so the error file
                    # is named "<output file name>.error.<ext>"
                    with open(output_fn.with_stem(output_fn.name + '.error'), 'w') as f:
                        f.write('Error generating transformed file:\n')
                        f.write(traceback.format_exc())
79 changes: 0 additions & 79 deletions ogc/bblocks/transforms-schema.yaml

This file was deleted.

26 changes: 5 additions & 21 deletions ogc/bblocks/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,6 @@ def __init__(self, identifier: str, metadata_file: Path,
shacl_rules.append('rules.shacl')
self.shacl_rules = [r if is_url(r) else fp / r for r in shacl_rules]

self.transforms_file = fp / 'transforms.yaml'
self.transforms = self._load_transforms()

def _load_examples(self):
examples = None
if self.examples_file.is_file():
Expand All @@ -126,25 +123,13 @@ def _load_examples(self):
# Load snippet code from "ref"
ref = snippet['ref'] if is_url(snippet['ref']) else self.files_path / snippet['ref']
snippet['code'] = load_file(ref)
for transform in example.get('transforms', ()):
if 'ref' in transform:
# Load transform code from "ref"
ref = transform['ref'] if is_url(transform['ref']) else self.files_path / transform['ref']
transform['code'] = load_file(ref)
return examples

def _load_transforms(self) -> list:
    """Load and validate the building block's transforms file, if there is one.

    When ``self.transforms_file`` is not an existing file, ``None`` is
    returned. Otherwise the file is read with ``load_yaml`` and validated
    against the ``'transforms'`` schema; its ``transforms`` list (empty when
    the key is absent) is returned after each entry has been completed in
    place:

    * ``code`` is filled with the contents of the file or URL named by ``ref``
      (non-URL values are resolved against ``self.files_path``);
    * a string ``mime-types.source`` is wrapped into a one-element list.

    :raises BuildingBlockError: if schema validation fails
    """
    if not self.transforms_file.is_file():
        return None

    document = load_yaml(self.transforms_file)
    try:
        jsonschema.validate(document, get_schema('transforms'))
    except Exception as e:
        raise BuildingBlockError('Error validating building block transforms') from e

    entries = document.get('transforms', [])
    for entry in entries:
        location = entry['ref'] if is_url(entry['ref']) else self.files_path / entry['ref']
        entry['code'] = load_file(location)
        sources = entry['mime-types']['source']
        if isinstance(sources, str):
            entry['mime-types']['source'] = [sources]
    return entries

@property
def schema_contents(self):
if 'schema_contents' not in self._lazy_properties:
Expand Down Expand Up @@ -354,7 +339,6 @@ class TransformMetadata:
type: str
source_mime_type: str
target_mime_type: str
source_ref: str | Path
transform_content: AnyStr
input_data: AnyStr
metadata: Any | None = None
Expand Down

0 comments on commit ed928e0

Please sign in to comment.