diff --git a/common/plugins/eu.esdihumboldt.hale.common.align/plugin.xml b/common/plugins/eu.esdihumboldt.hale.common.align/plugin.xml index a02bb92f21..eb0bc92f31 100644 --- a/common/plugins/eu.esdihumboldt.hale.common.align/plugin.xml +++ b/common/plugins/eu.esdihumboldt.hale.common.align/plugin.xml @@ -375,6 +375,24 @@ + + + + + + partitionInstances(InstanceCollection in String transformationIdentifier, TransformationEngine engine, ListMultimap transformationParameters, Map executionParameters, TransformationLog log) - throws TransformationException { + throws TransformationException { if (transformationParameters == null || !transformationParameters.containsKey(PARAMETER_JOIN) @@ -122,7 +122,13 @@ public ResourceIterator partitionInstances(InstanceCollection in } } - return new IndexJoinIterator(startInstances, joinDefinition, indexService); + boolean innerJoin = false; // default to false if not specified + List innerJoinValues = transformationParameters.get(PARAMETER_INNER_JOIN); + if (!innerJoinValues.isEmpty()) { + innerJoin = innerJoinValues.get(0).as(Boolean.class, innerJoin); + } + + return new IndexJoinIterator(startInstances, joinDefinition, indexService, innerJoin); } /** diff --git a/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/IndexJoinIterator.java b/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/IndexJoinIterator.java index cbd1f9a323..d16e1e5853 100644 --- a/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/IndexJoinIterator.java +++ b/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/IndexJoinIterator.java @@ -36,7 +36,7 @@ import eu.esdihumboldt.hale.common.instance.index.InstanceIndexService; import eu.esdihumboldt.hale.common.instance.model.FamilyInstance; import eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference; -import 
eu.esdihumboldt.hale.common.instance.model.impl.GenericResourceIteratorAdapter; +import eu.esdihumboldt.hale.common.instance.model.impl.FilterResourceIteratorAdapter; /** * Iterator used by {@link IndexJoinHandler} @@ -44,21 +44,21 @@ * @author Florian Esser */ class IndexJoinIterator - extends GenericResourceIteratorAdapter { + extends FilterResourceIteratorAdapter { private final JoinDefinition joinDefinition; private final InstanceIndexService index; + private final boolean innerJoin; + protected IndexJoinIterator(Collection startInstances, - JoinDefinition joinDefinition, InstanceIndexService index) { + JoinDefinition joinDefinition, InstanceIndexService index, boolean innerJoin) { super(startInstances.iterator()); this.joinDefinition = joinDefinition; this.index = index; + this.innerJoin = innerJoin; } - /** - * @see eu.esdihumboldt.hale.common.instance.model.impl.GenericResourceIteratorAdapter#convert(java.lang.Object) - */ @Override protected FamilyInstance convert(ResolvableInstanceReference next) { FamilyInstance base = new FamilyInstanceImpl(next.resolve()); @@ -66,16 +66,21 @@ protected FamilyInstance convert(ResolvableInstanceReference next) { FamilyInstance[] currentInstances = new FamilyInstance[joinDefinition.directParent.length]; currentInstances[0] = base; - join(currentInstances, 0); + if (!join(currentInstances, 0)) { + // skip this instance + return null; + } return base; } /** * Joins all direct children of the given type to currentInstances. + * + * @return {@code false} if the instance should be skipped, {@code true} otherwise */ @SuppressWarnings("javadoc") - private void join(FamilyInstance[] currentInstances, int currentType) { + private boolean join(FamilyInstance[] currentInstances, int currentType) { // Join all types that are direct children of the last type. 
for (int i = currentType + 1; i < joinDefinition.directParent.length; i++) { if (joinDefinition.directParent[i] == currentType) { @@ -148,12 +153,21 @@ private void join(FamilyInstance[] currentInstances, int currentType) { child = new FamilyInstanceImpl(ref.resolve()); parent.addChild(child); currentInstances[i] = child; - join(currentInstances, i); + if (!join(currentInstances, i)) { + return false; + } } currentInstances[i] = null; } + else { + if (innerJoin) { + return false; + } + } } } + + return true; } /** diff --git a/cst/plugins/eu.esdihumboldt.cst.functions.groovy/plugin.xml b/cst/plugins/eu.esdihumboldt.cst.functions.groovy/plugin.xml index 80d71827b1..de1ae4c6be 100644 --- a/cst/plugins/eu.esdihumboldt.cst.functions.groovy/plugin.xml +++ b/cst/plugins/eu.esdihumboldt.cst.functions.groovy/plugin.xml @@ -276,6 +276,24 @@ ref="text"> + + + + + + transformData(TransformationExample example) throws Exception { ConceptualSchemaTransformer transformer = new ConceptualSchemaTransformer(); diff --git a/cst/plugins/eu.esdihumboldt.cst.test/src/eu/esdihumboldt/cst/test/TransformationExamples.groovy b/cst/plugins/eu.esdihumboldt.cst.test/src/eu/esdihumboldt/cst/test/TransformationExamples.groovy index bb4aa145cd..a27016f4ab 100644 --- a/cst/plugins/eu.esdihumboldt.cst.test/src/eu/esdihumboldt/cst/test/TransformationExamples.groovy +++ b/cst/plugins/eu.esdihumboldt.cst.test/src/eu/esdihumboldt/cst/test/TransformationExamples.groovy @@ -113,6 +113,9 @@ abstract class TransformationExamples { public static final String XSL_XPATH_1 = 'xpath1' public static final String INNER_JOIN = 'inner_join' + public static final String INNER_JOIN_FIRST_LEVEL = 'inner_join_first_level' + public static final String INNER_JOIN_CONDITIONS = 'inner_join_conditions' + public static final String JOIN = 'join' /** * Internal example map. 
@@ -230,7 +233,10 @@ abstract class TransformationExamples { (XSL_XPATH_1): defaultExample(XSL_XPATH_1), // builder based examples - (INNER_JOIN): builderExample(INNER_JOIN) + (INNER_JOIN): builderExample(INNER_JOIN), + (INNER_JOIN_FIRST_LEVEL): builderExample(INNER_JOIN_FIRST_LEVEL), + (INNER_JOIN_CONDITIONS): builderExample(INNER_JOIN_CONDITIONS), + (JOIN): builderExample(JOIN) ]; static def defaultExample(String folder) { diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex new file mode 100644 index 0000000000..67d18a14c8 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex @@ -0,0 +1,33 @@ + + + mapping + hale + 2024-02-07T10:28:58.718+01:00 + 2024-02-07T13:02:59.850+01:00 + + UTF-8 + false + eu.esdihumboldt.hale.io.project.hale25.xml + file:/home/simon/repos/hale/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex + + + UTF-8 + ec2ec25e-5940-4b91-8ccd-9ea9455a3dbc + source-schema.groovy + eu.esdihumboldt.hale.io.schemabuilder + + + UTF-8 + 646e49d6-a2f9-462c-8bfb-e6a2a47abef1 + target-schema.groovy + eu.esdihumboldt.hale.io.schemabuilder + + + UTF-8 + 95bd5843-dfc0-43be-a901-03e7b843d44a + source-instances.groovy + eu.esdihumboldt.hale.io.instancebuilder + + + + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex.alignment.xml b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex.alignment.xml new file mode 100644 index 0000000000..9d416c452a --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex.alignment.xml @@ -0,0 +1,121 @@ + + + + + + + a LIKE 'a%' + + + + + + + b like 'b%' + + + + + + + + + + + + + + + + + + + a LIKE 'a%' + + + + + b like 'b%' + + + + + + + + + a LIKE 'a%' + + + + + + b like 'b%' + + + + + + + + b like 'b%' + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex.styles.sld b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex.styles.sld new file mode 100644 index 0000000000..ad34f540f6 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/mapping.halex.styles.sld @@ -0,0 +1,3 @@ + + Default Styler + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/source-instances.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/source-instances.groovy new file mode 100644 index 0000000000..c7e33efe2b --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/source-instances.groovy @@ -0,0 +1,83 @@ +createCollection { + + /* + * Sets of instances that are joined + */ + + A { + a('a1') + } + B { + a('a1') + b('b1') + } + C { + b('b1') + c('c1') + } + + A { + a('a3') + } + B { + a('a3') + b('b1') + } + + /* + * Sets of instances that are not joined because of failing conditions + */ + + A { + a('a6') + } + B { + a('a1') + b('x6') + } + C { + b('x6') + c('c6') + } + + A { + a('x7') + } + B { + a('x7') + b('b7') + } + C { + b('b7') + c('c7') + } + + /* + * Sets of instances that are not joined because of missing links + */ + + A { + a('a2') + } + B { + a('a2') + b('b2') + } + C { + b('c2') + c('c2') + } + + A { + a('a4') + } + B { + a('a4') + b('b4') + } + + A { + a('a5') + } + +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/source-schema.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/source-schema.groovy new file mode 100644 index 0000000000..f36e161f99 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/source-schema.groovy @@ -0,0 +1,15 @@ +schema('source') { + 
A { + a() + } + + B { + a() + b(cardinality: '?') + } + + C { + b() + c(cardinality: '?') + } +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/target-instances.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/target-instances.groovy new file mode 100644 index 0000000000..bcffd2ac78 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/target-instances.groovy @@ -0,0 +1,15 @@ +createCollection { + + T { + a('a1') + b('b1') + c('c1') + } + + T { + a('a3') + b('b1') + c('c1') + } + +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/target-schema.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/target-schema.groovy new file mode 100644 index 0000000000..d1e90f9ce2 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_conditions/target-schema.groovy @@ -0,0 +1,7 @@ +schema('target') { + T { + a() + b() + c() + } +} diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex new file mode 100644 index 0000000000..cbab257b05 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex @@ -0,0 +1,33 @@ + + + mapping + hale + 2024-02-07T10:28:58.718+01:00 + 2024-02-07T12:55:12.584+01:00 + + UTF-8 + false + eu.esdihumboldt.hale.io.project.hale25.xml + file:/home/simon/repos/hale/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex + + + UTF-8 + ec2ec25e-5940-4b91-8ccd-9ea9455a3dbc + source-schema.groovy + eu.esdihumboldt.hale.io.schemabuilder + + + UTF-8 + 646e49d6-a2f9-462c-8bfb-e6a2a47abef1 + target-schema.groovy + eu.esdihumboldt.hale.io.schemabuilder + + + UTF-8 + 95bd5843-dfc0-43be-a901-03e7b843d44a + source-instances.groovy + 
eu.esdihumboldt.hale.io.instancebuilder + + + + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex.alignment.xml b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex.alignment.xml new file mode 100644 index 0000000000..77627e8112 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex.alignment.xml @@ -0,0 +1,107 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex.styles.sld b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex.styles.sld new file mode 100644 index 0000000000..ad34f540f6 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/mapping.halex.styles.sld @@ -0,0 +1,3 @@ + + Default Styler + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/source-instances.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/source-instances.groovy new file mode 100644 index 0000000000..d5298c7139 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/source-instances.groovy @@ -0,0 +1,43 @@ +createCollection { + + /* + * Sets of instances that are joined + */ + + A { + a('a1') + } + B1 { + a('a1') + b1('b1_1') + } + B2 { + a('a1') + b2('b1_2') + } + + /* + * Sets of instances that are not joined because of missing links + */ + + A { + a('a3') + } + B1 { + a('a3') + b1('b3') + } + + A { + a('a2') + } + B2 { + a('a2') + b2('b2') + } + + A { + a('a4') + } + +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/source-schema.groovy 
b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/source-schema.groovy new file mode 100644 index 0000000000..d9e50d6444 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/source-schema.groovy @@ -0,0 +1,15 @@ +schema('source') { + A { + a() + } + + B1 { + a() + b1() + } + + B2 { + a() + b2() + } +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/target-instances.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/target-instances.groovy new file mode 100644 index 0000000000..461505532c --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/target-instances.groovy @@ -0,0 +1,9 @@ +createCollection { + + T { + a('a1') + b1('b1_1') + b2('b1_2') + } + +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/target-schema.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/target-schema.groovy new file mode 100644 index 0000000000..ce10330aa6 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join_first_level/target-schema.groovy @@ -0,0 +1,7 @@ +schema('target') { + T { + a() + b1() + b2() + } +} diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex new file mode 100644 index 0000000000..74e7dfbc99 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex @@ -0,0 +1,33 @@ + + + mapping + hale + 2024-02-07T10:28:58.718+01:00 + 2024-02-07T10:39:07.594+01:00 + + UTF-8 + false + eu.esdihumboldt.hale.io.project.hale25.xml + file:/home/simon/repos/hale/cst/plugins/eu.esdihumboldt.cst.test/testdata/inner_join/mapping.halex + + + UTF-8 + ec2ec25e-5940-4b91-8ccd-9ea9455a3dbc + source-schema.groovy + eu.esdihumboldt.hale.io.schemabuilder + + + UTF-8 + 
646e49d6-a2f9-462c-8bfb-e6a2a47abef1 + target-schema.groovy + eu.esdihumboldt.hale.io.schemabuilder + + + UTF-8 + 95bd5843-dfc0-43be-a901-03e7b843d44a + source-instances.groovy + eu.esdihumboldt.hale.io.instancebuilder + + + + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex.alignment.xml b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex.alignment.xml new file mode 100644 index 0000000000..031d29c6b0 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex.alignment.xml @@ -0,0 +1,106 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex.styles.sld b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex.styles.sld new file mode 100644 index 0000000000..ad34f540f6 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/mapping.halex.styles.sld @@ -0,0 +1,3 @@ + + Default Styler + diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/source-instances.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/source-instances.groovy new file mode 100644 index 0000000000..6ec91bc764 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/source-instances.groovy @@ -0,0 +1,55 @@ +createCollection { + + /* + * Sets of instances that are joined + */ + + A { + a('a1') + } + B { + a('a1') + b('b1') + } + C { + b('b1') + c('c1') + } + + A { + a('a3') + } + B { + a('a3') + b('b1') + } + + /* + * Sets of instances that are not joined because of missing links + */ + + A { + a('a2') + } + B { + a('a2') + b('b2') + } + C { + b('c2') + c('c2') + } + + A { + a('a4') + } + B { + a('a4') + b('b4') + } + + A { + a('a5') + } + +} \ No newline at end of file diff --git 
a/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/source-schema.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/source-schema.groovy new file mode 100644 index 0000000000..f36e161f99 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/source-schema.groovy @@ -0,0 +1,15 @@ +schema('source') { + A { + a() + } + + B { + a() + b(cardinality: '?') + } + + C { + b() + c(cardinality: '?') + } +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/target-instances.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/target-instances.groovy new file mode 100644 index 0000000000..b2fed34e5e --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/target-instances.groovy @@ -0,0 +1,29 @@ +createCollection { + + T { + a('a1') + b('b1') + c('c1') + } + + T { + a('a3') + b('b1') + c('c1') + } + + T { + a('a2') + b('b2') + } + + T { + a('a4') + b('b4') + } + + T { + a('a5') + } + +} \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/target-schema.groovy b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/target-schema.groovy new file mode 100644 index 0000000000..d1e90f9ce2 --- /dev/null +++ b/cst/plugins/eu.esdihumboldt.cst.test/testdata/join/target-schema.groovy @@ -0,0 +1,7 @@ +schema('target') { + T { + a() + b() + c() + } +}