From c6e943ce30c755fcbab8c9456b72dca0429e2e54 Mon Sep 17 00:00:00 2001 From: Simon Templer Date: Wed, 7 Feb 2024 11:40:53 +0100 Subject: [PATCH] WIP inner join --- .../align/model/functions/JoinFunction.java | 6 + .../impl/FilterResourceIteratorAdapter.java | 132 ++++++++++++++++++ .../cst/functions/core/join/JoinHandler.java | 10 +- .../cst/functions/core/join/JoinIterator.java | 40 ++++-- 4 files changed, 175 insertions(+), 13 deletions(-) create mode 100644 common/plugins/eu.esdihumboldt.hale.common.instance/src/eu/esdihumboldt/hale/common/instance/model/impl/FilterResourceIteratorAdapter.java diff --git a/common/plugins/eu.esdihumboldt.hale.common.align/src/eu/esdihumboldt/hale/common/align/model/functions/JoinFunction.java b/common/plugins/eu.esdihumboldt.hale.common.align/src/eu/esdihumboldt/hale/common/align/model/functions/JoinFunction.java index bb7a994137..fc7b83f0c9 100644 --- a/common/plugins/eu.esdihumboldt.hale.common.align/src/eu/esdihumboldt/hale/common/align/model/functions/JoinFunction.java +++ b/common/plugins/eu.esdihumboldt.hale.common.align/src/eu/esdihumboldt/hale/common/align/model/functions/JoinFunction.java @@ -28,6 +28,12 @@ public interface JoinFunction { */ public static final String PARAMETER_JOIN = "join"; + /** + * Name of the parameter that specifies if an inner join should be + * performed. 
+ */ + public static final String PARAMETER_INNER_JOIN = "innerJoin"; + /** * the join function Id */ diff --git a/common/plugins/eu.esdihumboldt.hale.common.instance/src/eu/esdihumboldt/hale/common/instance/model/impl/FilterResourceIteratorAdapter.java b/common/plugins/eu.esdihumboldt.hale.common.instance/src/eu/esdihumboldt/hale/common/instance/model/impl/FilterResourceIteratorAdapter.java new file mode 100644 index 0000000000..25c3ae1ff2 --- /dev/null +++ b/common/plugins/eu.esdihumboldt.hale.common.instance/src/eu/esdihumboldt/hale/common/instance/model/impl/FilterResourceIteratorAdapter.java @@ -0,0 +1,132 @@ +/* + * Copyright (c) 2024 wetransform GmbH + * + * All rights reserved. This program and the accompanying materials are made + * available under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation, either version 3 of the License, + * or (at your option) any later version. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this distribution. If not, see <http://www.gnu.org/licenses/>. + * + * Contributors: + * wetransform GmbH + */ + +package eu.esdihumboldt.hale.common.instance.model.impl; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import eu.esdihumboldt.hale.common.instance.model.ResourceIterator; + +/** + * {@link ResourceIterator} adapter for a normal iterator that can perform a + * conversion from the iterator elements to a target element type. It filters + * out items that are converted to a null value. 
+ * + * @param <S> the source object type served by the wrapped iterator + * @param <T> the object type served by the resource iterator + * @author Simon Templer + */ +public abstract class FilterResourceIteratorAdapter implements ResourceIterator { + + /** + * The next matching instance + */ + private T preview; + + /** + * States if the value in {@link #preview} represents a valid element + */ + private boolean previewPresent; + + /** + * States if {@link #preview}/{@link #previewPresent} must be updated + */ + private boolean updatePreview = true; + + private final Iterator iterator; + + /** + * Create a {@link ResourceIterator} adapter for the given iterator. + * + * @param iterator the iterator to adapt + */ + public FilterResourceIteratorAdapter(Iterator iterator) { + super(); + this.iterator = iterator; + } + + @Override + public boolean hasNext() { + update(); // ensure previewPresent/preview are set + + return previewPresent; + } + + @Override + public T next() { + update(); // ensure previewPresent/preview are set + + if (!previewPresent) { + throw new NoSuchElementException(); + } + + updatePreview = true; // next time, update the preview + + return preview; + } + + /** + * Move {@link #preview} to the next non-null converted item if possible, + * update {@link #previewPresent}. + */ + private void update() { + if (updatePreview) { + previewPresent = false; + + // find first instance matching the filter + while (!previewPresent && iterator.hasNext()) { + S item = iterator.next(); + T converted = convert(item); + + if (converted != null) { + previewPresent = true; + preview = converted; + } + } + + if (!previewPresent) { + preview = null; + } + + updatePreview = false; + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException( + "Removing instances not supported on filtered collections"); + } + + /** + * Convert an object before it is returned by {@link #next()}. 
+ * + * @param next the object to convert + * @return the converted object or null if it should be skipped + */ + protected abstract T convert(S next); + + /** + * @see ResourceIterator#close() + */ + @Override + public void close() { + if (iterator instanceof ResourceIterator) { + ((ResourceIterator) iterator).close(); + } + } + +} diff --git a/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinHandler.java b/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinHandler.java index 079de93946..82fda96ad0 100644 --- a/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinHandler.java +++ b/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinHandler.java @@ -65,7 +65,7 @@ public ResourceIterator partitionInstances(InstanceCollection in String transformationIdentifier, TransformationEngine engine, ListMultimap transformationParameters, Map executionParameters, TransformationLog log) - throws TransformationException { + throws TransformationException { if (transformationParameters == null || !transformationParameters.containsKey(PARAMETER_JOIN) || transformationParameters.get(PARAMETER_JOIN).isEmpty()) { @@ -121,7 +121,13 @@ public ResourceIterator partitionInstances(InstanceCollection in iterator.close(); } + boolean innerJoin = false; // default to false if not specified + List innerJoinValues = transformationParameters.get(PARAMETER_INNER_JOIN); + if (!innerJoinValues.isEmpty()) { + innerJoin = innerJoinValues.get(0).as(Boolean.class, innerJoin); + } + return new JoinIterator(instances, startInstances, joinDefinition.directParent, index, - joinDefinition.joinTable, valueProcessor); + joinDefinition.joinTable, valueProcessor, innerJoin); } } \ No newline at end of file diff --git a/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinIterator.java 
b/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinIterator.java index 4dc903831f..be15cb9a80 100644 --- a/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinIterator.java +++ b/cst/plugins/eu.esdihumboldt.cst.functions.core/src/eu/esdihumboldt/cst/functions/core/join/JoinIterator.java @@ -32,14 +32,14 @@ import eu.esdihumboldt.hale.common.instance.model.InstanceCollection; import eu.esdihumboldt.hale.common.instance.model.InstanceReference; import eu.esdihumboldt.hale.common.instance.model.ResolvableInstanceReference; -import eu.esdihumboldt.hale.common.instance.model.impl.GenericResourceIteratorAdapter; +import eu.esdihumboldt.hale.common.instance.model.impl.FilterResourceIteratorAdapter; /** * Iterator used by {@link JoinHandler} * * @author Florian Esser */ -class JoinIterator extends GenericResourceIteratorAdapter { +class JoinIterator extends FilterResourceIteratorAdapter { private final InstanceCollection instances; // type -> direct-parent @@ -51,35 +51,43 @@ class JoinIterator extends GenericResourceIteratorAdapter startInstances, int[] parent, Map> index, - Map> joinTable, - ValueProcessor valueProcessor) { + Map> joinTable, ValueProcessor valueProcessor, + boolean innerJoin) { super(startInstances.iterator()); this.instances = instances; this.parent = parent; this.index = index; this.joinTable = joinTable; this.valueProcessor = valueProcessor; + this.innerJoin = innerJoin; + } - /** - * @see eu.esdihumboldt.hale.common.instance.model.impl.GenericResourceIteratorAdapter#convert(java.lang.Object) - */ @Override protected FamilyInstance convert(InstanceReference next) { FamilyInstance base = new FamilyInstanceImpl(instances.getInstance(next)); FamilyInstance[] currentInstances = new FamilyInstance[parent.length]; currentInstances[0] = base; - join(currentInstances, 0); + if (!join(currentInstances, 0)) { + // skip this instance + return null; + } return base; } -// 
Joins all direct children of the given type to currentInstances. - private void join(FamilyInstance[] currentInstances, int currentType) { + /** + * Joins all direct children of the given type to currentInstances. + * + * @return false if the instance should be skipped, true if it should be kept + */ + private boolean join(FamilyInstance[] currentInstances, int currentType) { // Join all types that are direct children of the last type. for (int i = currentType + 1; i < parent.length; i++) { if (parent[i] == currentType) { @@ -140,12 +148,22 @@ private void join(FamilyInstance[] currentInstances, int currentType) { } parent.addChild(child); currentInstances[i] = child; - join(currentInstances, i); + if (!join(currentInstances, i)) { + return false; + } } currentInstances[i] = null; } + else { + if (innerJoin) { + // no instances for this link + return false; + } + } } } + + return true; } /**