diff --git a/.github/workflows/create_backend_branch_image.yml b/.github/workflows/create_backend_branch_image.yml index d77318b4b8..1744bf614c 100644 --- a/.github/workflows/create_backend_branch_image.yml +++ b/.github/workflows/create_backend_branch_image.yml @@ -12,7 +12,7 @@ on: - master - release - staging/** - + env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }}-backend @@ -48,6 +48,7 @@ jobs: uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: context: . + file: backend.Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/create_backend_image.yml b/.github/workflows/create_backend_image.yml index c295e272f0..86e6ffb051 100644 --- a/.github/workflows/create_backend_image.yml +++ b/.github/workflows/create_backend_image.yml @@ -47,6 +47,7 @@ jobs: uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: context: . + file: backend.Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/create_frontend_branch_image.yml b/.github/workflows/create_frontend_branch_image.yml index 648fcbaa73..1a70e74c31 100644 --- a/.github/workflows/create_frontend_branch_image.yml +++ b/.github/workflows/create_frontend_branch_image.yml @@ -12,7 +12,7 @@ on: - master - release - staging/* - + env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }}-frontend @@ -47,7 +47,8 @@ jobs: - name: Build and push Docker image uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: - context: ./frontend + context: . + file: frontend.Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/create_frontend_image.yml b/.github/workflows/create_frontend_image.yml index 7bd5a04435..4ae5ba5102 100644 --- a/.github/workflows/create_frontend_image.yml +++ b/.github/workflows/create_frontend_image.yml @@ -44,7 +44,8 @@ jobs: - name: Build and push Docker image uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: - context: ./frontend + context: . + file: frontend.Dockerfile push: true tags: ${{ steps.meta.outputs.tags }} labels: ${{ steps.meta.outputs.labels }} diff --git a/.github/workflows/test_build_backend_image.yml b/.github/workflows/test_build_backend_image.yml index d7991ae1e5..88fa597fb8 100644 --- a/.github/workflows/test_build_backend_image.yml +++ b/.github/workflows/test_build_backend_image.yml @@ -12,13 +12,13 @@ on: - develop - master paths: - - 'backend/**' - - 'executable/**' - - 'Dockerfile' - - 'pom.xml' - - 'lombok.config' - - 'scripts/**' - - '.github/workflows/test_build_backend_image.yml' + - "backend/**" + - "executable/**" + - "Dockerfile" + - "pom.xml" + - "lombok.config" + - "scripts/**" + - ".github/workflows/test_build_backend_image.yml" env: REGISTRY: ghcr.io @@ -42,4 +42,5 @@ jobs: uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: context: . 
+ file: backend.Dockerfile push: false diff --git a/.github/workflows/test_build_frontend_image.yml b/.github/workflows/test_build_frontend_image.yml index 53f901f2f5..7badc1589e 100644 --- a/.github/workflows/test_build_frontend_image.yml +++ b/.github/workflows/test_build_frontend_image.yml @@ -12,9 +12,9 @@ on: - develop - master paths: - - 'frontend/*' - - 'scripts/*' - - '.github/workflows/test_build_frontend_image.yml' + - "frontend/*" + - "scripts/*" + - ".github/workflows/test_build_frontend_image.yml" env: REGISTRY: ghcr.io IMAGE_NAME: ${{ github.repository }}-frontend @@ -36,5 +36,6 @@ jobs: - name: Build docker image uses: docker/build-push-action@ad44023a93711e3deb337508980b4b5e9bcdc5dc with: - context: ./frontend + context: . + file: frontend.Dockerfile push: false diff --git a/Dockerfile b/backend.Dockerfile similarity index 100% rename from Dockerfile rename to backend.Dockerfile diff --git a/backend/src/main/java/com/bakdata/conquery/Conquery.java b/backend/src/main/java/com/bakdata/conquery/Conquery.java index 0e28353dfe..d41e183163 100644 --- a/backend/src/main/java/com/bakdata/conquery/Conquery.java +++ b/backend/src/main/java/com/bakdata/conquery/Conquery.java @@ -3,7 +3,6 @@ import javax.validation.Validator; import ch.qos.logback.classic.Level; -import com.bakdata.conquery.commands.CollectEntitiesCommand; import com.bakdata.conquery.commands.DistributedStandaloneCommand; import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.commands.MigrateCommand; @@ -54,7 +53,6 @@ public void initialize(Bootstrap bootstrap) { bootstrap.addCommand(new ShardNode()); bootstrap.addCommand(new PreprocessorCommand()); - bootstrap.addCommand(new CollectEntitiesCommand()); bootstrap.addCommand(new DistributedStandaloneCommand(this)); bootstrap.addCommand(new RecodeStoreCommand()); bootstrap.addCommand(new MigrateCommand()); diff --git a/backend/src/main/java/com/bakdata/conquery/ConqueryConstants.java b/backend/src/main/java/com/bakdata/conquery/ConqueryConstants.java index de7e4077b9..4c21a4cc77 100644 --- a/backend/src/main/java/com/bakdata/conquery/ConqueryConstants.java +++ b/backend/src/main/java/com/bakdata/conquery/ConqueryConstants.java @@ -45,7 +45,6 @@ public class ConqueryConstants { OBSERVATION_SCOPE_INFO = new LocalizedDefaultResultInfo((l) -> C10N.get(ResultHeadersC10n.class, l).observationScope(), new ResultType.StringT(FeatureGroup::localizeValue), Set.of()); - public static final String PRIMARY_DICTIONARY = "PRIMARY_DICTIONARY"; /** * Drawn from random.org */ diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java index 03cda498ba..955b7cb42c 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/FilterTemplate.java @@ -8,7 +8,6 @@ import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.concepts.Searchable; @@ -40,7 +39,7 @@ @ToString @Slf4j @CPSType(id = "CSV_TEMPLATE", base = SearchIndex.class) -public class FilterTemplate extends IdentifiableImpl implements Searchable, SearchIndex { +public class FilterTemplate extends IdentifiableImpl implements Searchable, 
SearchIndex { private static final long serialVersionUID = 1L; @@ -90,7 +89,7 @@ public boolean isSearchDisabled() { return false; } - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { + public TrieSearch createTrieSearch(IndexConfig config) { final URI resolvedURI = FileUtil.getResolvedUri(config.getBaseUrl(), getFilePath()); log.trace("Resolved filter template reference url for search '{}': {}", this.getId(), resolvedURI); diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java new file mode 100644 index 0000000000..51663d3dfd --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/LabelMap.java @@ -0,0 +1,71 @@ +package com.bakdata.conquery.apiv1; + +import java.util.List; +import java.util.stream.Collectors; + +import com.bakdata.conquery.apiv1.frontend.FrontendValue; +import com.bakdata.conquery.models.config.IndexConfig; +import com.bakdata.conquery.models.datasets.concepts.Searchable; +import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; +import com.bakdata.conquery.models.query.FilterSearch; +import com.bakdata.conquery.util.search.TrieSearch; +import com.google.common.collect.BiMap; +import lombok.EqualsAndHashCode; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.experimental.Delegate; +import lombok.extern.slf4j.Slf4j; +import org.apache.commons.lang3.time.StopWatch; + +@Getter +@RequiredArgsConstructor +@Slf4j +@EqualsAndHashCode +public class LabelMap implements Searchable { + + private final FilterId id; + @Delegate + private final BiMap delegate; + private final int minSuffixLength; + private final boolean generateSearchSuffixes; + + @Override + public TrieSearch createTrieSearch(IndexConfig config) { + + final TrieSearch search = config.createTrieSearch(true); + + final List collected = delegate.entrySet().stream() + .map(entry -> new FrontendValue(entry.getKey(), entry.getValue())) + .collect(Collectors.toList()); + + if (log.isTraceEnabled()) { + log.trace("Labels for {}: `{}`", getId(), collected.stream().map(FrontendValue::toString).collect(Collectors.toList())); + } + + StopWatch timer = StopWatch.createStarted(); + log.trace("START-SELECT ADDING_ITEMS for {}", getId()); + + collected.forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue))); + + log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer); + + timer.reset(); + log.trace("START-SELECT SHRINKING for {}", getId()); + + search.shrinkToFit(); + + log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer); + + return search; + } + + @Override + public boolean isGenerateSuffixes() { + return generateSearchSuffixes; + } + + @Override + public boolean isSearchDisabled() { + return false; + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java b/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java index e296826a31..77d97a265f 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/QueryProcessor.java @@ -555,15 +555,14 @@ public Stream> resolveEntities(Subject subject, List resultInfos = query.getResultInfos(); + public ResultStatistics getResultStatistics(SingleTableResult managedQuery) { + final List resultInfos = managedQuery.getResultInfos(); final Optional dateInfo = - query.getResultInfos().stream().filter(info -> info.getSemantics().contains(new 
SemanticType.EventDateT())).findFirst(); + resultInfos.stream().filter(info -> info.getSemantics().contains(new SemanticType.EventDateT())).findFirst(); - final int dateIndex = dateInfo.map(resultInfos::indexOf).orElse(0 /*Discarded if dateInfo is not present*/); + final Optional dateIndex = dateInfo.map(resultInfos::indexOf); final Locale locale = I18n.LOCALE.get(); final NumberFormat decimalFormat = NumberFormat.getNumberInstance(locale); @@ -576,7 +575,6 @@ public ResultStatistics getResultStatistics(ManagedQuery managedQuery) { new PrintSettings(true, locale, managedQuery.getNamespace(), config, null, null, decimalFormat, integerFormat); final UniqueNamer uniqueNamer = new UniqueNamer(printSettings); - return ResultStatistics.collectResultStatistics(managedQuery, resultInfos, dateInfo, dateIndex, printSettings, uniqueNamer, config); } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/auth/ProtoRole.java b/backend/src/main/java/com/bakdata/conquery/apiv1/auth/ProtoRole.java new file mode 100644 index 0000000000..cdd8a15a77 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/auth/ProtoRole.java @@ -0,0 +1,60 @@ +package com.bakdata.conquery.apiv1.auth; + +import java.util.Collections; +import java.util.Objects; +import java.util.Set; + +import javax.validation.constraints.NotEmpty; +import javax.validation.constraints.NotNull; + +import com.bakdata.conquery.io.storage.MetaStorage; +import com.bakdata.conquery.models.auth.entities.Role; +import com.bakdata.conquery.models.auth.permissions.WildcardPermission; +import com.bakdata.conquery.models.identifiable.ids.specific.RoleId; +import com.fasterxml.jackson.annotation.JsonIgnore; +import lombok.Builder; +import lombok.Getter; +import lombok.NonNull; + +/** + * Factory class to create configured initial roles. + + */ +@Getter +@Builder +public class ProtoRole { + + private String label; + + @NotEmpty + private final String name; + + /** + * String permissions in the form of + * {@link org.apache.shiro.authz.permission.WildcardPermission}, that the user + * should hold after initialization. 
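
For orientation, a minimal usage sketch for the new LabelMap introduced above. The FilterId and IndexConfig instances are assumed to come from the surrounding filter setup, and the TrieSearch query API itself is not part of this diff; this only shows how the search structure is built.

```java
import com.bakdata.conquery.apiv1.LabelMap;
import com.bakdata.conquery.apiv1.frontend.FrontendValue;
import com.bakdata.conquery.models.config.IndexConfig;
import com.bakdata.conquery.models.identifiable.ids.specific.FilterId;
import com.bakdata.conquery.util.search.TrieSearch;
import com.google.common.collect.BiMap;
import com.google.common.collect.HashBiMap;

class LabelMapSketch {
	// Sketch only: filterId and indexConfig are assumed to be provided by the caller.
	static TrieSearch<FrontendValue> buildLabelSearch(FilterId filterId, IndexConfig indexConfig) {
		// The BiMap delegate maps raw values to their display labels.
		BiMap<String, String> labels = HashBiMap.create();
		labels.put("F", "female");
		labels.put("M", "male");

		// Constructor order follows LabelMap's @RequiredArgsConstructor:
		// id, delegate, minSuffixLength, generateSearchSuffixes.
		LabelMap labelMap = new LabelMap(filterId, labels, 3, true);

		// Builds and shrinks the TrieSearch exactly as createTrieSearch above does.
		return labelMap.createTrieSearch(indexConfig);
	}
}
```
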
+ */ + @Builder.Default + @NotNull + private Set permissions = Collections.emptySet(); + + public Role createOrOverwriteRole(@NonNull MetaStorage storage) { + label = Objects.requireNonNullElse(label, name); + + + Role role = new Role(name, label, storage); + + storage.updateRole(role); + + for (String permission : permissions) { + role.addPermission(new WildcardPermission(permission)); + } + + return role; + } + + @JsonIgnore + public RoleId createId() { + return new RoleId(name); + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/auth/ProtoUser.java b/backend/src/main/java/com/bakdata/conquery/apiv1/auth/ProtoUser.java index 5803a98b58..dd3fcba698 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/auth/ProtoUser.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/auth/ProtoUser.java @@ -10,26 +10,29 @@ import com.bakdata.conquery.io.storage.MetaStorage; import com.bakdata.conquery.models.auth.UserManageable; import com.bakdata.conquery.models.auth.basic.LocalAuthenticationRealm; +import com.bakdata.conquery.models.auth.entities.Role; import com.bakdata.conquery.models.auth.entities.User; import com.bakdata.conquery.models.auth.permissions.WildcardPermission; +import com.bakdata.conquery.models.identifiable.ids.specific.RoleId; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.Builder; import lombok.Getter; import lombok.NonNull; +import lombok.extern.slf4j.Slf4j; /** * Factory class to create configured initial users. - */ @Getter @Builder +@Slf4j public class ProtoUser { - private String label; @NotEmpty private final String name; - + private final Set roles; + private String label; /** * String permissions in the form of * {@link org.apache.shiro.authz.permission.WildcardPermission}, that the user @@ -51,11 +54,29 @@ public User createOrOverwriteUser(@NonNull MetaStorage storage) { if (label == null) { label = name; } - User user = new User(name, label, storage); + + final User user = new User(name, label, storage); storage.updateUser(user); + + if (roles != null){ + for (String roleId : roles) { + final Role role = storage.getRole(new RoleId(roleId)); + + if(role == null){ + log.warn("Unknown Role[{}] for {}", roleId, this); + continue; + } + + user.addRole(role); + } + } + + for (String sPermission : permissions) { user.addPermission(new WildcardPermission(sPermission)); } + + return user; } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/forms/export_form/FullExportForm.java b/backend/src/main/java/com/bakdata/conquery/apiv1/forms/export_form/FullExportForm.java index f457c1a04f..aa392f2710 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/forms/export_form/FullExportForm.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/forms/export_form/FullExportForm.java @@ -127,6 +127,6 @@ public String getLocalizedTypeLabel() { @Override public ManagedInternalForm toManagedExecution(User user, Dataset submittedDataset, MetaStorage storage) { - return new ManagedInternalForm(this, user, submittedDataset, storage); + return new ManagedInternalForm<>(this, user, submittedDataset, storage); } } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/frontend/FrontendConfiguration.java b/backend/src/main/java/com/bakdata/conquery/apiv1/frontend/FrontendConfiguration.java index 57fcfa27ce..9afa35d5be 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/frontend/FrontendConfiguration.java +++ 
b/backend/src/main/java/com/bakdata/conquery/apiv1/frontend/FrontendConfiguration.java @@ -2,6 +2,7 @@ import java.net.URL; import java.time.LocalDate; +import java.util.Map; import com.bakdata.conquery.models.config.FrontendConfig; import com.bakdata.conquery.models.config.IdColumnConfig; @@ -9,14 +10,17 @@ /** * API Response for the dynamic configuration of the frontend * - * @param version backend version - * @param currency currency representation - * @param queryUpload identifier specific column configuration for the query upload - * @param manualUrl url to a user manual - * @param contactEmail typical a mailto-url + * @param version backend version + * @param formBackendVersions mapping of form backend ids to their versions (version can be null) + * @param currency currency representation + * @param queryUpload identifier specific column configuration for the query upload + * @param manualUrl url to a user manual + * @param contactEmail typical a mailto-url */ public record FrontendConfiguration( String version, + + Map formBackendVersions, FrontendConfig.CurrencyConfig currency, IdColumnConfig queryUpload, URL manualUrl, diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQElement.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQElement.java index 8f73997881..ff9ad66218 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQElement.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQElement.java @@ -86,6 +86,6 @@ public void visit(Consumer visitor) { } public RequiredEntities collectRequiredEntities(QueryExecutionContext context) { - return new RequiredEntities(context.getBucketManager().getEntities().keySet()); + return new RequiredEntities(context.getBucketManager().getEntities()); } } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQYes.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQYes.java index 3511a78155..8bdc6a00d6 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQYes.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/CQYes.java @@ -6,10 +6,8 @@ import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; -import com.bakdata.conquery.models.query.QueryExecutionContext; import com.bakdata.conquery.models.query.QueryPlanContext; import com.bakdata.conquery.models.query.QueryResolveContext; -import com.bakdata.conquery.models.query.RequiredEntities; import com.bakdata.conquery.models.query.queryplan.ConceptQueryPlan; import com.bakdata.conquery.models.query.queryplan.QPNode; import com.bakdata.conquery.models.query.queryplan.specific.Yes; @@ -37,9 +35,4 @@ public void collectRequiredQueries(Set requiredQueries) { public List getResultInfos() { return Collections.emptyList(); } - - @Override - public RequiredEntities collectRequiredEntities(QueryExecutionContext context) { - return new RequiredEntities(context.getBucketManager().getEntities().keySet()); - } } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/EditorQuery.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/EditorQuery.java index 84e2a46bc7..21c2096e1f 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/EditorQuery.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/EditorQuery.java @@ -3,10 +3,9 @@ import com.bakdata.conquery.apiv1.execution.ExecutionStatus; import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.models.query.ManagedQuery; 
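
A hedged sketch of how the new ProtoRole and the roles field added to ProtoUser fit together during authorization bootstrapping. The MetaStorage instance, role/user names, and permission strings are illustrative only and not taken from this diff.

```java
import java.util.Set;

import com.bakdata.conquery.apiv1.auth.ProtoRole;
import com.bakdata.conquery.apiv1.auth.ProtoUser;
import com.bakdata.conquery.io.storage.MetaStorage;
import com.bakdata.conquery.models.auth.entities.Role;
import com.bakdata.conquery.models.auth.entities.User;

class InitialAuthSketch {
	// Sketch only: names and permission strings are hypothetical.
	static void bootstrapInitialAuth(MetaStorage storage) {
		Role admins = ProtoRole.builder()
				.name("admins")
				.permissions(Set.of("admin:*"))
				.build()
				.createOrOverwriteRole(storage);

		User alice = ProtoUser.builder()
				.name("alice")
				.label("Alice")
				// Role ids are resolved via MetaStorage#getRole; unknown ids are only logged and skipped.
				.roles(Set.of("admins"))
				.build()
				.createOrOverwriteUser(storage);
	}
}
```
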
-import com.bakdata.conquery.sql.conquery.SqlManagedQuery; /** - * Common abstraction for intersecting parts of {@link ManagedQuery} and {@link SqlManagedQuery}. + * Common abstraction for intersecting parts of {@link ManagedQuery}. */ public interface EditorQuery { diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/QueryDescription.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/QueryDescription.java index 6f551e15c8..091d78a5e3 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/QueryDescription.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/QueryDescription.java @@ -112,6 +112,6 @@ public static void authorizeQuery(QueryDescription queryDescription, Subject sub } default RequiredEntities collectRequiredEntities(QueryExecutionContext context){ - return new RequiredEntities(context.getBucketManager().getEntities().keySet()); + return new RequiredEntities(context.getBucketManager().getEntities()); } } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/SecondaryIdQuery.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/SecondaryIdQuery.java index 8a9138bf05..4c413a3678 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/SecondaryIdQuery.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/SecondaryIdQuery.java @@ -6,7 +6,6 @@ import java.util.Set; import java.util.function.Consumer; -import javax.annotation.CheckForNull; import javax.validation.constraints.NotNull; import com.bakdata.conquery.apiv1.query.concept.filter.CQTable; @@ -110,7 +109,7 @@ public void resolve(final QueryResolveContext context) { for (CQTable connector : concept.getTables()) { final Table table = connector.getConnector().getTable(); - final Column secondaryIdColumn = findSecondaryIdColumn(table); + final Column secondaryIdColumn = table.findSecondaryIdColumn(secondaryId); if (secondaryIdColumn != null && !concept.isExcludeFromSecondaryId()) { withSecondaryId.add(secondaryIdColumn); @@ -127,23 +126,6 @@ public void resolve(final QueryResolveContext context) { } } - /** - * selects the right column for the given secondaryId from a table - */ - @CheckForNull - private Column findSecondaryIdColumn(Table table) { - - for (Column col : table.getColumns()) { - if (col.getSecondaryId() == null || !secondaryId.equals(col.getSecondaryId())) { - continue; - } - - return col; - } - - return null; - } - @Override public List getResultInfos() { final List resultInfos = new ArrayList<>(); @@ -170,4 +152,4 @@ public CQElement getReusableComponents() { public RequiredEntities collectRequiredEntities(QueryExecutionContext context) { return query.collectRequiredEntities(context); } -} \ No newline at end of file +} diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/CQTable.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/CQTable.java index b858c3741d..b165cfbd59 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/CQTable.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/CQTable.java @@ -1,7 +1,9 @@ package com.bakdata.conquery.apiv1.query.concept.filter; +import java.util.Arrays; import java.util.Collections; import java.util.List; +import java.util.Objects; import javax.annotation.CheckForNull; import javax.validation.Valid; @@ -10,6 +12,8 @@ import com.bakdata.conquery.apiv1.query.concept.specific.CQConcept; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; import 
com.bakdata.conquery.io.jackson.serializer.NsIdRefCollection; +import com.bakdata.conquery.models.datasets.Column; +import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.datasets.concepts.Connector; import com.bakdata.conquery.models.datasets.concepts.ValidityDate; import com.bakdata.conquery.models.datasets.concepts.select.Select; @@ -88,4 +92,11 @@ public ValidityDate findValidityDate() { return null; } + public boolean hasSelectedSecondaryId(SecondaryIdDescription secondaryId) { + return Arrays.stream(connector.getTable().getColumns()) + .map(Column::getSecondaryId) + .filter(Objects::nonNull) + .anyMatch(o -> Objects.equals(secondaryId, o)); + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/FilterValue.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/FilterValue.java index 0cd55977d1..e3b11b0868 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/FilterValue.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/filter/FilterValue.java @@ -21,9 +21,9 @@ import com.bakdata.conquery.models.query.queryplan.filter.FilterNode; import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.cqelement.concept.FilterContext; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.filter.SqlFilters; -import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonTypeInfo; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonProcessingException; @@ -32,7 +32,6 @@ import com.fasterxml.jackson.databind.annotation.JsonDeserialize; import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import com.fasterxml.jackson.databind.exc.InvalidTypeIdException; -import io.dropwizard.validation.ValidationMethod; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; @@ -50,10 +49,7 @@ @EqualsAndHashCode @ToString(of = "value") public abstract class FilterValue { - /** - * Very large SELECT FilterValues can cause issues, so we just limit it to large but not gigantic quantities. 
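
The SecondaryIdQuery hunk above now delegates to Table#findSecondaryIdColumn, which is not shown in this diff. A sketch of what that method presumably looks like, mirroring the private helper that was removed from SecondaryIdQuery:

```java
// Presumed shape of the new Table method; mirrors the removed SecondaryIdQuery#findSecondaryIdColumn.
@CheckForNull
public Column findSecondaryIdColumn(SecondaryIdDescription secondaryId) {
	for (Column column : getColumns()) {
		if (secondaryId.equals(column.getSecondaryId())) {
			return column;
		}
	}
	return null;
}
```
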
- */ - private static final int MAX_NUMBER_FILTER_VALUES = 20_000; + @NotNull @Nonnull @NsIdRef @@ -70,8 +66,8 @@ public FilterNode createNode() { return getFilter().createFilterNode(getValue()); } - public SqlFilters convertToSqlFilter(ConversionContext context, SqlTables connectorTables) { - FilterContext filterContext = new FilterContext<>(value, context, connectorTables); + public SqlFilters convertToSqlFilter(SqlIdColumns ids, ConversionContext context, SqlTables connectorTables) { + FilterContext filterContext = new FilterContext<>(ids, value, context, connectorTables); SqlFilters sqlFilters = filter.convertToSqlFilter(filterContext); if (context.isNegation()) { return new SqlFilters(sqlFilters.getSelects(), sqlFilters.getWhereClauses().negated()); @@ -87,11 +83,6 @@ public CQMultiSelectFilter(@NsIdRef Filter filter, String[] value) { super(filter, value); } - @ValidationMethod(message = "Too many values selected.") - @JsonIgnore - public boolean isSaneAmountOfFilterValues() { - return getValue().length < MAX_NUMBER_FILTER_VALUES; - } } @NoArgsConstructor @@ -102,11 +93,6 @@ public CQBigMultiSelectFilter(@NsIdRef Filter filter, String[] value) super(filter, value); } - @ValidationMethod(message = "Too many values selected.") - @JsonIgnore - public boolean isSaneAmountOfFilterValues() { - return getValue().length < MAX_NUMBER_FILTER_VALUES; - } } @NoArgsConstructor diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQConcept.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQConcept.java index bed4ace016..e2f988f019 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQConcept.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQConcept.java @@ -1,11 +1,9 @@ package com.bakdata.conquery.apiv1.query.concept.specific; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Locale; -import java.util.Objects; import java.util.Set; import java.util.stream.Collectors; @@ -21,7 +19,6 @@ import com.bakdata.conquery.io.jackson.View; import com.bakdata.conquery.io.jackson.serializer.NsIdRefCollection; import com.bakdata.conquery.models.common.CDateSet; -import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.ConceptElement; import com.bakdata.conquery.models.datasets.concepts.Connector; @@ -234,12 +231,7 @@ public QPNode createQueryPlan(QueryPlanContext context, ConceptQueryPlan plan) { existsAggregators.forEach(agg -> agg.setReference(conceptSpecificNode)); // Select if matching secondaryId available - final boolean hasSelectedSecondaryId = - Arrays.stream(table.getConnector().getTable().getColumns()) - .map(Column::getSecondaryId) - .filter(Objects::nonNull) - .anyMatch(o -> Objects.equals(context.getSelectedSecondaryId(), o)); - + final boolean hasSelectedSecondaryId = table.hasSelectedSecondaryId(context.getSelectedSecondaryId()); final ConceptNode node = new ConceptNode( conceptSpecificNode, diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQNegation.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQNegation.java index 381cd44442..1957f5c29f 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQNegation.java +++ 
b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/CQNegation.java @@ -11,10 +11,8 @@ import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.io.jackson.View; import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; -import com.bakdata.conquery.models.query.QueryExecutionContext; import com.bakdata.conquery.models.query.QueryPlanContext; import com.bakdata.conquery.models.query.QueryResolveContext; -import com.bakdata.conquery.models.query.RequiredEntities; import com.bakdata.conquery.models.query.Visitable; import com.bakdata.conquery.models.query.queryplan.ConceptQueryPlan; import com.bakdata.conquery.models.query.queryplan.DateAggregationAction; @@ -78,15 +76,4 @@ public void visit(Consumer visitor) { super.visit(visitor); child.visit(visitor); } - - @Override - public RequiredEntities collectRequiredEntities(QueryExecutionContext context) { - /* - * We cannot realistically handle negation as that would require translating most if not all query logic into collectRequiredTables. Specifically CQConcept/Filters. - * - * Additionally, it would require collectRequiredEntities to be perfect, instead of being good enough, - * since excluding entities that _might_ not be included would exclude them from evaluation. - */ - return new RequiredEntities(context.getBucketManager().getEntities().keySet()); - } } diff --git a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/external/CQExternal.java b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/external/CQExternal.java index 438b5d17e2..96cda4b7a1 100644 --- a/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/external/CQExternal.java +++ b/backend/src/main/java/com/bakdata/conquery/apiv1/query/concept/specific/external/CQExternal.java @@ -42,7 +42,6 @@ import com.fasterxml.jackson.annotation.JsonView; import com.google.common.collect.Streams; import io.dropwizard.validation.ValidationMethod; -import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import lombok.AccessLevel; import lombok.Data; import lombok.Getter; @@ -84,7 +83,7 @@ public class CQExternal extends CQElement { */ @Getter(AccessLevel.PRIVATE) @JsonView(View.InternalCommunication.class) - private Map valuesResolved; + private Map valuesResolved; @Getter(AccessLevel.PRIVATE) @JsonView(View.InternalCommunication.class) @@ -99,7 +98,7 @@ public class CQExternal extends CQElement { */ @Getter(AccessLevel.PRIVATE) @JsonView(View.InternalCommunication.class) - private Map>> extra; + private Map>> extra; public CQExternal(List format, @NotEmpty String[][] values, boolean onlySingles) { this.format = format; @@ -107,6 +106,10 @@ public CQExternal(List format, @NotEmpty String[][] values, boolean only this.onlySingles = onlySingles; } + public boolean containsDates() { + return format.stream().anyMatch(DateFormat.NAMES::contains); + } + @Override public QPNode createQueryPlan(QueryPlanContext context, ConceptQueryPlan plan) { if (valuesResolved == null) { @@ -135,7 +138,7 @@ public void collectRequiredQueries(Set requiredQueries) { private ExternalNode createExternalNodeOnlySingle(QueryPlanContext context, ConceptQueryPlan plan, String[] extraHeaders) { // Remove zero element Lists and substitute one element Lists by containing String - final Map> extraFlat = extra.entrySet().stream() + final Map> extraFlat = extra.entrySet().stream() .collect(Collectors.toMap( Map.Entry::getKey, entityToRowMap -> entityToRowMap.getValue().entrySet().stream() @@ -186,10 +189,10 
@@ private ExternalNode> createExternalNodeForList(QueryPlanContext co private static CDateSet[] readDates(String[][] values, List format, DateReader dateReader) { final CDateSet[] out = new CDateSet[values.length]; - List dateFormats = format.stream() - .map(CQExternal::resolveDateFormat) - // Don't use Stream#toList to preserve null-values - .collect(Collectors.toList()); + final List dateFormats = format.stream() + .map(CQExternal::resolveDateFormat) + // Don't use Stream#toList to preserve null-values + .collect(Collectors.toList()); /* @@ -278,13 +281,13 @@ public void resolve(QueryResolveContext context) { public static class ResolveStatistic { @JsonIgnore - private final Map resolved; + private final Map resolved; /** * Entity -> Column -> Values */ @JsonIgnore - private final Map>> extra; + private final Map>> extra; private final List unreadableDate; private final List unresolvedId; @@ -295,7 +298,7 @@ public static class ResolveStatistic { * Helper method to try and resolve entities in values using the specified format. */ public static ResolveStatistic resolveEntities(@NotEmpty String[][] values, @NotEmpty List format, EntityIdMap mapping, IdColumnConfig idColumnConfig, @NotNull DateReader dateReader, boolean onlySingles) { - final Map resolved = new Int2ObjectOpenHashMap<>(); + final Map resolved = new HashMap<>(); final List unresolvedDate = new ArrayList<>(); final List unresolvedId = new ArrayList<>(); @@ -315,7 +318,7 @@ public static ResolveStatistic resolveEntities(@NotEmpty String[][] values, @Not } // Entity -> Column -> Values - final Map>> extraDataByEntity = new HashMap<>(); + final Map>> extraDataByEntity = new HashMap<>(); // ignore the first row, because this is the header for (int rowNum = 1; rowNum < values.length; rowNum++) { @@ -327,9 +330,9 @@ public static ResolveStatistic resolveEntities(@NotEmpty String[][] values, @Not continue; } - int resolvedId = tryResolveId(row, readers, mapping); + String resolvedId = tryResolveId(row, readers, mapping); - if (resolvedId == -1) { + if (resolvedId == null) { unresolvedId.add(row); continue; } @@ -366,8 +369,8 @@ public static ResolveStatistic resolveEntities(@NotEmpty String[][] values, @Not * Try to extract a {@link com.bakdata.conquery.models.identifiable.mapping.EntityIdMap.ExternalId} from the row, * then try to map it to an internal {@link com.bakdata.conquery.models.query.entity.Entity} */ - private static int tryResolveId(String[] row, List> readers, EntityIdMap mapping) { - int resolvedId = -1; + private static String tryResolveId(String[] row, List> readers, EntityIdMap mapping) { + String resolvedId = null; for (Function reader : readers) { final EntityIdMap.ExternalId externalId = reader.apply(row); @@ -376,14 +379,14 @@ private static int tryResolveId(String[] row, List NAMES = Arrays.stream(DateFormat.values()).map(Enum::name).collect(Collectors.toSet()); + } diff --git a/backend/src/main/java/com/bakdata/conquery/commands/CollectEntitiesCommand.java b/backend/src/main/java/com/bakdata/conquery/commands/CollectEntitiesCommand.java index 8e377a5b19..e69de29bb2 100644 --- a/backend/src/main/java/com/bakdata/conquery/commands/CollectEntitiesCommand.java +++ b/backend/src/main/java/com/bakdata/conquery/commands/CollectEntitiesCommand.java @@ -1,145 +0,0 @@ -package com.bakdata.conquery.commands; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; -import java.util.List; -import java.util.Map.Entry; -import 
java.util.Set; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.TimeUnit; -import java.util.zip.GZIPInputStream; - -import com.bakdata.conquery.ConqueryConstants; -import com.bakdata.conquery.io.jackson.Jackson; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.dictionary.EncodedDictionary; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; -import com.bakdata.conquery.models.exceptions.JSONException; -import com.bakdata.conquery.models.jobs.SimpleJob.Executable; -import com.bakdata.conquery.models.preproc.PreprocessedDictionaries; -import com.bakdata.conquery.models.preproc.PreprocessedHeader; -import com.bakdata.conquery.models.preproc.PreprocessedReader; -import com.bakdata.conquery.util.io.ConqueryMDC; -import com.bakdata.conquery.util.io.LogUtil; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.github.powerlibraries.io.Out; -import com.google.common.collect.Sets; -import io.dropwizard.cli.Command; -import io.dropwizard.setup.Bootstrap; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import net.sourceforge.argparse4j.impl.Arguments; -import net.sourceforge.argparse4j.inf.Namespace; -import net.sourceforge.argparse4j.inf.Subparser; - -@Slf4j -public class CollectEntitiesCommand extends Command { - - private final ConcurrentMap> entities = new ConcurrentHashMap<>(); - private boolean verbose; - - public CollectEntitiesCommand() { - super("collectEntities", "Collect all entities from the given preprocessing directories."); - } - - @Override - public void configure(Subparser subparser) { - subparser - .addArgument("-verbose") - .help("creates not only a file for all entities but for eqach cqpp") - .action(Arguments.storeTrue()); - - subparser - .addArgument("--file") - .nargs("+") - .help("List of CQPP to process"); - - } - - - @Override - public void run(Bootstrap bootstrap, Namespace namespace) throws Exception { - verbose = Boolean.TRUE.equals(namespace.getBoolean("-verbose")); - final Collection jobs = findPreprocessedJobs(namespace.getList("file")); - - final ExecutorService pool = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1); - - for (EntityExtractor job : jobs) { - pool.submit(() -> { - ConqueryMDC.setLocation(LogUtil.printPath(job.getFile())); - try { - job.execute(); - log.info("Merged {}", LogUtil.printPath(job.getFile())); - } - catch (Exception e) { - log.error("Failed to preprocess " + LogUtil.printPath(job.getFile()), e); - } - }); - } - - pool.shutdown(); - pool.awaitTermination(24, TimeUnit.HOURS); - - log.info("finished collecting ids, writing..."); - for (Entry> e : entities.entrySet()) { - log.info("{} entities into {}", e.getValue().size(), e.getKey()); - Out - .file(e.getKey()) - .withUTF8() - .writeLines(e.getValue().stream().sorted().distinct().iterator()); - } - - } - - public List findPreprocessedJobs(List files) throws IOException, JSONException { - final List l = new ArrayList<>(); - for (File preprocessedFile : files) { - if (!preprocessedFile.getName().endsWith(ConqueryConstants.EXTENSION_PREPROCESSED)) { - continue; - } - - l.add(new EntityExtractor(preprocessedFile)); - } - return l; - } - - @RequiredArgsConstructor - @Getter - public class EntityExtractor implements Executable { - - private final File file; - - @Override - public 
void execute() throws Exception { - final ObjectMapper om = Jackson.BINARY_MAPPER.copy(); - try (final PreprocessedReader parser = new PreprocessedReader(new GZIPInputStream(new FileInputStream(file)), om)) { - parser.addReplacement(Dataset.PLACEHOLDER.getId(), Dataset.PLACEHOLDER); - final PreprocessedHeader header = parser.readHeader(); - log.info("Reading {}", header.getName()); - - final PreprocessedDictionaries dictionaries = parser.readDictionaries(); - - final EncodedDictionary primaryDictionary = new EncodedDictionary(dictionaries.getPrimaryDictionary(), EncodedStringStore.Encoding.UTF8); - - add(primaryDictionary, new File(file.getParentFile(), "all_entities.csv")); - if (verbose) { - add(primaryDictionary, new File(file.getParentFile(), file.getName() + ".entities.csv")); - } - } - } - - private void add(EncodedDictionary primDict, File file) { - final Set list = entities.computeIfAbsent(file, f -> Sets.newConcurrentHashSet()); - for (int id = 0; id < primDict.getSize(); id++) { - list.add(primDict.getElement(id)); - } - } - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/commands/ManagerNode.java b/backend/src/main/java/com/bakdata/conquery/commands/ManagerNode.java index c7b649fe1f..b23eab19ed 100644 --- a/backend/src/main/java/com/bakdata/conquery/commands/ManagerNode.java +++ b/backend/src/main/java/com/bakdata/conquery/commands/ManagerNode.java @@ -10,7 +10,6 @@ import java.util.concurrent.TimeUnit; import javax.validation.Validator; -import javax.ws.rs.client.Client; import com.bakdata.conquery.io.cps.CPSTypeIdResolver; import com.bakdata.conquery.io.jackson.MutableInjectableValues; @@ -20,7 +19,6 @@ import com.bakdata.conquery.io.storage.MetaStorage; import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.mode.Manager; -import com.bakdata.conquery.mode.StorageHandler; import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.forms.frontendconfiguration.FormScanner; @@ -31,7 +29,6 @@ import com.bakdata.conquery.resources.ResourcesProvider; import com.bakdata.conquery.resources.admin.AdminServlet; import com.bakdata.conquery.resources.admin.ShutdownTask; -import com.bakdata.conquery.resources.unprotected.AuthServlet; import com.bakdata.conquery.tasks.PermissionCleanupTask; import com.bakdata.conquery.tasks.QueryCleanupTask; import com.bakdata.conquery.tasks.ReloadMetaStorageTask; @@ -39,7 +36,6 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationConfig; import com.google.common.base.Throwables; -import io.dropwizard.client.JerseyClientBuilder; import io.dropwizard.jersey.DropwizardResourceConfig; import io.dropwizard.lifecycle.Managed; import io.dropwizard.setup.Environment; @@ -69,14 +65,9 @@ public class ManagerNode extends IoHandlerAdapter implements Managed { private AuthorizationController authController; private ScheduledExecutorService maintenanceService; private final List providers = new ArrayList<>(); - private Client client; @Delegate(excludes = Managed.class) private Manager manager; - // Resources without authentication - private DropwizardResourceConfig unprotectedAuthApi; - private DropwizardResourceConfig unprotectedAuthAdmin; - // For registering form providers private FormScanner formScanner; @@ -93,9 +84,6 @@ public void run(Manager manager) throws InterruptedException { ConqueryConfig config = manager.getConfig(); validator = environment.getValidator(); - client 
= new JerseyClientBuilder(environment).using(config.getJerseyClient()) - .build(getName()); - this.manager = manager; final ObjectMapper objectMapper = environment.getObjectMapper(); @@ -125,17 +113,11 @@ public void run(Manager manager) throws InterruptedException { loadMetaStorage(); - authController = new AuthorizationController(getStorage(), config.getAuthorizationRealms()); - environment.lifecycle().manage(authController); - - unprotectedAuthAdmin = AuthServlet.generalSetup(environment.metrics(), config, environment.admin(), objectMapper); - unprotectedAuthApi = AuthServlet.generalSetup(environment.metrics(), config, environment.servlets(), objectMapper); - // Create AdminServlet first to make it available to the realms admin = new AdminServlet(this); - authController.externalInit(this, config.getAuthenticationRealms()); - + authController = new AuthorizationController(getStorage(), config, environment, admin); + environment.lifecycle().manage(authController); // Register default components for the admin interface admin.register(); @@ -249,8 +231,7 @@ public void loadNamespaces() { DatasetRegistry registry = getDatasetRegistry(); // Namespaces load their storage themselves, so they can inject Namespace relevant objects into stored objects - StorageHandler storageHandler = registry.getStorageHandler(); - final Collection namespaceStorages = getConfig().getStorage().discoverNamespaceStorages(storageHandler); + final Collection namespaceStorages = getConfig().getStorage().discoverNamespaceStorages(); for (NamespaceStorage namespaceStorage : namespaceStorages) { loaders.submit(() -> { registry.createNamespace(namespaceStorage); @@ -291,7 +272,5 @@ public void stop() throws Exception { log.error("{} could not be closed", getStorage(), e); } - client.close(); - } } diff --git a/backend/src/main/java/com/bakdata/conquery/io/external/form/ExternalFormBackendApi.java b/backend/src/main/java/com/bakdata/conquery/io/external/form/ExternalFormBackendApi.java index 23fee29160..d8d0a3b168 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/external/form/ExternalFormBackendApi.java +++ b/backend/src/main/java/com/bakdata/conquery/io/external/form/ExternalFormBackendApi.java @@ -43,11 +43,12 @@ public class ExternalFormBackendApi { private final WebTarget getStatusTarget; private final WebTarget cancelTaskTarget; private final WebTarget getHealthTarget; + private final WebTarget getVersionTarget; private final Function tokenCreator; private final WebTarget baseTarget; private final URL conqueryApiUrl; - public ExternalFormBackendApi(Client client, URI baseURI, String formConfigPath, String postFormPath, String statusTemplatePath, String cancelTaskPath, String healthCheckPath, Function tokenCreator, URL conqueryApiUrl, AuthenticationClientFilterProvider authFilterProvider) { + public ExternalFormBackendApi(Client client, URI baseURI, String formConfigPath, String postFormPath, String statusTemplatePath, String cancelTaskPath, String healthCheckPath, String versionPath, Function tokenCreator, URL conqueryApiUrl, AuthenticationClientFilterProvider authFilterProvider) { this.client = client; this.tokenCreator = tokenCreator; @@ -65,6 +66,7 @@ public ExternalFormBackendApi(Client client, URI baseURI, String formConfigPath, cancelTaskTarget = baseTarget.path(cancelTaskPath); getHealthTarget = baseTarget.path(healthCheckPath); + getVersionTarget = baseTarget.path(versionPath); } public List getFormConfigs() { @@ -127,6 +129,10 @@ public HealthCheck createHealthCheck() { return new 
HttpHealthCheck(getHealthTarget.getUri().toString(), client); } + public String getVersion() { + return getVersionTarget.request(MediaType.APPLICATION_JSON_TYPE).get(FormBackendVersion.class).getVersion(); + } + public ExternalTaskState cancelTask(UUID taskId) { log.debug("Cancelling task {}", taskId); diff --git a/backend/src/main/java/com/bakdata/conquery/io/external/form/FormBackendVersion.java b/backend/src/main/java/com/bakdata/conquery/io/external/form/FormBackendVersion.java new file mode 100644 index 0000000000..93a9588819 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/io/external/form/FormBackendVersion.java @@ -0,0 +1,8 @@ +package com.bakdata.conquery.io.external.form; + +import lombok.Data; + +@Data +public class FormBackendVersion { + private String version; +} diff --git a/backend/src/main/java/com/bakdata/conquery/io/jackson/Jackson.java b/backend/src/main/java/com/bakdata/conquery/io/jackson/Jackson.java index 5e799fd9d5..da9b1a45a1 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/jackson/Jackson.java +++ b/backend/src/main/java/com/bakdata/conquery/io/jackson/Jackson.java @@ -2,6 +2,7 @@ import java.util.Locale; +import com.bakdata.conquery.io.jackson.serializer.Object2IntMapMixin; import com.bakdata.conquery.models.auth.permissions.ConqueryPermission; import com.fasterxml.jackson.annotation.JsonInclude.Include; import com.fasterxml.jackson.core.JsonGenerator; @@ -17,6 +18,7 @@ import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import com.fasterxml.jackson.module.blackbird.BlackbirdModule; import com.fasterxml.jackson.module.paramnames.ParameterNamesModule; +import it.unimi.dsi.fastutil.objects.Object2IntMap; import org.apache.shiro.authz.Permission; public class Jackson { @@ -71,7 +73,8 @@ public static T configure(T objectMapper){ .setDefaultPropertyInclusion(Include.ALWAYS) //.setAnnotationIntrospector(new RestrictingAnnotationIntrospector()) .setInjectableValues(new MutableInjectableValues()) - .addMixIn(Permission.class, ConqueryPermission.class); + .addMixIn(Permission.class, ConqueryPermission.class) + .addMixIn(Object2IntMap.class, Object2IntMapMixin.class); return objectMapper; } diff --git a/backend/src/main/java/com/bakdata/conquery/io/jackson/JacksonUtil.java b/backend/src/main/java/com/bakdata/conquery/io/jackson/JacksonUtil.java index 0c8202042a..56ddadc71c 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/jackson/JacksonUtil.java +++ b/backend/src/main/java/com/bakdata/conquery/io/jackson/JacksonUtil.java @@ -7,6 +7,8 @@ import com.bakdata.conquery.io.mina.ChunkedMessage; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonToken; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JsonMappingException; import lombok.experimental.UtilityClass; import lombok.extern.slf4j.Slf4j; import org.apache.commons.collections4.IteratorUtils; @@ -106,4 +108,10 @@ public static InputStream stream(Iterable list) { public static String toJsonDebug(ChunkedMessage msg) { return toJsonDebug(msg.createInputStream()); } + + public static void expect(Class parseTargetType, DeserializationContext ctxt, JsonToken token, JsonToken expected) throws JsonMappingException { + if (token != expected) { + ctxt.reportInputMismatch(parseTargetType, "Expected " + expected + " but found " + token); + } + } } diff --git a/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CDateRangeDeserializer.java 
b/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CDateRangeDeserializer.java index d7531f7ccd..43b803a1a4 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CDateRangeDeserializer.java +++ b/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/CDateRangeDeserializer.java @@ -1,7 +1,8 @@ package com.bakdata.conquery.io.jackson.serializer; +import java.io.IOException; + import com.bakdata.conquery.models.common.daterange.CDateRange; -import com.bakdata.conquery.models.preproc.parser.specific.DateRangeParser; import com.bakdata.conquery.util.DateReader; import com.fasterxml.jackson.core.JsonParser; import com.fasterxml.jackson.core.JsonProcessingException; @@ -9,8 +10,6 @@ import com.fasterxml.jackson.databind.deser.std.StdDeserializer; import lombok.SneakyThrows; -import java.io.IOException; - public class CDateRangeDeserializer extends StdDeserializer { private final DateReader dateReader; diff --git a/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/Object2IntMapMixin.java b/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/Object2IntMapMixin.java new file mode 100644 index 0000000000..76b660ffbd --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/io/jackson/serializer/Object2IntMapMixin.java @@ -0,0 +1,73 @@ +package com.bakdata.conquery.io.jackson.serializer; + +import java.io.IOException; +import java.util.Map; + +import com.fasterxml.jackson.core.JacksonException; +import com.fasterxml.jackson.core.JsonParser; +import com.fasterxml.jackson.databind.BeanProperty; +import com.fasterxml.jackson.databind.DeserializationContext; +import com.fasterxml.jackson.databind.JavaType; +import com.fasterxml.jackson.databind.JsonDeserializer; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.KeyDeserializer; +import com.fasterxml.jackson.databind.annotation.JsonDeserialize; +import com.fasterxml.jackson.databind.deser.ContextualDeserializer; +import com.fasterxml.jackson.databind.deser.std.MapDeserializer; +import com.fasterxml.jackson.databind.deser.std.StdDeserializer; +import com.fasterxml.jackson.databind.deser.std.StdKeyDeserializers; +import com.fasterxml.jackson.databind.deser.std.StdValueInstantiator; +import com.fasterxml.jackson.databind.exc.InvalidDefinitionException; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; +import lombok.extern.slf4j.Slf4j; + +/** + * (De-)Serialization Mixin for {@link Object2IntMap}. 
+ */ +@JsonDeserialize(using = Object2IntMapMixin.Deserializer.class) +@Slf4j +public class Object2IntMapMixin { + + public static class Deserializer extends StdDeserializer> implements ContextualDeserializer { + + private final MapDeserializer mapDeserializer; + + public Deserializer() { + super(Object2IntMap.class); + this.mapDeserializer = null; + } + + public Deserializer(MapDeserializer mapDeserializer) { + super(Object2IntMap.class); + this.mapDeserializer = mapDeserializer; + } + + @Override + public Object2IntMap deserialize(JsonParser p, DeserializationContext ctxt) throws IOException, JacksonException { + final Object2IntOpenHashMap map = new Object2IntOpenHashMap<>(); + Object2IntOpenHashMap map1 = map; + return (Object2IntMap) mapDeserializer.deserialize(p, ctxt, (Map) map1); + } + + @Override + public JsonDeserializer createContextual(DeserializationContext ctxt, BeanProperty property) throws JsonMappingException { + + final JavaType mapType = ctxt.constructType(Object2IntMap.class); + KeyDeserializer keyDeserializer; + final JavaType keyType = ctxt.getContextualType().getKeyType(); + try { + keyDeserializer = ctxt.findKeyDeserializer(keyType, property); + } + catch (InvalidDefinitionException e) { + log.trace("Falling back to delegating key deserializer for type: {} ", keyType); + final JsonDeserializer contextualKeyDeserializer = ctxt.findContextualValueDeserializer(keyType, property); + keyDeserializer = StdKeyDeserializers.constructDelegatingKeyDeserializer(ctxt.getConfig(), keyType, contextualKeyDeserializer); + } + final JavaType valueType = ctxt.getContextualType().getContentType(); + final JsonDeserializer valueDeserializer = ctxt.findContextualValueDeserializer(valueType, property); + final StdValueInstantiator valueInstantiator = new StdValueInstantiator(ctxt.getConfig(), valueType); + return new Deserializer<>(new MapDeserializer(mapType, valueInstantiator, keyDeserializer, valueDeserializer, null)); + } + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/ConqueryStorage.java b/backend/src/main/java/com/bakdata/conquery/io/storage/ConqueryStorage.java index d3056048e1..b9e9ba69ff 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/ConqueryStorage.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/ConqueryStorage.java @@ -3,7 +3,6 @@ import java.io.Closeable; import java.io.IOException; -import com.bakdata.conquery.io.storage.xodus.stores.KeyIncludingStore; import com.bakdata.conquery.models.identifiable.CentralRegistry; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; @@ -18,12 +17,12 @@ public abstract class ConqueryStorage implements Closeable { * @implSpec The order defines the order of loading. Dependencies should be modeled here. * @implNote If you implement this method, please do it always from scratch and not using calls to super, it can be quite annoying. */ - public abstract ImmutableList> getStores(); + public abstract ImmutableList getStores(); public abstract void openStores(ObjectMapper objectMapper); public final void loadData(){ - for (KeyIncludingStore store : getStores()) { + for (ManagedStore store : getStores()) { store.loadData(); } } @@ -32,7 +31,7 @@ public final void loadData(){ * Delete the storage's contents. */ public void clear(){ - for (KeyIncludingStore store : getStores()) { + for (ManagedStore store : getStores()) { store.clear(); } } @@ -41,7 +40,7 @@ public void clear(){ * Remove the storage. 
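
A small, hedged round-trip sketch for the new Object2IntMap mixin registered in Jackson.configure above. It assumes Jackson resolves the key and value types from the declared field type, as Deserializer#createContextual expects; the bean and values are purely illustrative.

```java
import com.bakdata.conquery.io.jackson.serializer.Object2IntMapMixin;
import com.fasterxml.jackson.databind.ObjectMapper;
import it.unimi.dsi.fastutil.objects.Object2IntMap;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;

class EntityCounts {
	// Declared as the interface so the mixin's contextual deserializer is picked up.
	public Object2IntMap<String> counts = new Object2IntOpenHashMap<>();
}

class Object2IntMapRoundTrip {
	public static void main(String[] args) throws Exception {
		// Same mixin registration as in Jackson.configure(...) above.
		ObjectMapper mapper = new ObjectMapper().addMixIn(Object2IntMap.class, Object2IntMapMixin.class);

		EntityCounts original = new EntityCounts();
		original.counts.put("a", 1);
		original.counts.put("b", 2);

		String json = mapper.writeValueAsString(original); // {"counts":{"a":1,"b":2}}

		// Key/value types are resolved from the declared field type by createContextual.
		EntityCounts read = mapper.readValue(json, EntityCounts.class);
		System.out.println(read.counts.getInt("b")); // 2
	}
}
```
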
*/ public final void removeStorage(){ - for (KeyIncludingStore store : getStores()) { + for (ManagedStore store : getStores()) { store.removeStore(); } } @@ -50,7 +49,7 @@ public final void removeStorage(){ * Close the storage. */ public final void close() throws IOException { - for (KeyIncludingStore store : getStores()) { + for (ManagedStore store : getStores()) { store.close(); } } diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/IdentifiableCachedStore.java b/backend/src/main/java/com/bakdata/conquery/io/storage/IdentifiableCachedStore.java index 24f4d3b5a8..a53be0d30e 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/IdentifiableCachedStore.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/IdentifiableCachedStore.java @@ -69,7 +69,7 @@ protected void updated(VALUE value) { @Override public void loadData() { - store.fillCache(); + store.loadData(); for (Id key : getAllKeys()) { centralRegistry.registerCacheable(key, this::get); } diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/ManagedStore.java b/backend/src/main/java/com/bakdata/conquery/io/storage/ManagedStore.java new file mode 100644 index 0000000000..3213f49a6c --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/ManagedStore.java @@ -0,0 +1,14 @@ +package com.bakdata.conquery.io.storage; + +import java.io.IOException; + +/** + * Interface to unify among {@link com.bakdata.conquery.io.storage.xodus.stores.KeyIncludingStore} and {@link Store} for management only. + */ +public interface ManagedStore { + void loadData(); + void close() throws IOException; + + void clear(); + void removeStore(); +} diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/MetaStorage.java b/backend/src/main/java/com/bakdata/conquery/io/storage/MetaStorage.java index 2528dfd6e3..0bc297acbd 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/MetaStorage.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/MetaStorage.java @@ -4,7 +4,6 @@ import com.bakdata.conquery.io.jackson.Injectable; import com.bakdata.conquery.io.jackson.MutableInjectableValues; -import com.bakdata.conquery.io.storage.xodus.stores.KeyIncludingStore; import com.bakdata.conquery.models.auth.entities.Group; import com.bakdata.conquery.models.auth.entities.Role; import com.bakdata.conquery.models.auth.entities.User; @@ -54,7 +53,7 @@ public void openStores(ObjectMapper mapper) { } @Override - public ImmutableList> getStores() { + public ImmutableList getStores() { Preconditions.checkNotNull(authUser, "User storage was not created"); Preconditions.checkNotNull(authRole, "Role storage was not created"); Preconditions.checkNotNull(authGroup, "Group storage was not created"); diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/NamespaceStorage.java b/backend/src/main/java/com/bakdata/conquery/io/storage/NamespaceStorage.java index 5fa5e50969..96b9458d86 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/NamespaceStorage.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/NamespaceStorage.java @@ -2,20 +2,15 @@ import java.util.Collection; import java.util.Objects; +import java.util.OptionalInt; import javax.validation.Validator; -import com.bakdata.conquery.ConqueryConstants; -import com.bakdata.conquery.io.storage.xodus.stores.KeyIncludingStore; +import com.bakdata.conquery.io.storage.xodus.stores.CachedStore; import com.bakdata.conquery.io.storage.xodus.stores.SingletonStore; -import 
com.bakdata.conquery.mode.StorageHandler; import com.bakdata.conquery.models.config.StoreFactory; import com.bakdata.conquery.models.datasets.PreviewConfig; import com.bakdata.conquery.models.datasets.concepts.StructureNode; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.EncodedDictionary; -import com.bakdata.conquery.models.dictionary.MapDictionary; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; import com.bakdata.conquery.models.identifiable.ids.specific.InternToExternMapperId; import com.bakdata.conquery.models.identifiable.ids.specific.SearchIndexId; import com.bakdata.conquery.models.identifiable.mapping.EntityIdMap; @@ -24,7 +19,6 @@ import com.bakdata.conquery.models.worker.WorkerToBucketsMap; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.common.collect.ImmutableList; -import lombok.NonNull; import lombok.extern.slf4j.Slf4j; @Slf4j @@ -34,35 +28,13 @@ public class NamespaceStorage extends NamespacedStorage { protected IdentifiableStore searchIndexes; protected SingletonStore idMapping; protected SingletonStore structure; - protected SingletonStore preview; - protected SingletonStore workerToBuckets; - protected SingletonStore primaryDictionary; - - public NamespaceStorage(StoreFactory storageFactory, String pathName, Validator validator, StorageHandler storageHandler) { - super(storageFactory, pathName, validator, storageHandler); - } - - public EncodedDictionary getPrimaryDictionary() { - return new EncodedDictionary(getPrimaryDictionaryRaw(), EncodedStringStore.Encoding.UTF8); - } - - @NonNull - public Dictionary getPrimaryDictionaryRaw() { - final Dictionary dictionary = primaryDictionary.get(); - - if (dictionary == null) { - log.trace("No prior PrimaryDictionary, creating one"); - final MapDictionary newPrimary = new MapDictionary(getDataset(), ConqueryConstants.PRIMARY_DICTIONARY); - - primaryDictionary.update(newPrimary); + protected CachedStore entity2Bucket; - return newPrimary; - } - - return dictionary; + public NamespaceStorage(StoreFactory storageFactory, String pathName, Validator validator) { + super(storageFactory, pathName, validator); } @@ -85,15 +57,15 @@ public void openStores(ObjectMapper objectMapper) { idMapping = getStorageFactory().createIdMappingStore(super.getPathName(), objectMapper); structure = getStorageFactory().createStructureStore(super.getPathName(), getCentralRegistry(), objectMapper); workerToBuckets = getStorageFactory().createWorkerToBucketsStore(super.getPathName(), objectMapper); - primaryDictionary = getStorageFactory().createPrimaryDictionaryStore(super.getPathName(), getCentralRegistry(), objectMapper); preview = getStorageFactory().createPreviewStore(super.getPathName(), getCentralRegistry(), objectMapper); + entity2Bucket = getStorageFactory().createEntity2BucketStore(super.getPathName(), objectMapper); decorateInternToExternMappingStore(internToExternMappers); decorateIdMapping(idMapping); } @Override - public ImmutableList> getStores() { + public ImmutableList getStores() { return ImmutableList.of( dataset, @@ -102,7 +74,6 @@ public void openStores(ObjectMapper objectMapper) { secondaryIds, tables, - dictionaries, imports, // Concepts depend on internToExternMappers @@ -112,7 +83,7 @@ public void openStores(ObjectMapper objectMapper) { idMapping, structure, workerToBuckets, - primaryDictionary + entity2Bucket ); } @@ -124,10 +95,6 @@ public EntityIdMap getIdMapping() { } - public void 
updatePrimaryDictionary(Dictionary dictionary) { - primaryDictionary.update(dictionary); - } - public void updateIdMapping(EntityIdMap idMapping) { this.idMapping.update(idMapping); } @@ -140,6 +107,28 @@ public WorkerToBucketsMap getWorkerBuckets() { return workerToBuckets.get(); } + public int getNumberOfEntities() { + return entity2Bucket.count(); + } + + public OptionalInt getEntityBucket(String entity) { + final Integer bucket = entity2Bucket.get(entity); + + if(bucket == null){ + return OptionalInt.empty(); + } + + return OptionalInt.of(bucket); + } + + public int assignEntityBucket(String entity, int bucketSize) { + final int bucket = (int) Math.ceil((1d + getNumberOfEntities()) / (double) bucketSize); + + entity2Bucket.add(entity, bucket); + + return bucket; + } + public StructureNode[] getStructure() { return Objects.requireNonNullElseGet(structure.get(), () -> new StructureNode[0]); diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java b/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java index 4fe3f60e24..9238658a28 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/NamespacedStorage.java @@ -8,9 +8,7 @@ import javax.validation.ConstraintViolation; import javax.validation.Validator; -import com.bakdata.conquery.io.storage.xodus.stores.KeyIncludingStore; import com.bakdata.conquery.io.storage.xodus.stores.SingletonStore; -import com.bakdata.conquery.mode.StorageHandler; import com.bakdata.conquery.models.config.StoreFactory; import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Dataset; @@ -20,11 +18,9 @@ import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.Connector; import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.exceptions.ValidatorHelper; import com.bakdata.conquery.models.identifiable.CentralRegistry; import com.bakdata.conquery.models.identifiable.ids.specific.ConceptId; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; import com.bakdata.conquery.models.identifiable.ids.specific.ImportId; import com.bakdata.conquery.models.identifiable.ids.specific.SecondaryIdDescriptionId; import com.bakdata.conquery.models.identifiable.ids.specific.TableId; @@ -53,24 +49,19 @@ public abstract class NamespacedStorage extends ConqueryStorage { @Getter private final StoreFactory storageFactory; - @Getter - private final StorageHandler storageHandler; - @Getter private final Validator validator; protected SingletonStore dataset; protected IdentifiableStore secondaryIds; protected IdentifiableStore tables; - protected IdentifiableStore dictionaries; protected IdentifiableStore imports; protected IdentifiableStore> concepts; - public NamespacedStorage(StoreFactory storageFactory, String pathName, Validator validator, StorageHandler storageHandler) { + public NamespacedStorage(StoreFactory storageFactory, String pathName, Validator validator) { this.pathName = pathName; this.storageFactory = storageFactory; this.validator = validator; - this.storageHandler = storageHandler; } public void openStores(ObjectMapper objectMapper) { @@ -79,21 +70,19 @@ public void openStores(ObjectMapper objectMapper) { dataset = storageFactory.createDatasetStore(pathName, objectMapper); secondaryIds = 
storageFactory.createSecondaryIdDescriptionStore(centralRegistry, pathName, objectMapper); tables = storageFactory.createTableStore(centralRegistry, pathName, objectMapper); - dictionaries = storageFactory.createDictionaryStore(centralRegistry, pathName, objectMapper); imports = storageFactory.createImportStore(centralRegistry, pathName, objectMapper); concepts = storageFactory.createConceptStore(centralRegistry, pathName, objectMapper); decorateDatasetStore(dataset); decorateSecondaryIdDescriptionStore(secondaryIds); - decorateDictionaryStore(dictionaries); decorateTableStore(tables); decorateImportStore(imports); decorateConceptStore(concepts); } @Override - public ImmutableList> getStores() { - return ImmutableList.of(dataset, secondaryIds, tables, dictionaries, imports, concepts); + public ImmutableList getStores() { + return ImmutableList.of(dataset, secondaryIds, tables, imports, concepts); } @Override @@ -110,10 +99,6 @@ private void decorateSecondaryIdDescriptionStore(IdentifiableStore store) { - // Nothing to decorate - } - private void decorateTableStore(IdentifiableStore
store) { store.onAdd(table -> { for (Column c : table.getColumns()) { @@ -176,18 +161,6 @@ private void decorateImportStore(IdentifiableStore store) { // Intentionally left blank } - public Dictionary getDictionary(DictionaryId id) { - return dictionaries.get(id); - } - - public void updateDictionary(Dictionary dict) { - dictionaries.update(dict); - } - - public void removeDictionary(DictionaryId id) { - dictionaries.remove(id); - } - public void addImport(Import imp) { imports.add(imp); diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/Store.java b/backend/src/main/java/com/bakdata/conquery/io/storage/Store.java index bc652345ca..51e92011a6 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/Store.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/Store.java @@ -1,11 +1,10 @@ package com.bakdata.conquery.io.storage; -import java.io.IOException; import java.util.Collection; import com.bakdata.conquery.io.storage.xodus.stores.SerializingStore.IterationStatistic; -public interface Store { +public interface Store extends ManagedStore { public void add(KEY key, VALUE value); @@ -18,8 +17,7 @@ public interface Store { public void remove(KEY key); - public void fillCache(); - + public int count(); public Collection getAll(); @@ -36,7 +34,4 @@ public interface StoreEntryConsumer { void clear(); - void deleteStore(); - - void close() throws IOException; } diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/StoreMappings.java b/backend/src/main/java/com/bakdata/conquery/io/storage/StoreMappings.java index adf0d064a7..ef3483f264 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/StoreMappings.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/StoreMappings.java @@ -13,7 +13,6 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.StructureNode; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.CBlock; import com.bakdata.conquery.models.execution.ManagedExecution; @@ -24,7 +23,6 @@ import com.bakdata.conquery.models.identifiable.ids.specific.BucketId; import com.bakdata.conquery.models.identifiable.ids.specific.CBlockId; import com.bakdata.conquery.models.identifiable.ids.specific.ConceptId; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; import com.bakdata.conquery.models.identifiable.ids.specific.FormConfigId; import com.bakdata.conquery.models.identifiable.ids.specific.GroupId; import com.bakdata.conquery.models.identifiable.ids.specific.ImportId; @@ -54,37 +52,32 @@ @Getter @ToString(of = {"name", "keyType", "valueType"}) public enum StoreMappings { + + AUTH_GROUP(Group.class, GroupId.class), + AUTH_ROLE(Role.class, RoleId.class), + AUTH_USER(User.class, UserId.class), + BUCKETS(Bucket.class, BucketId.class), + CONCEPTS(Concept.class, ConceptId.class), + C_BLOCKS(CBlock.class, CBlockId.class), DATASET(Dataset.class, Boolean.class), + ENTITY_PREVIEW(PreviewConfig.class, Boolean.class), + ENTITY_TO_BUCKET(Integer.class, String.class), + EXECUTIONS(ManagedExecution.class, ManagedExecutionId.class), + FORM_CONFIG(FormConfig.class, FormConfigId.class), ID_MAPPING(EntityIdMap.class, Boolean.class), - NAMESPACES(DatasetRegistry.class, Boolean.class), - DICTIONARIES(Dictionary.class, DictionaryId.class), IMPORTS(Import.class, ImportId.class), + 
INTERN_TO_EXTERN(InternToExternMapper.class, InternToExternMapperId.class), + NAMESPACES(DatasetRegistry.class, Boolean.class), + SEARCH_INDEX(SearchIndex.class, SearchIndexId.class), SECONDARY_IDS(SecondaryIdDescription.class, SecondaryIdDescriptionId.class), + STRUCTURE(StructureNode[].class, Boolean.class), TABLES(Table.class, TableId.class), - CONCEPTS(Concept.class, ConceptId.class), - BUCKETS(Bucket.class, BucketId.class), - C_BLOCKS(CBlock.class, CBlockId.class), WORKER(WorkerInformation.class, Boolean.class), - EXECUTIONS(ManagedExecution.class, ManagedExecutionId.class), - AUTH_ROLE(Role.class, RoleId.class), - AUTH_USER(User.class, UserId.class), - AUTH_GROUP(Group.class, GroupId.class), - STRUCTURE(StructureNode[].class, Boolean.class), - FORM_CONFIG(FormConfig.class, FormConfigId.class), - WORKER_TO_BUCKETS(WorkerToBucketsMap.class, Boolean.class), - PRIMARY_DICTIONARY(Dictionary.class, Boolean.class), - - ENTITY_PREVIEW(PreviewConfig.class, Boolean.class), - INTERN_TO_EXTERN(InternToExternMapper.class, InternToExternMapperId.class), - SEARCH_INDEX(SearchIndex.class, SearchIndexId.class); + WORKER_TO_BUCKETS(WorkerToBucketsMap.class, Boolean.class); private final Class valueType; private final Class keyType; - public , CLASS_V extends Class> StoreInfo storeInfo() { - return new StoreInfo(getName(), (CLASS_K) getKeyType(), (CLASS_V) getValueType()); - } - /** * Store for identifiable values, with injectors. Store is also cached. */ @@ -106,7 +99,6 @@ public static > IdentifiableCachedStore identifiabl return new IdentifiableCachedStore(centralRegistry, baseStore); } - /** * Store holding a single value. */ @@ -114,6 +106,10 @@ public static SingletonStore singleton(Store base return new SingletonStore<>(baseStore); } + public , CLASS_V extends Class> StoreInfo storeInfo() { + return new StoreInfo(getName(), (CLASS_K) getKeyType(), (CLASS_V) getValueType()); + } + private String getName() { return name(); } diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/WorkerStorage.java b/backend/src/main/java/com/bakdata/conquery/io/storage/WorkerStorage.java index fe6130307d..6b301d9415 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/WorkerStorage.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/WorkerStorage.java @@ -4,9 +4,7 @@ import javax.validation.Validator; -import com.bakdata.conquery.io.storage.xodus.stores.KeyIncludingStore; import com.bakdata.conquery.io.storage.xodus.stores.SingletonStore; -import com.bakdata.conquery.mode.cluster.ClusterStorageHandler; import com.bakdata.conquery.models.config.StoreFactory; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.events.Bucket; @@ -29,7 +27,7 @@ public class WorkerStorage extends NamespacedStorage { private IdentifiableStore cBlocks; public WorkerStorage(StoreFactory storageFactory, Validator validator, String pathName) { - super(storageFactory, pathName, validator, new ClusterStorageHandler()); + super(storageFactory, pathName, validator); } @Override @@ -46,12 +44,11 @@ public void openStores(ObjectMapper objectMapper) { } @Override - public ImmutableList> getStores() { + public ImmutableList getStores() { return ImmutableList.of( dataset, secondaryIds, tables, - dictionaries, imports, concepts, diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/BigStore.java b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/BigStore.java index 4beb3633c0..8914f03d6f 100644 --- 
a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/BigStore.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/BigStore.java @@ -145,7 +145,7 @@ public void remove(KEY key) { } @Override - public void fillCache() { + public void loadData() { } @Override @@ -241,8 +241,8 @@ public void clear() { } @Override - public void deleteStore() { - metaStore.deleteStore(); - dataStore.deleteStore(); + public void removeStore() { + metaStore.removeStore(); + dataStore.removeStore(); } } diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/CachedStore.java b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/CachedStore.java index d8e10f2ecb..0840ca471a 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/CachedStore.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/CachedStore.java @@ -3,7 +3,7 @@ import java.io.IOException; import java.util.Collection; import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.LongAdder; import com.bakdata.conquery.io.jackson.serializer.IdReferenceResolvingException; import com.bakdata.conquery.io.storage.Store; @@ -39,7 +39,7 @@ public VALUE get(KEY key) { @Override public IterationStatistic forEach(StoreEntryConsumer consumer) { - throw new UnsupportedOperationException(); + return store.forEach(consumer); } @Override @@ -63,27 +63,30 @@ public int count() { } @Override - public void fillCache() { - AtomicLong totalSize = new AtomicLong(0); - int count = count(); - cache = new ConcurrentHashMap(count); + public void loadData() { + final LongAdder totalSize = new LongAdder(); + final int count = count(); + cache = new ConcurrentHashMap<>(count); final ProgressBar bar; - Stopwatch timer = Stopwatch.createStarted(); if (count > 100) { synchronized (PROGRESS_BAR) { bar = PROGRESS_BAR; bar.addMaxValue(count); } - log.info("\tloading store {}", this); } else { bar = null; } + log.info("BEGIN loading store {}", this); + + + final Stopwatch timer = Stopwatch.createStarted(); + store.forEach((key, value, size) -> { try { - totalSize.addAndGet(size); + totalSize.add(size); cache.put(key, value); } catch (RuntimeException e) { @@ -107,7 +110,7 @@ public void fillCache() { log.debug("\tloaded store {}: {} entries, {} within {}", this, cache.values().size(), - BinaryByteUnit.format(totalSize.get()), + BinaryByteUnit.format(totalSize.sum()), timer.stop() ); } @@ -134,8 +137,8 @@ public void clear() { } @Override - public void deleteStore() { - store.deleteStore(); + public void removeStore() { + store.removeStore(); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/EnvironmentRegistry.java b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/EnvironmentRegistry.java new file mode 100644 index 0000000000..d91e1ca376 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/EnvironmentRegistry.java @@ -0,0 +1,84 @@ +package com.bakdata.conquery.io.storage.xodus.stores; + +import java.io.File; +import java.util.HashMap; +import java.util.Map; + +import com.bakdata.conquery.models.config.XodusConfig; +import com.fasterxml.jackson.annotation.JsonIgnore; +import jetbrains.exodus.env.Environment; +import jetbrains.exodus.env.Environments; +import lombok.NonNull; +import lombok.RequiredArgsConstructor; +import lombok.experimental.Delegate; +import lombok.extern.slf4j.Slf4j; +import 
org.jetbrains.annotations.NotNull; + +/** + * Keeps transparently track of open environments using a map. + * If an environment is closed it is automatically unregistered. + */ +@RequiredArgsConstructor +@Slf4j +public class EnvironmentRegistry { + + @JsonIgnore + private final Map activeEnvironments = new HashMap<>(); + + public Environment register(Environment environment) { + + final Environment proxyInstance = createManaged(environment); + + synchronized (activeEnvironments) { + activeEnvironments.put(environment.getLocation(), proxyInstance); + } + return proxyInstance; + } + + @NotNull + private Environment createManaged(Environment environment) { + return new ManagedEnvironment(environment); + } + + private void unregister(Environment environment) { + log.debug("Unregister environment: {}", environment.getLocation()); + synchronized (activeEnvironments) { + final Environment remove = activeEnvironments.remove(environment.getLocation()); + + if (remove == null) { + log.warn("Could not unregister environment, because it was not registered: {}", environment.getLocation()); + } + } + } + + public Environment findOrCreateEnvironment(@NonNull File path, XodusConfig xodusConfig) { + synchronized (activeEnvironments) { + + try { + // Check for old env or register new env + return activeEnvironments.computeIfAbsent( + path.toString(), + newPath -> createManaged(Environments.newInstance(newPath, xodusConfig.createConfig())) + ); + } + catch (Exception e) { + throw new IllegalStateException("Unable to open environment: " + path, e); + } + } + } + + @RequiredArgsConstructor + public class ManagedEnvironment implements Environment { + + @Delegate + private final Environment delegate; + + public void close() { + synchronized (activeEnvironments) { + log.debug("Environment was closed: {}", delegate.getLocation()); + unregister(delegate); + delegate.close(); + } + } + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/KeyIncludingStore.java b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/KeyIncludingStore.java index 585eb6460d..d89a3c5f87 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/KeyIncludingStore.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/KeyIncludingStore.java @@ -6,7 +6,10 @@ import com.bakdata.conquery.io.storage.Store; -public abstract class KeyIncludingStore implements Closeable { +import com.bakdata.conquery.io.storage.ManagedStore; +import com.bakdata.conquery.io.storage.Store; + +public abstract class KeyIncludingStore implements Closeable, ManagedStore { protected final Store store; @@ -40,7 +43,7 @@ public void remove(KEY key) { } public void loadData() { - store.fillCache(); + store.loadData(); for(VALUE value : getAll()) { added(value); } @@ -70,7 +73,7 @@ public void clear() { } public void removeStore() { - store.deleteStore(); + store.removeStore(); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/SerializingStore.java b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/SerializingStore.java index bffb1a5426..55f4d63208 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/SerializingStore.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/SerializingStore.java @@ -7,6 +7,7 @@ import java.io.InputStream; import java.io.OutputStream; import java.io.PrintStream; +import java.nio.file.Files; import java.time.LocalDateTime; import 
java.time.format.DateTimeFormatter; import java.util.Collection; @@ -276,7 +277,7 @@ private VALUE readValue(ByteIterable value) { } /** - * Dumps the content of an unreadable value to a file as a json (it tries to parse it as an object and than tries to dump it as a json). + * Dumps the content of an unreadable value to a file as a json (it tries to parse it as an object and then tries to dump it as a json). * * @param gzippedObj The object to dump. * @param keyOfDump The key under which the unreadable value is accessible. It is used for the file name. @@ -294,9 +295,17 @@ private static void dumpToFile(byte[] gzippedObj, @NonNull String keyOfDump, Exc return; } - if (!dumpfile.getParentFile().exists() && !dumpfile.getParentFile().mkdirs()) { - //TODO this seems to occur sometimes, is it maybe just a race condition? - throw new IllegalStateException("Could not create `%s`.".formatted(dumpfile.getParentFile())); + try { + // This will create all necessary parent directories. + Files.createDirectories(dumpfile.toPath().getParent()); + + // Should be a redundant check, due to the above reasoning + if (!dumpfile.getParentFile().exists()) { + throw new IllegalStateException("Could not create `%s`.".formatted(dumpfile.getParentFile())); + } + } + catch (IOException e) { + log.warn("Could not create `{}`", dumpfile.getParentFile(), e); } // Write json @@ -351,7 +360,7 @@ private static byte[] debugUnGzip(byte[] bytes) throws IOException { /** * Iterates a given consumer over the entries of this store. - * Depending on the {@link XodusStoreFactory} corrupt entries may be dump to a file and/or removed from the store. + * Depending on the {@link XodusStoreFactory} corrupt entries may be dumped to a file and/or removed from the store. * These entries are not submitted to the consumer. * * @implNote This method is concurrent! 
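For context on the dumpToFile hunk above: File.mkdirs() returns false both on genuine failures and when the directory already exists, so the old exists()/mkdirs() check could throw spuriously when two threads dumped values concurrently (the race suspected in the removed TODO). Files.createDirectories does not fail merely because the directories are already there. The following is a minimal, self-contained sketch of that pattern only; the class name and paths are illustrative and not taken from the patch.

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

public class DumpDirectoryExample {

	public static void main(String[] args) throws IOException {
		// Hypothetical dump location; SerializingStore derives the real one from its configuration and the failed key.
		final Path dumpFile = Path.of("failed_dumps", "unreadable-key.json");

		// Creates all missing parent directories and is a no-op for directories that already exist,
		// so concurrent writers racing on the same parent cannot trip each other up.
		Files.createDirectories(dumpFile.getParent());

		// Write the (here trivial) JSON dump.
		Files.writeString(dumpFile, "{}");
	}
}

Note that createDirectories still throws if a path element exists but is not a directory, so genuine failures are not silently swallowed.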
@@ -420,7 +429,8 @@ private ByteIterable handle(StoreEntryConsumer consumer, IterationSt result.incrTotalProcessed(); // Try to read the key first - key = getDeserializedAndDumpFailed(keyRaw, SerializingStore.this::readKey, () -> new String(keyRaw.getBytesUnsafe()), valueRaw, "Could not parse key [{}]"); + key = + getDeserializedAndDumpFailed(keyRaw, SerializingStore.this::readKey, () -> new String(keyRaw.getBytesUnsafe()), valueRaw, "Could not parse key [{}]"); if (key == null) { result.incrFailedKeys(); return keyRaw; @@ -433,7 +443,8 @@ private ByteIterable handle(StoreEntryConsumer consumer, IterationSt result.incrFailedValues(); return keyRaw; } - }catch(Exception e){ + } + catch (Exception e) { log.error("Failed processing key/value", e); return keyRaw; } @@ -496,7 +507,7 @@ public void update(KEY key, VALUE value) { } @Override - public void fillCache() { + public void loadData() { } @Override @@ -520,7 +531,7 @@ public void clear() { } @Override - public void deleteStore() { + public void removeStore() { store.deleteStore(); } diff --git a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/WeakCachedStore.java b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/WeakCachedStore.java index 590b81a651..6a66f56d93 100644 --- a/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/WeakCachedStore.java +++ b/backend/src/main/java/com/bakdata/conquery/io/storage/xodus/stores/WeakCachedStore.java @@ -89,7 +89,7 @@ public int count() { } @Override - public void fillCache() {} + public void loadData() {} @Override public Collection getAll() { @@ -114,9 +114,9 @@ public void clear() { } @Override - public void deleteStore() { + public void removeStore() { cache.invalidateAll(); - store.deleteStore(); + store.removeStore(); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/mode/ManagerProvider.java b/backend/src/main/java/com/bakdata/conquery/mode/ManagerProvider.java index c82eab5779..5d12c4885f 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/ManagerProvider.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/ManagerProvider.java @@ -3,8 +3,6 @@ import javax.validation.Validator; import com.bakdata.conquery.io.storage.MetaStorage; -import com.bakdata.conquery.mode.cluster.ClusterStorageHandler; -import com.bakdata.conquery.mode.local.SqlStorageHandler; import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.index.IndexService; import com.bakdata.conquery.models.jobs.JobManager; @@ -12,7 +10,6 @@ import com.bakdata.conquery.models.worker.DistributedNamespace; import com.bakdata.conquery.models.worker.LocalNamespace; import com.bakdata.conquery.models.worker.Namespace; -import com.bakdata.conquery.sql.execution.SqlExecutionService; import io.dropwizard.setup.Environment; /** @@ -37,24 +34,20 @@ static DatasetRegistry createDistributedDatasetRegistry( ConqueryConfig config, InternalObjectMapperCreator creator ) { - ClusterStorageHandler storageHandler = new ClusterStorageHandler(); - return createDatasetRegistry(namespaceHandler, creator, storageHandler, config); + return createDatasetRegistry(namespaceHandler, creator, config); } static DatasetRegistry createLocalDatasetRegistry( NamespaceHandler namespaceHandler, ConqueryConfig config, - InternalObjectMapperCreator creator, - SqlExecutionService sqlExecutionService + InternalObjectMapperCreator creator ) { - SqlStorageHandler storageHandler = new SqlStorageHandler(sqlExecutionService); - return 
createDatasetRegistry(namespaceHandler, creator, storageHandler, config); + return createDatasetRegistry(namespaceHandler, creator, config); } private static DatasetRegistry createDatasetRegistry( NamespaceHandler namespaceHandler, InternalObjectMapperCreator creator, - StorageHandler storageHandler, ConqueryConfig config ) { final IndexService indexService = new IndexService(config.getCsv().createCsvParserSettings(), config.getIndex().getEmptyLabel()); @@ -63,8 +56,7 @@ private static DatasetRegistry createDatasetRegistry( config, creator, namespaceHandler, - indexService, - storageHandler + indexService ); MetaStorage storage = new MetaStorage(config.getStorage(), datasetRegistry); datasetRegistry.setMetaStorage(storage); diff --git a/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java b/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java index 2d5740e4d1..8f3e4eb1e4 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/NamespaceHandler.java @@ -46,7 +46,7 @@ static NamespaceSetupData createNamespaceSetup(NamespaceStorage storage, final C JobManager jobManager = new JobManager(storage.getDataset().getName(), config.isFailOnError()); - FilterSearch filterSearch = new FilterSearch(storage, jobManager, config.getCsv(), config.getIndex()); + FilterSearch filterSearch = new FilterSearch(config.getIndex()); return new NamespaceSetupData(injectables, indexService, communicationMapper, preprocessMapper, jobManager, filterSearch); } diff --git a/backend/src/main/java/com/bakdata/conquery/mode/StorageListener.java b/backend/src/main/java/com/bakdata/conquery/mode/StorageListener.java index ecb7a982c3..fe831a4733 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/StorageListener.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/StorageListener.java @@ -1,6 +1,5 @@ package com.bakdata.conquery.mode; -import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; @@ -22,6 +21,4 @@ public interface StorageListener { void onDeleteConcept(Concept concept); - void onUpdateMatchingStats(final Dataset dataset); - } diff --git a/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterImportHandler.java b/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterImportHandler.java index d2e30ab3e4..91f07d5d72 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterImportHandler.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterImportHandler.java @@ -9,9 +9,7 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.Connector; -import com.bakdata.conquery.models.identifiable.IdMutex; import com.bakdata.conquery.models.identifiable.ids.specific.DatasetId; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; import com.bakdata.conquery.models.jobs.ImportJob; import com.bakdata.conquery.models.messages.namespaces.specific.RemoveImportJob; import com.bakdata.conquery.models.worker.DatasetRegistry; @@ -27,7 +25,6 @@ public class ClusterImportHandler implements ImportHandler { - private final IdMutex sharedDictionaryLocks = new IdMutex<>(); private final ConqueryConfig config; private final DatasetRegistry datasetRegistry; @@ 
-38,7 +35,6 @@ public void updateImport(Namespace namespace, InputStream inputStream) { datasetRegistry.get(namespace.getDataset().getId()), inputStream, config.getCluster().getEntityBucketSize(), - sharedDictionaryLocks, config, true ); @@ -48,6 +44,18 @@ public void updateImport(Namespace namespace, InputStream inputStream) { clearDependentConcepts(namespace.getStorage().getAllConcepts(), job.getTable()); } + private void clearDependentConcepts(Collection> allConcepts, Table table) { + for (Concept c : allConcepts) { + for (Connector con : c.getConnectors()) { + if (!con.getTable().equals(table)) { + continue; + } + + con.getConcept().clearMatchingStats(); + } + } + } + @SneakyThrows @Override public void addImport(Namespace namespace, InputStream inputStream) { @@ -55,7 +63,6 @@ public void addImport(Namespace namespace, InputStream inputStream) { datasetRegistry.get(namespace.getDataset().getId()), inputStream, config.getCluster().getEntityBucketSize(), - sharedDictionaryLocks, config, false ); @@ -78,16 +85,4 @@ public void deleteImport(Import imp) { // Remove bucket assignments for consistency report namespace.getWorkerHandler().removeBucketAssignmentsForImportFormWorkers(imp); } - - private void clearDependentConcepts(Collection> allConcepts, Table table) { - for (Concept c : allConcepts) { - for (Connector con : c.getConnectors()) { - if (!con.getTable().equals(table)) { - continue; - } - - con.getConcept().clearMatchingStats(); - } - } - } } diff --git a/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterStorageHandler.java b/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterStorageHandler.java deleted file mode 100644 index d2579a7af4..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterStorageHandler.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.bakdata.conquery.mode.cluster; - -import java.util.stream.Stream; - -import com.bakdata.conquery.io.storage.NamespaceStorage; -import com.bakdata.conquery.mode.StorageHandler; -import com.bakdata.conquery.models.datasets.Column; -import com.bakdata.conquery.models.datasets.ImportColumn; -import com.bakdata.conquery.models.events.stores.root.StringStore; - -public class ClusterStorageHandler implements StorageHandler { - - @Override - public Stream lookupColumnValues(NamespaceStorage namespaceStorage, Column column) { - return namespaceStorage.getAllImports().stream() - .filter(imp -> imp.getTable().equals(column.getTable())) - .flatMap(imp -> { - final ImportColumn importColumn = imp.getColumns()[column.getPosition()]; - return ((StringStore) importColumn.getTypeDescription()).iterateValues(); - }); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterStorageListener.java b/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterStorageListener.java index f16aaaa1b8..9d8360a383 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterStorageListener.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/cluster/ClusterStorageListener.java @@ -1,10 +1,6 @@ package com.bakdata.conquery.mode.cluster; -import java.util.Collection; -import java.util.stream.Collectors; - import com.bakdata.conquery.mode.StorageListener; -import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; @@ -14,12 +10,10 @@ import 
com.bakdata.conquery.models.messages.namespaces.specific.RemoveSecondaryId; import com.bakdata.conquery.models.messages.namespaces.specific.RemoveTable; import com.bakdata.conquery.models.messages.namespaces.specific.UpdateConcept; -import com.bakdata.conquery.models.messages.namespaces.specific.UpdateMatchingStatsMessage; import com.bakdata.conquery.models.messages.namespaces.specific.UpdateSecondaryId; import com.bakdata.conquery.models.messages.namespaces.specific.UpdateTable; import com.bakdata.conquery.models.worker.DatasetRegistry; import com.bakdata.conquery.models.worker.DistributedNamespace; -import com.bakdata.conquery.models.worker.Namespace; import com.bakdata.conquery.models.worker.WorkerHandler; import lombok.AllArgsConstructor; @@ -66,14 +60,4 @@ public void onDeleteConcept(Concept concept) { SimpleJob simpleJob = new SimpleJob("sendToAll: remove " + concept.getId(), () -> handler.sendToAll(new RemoveConcept(concept))); jobManager.addSlowJob(simpleJob); } - - @Override - public void onUpdateMatchingStats(final Dataset dataset) { - final Namespace namespace = datasetRegistry.get(dataset.getId()); - final Collection> concepts = namespace.getStorage().getAllConcepts() - .stream() - .filter(concept -> concept.getMatchingStats() == null) - .collect(Collectors.toSet()); - datasetRegistry.get(dataset.getId()).getWorkerHandler().sendToAll(new UpdateMatchingStatsMessage(concepts)); - } } diff --git a/backend/src/main/java/com/bakdata/conquery/mode/local/LocalManagerProvider.java b/backend/src/main/java/com/bakdata/conquery/mode/local/LocalManagerProvider.java index 6214b22374..34d11615e0 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/local/LocalManagerProvider.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/local/LocalManagerProvider.java @@ -43,7 +43,7 @@ public DelegateManager provideManager(ConqueryConfig config, Env ); NamespaceHandler namespaceHandler = new LocalNamespaceHandler(config, creator, sqlContext, sqlExecutionService); - DatasetRegistry datasetRegistry = ManagerProvider.createLocalDatasetRegistry(namespaceHandler, config, creator, sqlExecutionService); + DatasetRegistry datasetRegistry = ManagerProvider.createLocalDatasetRegistry(namespaceHandler, config, creator); creator.init(datasetRegistry); return new DelegateManager<>( diff --git a/backend/src/main/java/com/bakdata/conquery/mode/local/LocalStorageListener.java b/backend/src/main/java/com/bakdata/conquery/mode/local/LocalStorageListener.java index 20dcd9e25b..8d53dde819 100644 --- a/backend/src/main/java/com/bakdata/conquery/mode/local/LocalStorageListener.java +++ b/backend/src/main/java/com/bakdata/conquery/mode/local/LocalStorageListener.java @@ -1,7 +1,6 @@ package com.bakdata.conquery.mode.local; import com.bakdata.conquery.mode.StorageListener; -import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; @@ -33,8 +32,4 @@ public void onAddConcept(Concept concept) { @Override public void onDeleteConcept(Concept concept) { } - - @Override - public void onUpdateMatchingStats(Dataset dataset) { - } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/AuthorizationController.java b/backend/src/main/java/com/bakdata/conquery/models/auth/AuthorizationController.java index e95307e39a..2f2615f462 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/AuthorizationController.java +++ 
b/backend/src/main/java/com/bakdata/conquery/models/auth/AuthorizationController.java @@ -7,8 +7,8 @@ import java.util.UUID; import java.util.stream.Collectors; +import com.bakdata.conquery.apiv1.auth.ProtoRole; import com.bakdata.conquery.apiv1.auth.ProtoUser; -import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.io.storage.MetaStorage; import com.bakdata.conquery.models.auth.basic.JWTokenHandler; import com.bakdata.conquery.models.auth.conquerytoken.ConqueryTokenRealm; @@ -21,7 +21,11 @@ import com.bakdata.conquery.models.config.auth.AuthenticationRealmFactory; import com.bakdata.conquery.models.config.auth.AuthorizationConfig; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; +import com.bakdata.conquery.resources.admin.AdminServlet; +import com.bakdata.conquery.resources.unprotected.AuthServlet; +import io.dropwizard.jersey.DropwizardResourceConfig; import io.dropwizard.lifecycle.Managed; +import io.dropwizard.setup.Environment; import lombok.Getter; import lombok.NonNull; import lombok.RequiredArgsConstructor; @@ -49,10 +53,15 @@ public final class AuthorizationController implements Managed{ @NonNull - private final AuthorizationConfig authorizationConfig; + private final ConqueryConfig config; @NonNull + private final Environment environment; + @NonNull + @Getter private final MetaStorage storage; @Getter + private final AdminServlet adminServlet; + @Getter private final ConqueryTokenRealm conqueryTokenRealm; @Getter private final List authenticationRealms = new ArrayList<>(); @@ -65,14 +74,33 @@ public final class AuthorizationController implements Managed{ private final DefaultSecurityManager securityManager; - public AuthorizationController(MetaStorage storage, AuthorizationConfig authorizationConfig) { + + // Resources without authentication + @Getter + private DropwizardResourceConfig unprotectedAuthApi; + @Getter + private DropwizardResourceConfig unprotectedAuthAdmin; + + public AuthorizationController(MetaStorage storage, ConqueryConfig config, Environment environment, AdminServlet adminServlet) { this.storage = storage; - this.authorizationConfig = authorizationConfig; + this.config = config; + this.environment = environment; + this.adminServlet = adminServlet; + // Create Jersey filter for authentication. The filter is registered here for the api and the but can be used by // any servlet. In the following configured realms can register TokenExtractors in the filter. 
authenticationFilter = DefaultAuthFilter.asDropwizardFeature(storage); redirectingAuthFilter = new RedirectingAuthFilter(authenticationFilter); + if (adminServlet != null) { + adminServlet.getJerseyConfig().register(authenticationFilter); + adminServlet.getJerseyConfigUI().register(redirectingAuthFilter); + } + + + unprotectedAuthAdmin = AuthServlet.generalSetup(environment.metrics(), config, environment.admin(), environment.getObjectMapper()); + unprotectedAuthApi = AuthServlet.generalSetup(environment.metrics(), config, environment.servlets(), environment.getObjectMapper()); + // Add the user token realm conqueryTokenRealm = new ConqueryTokenRealm(storage); @@ -90,18 +118,14 @@ public AuthorizationController(MetaStorage storage, AuthorizationConfig authoriz registerStaticSecurityManager(); - // Register initial users for authorization and authentication (if the realm is able to) - initializeAuthConstellation(authorizationConfig, realms, storage); } - - public void externalInit(ManagerNode manager, List authenticationRealmFactories) { - manager.getAdmin().getJerseyConfig().register(authenticationFilter); - manager.getEnvironment().jersey().register(authenticationFilter); + + private void externalInit() { // Init authentication realms provided by the config. - for (AuthenticationRealmFactory authenticationConf : authenticationRealmFactories) { - ConqueryAuthenticationRealm realm = authenticationConf.createRealm(manager); + for (AuthenticationRealmFactory authenticationConf : config.getAuthenticationRealms()) { + ConqueryAuthenticationRealm realm = authenticationConf.createRealm(environment, config, this); authenticationRealms.add(realm); realms.add(realm); } @@ -115,6 +139,11 @@ public void externalInit(ManagerNode manager, List a public void start() throws Exception { // Call Shiros init on all realms LifecycleUtils.init(realms); + + externalInit(); + + // Register initial users for authorization and authentication (if the realm is able to) + initializeAuthConstellation(config.getAuthorizationRealms(), realms, storage); } @Override @@ -141,8 +170,14 @@ public void registerStaticSecurityManager() { * A storage, where the handler might add a new users. 
*/ private static void initializeAuthConstellation(@NonNull AuthorizationConfig config, @NonNull List realms, @NonNull MetaStorage storage) { + for (ProtoRole pRole : config.getInitialRoles()) { + pRole.createOrOverwriteRole(storage); + } + for (ProtoUser pUser : config.getInitialUsers()) { + final User user = pUser.createOrOverwriteUser(storage); + for (Realm realm : realms) { if (realm instanceof UserManageable) { AuthorizationHelper.registerForAuthentication((UserManageable) realm, user, pUser.getCredential(), true); diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/ConqueryAuthenticationInfo.java b/backend/src/main/java/com/bakdata/conquery/models/auth/ConqueryAuthenticationInfo.java index 473ad19f9f..bb97cec09c 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/ConqueryAuthenticationInfo.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/ConqueryAuthenticationInfo.java @@ -1,5 +1,9 @@ package com.bakdata.conquery.models.auth; +import java.net.URI; + +import javax.annotation.Nullable; + import com.bakdata.conquery.models.auth.entities.Subject; import com.bakdata.conquery.models.auth.util.SubjectPrincipalCollection; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; @@ -29,12 +33,25 @@ public class ConqueryAuthenticationInfo implements AuthenticationInfo { /** * A realm can indicate whether a logout button is shown for the user or not */ - private final boolean displayLogout; + private final boolean displayLogout; + + /** + * An uri a user can be redirected to perform an external logout. + */ + @Nullable + private final URI frontChannelLogout; - public ConqueryAuthenticationInfo(Subject subject, Object credentials, ConqueryAuthenticationRealm realm, boolean displayLogout) { + public ConqueryAuthenticationInfo( + Subject subject, + Object credentials, + ConqueryAuthenticationRealm realm, + boolean displayLogout, + @Nullable URI frontChannelLogout + ) { this.credentials = credentials; this.displayLogout = displayLogout; principals = new SubjectPrincipalCollection(subject, realm); + this.frontChannelLogout = frontChannelLogout; } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/conquerytoken/ConqueryTokenRealm.java b/backend/src/main/java/com/bakdata/conquery/models/auth/conquerytoken/ConqueryTokenRealm.java index 10d7a56314..d808260d75 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/conquerytoken/ConqueryTokenRealm.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/conquerytoken/ConqueryTokenRealm.java @@ -81,7 +81,7 @@ public ConqueryAuthenticationInfo doGetAuthenticationInfo(AuthenticationToken to final User user = getUserOrThrowUnknownAccount(storage, userId); - return new ConqueryAuthenticationInfo(user, token, this, true); + return new ConqueryAuthenticationInfo(user, token, this, true, null); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DefaultInitialUserRealm.java b/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DefaultInitialUserRealm.java index 3af505f74e..aa5076a915 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DefaultInitialUserRealm.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DefaultInitialUserRealm.java @@ -1,11 +1,11 @@ package com.bakdata.conquery.models.auth.develop; import com.bakdata.conquery.io.storage.MetaStorage; -import com.bakdata.conquery.models.auth.entities.User; -import com.bakdata.conquery.models.config.auth.AuthorizationConfig; 
import com.bakdata.conquery.models.auth.ConqueryAuthenticationInfo; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; +import com.bakdata.conquery.models.auth.entities.User; import com.bakdata.conquery.models.auth.util.SkippingCredentialsMatcher; +import com.bakdata.conquery.models.config.auth.AuthorizationConfig; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; import lombok.extern.slf4j.Slf4j; import org.apache.shiro.authc.AuthenticationException; @@ -60,6 +60,6 @@ public ConqueryAuthenticationInfo doGetAuthenticationInfo(AuthenticationToken to } DevelopmentToken devToken = (DevelopmentToken) token; final User user = getUserOrThrowUnknownAccount(storage, devToken.getPrincipal()); - return new ConqueryAuthenticationInfo(user, devToken.getCredentials(), this, true); + return new ConqueryAuthenticationInfo(user, devToken.getCredentials(), this, true, null); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DevAuthConfig.java b/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DevAuthConfig.java index f6c41a1690..75c1c7bfe6 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DevAuthConfig.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/develop/DevAuthConfig.java @@ -1,11 +1,12 @@ package com.bakdata.conquery.models.auth.develop; -import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; import com.bakdata.conquery.models.auth.entities.User; +import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.config.auth.AuthenticationRealmFactory; -import com.bakdata.conquery.models.config.auth.AuthenticationRealmFactory; +import io.dropwizard.setup.Environment; /** * Default configuration for the auth system. Sets up all other default components. 
@@ -15,15 +16,15 @@ public class DevAuthConfig implements AuthenticationRealmFactory { @Override - public ConqueryAuthenticationRealm createRealm(ManagerNode managerNode) { - User defaultUser = managerNode.getConfig() + public ConqueryAuthenticationRealm createRealm(Environment environment, ConqueryConfig config, AuthorizationController authorizationController) { + User defaultUser = config .getAuthorizationRealms() .getInitialUsers() .get(0) - .createOrOverwriteUser(managerNode.getStorage()); + .createOrOverwriteUser(authorizationController.getStorage()); - managerNode.getAuthController().getAuthenticationFilter().registerTokenExtractor(new UserIdTokenExtractor(defaultUser)); + authorizationController.getAuthenticationFilter().registerTokenExtractor(new UserIdTokenExtractor(defaultUser)); - return new DefaultInitialUserRealm(managerNode.getStorage()); + return new DefaultInitialUserRealm(authorizationController.getStorage()); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/entities/FilteredUser.java b/backend/src/main/java/com/bakdata/conquery/models/auth/entities/FilteredUser.java index 84bde3681d..35ba82cab8 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/entities/FilteredUser.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/entities/FilteredUser.java @@ -4,7 +4,6 @@ import java.util.List; import java.util.concurrent.Callable; -import com.bakdata.conquery.models.identifiable.ids.specific.PermissionOwnerId; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import org.apache.shiro.authc.AuthenticationException; import org.apache.shiro.authc.AuthenticationToken; diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/entities/Subject.java b/backend/src/main/java/com/bakdata/conquery/models/auth/entities/Subject.java index 9227c0999e..f2e19617aa 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/entities/Subject.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/entities/Subject.java @@ -1,17 +1,16 @@ package com.bakdata.conquery.models.auth.entities; +import java.security.Principal; +import java.util.Collection; +import java.util.List; +import java.util.Set; + import com.bakdata.conquery.models.auth.ConqueryAuthenticationInfo; import com.bakdata.conquery.models.auth.permissions.Ability; import com.bakdata.conquery.models.auth.permissions.Authorized; -import com.bakdata.conquery.models.auth.permissions.ConqueryPermission; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; import lombok.NonNull; -import java.security.Principal; -import java.util.Collection; -import java.util.List; -import java.util.Set; - /** * An interface for classes that facade a user or represent a user. 
* @@ -36,6 +35,8 @@ public interface Subject extends Principal { boolean isDisplayLogout(); + ConqueryAuthenticationInfo getAuthenticationInfo(); + void setAuthenticationInfo(ConqueryAuthenticationInfo info); User getUser(); diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/entities/User.java b/backend/src/main/java/com/bakdata/conquery/models/auth/entities/User.java index e030532d12..bc3b1fc8d1 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/entities/User.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/entities/User.java @@ -154,6 +154,13 @@ public void setAuthenticationInfo(ConqueryAuthenticationInfo info) { shiroUserAdapter.getAuthenticationInfo().set(info); } + + @JsonIgnore + @Override + public ConqueryAuthenticationInfo getAuthenticationInfo() { + return shiroUserAdapter.getAuthenticationInfo().get(); + } + @Override @JsonIgnore public User getUser() { @@ -169,7 +176,7 @@ public class ShiroUserAdapter extends FilteredUser { @Getter private final ThreadLocal authenticationInfo = - ThreadLocal.withInitial(() -> new ConqueryAuthenticationInfo(User.this, null, null, false)); + ThreadLocal.withInitial(() -> new ConqueryAuthenticationInfo(User.this, null, null, false, null)); @Override public void checkPermission(Permission permission) throws AuthorizationException { diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/IntrospectionDelegatingRealm.java b/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/IntrospectionDelegatingRealm.java index c18ffc1d7f..4913cf3c6a 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/IntrospectionDelegatingRealm.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/IntrospectionDelegatingRealm.java @@ -122,7 +122,8 @@ public ConqueryAuthenticationInfo doGetAuthenticationInfo(AuthenticationToken to final User user = storage.getUser(userId); - return new ConqueryAuthenticationInfo(user, token, this, true); + final String logoutEndpoint = authProviderConf.getAuthClient(true).getServerConfiguration().getLogoutEndpoint(); + return new ConqueryAuthenticationInfo(user, token, this, true, URI.create(logoutEndpoint)); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealm.java b/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealm.java index 79bb0cf1af..e42b311976 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealm.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealm.java @@ -1,18 +1,24 @@ package com.bakdata.conquery.models.auth.oidc; -import java.lang.reflect.Array; import java.security.PublicKey; import java.util.List; +import java.util.Map; import java.util.Optional; +import java.util.Set; import java.util.function.Supplier; import com.bakdata.conquery.io.storage.MetaStorage; import com.bakdata.conquery.models.auth.ConqueryAuthenticationInfo; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; +import com.bakdata.conquery.models.auth.entities.Role; import com.bakdata.conquery.models.auth.entities.User; import com.bakdata.conquery.models.auth.util.SkippingCredentialsMatcher; import com.bakdata.conquery.models.config.auth.JwtPkceVerifyingRealmFactory; +import com.bakdata.conquery.models.identifiable.ids.specific.RoleId; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; 
+import lombok.Data; import lombok.NonNull; import lombok.extern.slf4j.Slf4j; import org.apache.shiro.authc.AuthenticationException; @@ -33,47 +39,58 @@ * the authenticated user from it. */ @Slf4j +@Data public class JwtPkceVerifyingRealm extends AuthenticatingRealm implements ConqueryAuthenticationRealm { private static final Class TOKEN_CLASS = BearerToken.class; - - Supplier> idpConfigurationSupplier; + //TODO FK/MT: Investigate difference between current allowedAudience impl and supposed audience (without mapper). private final String[] allowedAudience; private final TokenVerifier.Predicate[] tokenChecks; private final List alternativeIdClaims; private final ActiveWithLeewayVerifier activeVerifier; private final MetaStorage storage; - public JwtPkceVerifyingRealm(@NonNull Supplier> idpConfigurationSupplier, - @NonNull String allowedAudience, - List> additionalTokenChecks, - List alternativeIdClaims, - MetaStorage storage, - int tokenLeeway) { + /** + * Used in handleRoleClaims as size-limited set, with LRU characteristics. + * @implNote maximumSize is an arbitrary medium high number to avoid stuffing memory with token hashes, while avoiding reprocessing known access tokens. + */ + private final Cache processedRoleClaims = CacheBuilder.newBuilder() + .maximumSize(1_000) + .build(); + + Supplier> idpConfigurationSupplier; + + + public JwtPkceVerifyingRealm(@NonNull Supplier> idpConfigurationSupplier, @NonNull String allowedAudience, List> additionalTokenChecks, List alternativeIdClaims, MetaStorage storage, int tokenLeeway) { this.storage = storage; this.idpConfigurationSupplier = idpConfigurationSupplier; this.allowedAudience = new String[]{allowedAudience}; - this.tokenChecks = additionalTokenChecks.toArray((TokenVerifier.Predicate[]) Array.newInstance(TokenVerifier.Predicate.class, 0)); this.alternativeIdClaims = alternativeIdClaims; - this.setCredentialsMatcher(SkippingCredentialsMatcher.INSTANCE); - this.setAuthenticationTokenClass(TOKEN_CLASS); - this.activeVerifier = new ActiveWithLeewayVerifier(tokenLeeway); + this.tokenChecks = additionalTokenChecks.toArray(TokenVerifier.Predicate[]::new); + setCredentialsMatcher(SkippingCredentialsMatcher.INSTANCE); + setAuthenticationTokenClass(TOKEN_CLASS); + activeVerifier = new ActiveWithLeewayVerifier(tokenLeeway); } - @Override public ConqueryAuthenticationInfo doGetAuthenticationInfo(AuthenticationToken token) throws AuthenticationException { - Optional idpConfigurationOpt = idpConfigurationSupplier.get(); + + final Optional idpConfigurationOpt = idpConfigurationSupplier.get(); + if (idpConfigurationOpt.isEmpty()) { log.warn("Unable to start authentication, because idp configuration is not available."); return null; } - JwtPkceVerifyingRealmFactory.IdpConfiguration idpConfiguration = idpConfigurationOpt.get(); + + final JwtPkceVerifyingRealmFactory.IdpConfiguration idpConfiguration = idpConfigurationOpt.get(); final BearerToken bearerToken = (BearerToken) token; log.trace("Parsing token ({}) to extract key id from header", bearerToken.getToken()); + final String keyId = JOSEParser.parse(bearerToken.getToken()).getHeader().getKeyId(); + log.trace("Key id of token signer: {}", keyId); + final PublicKey publicKey = idpConfiguration.signingKeys().get(keyId); if (publicKey == null) { @@ -81,13 +98,16 @@ public ConqueryAuthenticationInfo doGetAuthenticationInfo(AuthenticationToken to } log.trace("Creating token verifier"); - TokenVerifier verifier = TokenVerifier.create(bearerToken.getToken(), AccessToken.class) - .withChecks(new 
TokenVerifier.RealmUrlCheck(idpConfiguration.issuer()), TokenVerifier.SUBJECT_EXISTS_CHECK, activeVerifier) - .withChecks(tokenChecks) - .publicKey(publicKey) - .audience(allowedAudience); + final TokenVerifier + verifier = + TokenVerifier.create(bearerToken.getToken(), AccessToken.class) + .withChecks(new TokenVerifier.RealmUrlCheck(idpConfiguration.issuer()), TokenVerifier.SUBJECT_EXISTS_CHECK, activeVerifier) + .withChecks(tokenChecks) + .publicKey(publicKey) + .audience(allowedAudience); log.trace("Verifying token"); + final AccessToken accessToken; try { verifier.verify(); @@ -97,6 +117,7 @@ public ConqueryAuthenticationInfo doGetAuthenticationInfo(AuthenticationToken to log.trace("Verification failed", e); throw new IncorrectCredentialsException(e); } + final String subject = accessToken.getSubject(); if (subject == null) { @@ -109,27 +130,90 @@ public ConqueryAuthenticationInfo doGetAuthenticationInfo(AuthenticationToken to UserId userId = new UserId(subject); User user = storage.getUser(userId); + if (user != null) { log.trace("Successfully authenticated user {}", userId); - return new ConqueryAuthenticationInfo(user, token, this, true); + handleRoleClaims(accessToken, user); + return new ConqueryAuthenticationInfo(user, token, this, true, idpConfiguration.logoutEndpoint()); } + // Try alternative ids for (String alternativeIdClaim : alternativeIdClaims) { - Object altId = accessToken.getOtherClaims().get(alternativeIdClaim); + + final Object altId = accessToken.getOtherClaims().get(alternativeIdClaim); + if (!(altId instanceof String)) { log.trace("Found no value for alternative id claim {}", alternativeIdClaim); continue; } + userId = new UserId((String) altId); user = storage.getUser(userId); + if (user != null) { log.trace("Successfully mapped subject {} using user id {}", subject, userId); - return new ConqueryAuthenticationInfo(user, token, this, true); + handleRoleClaims(accessToken, user); + return new ConqueryAuthenticationInfo(user, token, this, true, idpConfiguration.logoutEndpoint()); } } throw new UnknownAccountException("The user id was unknown: " + subject); } + private void handleRoleClaims(AccessToken accessToken, User user) { + + if (processedRoleClaims.getIfPresent(accessToken.getId()) != null) { + log.trace("Already handled role claims of {}", accessToken.getId()); + return; + } + + processedRoleClaims.put(accessToken.getId(), accessToken.getId()); + + //TODO handle removal of role claim? (probably not!?) 
+ + final Map resourceAccess = accessToken.getResourceAccess(); + + if (resourceAccess == null) { + log.trace("No resource Access present."); + return; + } + + final AccessToken.Access access = resourceAccess.get(getAllowedAudience()[0]); + + if (access == null) { + log.trace("No resource access found for {}.", getAllowedAudience()[0]); + return; + } + + final Set roleClaims = access.getRoles(); + + if (roleClaims == null) { + log.trace("No role claims found."); + return; + } + + log.trace("Found role claims for {}: {}.", user, roleClaims); + + + for (String roleClaim : roleClaims) { + final RoleId roleId = new RoleId(roleClaim); + + if (user.getRoles().contains(roleId)) { + log.trace("Role {} already registered.", roleId); + continue; + } + + final Role role = storage.getRole(roleId); + + if (role == null) { + continue; + } + + log.trace("Adding {} to {}", role, user); + + user.addRole(role); + } + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/web/DefaultAuthFilter.java b/backend/src/main/java/com/bakdata/conquery/models/auth/web/DefaultAuthFilter.java index ed94d8bc7e..171823776c 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/web/DefaultAuthFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/web/DefaultAuthFilter.java @@ -1,5 +1,16 @@ package com.bakdata.conquery.models.auth.web; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import javax.annotation.Priority; +import javax.ws.rs.NotAuthorizedException; +import javax.ws.rs.Priorities; +import javax.ws.rs.container.ContainerRequestContext; +import javax.ws.rs.container.PreMatching; +import javax.ws.rs.core.SecurityContext; + import com.bakdata.conquery.io.storage.MetaStorage; import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; @@ -16,16 +27,6 @@ import org.apache.shiro.authc.AuthenticationException; import org.apache.shiro.authc.AuthenticationToken; -import javax.annotation.Priority; -import javax.ws.rs.NotAuthorizedException; -import javax.ws.rs.Priorities; -import javax.ws.rs.container.ContainerRequestContext; -import javax.ws.rs.container.PreMatching; -import javax.ws.rs.core.SecurityContext; -import java.io.IOException; -import java.util.HashSet; -import java.util.Set; - /** * This filter hooks into dropwizard's request handling to extract and process * security relevant information for protected resources. 
The request is first @@ -71,7 +72,7 @@ public void filter(final ContainerRequestContext requestContext) throws IOExcept continue; } // Success an extracted token could be authenticated - log.trace("Authentication was successfull for token type {}", token.getClass().getName()); + log.trace("Authentication was successful for token type {}", token.getClass().getName()); return; } catch (AuthenticationException e) { // This is the shiro way to indicate that authentication failed diff --git a/backend/src/main/java/com/bakdata/conquery/models/auth/web/RedirectingAuthFilter.java b/backend/src/main/java/com/bakdata/conquery/models/auth/web/RedirectingAuthFilter.java index 4fe56294ee..1922623de8 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/auth/web/RedirectingAuthFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/auth/web/RedirectingAuthFilter.java @@ -1,5 +1,21 @@ package com.bakdata.conquery.models.auth.web; +import java.io.IOException; +import java.net.URI; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Function; + +import javax.annotation.Priority; +import javax.ws.rs.BadRequestException; +import javax.ws.rs.NotAuthorizedException; +import javax.ws.rs.Priorities; +import javax.ws.rs.RedirectionException; +import javax.ws.rs.ServiceUnavailableException; +import javax.ws.rs.WebApplicationException; +import javax.ws.rs.container.ContainerRequestContext; +import javax.ws.rs.core.Response; + import com.bakdata.conquery.models.auth.entities.User; import com.bakdata.conquery.resources.admin.ui.model.UIView; import io.dropwizard.auth.AuthFilter; @@ -8,16 +24,6 @@ import lombok.extern.slf4j.Slf4j; import org.apache.shiro.authc.AuthenticationToken; -import javax.annotation.Priority; -import javax.ws.rs.*; -import javax.ws.rs.container.ContainerRequestContext; -import javax.ws.rs.core.Response; -import java.io.IOException; -import java.net.URI; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Function; - /** * The {@link RedirectingAuthFilter} first delegates a request to the actual authentication filter. * If that filter is unable to map a user to the request, this filter checks if this request is in @@ -34,14 +40,16 @@ @Priority(Priorities.AUTHENTICATION) public class RedirectingAuthFilter extends AuthFilter { + public static final String REDIRECT_URI = "redirect_uri"; + /** * The Filter that checks if a request was authenticated */ private final DefaultAuthFilter delegate; /** - * Request processors that check if an request belongs to its multi-step authentication schema. - * E.g. the request contains an authorization code, then this checker tries to redeemed the code for an access token. + * Request processors that check if a request belongs to its multi-step authentication schema. + * E.g. the request contains an authorization code, then this checker tries to redeem the code for an access token. * If that succeeds, it produces a response that sets a cookie with the required authentication data for that schema. * * If the request does not fit the schema, the processor returns null. @@ -82,8 +90,6 @@ else if (authenticatedRedirects.size() > 1) { // The request was not authenticated, nor was it a step towards an authentication, so we redirect the user to a login. 
log.info("Redirecting unauthenticated user to login schema"); - - List loginRedirects = new ArrayList<>(); for ( Function loginInitiator : loginInitiators) { URI uri = loginInitiator.apply(request); @@ -97,8 +103,14 @@ else if (authenticatedRedirects.size() > 1) { throw new ServiceUnavailableException("No login schema configured"); } - // Give the user a choice to choose between them. (If there is only one schema, still redirect the user there) - // to prevent too many redirects if there was a problem wit the authentication + // shortcut when only one login provider is configured + if (loginRedirects.size() == 1) { + final URI loginUri = loginRedirects.get(0); + log.trace("One login redirect configured. Short cutting to: {}", loginUri); + throw new WebApplicationException(Response.seeOther(loginUri).build()); + } + + // Give the user a choice to choose between them. throw new WebApplicationException(Response.ok(new UIView<>("logins.html.ftl", null, loginRedirects)).build()); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/common/YearUtils.java b/backend/src/main/java/com/bakdata/conquery/models/common/YearUtils.java index 0569751ede..3b7105fe57 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/common/YearUtils.java +++ b/backend/src/main/java/com/bakdata/conquery/models/common/YearUtils.java @@ -3,7 +3,6 @@ import java.time.LocalDate; import com.bakdata.conquery.models.common.daterange.CDateRange; - import lombok.experimental.UtilityClass; /** diff --git a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRange.java b/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRange.java index 29c5c7e406..11bf063796 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRange.java +++ b/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRange.java @@ -4,7 +4,6 @@ import java.time.temporal.ChronoUnit; import java.time.temporal.IsoFields; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.stream.Collectors; @@ -18,24 +17,36 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonValue; -import lombok.EqualsAndHashCode; +import lombok.With; -@EqualsAndHashCode(onlyExplicitlyIncluded = true) -public abstract class CDateRange implements IRange { +@With +public final class CDateRange implements IRange { public static final int NEGATIVE_INFINITY = Integer.MIN_VALUE; public static final int POSITIVE_INFINITY = Integer.MAX_VALUE; + private final int min; + private final int max; + + private CDateRange(int min, int max) { + this.min = min; + this.max = max; + + if (min > max) { + throw new IllegalArgumentException( + String.format("Min(%s) is not less than max(%s)", CDate.toLocalDate(min), CDate.toLocalDate(max))); + } + } /** * Create a Range containing only the supplied date. The value needs to be a valid CDate. * @param value The value this range contains, as {@link CDate}. */ public static CDateRange exactly(int value) { - return new CDateRangeExactly(value); + return of(value, value); } /** - * Creates a new Range containing containing only the supplied date. + * Creates a new Range containing only the supplied date. * @param value the value the resulting range will contain. 
*/ public static CDateRange exactly(LocalDate value) { @@ -53,25 +64,9 @@ public static CDateRange of(Range value) { CDate.ofLocalDate(value.getMax(), POSITIVE_INFINITY) ); } - - public static CDateRange of(int min, int max) { - if(min == NEGATIVE_INFINITY && max == POSITIVE_INFINITY){ - return CDateRange.all(); - } - - if(max == POSITIVE_INFINITY){ - return atLeast(min); - } - if(min == NEGATIVE_INFINITY){ - return atMost(max); - } - - if(min == max){ - return exactly(min); - } - - return new CDateRangeClosed(min, max); + public static CDateRange of(int min, int max) { + return new CDateRange(min, max); } /** @@ -89,7 +84,7 @@ public static CDateRange atLeast(LocalDate value) { * @return */ public static CDateRange atLeast(int value) { - return new CDateRangeStarting(value); + return of(value, POSITIVE_INFINITY); } /** @@ -98,7 +93,7 @@ public static CDateRange atLeast(int value) { * @return */ public static CDateRange atMost(int value) { - return new CDateRangeEnding(value); + return of(NEGATIVE_INFINITY, value); } /** @@ -110,7 +105,6 @@ public static CDateRange atMost(LocalDate value) { return atMost(CDate.ofLocalDate(value)); } - /** * Creates a new range containing all values between {@code min} and {@code max}. * @param min lower bound of the range @@ -130,23 +124,7 @@ public static CDateRange of(LocalDate min, LocalDate max) { * @return */ public static CDateRange all() { - return CDateRangeOpen.INSTANCE; - } - - @Override - public LocalDate getMax() { - return getMaxValue() == POSITIVE_INFINITY ? null : CDate.toLocalDate(getMaxValue()); - } - - @EqualsAndHashCode.Include - public abstract int getMaxValue(); - - @EqualsAndHashCode.Include - public abstract int getMinValue(); - - @Override - public LocalDate getMin() { - return getMinValue() == NEGATIVE_INFINITY ? null : CDate.toLocalDate(getMinValue()); + return of(NEGATIVE_INFINITY, POSITIVE_INFINITY); } @JsonCreator @@ -166,6 +144,36 @@ public static CDateRange fromList(List values) { return of(values.get(0).intValue(), values.get(1).intValue()); } + public boolean contains(int rep) { + return rep >= getMinValue() && rep <= getMaxValue(); + } + + public int getMinValue() { + return min; + } + + public int getMaxValue() { + return max; + } + + @Override + public String toString() { + final String min = hasLowerBound() ? getMin().toString() : "-∞"; + final String max = hasUpperBound() ? getMax().toString() : "∞"; + + return min + "/" + max; + } + + @Override + public LocalDate getMax() { + return getMaxValue() == POSITIVE_INFINITY ? null : CDate.toLocalDate(getMaxValue()); + } + + @Override + public LocalDate getMin() { + return getMinValue() == NEGATIVE_INFINITY ? 
null : CDate.toLocalDate(getMinValue()); + } + public CDateRange intersection(CDateRange other) { if (!intersects(other)) { throw new IllegalArgumentException("Ranges do not intersect."); @@ -179,7 +187,6 @@ public int[] asArray() { return new int[] { getMinValue(), getMaxValue() }; } - @Override public boolean contains(LocalDate value) { return value != null && contains(CDate.ofLocalDate(value)); @@ -190,8 +197,6 @@ public boolean contains(CDateRange other) { return other != null && contains(other.getMinValue()) && contains(other.getMaxValue()); } - public abstract boolean contains(int rep); - @Override public CDateRange span(CDateRange other) { return of(Math.min(getMinValue(), other.getMinValue()), Math.max(getMaxValue(), other.getMaxValue())); @@ -270,7 +275,6 @@ else if(other.hasLowerBound()){ return of(min, max); } - @Override public boolean isOpen() { return getMinValue() == NEGATIVE_INFINITY || getMaxValue() == POSITIVE_INFINITY; @@ -318,13 +322,12 @@ public boolean intersects(CDateRange other) { } return !( - this.getMinValue() > other.getMaxValue() + getMinValue() > other.getMaxValue() || - this.getMaxValue() < other.getMinValue() + getMaxValue() < other.getMinValue() ); } - public boolean encloses(CDateRange other) { if (other == null) { return false; @@ -358,13 +361,6 @@ public boolean hasLowerBound() { public Range toSimpleRange() { return new Range<>(getMin(), getMax()); } - - /** - * The String representation of a DateRange follows the ISO definition. - * For open ended ranges a positive or negative ∞ is used. - */ - @Override - public abstract String toString(); /** * Returns the years that are part of this date range. @@ -377,28 +373,28 @@ public List getCoveredYears() { return Collections.emptyList(); } - int startYear = this.getMin().getYear(); - int endYear = this.getMax().getYear(); + final int startYear = getMin().getYear(); + final int endYear = getMax().getYear(); if(startYear == endYear) { - return Arrays.asList(this); + return List.of(this); } // Range covers multiple years - List ranges = new ArrayList<>(); - + final List ranges = new ArrayList<>(); + // First year begins with this range - ranges.add(CDateRange.of(this.getMin(), LocalDate.of(startYear, 12, 31))); - + ranges.add(CDateRange.of(getMin(), LocalDate.of(startYear, 12, 31))); + // Years in between if(endYear-startYear > 1) { ranges.addAll(IntStream .rangeClosed(startYear+1, endYear-1) - // Create date range with first days of year and the last day + // Create date range with first days of year and the last day .mapToObj(year -> CDateRange.of(LocalDate.ofYearDay(year, 1), LocalDate.of(year, 12, 31))) .collect(Collectors.toList())); } // Last year end with this range - ranges.add(CDateRange.of(LocalDate.of(endYear, 1, 1), this.getMax())); + ranges.add(CDateRange.of(LocalDate.of(endYear, 1, 1), getMax())); return ranges; } @@ -415,31 +411,31 @@ public List getCoveredQuarters() { } // If dateRange is shorter than a quarter, only add that first quarter. 
- if(QuarterUtils.getFirstDayOfQuarter(getMin()).isEqual( QuarterUtils.getFirstDayOfQuarter(getMax()))){ + if(QuarterUtils.getFirstDayOfQuarter(getMin()).isEqual(QuarterUtils.getFirstDayOfQuarter(getMax()))){ return List.of(this); } - List ranges = new ArrayList<>(); + final List ranges = new ArrayList<>(); // First quarter begins with this range - CDateRange start = CDateRange.of(getMin(), QuarterUtils.getLastDayOfQuarter(getMin())); - CDateRange end = CDateRange.of(QuarterUtils.getFirstDayOfQuarter(getMax()), getMax()); + final CDateRange start = CDateRange.of(getMin(), QuarterUtils.getLastDayOfQuarter(getMin())); + final CDateRange end = CDateRange.of(QuarterUtils.getFirstDayOfQuarter(getMax()), getMax()); ranges.add(start); - LocalDate nextQuarterDate = this.getMin().plus(1, IsoFields.QUARTER_YEARS); + LocalDate nextQuarterDate = getMin().plus(1, IsoFields.QUARTER_YEARS); while(nextQuarterDate.isBefore(end.getMin())) { ranges.add(QuarterUtils.fromDate(nextQuarterDate)); nextQuarterDate = nextQuarterDate.plus(1, IsoFields.QUARTER_YEARS); } - // Don't add the end if its the same quarter as start + // Don't add the end if it's the same quarter as start if(!start.equals(end)) { - // Last year end with this range + // Last year ends with this range ranges.add(end); } return ranges; } - + /** * Returns the days that are part of this date range as ranges. * @@ -448,12 +444,11 @@ public List getCoveredQuarters() { */ public List getCoveredDays() { if(isOpen()){ - // TODO: 22.04.2020 throw exception? return Collections.emptyList(); } - List ranges = new ArrayList<>(); - for(int i = this.getMinValue(); i <= this.getMaxValue(); i++) { + final List ranges = new ArrayList<>(); + for(int i = getMinValue(); i <= getMaxValue(); i++) { ranges.add(CDateRange.exactly(i)); } return ranges; @@ -465,7 +460,28 @@ public boolean isSingleQuarter() { } - int quarterStart = CDate.ofLocalDate(QuarterUtils.getFirstDayOfQuarter(getMinValue())); + final int quarterStart = CDate.ofLocalDate(QuarterUtils.getFirstDayOfQuarter(getMinValue())); return getMinValue() == quarterStart && getMaxValue() == CQuarter.getLastDay(quarterStart); } + + public boolean equals(Object o) { + if (o == this) { + return true; + } + + if (!(o instanceof CDateRange other)) { + return false; + } + + return getMaxValue() == other.getMaxValue() && getMinValue() == other.getMinValue(); + } + + public int hashCode() { + + final int PRIME = 59; + int result = 1; + result = result * PRIME + getMaxValue(); + result = result * PRIME + getMinValue(); + return result; + } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeClosed.java b/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeClosed.java deleted file mode 100644 index 4769e11480..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeClosed.java +++ /dev/null @@ -1,46 +0,0 @@ -package com.bakdata.conquery.models.common.daterange; - -import com.bakdata.conquery.models.common.CDate; -import lombok.With; - -@With -public class CDateRangeClosed extends CDateRange { - - private final int min; - private final int max; - - /*package*/ CDateRangeClosed(int min, int max) { - this.min = min; - this.max = max; - - if (min > max) { - throw new IllegalArgumentException( - String.format("Min(%s) is not less than max(%s)", CDate.toLocalDate(min), CDate.toLocalDate(max))); - } - - if (min == CDateRange.NEGATIVE_INFINITY || max == CDateRange.POSITIVE_INFINITY || min == max) { - throw new 
IllegalArgumentException( - String.format("%s is not a valid closed range", this)); - } - } - - @Override - public boolean contains(int rep) { - return rep >= getMinValue() && rep <= getMaxValue(); - } - - @Override - public String toString() { - return getMin() + "/" + getMax(); - } - - @Override - public int getMaxValue() { - return max; - } - - @Override - public int getMinValue() { - return min; - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeEnding.java b/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeEnding.java deleted file mode 100644 index 952ac57a95..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeEnding.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.bakdata.conquery.models.common.daterange; - -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.With; - -@With -@AllArgsConstructor(access = AccessLevel.PACKAGE) -public class CDateRangeEnding extends CDateRange { - - private final int max; - - @Override - public boolean contains(int rep) { - return rep <= max; - } - - @Override - public String toString() { - return "-∞/" + getMax(); - } - - @Override - public int getMaxValue() { - return max; - } - - @Override - public int getMinValue() { - return CDateRange.NEGATIVE_INFINITY; - } - - @Override - public boolean intersects(CDateRange other) { - if (other == null) { - return false; - } - - return this.getMaxValue() >= other.getMinValue(); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeExactly.java b/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeExactly.java deleted file mode 100644 index 6515f502d9..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeExactly.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.bakdata.conquery.models.common.daterange; - -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.With; - -@With -@AllArgsConstructor(access = AccessLevel.PACKAGE) -public class CDateRangeExactly extends CDateRange { - - private final int value; - - @Override - public boolean contains(int rep) { - return rep == value; - } - - @Override - public String toString() { - final String str = getMin().toString(); - return str + "/" + str; - } - - @Override - public int getMaxValue() { - return value; - } - - @Override - public int getMinValue() { - return value; - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeOpen.java b/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeOpen.java deleted file mode 100644 index 3a240e1f43..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeOpen.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.bakdata.conquery.models.common.daterange; - -import lombok.AccessLevel; -import lombok.NoArgsConstructor; -import lombok.With; - -@With -@NoArgsConstructor(access = AccessLevel.PRIVATE) -public class CDateRangeOpen extends CDateRange { - public static final CDateRange INSTANCE = new CDateRangeOpen(); - - @Override - public boolean contains(int rep) { - return true; - } - - @Override - public String toString() { - return "-∞/+∞"; - } - - @Override - public int getMaxValue() { - return CDateRange.POSITIVE_INFINITY; - } - - @Override - public int getMinValue() { - return CDateRange.NEGATIVE_INFINITY; - } -} diff --git 
a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeStarting.java b/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeStarting.java deleted file mode 100644 index 02f7b9301d..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/common/daterange/CDateRangeStarting.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.bakdata.conquery.models.common.daterange; - -import lombok.AccessLevel; -import lombok.AllArgsConstructor; -import lombok.With; - -@With -@AllArgsConstructor(access = AccessLevel.PACKAGE) -public class CDateRangeStarting extends CDateRange { - - private final int min; - - @Override - public boolean contains(int rep) { - return rep >= min; - } - - @Override - public String toString() { - return getMin() + "/+∞"; - } - - @Override - public int getMaxValue() { - return CDateRange.POSITIVE_INFINITY; - } - - @Override - public int getMinValue() { - return min; - } - - @Override - public boolean intersects(CDateRange other) { - if (other == null) { - return false; - } - - return this.getMinValue() <= other.getMaxValue(); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/FormBackendConfig.java b/backend/src/main/java/com/bakdata/conquery/models/config/FormBackendConfig.java index 591c398e12..8abea7ee0d 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/FormBackendConfig.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/FormBackendConfig.java @@ -25,6 +25,7 @@ import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.forms.frontendconfiguration.FormConfigProvider; import com.bakdata.conquery.models.forms.frontendconfiguration.FormFrontendConfigInformation; +import com.bakdata.conquery.util.VersionInfo; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; @@ -66,6 +67,9 @@ public class FormBackendConfig implements PluginConfig, MultiInstancePlugin { @NotEmpty private String healthCheckPath = "health"; + @NotEmpty + private String versionPath = "version"; + @NotNull private URL conqueryApiUrl; @@ -102,13 +106,35 @@ public void initialize(ManagerNode managerNode) { managerNode.getFormScanner().registerFrontendFormConfigProvider(new FormConfigProvider(getId(), this::registerFormConfigs)); } + /** + * Retrieves the version information from the form backend and writes it to the {@link VersionInfo} + */ + private void updateVersion(ExternalFormBackendApi externalApi) { + + try { + final String version = externalApi.getVersion(); + final String oldVersion = VersionInfo.INSTANCE.setFormBackendVersion(getId(), version); + if (!version.equals(oldVersion)) { + log.info("Form Backend '{}' version update: {} -> {}", getId(), oldVersion, version); + } + } + catch (Exception e) { + log.warn("Unable to retrieve version from form backend '{}'. Enable trace logging for more info", getId(), (Exception) (log.isTraceEnabled() + ? 
e + : null)); + // Set place holder + VersionInfo.INSTANCE.setFormBackendVersion(getId(), "no-version-available"); + } + + } + public static ObjectMapper configureObjectMapper(ObjectMapper om) { return om.addMixIn(ExternalForm.class, ExternalFormMixin.class); } public ExternalFormBackendApi createApi() { - return new ExternalFormBackendApi(client, baseURI, formConfigPath, postFormPath, statusTemplatePath, cancelTaskPath, healthCheckPath, this::createAccessToken, conqueryApiUrl, getAuthentication()); + return new ExternalFormBackendApi(client, baseURI, formConfigPath, postFormPath, statusTemplatePath, cancelTaskPath, healthCheckPath, versionPath, this::createAccessToken, conqueryApiUrl, getAuthentication()); } public boolean supportsFormType(String formType) { @@ -125,7 +151,8 @@ public boolean supportsFormType(String formType) { private void registerFormConfigs(ImmutableCollection.Builder formConfigs) { final Set supportedFormTypes = new HashSet<>(); - for (ObjectNode formConfig : createApi().getFormConfigs()) { + final ExternalFormBackendApi api = createApi(); + for (ObjectNode formConfig : api.getFormConfigs()) { final String subType = formConfig.get("type").asText(); final String formType = createSubTypedId(subType); @@ -138,6 +165,9 @@ private void registerFormConfigs(ImmutableCollection.Builder discoverNamespaceStorages(StorageHandler storageHandler); + Collection discoverNamespaceStorages(); Collection discoverWorkerStorages(); @@ -49,8 +48,6 @@ public interface StoreFactory { IdentifiableStore
createTableStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper); - IdentifiableStore createDictionaryStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper); - IdentifiableStore> createConceptStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper); IdentifiableStore createImportStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper); @@ -80,11 +77,11 @@ public interface StoreFactory { IdentifiableStore createGroupStore(CentralRegistry centralRegistry, String pathName, MetaStorage storage, ObjectMapper objectMapper); - SingletonStore createPrimaryDictionaryStore(String pathName, CentralRegistry namespaceCollection, ObjectMapper objectMapper); - IdentifiableStore createInternToExternMappingStore(String pathName, CentralRegistry centralRegistry, ObjectMapper objectMapper); IdentifiableStore createSearchIndexStore(String pathName, CentralRegistry centralRegistry, ObjectMapper objectMapper); SingletonStore createPreviewStore(String pathName, CentralRegistry centralRegistry, ObjectMapper objectMapper); + + CachedStore createEntity2BucketStore(String pathName, ObjectMapper objectMapper); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/XodusStoreFactory.java b/backend/src/main/java/com/bakdata/conquery/models/config/XodusStoreFactory.java index 2aa54ed50e..36eed3c6bb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/XodusStoreFactory.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/XodusStoreFactory.java @@ -33,12 +33,11 @@ import com.bakdata.conquery.io.storage.WorkerStorage; import com.bakdata.conquery.io.storage.xodus.stores.BigStore; import com.bakdata.conquery.io.storage.xodus.stores.CachedStore; +import com.bakdata.conquery.io.storage.xodus.stores.EnvironmentRegistry; import com.bakdata.conquery.io.storage.xodus.stores.SerializingStore; import com.bakdata.conquery.io.storage.xodus.stores.SingletonStore; import com.bakdata.conquery.io.storage.xodus.stores.StoreInfo; -import com.bakdata.conquery.io.storage.xodus.stores.WeakCachedStore; import com.bakdata.conquery.io.storage.xodus.stores.XodusStore; -import com.bakdata.conquery.mode.StorageHandler; import com.bakdata.conquery.models.auth.entities.Group; import com.bakdata.conquery.models.auth.entities.Role; import com.bakdata.conquery.models.auth.entities.User; @@ -49,13 +48,11 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.StructureNode; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.CBlock; import com.bakdata.conquery.models.execution.ManagedExecution; import com.bakdata.conquery.models.forms.configs.FormConfig; import com.bakdata.conquery.models.identifiable.CentralRegistry; -import com.bakdata.conquery.models.identifiable.ids.Id; import com.bakdata.conquery.models.identifiable.mapping.EntityIdMap; import com.bakdata.conquery.models.index.InternToExternMapper; import com.bakdata.conquery.models.index.search.SearchIndex; @@ -66,15 +63,12 @@ import com.bakdata.conquery.util.io.FileUtil; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; import com.google.common.collect.Multimap; import 
com.google.common.collect.MultimapBuilder; import com.google.common.collect.Multimaps; import com.google.common.collect.Sets; import io.dropwizard.util.Duration; import jetbrains.exodus.env.Environment; -import jetbrains.exodus.env.Environments; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NoArgsConstructor; @@ -102,8 +96,6 @@ public class XodusStoreFactory implements StoreFactory { DATASET.storeInfo().getName(), SECONDARY_IDS.storeInfo().getName(), TABLES.storeInfo().getName(), - DICTIONARIES.storeInfo().getName() + BigStore.META, - DICTIONARIES.storeInfo().getName() + BigStore.DATA, IMPORTS.storeInfo().getName(), CONCEPTS.storeInfo().getName() ); @@ -115,8 +107,8 @@ public class XodusStoreFactory implements StoreFactory { ID_MAPPING.storeInfo().getName() + BigStore.DATA, STRUCTURE.storeInfo().getName(), WORKER_TO_BUCKETS.storeInfo().getName(), - PRIMARY_DICTIONARY.storeInfo().getName(), - ENTITY_PREVIEW.storeInfo().getName() + ENTITY_PREVIEW.storeInfo().getName(), + ENTITY_TO_BUCKET.storeInfo().getName() ) ); public static final Set WORKER_STORES = Sets.union( @@ -130,11 +122,14 @@ public class XodusStoreFactory implements StoreFactory { private Path directory = Path.of("storage"); - private boolean validateOnWrite; + private boolean validateOnWrite = false; @NotNull @Valid private XodusConfig xodus = new XodusConfig(); + @JsonIgnore + private EnvironmentRegistry registry = new EnvironmentRegistry(); + /** * Number of threads reading from XoduStore. * @implNote it's always only one thread reading from disk, dispatching to multiple reader threads. @@ -189,17 +184,14 @@ public ExecutorService getReaderExecutorService() { @JsonIgnore private transient Validator validator; - @JsonIgnore - private final BiMap activeEnvironments = HashBiMap.create(); - @JsonIgnore private final transient Multimap openStoresInEnv = Multimaps.synchronizedSetMultimap(MultimapBuilder.hashKeys().hashSetValues().build()); @Override - public Collection discoverNamespaceStorages(StorageHandler storageHandler) { - return loadNamespacedStores("dataset_", (storePath) -> new NamespaceStorage(this, storePath, getValidator(), storageHandler), NAMESPACE_STORES); + public Collection discoverNamespaceStorages() { + return loadNamespacedStores("dataset_", (storePath) -> new NamespaceStorage(this, storePath, getValidator()), NAMESPACE_STORES); } @Override @@ -223,9 +215,11 @@ private List loadNamespacedStores(String prefix ConqueryMDC.setLocation(directory.toString()); - if (!environmentHasStores(directory, storesToTest)) { - log.warn("No valid WorkerStorage found in {}", directory); - continue; + try (Environment environment = registry.findOrCreateEnvironment(directory, xodus)) { + if (!environmentHasStores(environment, storesToTest)) { + log.warn("No valid {}storage found in {}", prefix, directory); + continue; + } } final T namespacedStorage = creator.apply(name); @@ -236,9 +230,8 @@ private List loadNamespacedStores(String prefix return storages; } - private boolean environmentHasStores(File pathName, Set storesToTest) { - final Environment env = findEnvironment(pathName); - final boolean exists = env.computeInTransaction(t -> { + private boolean environmentHasStores(Environment env, Set storesToTest) { + return env.computeInTransaction(t -> { final List allStoreNames = env.getAllStoreNames(t); final boolean complete = new HashSet<>(allStoreNames).containsAll(storesToTest); if (complete) { @@ -256,10 +249,6 @@ private boolean environmentHasStores(File pathName, Set storesToTest) { return 
loadEnvironmentWithMissingStores; }); - if (!exists) { - closeEnvironment(env); - } - return exists; } @Override @@ -288,35 +277,13 @@ public SingletonStore createPreviewStore(String pathName, Central } @Override - public IdentifiableStore
createTableStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper) { - return StoreMappings.identifiable(createStore(findEnvironment(pathName), validator, TABLES, centralRegistry.injectIntoNew(objectMapper)), centralRegistry); + public CachedStore createEntity2BucketStore(String pathName, ObjectMapper objectMapper) { + return StoreMappings.cached(createStore(findEnvironment(pathName), validator, ENTITY_TO_BUCKET, objectMapper)); } @Override - public IdentifiableStore createDictionaryStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper) { - final Environment environment = findEnvironment(pathName); - - final BigStore, Dictionary> bigStore; - - synchronized (openStoresInEnv) { - bigStore = - new BigStore<>( - this, - validator, - environment, - DICTIONARIES.storeInfo(), - this::closeStore, - this::removeStore, - centralRegistry.injectIntoNew(objectMapper), getReaderExecutorService() - ); - openStoresInEnv.put(bigStore.getDataXodusStore().getEnvironment(), bigStore.getDataXodusStore()); - openStoresInEnv.put(bigStore.getMetaXodusStore().getEnvironment(), bigStore.getMetaXodusStore()); - } - - if (useWeakDictionaryCaching) { - return StoreMappings.identifiableCachedStore(new WeakCachedStore<>(bigStore, getWeakCacheDuration()), centralRegistry); - } - return StoreMappings.identifiable(StoreMappings.cached(bigStore), centralRegistry); + public IdentifiableStore
createTableStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper) { + return StoreMappings.identifiable(createStore(findEnvironment(pathName), validator, TABLES, centralRegistry.injectIntoNew(objectMapper)), centralRegistry); } @Override @@ -394,11 +361,6 @@ public IdentifiableStore createGroupStore(CentralRegistry centralRegistry return StoreMappings.identifiable(createStore(findEnvironment(resolveSubDir(pathName, "groups")), validator, AUTH_GROUP, objectMapper), centralRegistry); } - @Override - public SingletonStore createPrimaryDictionaryStore(String pathName, CentralRegistry centralRegistry, ObjectMapper objectMapper) { - return StoreMappings.singleton(createStore(findEnvironment(pathName), validator, PRIMARY_DICTIONARY, centralRegistry.injectIntoNew(objectMapper))); - } - private File resolveSubDir(String... subdirs) { Path current = getDirectory(); @@ -418,20 +380,15 @@ private File getStorageDir(String pathName) { return getDirectory().resolve(pathName).toFile(); } - private Environment findEnvironment(@NonNull File path) { - synchronized (activeEnvironments) { - try { - return activeEnvironments.computeIfAbsent(path, (p) -> Environments.newInstance(path, getXodus().createConfig())); - } - catch (Exception e) { - throw new IllegalStateException("Unable to open environment: " + path, e); - } - } - } + private Environment findEnvironment(String pathName) { final File path = getStorageDir(pathName); - return findEnvironment(path); + return registry.findOrCreateEnvironment(path, getXodus()); + } + + private Environment findEnvironment(File path) { + return registry.findOrCreateEnvironment(path, getXodus()); } private void closeStore(XodusStore store) { @@ -447,17 +404,7 @@ private void closeStore(XodusStore store) { } log.info("Closed last XodusStore in Environment. 
Closing Environment as well: {}", env.getLocation()); - closeEnvironment(env); - } - - private void closeEnvironment(Environment env) { - synchronized (activeEnvironments) { - - if (activeEnvironments.remove(activeEnvironments.inverse().get(env)) == null) { - return; - } - env.close(); - } + env.close(); } private void removeStore(XodusStore store) { @@ -484,7 +431,7 @@ private void removeEnvironment(Environment env) { throw new IllegalStateException("Cannot delete environment, because it still contains these stores:" + xodusStore); } - closeEnvironment(env); + env.close(); try { FileUtil.deleteRecursive(Path.of(env.getLocation())); diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthenticationRealmFactory.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthenticationRealmFactory.java index 6bc2490087..907fc43a90 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthenticationRealmFactory.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthenticationRealmFactory.java @@ -1,11 +1,12 @@ package com.bakdata.conquery.models.config.auth; -import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.io.cps.CPSBase; +import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; +import com.bakdata.conquery.models.config.ConqueryConfig; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import org.apache.shiro.realm.AuthenticatingRealm; +import io.dropwizard.setup.Environment; @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, property = "type") @CPSBase @@ -13,9 +14,12 @@ public interface AuthenticationRealmFactory { /** * Gets the realm specified in the configuration. + * + * @param environment + * @param config + * @param authorizationController * @return The realm. - * @param managerNode */ @JsonIgnore - ConqueryAuthenticationRealm createRealm(ManagerNode managerNode); + ConqueryAuthenticationRealm createRealm(Environment environment, ConqueryConfig config, AuthorizationController authorizationController); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthorizationConfig.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthorizationConfig.java index c74ce52c1e..e89b3e6d25 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthorizationConfig.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/AuthorizationConfig.java @@ -1,14 +1,16 @@ package com.bakdata.conquery.models.config.auth; +import java.util.List; + +import javax.validation.constraints.NotNull; + +import com.bakdata.conquery.apiv1.auth.ProtoRole; import com.bakdata.conquery.apiv1.auth.ProtoUser; import com.bakdata.conquery.io.cps.CPSBase; import com.bakdata.conquery.models.auth.UserManageable; import com.bakdata.conquery.models.auth.permissions.ExecutionPermission; import com.fasterxml.jackson.annotation.JsonTypeInfo; -import javax.validation.constraints.NotNull; -import java.util.List; - /** * Configurations of this type define the initial users with their permissions * and optional credentials that might be registered by realm that are @@ -24,6 +26,7 @@ public interface AuthorizationConfig { */ @NotNull List getInitialUsers(); + List getInitialRoles(); /** * A list of permission scopes/domains that should be used to generate the permission overview as an CSV. 
diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/DefaultAuthorizationConfig.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/DefaultAuthorizationConfig.java index 68dd789678..0c4132fd10 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/DefaultAuthorizationConfig.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/DefaultAuthorizationConfig.java @@ -1,18 +1,28 @@ package com.bakdata.conquery.models.config.auth; -import com.bakdata.conquery.apiv1.auth.ProtoUser; -import com.bakdata.conquery.io.cps.CPSType; -import lombok.Getter; +import java.util.List; +import java.util.Set; import javax.validation.Valid; import javax.validation.constraints.NotEmpty; -import java.util.List; + +import com.bakdata.conquery.apiv1.auth.ProtoRole; +import com.bakdata.conquery.apiv1.auth.ProtoUser; +import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.models.auth.permissions.AdminPermission; +import lombok.Getter; @CPSType(base = AuthorizationConfig.class, id = "DEFAULT") @Getter public class DefaultAuthorizationConfig implements AuthorizationConfig { - @NotEmpty + @Valid + private List initialRoles = List.of(ProtoRole.builder() + .name("admin") + .permissions(Set.of(AdminPermission.DOMAIN)) + .build()); + + @NotEmpty @Valid private List initialUsers; diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/DevelopmentAuthorizationConfig.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/DevelopmentAuthorizationConfig.java index b4eeb5064c..8f48ec668a 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/DevelopmentAuthorizationConfig.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/DevelopmentAuthorizationConfig.java @@ -1,5 +1,12 @@ package com.bakdata.conquery.models.config.auth; +import java.util.List; +import java.util.Set; + +import javax.validation.constraints.NotEmpty; +import javax.validation.constraints.NotNull; + +import com.bakdata.conquery.apiv1.auth.ProtoRole; import com.bakdata.conquery.apiv1.auth.ProtoUser; import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.models.auth.permissions.AdminPermission; @@ -7,29 +14,25 @@ import com.bakdata.conquery.models.auth.permissions.SuperPermission; import lombok.Getter; -import javax.validation.constraints.NotEmpty; -import javax.validation.constraints.NotNull; -import java.util.List; -import java.util.Set; - @CPSType(base = AuthorizationConfig.class, id = "DEVELOPMENT") @Getter -public class DevelopmentAuthorizationConfig implements AuthorizationConfig{ - +public class DevelopmentAuthorizationConfig implements AuthorizationConfig { + + private List initialRoles = List.of(ProtoRole.builder() + .name("admin") + .permissions(Set.of(AdminPermission.DOMAIN)) + .build()); + @NotEmpty - private List initialUsers = List.of( - ProtoUser.builder() - .name("SUPERUSER@SUPERUSER") - .label("SUPERUSER") - .permissions(Set.of("*")) - .build() - ); + private List initialUsers = List.of(ProtoUser.builder() + .name("SUPERUSER@SUPERUSER") + .label("SUPERUSER") + .permissions(Set.of("*")) + .roles(Set.of("admin")) + .build()); @NotNull - private List overviewScope = List.of( - DatasetPermission.DOMAIN, - AdminPermission.DOMAIN, - SuperPermission.DOMAIN); + private List overviewScope = List.of(DatasetPermission.DOMAIN, AdminPermission.DOMAIN, SuperPermission.DOMAIN); } diff --git 
a/backend/src/main/java/com/bakdata/conquery/models/config/auth/IntrospectionDelegatingRealmFactory.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/IntrospectionDelegatingRealmFactory.java index 8d812f7327..94b7c449f0 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/IntrospectionDelegatingRealmFactory.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/IntrospectionDelegatingRealmFactory.java @@ -6,7 +6,7 @@ import javax.ws.rs.client.Client; -import com.bakdata.conquery.commands.ManagerNode; +import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; import com.bakdata.conquery.models.auth.basic.JWTokenHandler; import com.bakdata.conquery.models.auth.oidc.IntrospectionDelegatingRealm; @@ -18,6 +18,7 @@ import com.nimbusds.oauth2.sdk.id.ClientID; import io.dropwizard.client.JerseyClientBuilder; import io.dropwizard.servlets.tasks.Task; +import io.dropwizard.setup.Environment; import io.dropwizard.validation.ValidationMethod; import lombok.Getter; import lombok.Setter; @@ -45,17 +46,17 @@ public class IntrospectionDelegatingRealmFactory extends Configuration { private transient AuthzClient authClient; - public ConqueryAuthenticationRealm createRealm(ManagerNode managerNode) { + public ConqueryAuthenticationRealm createRealm(Environment environment, AuthorizationController authorizationController) { // Register token extractor for JWT Tokens - managerNode.getAuthController().getAuthenticationFilter().registerTokenExtractor(JWTokenHandler::extractToken); + authorizationController.getAuthenticationFilter().registerTokenExtractor(JWTokenHandler::extractToken); // At start up, try tp retrieve the idp client api object if possible. If the idp service is not up don't fail start up. authClient = getAuthClient(false); // Register task to retrieve the idp client api, so the realm can be used, when the idp service is available. 
- if (managerNode.getEnvironment().admin() != null) { - managerNode.getEnvironment().admin().addTask(new Task("keycloak-update-authz-client") { + if (environment.admin() != null) { + environment.admin().addTask(new Task("keycloak-update-authz-client") { @Override public void execute(Map> parameters, PrintWriter output) throws Exception { @@ -66,10 +67,10 @@ public void execute(Map> parameters, PrintWriter output) th } // Setup keycloak api - final Client client = new JerseyClientBuilder(managerNode.getEnvironment()).build("keycloak-api"); + final Client client = new JerseyClientBuilder(environment).build("keycloak-api"); final KeycloakApi keycloakApi = new KeycloakApi(this, client); - return new IntrospectionDelegatingRealm(managerNode.getStorage(), this, keycloakApi); + return new IntrospectionDelegatingRealm(authorizationController.getStorage(), this, keycloakApi); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/JwtPkceVerifyingRealmFactory.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/JwtPkceVerifyingRealmFactory.java index d70bdb21dc..c7f5692e45 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/JwtPkceVerifyingRealmFactory.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/JwtPkceVerifyingRealmFactory.java @@ -27,12 +27,13 @@ import javax.ws.rs.core.UriBuilder; import com.bakdata.conquery.apiv1.RequestHelper; -import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.io.jackson.Jackson; +import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; import com.bakdata.conquery.models.auth.oidc.JwtPkceVerifyingRealm; import com.bakdata.conquery.models.auth.web.RedirectingAuthFilter; +import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.resources.admin.AdminServlet; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.core.JsonProcessingException; @@ -52,6 +53,8 @@ import groovy.lang.Binding; import groovy.lang.GroovyShell; import groovy.lang.Script; +import io.dropwizard.client.JerseyClientBuilder; +import io.dropwizard.setup.Environment; import io.dropwizard.validation.ValidationMethod; import lombok.Data; import lombok.NoArgsConstructor; @@ -131,6 +134,9 @@ public class JwtPkceVerifyingRealmFactory implements AuthenticationRealmFactory @JsonIgnore public BiFunction authCookieCreator; + @JsonIgnore + private Client httpClient; + @ValidationMethod(message = "Neither wellKnownEndpoint nor idpConfiguration was given") @JsonIgnore @@ -146,26 +152,28 @@ public record IdpConfiguration( @NonNull Map signingKeys, @NonNull URI authorizationEndpoint, @NonNull URI tokenEndpoint, + @NonNull URI logoutEndpoint, @NotEmpty String issuer) { } - public ConqueryAuthenticationRealm createRealm(ManagerNode manager) { + public ConqueryAuthenticationRealm createRealm(Environment environment, ConqueryConfig config, AuthorizationController authorizationController) { List> additionalVerifiers = new ArrayList<>(); for (String additionalTokenCheck : additionalTokenChecks) { additionalVerifiers.add(ScriptedTokenChecker.create(additionalTokenCheck)); } - idpConfigurationSupplier = getIdpOptionsSupplier(manager.getClient()); - authCookieCreator = manager.getConfig().getAuthentication()::createAuthCookie; + + idpConfigurationSupplier = getIdpOptionsSupplier(environment, config); + authCookieCreator = 
config.getAuthentication()::createAuthCookie; // Add login schema for admin end - final RedirectingAuthFilter redirectingAuthFilter = manager.getAuthController().getRedirectingAuthFilter(); + final RedirectingAuthFilter redirectingAuthFilter = authorizationController.getRedirectingAuthFilter(); redirectingAuthFilter.getAuthAttemptCheckers().add(this::checkAndRedeemAuthzCode); redirectingAuthFilter.getAuthAttemptCheckers().add(this::checkAndRedeemRefreshToken); redirectingAuthFilter.getLoginInitiators().add(this::initiateLogin); - return new JwtPkceVerifyingRealm(idpConfigurationSupplier, client, additionalVerifiers, alternativeIdClaims, manager.getStorage(), tokenLeeway); + return new JwtPkceVerifyingRealm(idpConfigurationSupplier, client, additionalVerifiers, alternativeIdClaims, authorizationController.getStorage(), tokenLeeway); } @Data @@ -173,15 +181,21 @@ private static class JWKs { List keys; } - private Supplier> getIdpOptionsSupplier(final Client client) { + private Supplier> getIdpOptionsSupplier(Environment environment, ConqueryConfig config) { return () -> { if (idpConfiguration == null) { synchronized (this) { // check again since we are now in an exclusive section if (idpConfiguration == null) { + + + Client httpClient = new JerseyClientBuilder(environment).using(config.getJerseyClient()) + .build(this.getClass().getSimpleName()); // retrieve the configuration and cache it - idpConfiguration = retrieveIdpConfiguration(client); + idpConfiguration = retrieveIdpConfiguration(httpClient); + + httpClient.close(); } } } @@ -219,6 +233,7 @@ public IdpConfiguration retrieveIdpConfiguration(final Client client) { String issuer = response.get("issuer").asText(); URI authorizationEndpoint = URI.create(response.get("authorization_endpoint").asText()); URI tokenEndpoint = URI.create(response.get("token_endpoint").asText()); + URI logoutEndpoint = URI.create(response.get("end_session_endpoint").asText()); URI jwksUri = URI.create(response.get("jwks_uri").asText()); @@ -251,7 +266,7 @@ public IdpConfiguration retrieveIdpConfiguration(final Client client) { .map(JWK::getKeyId).toList()); } - return new IdpConfiguration(signingKeys, authorizationEndpoint, tokenEndpoint, issuer); + return new IdpConfiguration(signingKeys, authorizationEndpoint, tokenEndpoint, logoutEndpoint, issuer); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/LocalAuthenticationConfig.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/LocalAuthenticationConfig.java index fb3f005465..a6bdef03de 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/LocalAuthenticationConfig.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/LocalAuthenticationConfig.java @@ -11,14 +11,15 @@ import javax.ws.rs.core.UriBuilder; import com.bakdata.conquery.apiv1.RequestHelper; -import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.io.cps.CPSType; import com.bakdata.conquery.io.jackson.Jackson; +import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; import com.bakdata.conquery.models.auth.basic.JWTokenHandler; import com.bakdata.conquery.models.auth.basic.LocalAuthenticationRealm; import com.bakdata.conquery.models.auth.basic.UserAuthenticationManagementProcessor; import com.bakdata.conquery.models.auth.web.RedirectingAuthFilter; +import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.config.XodusConfig; import 
com.bakdata.conquery.resources.admin.AdminServlet; import com.bakdata.conquery.resources.admin.rest.UserAuthenticationManagementResource; @@ -28,6 +29,7 @@ import com.password4j.BenchmarkResult; import com.password4j.SystemChecker; import io.dropwizard.jersey.DropwizardResourceConfig; +import io.dropwizard.setup.Environment; import io.dropwizard.util.Duration; import io.dropwizard.validation.MinDuration; import io.dropwizard.validation.ValidationMethod; @@ -41,7 +43,6 @@ @Slf4j public class LocalAuthenticationConfig implements AuthenticationRealmFactory { - public static final String REDIRECT_URI = "redirect_uri"; public static final int BCRYPT_MAX_MILLISECONDS = 300; /** * Configuration for the password store. An encryption for the store itself might be set here. @@ -67,7 +68,7 @@ public class LocalAuthenticationConfig implements AuthenticationRealmFactory { boolean isStorageEncrypted() { // Check if a cipher is configured for xodus according to https://github.com/JetBrains/xodus/wiki/Database-Encryption // in the config - if(passwordStoreConfig.getCipherId() != null){ + if (passwordStoreConfig.getCipherId() != null) { return true; } @@ -76,9 +77,9 @@ boolean isStorageEncrypted() { } @Override - public ConqueryAuthenticationRealm createRealm(ManagerNode manager) { + public ConqueryAuthenticationRealm createRealm(Environment environment, ConqueryConfig config, AuthorizationController authorizationController) { // Token extractor is not needed because this realm depends on the ConqueryTokenRealm - manager.getAuthController().getAuthenticationFilter().registerTokenExtractor(JWTokenHandler::extractToken); + authorizationController.getAuthenticationFilter().registerTokenExtractor(JWTokenHandler::extractToken); log.info("Performing benchmark for default hash function (bcrypt) with max_milliseconds={}", BCRYPT_MAX_MILLISECONDS); final BenchmarkResult result = SystemChecker.benchmarkBcrypt(BCRYPT_MAX_MILLISECONDS); @@ -91,26 +92,26 @@ public ConqueryAuthenticationRealm createRealm(ManagerNode manager) { log.info("Using bcrypt with {} logarithmic rounds. 
Elapsed time={}", rounds, realElapsed); LocalAuthenticationRealm realm = new LocalAuthenticationRealm( - manager.getValidator(), + environment.getValidator(), Jackson.copyMapperAndInjectables(Jackson.BINARY_MAPPER), - manager.getAuthController().getConqueryTokenRealm(), + authorizationController.getConqueryTokenRealm(), storeName, directory, passwordStoreConfig, jwtDuration, prototype ); - UserAuthenticationManagementProcessor processor = new UserAuthenticationManagementProcessor(realm, manager.getStorage()); + UserAuthenticationManagementProcessor processor = new UserAuthenticationManagementProcessor(realm, authorizationController.getStorage()); // Register resources for users to exchange username and password for an access token - registerAdminUnprotectedAuthenticationResources(manager.getUnprotectedAuthAdmin(), realm); - registerApiUnprotectedAuthenticationResources(manager.getUnprotectedAuthApi(), realm); + registerAdminUnprotectedAuthenticationResources(authorizationController.getUnprotectedAuthAdmin(), realm); + registerApiUnprotectedAuthenticationResources(authorizationController.getUnprotectedAuthApi(), realm); - registerAuthenticationAdminResources(manager.getAdmin().getJerseyConfig(), processor); + registerAuthenticationAdminResources(authorizationController.getAdminServlet().getJerseyConfig(), processor); // Add login schema for admin end - final RedirectingAuthFilter redirectingAuthFilter = manager.getAuthController().getRedirectingAuthFilter(); - redirectingAuthFilter.getLoginInitiators().add(loginProvider(manager.getUnprotectedAuthAdmin())); + final RedirectingAuthFilter redirectingAuthFilter = authorizationController.getRedirectingAuthFilter(); + redirectingAuthFilter.getLoginInitiators().add(loginProvider(authorizationController.getUnprotectedAuthAdmin())); return realm; } @@ -119,7 +120,9 @@ private Function loginProvider(DropwizardResourceCo return (ContainerRequestContext request) -> { return UriBuilder.fromPath(unprotectedAuthAdmin.getUrlPattern()) .path(LoginResource.class) - .queryParam(REDIRECT_URI, UriBuilder.fromUri(RequestHelper.getRequestURL(request)).path(AdminServlet.ADMIN_UI).build()) + .queryParam(RedirectingAuthFilter.REDIRECT_URI, UriBuilder.fromUri(RequestHelper.getRequestURL(request)) + .path(AdminServlet.ADMIN_UI) + .build()) .build(); }; diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCAuthorizationCodeFlowRealmFactory.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCAuthorizationCodeFlowRealmFactory.java index 63957e6ade..3bca2eae83 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCAuthorizationCodeFlowRealmFactory.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCAuthorizationCodeFlowRealmFactory.java @@ -1,8 +1,10 @@ package com.bakdata.conquery.models.config.auth; -import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; +import com.bakdata.conquery.models.config.ConqueryConfig; +import io.dropwizard.setup.Environment; import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; @@ -19,7 +21,7 @@ public class OIDCAuthorizationCodeFlowRealmFactory implements AuthenticationReal private IntrospectionDelegatingRealmFactory client; @Override - public ConqueryAuthenticationRealm createRealm(ManagerNode managerNode) { - return client.createRealm(managerNode); + 
public ConqueryAuthenticationRealm createRealm(Environment environment, ConqueryConfig config, AuthorizationController authorizationController) { + return client.createRealm(environment, authorizationController); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCResourceOwnerPasswordCredentialRealmFactory.java b/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCResourceOwnerPasswordCredentialRealmFactory.java index 6104e180b6..dd54ea34d3 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCResourceOwnerPasswordCredentialRealmFactory.java +++ b/backend/src/main/java/com/bakdata/conquery/models/config/auth/OIDCResourceOwnerPasswordCredentialRealmFactory.java @@ -1,12 +1,14 @@ package com.bakdata.conquery.models.config.auth; -import com.bakdata.conquery.commands.ManagerNode; import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.models.auth.AuthorizationController; import com.bakdata.conquery.models.auth.ConqueryAuthenticationRealm; import com.bakdata.conquery.models.auth.oidc.passwordflow.IdpDelegatingAccessTokenCreator; +import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.resources.unprotected.LoginResource; import com.bakdata.conquery.resources.unprotected.TokenResource; import io.dropwizard.jersey.DropwizardResourceConfig; +import io.dropwizard.setup.Environment; import lombok.Getter; import lombok.Setter; import lombok.extern.slf4j.Slf4j; @@ -25,16 +27,16 @@ public class OIDCResourceOwnerPasswordCredentialRealmFactory implements Authenti @Override - public ConqueryAuthenticationRealm createRealm(ManagerNode managerNode) { + public ConqueryAuthenticationRealm createRealm(Environment environment, ConqueryConfig config, AuthorizationController authorizationController) { // Register processor that does the actual exchange of user credentials for an access token IdpDelegatingAccessTokenCreator idpDelegatingAccessTokenCreator = new IdpDelegatingAccessTokenCreator(client); // Register resources for users to exchange username and password for an access token - registerAdminUnprotectedAuthenticationResources(managerNode.getUnprotectedAuthAdmin(), idpDelegatingAccessTokenCreator); - registerApiUnprotectedAuthenticationResources(managerNode.getUnprotectedAuthApi(), idpDelegatingAccessTokenCreator); + registerAdminUnprotectedAuthenticationResources(authorizationController.getUnprotectedAuthAdmin(), idpDelegatingAccessTokenCreator); + registerApiUnprotectedAuthenticationResources(authorizationController.getUnprotectedAuthApi(), idpDelegatingAccessTokenCreator); - return client.createRealm(managerNode); + return client.createRealm(environment, authorizationController); } public void registerAdminUnprotectedAuthenticationResources(DropwizardResourceConfig jerseyConfig, IdpDelegatingAccessTokenCreator idpDelegatingAccessTokenCreator) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java index 1f5c853cde..5a7180976f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Column.java @@ -1,42 +1,31 @@ package com.bakdata.conquery.models.datasets; -import java.util.Map; - import javax.annotation.Nullable; import javax.validation.constraints.NotNull; import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import 
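/*
 * Every AuthenticationRealmFactory now receives the Dropwizard Environment, the ConqueryConfig
 * and the AuthorizationController instead of the whole ManagerNode. A minimal sketch of a custom
 * factory against the new signature; DummyRealm is a hypothetical realm used only for
 * illustration, not part of this change set.
 */
public class DummyRealmFactory implements AuthenticationRealmFactory {

	@Override
	public ConqueryAuthenticationRealm createRealm(Environment environment, ConqueryConfig config, AuthorizationController authorizationController) {
		// Storage, token/auth filters and the unprotected servlets are reached through the controller now.
		return new DummyRealm(authorizationController.getStorage());
	}
}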
com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.MapDictionary; import com.bakdata.conquery.models.events.MajorTypeId; -import com.bakdata.conquery.models.identifiable.IdMutex; import com.bakdata.conquery.models.identifiable.Labeled; import com.bakdata.conquery.models.identifiable.ids.NamespacedIdentifiable; import com.bakdata.conquery.models.identifiable.ids.specific.ColumnId; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; -import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonBackReference; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.common.base.Preconditions; -import io.dropwizard.validation.ValidationMethod; import lombok.EqualsAndHashCode; import lombok.Getter; import lombok.NoArgsConstructor; import lombok.Setter; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.ArrayUtils; -import org.apache.commons.lang3.time.StopWatch; @Getter @Setter @NoArgsConstructor @Slf4j -public class Column extends Labeled implements NamespacedIdentifiable, Searchable { +public class Column extends Labeled implements NamespacedIdentifiable, Searchable { public static final int UNKNOWN_POSITION = -1; @@ -57,11 +46,6 @@ public class Column extends Labeled implements NamespacedIdentifiable< @JsonIgnore @Getter(lazy = true) private final int position = ArrayUtils.indexOf(getTable().getColumns(), this); - /** - * if set this column should use the given dictionary - * if it is of type string, instead of its own dictionary - */ - private String sharedDictionary; /** * if this is set this column counts as the secondary id of the given name for this * table @@ -79,100 +63,19 @@ public String toString() { return "Column(id = " + getId() + ", type = " + getType() + ")"; } - @ValidationMethod(message = "Only STRING columns can be part of shared Dictionaries.") - @JsonIgnore - public boolean isSharedString() { - return sharedDictionary == null || type.equals(MajorTypeId.STRING); - } - @JsonIgnore @Override public Dataset getDataset() { return table.getDataset(); } - /** - * Creates an id-replacement mapping for shared dictionaries for an {@link Import}. - * Because Imports are bound to a {@link com.bakdata.conquery.models.worker.Namespace} but the {@link com.bakdata.conquery.models.preproc.Preprocessed} files are not - * they contain dummy-{@link NsIdRef}. These References are mapped to actual object with valid ids through this - * generated mapping. - *

- * In this method for shared dictionaries, it is ensured, that the shared dictionary exists in the storage and it is - * created if not. - * - * @param dicts The mapping of column names in the Import to dictionaries in the Import - * @param storage The {@link NamespaceStorage} that backs the dictionaries - * @param out The collection for the generated replacement, that are needed during the deserialization of the next - * part of the {@link com.bakdata.conquery.models.preproc.Preprocessed}-file - * @param sharedDictionaryLocks A collection of locks used for the synchronized creation of shared dictionaries. - */ - public void createSharedDictionaryReplacement(Map dicts, NamespaceStorage storage, Map out, IdMutex sharedDictionaryLocks) { - Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column."); - Preconditions.checkArgument(sharedDictionary != null, "Can only be used for Shared Dictionary based Columns"); - // If the column is based on a shared dict. We reference a new empty dictionary or the existing one - // but without updated entries. The entries are updated later on, see ImportJob#applyDictionaryMappings. - - Dictionary sharedDict = null; - final DictionaryId sharedDictId = new DictionaryId(table.getDataset().getId(), getSharedDictionary()); - - try (IdMutex.Locked lock = sharedDictionaryLocks.acquire(sharedDictId)) { - sharedDict = storage.getDictionary(sharedDictId); - // Create dictionary if not yet present - if (sharedDict == null) { - sharedDict = new MapDictionary(table.getDataset(), getSharedDictionary()); - storage.updateDictionary(sharedDict); - } - } - out.put(new DictionaryId(Dataset.PLACEHOLDER.getId(), dicts.get(getName()).getName()), sharedDict); - } - /** - * See {@link Column#createSharedDictionaryReplacement(Map, NamespaceStorage, Map, IdMutex)} + * We create only an empty search here, because the content is provided through {@link com.bakdata.conquery.models.messages.namespaces.specific.RegisterColumnValues} and filled by the caller. 
*/ - public void createSingleColumnDictionaryReplacement(Map dicts, String importName, Map out) { - Preconditions.checkArgument(type.equals(MajorTypeId.STRING), "Not a STRING Column."); - Preconditions.checkArgument(sharedDictionary == null, "Cannot be used for Shared Dictionary based Columns."); - - final Dictionary dict = dicts.get(getName()); - final String name = computeDefaultDictionaryName(importName); - - out.put(new DictionaryId(Dataset.PLACEHOLDER.getId(), dict.getName()), dict); - - dict.setDataset(table.getDataset()); - dict.setName(name); - } - - - private String computeDefaultDictionaryName(String importName) { - return String.format("%s#%s", importName, getId().toString()); - } - - @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { - - final TrieSearch search = config.createTrieSearch(isGenerateSuffixes()); - - StopWatch timer = StopWatch.createStarted(); - - log.trace("START-COLUMN ADDING_ITEMS for {}", getId()); - - storage.getStorageHandler() - .lookupColumnValues(storage, this) - .map(value -> new FrontendValue(value, value)) - .onClose(() -> log.debug("DONE processing values for {}", getId())) - .forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue))); - - log.trace("DONE-COLUMN ADDING_ITEMS for {} in {}", getId(), timer); - - timer.reset(); - log.trace("START-COLUMN SHRINKING for {}", getId()); - - search.shrinkToFit(); - - log.trace("DONE-COLUMN SHRINKING for {} in {}", getId(), timer); + public TrieSearch createTrieSearch(IndexConfig config) { - return search; + return config.createTrieSearch(isGenerateSuffixes()); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Import.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Import.java index 1284ee66bd..0302a9c367 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Import.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Import.java @@ -1,6 +1,5 @@ package com.bakdata.conquery.models.datasets; -import java.util.Set; import javax.validation.Valid; import javax.validation.constraints.NotNull; @@ -8,7 +7,6 @@ import com.bakdata.conquery.io.jackson.serializer.NsIdRef; import com.bakdata.conquery.models.identifiable.NamedImpl; import com.bakdata.conquery.models.identifiable.ids.NamespacedIdentifiable; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; import com.bakdata.conquery.models.identifiable.ids.specific.ImportId; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -41,9 +39,6 @@ public class Import extends NamedImpl implements NamespacedIdentifiabl @NotNull private ImportColumn[] columns = new ImportColumn[0]; - @NotNull - private Set dictionaries; - @Override public ImportId createId() { return new ImportId(table.getId(), getName()); diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/Table.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/Table.java index 856fdd54b7..4d59ac4910 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/Table.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/Table.java @@ -5,6 +5,7 @@ import java.util.Set; import java.util.stream.Stream; +import javax.annotation.CheckForNull; import javax.validation.Valid; import javax.validation.constraints.NotNull; @@ -78,4 +79,22 @@ public Column getColumnByName(@NotNull String columnName) { .findFirst() .orElseThrow(() -> new 
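/*
 * Column#createTrieSearch now only builds an empty TrieSearch from the IndexConfig; the values
 * arrive later (e.g. via RegisterColumnValues) and are added by the caller. Rough sketch of that
 * filling step, mirroring the logic previously removed from the column itself; `column`,
 * `indexConfig` and `values` are assumed to be in scope.
 */
TrieSearch<FrontendValue> search = column.createTrieSearch(indexConfig);

values.stream()
	  .map(value -> new FrontendValue(value, value))
	  .forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue)));

search.shrinkToFit();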
IllegalStateException(String.format("Column %s not found", columnName))); } + + /** + * selects the right column for the given secondaryId from this table + */ + @CheckForNull + public Column findSecondaryIdColumn(SecondaryIdDescription secondaryId) { + + for (Column col : columns) { + if (col.getSecondaryId() == null || !secondaryId.equals(col.getSecondaryId())) { + continue; + } + + return col; + } + + return null; + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/MatchingStats.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/MatchingStats.java index 72b2aac5fd..293d845f7d 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/MatchingStats.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/MatchingStats.java @@ -1,7 +1,9 @@ package com.bakdata.conquery.models.datasets.concepts; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.datasets.Column; @@ -9,9 +11,11 @@ import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; import com.fasterxml.jackson.annotation.JsonIgnore; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.ints.IntSet; -import lombok.*; +import lombok.AllArgsConstructor; +import lombok.Data; +import lombok.Getter; +import lombok.NoArgsConstructor; +import lombok.Setter; @Getter @Setter @@ -78,12 +82,24 @@ public static class Entry { private long numberOfEvents; @JsonIgnore - private final IntSet foundEntities = new IntOpenHashSet(); + private final Set foundEntities = new HashSet<>(); private long numberOfEntities; - private CDateRange span; - - - public void addEvent(Table table, Bucket bucket, int event, int entityForEvent) { + private int minDate = Integer.MAX_VALUE; + private int maxDate = Integer.MIN_VALUE; + + @JsonIgnore + public CDateRange getSpan() { + if(minDate == Integer.MAX_VALUE && maxDate == Integer.MIN_VALUE) { + return null; + } + + return CDateRange.of( + minDate == Integer.MAX_VALUE ? Integer.MIN_VALUE : minDate, + maxDate == Integer.MIN_VALUE ? 
Integer.MAX_VALUE : maxDate + ); + } + + public void addEvent(Table table, Bucket bucket, int event, String entityForEvent) { numberOfEvents++; if (foundEntities.add(entityForEvent)) { numberOfEntities++; @@ -99,7 +115,14 @@ public void addEvent(Table table, Bucket bucket, int event, int entityForEvent) } final CDateRange time = bucket.getAsDateRange(event, c); - span = time.spanClosed(span); + + if (time.hasUpperBound()){ + maxDate = Math.max(time.getMaxValue(), maxDate); + } + + if (time.hasLowerBound()){ + minDate = Math.min(time.getMinValue(), minDate); + } } } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java index ee8c13d5ab..b6c53a1544 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/Searchable.java @@ -1,43 +1,23 @@ package com.bakdata.conquery.models.datasets.concepts; -import java.util.List; - import javax.validation.constraints.Min; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.identifiable.Identifiable; -import com.bakdata.conquery.models.identifiable.ids.Id; import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.util.search.TrieSearch; -import com.fasterxml.jackson.annotation.JsonIgnore; /** * @implNote This class is tightly coupled with {@link FilterSearch} and {@link com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter}. *
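/*
 * MatchingStats.Entry now counts String entity ids and derives the date span from two int
 * sentinels instead of storing a CDateRange. Hypothetical usage while scanning a bucket
 * (entry, table, bucket, event and entityId are assumed to be in scope).
 */
entry.addEvent(table, bucket, event, entityId);   // entityId is a String now
CDateRange span = entry.getSpan();                // null until a dated event was seen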

* Searchable classes describe how a search should be constructed, and provide the values with getSearchValues. */ -public interface Searchable>> extends Identifiable { - - public Dataset getDataset(); +public interface Searchable { /** * All available {@link FrontendValue}s for searching in a {@link TrieSearch}. */ - TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage); - - /** - * The actual Searchables to use, if there is potential for deduplication/pooling. - * - * @implSpec The order of objects returned is used to also sort search results from different sources. - */ - @JsonIgnore - default List> getSearchReferences() { - //Hopefully the only candidate will be Column - return List.of(this); - } + TrieSearch createTrieSearch(IndexConfig config); /** * Parameter used in the construction of {@link com.bakdata.conquery.util.search.TrieSearch}, defining the shortest suffix to create. diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java index a3aab711a1..3e5a155abe 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/filters/specific/SelectFilter.java @@ -7,20 +7,16 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.FilterTemplate; +import com.bakdata.conquery.apiv1.LabelMap; import com.bakdata.conquery.apiv1.frontend.FrontendFilterConfiguration; import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.jackson.View; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.filters.SingleColumnFilter; import com.bakdata.conquery.models.events.MajorTypeId; import com.bakdata.conquery.models.exceptions.ConceptConfigurationException; -import com.bakdata.conquery.models.identifiable.ids.specific.FilterId; -import com.bakdata.conquery.models.query.FilterSearch; -import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonIgnoreProperties; import com.google.common.collect.BiMap; @@ -30,7 +26,6 @@ import lombok.NoArgsConstructor; import lombok.Setter; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.time.StopWatch; import org.jetbrains.annotations.NotNull; @Setter @@ -38,7 +33,7 @@ @NoArgsConstructor @Slf4j @JsonIgnoreProperties({"searchType"}) -public abstract class SelectFilter extends SingleColumnFilter implements Searchable { +public abstract class SelectFilter extends SingleColumnFilter { /** * user given mapping from the values in the columns to shown labels @@ -71,19 +66,25 @@ public void configureFrontend(FrontendFilterConfiguration.Top f, ConqueryConfig @JsonIgnore public abstract String getFilterType(); - @Override - public List> getSearchReferences() { - final List> out = new ArrayList<>(); + + /** + * The actual Searchables to use, if there is potential for deduplication/pooling. + * + * @implSpec The order of objects returned is used to also sort search results from different sources. 
+ */ + @JsonIgnore + public List getSearchReferences() { + final List out = new ArrayList<>(); if (getTemplate() != null) { out.add(getTemplate()); } if (!labels.isEmpty()) { - out.add(this); + out.add(new LabelMap(getId(), labels, searchMinSuffixLength, generateSearchSuffixes)); } - out.addAll(getColumn().getSearchReferences()); + out.add(getColumn()); return out; } @@ -105,51 +106,4 @@ public boolean isNotUsingTemplateAndLabels() { return (getTemplate() == null) != labels.isEmpty(); } - - @Override - @JsonIgnore - public boolean isGenerateSuffixes() { - return generateSearchSuffixes; - } - - @Override - @JsonIgnore - public int getMinSuffixLength() { - return searchMinSuffixLength; - } - - /** - * Does not make sense to distinguish at Filter level since it's only referenced when labels are set. - */ - @Override - @JsonIgnore - public boolean isSearchDisabled() { - return false; - } - - @Override - public TrieSearch createTrieSearch(IndexConfig config, NamespaceStorage storage) { - - final TrieSearch search = config.createTrieSearch(true); - - if(log.isTraceEnabled()) { - log.trace("Labels for {}: `{}`", getId(), collectLabels().stream().map(FrontendValue::toString).collect(Collectors.toList())); - } - - StopWatch timer = StopWatch.createStarted(); - log.trace("START-SELECT ADDING_ITEMS for {}", getId()); - - collectLabels().forEach(feValue -> search.addItem(feValue, FilterSearch.extractKeywords(feValue))); - - log.trace("DONE-SELECT ADDING_ITEMS for {} in {}", getId(), timer); - - timer.reset(); - log.trace("START-SELECT SHRINKING for {}", getId()); - - search.shrinkToFit(); - - log.trace("DONE-SELECT SHRINKING for {} in {}", getId(), timer); - - return search; - } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeCache.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeCache.java index 8037f48dd4..c23469d152 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeCache.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/ConceptTreeCache.java @@ -1,12 +1,11 @@ package com.bakdata.conquery.models.datasets.concepts.tree; +import java.util.HashMap; import java.util.Map; import com.bakdata.conquery.models.exceptions.ConceptConfigurationException; import com.bakdata.conquery.util.CalculatedValue; import com.fasterxml.jackson.annotation.JsonIgnore; -import com.tomgibara.bits.BitStore; -import com.tomgibara.bits.Bits; import lombok.Getter; /** @@ -28,44 +27,36 @@ public class ConceptTreeCache { @JsonIgnore private final TreeConcept treeConcept; - /** - * Stores if the value is present in the cache. Values are allowed to not be resolvable but we still want to cache the tree walk. - */ - @JsonIgnore - private final BitStore cached; - /** * Store of all cached values. */ + @JsonIgnore - private final ConceptTreeChild[] values; + private final Map cached; - public ConceptTreeCache(TreeConcept treeConcept, int size) { + public ConceptTreeCache(TreeConcept treeConcept) { this.treeConcept = treeConcept; - values = new ConceptTreeChild[size]; - cached = Bits.store(size); + cached = new HashMap<>(); } /** * If id is already in cache, use that. If not calculate it by querying treeConcept. If rowMap was not used to query, cache the response. * - * @param id String id to resolve in conceptTree. 
- * @param scriptValue + * @param value */ - public ConceptTreeChild findMostSpecificChild(int id, String scriptValue, CalculatedValue> rowMap) throws ConceptConfigurationException { + public ConceptTreeChild findMostSpecificChild(String value, CalculatedValue> rowMap) throws ConceptConfigurationException { - if(cached.getBit(id)) { + if(cached.containsKey(value)) { hits++; - return values[id]; + return cached.get(value); } misses++; - final ConceptTreeChild child = treeConcept.findMostSpecificChild(scriptValue, rowMap); + final ConceptTreeChild child = treeConcept.findMostSpecificChild(value, rowMap); if(!rowMap.isCalculated()) { - cached.setBit(id, true); - this.values[id] = child; + cached.put(value, child); } return child; diff --git a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/TreeConcept.java b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/TreeConcept.java index 2848b6d2b3..85d4306d64 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/TreeConcept.java +++ b/backend/src/main/java/com/bakdata/conquery/models/datasets/concepts/tree/TreeConcept.java @@ -15,7 +15,6 @@ import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.SelectHolder; import com.bakdata.conquery.models.datasets.concepts.select.concept.UniversalSelect; -import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.exceptions.ConceptConfigurationException; import com.bakdata.conquery.models.exceptions.ConfigurationException; import com.bakdata.conquery.models.exceptions.JSONException; @@ -185,8 +184,8 @@ public int countElements() { return nChildren = 1 + (int) getAllChildren().count(); } - public void initializeIdCache(StringStore type, Import importId) { - caches.computeIfAbsent(importId, id -> new ConceptTreeCache(this, type.size())); + public void initializeIdCache(Import importId) { + caches.computeIfAbsent(importId, id -> new ConceptTreeCache(this)); } public void removeImportCache(Import imp) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/dictionary/Dictionary.java b/backend/src/main/java/com/bakdata/conquery/models/dictionary/Dictionary.java deleted file mode 100644 index 8516d2f652..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/dictionary/Dictionary.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.bakdata.conquery.models.dictionary; - -import javax.validation.constraints.NotNull; - -import com.bakdata.conquery.io.cps.CPSBase; -import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.identifiable.NamedImpl; -import com.bakdata.conquery.models.identifiable.ids.NamespacedIdentifiable; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; -import com.fasterxml.jackson.annotation.JsonTypeInfo; -import lombok.Getter; -import lombok.Setter; -import lombok.ToString; - -@JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, property = "type") -@CPSBase -@ToString(onlyExplicitlyIncluded = true, callSuper = true) -public abstract class Dictionary extends NamedImpl implements NamespacedIdentifiable, Iterable { - - @Getter - @Setter - @NsIdRef - private Dataset dataset; - - public Dictionary(Dataset dataset, @NotNull String name) { - this.setName(name); - this.dataset = dataset; - } - - @Override - public DictionaryId createId() { - return new DictionaryId(dataset.getId(), getName()); - } - - public 
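/*
 * The ConceptTreeCache is now keyed by the raw string value instead of a dictionary id, so it
 * needs no size up front and also caches unresolvable values (as null entries) when the row map
 * was not evaluated. Minimal usage sketch; treeConcept and rowMap are assumed to exist in the
 * calling code, and "A10.1" is just an example value.
 */
ConceptTreeCache cache = new ConceptTreeCache(treeConcept);
ConceptTreeChild child = cache.findMostSpecificChild("A10.1", rowMap);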
abstract int add(byte[] bytes); - - public abstract int put(byte[] bytes); - - public abstract int getId(byte[] bytes); - - public abstract byte[] getElement(int id); - - @ToString.Include - public abstract int size(); - - public static MapDictionary copyUncompressed(Dictionary dict) { - MapDictionary newDict = new MapDictionary(dict.getDataset(), dict.getName()); - for (DictionaryEntry e : dict) { - newDict.add(e.getValue()); - } - return newDict; - } - - public abstract long estimateMemoryConsumption(); - -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/dictionary/DictionaryEntry.java b/backend/src/main/java/com/bakdata/conquery/models/dictionary/DictionaryEntry.java deleted file mode 100644 index 10c283161f..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/dictionary/DictionaryEntry.java +++ /dev/null @@ -1,9 +0,0 @@ -package com.bakdata.conquery.models.dictionary; - -import lombok.Value; - -@Value -public class DictionaryEntry { - private final int id; - private final byte[] value; -} \ No newline at end of file diff --git a/backend/src/main/java/com/bakdata/conquery/models/dictionary/DictionaryMapping.java b/backend/src/main/java/com/bakdata/conquery/models/dictionary/DictionaryMapping.java deleted file mode 100644 index df48a7b59a..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/dictionary/DictionaryMapping.java +++ /dev/null @@ -1,114 +0,0 @@ -package com.bakdata.conquery.models.dictionary; - - -import java.util.stream.IntStream; - -import com.bakdata.conquery.models.events.stores.root.IntegerStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import it.unimi.dsi.fastutil.ints.Int2IntMap; -import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap; -import it.unimi.dsi.fastutil.ints.IntCollection; -import lombok.AccessLevel; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; -import lombok.extern.slf4j.Slf4j; - -/** - * Create a mapping from one {@link Dictionary} to the other (Map source to target). Adding all ids in target, not in source, to source. 
- */ -@RequiredArgsConstructor(access = AccessLevel.PRIVATE) -@Getter -@Slf4j -@ToString(onlyExplicitlyIncluded = true, callSuper = true) -public class DictionaryMapping { - - @ToString.Include - private final Dictionary sourceDictionary; - - @ToString.Include - private final Dictionary targetDictionary; - - private final Int2IntMap source2Target; - - private final Int2IntMap target2Source; - - @ToString.Include - private final int numberOfNewIds; - - public static DictionaryMapping createAndImport(Dictionary from, Dictionary into) { - - log.debug("Importing values from `{}` into `{}`", from, into); - - int newIds = 0; - - final Int2IntMap source2Target = new Int2IntOpenHashMap(from.size()); - - source2Target.defaultReturnValue(-1); - - final Int2IntMap target2Source = new Int2IntOpenHashMap(from.size()); - - target2Source.defaultReturnValue(-1); - - for (int id = 0; id < from.size(); id++) { - - final byte[] value = from.getElement(id); - int targetId = into.getId(value); - - //if id was unknown until now - if (targetId == -1L) { - targetId = into.add(value); - newIds++; - } - - if (source2Target.put(id, targetId) != -1) { - log.error("Multiple ids map to same target"); - } - - if (target2Source.put(targetId, id) != -1) { - log.error("Multiple ids map to same target"); - } - - } - - return new DictionaryMapping(from, into, source2Target, target2Source, newIds); - } - - public int source2Target(int sourceId) { - return source2Target.get(sourceId); - } - - public int target2Source(int targetId) { - return target2Source.get(targetId); - } - - public IntCollection source() { - return source2Target.keySet(); - } - - public IntCollection target() { - return source2Target.values(); - } - - /** - * Mutably applies mapping to store. - */ - public void applyToStore(StringStore from, IntegerStore to) { - IntStream.range(0, from.getLines()) - .parallel() - .forEach(event -> { - if (!from.has(event)) { - to.setNull(event); - return; - } - final int string = from.getString(event); - final int value = source2Target(string); - - if (value == -1) { - throw new IllegalStateException(String.format("Missing mapping for %s", string)); - } - to.setInteger(event, value); - }); - } - -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/dictionary/EncodedDictionary.java b/backend/src/main/java/com/bakdata/conquery/models/dictionary/EncodedDictionary.java deleted file mode 100644 index 3958a3691c..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/dictionary/EncodedDictionary.java +++ /dev/null @@ -1,31 +0,0 @@ -package com.bakdata.conquery.models.dictionary; - -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; - -/** - * Handle class allowing direct encoded access to Dictionary without wrapping inside StringTypeEncoded. - * - * Main usage is PrimaryDictionary. 
- */ -public class EncodedDictionary { - - private final Dictionary dict; - private final EncodedStringStore.Encoding encoding; - - public EncodedDictionary(Dictionary dict, EncodedStringStore.Encoding encoding) { - this.dict = dict; - this.encoding = encoding; - } - - public String getElement(int id) { - return encoding.decode(dict.getElement(id)); - } - - public int getId(String value) { - return dict.getId(encoding.encode(value)); - } - - public int getSize() { - return dict.size(); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/dictionary/MapDictionary.java b/backend/src/main/java/com/bakdata/conquery/models/dictionary/MapDictionary.java deleted file mode 100644 index 22d44034d2..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/dictionary/MapDictionary.java +++ /dev/null @@ -1,139 +0,0 @@ -package com.bakdata.conquery.models.dictionary; - -import java.math.RoundingMode; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; -import java.util.ListIterator; - -import javax.validation.constraints.NotNull; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.datasets.Dataset; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonProperty; -import com.google.common.math.DoubleMath; -import it.unimi.dsi.fastutil.Hash; -import it.unimi.dsi.fastutil.bytes.ByteArrayList; -import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; -import lombok.ToString; - -@CPSType(id = "MAP_DICTIONARY", base = Dictionary.class) -@ToString(callSuper = true, onlyExplicitlyIncluded = true) -public class MapDictionary extends Dictionary { - - private Object2IntOpenHashMap value2Id; - private List id2Value; - - public MapDictionary(Dataset dataset, @NotNull String name) { - super(dataset, name); - value2Id = new Object2IntOpenHashMap<>(); - value2Id.defaultReturnValue(-1); - id2Value = new ArrayList<>(); - } - - @JsonCreator - public MapDictionary(Dataset dataset, String name, byte[][] id2Value) { - super(dataset, name); - if (id2Value == null) { - id2Value = new byte[0][]; - } - this.id2Value = new ArrayList<>(id2Value.length); - value2Id = new Object2IntOpenHashMap<>(id2Value.length); - value2Id.defaultReturnValue(-1); - - for (int i = 0; i < id2Value.length; i++) { - ByteArrayList v = new ByteArrayList(id2Value[i]); - this.id2Value.add(v); - value2Id.put(v, i); - } - } - - @JsonProperty - public byte[][] getId2Value() { - final int size = id2Value.size(); - final byte[][] result = new byte[size][]; - - for (int i = 0; i < size; i++) { - result[i] = id2Value.get(i).elements(); - } - return result; - } - - @Override - public int add(byte[] bytes) { - ByteArrayList value = new ByteArrayList(bytes); - int id = value2Id.getInt(value); - if (id == -1) { - id = id2Value.size(); - value2Id.put(value, id); - id2Value.add(value); - } - else { - throw new IllegalStateException("there already was an element " + Arrays.toString(bytes)); - } - return id; - } - - @Override - public int put(byte[] bytes) { - ByteArrayList value = new ByteArrayList(bytes); - int id = value2Id.getInt(value); - if (id == -1) { - id = id2Value.size(); - value2Id.put(value, id); - id2Value.add(value); - } - return id; - } - - @Override - public int getId(byte[] bytes) { - return value2Id.getInt(new ByteArrayList(bytes)); - } - - @Override - public byte[] getElement(int id) { - return id2Value.get(id).elements(); - } - - @Override - public int size() { - return id2Value.size(); - } - 
- @Override - public Iterator iterator() { - ListIterator it = id2Value.listIterator(); - return new Iterator() { - @Override - public DictionaryEntry next() { - return new DictionaryEntry(it.nextIndex(), it.next().elements()); - } - - @Override - public boolean hasNext() { - return it.hasNext(); - } - }; - } - - public static long estimateMemoryConsumption(long entries, long totalBytes) { - return DoubleMath.roundToLong( - //size of two collections and string object overhead - entries * (48f + 8f / Hash.DEFAULT_LOAD_FACTOR) - //number of string bytes - + totalBytes, - RoundingMode.CEILING - ); - } - - @Override - public long estimateMemoryConsumption() { - return MapDictionary.estimateMemoryConsumption( - id2Value.size(), - id2Value.stream().mapToLong(ByteArrayList::size).sum() - ); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java b/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java index 618920194f..2fccdf4f61 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/Bucket.java @@ -5,7 +5,6 @@ import java.util.Collection; import java.util.HashMap; import java.util.Map; -import java.util.Set; import javax.validation.constraints.Min; import javax.validation.constraints.NotNull; @@ -33,7 +32,9 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.JsonManagedReference; +import com.google.common.collect.ImmutableSet; import io.dropwizard.validation.ValidationMethod; +import it.unimi.dsi.fastutil.objects.Object2IntMap; import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Getter; @@ -58,35 +59,28 @@ public class Bucket extends IdentifiableImpl implements NamespacedIden @Min(0) private final int bucket; - private final int root; - @Min(0) private final int numberOfEvents; - - @JsonManagedReference @Setter(AccessLevel.PROTECTED) private ColumnStore[] stores; - //TODO consider usage of SortedSet but that would require custom deserializer, sorted set would have the benefit, that iteration of entities would also conform to layout of data giving some performance gains to CBlocks and Matching Stats - private final Set entities; - /** * start of each Entity in {@code stores}. */ - private final int[] start; + private final Object2IntMap start; /** * Number of events per Entity in {@code stores}. 
*/ - private final int[] ends; + private final Object2IntMap ends; @NsIdRef private final Import imp; @JsonIgnore - @ValidationMethod(message = "Number of events does not match to the number of stores") + @ValidationMethod(message = "Number of events does not match the length of some stores.") public boolean isNumberOfEventsEqualsNumberOfStores() { return Arrays.stream(stores).allMatch(columnStore -> columnStore.getLines() == getNumberOfEvents()); } @@ -105,31 +99,28 @@ public BucketId createId() { /** * Iterate entities */ - public Collection entities() { - return entities; + public Collection entities() { + return ImmutableSet.copyOf(start.keySet()); } - public boolean containsEntity(int entity) { - return getEntityStart(entity) != -1; + public boolean containsEntity(String entity) { + return start.containsKey(entity); } - public int getEntityStart(int entityId) { - return start[getEntityIndex(entityId)]; + public int getEntityStart(String entityId) { + return start.get(entityId); } - public int getEntityIndex(int entityId) { - return entityId - root; - } - public int getEntityEnd(int entityId) { - return ends[getEntityIndex(entityId)]; + public int getEntityEnd(String entityId) { + return ends.getInt(entityId); } public final boolean has(int event, Column column) { return getStore(column).has(event); } - public int getString(int event, @NotNull Column column) { + public String getString(int event, @NotNull Column column) { return ((StringStore) getStore(column)).getString(event); } @@ -210,4 +201,4 @@ public Dataset getDataset() { public ColumnStore getStore(@NotNull String storeName) { return getStore(getTable().getColumnByName(storeName)); } -} +} \ No newline at end of file diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/BucketManager.java b/backend/src/main/java/com/bakdata/conquery/models/events/BucketManager.java index 0610762773..2135951817 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/BucketManager.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/BucketManager.java @@ -4,6 +4,7 @@ import java.util.Collection; import java.util.Collections; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; @@ -30,8 +31,8 @@ import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import it.unimi.dsi.fastutil.ints.Int2ObjectMaps; import it.unimi.dsi.fastutil.ints.IntArraySet; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.ints.IntSet; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.SneakyThrows; @@ -51,8 +52,8 @@ public class BucketManager { private final WorkerStorage storage; private final Worker worker; - @Getter - private final Int2ObjectMap entities; + + private final Object2IntMap entity2Bucket; /** @@ -71,33 +72,40 @@ public class BucketManager { private final int entityBucketSize; public static BucketManager create(Worker worker, WorkerStorage storage, int entityBucketSize) { - Int2ObjectMap entities = new Int2ObjectAVLTreeMap<>(); - Map>> connectorCBlocks = new HashMap<>(); - Map>> tableBuckets = new HashMap<>(); + final Map>> connectorCBlocks = new HashMap<>(); + final Map>> tableBuckets = new HashMap<>(); + final Object2IntMap entity2Bucket = new Object2IntOpenHashMap<>(); - IntArraySet assignedBucketNumbers = worker.getInfo().getIncludedBuckets(); + final IntArraySet assignedBucketNumbers = 
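/*
 * Buckets now address entities directly by their String id: `start` and `ends` are
 * Object2IntMaps, so iterating an entity's events no longer needs the root/bucket-size
 * arithmetic. Sketch of the iteration pattern also used by the CBlock calculations further
 * below; `bucket` is assumed to be in scope.
 */
for (String entity : bucket.entities()) {
	final int end = bucket.getEntityEnd(entity);

	for (int event = bucket.getEntityStart(entity); event < end; event++) {
		// inspect the column stores of `bucket` for this event
	}
}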
worker.getInfo().getIncludedBuckets(); log.trace("Trying to load these buckets that map to: {}", assignedBucketNumbers); for (Bucket bucket : storage.getAllBuckets()) { if (!assignedBucketNumbers.contains(bucket.getBucket())) { log.warn("Found Bucket[{}] in Storage that does not belong to this Worker according to the Worker information.", bucket.getId()); } - registerBucket(bucket, entities, tableBuckets); + registerBucket(bucket, entity2Bucket, tableBuckets); } for (CBlock cBlock : storage.getAllCBlocks()) { registerCBlock(cBlock, connectorCBlocks); } - return new BucketManager(worker.getJobManager(), storage, worker, entities, connectorCBlocks, tableBuckets, entityBucketSize); + return new BucketManager(worker.getJobManager(), storage, worker, entity2Bucket, connectorCBlocks, tableBuckets, entityBucketSize); } /** * register entities, and create query specific indices for bucket */ - private static void registerBucket(Bucket bucket, Int2ObjectMap entities, Map>> tableBuckets) { - for (int entity : bucket.entities()) { - entities.computeIfAbsent(entity, Entity::new); + private static void registerBucket(Bucket bucket, Object2IntMap entity2Bucket, Map>> tableBuckets) { + for (String entity : bucket.entities()) { + + if(entity2Bucket.containsKey(entity)){ + // This is an unrecoverable state, but should not happen in practice. Just a precaution. + assert entity2Bucket.getInt(entity) == bucket.getBucket(); + continue; + } + + entity2Bucket.put(entity, bucket.getBucket()); } tableBuckets @@ -121,16 +129,16 @@ public Locked acquireLock(Connector connector) { @SneakyThrows public void fullUpdate() { - CalculateCBlocksJob job = new CalculateCBlocksJob(storage, this, worker.getJobsExecutorService()); + final CalculateCBlocksJob job = new CalculateCBlocksJob(storage, this, worker.getJobsExecutorService()); for (Concept c : storage.getAllConcepts()) { if (!(c instanceof TreeConcept)) { continue; } - for (ConceptTreeConnector con : ((TreeConcept)c).getConnectors()) { + for (ConceptTreeConnector con : ((TreeConcept) c).getConnectors()) { for (Bucket bucket : storage.getAllBuckets()) { - CBlockId cBlockId = new CBlockId(bucket.getId(), con.getId()); + final CBlockId cBlockId = new CBlockId(bucket.getId(), con.getId()); if (!con.getTable().equals(bucket.getTable())) { continue; @@ -152,26 +160,30 @@ public void fullUpdate() { } } + public boolean hasCBlock(CBlockId id) { + return storage.getCBlock(id) != null; + } + public synchronized void addCalculatedCBlock(CBlock cBlock) { registerCBlock(cBlock, connectorToCblocks); } public void addBucket(Bucket bucket) { storage.addBucket(bucket); - registerBucket(bucket, entities, tableToBuckets); + registerBucket(bucket, entity2Bucket, tableToBuckets); - CalculateCBlocksJob job = new CalculateCBlocksJob(storage, this, worker.getJobsExecutorService()); + final CalculateCBlocksJob job = new CalculateCBlocksJob(storage, this, worker.getJobsExecutorService()); for (Concept concept : storage.getAllConcepts()) { if (!(concept instanceof TreeConcept)) { continue; } - for (ConceptTreeConnector connector : ((TreeConcept)concept).getConnectors()) { + for (ConceptTreeConnector connector : ((TreeConcept) concept).getConnectors()) { if (!connector.getTable().equals(bucket.getTable())) { continue; } - CBlockId cBlockId = new CBlockId(bucket.getId(), connector.getId()); + final CBlockId cBlockId = new CBlockId(bucket.getId(), connector.getId()); if (hasCBlock(cBlockId)) { @@ -186,32 +198,18 @@ public void addBucket(Bucket bucket) { jobManager.addSlowJob(job); } - public void 
addConcept(Concept concept) { - storage.updateConcept(concept); + public void removeTable(Table table) { + final Int2ObjectMap> removed = tableToBuckets.remove(table); - if (!(concept instanceof TreeConcept)){ - return; + // It's possible no buckets were registered yet + if (removed != null) { + removed.values() + .stream() + .flatMap(List::stream) + .forEach(this::removeBucket); } - CalculateCBlocksJob job = new CalculateCBlocksJob(storage, this, worker.getJobsExecutorService()); - - for (ConceptTreeConnector connector : ((TreeConcept)concept).getConnectors()) { - - for (Bucket bucket : storage.getAllBuckets()) { - if (!bucket.getTable().equals(connector.getTable())) { - continue; - } - - final CBlockId cBlockId = new CBlockId(bucket.getId(), connector.getId()); - - if (hasCBlock(cBlockId)) { - continue; - } - - job.addCBlock(bucket, connector); - } - } - jobManager.addSlowJob(job); + storage.removeTable(table.getId()); } public void removeBucket(Bucket bucket) { @@ -220,18 +218,6 @@ public void removeBucket(Bucket bucket) { .filter(cblock -> cblock.getBucket().equals(bucket)) .forEach(this::removeCBlock); - for (int entityId : bucket.entities()) { - final Entity entity = entities.get(entityId); - - if (entity == null) { - continue; - } - - if (isEntityEmpty(entity)) { - entities.remove(entityId); - } - } - tableToBuckets.getOrDefault(bucket.getTable(), Int2ObjectMaps.emptyMap()) .getOrDefault(bucket.getBucket(), Collections.emptyList()) .remove(bucket); @@ -239,25 +225,6 @@ public void removeBucket(Bucket bucket) { storage.removeBucket(bucket.getId()); } - public void removeConcept(Concept concept) { - - // Just drop all CBlocks at once for the connectors - for (Connector connector : concept.getConnectors()) { - final Int2ObjectMap> removed = connectorToCblocks.remove(connector); - - // It's possible that no data has been loaded yet - if(removed != null) { - removed.values().stream() - .map(Map::values) - .flatMap(Collection::stream) - .map(CBlock::getId) - .forEach(storage::removeCBlock); - } - } - - storage.removeConcept(concept.getId()); - } - private void removeCBlock(CBlock cBlock) { connectorToCblocks.getOrDefault(cBlock.getConnector(), Int2ObjectMaps.emptyMap()) @@ -268,32 +235,12 @@ private void removeCBlock(CBlock cBlock) { storage.removeCBlock(cBlock.getId()); } - /** - * Test if there is any known associated data to the Entity in the {@link BucketManager} - * - * @param entity - */ - public boolean isEntityEmpty(Entity entity) { - return !hasBucket(Entity.getBucket(entity.getId(), worker.getInfo().getEntityBucketSize())); - } - - private boolean hasBucket(int id) { - return tableToBuckets.values().stream() - .anyMatch(buckets -> buckets.containsKey(id)); + public Set getEntities() { + return Collections.unmodifiableSet(entity2Bucket.keySet()); } - public void removeTable(Table table) { - final Int2ObjectMap> removed = tableToBuckets.remove(table); - - // It's possible no buckets were registered yet - if (removed != null) { - removed.values() - .stream() - .flatMap(List::stream) - .forEach(this::removeBucket); - } - - storage.removeTable(table.getId()); + private int getBucket(String id) { + return entity2Bucket.getInt(id); } /** @@ -316,12 +263,8 @@ public void removeImport(Import imp) { storage.removeImport(imp.getId()); } - public boolean hasCBlock(CBlockId id) { - return storage.getCBlock(id) != null; - } - public List getEntityBucketsForTable(Entity entity, Table table) { - final int bucketId = Entity.getBucket(entity.getId(), worker.getInfo().getEntityBucketSize()); + 
final int bucketId = getBucket(entity.getId()); return tableToBuckets.getOrDefault(table, Int2ObjectMaps.emptyMap()) .getOrDefault(bucketId, Collections.emptyList()); @@ -330,19 +273,19 @@ public List getEntityBucketsForTable(Entity entity, Table table) { /** * Collects all Entites, that have any of the concepts on the connectors in a specific time. */ - public IntSet getEntitiesWithConcepts(Collection> concepts, Set connectors, CDateSet restriction) { + public Set getEntitiesWithConcepts(Collection> concepts, Set connectors, CDateSet restriction) { final long requiredBits = ConceptNode.calculateBitMask(concepts); - final IntSet out = new IntOpenHashSet(); + final Set out = new HashSet<>(); for (Connector connector : connectors) { - if(!connectorToCblocks.containsKey(connector)) { + if (!connectorToCblocks.containsKey(connector)) { continue; } for (Map bucketCBlockMap : connectorToCblocks.get(connector).values()) { for (CBlock cblock : bucketCBlockMap.values()) { - for (int entity : cblock.getBucket().entities()) { + for (String entity : cblock.getBucket().entities()) { if (cblock.isConceptIncluded(entity, requiredBits) && restriction.intersects(cblock.getEntityDateRange(entity))) { out.add(entity); @@ -356,19 +299,19 @@ public IntSet getEntitiesWithConcepts(Collection> concepts, Se } public Map getEntityCBlocksForConnector(Entity entity, Connector connector) { - final int bucketId = Entity.getBucket(entity.getId(), worker.getInfo().getEntityBucketSize()); + final int bucketId = getBucket(entity.getId()); return connectorToCblocks.getOrDefault(connector, Int2ObjectMaps.emptyMap()) .getOrDefault(bucketId, Collections.emptyMap()); } public boolean hasEntityCBlocksForConnector(Entity entity, Connector connector) { - final int bucketId = Entity.getBucket(entity.getId(), worker.getInfo().getEntityBucketSize()); + final int bucketId = getBucket(entity.getId()); final Map cblocks = connectorToCblocks.getOrDefault(connector, Int2ObjectMaps.emptyMap()) - .getOrDefault(bucketId, Collections.emptyMap()); + .getOrDefault(bucketId, Collections.emptyMap()); for (Bucket bucket : cblocks.keySet()) { - if (bucket.containsEntity(entity.getId())){ + if (bucket.containsEntity(entity.getId())) { return true; } } @@ -385,5 +328,52 @@ public void updateConcept(Concept incoming) { addConcept(incoming); } + public void removeConcept(Concept concept) { + + // Just drop all CBlocks at once for the connectors + for (Connector connector : concept.getConnectors()) { + final Int2ObjectMap> removed = connectorToCblocks.remove(connector); + + // It's possible that no data has been loaded yet + if (removed != null) { + removed.values().stream() + .map(Map::values) + .flatMap(Collection::stream) + .map(CBlock::getId) + .forEach(storage::removeCBlock); + } + } + + storage.removeConcept(concept.getId()); + } + + public void addConcept(Concept concept) { + storage.updateConcept(concept); + + if (!(concept instanceof TreeConcept)) { + return; + } + + final CalculateCBlocksJob job = new CalculateCBlocksJob(storage, this, worker.getJobsExecutorService()); + + for (ConceptTreeConnector connector : ((TreeConcept) concept).getConnectors()) { + + for (Bucket bucket : storage.getAllBuckets()) { + if (!bucket.getTable().equals(connector.getTable())) { + continue; + } + + final CBlockId cBlockId = new CBlockId(bucket.getId(), connector.getId()); + + if (hasCBlock(cBlockId)) { + continue; + } + + job.addCBlock(bucket, connector); + } + } + jobManager.addSlowJob(job); + } + -} +} \ No newline at end of file diff --git 
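/*
 * BucketManager drops the Int2ObjectMap of Entity objects in favour of a plain mapping from
 * entity id to bucket number; bucket lookups for an entity go through that map instead of
 * Entity.getBucket(id, bucketSize). Sketch of registration and lookup as done above; `bucket`
 * and `entityId` are assumed to be in scope.
 */
Object2IntMap<String> entity2Bucket = new Object2IntOpenHashMap<>();

for (String entity : bucket.entities()) {
	entity2Bucket.put(entity, bucket.getBucket());   // each entity belongs to exactly one bucket
}

int bucketId = entity2Bucket.getInt(entityId);       // used to select table buckets and CBlocks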
a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java index 0f46f37489..68f26c7ffc 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/CBlock.java @@ -1,7 +1,8 @@ package com.bakdata.conquery.models.events; import java.util.Arrays; -import java.util.List; +import java.util.Collection; +import java.util.HashMap; import java.util.Map; import javax.validation.constraints.NotNull; @@ -47,111 +48,60 @@ @Slf4j @ToString(onlyExplicitlyIncluded = true) public class CBlock extends IdentifiableImpl implements NamespacedIdentifiable { - - /** - * Estimate the memory usage of CBlocks. - * - * @param depthEstimate estimate of depth of mostSpecificChildren - */ - public static long estimateMemoryBytes(long entities, long entries, double depthEstimate) { - return Math.round(entities * - ( - Integer.BYTES + Long.BYTES // includedConcepts - + Integer.BYTES // minDate - + Integer.BYTES // maxDate - ) - + entries * depthEstimate * Integer.BYTES // mostSpecificChildren (rough estimate, not resident on ManagerNode) - ); - } - + //TODO Index per StringStore for isOfInterest @ToString.Include @NsIdRef private final Bucket bucket; - @NotNull @NsIdRef @ToString.Include private final ConceptTreeConnector connector; - /** * We leverage the fact that a Bucket contains entities from bucketSize * {@link Bucket#getBucket()} to (1 + bucketSize) * {@link Bucket#getBucket()} - 1 to layout our internal structure. * This is maps the first Entities entry in this bucket to 0. */ private final int root; - /** * Crude Bloomfilter for Concept inclusion per Entity: Each set bit denotes that the concept (with localId <= 64) or a descendant of that concept (with localId > 64) is present for the entity in this Bucket. */ - private final long[] includedConceptElementsPerEntity; - + private final Map includedConceptElementsPerEntity; /** * Statistic for fast lookup if entity is of interest. */ - private final CDateRange[] entitySpan; - - + private final Map entitySpan; /** * Per event: represents the path in a {@link TreeConcept} to optimize lookup. * Nodes in the tree are simply enumerated. */ private final int[][] mostSpecificChildren; + /** + * Estimate the memory usage of CBlocks. 
+ * + * @param depthEstimate estimate of depth of mostSpecificChildren + */ + public static long estimateMemoryBytes(long entities, long entries, double depthEstimate) { + return Math.round(entities * + ( + Integer.BYTES + Long.BYTES // includedConcepts + + Integer.BYTES // minDate + + Integer.BYTES // maxDate + ) + + entries * depthEstimate * Integer.BYTES // mostSpecificChildren (rough estimate, not resident on ManagerNode) + ); + } + public static CBlock createCBlock(ConceptTreeConnector connector, Bucket bucket, int bucketSize) { final int root = bucket.getBucket() * bucketSize; final int[][] mostSpecificChildren = calculateSpecificChildrenPaths(bucket, connector); - final long[] includedConcepts = calculateConceptElementPathBloomFilter(bucketSize, bucket, mostSpecificChildren); - final CDateRange[] entitySpans = calculateEntityDateIndices(bucket, bucketSize); + //TODO Object2LongMap + final Map includedConcepts = calculateConceptElementPathBloomFilter(bucketSize, bucket, mostSpecificChildren); + final Map entitySpans = calculateEntityDateIndices(bucket); return new CBlock(bucket, connector, root, includedConcepts, entitySpans, mostSpecificChildren); } - - public int[] getPathToMostSpecificChild(int event) { - if (mostSpecificChildren == null) { - return null; - } - - return mostSpecificChildren[event]; - } - - public int getMostSpecificChildLocalId(int event) { - if (mostSpecificChildren == null) { - return -1; - } - - final int[] mostSpecificChild = mostSpecificChildren[event]; - return mostSpecificChild[mostSpecificChild.length - 1]; - } - - public CDateRange getEntityDateRange(int entity) { - return entitySpan[bucket.getEntityIndex(entity)]; - } - - @Override - @JsonIgnore - public CBlockId createId() { - return new CBlockId(bucket.getId(), connector.getId()); - } - - public boolean isConceptIncluded(int entity, long requiredBits) { - if (requiredBits == 0L) { - return true; - } - - final int index = bucket.getEntityIndex(entity); - - final long bits = includedConceptElementsPerEntity[index]; - - return (bits & requiredBits) != 0L; - } - - @Override - @JsonIgnore - public Dataset getDataset() { - return bucket.getDataset(); - } - /** * Calculates the path for each event from the root of the {@link TreeConcept} to the most specific {@link ConceptTreeChild} * denoted by the individual {@link ConceptTreeChild#getPrefix()}. @@ -169,7 +119,7 @@ private static int[][] calculateSpecificChildrenPaths(Bucket bucket, ConceptTree stringStore = (StringStore) bucket.getStores()[column.getPosition()]; - treeConcept.initializeIdCache(stringStore, bucket.getImp()); + treeConcept.initializeIdCache(bucket.getImp()); } // No column only possible if we have just one tree element! else if (treeConcept.countElements() == 1) { @@ -199,11 +149,9 @@ else if (treeConcept.countElements() == 1) { continue; } String stringValue = ""; - int valueIndex = -1; if (stringStore != null) { - valueIndex = bucket.getString(event, column); - stringValue = stringStore.getElement(valueIndex); + stringValue = bucket.getString(event, column); } // Lazy evaluation of map to avoid allocations if possible. @@ -219,7 +167,7 @@ else if (treeConcept.countElements() == 1) { final ConceptTreeChild child = cache == null ? treeConcept.findMostSpecificChild(stringValue, rowMap) - : cache.findMostSpecificChild(valueIndex, stringValue, rowMap); + : cache.findMostSpecificChild(stringValue, rowMap); // All unresolved elements resolve to the root. 
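The CBlock hunks in this change replace the positional long[] bloom masks with a Map keyed by the entity's String id. A minimal, self-contained sketch of the resulting inclusion test; entity ids and masks here are invented for illustration and this is not project code:

import java.util.HashMap;
import java.util.Map;

class ConceptInclusionSketch {
	public static void main(String[] args) {
		// Per-entity 64-bit concept masks, keyed by the entity's String id as in the reworked CBlock.
		Map<String, Long> masks = new HashMap<>();
		masks.merge("entity-42", 0b0101L, (a, b) -> a | b); // OR in the path mask of one event
		masks.merge("entity-42", 0b1000L, (a, b) -> a | b); // another event of the same entity

		long requiredBits = 0b0100L; // in the real code this comes from ConceptNode.calculateBitMask(...)
		Long bits = masks.get("entity-42");

		// Mirrors isConceptIncluded: an empty query mask matches everything, a missing entity is excluded.
		boolean included = requiredBits == 0L || (bits != null && (bits & requiredBits) != 0L);
		System.out.println(included); // true
	}
}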
if (child == null) { @@ -252,14 +200,13 @@ else if (treeConcept.countElements() == 1) { * Calculate for every event a 64 bit long bloom filter, that masks the concept element path within * the first 64 {@link com.bakdata.conquery.models.datasets.concepts.ConceptElement}s of the {@link TreeConcept}. * This is used in the evaluation of a query to quickly decide if an event is of interest by logically ANDing - * the bitmask of the event with the bitmask calculated by {@link ConceptNode#calculateBitMask(List)} + * the bitmask of the event with the bitmask calculated by {@link ConceptNode#calculateBitMask(Collection)} */ - private static long[] calculateConceptElementPathBloomFilter(int bucketSize, Bucket bucket, int[][] mostSpecificChildren) { - final long[] includedConcepts = new long[bucketSize]; + private static Map calculateConceptElementPathBloomFilter(int bucketSize, Bucket bucket, int[][] mostSpecificChildren) { + final Map includedConcepts = new HashMap<>(bucketSize); - for (int entity : bucket.getEntities()) { + for (String entity : bucket.entities()) { - final int entityIndex = bucket.getEntityIndex(entity); final int end = bucket.getEntityEnd(entity); for (int event = bucket.getEntityStart(entity); event < end; event++) { @@ -270,7 +217,7 @@ private static long[] calculateConceptElementPathBloomFilter(int bucketSize, Buc final long mask = calculateBitMask(i, mostSpecificChild); - includedConcepts[entityIndex] |= mask; + includedConcepts.compute(entity, (ignored, current) -> current == null ? mask : current | mask); } } } @@ -278,7 +225,6 @@ private static long[] calculateConceptElementPathBloomFilter(int bucketSize, Buc return includedConcepts; } - /** * Calculates the bloom filter from the precomputed path to the most specific {@link ConceptTreeChild}. */ @@ -292,14 +238,13 @@ public static long calculateBitMask(int pathIndex, int[] mostSpecificChild) { return calculateBitMask(pathIndex - 1, mostSpecificChild); } - /** * For every included entity, calculate min and max and store them as statistics in the CBlock. * * @implNote This is an unrolled implementation of {@link CDateRange#span(CDateRange)}. */ - private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucketSize) { - final CDateRange[] spans = new CDateRange[bucketSize]; + private static Map calculateEntityDateIndices(Bucket bucket) { + final Map spans = new HashMap<>(); final Table table = bucket.getTable(); @@ -309,8 +254,7 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket continue; } - for (int entity : bucket.getEntities()) { - final int index = bucket.getEntityIndex(entity); + for (String entity : bucket.entities()) { final int end = bucket.getEntityEnd(entity); // We unroll span for the whole bucket/entity, this avoids costly reallocation in a loop @@ -343,25 +287,60 @@ private static CDateRange[] calculateEntityDateIndices(Bucket bucket, int bucket final CDateRange span = CDateRange.of(min, max); - if (spans[index] == null) { - spans[index] = span; - } - else { - spans[index] = spans[index].span(span); - } + spans.compute(entity, (ignored, current) -> current == null ? 
span : current.span(span)); } } - for (int index = 0; index < spans.length; index++) { - if (spans[index] != null) { - continue; - } + return spans; + } - spans[index] = CDateRange.all(); + public int[] getPathToMostSpecificChild(int event) { + if (mostSpecificChildren == null) { + return null; } - return spans; + return mostSpecificChildren[event]; + } + + public int getMostSpecificChildLocalId(int event) { + if (mostSpecificChildren == null) { + return -1; + } + + final int[] mostSpecificChild = mostSpecificChildren[event]; + return mostSpecificChild[mostSpecificChild.length - 1]; + } + + public CDateRange getEntityDateRange(String entity) { + return entitySpan.getOrDefault(entity, CDateRange.all()); + } + + @Override + @JsonIgnore + public CBlockId createId() { + return new CBlockId(bucket.getId(), connector.getId()); + } + + public boolean isConceptIncluded(String entity, long requiredBits) { + if (requiredBits == 0L) { + return true; + } + + if(!includedConceptElementsPerEntity.containsKey(entity)){ + return false; + } + + + final long bits = includedConceptElementsPerEntity.get(entity); + + return (bits & requiredBits) != 0L; + } + + @Override + @JsonIgnore + public Dataset getDataset() { + return bucket.getDataset(); } -} +} \ No newline at end of file diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/EmptyBucket.java b/backend/src/main/java/com/bakdata/conquery/models/events/EmptyBucket.java index d820c82fb2..f0b6f64951 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/EmptyBucket.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/EmptyBucket.java @@ -1,7 +1,6 @@ package com.bakdata.conquery.models.events; import java.math.BigDecimal; -import java.util.Collections; import java.util.Map; import com.bakdata.conquery.models.common.CDateSet; @@ -9,6 +8,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.ValidityDate; import com.bakdata.conquery.models.events.stores.root.ColumnStore; +import it.unimi.dsi.fastutil.objects.Object2IntMaps; import lombok.Getter; /** @@ -20,7 +20,7 @@ public class EmptyBucket extends Bucket { private static final EmptyBucket Instance = new EmptyBucket(); public EmptyBucket() { - super(0, 0, 0, Collections.emptySet(), new int[0], new int[0], null); + super(0, 0, Object2IntMaps.emptyMap(), Object2IntMaps.emptyMap(), null); this.setStores(new ColumnStore[0]); } @@ -31,24 +31,24 @@ public boolean eventIsContainedIn(int event, ValidityDate column, CDateSet dateR } @Override - public boolean containsEntity(int entity) { + public boolean containsEntity(String entity) { return false; } @Override - public int getEntityStart(int entityId) { + public int getEntityStart(String entityId) { throw new IllegalStateException("ALL_IDS Bucket does not do anything"); } @Override - public int getEntityEnd(int entityId) { + public int getEntityEnd(String entityId) { throw new IllegalStateException("ALL_IDS Bucket does not do anything"); } @Override - public int getString(int event, Column column) { + public String getString(int event, Column column) { throw new IllegalStateException("Bucket for ALL_IDS_TABLE may not be evaluated."); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/EmptyStore.java b/backend/src/main/java/com/bakdata/conquery/models/events/EmptyStore.java index 9966352440..2d156a3226 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/EmptyStore.java +++ 
b/backend/src/main/java/com/bakdata/conquery/models/events/EmptyStore.java @@ -4,7 +4,6 @@ import java.util.stream.Stream; import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.events.stores.root.BooleanStore; import com.bakdata.conquery.models.events.stores.root.ColumnStore; import com.bakdata.conquery.models.events.stores.root.DateStore; @@ -131,53 +130,23 @@ public void setReal(int event, double value) { } @Override - public int getString(int event) { - return 0; + public String getString(int event) { + return null; } @Override - public void setString(int event, int value) { + public void setString(int event, String value) { } - @Override - public String getElement(int id) { - return null; - } - @Override public int size() { return 0; } @Override - public Stream iterateValues() { + public Stream streamValues() { return Stream.empty(); } - @Override - public int getId(String value) { - return 0; - } - - @Override - public Dictionary getUnderlyingDictionary() { - return null; - } - - @Override - public void setUnderlyingDictionary(Dictionary dictionary) { - // No underlying Dictionary - } - - @Override - public boolean isDictionaryHolding() { - return false; - } - - @Override - public void setIndexStore(IntegerStore newType) { - - } - } diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java new file mode 100644 index 0000000000..d4f885e098 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/models/events/stores/primitive/StringStoreString.java @@ -0,0 +1,92 @@ +package com.bakdata.conquery.models.events.stores.primitive; + +import java.util.Arrays; +import java.util.Objects; +import java.util.stream.Stream; + +import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.models.events.stores.root.ColumnStore; +import com.bakdata.conquery.models.events.stores.root.StringStore; +import com.fasterxml.jackson.annotation.JsonCreator; +import lombok.AccessLevel; +import lombok.Data; +import lombok.RequiredArgsConstructor; +import lombok.ToString; + +@CPSType(id = "STRINGS", base = ColumnStore.class) +@RequiredArgsConstructor(access = AccessLevel.PRIVATE) +@Data +public class StringStoreString implements StringStore { + + @ToString.Exclude + private final String[] values; + + public static StringStoreString create(int size) { + return new StringStoreString(new String[size]); + } + + @JsonCreator + public static StringStoreString withInternedStrings(String[] values) { + for (int index = 0; index < values.length; index++) { + values[index] = values[index] != null ? 
values[index].intern() : null; + } + + return new StringStoreString(values); + } + + @Override + public boolean has(int event) { + return values[event] != null; + } + + @Override + public Object createScriptValue(int event) { + return getString(event); + } + + @Override + public String getString(int event) { + return values[event]; + } + + @Override + public long estimateEventBits() { + return 0; + } + + @Override + public int getLines() { + return values.length; + } + + @Override + public T createDescription() { + return null; + } + + @Override + public StringStoreString select(int[] starts, int[] lengths) { + return new StringStoreString(ColumnStore.selectArray(starts, lengths, values, String[]::new)); + } + + @Override + public void setNull(int event) { + values[event] = null; + } + + @Override + public void setString(int event, String value) { + values[event] = value; + } + + @Override + public int size() { + return (int) Arrays.stream(values).distinct().count(); + } + + @Override + public Stream streamValues() { + return Arrays.stream(values).filter(Objects::nonNull); + } + +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/root/StringStore.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/root/StringStore.java index 888811eb3d..9f941ff1ce 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/root/StringStore.java +++ b/backend/src/main/java/com/bakdata/conquery/models/events/stores/root/StringStore.java @@ -2,57 +2,17 @@ import java.util.stream.Stream; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.events.MajorTypeId; -import com.fasterxml.jackson.annotation.JsonIgnore; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * {@link ColumnStore} for dictionary encoded string values. - *
- * See also {@link MajorTypeId#STRING} and {@link com.bakdata.conquery.models.preproc.parser.specific.StringParser}. - *
- * This class has a lot of special methods for handling dictionary encoded values. - * - * @implSpec Every implementation must guarantee IDs between 0 and size. - */ public interface StringStore extends ColumnStore { - static final Logger log = LoggerFactory.getLogger(StringStore.class); - - - int getString(int event); - - void setString(int event, int value); + String getString(int event); - String getElement(int id); + void setString(int event, String value); /** * Maximum number of distinct values in this Store. */ int size(); - - Stream iterateValues(); - - /** - * Lookup the id of a value in the dictionary. - */ - int getId(String value); - - @JsonIgnore - Dictionary getUnderlyingDictionary(); - - @JsonIgnore - void setUnderlyingDictionary(Dictionary dictionary); - - @JsonIgnore - boolean isDictionaryHolding(); - - /** - * After applying DictionaryMapping a new store might be needed. - */ - void setIndexStore(IntegerStore newType); + Stream streamValues(); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/DictionaryStore.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/DictionaryStore.java deleted file mode 100644 index 3ff6411713..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/DictionaryStore.java +++ /dev/null @@ -1,142 +0,0 @@ -package com.bakdata.conquery.models.events.stores.specific.string; - -import java.util.HashSet; -import java.util.Set; -import java.util.stream.Stream; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.io.jackson.View; -import com.bakdata.conquery.io.jackson.serializer.NsIdRef; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.events.stores.root.ColumnStore; -import com.bakdata.conquery.models.events.stores.root.IntegerStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.fasterxml.jackson.annotation.JsonView; -import com.google.common.base.Preconditions; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.extern.slf4j.Slf4j; -import org.jetbrains.annotations.NotNull; - -/** - * Strings are stored in a Dictionary, ids are handles into the Dictionary. - * - * @implNote this is NOT a {@link StringStore}, but is the base class of it. This enables some shenanigans with encodings. 
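The dictionary-backed stores removed below are superseded by the plain STRINGS store (StringStoreString) added above, which holds interned String values directly. A rough, self-contained illustration of that trade-off, with made-up example values:

import java.util.Arrays;

class InternSketch {
	public static void main(String[] args) {
		// Duplicate cell values collapse to a single shared instance once interned.
		String[] values = {new String("AOK"), new String("AOK"), new String("TK")};
		for (int i = 0; i < values.length; i++) {
			values[i] = values[i] != null ? values[i].intern() : null;
		}
		System.out.println(values[0] == values[1]); // true: both refer to the same interned instance
		System.out.println(Arrays.stream(values).distinct().count()); // 2 distinct values, as size() would report
	}
}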
- */ -@Getter -@Setter -@Slf4j -@CPSType(base = ColumnStore.class, id = "STRING_DICTIONARY") -@NoArgsConstructor -public class DictionaryStore implements ColumnStore { - - protected IntegerStore numberType; - - @NsIdRef - private Dictionary dictionary; - - @JsonView(View.Persistence.Manager.class) - private Set usedEntries; - - public DictionaryStore(IntegerStore store, Dictionary dictionary) { - this.numberType = store; - this.dictionary = dictionary; - } - - - public Stream iterateStrings() { - Preconditions.checkState(usedEntries != null, "usedEntries are not set yet."); - - return usedEntries.stream().map(dictionary::getElement); - } - - @Override - public DictionaryStore createDescription() { - DictionaryStore result = new DictionaryStore(numberType.createDescription(), dictionary); - - result.setUsedEntries(collectUsedStrings(this)); - - return result; - } - - @NotNull - private static Set collectUsedStrings(DictionaryStore stringStore) { - Set sampled = new HashSet<>(); - for (int event = 0; event < stringStore.getLines(); event++) { - if (!stringStore.has(event)) { - continue; - } - - sampled.add(stringStore.getString(event)); - } - return sampled; - } - - @Override - public int getLines() { - return numberType.getLines(); - } - - public byte[] getElement(int value) { - return dictionary.getElement(value); - } - - @Override - public Object createScriptValue(int event) { - return getElement(getString(event)); - } - - - public int size() { - return dictionary.size(); - } - - public int getId(byte[] value) { - return dictionary.getId(value); - } - - public int getString(int event) { - return (int) getNumberType().getInteger(event); - } - - @Override - public String toString() { - return "StringTypeDictionary(dictionary=" + dictionary + ", numberType=" + numberType + ")"; - } - - @Override - public long estimateTypeSizeBytes() { - return dictionary.estimateMemoryConsumption(); - } - - - @Override - public DictionaryStore select(int[] starts, int[] length) { - return new DictionaryStore(numberType.select(starts, length), dictionary); - } - - - @Override - public long estimateEventBits() { - return numberType.estimateEventBits(); - } - - public void set(int event, int value) { - numberType.setInteger(event, value); - } - - @Override - public void setNull(int event) { - numberType.setNull(event); - } - - @Override - public final boolean has(int event) { - return numberType.has(event); - } - - public void setIndexStore(IntegerStore newType) { - numberType = newType; - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/EncodedStringStore.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/EncodedStringStore.java deleted file mode 100644 index 7792cc93de..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/EncodedStringStore.java +++ /dev/null @@ -1,232 +0,0 @@ -package com.bakdata.conquery.models.events.stores.specific.string; - -import java.nio.charset.StandardCharsets; -import java.util.stream.Stream; - -import javax.annotation.Nonnull; -import javax.annotation.ParametersAreNonnullByDefault; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.events.stores.root.ColumnStore; -import com.bakdata.conquery.models.events.stores.root.IntegerStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.fasterxml.jackson.annotation.JsonCreator; -import 
com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import com.google.common.io.BaseEncoding; -import lombok.Getter; -import lombok.NonNull; -import lombok.RequiredArgsConstructor; -import lombok.Setter; -import lombok.SneakyThrows; -import lombok.extern.slf4j.Slf4j; - -/** - * Compacted String store, that uses two methods to reduce memory footprint: - * 1. Use a byte efficient encoding string for the actual string. See {@link Encoding} - * 2. Store the byte string in an appropriate data structure. See {{@link Dictionary and sub classes}} - */ -@Getter -@Setter -@Slf4j -@CPSType(base = ColumnStore.class, id = "STRING_ENCODED") -public class EncodedStringStore implements StringStore { - - @Nonnull - protected DictionaryStore subType; - @NonNull - private Encoding encoding; - - /** - * Cache element lookups and as they might be time-consuming, when a trie traversal is necessary (See {@link com.bakdata.conquery.util.dict.SuccinctTrie}). - */ - @JsonIgnore - private final LoadingCache elementCache; - - - @JsonCreator - public EncodedStringStore(DictionaryStore subType, Encoding encoding) { - super(); - this.subType = subType; - this.encoding = encoding; - elementCache = CacheBuilder.newBuilder() - .softValues() - .build(new CacheLoader() { - @Override - @ParametersAreNonnullByDefault - public String load(Integer key) throws Exception { - return encoding.decode(subType.getElement(key)); - } - }); - } - - - - @Override - @SneakyThrows - public String getElement(int value) { - return elementCache.get(value); - } - - @Override - public int getLines() { - return subType.getLines(); - } - - @Override - public String createScriptValue(int event) { - return getElement(getString(event)); - } - - - @Override - public int size() { - return subType.size(); - } - - @Override - public Stream iterateValues() { - return getSubType().iterateStrings().map(encoding::decode); - } - - @Override - public int getId(String value) { - // Make sure we can even decode before doing so - if (!encoding.canEncode(value)) { - return -1; - } - - return subType.getId(encoding.encode(value)); - } - - @Override - public String toString() { - return "StringTypeEncoded(encoding=" + encoding + ", subType=" + subType + ")"; - } - - @Override - public long estimateEventBits() { - return subType.estimateEventBits(); - } - - @Override - public long estimateMemoryConsumptionBytes() { - return subType.estimateMemoryConsumptionBytes(); - } - - @Override - public long estimateTypeSizeBytes() { - return subType.estimateTypeSizeBytes(); - } - - - @Override - public Dictionary getUnderlyingDictionary() { - return subType.getDictionary(); - } - - @Override - public void setUnderlyingDictionary(Dictionary dictionary) { - subType.setDictionary(dictionary); - } - - @Override - public boolean isDictionaryHolding() { - return true; - } - - @Override - public void setIndexStore(IntegerStore newType) { - subType.setIndexStore(newType); - } - - @Override - public EncodedStringStore select(int[] starts, int[] length) { - return new EncodedStringStore(subType.select(starts, length), getEncoding()); - } - - @Override - public EncodedStringStore createDescription() { - return new EncodedStringStore((DictionaryStore) subType.createDescription(), getEncoding()); - } - - @Override - public void setString(int event, int value) { - subType.set(event, value); - } - - @Override - public void setNull(int event) { - 
subType.setNull(event); - } - - @Override - public int getString(int event) { - return subType.getString(event); - } - - @Override - public boolean has(int event) { - return subType.has(event); - } - - /** - * We use common Encodings in the reversed way. What the encoding sees as "encoded" data, - * is actually our raw data. On this raw data the decoding of the chosen encoding applied, which - * yield a smaller representation for storage in the memory. - *
- * To use this technique all string in the dictionary must only use the dictionary that is inherent - * to the chosen encoding. - */ - @RequiredArgsConstructor - public static enum Encoding { - // Order is for precedence, least specific encodings go last. - Base16LowerCase(2, BaseEncoding.base16().lowerCase().omitPadding()), - Base16UpperCase(2, BaseEncoding.base16().upperCase().omitPadding()), - Base32LowerCase(8, BaseEncoding.base32().lowerCase().omitPadding()), - Base32UpperCase(8, BaseEncoding.base32().upperCase().omitPadding()), - Base32HexLowerCase(8, BaseEncoding.base32Hex().lowerCase().omitPadding()), - Base32HexUpperCase(8, BaseEncoding.base32Hex().upperCase().omitPadding()), - Base64(4, BaseEncoding.base64().omitPadding()), - - UTF8(1, null) { - @Override - public String decode(byte[] bytes) { - return new String(bytes, StandardCharsets.UTF_8); - } - - @Override - public byte[] encode(String chars) { - return chars.getBytes(StandardCharsets.UTF_8); - } - - @Override - public boolean canEncode(String chars) { - return true; - } - }; - - private final int requiredLengthBase; - private final BaseEncoding encoding; - - public String decode(byte[] bytes) { - // Using encode here is valid see comment on this enum - return encoding.encode(bytes); - } - - public boolean canEncode(String chars) { - // Using canDecode here is valid see comment on this enum - return encoding.canDecode(chars) - && chars.length() % requiredLengthBase == 0; - } - - public byte[] encode(String chars) { - // Using decode here is valid see comment on this enum - return encoding.decode(chars); - } - - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/NumberStringStore.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/NumberStringStore.java deleted file mode 100644 index e8e07858de..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/NumberStringStore.java +++ /dev/null @@ -1,175 +0,0 @@ -package com.bakdata.conquery.models.events.stores.specific.string; - -import java.util.HashSet; -import java.util.Map; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Stream; - -import javax.annotation.Nonnull; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.io.jackson.View; -import com.bakdata.conquery.models.common.Range; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.events.stores.root.ColumnStore; -import com.bakdata.conquery.models.events.stores.root.IntegerStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.bakdata.conquery.models.preproc.parser.specific.StringParser; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.fasterxml.jackson.annotation.JsonView; -import lombok.Getter; -import lombok.Setter; -import lombok.ToString; - -/** - * The strings are only numbers and can therefore be used directly. - */ -@Getter -@Setter -@CPSType(base = ColumnStore.class, id = "STRING_NUMBER") -@ToString(of = "delegate") -public class NumberStringStore implements StringStore { - - @Nonnull - protected IntegerStore delegate; - //used as a compact intset - private Range range; - - // Only used for setting values in Preprocessing. - // TODO fk: can this be moved to the parser? 
- @JsonIgnore - private transient Map dictionary; - - public NumberStringStore(Range range, IntegerStore numberType, Map dictionary) { - this(range, numberType); - this.dictionary = dictionary; - } - - @JsonCreator - public NumberStringStore(Range range, IntegerStore numberType) { - super(); - this.range = range; - this.delegate = numberType; - } - - @Override - public int getLines() { - return delegate.getLines(); - } - - @Override - public Object createScriptValue(int event) { - return Objects.toString(delegate.createScriptValue(event)); - } - - @Override - public long estimateEventBits() { - return delegate.estimateEventBits(); - } - - @Override - public String getElement(int id) { - return Integer.toString(id); - } - - @Override - public int size() { - // Size here is maximum id - return range.getMax() + 1; - } - - @JsonView(View.Persistence.Manager.class) - private Set usedValues; - - private static Set collectUsedStrings(NumberStringStore stringStore) { - Set sampled = new HashSet<>(); - for (int event = 0; event < stringStore.getLines(); event++) { - if (!stringStore.has(event)) { - continue; - } - - sampled.add(stringStore.getString(event)); - } - return sampled; - } - - @Override - public NumberStringStore createDescription() { - NumberStringStore description = new NumberStringStore(getRange(), delegate.createDescription()); - - description.setUsedValues(collectUsedStrings(this)); - return description; - } - - @Override - public Stream iterateValues() { - return usedValues.stream().map(val -> Integer.toString(val)); - } - - @Override - public int getId(String value) { - try { - if (!StringParser.isOnlyDigits(value)){ - return -1; - } - - int parsed = Integer.parseInt(value); - - if (!range.contains(parsed)) { - return -1; - } - - return parsed; - } - catch (NumberFormatException e) { - return -1; - } - } - - @Override - public Dictionary getUnderlyingDictionary() { - return null; - } - - @Override - public void setUnderlyingDictionary(Dictionary dictionary) { - // No Dictionary - } - - @Override - public boolean isDictionaryHolding() { - return false; - } - - @Override - public void setIndexStore(IntegerStore indexStore) { - } - - @Override - public NumberStringStore select(int[] starts, int[] length) { - return new NumberStringStore(range, delegate.select(starts, length)); - } - - - @Override - public int getString(int event) { - return (int) getDelegate().getInteger(event); - } - - @Override - public void setNull(int event) { - getDelegate().setNull(event); - } - - @Override - public void setString(int event, int value) { - getDelegate().setInteger(event, Long.valueOf(dictionary.get(value))); - } - - @Override - public boolean has(int event) { - return getDelegate().has(event); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/PrefixSuffixStringStore.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/PrefixSuffixStringStore.java deleted file mode 100644 index 721f4e22c6..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/PrefixSuffixStringStore.java +++ /dev/null @@ -1,147 +0,0 @@ -package com.bakdata.conquery.models.events.stores.specific.string; - -import java.nio.charset.StandardCharsets; -import java.util.stream.Stream; - -import javax.annotation.Nonnull; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.events.stores.root.ColumnStore; -import 
com.bakdata.conquery.models.events.stores.root.IntegerStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.fasterxml.jackson.annotation.JsonCreator; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; -import lombok.ToString; -import lombok.extern.slf4j.Slf4j; - -/** - * Strings with common, but stripped prefix/suffix. - */ -@Getter -@Setter -@CPSType(base = ColumnStore.class, id = "STRING_PREFIX") -@ToString(of = {"prefix", "suffix", "subType"}) -@Slf4j -public class PrefixSuffixStringStore implements StringStore { - - @Nonnull - protected StringStore subType; - - @NonNull - private String prefix; - - @NonNull - private String suffix; - - @JsonCreator - public PrefixSuffixStringStore(StringStore subType, String prefix, String suffix) { - super(); - this.subType = subType; - this.prefix = prefix; - this.suffix = suffix; - } - - @Override - public String getElement(int value) { - return prefix + subType.getElement(value) + suffix; - } - - @Override - public int getLines() { - return subType.getLines(); - } - - @Override - public String createScriptValue(int event) { - return prefix + subType.createScriptValue(event) + suffix; - } - - @Override - public int getId(String value) { - if (value.startsWith(prefix)) { - return subType.getId(value.substring(prefix.length())); - } - return -1; - } - - @Override - public void setIndexStore(IntegerStore indexStore) { - subType.setIndexStore(indexStore); - } - - @Override - public PrefixSuffixStringStore select(int[] starts, int[] length) { - return new PrefixSuffixStringStore(subType.select(starts, length), getPrefix(), getSuffix()); - } - - @Override - public PrefixSuffixStringStore createDescription() { - return new PrefixSuffixStringStore(subType.createDescription(), getPrefix(), getSuffix()); - } - - @Override - public int size() { - return subType.size(); - } - - @Override - public Stream iterateValues() { - return subType.iterateValues().map(val -> getPrefix() + val + getSuffix()); - } - - @Override - public long estimateEventBits() { - return subType.estimateEventBits(); - } - - @Override - public long estimateMemoryConsumptionBytes() { - return (long) prefix.getBytes(StandardCharsets.UTF_8).length * Byte.SIZE + - (long) suffix.getBytes(StandardCharsets.UTF_8).length * Byte.SIZE + - subType.estimateMemoryConsumptionBytes(); - } - - @Override - public long estimateTypeSizeBytes() { - return subType.estimateTypeSizeBytes(); - } - - - @Override - public Dictionary getUnderlyingDictionary() { - return subType.getUnderlyingDictionary(); - } - - @Override - public void setUnderlyingDictionary(Dictionary dictionary) { - subType.setUnderlyingDictionary(dictionary); - } - - @Override - public boolean isDictionaryHolding() { - return subType.isDictionaryHolding(); - } - - @Override - public int getString(int event) { - return subType.getString(event); - } - - @Override - public void setString(int event, int value) { - subType.setString(event, value); - } - - @Override - public void setNull(int event) { - subType.setNull(event); - } - - @Override - public boolean has(int event) { - return subType.has(event); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/SingletonStringStore.java b/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/SingletonStringStore.java deleted file mode 100644 index c2cd285e0c..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/events/stores/specific/string/SingletonStringStore.java +++ 
/dev/null @@ -1,124 +0,0 @@ -package com.bakdata.conquery.models.events.stores.specific.string; - -import java.util.stream.Stream; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.events.stores.primitive.BitSetStore; -import com.bakdata.conquery.models.events.stores.root.ColumnStore; -import com.bakdata.conquery.models.events.stores.root.IntegerStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.fasterxml.jackson.annotation.JsonCreator; -import lombok.Getter; -import lombok.Setter; -import lombok.ToString; - -/** - * Only one string. - */ -@Getter -@Setter -@CPSType(base = ColumnStore.class, id = "STRING_SINGLETON") -@ToString(of = "singleValue") -public class SingletonStringStore implements StringStore { - - private final String singleValue; - private final BitSetStore delegate; - - @JsonCreator - public SingletonStringStore(String singleValue, BitSetStore delegate) { - super(); - this.singleValue = singleValue; - this.delegate = delegate; - } - - @Override - public void setIndexStore(IntegerStore indexStore) { - - } - - @Override - public int size() { - return singleValue == null ? 0 : 1; - } - - @Override - public Stream iterateValues() { - return Stream.of(singleValue); - } - - @Override - public SingletonStringStore select(int[] starts, int[] length) { - return new SingletonStringStore(singleValue, delegate.select(starts, length)); - } - - @Override - public String getElement(int id) { - return singleValue; - } - - @Override - public int getLines() { - return delegate.getLines(); - } - - @Override - public SingletonStringStore createDescription() { - return new SingletonStringStore(singleValue, delegate.createDescription()); - } - - @Override - public String createScriptValue(int event) { - return singleValue; - } - - @Override - public int getId(String value) { - if (value != null && value.equals(singleValue)) { - return 0; - } - return -1; - } - - @Override - public long estimateEventBits() { - return Byte.SIZE; - } - - - @Override - public Dictionary getUnderlyingDictionary() { - return null; - } - - @Override - public void setUnderlyingDictionary(Dictionary dictionary) { - // No underlying Dictionary - } - - @Override - public boolean isDictionaryHolding() { - return false; - } - - @Override - public void setString(int event, int value) { - getDelegate().setBoolean(event, true); - } - - @Override - public void setNull(int event) { - getDelegate().setBoolean(event, false); - } - - @Override - public boolean has(int event) { - return getDelegate().getBoolean(event); - } - - - @Override - public int getString(int event) { - return 0; - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/execution/InternalExecution.java b/backend/src/main/java/com/bakdata/conquery/models/execution/InternalExecution.java index f34aa3223f..2474583b01 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/execution/InternalExecution.java +++ b/backend/src/main/java/com/bakdata/conquery/models/execution/InternalExecution.java @@ -15,10 +15,4 @@ public interface InternalExecution { */ WorkerMessage createExecutionMessage(); - /** - * The callback for the results the shard nodes return. 
- * Is called once per shard node - */ - void addResult(R result); - } diff --git a/backend/src/main/java/com/bakdata/conquery/models/execution/ManagedExecution.java b/backend/src/main/java/com/bakdata/conquery/models/execution/ManagedExecution.java index b346aaa8ba..90eec7a4af 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/execution/ManagedExecution.java +++ b/backend/src/main/java/com/bakdata/conquery/models/execution/ManagedExecution.java @@ -5,7 +5,6 @@ import java.time.ZoneId; import java.time.temporal.ChronoUnit; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Set; import java.util.UUID; @@ -22,8 +21,8 @@ import com.bakdata.conquery.apiv1.execution.OverviewExecutionStatus; import com.bakdata.conquery.apiv1.query.QueryDescription; import com.bakdata.conquery.apiv1.query.concept.specific.CQConcept; +import com.bakdata.conquery.apiv1.query.concept.specific.CQReusedQuery; import com.bakdata.conquery.apiv1.query.concept.specific.external.CQExternal; -import com.bakdata.conquery.apiv1.query.concept.specific.external.DateFormat; import com.bakdata.conquery.io.cps.CPSBase; import com.bakdata.conquery.io.jackson.serializer.MetaIdRef; import com.bakdata.conquery.io.jackson.serializer.NsIdRef; @@ -107,9 +106,12 @@ public abstract class ManagedExecution extends IdentifiableImpl permittedGroups = new ArrayList<>(); for (Group group : storage.getAllGroups()) { @@ -377,7 +382,11 @@ private static boolean containsDates(QueryDescription query) { } if (visitable instanceof CQExternal external) { - return Arrays.stream(DateFormat.values()).anyMatch(external.getFormat()::contains); + return external.containsDates(); + } + + if (visitable instanceof CQReusedQuery reusedQuery && reusedQuery.getResolvedQuery() != null){ + return containsDates(reusedQuery.getResolvedQuery()); } return false; diff --git a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/EntityDateQuery.java b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/EntityDateQuery.java index bf8a9a84bf..8a2963b189 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/EntityDateQuery.java +++ b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/EntityDateQuery.java @@ -51,8 +51,6 @@ public class EntityDateQuery extends Query { @NotEmpty private final List resolutionsAndAlignments; - @NotNull - @Valid private final CDateRange dateRange; @NotNull diff --git a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ExternalExecution.java b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ExternalExecution.java index 58c3fcfb01..cc619b66d9 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ExternalExecution.java +++ b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ExternalExecution.java @@ -169,6 +169,7 @@ public void setStatusBase(@NonNull Subject subject, @NonNull ExecutionStatus sta @Override public void cancel() { + //TODO this is no longer called as the ExecutionManager used to call this. 
Preconditions.checkNotNull(externalTaskId, "Cannot check external task, because no Id is present"); updateStatus(api.cancelTask(externalTaskId)); @@ -189,7 +190,7 @@ public Response fetchExternalResult(String assetId) { } @Override - protected void finish(ExecutionState executionState) { + public void finish(ExecutionState executionState) { if (getState().equals(executionState)) { return; } diff --git a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/FormQueryPlan.java b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/FormQueryPlan.java index 3b5df25c50..d07db86cbf 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/FormQueryPlan.java +++ b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/FormQueryPlan.java @@ -41,7 +41,7 @@ public FormQueryPlan(List dateContexts, ArrayConceptQueryPlan featu if (dateContexts.size() <= 0) { // There is nothing to do for this FormQueryPlan, but we will return an empty result when its executed - log.warn("dateContexts are empty. Will not produce a result."); + log.trace("dateContexts are empty. Will not produce a result."); constantCount = 3; withRelativeEventDate = false; return; diff --git a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java index 405c8e68de..367fec4626 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java +++ b/backend/src/main/java/com/bakdata/conquery/models/forms/managed/ManagedInternalForm.java @@ -31,7 +31,6 @@ import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.query.results.FormShardResult; import com.bakdata.conquery.models.worker.DistributedNamespace; -import com.bakdata.conquery.models.worker.Namespace; import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.OptBoolean; @@ -39,6 +38,7 @@ import lombok.Getter; import lombok.extern.slf4j.Slf4j; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; /** * Execution type for simple forms, that are completely executed within Conquery and produce a single table as result. 
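The ManagedInternalForm hunks that follow drop the in-form addResult() dispatch and instead expose getSubQuery() and allSubQueriesDone(). A small stand-alone sketch of such a completion check; the states and names are invented for illustration and this is not the actual result-handling code:

import java.util.List;
import java.util.concurrent.atomic.AtomicReference;

class SubQueryCompletionSketch {
	enum State { RUNNING, DONE, FAILED }

	// Mirrors the idea of allSubQueriesDone(): the parent only finishes once every sub-query reports DONE.
	static boolean allDone(List<AtomicReference<State>> subQueries) {
		return subQueries.stream().allMatch(q -> q.get() == State.DONE);
	}

	public static void main(String[] args) {
		List<AtomicReference<State>> subs = List.of(
				new AtomicReference<>(State.DONE),
				new AtomicReference<>(State.RUNNING));

		System.out.println(allDone(subs)); // false: keep waiting for shard results
		subs.get(1).set(State.DONE);
		System.out.println(allDone(subs)); // true: the parent may now finish with DONE
	}
}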
@@ -73,6 +73,11 @@ public ManagedInternalForm(F form, User user, Dataset submittedDataset, MetaStor super(form, user, submittedDataset, storage); } + @Nullable + public ManagedQuery getSubQuery(ManagedExecutionId subQueryId) { + return flatSubQueries.get(subQueryId); + } + @Override public void doInitExecutable() { // Convert sub queries to sub executions @@ -88,7 +93,7 @@ private Map createSubExecutions() { return getSubmitted().createSubQueries() .entrySet() .stream().collect(Collectors.toMap( - e -> e.getKey(), + Map.Entry::getKey, e -> e.getValue().toManagedExecution(getOwner(), getDataset(), getStorage()) )); @@ -105,8 +110,8 @@ public void start() { } @Override - public List generateColumnDescriptions(boolean isInitialized, Namespace namespace, ConqueryConfig config) { - return subQueries.values().iterator().next().generateColumnDescriptions(isInitialized, namespace, config); + public List generateColumnDescriptions(boolean isInitialized, ConqueryConfig config) { + return subQueries.values().iterator().next().generateColumnDescriptions(isInitialized, config); } @@ -125,7 +130,7 @@ protected void setAdditionalFieldsForStatusWithColumnDescription(Subject subject return; } ManagedQuery subQuery = subQueries.entrySet().iterator().next().getValue(); - status.setColumnDescriptions(subQuery.generateColumnDescriptions(isInitialized(), getNamespace(), getConfig())); + status.setColumnDescriptions(subQuery.generateColumnDescriptions(isInitialized(), getConfig())); } @Override @@ -167,44 +172,8 @@ public WorkerMessage createExecutionMessage() { .collect(Collectors.toMap(Map.Entry::getKey, entry -> entry.getValue().getQuery()))); } - /** - * Distribute the result to a sub query. - */ - @Override - public void addResult(FormShardResult result) { - if (result.getError().isPresent()) { - fail(result.getError().get()); - return; - } - - ManagedExecutionId subQueryId = result.getSubQueryId(); - - ManagedQuery subQuery = flatSubQueries.get(subQueryId); - subQuery.addResult(result); - - switch (subQuery.getState()) { - case DONE -> { - if (allSubQueriesDone()) { - finish(ExecutionState.DONE); - } - } - // Fail the whole execution if a subquery fails - case FAILED -> { - fail( - result.getError().orElseThrow( - () -> new IllegalStateException(String.format("Query [%s] failed but no error was set.", getId())) - ) - ); - } - - default -> { - } - } - - } - - private boolean allSubQueriesDone() { + public boolean allSubQueriesDone() { synchronized (this) { return flatSubQueries.values().stream().allMatch(q -> q.getState().equals(ExecutionState.DONE)); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/forms/util/Resolution.java b/backend/src/main/java/com/bakdata/conquery/models/forms/util/Resolution.java index f9a81e81a8..e36cdc00ce 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/forms/util/Resolution.java +++ b/backend/src/main/java/com/bakdata/conquery/models/forms/util/Resolution.java @@ -9,7 +9,6 @@ import c10n.C10N; import com.bakdata.conquery.apiv1.forms.FeatureGroup; import com.bakdata.conquery.internationalization.DateContextResolutionC10n; -import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.query.PrintSettings; import com.fasterxml.jackson.annotation.JsonIgnore; import lombok.RequiredArgsConstructor; diff --git a/backend/src/main/java/com/bakdata/conquery/models/identifiable/ids/specific/DictionaryId.java b/backend/src/main/java/com/bakdata/conquery/models/identifiable/ids/specific/DictionaryId.java deleted file mode 
100644 index b40216bf07..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/identifiable/ids/specific/DictionaryId.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.bakdata.conquery.models.identifiable.ids.specific; - -import java.util.List; - -import javax.validation.constraints.NotNull; - -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.identifiable.ids.Id; -import com.bakdata.conquery.models.identifiable.ids.IdUtil; -import com.bakdata.conquery.models.identifiable.ids.IdIterator; -import com.bakdata.conquery.models.identifiable.ids.NamespacedId; -import lombok.AllArgsConstructor; -import lombok.EqualsAndHashCode; -import lombok.Getter; - -@AllArgsConstructor -@Getter -@EqualsAndHashCode(callSuper = false) -public class DictionaryId extends Id implements NamespacedId { - - @NotNull - private final DatasetId dataset; - @NotNull - private final String name; - - @Override - public void collectComponents(List components) { - dataset.collectComponents(components); - components.add(name); - } - - public static enum Parser implements IdUtil.Parser { - INSTANCE; - - @Override - public DictionaryId parseInternally(IdIterator parts) { - String dict = parts.next(); - return new DictionaryId(DatasetId.Parser.INSTANCE.parse(parts), dict); - } - } -} \ No newline at end of file diff --git a/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/AutoIncrementingPseudomizer.java b/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/AutoIncrementingPseudomizer.java index efa7cc1e8e..49b918fb64 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/AutoIncrementingPseudomizer.java +++ b/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/AutoIncrementingPseudomizer.java @@ -1,10 +1,10 @@ package com.bakdata.conquery.models.identifiable.mapping; +import java.util.HashMap; +import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; import com.bakdata.conquery.models.query.results.EntityResult; -import it.unimi.dsi.fastutil.ints.Int2ObjectAVLTreeMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; import lombok.RequiredArgsConstructor; /** @@ -15,7 +15,7 @@ public class AutoIncrementingPseudomizer implements IdPrinter { private static final String ANONYMOUS_ID_PREFIX = "anon_"; // Abbreviation for anonymous - private final Int2ObjectMap cachedIds = new Int2ObjectAVLTreeMap<>(); + private final Map cachedIds = new HashMap<>(); private final AtomicInteger pseudoIdPointer = new AtomicInteger(0); private final int size; @@ -30,11 +30,11 @@ public EntityPrintId createId(EntityResult entityResult) { * In the pseudo format the actual id columns are preserved but empty. * Only the Pid Column is written with a new generated id. 
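The pseudonymizer below now caches aliases per String entity id instead of per int. A self-contained sketch of the computeIfAbsent pattern it relies on; the alias format and ids are only examples:

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

class PseudonymSketch {
	private final Map<String, String> cachedIds = new HashMap<>();
	private final AtomicInteger pseudoIdPointer = new AtomicInteger(0);

	// First sight of an entity id assigns the next "anon_<n>" alias; later calls return the cached one.
	String getPseudoId(String entityId) {
		return cachedIds.computeIfAbsent(entityId, ignored -> "anon_" + pseudoIdPointer.getAndIncrement());
	}

	public static void main(String[] args) {
		PseudonymSketch pseudomizer = new PseudonymSketch();
		System.out.println(pseudomizer.getPseudoId("patient-7")); // anon_0
		System.out.println(pseudomizer.getPseudoId("patient-9")); // anon_1
		System.out.println(pseudomizer.getPseudoId("patient-7")); // anon_0 again, from the cache
	}
}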
*/ - public EntityPrintId getPseudoId(int csvEntityId) { + public EntityPrintId getPseudoId(String csvEntityId) { return cachedIds.computeIfAbsent(csvEntityId, this::createPseudonym); } - private EntityPrintId createPseudonym(int ignored) { + private EntityPrintId createPseudonym(String ignored) { final String name = ANONYMOUS_ID_PREFIX + pseudoIdPointer.getAndIncrement(); final String[] parts = new String[size]; diff --git a/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/EntityIdMap.java b/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/EntityIdMap.java index 93544e61b4..4bbb70ce92 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/EntityIdMap.java +++ b/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/EntityIdMap.java @@ -144,17 +144,19 @@ public EntityPrintId toExternal(String internal) { * * Return -1 when not resolved. */ - public int resolve(ExternalId key) { - String value = external2Internal.get(key); + public String resolve(ExternalId key) { + final String value = external2Internal.get(key); if (value != null) { - return getStorage().getPrimaryDictionary().getId(value); + return value; } // Maybe we can find them directly in the dictionary? - final int id = getStorage().getPrimaryDictionary().getId(key.getId()); + if (storage.getEntityBucket(key.getId()).isPresent()) { + return key.getId(); + } - return id; + return null; } public void addOutputMapping(String csvEntityId, EntityPrintId externalEntityId) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/FullIdPrinter.java b/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/FullIdPrinter.java index 8fd8bf6fb3..55072ec2a5 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/FullIdPrinter.java +++ b/backend/src/main/java/com/bakdata/conquery/models/identifiable/mapping/FullIdPrinter.java @@ -1,17 +1,12 @@ package com.bakdata.conquery.models.identifiable.mapping; -import com.bakdata.conquery.models.dictionary.EncodedDictionary; import com.bakdata.conquery.models.query.results.EntityResult; -import lombok.RequiredArgsConstructor; +import lombok.Data; -/** - * IdPrinter using {@link EncodedDictionary} and {@link EntityIdMap} to generate full ids. 
- */ -@RequiredArgsConstructor +@Data public class FullIdPrinter implements IdPrinter { - private final EncodedDictionary dictionary; private final EntityIdMap idMapping; private final int size; @@ -20,7 +15,7 @@ public class FullIdPrinter implements IdPrinter { @Override public EntityPrintId createId(EntityResult entityResult) { - String csvEntityId = dictionary.getElement(entityResult.getEntityId()); + String csvEntityId = entityResult.getEntityId(); // The state may be uses by implementations of this class if (idMapping == null) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/ImportJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/ImportJob.java index b788b6dec7..e7fc45b644 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/ImportJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/ImportJob.java @@ -6,58 +6,44 @@ import java.util.Collections; import java.util.HashMap; import java.util.HashSet; -import java.util.IntSummaryStatistics; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Collectors; import javax.ws.rs.BadRequestException; import javax.ws.rs.WebApplicationException; import javax.ws.rs.core.Response; -import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.Import; import com.bakdata.conquery.models.datasets.ImportColumn; import com.bakdata.conquery.models.datasets.Table; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.DictionaryMapping; import com.bakdata.conquery.models.events.Bucket; -import com.bakdata.conquery.models.events.MajorTypeId; import com.bakdata.conquery.models.events.stores.root.ColumnStore; -import com.bakdata.conquery.models.events.stores.root.IntegerStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.exceptions.JSONException; -import com.bakdata.conquery.models.identifiable.IdMutex; import com.bakdata.conquery.models.identifiable.ids.specific.BucketId; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; import com.bakdata.conquery.models.identifiable.ids.specific.ImportId; import com.bakdata.conquery.models.identifiable.ids.specific.TableId; import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; import com.bakdata.conquery.models.messages.namespaces.specific.AddImport; import com.bakdata.conquery.models.messages.namespaces.specific.ImportBucket; import com.bakdata.conquery.models.messages.namespaces.specific.RemoveImportJob; -import com.bakdata.conquery.models.messages.namespaces.specific.UpdateDictionary; import com.bakdata.conquery.models.preproc.PreprocessedData; -import com.bakdata.conquery.models.preproc.PreprocessedDictionaries; import com.bakdata.conquery.models.preproc.PreprocessedHeader; import com.bakdata.conquery.models.preproc.PreprocessedReader; -import com.bakdata.conquery.models.preproc.parser.specific.IntegerParser; -import com.bakdata.conquery.models.query.entity.Entity; import com.bakdata.conquery.models.worker.DistributedNamespace; import com.bakdata.conquery.models.worker.WorkerHandler; import com.bakdata.conquery.models.worker.WorkerInformation; -import com.bakdata.conquery.util.ResourceUtil; 
import com.bakdata.conquery.util.progressreporter.ProgressReporter; +import com.google.common.base.Functions; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.ints.IntSet; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import lombok.Getter; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @@ -69,22 +55,15 @@ @Slf4j public class ImportJob extends Job { + private static final int NUMBER_OF_STEPS = /* directly in execute = */4; private final DistributedNamespace namespace; - @Getter private final Table table; private final int bucketSize; private final PreprocessedHeader header; - private final PreprocessedDictionaries dictionaries; private final PreprocessedData container; - private final ConqueryConfig config; - - private final IdMutex sharedDictionaryLocks; - - - private static final int NUMBER_OF_STEPS = /* directly in execute = */4; - public static ImportJob createOrUpdate(DistributedNamespace namespace, InputStream inputStream, int entityBucketSize, IdMutex sharedDictionaryLocks, ConqueryConfig config, boolean update) + public static ImportJob createOrUpdate(DistributedNamespace namespace, InputStream inputStream, int entityBucketSize, ConqueryConfig config, boolean update) throws IOException { try (PreprocessedReader parser = new PreprocessedReader(inputStream, namespace.getPreprocessMapper())) { @@ -100,7 +79,7 @@ public static ImportJob createOrUpdate(DistributedNamespace namespace, InputStre final PreprocessedHeader header = parser.readHeader(); final TableId tableId = new TableId(ds.getId(), header.getTable()); - Table table = namespace.getStorage().getTable(tableId); + final Table table = namespace.getStorage().getTable(tableId); if (table == null) { throw new BadRequestException(String.format("Table[%s] does not exist.", tableId)); @@ -110,7 +89,7 @@ public static ImportJob createOrUpdate(DistributedNamespace namespace, InputStre header.assertMatch(table); final ImportId importId = new ImportId(table.getId(), header.getName()); - Import processedImport = namespace.getStorage().getImport(importId); + final Import processedImport = namespace.getStorage().getImport(importId); if (update) { if (processedImport == null) { @@ -125,20 +104,9 @@ else if (processedImport != null) { } - log.trace("Begin reading Dictionaries"); - parser.addReplacement(Dataset.PLACEHOLDER.getId(), ds); - PreprocessedDictionaries dictionaries = parser.readDictionaries(); - - Map - dictReplacements = - createLocalIdReplacements(dictionaries.getDictionaries(), table, header.getName(), namespace.getStorage(), sharedDictionaryLocks); - - // We inject the mappings into the parser, so that the incoming placeholder names are replaced with the new names of the dictionaries. This allows us to use NsIdRef in conjunction with shared-Dictionaries - parser.addAllReplacements(dictReplacements); - log.trace("Begin reading data."); - PreprocessedData container = parser.readData(); + final PreprocessedData container = parser.readData(); log.debug("Done reading data. Contains {} Entities.", container.size()); @@ -149,133 +117,11 @@ else if (processedImport != null) { table, entityBucketSize, header, - dictionaries, - container, - config, - sharedDictionaryLocks + container ); } } - /** - * Collects all dictionaries that map only to columns of this import. 
- */ - private static Map createLocalIdReplacements(Map dicts, Table table, String importName, NamespaceStorage storage, IdMutex sharedDictionaryLocks) { - - // Empty Maps are Coalesced to null by Jackson - if (dicts == null) { - return Collections.emptyMap(); - } - - final Map out = new HashMap<>(); - - log.trace("Importing Normal Dictionaries."); - - for (Column column : table.getColumns()) { - - if (column.getType() != MajorTypeId.STRING) { - continue; - } - - // Might not have an underlying Dictionary (eg Singleton, direct-Number) - // but could also be an error :/ Most likely the former - if (!dicts.containsKey(column.getName()) || dicts.get(column.getName()) == null) { - log.trace("No Dictionary for {}", column); - continue; - } - - if (column.getSharedDictionary() != null) { - column.createSharedDictionaryReplacement(dicts, storage, out, sharedDictionaryLocks); - } - else { - // Its a normal dictionary (only valid for this column in this import) - column.createSingleColumnDictionaryReplacement(dicts, importName, out); - } - } - - return out; - } - - /** - * Import all dictionaries. Shared dictionaries are merge into existing ones. All are distributed to corresponding workers. - * Create mappings for shared dictionaries dict. - * This is not synchronized because the methods is called within the job execution. - */ - private static Map importDictionaries(DistributedNamespace namespace, Map dicts, Column[] columns, String importName, Table table, IdMutex sharedDictionaryLocks) { - - // Empty Maps are Coalesced to null by Jackson - if (dicts == null) { - return Collections.emptyMap(); - } - - - log.debug("BEGIN importing {} Dictionaries", dicts.size()); - - // Might not have an underlying Dictionary (eg Singleton, direct-Number) - // but could also be an error :/ Most likely the former - // It's a shared dictionary - // This should never fail, becaus the dictionary is pre-created in the replacement generation step - - Arrays.stream(columns) - .parallel() - .filter(column -> column.getType() == MajorTypeId.STRING) - .filter(col -> col.getSharedDictionary() == null) - .map(col -> dicts.get(col.getName())) - .filter(Objects::nonNull) - .forEach(dictionary -> { - // Normal Dictionary -> no merge necessary, just distribute - storeAndDistributeDictionary(namespace, dictionary); - }); - - final Map out = new ConcurrentHashMap<>(); - - // We group by sharedDictionary to avoid sending dictionaries multliple times - Arrays.stream(columns) - .parallel() - .filter(column -> column.getType() == MajorTypeId.STRING) - .filter(col -> col.getSharedDictionary() != null) - .filter(col -> dicts.containsKey(col.getName())) - .collect(Collectors.groupingBy(Column::getSharedDictionary)) - .values() - .forEach(allColumns -> { - final Column refColumn = allColumns.get(0); - final String sharedDictionaryName = refColumn.getSharedDictionary(); - final DictionaryId dictionaryId = new DictionaryId(namespace.getDataset().getId(), sharedDictionaryName); - - log.debug("Column[{}.{}.{}] part of shared Dictionary[{}]", table.getId(), importName, refColumn.getName(), sharedDictionaryName); - - // We have to lock here, as sibling columns might both use the same shared-dictionary - try (IdMutex.Locked lock = sharedDictionaryLocks.acquire(dictionaryId)) { - final Dictionary sharedDictionary = namespace.getStorage().getDictionary(dictionaryId); - - ResourceUtil.throwNotFoundIfNull(dictionaryId, sharedDictionary); - log.trace("Merging into shared Dictionary[{}]", sharedDictionary); - - int newIds = 0; - - for (Column column : 
allColumns) { - final Dictionary importDictionary = dicts.get(column.getName()); - - final DictionaryMapping mapping = DictionaryMapping.createAndImport(importDictionary, sharedDictionary); - - newIds += mapping.getNumberOfNewIds(); - out.put(column, mapping); - } - - if (newIds > 0) { - storeAndDistributeDictionary(namespace, sharedDictionary); - } - } - }); - return out; - } - - private static void storeAndDistributeDictionary(DistributedNamespace namespace, Dictionary dictionary) { - log.trace("Sending {} to all Workers", dictionary); - namespace.getStorage().updateDictionary(dictionary); - namespace.getWorkerHandler().sendToAll(new UpdateDictionary(dictionary)); - } - @Override public void execute() throws JSONException, InterruptedException, IOException { @@ -284,33 +130,18 @@ public void execute() throws JSONException, InterruptedException, IOException { log.trace("Updating primary dictionary"); - // Update primary dictionary: load new data, and create mapping. - final DictionaryMapping primaryMapping = importPrimaryDictionary(dictionaries.getPrimaryDictionary()); - getProgressReporter().report(1); // Distribute the new IDs among workers - distributeWorkerResponsibilities(primaryMapping); + distributeWorkerResponsibilities(container.entities()); getProgressReporter().report(1); - - log.info("Importing Dictionaries"); - - Map sharedDictionaryMappings = - importDictionaries(namespace, dictionaries.getDictionaries(), table.getColumns(), header.getName(), table, sharedDictionaryLocks); - - log.info("Remapping Dictionaries {}", sharedDictionaryMappings.values()); - - remapToSharedDictionary(sharedDictionaryMappings, container.getStores()); - - - Import imp = createImport(header, container.getStores(), table.getColumns(), container.size()); - + final Import imp = createImport(header, container.getStores(), table.getColumns(), container.size()); namespace.getStorage().updateImport(imp); - Map> buckets2LocalEntities = groupEntitiesByBucket(container.entities(), primaryMapping, bucketSize); + final Map> buckets2LocalEntities = groupEntitiesByBucket(container.entities(), bucketSize); final ColumnStore[] storesSorted = Arrays.stream(table.getColumns()) @@ -324,34 +155,101 @@ public void execute() throws JSONException, InterruptedException, IOException { // we use this to track assignment to workers. 
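
Aside: the hunk above drops the primary-dictionary remapping, so entity ids now travel as plain Strings and grouping entities per bucket reduces to a `groupingBy` over the id set. A minimal sketch of that step, where `assignBucket` is a hypothetical stand-in for `Namespace#getBucket` (whose implementation is not part of this diff):

```java
// Illustrative sketch only: grouping String entity ids into buckets.
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

class BucketGroupingSketch {

	// Hypothetical stand-in for Namespace#getBucket; the real assignment logic is not shown in this diff.
	static int assignBucket(String entity, int bucketSize) {
		return Math.floorMod(entity.hashCode(), bucketSize);
	}

	// Mirrors groupEntitiesByBucket: no primary dictionary lookup, just a grouping over the ids.
	static Map<Integer, List<String>> groupEntitiesByBucket(Set<String> entities, int bucketSize) {
		return entities.stream()
					   .collect(Collectors.groupingBy(entity -> assignBucket(entity, bucketSize)));
	}
}
```
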
final Map> workerAssignments = - sendBuckets(container.getStarts(), container.getLengths(), primaryMapping, imp, buckets2LocalEntities, storesSorted); + sendBuckets(container.getStarts(), container.getLengths(), imp, buckets2LocalEntities, storesSorted); - WorkerHandler handler = namespace.getWorkerHandler(); + final WorkerHandler handler = namespace.getWorkerHandler(); workerAssignments.forEach(handler::addBucketsToWorker); } + private void distributeWorkerResponsibilities(Set entities) { + log.debug("Updating bucket assignments."); + + synchronized (namespace) { + for (String entity : entities) { + final int bucket = namespace.getBucket(entity, bucketSize); + + if (namespace.getWorkerHandler().getResponsibleWorkerForBucket(bucket) != null) { + continue; + } + + namespace.getWorkerHandler().addResponsibility(bucket); + } + } + } + + private Import createImport(PreprocessedHeader header, Map stores, Column[] columns, int size) { + final Import imp = new Import(table); + + imp.setName(header.getName()); + imp.setNumberOfEntries(header.getRows()); + imp.setNumberOfEntities(size); + + final ImportColumn[] importColumns = new ImportColumn[columns.length]; + + for (int i = 0; i < columns.length; i++) { + final ColumnStore store = stores.get(columns[i].getName()); + + final ImportColumn col = new ImportColumn(imp, store.createDescription(), store.getLines(), store.estimateMemoryConsumptionBytes()); + + col.setName(columns[i].getName()); + + importColumns[i] = col; + } + + imp.setColumns(importColumns); + + namespace.getWorkerHandler().sendToAll(new AddImport(imp)); + return imp; + } + + /** + * Group entities by their global bucket id. + */ + private Map> groupEntitiesByBucket(Set entities, int bucketSize) { + return entities.stream() + .collect(Collectors.groupingBy(entity -> namespace.getBucket(entity, bucketSize))); + + } + /** * select, then send buckets. 
*/ - private Map> sendBuckets(Map starts, Map lengths, DictionaryMapping primaryMapping, Import imp, Map> buckets2LocalEntities, ColumnStore[] storesSorted) { + private Map> sendBuckets(Map starts, Map lengths, Import imp, Map> buckets2LocalEntities, ColumnStore[] storesSorted) { - Map> newWorkerAssignments = new HashMap<>(); + final Map> newWorkerAssignments = new HashMap<>(); final ProgressReporter subJob = getProgressReporter().subJob(buckets2LocalEntities.size()); - for (Map.Entry> bucket2entities : buckets2LocalEntities.entrySet()) { + for (Map.Entry> bucket2entities : buckets2LocalEntities.entrySet()) { + - WorkerInformation responsibleWorker = Objects.requireNonNull( - namespace - .getWorkerHandler() - .getResponsibleWorkerForBucket(bucket2entities.getKey()), - () -> "No responsible worker for Bucket#" + bucket2entities.getKey()); + final int bucketId = bucket2entities.getKey(); + final List entities = bucket2entities.getValue(); + + final WorkerInformation responsibleWorker = Objects.requireNonNull( + namespace + .getWorkerHandler() + .getResponsibleWorkerForBucket(bucketId), + () -> "No responsible worker for Bucket#" + bucketId + ); awaitFreeJobQueue(responsibleWorker); + final Map bucketStarts = entities.stream() + .filter(starts::containsKey) + .collect(Collectors.toMap(Functions.identity(), starts::get)); + + final Map bucketLengths = entities.stream() + .filter(lengths::containsKey) + .collect(Collectors.toMap(Functions.identity(), lengths::get)); + + + assert !Collections.disjoint(bucketStarts.keySet(), bucketLengths.keySet()); + + final Bucket bucket = - selectBucket(starts, lengths, storesSorted, primaryMapping, imp, bucket2entities.getKey(), bucket2entities.getValue()); + selectBucket(bucketStarts, bucketLengths, storesSorted, imp, bucketId); newWorkerAssignments.computeIfAbsent(responsibleWorker.getId(), (ignored) -> new HashSet<>()) .add(bucket.getId()); @@ -379,48 +277,32 @@ private void awaitFreeJobQueue(WorkerInformation responsibleWorker) { * - calculate per-Entity regions of Bucklet (start/end) * - split stores */ - private Bucket selectBucket(Map localStarts, Map localLengths, ColumnStore[] stores, DictionaryMapping primaryMapping, Import imp, int bucketId, List localEntities) { - - final int root = bucketSize * bucketId; + private Bucket selectBucket(Map localStarts, Map localLengths, ColumnStore[] stores, Import imp, int bucketId) { - IntList selectionStart = new IntArrayList(); - IntList selectionLength = new IntArrayList(); - IntSet entities = new IntOpenHashSet(); + final IntList selectionStart = new IntArrayList(); + final IntList selectionLength = new IntArrayList(); // First entity of Bucket starts at 0, the following are appended. 
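
The rewritten `selectBucket` below lays each entity's events out back to back, tracking start/end offsets per String id instead of per bucket position. A simplified sketch of that layout under stand-in types (not the actual Bucket API):

```java
// Minimal sketch: consecutive per-entity layout, so each entity's end is the next entity's start.
import java.util.LinkedHashMap;
import java.util.Map;

class EntityLayoutSketch {

	// localLengths comes from the preprocessed file; returns [start, end) per entity.
	static Map<String, int[]> layout(Map<String, Integer> localLengths) {
		final Map<String, int[]> startAndEnd = new LinkedHashMap<>();
		int currentStart = 0;

		for (Map.Entry<String, Integer> entry : localLengths.entrySet()) {
			final int length = entry.getValue();
			startAndEnd.put(entry.getKey(), new int[]{currentStart, currentStart + length});
			currentStart += length;
		}
		return startAndEnd;
	}
}
```
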
- int[] entityStarts = new int[bucketSize]; - int[] entityEnds = new int[bucketSize]; + final Object2IntMap entityStarts = new Object2IntOpenHashMap<>(); + final Object2IntMap entityEnds = new Object2IntOpenHashMap<>(); - Arrays.fill(entityEnds, -1); - Arrays.fill(entityStarts, -1); int currentStart = 0; - for (int position = 0; position < bucketSize; position++) { - int globalId = root + position; - - int localId = primaryMapping.target2Source(globalId); + for (Map.Entry entity2Start : localStarts.entrySet()) { + final String entity = entity2Start.getKey(); + final int start = entity2Start.getValue(); - if (localId == -1) { - continue; - } - - if (!localStarts.containsKey(localId)) { - continue; - } + final int length = localLengths.get(entity); - entities.add(globalId); - - final int length = localLengths.get(localId); - - selectionStart.add(localStarts.get(localId)); + selectionStart.add(start); selectionLength.add(length); - entityStarts[position] = currentStart; - entityEnds[position] = currentStart + length; + entityStarts.put(entity, currentStart); + entityEnds.put(entity, currentStart + length); currentStart += length; } @@ -433,154 +315,14 @@ private Bucket selectBucket(Map localStarts, Map mappings, Map values) { - - if (mappings.isEmpty()) { - log.trace("No columns with shared dictionary appear to be in the import."); - return; - } - - final ProgressReporter subJob = getProgressReporter().subJob(mappings.size()); - - // we need to find a new Type for the index-Column as it's going to be remapped and might change in size - mappings.entrySet().parallelStream() - .forEach(entry -> { - final Column column = entry.getKey(); - final DictionaryMapping mapping = entry.getValue(); - - final StringStore stringStore = (StringStore) values.get(column.getName()); - log.debug("Remapping Column[{}] = {} with {}", column, stringStore, mapping); - final IntegerParser indexParser = new IntegerParser(config); - final IntSummaryStatistics statistics = mapping.target().intStream().summaryStatistics(); - - indexParser.setLines(stringStore.getLines()); - indexParser.setMinValue(statistics.getMin()); - indexParser.setMaxValue(statistics.getMax()); - - final IntegerStore newType = indexParser.findBestType(); - - log.trace("Decided for {}", newType); - - mapping.applyToStore(stringStore, newType); - stringStore.setIndexStore(newType); - - subJob.report(1); - }); - } - - private Import createImport(PreprocessedHeader header, Map stores, Column[] columns, int size) { - Import imp = new Import(table); - - imp.setName(header.getName()); - imp.setNumberOfEntries(header.getRows()); - imp.setNumberOfEntities(size); - - final ImportColumn[] importColumns = new ImportColumn[columns.length]; - - for (int i = 0; i < columns.length; i++) { - final ColumnStore store = stores.get(columns[i].getName()); - - ImportColumn col = new ImportColumn(imp, store.createDescription(), store.getLines(), store.estimateMemoryConsumptionBytes()); - - col.setName(columns[i].getName()); - - importColumns[i] = col; - } - - imp.setColumns(importColumns); - - Set dictionaries = new HashSet<>(); - - for (Column column : columns) { - // only non-shared dictionaries need to be registered here - if (column.getType() != MajorTypeId.STRING) { - continue; - } - - // shared dictionaries are not related to a specific import. - if (column.getSharedDictionary() != null) { - continue; - } - - // Some StringStores don't have Dictionaries. 
- final StringStore stringStore = (StringStore) stores.get(column.getName()); - - if (!stringStore.isDictionaryHolding()) { - continue; - } - - dictionaries.add(stringStore.getUnderlyingDictionary().getId()); - } - - imp.setDictionaries(dictionaries); - namespace.getWorkerHandler().sendToAll(new AddImport(imp)); - return imp; - } - - - /** - * Group entities by their global bucket id. - */ - private Map> groupEntitiesByBucket(Set entities, DictionaryMapping primaryMapping, int bucketSize) { - return entities.stream() - .collect(Collectors.groupingBy(entity -> Entity.getBucket(primaryMapping.source2Target(entity), bucketSize))); - - } - - private Dataset getDataset() { return namespace.getDataset(); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java index f5b1a480ec..e980c21b1f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java +++ b/backend/src/main/java/com/bakdata/conquery/models/jobs/UpdateFilterSearchJob.java @@ -1,97 +1,110 @@ package com.bakdata.conquery.models.jobs; +import java.util.Arrays; import java.util.Collection; import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.Objects; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; +import java.util.function.Consumer; import java.util.function.Predicate; import java.util.stream.Collectors; -import java.util.stream.Stream; import com.bakdata.conquery.apiv1.frontend.FrontendValue; import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.config.IndexConfig; +import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; +import com.bakdata.conquery.models.worker.Namespace; import com.bakdata.conquery.util.search.TrieSearch; -import com.google.common.base.Functions; import com.google.common.collect.Sets; -import it.unimi.dsi.fastutil.objects.Object2LongMap; import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.apache.commons.lang3.time.StopWatch; - +import org.jetbrains.annotations.NotNull; + +/** + * Job that initializes the filter search for the frontend. + * It collects all sources of values for all filters, e.g.: + *
    + *
+ * <ul>
+ *     <li>explicit mappings in a {@link SelectFilter}</li>
+ *     <li>external reference mappings</li>
+ *     <li>columns of imported data which are referenced by a filter</li>
+ * </ul>
+ */ @Slf4j @RequiredArgsConstructor public class UpdateFilterSearchJob extends Job { - @NonNull - private final NamespaceStorage storage; - @NonNull - private final Map, TrieSearch> searchCache; + private final Namespace namespace; @NonNull private final IndexConfig indexConfig; - @NonNull - private final Object2LongMap> totals; + private final Consumer> registerColumnValuesInSearch; @Override public void execute() throws Exception { + final NamespaceStorage storage = namespace.getStorage(); + + log.info("Clearing Search"); + namespace.getFilterSearch().clearSearch(); + log.info("BEGIN loading SourceSearch"); - // collect all SelectFilters to the create searches for them + // collect all SelectFilters to create searches for them final List> allSelectFilters = - storage.getAllConcepts().stream() - .flatMap(c -> c.getConnectors().stream()) - .flatMap(co -> co.collectAllFilters().stream()) - .filter(SelectFilter.class::isInstance) - .map(f -> ((SelectFilter) f)) - .collect(Collectors.toList()); + getAllSelectFilters(storage); - final Set> collectedSearchables = + // Unfortunately the is no ClassToInstanceMultimap yet + final Map, Set> collectedSearchables = allSelectFilters.stream() .map(SelectFilter::getSearchReferences) .flatMap(Collection::stream) // Disabling search is only a last resort for when columns are too big to store in memory or process for indexing. // TODO FK: We want no Searchable to be disabled, better scaling searches or mechanisms to fill search. .filter(Predicate.not(Searchable::isSearchDisabled)) - .collect(Collectors.toSet()); + // Group Searchables into "Columns" and other "Searchables" + .collect(Collectors.groupingBy(s -> s instanceof Column ? Column.class : Searchable.class, Collectors.toSet())); // Most computations are cheap but data intensive: we fork here to use as many cores as possible. 
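
The search update now splits Searchables into Columns and everything else before forking: plain Searchables get an in-memory TrieSearch on the manager, while Column values are collected on the shards (via the CollectColumnValuesJob further down). A small sketch of that split with simplified stand-in types (the real classes live in the conquery model packages):

```java
// Sketch of the Column-vs-Searchable grouping, with stand-in types for illustration.
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

class SearchableGroupingSketch {

	interface Searchable {}

	static final class Column implements Searchable {}

	static Map<Class<?>, Set<Searchable>> groupByKind(List<Searchable> searchables) {
		return searchables.stream()
						  .collect(Collectors.groupingBy(
								  s -> s instanceof Column ? Column.class : Searchable.class,
								  Collectors.toSet()));
	}
}
```
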
final ExecutorService service = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1); - final Map, TrieSearch> synchronizedResult = Collections.synchronizedMap(searchCache); + final HashMap> searchCache = new HashMap<>(); + final Map> synchronizedResult = Collections.synchronizedMap(searchCache); - log.debug("Found {} searchable Objects.", collectedSearchables.size()); + log.debug("Found {} searchable Objects.", collectedSearchables.values().stream().mapToLong(Set::size).sum()); - for (Searchable searchable : collectedSearchables) { + for (Searchable searchable : collectedSearchables.getOrDefault(Searchable.class, Collections.emptySet())) { + if (searchable instanceof Column) { + throw new IllegalStateException("Columns should have been grouped out previously"); + } service.submit(() -> { final StopWatch watch = StopWatch.createStarted(); - log.info("BEGIN collecting entries for `{}`", searchable.getId()); + log.info("BEGIN collecting entries for `{}`", searchable); try { - final TrieSearch search = searchable.createTrieSearch(indexConfig, storage); + final TrieSearch search = searchable.createTrieSearch(indexConfig); synchronizedResult.put(searchable, search); log.debug( "DONE collecting {} entries for `{}`, within {}", search.calculateSize(), - searchable.getId(), + searchable, watch ); } @@ -102,6 +115,11 @@ public void execute() throws Exception { }); } + // The following cast is safe + final Set searchableColumns = (Set) collectedSearchables.getOrDefault(Column.class, Collections.emptySet()); + log.debug("Start collecting column values: {}", Arrays.toString(searchableColumns.toArray())); + registerColumnValuesInSearch.accept(searchableColumns); + service.shutdown(); @@ -111,35 +129,26 @@ public void execute() throws Exception { service.shutdownNow(); return; } - log.debug("Still waiting for {} to finish.", Sets.difference(collectedSearchables, synchronizedResult.keySet())); + log.debug("Still waiting for {} to finish.", Sets.difference(collectedSearchables.get(Searchable.class), synchronizedResult.keySet())); } - log.debug("BEGIN counting Search totals."); - - - // Precompute totals as that can be slow when doing it on-demand. 
- totals.putAll( - Stream.concat( - // SelectFilters without their own labels are not "real" Searchables and therefore not in collectedSearchables - // We however want the real totals of ALL Searchables (and especially SelectFilters), which is why we include them here explicitly - allSelectFilters.parallelStream(), - collectedSearchables.parallelStream() - ) - .distinct() - .collect(Collectors.toMap( - Functions.identity(), - filter -> filter.getSearchReferences().stream() - .map(searchCache::get) - .filter(Objects::nonNull) // Failed or disabled searches are null - .flatMap(TrieSearch::stream) - .mapToInt(FrontendValue::hashCode) - .distinct() - .count() - )) - ); - - - log.debug("DONE loading SourceSearch"); + // Shrink searches before registering in the filter search + searchCache.values().forEach(TrieSearch::shrinkToFit); + + namespace.getFilterSearch().addSearches(searchCache); + + log.info("UpdateFilterSearchJob search finished"); + + } + + @NotNull + public static List> getAllSelectFilters(NamespaceStorage storage) { + return storage.getAllConcepts().stream() + .flatMap(c -> c.getConnectors().stream()) + .flatMap(co -> co.collectAllFilters().stream()) + .filter(SelectFilter.class::isInstance) + .map(f -> ((SelectFilter) f)) + .collect(Collectors.toList()); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/ReactionMessage.java b/backend/src/main/java/com/bakdata/conquery/models/messages/ReactionMessage.java new file mode 100644 index 0000000000..41569d3f40 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/ReactionMessage.java @@ -0,0 +1,20 @@ +package com.bakdata.conquery.models.messages; + +import java.util.UUID; + +import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; +import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; +import com.bakdata.conquery.models.worker.WorkerHandler; + +/** + * Interface for a {@link NamespaceMessage} implementing {@link ActionReactionMessage} to notify the {@link WorkerHandler} to checkoff the processed reaction of this message. + */ +public interface ReactionMessage { + + UUID getCallerId(); + + WorkerId getWorkerId(); + + boolean lastMessageFromWorker(); +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/ActionReactionMessage.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/ActionReactionMessage.java new file mode 100644 index 0000000000..2fe4044ee8 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/ActionReactionMessage.java @@ -0,0 +1,28 @@ +package com.bakdata.conquery.models.messages.namespaces; + +import java.util.UUID; + +import com.bakdata.conquery.models.messages.ReactionMessage; +import com.bakdata.conquery.models.worker.WorkerHandler; + +/** + * Interface for {@link WorkerMessage}s that require postprocessing on the manager, after all workers responded with possibly multiple {@link ReactionMessage} that are not final and a single {@link com.bakdata.conquery.models.messages.namespaces.specific.FinalizeReactionMessage}. + */ +public interface ActionReactionMessage { + + /** + * This id is used to keep track of the reaction. + *
    + *
+ * <ol>
+ *     <li>Upon sending this message {@link WorkerHandler} registers the message id</li>
+ *     <li>The react method of a {@link ActionReactionMessage} creates a {@link ReactionMessage} which carries this id</li>
+ *     <li>After processing of an {@link ReactionMessage} the {@link WorkerHandler} checks off reactions from each worker</li>
+ *     <li>When all workers checked off, the afterAllReaction is executed</li>
+ * </ol>
+ * The id is
relayed by the {@link ReactionMessage} back to the + */ + UUID getMessageId(); + + /** + * This hook is called after all expected {@link ReactionMessage}s were received. + */ + void afterAllReaction(); +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/NamespacedMessage.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/NamespacedMessage.java index 2c4b7cd512..20d3e60d18 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/NamespacedMessage.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/NamespacedMessage.java @@ -1,21 +1,26 @@ package com.bakdata.conquery.models.messages.namespaces; +import java.util.UUID; + import com.bakdata.conquery.io.cps.CPSBase; import com.bakdata.conquery.models.messages.Message; import com.fasterxml.jackson.annotation.JsonTypeInfo; - import lombok.Getter; import lombok.Setter; -@JsonTypeInfo(use=JsonTypeInfo.Id.CUSTOM, property="type") -@CPSBase @Getter @Setter +@JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, property = "type") +@CPSBase +@Getter +@Setter public abstract class NamespacedMessage implements Message { + private UUID messageId = UUID.randomUUID(); + @Override public String toString() { - return this.getClass().getSimpleName(); + return "%s[%s]".formatted(this.getClass().getSimpleName(), messageId); } - + public abstract void react(CTX context) throws Exception; - + } diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java new file mode 100644 index 0000000000..32f1f03b97 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectColumnValuesJob.java @@ -0,0 +1,123 @@ +package com.bakdata.conquery.models.messages.namespaces.specific; + +import java.util.Arrays; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.io.jackson.serializer.NsIdRefCollection; +import com.bakdata.conquery.models.datasets.Column; +import com.bakdata.conquery.models.datasets.Table; +import com.bakdata.conquery.models.events.Bucket; +import com.bakdata.conquery.models.events.stores.root.StringStore; +import com.bakdata.conquery.models.jobs.SimpleJob; +import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; +import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; +import com.bakdata.conquery.models.messages.namespaces.WorkerMessage; +import com.bakdata.conquery.models.query.FilterSearch; +import com.bakdata.conquery.models.worker.Namespace; +import com.bakdata.conquery.models.worker.Worker; +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.google.common.util.concurrent.Futures; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListeningExecutorService; +import com.google.common.util.concurrent.MoreExecutors; +import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.extern.slf4j.Slf4j; + +/** + * 
This Job collects the distinct values in the given columns and returns a {@link RegisterColumnValues} message for each column to the namespace on the manager. + */ +@Slf4j +@RequiredArgsConstructor(onConstructor_ = @JsonCreator) +@CPSType(id = "COLLECT_COLUMN_VALUES", base = NamespacedMessage.class) +public class CollectColumnValuesJob extends WorkerMessage implements ActionReactionMessage { + + @Getter + @NsIdRefCollection + private final Set columns; + + /** + * This exists only on the manager for the afterAllReaction. + */ + @JsonIgnore + private final Namespace namespace; + + + @Override + public void react(Worker context) throws Exception { + final Map> table2Buckets = context.getStorage().getAllBuckets().stream() + .collect(Collectors.groupingBy(Bucket::getTable)); + + + final ListeningExecutorService jobsExecutorService = MoreExecutors.listeningDecorator(context.getJobsExecutorService()); + + final AtomicInteger done = new AtomicInteger(); + + + final List> futures = + columns.stream() + .filter(column -> table2Buckets.get(column.getTable()) != null) + .map(column -> + jobsExecutorService.submit(() -> { + final List buckets = table2Buckets.get(column.getTable()); + + final Set values = buckets.stream() + .flatMap(bucket -> ((StringStore) bucket.getStore(column)).streamValues()) + .collect(Collectors.toSet()); + context.send(new RegisterColumnValues(getMessageId(), context.getInfo().getId(), column, values)); + log.trace("Finished collections values for column {} as number {}", column, done.incrementAndGet()); + }) + ) + .collect(Collectors.toList()); + + + final ListenableFuture> all = Futures.allAsList(futures); + + while (true) { + try { + all.get(30, TimeUnit.SECONDS); + break; + } + catch (ExecutionException e) { + throw new RuntimeException(e); + } + catch (TimeoutException e) { + log.debug("Still waiting for jobs: {} of {} done", done.get(), futures.size()); + } + } + + log.info("Finished collecting values from these columns: {}", Arrays.toString(columns.toArray())); + context.send(new FinalizeReactionMessage(getMessageId(), context.getInfo().getId())); + } + + @Override + public void afterAllReaction() { + + // Run this in a job, so it is definitely processed after UpdateFilterSearchJob + namespace.getJobManager().addSlowJob( + new SimpleJob( + "Finalize Search update", + () -> { + log.debug("{} shrinking searches", this); + final FilterSearch filterSearch = namespace.getFilterSearch(); + columns.forEach(filterSearch::shrinkSearch); + + + log.info("BEGIN counting Search totals."); + UpdateFilterSearchJob.getAllSelectFilters(namespace.getStorage()).forEach(namespace.getFilterSearch()::getTotal); + log.debug("FINISHED counting Search totals."); + } + ) + ); + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectQueryResult.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectQueryResult.java deleted file mode 100644 index cf5293e966..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/CollectQueryResult.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.bakdata.conquery.models.messages.namespaces.specific; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; -import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; -import com.bakdata.conquery.models.query.results.ShardResult; -import com.bakdata.conquery.models.worker.DistributedNamespace; -import 
lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.NoArgsConstructor; -import lombok.Setter; -import lombok.ToString; -import lombok.extern.slf4j.Slf4j; - -/** - * Workers send their part of the query result to ManagerNode for assembly. - */ -@CPSType(id = "COLLECT_QUERY_RESULT", base = NamespacedMessage.class) -@AllArgsConstructor -@NoArgsConstructor -@Getter -@Setter -@ToString(of = "result") -@Slf4j -public class CollectQueryResult extends NamespaceMessage { - - private ShardResult result; - - @Override - public void react(DistributedNamespace context) throws Exception { - log.info("Received {} of size {}", result, result.getResults().size()); - - context.getExecutionManager().handleQueryResult(result); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java new file mode 100644 index 0000000000..e047a56963 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/FinalizeReactionMessage.java @@ -0,0 +1,42 @@ +package com.bakdata.conquery.models.messages.namespaces.specific; + +import java.util.UUID; + +import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; +import com.bakdata.conquery.models.messages.ReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; +import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; +import com.bakdata.conquery.models.worker.DistributedNamespace; +import com.fasterxml.jackson.annotation.JsonCreator; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; + + +/** + * Use {@link ActionReactionMessage#afterAllReaction()} to processing on initiator side after all reactions where collected. 
+ */ +@CPSType(id = "FINALIZE_REACTION_MESSAGE", base = NamespacedMessage.class) +@AllArgsConstructor(onConstructor_ = @JsonCreator) +@Getter +@Slf4j +@ToString +public final class FinalizeReactionMessage extends NamespaceMessage implements ReactionMessage { + + private UUID callerId; + + private WorkerId workerId; + + @Override + public boolean lastMessageFromWorker() { + return true; + } + + @Override + public void react(DistributedNamespace context) throws Exception { + log.debug("Received finalize message from caller '{}' workerId '{}'", callerId, workerId); + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/RegisterColumnValues.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/RegisterColumnValues.java new file mode 100644 index 0000000000..86cdef9d2f --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/RegisterColumnValues.java @@ -0,0 +1,55 @@ +package com.bakdata.conquery.models.messages.namespaces.specific; + +import java.util.Arrays; +import java.util.Collection; +import java.util.UUID; + +import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.io.jackson.serializer.NsIdRef; +import com.bakdata.conquery.models.datasets.Column; +import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; +import com.bakdata.conquery.models.messages.ReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; +import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; +import com.bakdata.conquery.models.worker.DistributedNamespace; +import com.fasterxml.jackson.annotation.JsonCreator; +import lombok.AllArgsConstructor; +import lombok.Getter; +import lombok.ToString; +import lombok.extern.slf4j.Slf4j; + +/** + * This message returns the result of the {@link CollectColumnValuesJob} to the namespace on the manager. 
+ */ +@CPSType(id = "REGISTER_COLUMN_VALUES", base = NamespacedMessage.class) +@AllArgsConstructor(onConstructor_ = @JsonCreator) +@Getter +@Slf4j +@ToString +public class RegisterColumnValues extends NamespaceMessage implements ReactionMessage { + + private UUID callerId; + + private WorkerId workerId; + + @NsIdRef + private final Column column; + private final Collection values; + + @Override + public void react(DistributedNamespace context) throws Exception { + if (log.isTraceEnabled()) { + log.trace("Registering values for column '{}': {}", column.getId(), Arrays.toString(values.toArray())); + } + else { + log.debug("Registering {} values for column '{}'", values.size(), column.getId()); + } + + context.getFilterSearch().registerValues(column, values); + } + + @Override + public boolean lastMessageFromWorker() { + return false; + } +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/UpdateDictionary.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/UpdateDictionary.java deleted file mode 100644 index f48f839982..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/UpdateDictionary.java +++ /dev/null @@ -1,29 +0,0 @@ -package com.bakdata.conquery.models.messages.namespaces.specific; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; -import com.bakdata.conquery.models.messages.namespaces.WorkerMessage; -import com.bakdata.conquery.models.worker.Worker; -import com.fasterxml.jackson.annotation.JsonCreator; -import lombok.AllArgsConstructor; -import lombok.Getter; -import lombok.ToString; -import lombok.extern.slf4j.Slf4j; - -@CPSType(id="UPDATE_DICTIONARY", base=NamespacedMessage.class) -@AllArgsConstructor(onConstructor_=@JsonCreator) -@ToString -@Getter -@Slf4j -public class UpdateDictionary extends WorkerMessage { - - private final Dictionary dictionary; - - @Override - public void react(Worker context) throws Exception { - log.debug("Received Dictionary[{}] of size {}.", dictionary.getId(), dictionary.size()); - context.updateDictionary(dictionary); - - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/UpdateMatchingStatsMessage.java b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/UpdateMatchingStatsMessage.java index b32c0ca9e0..5f783a8c19 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/UpdateMatchingStatsMessage.java +++ b/backend/src/main/java/com/bakdata/conquery/models/messages/namespaces/specific/UpdateMatchingStatsMessage.java @@ -135,7 +135,7 @@ private static void calculateConceptMatches(Concept concept, Map String.format("Missing dataset `%s`", datasetId)); ConqueryMDC.setLocation(ns.getStorage().getDataset().toString()); message.react(ns); + + if (message instanceof ReactionMessage reactionMessage) { + ns.getWorkerHandler().handleReactionMessage(reactionMessage); + } } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessed.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessed.java index 5d315de4a9..3bb69761f2 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessed.java +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessed.java @@ -4,34 +4,29 @@ import java.io.FileOutputStream; import 
java.io.IOException; import java.io.OutputStream; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.IntSummaryStatistics; +import java.util.List; import java.util.Map; import java.util.stream.Collectors; import java.util.zip.GZIPOutputStream; -import com.bakdata.conquery.ConqueryConstants; import com.bakdata.conquery.io.jackson.Jackson; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.MapDictionary; import com.bakdata.conquery.models.events.MajorTypeId; import com.bakdata.conquery.models.events.stores.root.ColumnStore; import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; import com.bakdata.conquery.models.preproc.parser.ColumnValues; import com.bakdata.conquery.models.preproc.parser.Parser; import com.bakdata.conquery.models.preproc.parser.specific.StringParser; import com.fasterxml.jackson.core.JsonGenerator; -import it.unimi.dsi.fastutil.ints.Int2IntAVLTreeMap; -import it.unimi.dsi.fastutil.ints.Int2IntMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; import it.unimi.dsi.fastutil.ints.IntArrayList; import it.unimi.dsi.fastutil.ints.IntList; import it.unimi.dsi.fastutil.ints.IntLists; +import it.unimi.dsi.fastutil.objects.Object2IntAVLTreeMap; +import it.unimi.dsi.fastutil.objects.Object2IntMap; import lombok.Data; import lombok.extern.slf4j.Slf4j; @@ -54,18 +49,18 @@ public class Preprocessed { /** * Per row store, entity. */ - private final IntList rowEntities = new IntArrayList(); + private final List rowEntities = new ArrayList<>(); - private long rows = 0; + private long rows; public Preprocessed(ConqueryConfig config, PreprocessingJob preprocessingJob) throws IOException { - this.job = preprocessingJob; - this.descriptor = preprocessingJob.getDescriptor(); - this.name = this.descriptor.getName(); + job = preprocessingJob; + descriptor = preprocessingJob.getDescriptor(); + name = descriptor.getName(); - TableInputDescriptor input = this.descriptor.getInputs()[0]; + final TableInputDescriptor input = descriptor.getInputs()[0]; columns = new PPColumn[input.getWidth()]; primaryColumn = (StringParser) MajorTypeId.STRING.createParser(config); @@ -73,7 +68,7 @@ public Preprocessed(ConqueryConfig config, PreprocessingJob preprocessingJob) th values = new ColumnValues[columns.length]; for (int index = 0; index < input.getWidth(); index++) { - ColumnDescription columnDescription = input.getColumnDescription(index); + final ColumnDescription columnDescription = input.getColumnDescription(index); columns[index] = new PPColumn(columnDescription.getName(), columnDescription.getType()); final Parser parser = input.getOutput()[index].createParser(config); @@ -86,8 +81,8 @@ public Preprocessed(ConqueryConfig config, PreprocessingJob preprocessingJob) th public void write(File file) throws IOException { - Int2IntMap entityStart = new Int2IntAVLTreeMap(); - Int2IntMap entityLength = new Int2IntAVLTreeMap(); + final Object2IntMap entityStart = new Object2IntAVLTreeMap<>(); + final Object2IntMap entityLength = new Object2IntAVLTreeMap<>(); calculateEntitySpans(entityStart, entityLength); @@ -95,63 +90,29 @@ public void write(File file) throws IOException { log.info("Statistics = {}", statistics); - Map columnStores = 
combineStores(entityStart); - - Dictionary primaryDictionary = encodePrimaryDictionary(); - - Map dicts = collectDictionaries(columnStores); + final Map columnStores = combineStores(entityStart); log.debug("Writing Headers"); - int hash = descriptor.calculateValidityHash(job.getCsvDirectory(), job.getTag()); + final int hash = descriptor.calculateValidityHash(job.getCsvDirectory(), job.getTag()); - PreprocessedHeader header = new PreprocessedHeader( - descriptor.getName(), - descriptor.getTable(), - rows, - columns, - hash - ); + final PreprocessedHeader header = new PreprocessedHeader(descriptor.getName(), descriptor.getTable(), rows, columns, hash); - final PreprocessedDictionaries dictionaries = new PreprocessedDictionaries(primaryDictionary, dicts); final PreprocessedData data = new PreprocessedData(entityStart, entityLength, columnStores); - writePreprocessed(file, header, dictionaries, data); - } - - private static void writePreprocessed(File file, PreprocessedHeader header, PreprocessedDictionaries dictionaries, PreprocessedData data) - throws IOException { - OutputStream out = new GZIPOutputStream(new FileOutputStream(file)); - try (JsonGenerator generator = Jackson.BINARY_MAPPER.copy() - .enable(JsonGenerator.Feature.AUTO_CLOSE_TARGET) - .getFactory() - .createGenerator(out)) { - - log.debug("Writing header"); - - generator.writeObject(header); - - log.debug("Writing Dictionaries"); - - generator.writeObject(dictionaries); - - log.debug("Writing data"); - - generator.writeObject(data); - } + writePreprocessed(file, header, data); } - /** * Calculate beginning and length of entities in output data. */ - private void calculateEntitySpans(Int2IntMap entityStart, Int2IntMap entityLength) { + private void calculateEntitySpans(Object2IntMap entityStart, Object2IntMap entityLength) { // Count the number of events for the entity - for (int entity : rowEntities) { + for (String entity : rowEntities) { final int curr = entityLength.getOrDefault(entity, 0); entityLength.put(entity, curr + 1); } @@ -159,8 +120,8 @@ private void calculateEntitySpans(Int2IntMap entityStart, Int2IntMap entityLengt // Lay out the entities in order, adding their length. int outIndex = 0; - for (Int2IntMap.Entry entry : entityLength.int2IntEntrySet()) { - entityStart.put(entry.getIntKey(), outIndex); + for (Object2IntMap.Entry entry : entityLength.object2IntEntrySet()) { + entityStart.put(entry.getKey(), outIndex); outIndex += entry.getIntValue(); } } @@ -169,18 +130,15 @@ private void calculateEntitySpans(Int2IntMap entityStart, Int2IntMap entityLengt * Combine raw by-Entity data into column stores, appropriately formatted. */ @SuppressWarnings("rawtypes") - private Map combineStores(Int2IntMap entityStart) { - Map columnStores = Arrays.stream(columns) - .parallel() - .collect(Collectors.toMap(PPColumn::getName, PPColumn::findBestType)); + private Map combineStores(Object2IntMap entityStart) { + final Map columnStores = Arrays.stream(columns).parallel().collect(Collectors.toMap(PPColumn::getName, PPColumn::findBestType)); // This object can be huge! 
- Int2ObjectMap entityEvents = new Int2ObjectOpenHashMap<>(entityStart.size()); + final Map entityEvents = new HashMap<>(entityStart.size()); for (int pos = 0, size = rowEntities.size(); pos < size; pos++) { - int entity = rowEntities.getInt(pos); - entityEvents.computeIfAbsent(entity, (ignored) -> new IntArrayList()) - .add(pos); + final String entity = rowEntities.get(pos); + entityEvents.computeIfAbsent(entity, (ignored) -> new IntArrayList()).add(pos); } for (int colIdx = 0; colIdx < columns.length; colIdx++) { @@ -193,66 +151,48 @@ private Map combineStores(Int2IntMap entityStart) { } final ColumnStore store = columnStores.get(ppColumn.getName()); - entityStart.int2IntEntrySet() - .forEach(entry -> { - final int entity = entry.getIntKey(); - int outIndex = entry.getIntValue(); - - final IntList events = entityEvents.getOrDefault(entity, IntLists.emptyList()); - - for (int inIndex : events) { - if (columnValues.isNull(inIndex)) { - store.setNull(outIndex); - } - else { - final Object raw = columnValues.get(inIndex); - ppColumn.getParser().setValue(store, outIndex, raw); - } - outIndex++; - } - }); + entityStart.object2IntEntrySet().forEach(entry -> { + final String entity = entry.getKey(); + int outIndex = entry.getIntValue(); + + final IntList events = entityEvents.getOrDefault(entity, IntLists.emptyList()); + + for (int inIndex : events) { + if (columnValues.isNull(inIndex)) { + store.setNull(outIndex); + } + else { + final Object raw = columnValues.get(inIndex); + ppColumn.getParser().setValue(store, outIndex, raw); + } + outIndex++; + } + }); } return columnStores; } + private static void writePreprocessed(File file, PreprocessedHeader header, PreprocessedData data) throws IOException { + final OutputStream out = new GZIPOutputStream(new FileOutputStream(file)); + try (JsonGenerator generator = Jackson.BINARY_MAPPER.copy().enable(JsonGenerator.Feature.AUTO_CLOSE_TARGET).getFactory().createGenerator(out)) { - private Dictionary encodePrimaryDictionary() { - log.debug("Encode primary Dictionary"); - - primaryColumn.applyEncoding(EncodedStringStore.Encoding.UTF8); - - MapDictionary primaryDict = new MapDictionary(Dataset.PLACEHOLDER, ConqueryConstants.PRIMARY_DICTIONARY); - primaryColumn.getDecoded().forEach(primaryDict::add); - - return primaryDict; - } + log.debug("Writing header"); - private static Map collectDictionaries(Map columnStores) { - final Map collect = new HashMap<>(); - for (Map.Entry entry : columnStores.entrySet()) { - if (!(entry.getValue() instanceof StringStore)) { - continue; - } + generator.writeObject(header); - final Dictionary dictionary = ((StringStore) entry.getValue()).getUnderlyingDictionary(); - if (dictionary == null) { - continue; - } + log.debug("Writing data"); - collect.put(entry.getKey(), dictionary); + generator.writeObject(data); } - - return collect; } - public synchronized int addPrimary(int primary) { - primaryColumn.addLine(primary); - return primary; + public synchronized String addPrimary(String primary) { + return primaryColumn.addLine(primary); } - public synchronized void addRow(int primaryId, PPColumn[] columns, Object[] outRow) { - int event = rowEntities.size(); + public synchronized void addRow(String primaryId, PPColumn[] columns, Object[] outRow) { + final int event = rowEntities.size(); rowEntities.add(primaryId); for (int col = 0; col < outRow.length; col++) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedData.java 
b/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedData.java index 16d7697461..313d44ee09 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedData.java +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedData.java @@ -13,8 +13,8 @@ @AllArgsConstructor(onConstructor_ = @JsonCreator) public class PreprocessedData { - private final Map starts; - private final Map lengths; + private final Map starts; + private final Map lengths; private final Map stores; @@ -27,7 +27,7 @@ public int size() { return starts.size(); } - public Set entities() { + public Set entities() { return starts.keySet(); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedDictionaries.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedDictionaries.java deleted file mode 100644 index b1edb04ced..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedDictionaries.java +++ /dev/null @@ -1,20 +0,0 @@ -package com.bakdata.conquery.models.preproc; - -import java.util.Map; - -import javax.annotation.CheckForNull; -import javax.validation.constraints.NotNull; - -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.fasterxml.jackson.annotation.JsonCreator; -import lombok.AllArgsConstructor; -import lombok.Data; - -@Data -@AllArgsConstructor(onConstructor_ = @JsonCreator) -public class PreprocessedDictionaries { - @NotNull - private final Dictionary primaryDictionary; - @CheckForNull - private final Map dictionaries; -} diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedReader.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedReader.java index 25c071e088..abdd8574d1 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedReader.java +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/PreprocessedReader.java @@ -32,7 +32,7 @@ public void close() throws IOException { @Accessors(fluent = true) @RequiredArgsConstructor public enum LastRead { - DATA(null), DICTIONARIES(DATA), HEADER(DICTIONARIES), BEGIN(HEADER); + DATA(null), HEADER(DATA), BEGIN(HEADER); @Getter private final LastRead next; @@ -70,17 +70,10 @@ public PreprocessedHeader readHeader() throws IOException { return header; } - public PreprocessedDictionaries readDictionaries() throws IOException { - Preconditions.checkState(lastRead.equals(LastRead.HEADER)); - - final PreprocessedDictionaries dictionaries = parser.readValueAs(PreprocessedDictionaries.class); - lastRead = lastRead.next(); - return dictionaries; - } public PreprocessedData readData() throws IOException { - Preconditions.checkState(lastRead.equals(LastRead.DICTIONARIES)); + Preconditions.checkState(lastRead.equals(LastRead.HEADER)); final PreprocessedData dictionaries = parser.readValueAs(PreprocessedData.class); diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessor.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessor.java index 31c0c36ba1..a7908aa324 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessor.java +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/Preprocessor.java @@ -146,11 +146,11 @@ public static void preprocess(PreprocessingJob preprocessingJob, ProgressBar tot } try { - int primaryId = - (int) Objects.requireNonNull(primaryOut.createOutput(row, result.getPrimaryColumn(), lineId), "primaryId may not be null"); + String 
primaryId = + (String) Objects.requireNonNull(primaryOut.createOutput(row, result.getPrimaryColumn(), lineId), "primaryId may not be null"); - final int primary = result.addPrimary(primaryId); + final String primary = result.addPrimary(primaryId); final Object[] outRow = applyOutputs(outputs, columns, row, lineId); result.addRow(primary, columns, outRow); diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/package-info.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/package-info.java index b88132ae4f..5a9cdb7de7 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/package-info.java +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/package-info.java @@ -4,13 +4,11 @@ *

* It is encoded as Smile/BinaryJson-format consisting of three documents: * - {@link com.bakdata.conquery.models.preproc.PreprocessedHeader}: metadata of the import. - * - {@link com.bakdata.conquery.models.preproc.PreprocessedDictionaries}: dictionary encoded strings for the import. * - {@link com.bakdata.conquery.models.preproc.PreprocessedData}: the description and raw representation of the data as {@link com.bakdata.conquery.models.events.stores.root.ColumnStore}. *

* The file is split into three sections, so we can load them progressively: * Initially, we just read the header and determine if it isn't already loaded, and also fits to the {@link com.bakdata.conquery.models.datasets.Table} it is supposed to go in. * We then submit an {@link com.bakdata.conquery.models.jobs.ImportJob} which will load the data. - * First the {@link com.bakdata.conquery.models.dictionary.Dictionary}s. Those are imported and are potentially altered or ingested into shared-Dictionaries (via {@link com.bakdata.conquery.models.datasets.Column#getSharedDictionary()}). *

* We then load the raw data, having claims for Dictionaries in the import resolved via {@link com.bakdata.conquery.io.jackson.serializer.NsIdRef}, which is why they need to be loaded in a second step. *

diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/CompoundDateRangeParser.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/CompoundDateRangeParser.java index 212fa1b202..a0d5fc02f7 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/CompoundDateRangeParser.java +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/CompoundDateRangeParser.java @@ -2,7 +2,6 @@ import javax.validation.constraints.NotEmpty; -import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.events.stores.primitive.BitSetStore; import com.bakdata.conquery.models.events.stores.root.DateRangeStore; diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/StringColumnValues.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/StringColumnValues.java new file mode 100644 index 0000000000..7ad3bfcb14 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/StringColumnValues.java @@ -0,0 +1,4 @@ +package com.bakdata.conquery.models.preproc.parser.specific; + +public class StringColumnValues extends ListColumnValues { +} diff --git a/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/StringParser.java b/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/StringParser.java index a9f1ba5be1..7acf67e522 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/StringParser.java +++ b/backend/src/main/java/com/bakdata/conquery/models/preproc/parser/specific/StringParser.java @@ -1,301 +1,67 @@ package com.bakdata.conquery.models.preproc.parser.specific; -import java.util.Comparator; -import java.util.EnumSet; -import java.util.IntSummaryStatistics; -import java.util.List; -import java.util.Objects; -import java.util.UUID; import java.util.regex.Pattern; -import java.util.stream.Collectors; -import com.bakdata.conquery.models.common.Range; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.MapDictionary; import com.bakdata.conquery.models.events.EmptyStore; -import com.bakdata.conquery.models.events.stores.primitive.BitSetStore; -import com.bakdata.conquery.models.events.stores.root.IntegerStore; +import com.bakdata.conquery.models.events.stores.primitive.StringStoreString; import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.bakdata.conquery.models.events.stores.specific.string.DictionaryStore; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore.Encoding; -import com.bakdata.conquery.models.events.stores.specific.string.NumberStringStore; -import com.bakdata.conquery.models.events.stores.specific.string.PrefixSuffixStringStore; -import com.bakdata.conquery.models.events.stores.specific.string.SingletonStringStore; import com.bakdata.conquery.models.exceptions.ParsingException; import com.bakdata.conquery.models.preproc.parser.ColumnValues; import com.bakdata.conquery.models.preproc.parser.Parser; -import com.bakdata.conquery.util.dict.SuccinctTrie; -import com.google.common.base.Strings; -import io.dropwizard.util.DataSize; -import 
it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.ints.IntSet; -import it.unimi.dsi.fastutil.objects.Object2IntMap; -import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import lombok.Getter; import lombok.SneakyThrows; import lombok.ToString; import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.StringUtils; /** * Analyze all strings for common suffix/prefix, or if they are singleton. - *
- * Values are stored DictionaryEncoded(Integer->String), Integers are stored using {@link IntegerParser}. */ @Slf4j @Getter -@ToString(callSuper = true, of = {"encoding", "prefix", "suffix"}) -public class StringParser extends Parser { +@ToString(callSuper = true) +public class StringParser extends Parser { private static final Pattern DIGITS = Pattern.compile("^\\d+$"); - private Object2IntMap strings = new Object2IntOpenHashMap<>(); - - //TODO FK: this field is not used at the moment, but we want to use it to prune unused values, this would mean cleaning up strings and allowing Dictionary to set a specific valuie, not just setting it. - private IntSet registered = new IntOpenHashSet(); - - private List decoded; - private Encoding encoding; - private String prefix; - private String suffix; - public StringParser(ConqueryConfig config) { super(config); } - /** - * It's either exactly `0`, or a string of digits, not starting with `0`, and no leading +-. - */ - public static boolean isOnlyDigits(String value) { - if (value.startsWith("0")) { - return value.length() == 1; - } - - return DIGITS.matcher(value).matches(); - } - - public NumberStringStore tryCreateNumberStringStore(ConqueryConfig config) { - - //check if the remaining strings are all numbers - final IntegerParser numberParser = new IntegerParser(config); - - try { - - for (String s : getStrings().keySet()) { - - // Ensure there are only digits and no other leading zeroes. - if (!isOnlyDigits(s)) { - return null; - } - - long parseInt = Integer.parseInt(s); - numberParser.addLine(parseInt); - } - } - catch (NumberFormatException e) { - return null; - } - - - numberParser.setLines(getLines()); - - /* - Do not use a number type if the range is much larger than the number if distinct values - e.g. if the column contains only 0 and 5M - */ - - final long span = numberParser.getMaxValue() - numberParser.getMinValue() + 1; - - if (span > getStrings().size()) { - return null; - } - - IntegerStore decision = numberParser.findBestType(); - - Int2ObjectMap inverse = new Int2ObjectOpenHashMap<>(getStrings().size()); - getStrings().forEach((key, value) -> inverse.putIfAbsent((int) value, key)); - - return new NumberStringStore(new Range.IntegerRange((int) numberParser.getMinValue(), (int) numberParser.getMaxValue()), decision, inverse); - } - @Override - protected Integer parseValue(String value) throws ParsingException { - return strings.computeIfAbsent(value, this::processSingleValue); + protected String parseValue(String value) throws ParsingException { + return value.intern(); } @Override - protected void registerValue(Integer v) { - registered.add(v.intValue()); - } - - public int processSingleValue(String value) { - //set longest common prefix and suffix - prefix = Strings.commonPrefix(value, Objects.requireNonNullElse(prefix, value)); - suffix = Strings.commonSuffix(value, Objects.requireNonNullElse(suffix, value)); + protected void registerValue(String v) { - //return next id - return strings.size(); } @Override protected StringStore decideType() { //check if a singleton type is enough - if (strings.isEmpty()) { + if (getLines() == 0) { return EmptyStore.INSTANCE; } - // Is this a singleton? 
- if (strings.size() == 1) { - SingletonStringStore type = new SingletonStringStore(strings.keySet().iterator().next(), BitSetStore.create(getLines())); - - return type; - } - - //remove prefix and suffix - if (!StringUtils.isEmpty(prefix) || !StringUtils.isEmpty(suffix)) { - stripPrefixSuffix(); - log.debug("Reduced strings by the '{}' prefix and '{}' suffix", prefix, suffix); - } - - decode(); - - StringStore result = decideStorageType(); - - //wrap in prefix suffix - if (!Strings.isNullOrEmpty(prefix) || !Strings.isNullOrEmpty(suffix)) { - result = new PrefixSuffixStringStore(result, Strings.nullToEmpty(prefix), Strings.nullToEmpty(suffix)); - } - - return result; + return StringStoreString.create(getLines()); } - private StringStore decideStorageType() { - NumberStringStore numberType = tryCreateNumberStringStore(getConfig()); - if (numberType != null) { - log.debug("Decided for {}", numberType); - return numberType; - } - - final String name = UUID.randomUUID().toString(); - - SuccinctTrie trie = new SuccinctTrie(Dataset.PLACEHOLDER, name); - - getDecoded().forEach(trie::add); - - final long mapTypeEstimate = MapDictionary.estimateMemoryConsumption(getStrings().size(), getDecoded().stream().mapToLong(s -> s.length).sum()); - - final Dictionary dictionary; - - if (trie.estimateMemoryConsumption() < mapTypeEstimate) { - trie.compress(); - dictionary = trie; - } - else { - dictionary = new MapDictionary(Dataset.PLACEHOLDER, name); - getDecoded().forEach(dictionary::add); - } - - final IntegerStore indexType = decideIndexType(); - - log.debug( - "Decided for {} and {} (est. {})", - dictionary, - indexType, - DataSize.megabytes(indexType.estimateMemoryConsumptionBytes() + dictionary.estimateMemoryConsumption()) - ); - - return new EncodedStringStore(new DictionaryStore(indexType, dictionary), getEncoding()); - } - - private void stripPrefixSuffix() { - Object2IntMap oldStrings = strings; - strings = new Object2IntOpenHashMap<>(oldStrings.size()); - int stripLeading = prefix.length(); - int stripTrailing = suffix.length(); - - for (Object2IntMap.Entry e : oldStrings.object2IntEntrySet()) { - strings.put( - e.getKey().substring(stripLeading, e.getKey().length() - stripTrailing), - e.getIntValue() - ); - - } - } - - /** - * Select the least memory intensive encoding and decode all values using it. - */ - private void decode() { - encoding = findEncoding(); - log.debug("\tChosen encoding is {}", encoding); - applyEncoding(encoding); - } - - /** - * Test all available encodings and of the ones that can decode all values, use the one using the least memory. 
- */ - private Encoding findEncoding() { - EnumSet bases = EnumSet.allOf(Encoding.class); - for (String value : strings.keySet()) { - - bases.removeIf(encoding -> !encoding.canEncode(value)); - - if (bases.size() == 1) { - return bases.iterator().next(); - } - - if (bases.isEmpty()) { - throw new IllegalStateException("No Encoding can encode the values."); - } - } - - return bases.stream() - .min(Encoding::compareTo) - .orElseThrow(() -> new IllegalStateException("No valid encoding.")); - - } - - public void applyEncoding(Encoding encoding) { - this.encoding = encoding; - decoded = strings.object2IntEntrySet().stream() - .sorted(Comparator.comparing(Object2IntMap.Entry::getIntValue)) - .map(entry -> encoding.encode(entry.getKey())) - .collect(Collectors.toList()); - } @Override - public void setValue(StringStore store, int event, Integer value) { + public void setValue(StringStore store, int event, String value) { store.setString(event, value); } @SneakyThrows @Override - public ColumnValues createColumnValues() { - return new IntegerColumnValues(); + public ColumnValues createColumnValues() { + return new StringColumnValues(); } - public IntegerStore decideIndexType() { - final IntegerParser indexParser = new IntegerParser(getConfig()); - - final IntSummaryStatistics indexStatistics = getStrings().values().intStream() - .summaryStatistics(); - - indexParser.setMaxValue(indexStatistics.getMax()); - indexParser.setMinValue(indexStatistics.getMin()); - - indexParser.setLines(getLines()); - indexParser.setNullLines(getNullLines()); - - - return indexParser.findBestType(); - } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/DistributedExecutionManager.java b/backend/src/main/java/com/bakdata/conquery/models/query/DistributedExecutionManager.java index 9e47c3ddd8..7db39b2938 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/DistributedExecutionManager.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/DistributedExecutionManager.java @@ -1,148 +1,107 @@ package com.bakdata.conquery.models.query; -import java.util.ArrayList; +import java.util.Collection; import java.util.List; -import java.util.UUID; -import java.util.concurrent.ExecutionException; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; import java.util.stream.Stream; -import com.bakdata.conquery.apiv1.query.QueryDescription; import com.bakdata.conquery.io.storage.MetaStorage; import com.bakdata.conquery.metrics.ExecutionMetrics; import com.bakdata.conquery.mode.cluster.ClusterState; import com.bakdata.conquery.models.auth.AuthorizationHelper; import com.bakdata.conquery.models.auth.entities.Group; -import com.bakdata.conquery.models.auth.entities.User; -import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.error.ConqueryError; import com.bakdata.conquery.models.execution.ExecutionState; import com.bakdata.conquery.models.execution.InternalExecution; import com.bakdata.conquery.models.execution.ManagedExecution; -import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; +import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; +import com.bakdata.conquery.models.messages.namespaces.specific.CancelQuery; import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.query.results.ShardResult; import com.bakdata.conquery.models.worker.Namespace; -import 
com.google.common.cache.Cache; -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.RemovalNotification; -import lombok.RequiredArgsConstructor; +import com.bakdata.conquery.models.worker.WorkerHandler; import lombok.SneakyThrows; import lombok.extern.slf4j.Slf4j; -@RequiredArgsConstructor @Slf4j -public class DistributedExecutionManager implements ExecutionManager { +public class DistributedExecutionManager extends ExecutionManager { - private final MetaStorage storage; - private final ClusterState clusterState; - - private final Cache>> executionResults = - CacheBuilder.newBuilder() - .softValues() - .removalListener(this::executionRemoved) - .build(); + public record DistributedResult(Map> results) implements Result { - /** - * Manage state of evicted Queries, setting them to NEW. - */ - private void executionRemoved(RemovalNotification> removalNotification) { - // If removal was done manually we assume it was also handled properly - if (!removalNotification.wasEvicted()) { - return; + public DistributedResult() { + this(new ConcurrentHashMap<>()); } - final ManagedExecutionId executionId = removalNotification.getKey(); - - log.warn("Evicted Results for Query[{}] (Reason: {})", executionId, removalNotification.getCause()); - - final ManagedExecution execution = storage.getExecution(executionId); - - // The query might already be deleted - if(execution != null) { - execution.reset(); + @Override + public Stream streamQueryResults() { + return results.values().stream().flatMap(Collection::stream); } } - @Override - public ManagedExecution runQuery(Namespace namespace, QueryDescription query, User user, Dataset submittedDataset, ConqueryConfig config, boolean system) { - final ManagedExecution execution = createExecution(query, user, submittedDataset, system); - execute(namespace, execution, config); + private final ClusterState clusterState; - return execution; + public DistributedExecutionManager(MetaStorage storage, ClusterState state) { + super(storage); + clusterState = state; } - @Override - public void execute(Namespace namespace, ManagedExecution execution, ConqueryConfig config) { - try { - execution.initExecutable(namespace, config); - } - catch (Exception e) { - // ConqueryErrors are usually user input errors so no need to log them at level=ERROR - if (e instanceof ConqueryError) { - log.warn("Failed to initialize Query[{}]", execution.getId(), e); - } - else { - log.error("Failed to initialize Query[{}]", execution.getId(), e); - } - - storage.removeExecution(execution.getId()); - throw e; - } - - log.info("Starting execution[{}]", execution.getQueryId()); - - execution.start(); - - - final String primaryGroupName = AuthorizationHelper.getPrimaryGroup(execution.getOwner(), storage).map(Group::getName).orElse("none"); - ExecutionMetrics.getRunningQueriesCounter(primaryGroupName).inc(); - - if (execution instanceof InternalExecution internalExecution) { - log.info("Executing Query[{}] in Dataset[{}]", execution.getQueryId(), namespace.getDataset().getId()); - clusterState.getWorkerHandlers().get(execution.getDataset().getId()).sendToAll(internalExecution.createExecutionMessage()); - } - } @Override - public ManagedExecution createExecution(QueryDescription query, User user, Dataset submittedDataset, boolean system) { - return createQuery(query, UUID.randomUUID(), user, submittedDataset, system); - } + protected void doExecute(Namespace namespace, InternalExecution internalExecution) { + ManagedExecution execution = (ManagedExecution & InternalExecution) 
internalExecution; + log.info("Executing Query[{}] in Dataset[{}]", execution.getQueryId(), namespace.getDataset().getId()); - // Visible for testing - public ManagedExecution createQuery(QueryDescription query, UUID queryId, User user, Dataset submittedDataset, boolean system) { - // Transform the submitted query into an initialized execution - ManagedExecution managed = query.toManagedExecution(user, submittedDataset, storage); - managed.setSystem(system); - managed.setQueryId(queryId); + final WorkerHandler workerHandler = getWorkerHandler(execution); - // Store the execution - storage.addExecution(managed); + workerHandler.sendToAll(internalExecution.createExecutionMessage()); + } - return managed; + private WorkerHandler getWorkerHandler(ManagedExecution execution) { + return clusterState.getWorkerHandlers() + .get(execution.getDataset().getId()); } /** * Receive part of query result and store into query. * - * @param result + * @implNote subQueries of Forms are managed by the form itself, so need to be passed from outside. */ - public > void handleQueryResult(R result) { + @SneakyThrows + public > void handleQueryResult(R result, E query) { - final ManagedExecutionId executionId = result.getQueryId(); - final E query = (E) storage.getExecution(executionId); + + log.debug("Received Result[size={}] for Query[{}]", result.getResults().size(), result.getQueryId()); + log.trace("Received Result\n{}", result.getResults()); if (query.getState() != ExecutionState.RUNNING) { + log.warn("Received result for Query[{}] that is not RUNNING but {}", query.getId(), query.getState()); return; } - query.addResult(result); + if (result.getError().isPresent()) { + query.fail(result.getError().get()); + } + else { + + // We don't collect all results together into a fat list as that would cause lots of huge re-allocations for little gain. + final DistributedResult results = getResult(query, DistributedResult::new); + results.results.put(result.getWorkerId(), result.getResults()); + + final Set finishedWorkers = results.results.keySet(); + + // If all known workers have returned a result, the query is DONE. + if (finishedWorkers.equals(getWorkerHandler(query).getAllWorkerIds())) { + query.finish(ExecutionState.DONE); + } + } // State changed to DONE or FAILED if (query.getState() != ExecutionState.RUNNING) { - final String primaryGroupName = AuthorizationHelper.getPrimaryGroup(query.getOwner(), storage).map(Group::getName).orElse("none"); + final String primaryGroupName = AuthorizationHelper.getPrimaryGroup(query.getOwner(), getStorage()).map(Group::getName).orElse("none"); ExecutionMetrics.getRunningQueriesCounter(primaryGroupName).dec(); ExecutionMetrics.getQueryStateCounter(query.getState(), primaryGroupName).inc(); @@ -150,44 +109,17 @@ public /* This log is here to prevent an NPE which could occur when no strong reference to result.getResults() existed anymore after the query finished and immediately was reset */ - log.trace("Collected metrics for execution {}. Last result received: {}:", executionId, result.getResults()); + log.trace("Collected metrics for execution {}. Last result received: {}:", result.getQueryId(), result.getResults()); } } - - /** - * Register another result for the execution. 
- */ - - @SneakyThrows(ExecutionException.class) // can only occur if ArrayList::new fails which is unlikely and would have other problems also - public void addQueryResult(ManagedExecution execution, List queryResults) { - // We don't collect all results together into a fat list as that would cause lots of huge re-allocations for little gain. - executionResults.get(execution.getId(), ArrayList::new) - .add(queryResults); - } - - /** - * Discard the query's results. - */ - @Override - public void clearQueryResults(ManagedExecution execution) { - executionResults.invalidate(execution.getId()); - } - - @Override - public Stream streamQueryResults(ManagedExecution execution) { - final List> resultParts = executionResults.getIfPresent(execution.getId()); - - return resultParts == null - ? Stream.empty() - : resultParts.stream().flatMap(List::stream); - - } - @Override public void cancelQuery(Dataset dataset, ManagedExecution query) { + log.debug("Sending cancel message to all workers."); + query.cancel(); + getWorkerHandler(query).sendToAll(new CancelQuery(query.getId())); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/ExecutionManager.java b/backend/src/main/java/com/bakdata/conquery/models/query/ExecutionManager.java index 7635da9705..4f560a0cbf 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/ExecutionManager.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/ExecutionManager.java @@ -1,33 +1,151 @@ package com.bakdata.conquery.models.query; +import java.util.UUID; +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; import java.util.stream.Stream; import com.bakdata.conquery.apiv1.query.QueryDescription; +import com.bakdata.conquery.io.storage.MetaStorage; +import com.bakdata.conquery.metrics.ExecutionMetrics; +import com.bakdata.conquery.models.auth.AuthorizationHelper; +import com.bakdata.conquery.models.auth.entities.Group; import com.bakdata.conquery.models.auth.entities.User; import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.datasets.Dataset; +import com.bakdata.conquery.models.error.ConqueryError; +import com.bakdata.conquery.models.execution.InternalExecution; import com.bakdata.conquery.models.execution.ManagedExecution; +import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.worker.Namespace; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.RemovalNotification; +import lombok.Data; +import lombok.extern.slf4j.Slf4j; -public interface ExecutionManager { +@Data +@Slf4j +public abstract class ExecutionManager { - ManagedExecution runQuery(Namespace namespace, QueryDescription query, User user, Dataset submittedDataset, ConqueryConfig config, boolean system); + public interface Result { + Stream streamQueryResults(); + } - void execute(Namespace namespace, ManagedExecution execution, ConqueryConfig config); + private final MetaStorage storage; - ManagedExecution createExecution(QueryDescription query, User user, Dataset submittedDataset, boolean system); - - void cancelQuery(final Dataset dataset, final ManagedExecution query); + private final Cache executionResults = + CacheBuilder.newBuilder() + .softValues() + .removalListener(this::executionRemoved) + .build(); /** - * Discard the query's results. + * Manage state of evicted Queries, setting them to NEW. 
*/ - void clearQueryResults(ManagedExecution execution); + private void executionRemoved(RemovalNotification removalNotification) { + // If removal was done manually we assume it was also handled properly + if (!removalNotification.wasEvicted()) { + return; + } - /** - * Stream the results of the query, if available. - */ - Stream streamQueryResults(ManagedExecution execution); + final ManagedExecutionId executionId = removalNotification.getKey(); + + log.warn("Evicted Results for Query[{}] (Reason: {})", executionId, removalNotification.getCause()); + + final ManagedExecution execution = getExecution(executionId); + + // The query might already be deleted + if (execution != null) { + execution.reset(); + } + } + + + public ManagedExecution getExecution(ManagedExecutionId execution) { + return storage.getExecution(execution); + } + + protected R getResult(ManagedExecution execution, Callable defaultProvider) throws ExecutionException { + return executionResults.get(execution.getId(), defaultProvider); + } + + protected void addResult(ManagedExecution execution, R result) { + executionResults.put(execution.getId(), result); + } + + public final ManagedExecution runQuery(Namespace namespace, QueryDescription query, User user, Dataset submittedDataset, ConqueryConfig config, boolean system) { + final ManagedExecution execution = createExecution(query, user, submittedDataset, system); + execute(namespace, execution, config); + + return execution; + } + + + public final void execute(Namespace namespace, ManagedExecution execution, ConqueryConfig config) { + + clearQueryResults(execution); + + try { + execution.initExecutable(namespace, config); + } + catch (Exception e) { + // ConqueryErrors are usually user input errors so no need to log them at level=ERROR + if (e instanceof ConqueryError) { + log.warn("Failed to initialize Query[{}]", execution.getId(), e); + } + else { + log.error("Failed to initialize Query[{}]", execution.getId(), e); + } + + storage.removeExecution(execution.getId()); + throw e; + } + + log.info("Starting execution[{}]", execution.getQueryId()); + + execution.start(); + + final String primaryGroupName = AuthorizationHelper.getPrimaryGroup(execution.getOwner(), storage).map(Group::getName).orElse("none"); + ExecutionMetrics.getRunningQueriesCounter(primaryGroupName).inc(); + + if (execution instanceof InternalExecution internalExecution) { + doExecute(namespace, internalExecution); + } + } + + protected abstract void doExecute(Namespace namespace, InternalExecution execution); + + // Visible for testing + public final ManagedExecution createExecution(QueryDescription query, User user, Dataset submittedDataset, boolean system) { + return createQuery(query, UUID.randomUUID(), user, submittedDataset, system); + } + + public final ManagedExecution createQuery(QueryDescription query, UUID queryId, User user, Dataset submittedDataset, boolean system) { + // Transform the submitted query into an initialized execution + ManagedExecution managed = query.toManagedExecution(user, submittedDataset, storage); + managed.setSystem(system); + managed.setQueryId(queryId); + + // Store the execution + storage.addExecution(managed); + + return managed; + } + + public abstract void cancelQuery(final Dataset dataset, final ManagedExecution query); + + public void clearQueryResults(ManagedExecution execution) { + executionResults.invalidate(execution.getId()); + } + + public Stream streamQueryResults(ManagedExecution execution) { + final R resultParts = 
executionResults.getIfPresent(execution.getId()); + + return resultParts == null + ? Stream.empty() + : resultParts.streamQueryResults(); + } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java index fabf329c07..46273c979f 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/FilterSearch.java @@ -1,6 +1,7 @@ package com.bakdata.conquery.models.query; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -8,30 +9,23 @@ import java.util.stream.Collectors; import com.bakdata.conquery.apiv1.frontend.FrontendValue; -import com.bakdata.conquery.io.storage.NamespaceStorage; -import com.bakdata.conquery.models.config.CSVConfig; import com.bakdata.conquery.models.config.IndexConfig; import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; -import com.bakdata.conquery.models.jobs.JobManager; -import com.bakdata.conquery.models.jobs.SimpleJob; -import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; import com.bakdata.conquery.util.search.TrieSearch; import com.fasterxml.jackson.annotation.JsonIgnore; import it.unimi.dsi.fastutil.objects.Object2LongMap; -import it.unimi.dsi.fastutil.objects.Object2LongMaps; import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap; -import lombok.Data; +import lombok.Getter; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; @Slf4j -@Data +@RequiredArgsConstructor public class FilterSearch { - private final NamespaceStorage storage; - private final JobManager jobManager; - private final CSVConfig parserConfig; + @Getter private final IndexConfig indexConfig; /** @@ -40,8 +34,8 @@ public class FilterSearch { * In the code below, the keys of this map will usually be called "reference". */ @JsonIgnore - private final Map, TrieSearch> searchCache = new HashMap<>(); - private Object2LongMap> totals = Object2LongMaps.emptyMap(); + private Map> searchCache = new HashMap<>(); + private Object2LongMap> totals = new Object2LongOpenHashMap<>(); /** * From a given {@link FrontendValue} extract all relevant keywords. @@ -62,11 +56,11 @@ public static List extractKeywords(FrontendValue value) { /** * For a {@link SelectFilter} collect all relevant {@link TrieSearch}. 
*/ - public final List> getSearchesFor(Searchable searchable) { - final List> references = searchable.getSearchReferences(); + public final List> getSearchesFor(SelectFilter searchable) { + final List references = searchable.getSearchReferences(); - if(log.isTraceEnabled()) { - log.trace("Got {} as searchables for {}", references.stream().map(Searchable::getId).collect(Collectors.toList()), searchable.getId()); + if (log.isTraceEnabled()) { + log.trace("Got {} as searchables for {}", references.stream().map(Searchable::toString).collect(Collectors.toList()), searchable.getId()); } return references.stream() @@ -75,19 +69,54 @@ public final List> getSearchesFor(Searchable search .collect(Collectors.toList()); } - public long getTotal(Searchable searchable) { - return totals.getOrDefault(searchable, 0); + public long getTotal(SelectFilter filter) { + return totals.computeIfAbsent(filter, (f) -> filter.getSearchReferences().stream() + .map(searchCache::get) + .flatMap(TrieSearch::stream) + .distinct() + .count()); } /** - * Scan all SelectFilters and submit {@link SimpleJob}s to create interactive searches for them. + * Add ready searches to the cache. This assumes that the search already has been shrunken. */ - public void updateSearch() { + public synchronized void addSearches(Map> searchCache) { - totals = new Object2LongOpenHashMap<>(); + this.searchCache.putAll(searchCache); + } + + + /** + * Adds new values to a search. If there is no search yet for the searchable, it is created. + * In order for this to work an existing search is not allowed to be shrunken yet, because shrinking + * prevents from adding new values. + */ + public void registerValues(Searchable searchable, Collection values) { + TrieSearch search = searchCache.computeIfAbsent(searchable, (ignored) -> searchable.createTrieSearch(indexConfig)); - jobManager.addSlowJob(new UpdateFilterSearchJob(storage, searchCache, indexConfig, totals)); + synchronized (search) { + values.stream() + .map(value -> new FrontendValue(value, value)) + .forEach(value -> search.addItem(value, extractKeywords(value))); + } } + /** + * Shrink the memory footprint of a search. After this action, no values can be registered anymore to a search. 
+ */ + public void shrinkSearch(Searchable searchable) { + final TrieSearch search = searchCache.get(searchable); + + if (search == null) { + log.warn("Searchable has no search associated: {}", searchable); + return; + } + search.shrinkToFit(); + } + + public synchronized void clearSearch() { + totals = new Object2LongOpenHashMap<>(); + searchCache = new HashMap<>(); + } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java b/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java index 6a1638f9a8..d1215721e5 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/ManagedQuery.java @@ -1,12 +1,9 @@ package com.bakdata.conquery.models.query; -import java.util.Collections; import java.util.List; import java.util.OptionalLong; -import java.util.Set; import java.util.concurrent.atomic.AtomicLong; import java.util.function.Consumer; -import java.util.stream.Collectors; import java.util.stream.Stream; import com.bakdata.conquery.apiv1.execution.ExecutionStatus; @@ -25,15 +22,11 @@ import com.bakdata.conquery.models.execution.ExecutionState; import com.bakdata.conquery.models.execution.InternalExecution; import com.bakdata.conquery.models.execution.ManagedExecution; -import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; import com.bakdata.conquery.models.messages.namespaces.WorkerMessage; -import com.bakdata.conquery.models.messages.namespaces.specific.CancelQuery; import com.bakdata.conquery.models.messages.namespaces.specific.ExecuteQuery; import com.bakdata.conquery.models.query.resultinfo.ResultInfo; import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.query.results.ShardResult; -import com.bakdata.conquery.models.worker.DistributedNamespace; -import com.bakdata.conquery.models.worker.WorkerInformation; import com.bakdata.conquery.util.QueryUtils; import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonIgnore; @@ -58,8 +51,6 @@ public class ManagedQuery extends ManagedExecution implements EditorQuery, Singl */ private Long lastResultCount; - @JsonIgnore - private transient Set involvedWorkers; @JsonIgnore private transient List columnDescriptions; @@ -78,28 +69,10 @@ protected void doInitExecutable() { query.resolve(new QueryResolveContext(getNamespace(), getConfig(), getStorage(), null)); } - @Override - public void addResult(ShardResult result) { - log.debug("Received Result[size={}] for Query[{}]", result.getResults().size(), result.getQueryId()); - - log.trace("Received Result\n{}", result.getResults()); - - if (result.getError().isPresent()) { - fail(result.getError().get()); - return; - } - - involvedWorkers.remove(result.getWorkerId()); - - getNamespace().getExecutionManager().addQueryResult(this, result.getResults()); - - if (involvedWorkers.isEmpty() && getState() == ExecutionState.RUNNING) { - finish(ExecutionState.DONE); - } - } @Override - protected void finish(ExecutionState executionState) { + public void finish(ExecutionState executionState) { + //TODO this is not optimal with SQLExecutionService as this might fully evaluate the query. 
lastResultCount = query.countResults(streamResults(OptionalLong.empty())); super.finish(executionState); @@ -127,13 +100,7 @@ public long resultRowCount() { return lastResultCount; } - @Override - public void start() { - super.start(); - involvedWorkers = Collections.synchronizedSet(getNamespace().getWorkerHandler().getWorkers().stream() - .map(WorkerInformation::getId) - .collect(Collectors.toSet())); - } + @Override public void setStatusBase(@NonNull Subject subject, @NonNull ExecutionStatus status) { @@ -143,7 +110,7 @@ public void setStatusBase(@NonNull Subject subject, @NonNull ExecutionStatus sta protected void setAdditionalFieldsForStatusWithColumnDescription(Subject subject, FullExecutionStatus status) { if (columnDescriptions == null) { - columnDescriptions = generateColumnDescriptions(isInitialized(), getNamespace(), getConfig()); + columnDescriptions = generateColumnDescriptions(isInitialized(), getConfig()); } status.setColumnDescriptions(columnDescriptions); } @@ -161,9 +128,7 @@ public void reset() { @Override public void cancel() { - log.debug("Sending cancel message to all workers."); - getNamespace().getWorkerHandler().sendToAll(new CancelQuery(getId())); } @Override @@ -196,8 +161,4 @@ public void visit(Consumer visitor) { query.visit(visitor); } - public DistributedNamespace getNamespace() { - return (DistributedNamespace) super.getNamespace(); - } - } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/QueryExecutionContext.java b/backend/src/main/java/com/bakdata/conquery/models/query/QueryExecutionContext.java index 91ec64291f..a6698b8f48 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/QueryExecutionContext.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/QueryExecutionContext.java @@ -2,30 +2,21 @@ import java.time.LocalDate; import java.util.List; -import java.util.Map; import java.util.Optional; -import java.util.concurrent.ConcurrentHashMap; import com.bakdata.conquery.io.storage.ModificationShieldedWorkerStorage; import com.bakdata.conquery.models.common.CDate; import com.bakdata.conquery.models.common.CDateSet; -import com.bakdata.conquery.models.datasets.Column; -import com.bakdata.conquery.models.datasets.Import; import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Connector; import com.bakdata.conquery.models.datasets.concepts.ValidityDate; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.BucketManager; -import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; import com.bakdata.conquery.models.identifiable.ids.specific.SecondaryIdDescriptionId; import com.bakdata.conquery.models.query.entity.Entity; import com.bakdata.conquery.models.query.queryplan.aggregators.Aggregator; -import groovy.lang.Tuple3; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.ints.IntSet; -import lombok.AccessLevel; import lombok.AllArgsConstructor; import lombok.Getter; import lombok.NonNull; @@ -52,28 +43,8 @@ public class QueryExecutionContext { @NonNull private Optional> queryDateAggregator = Optional.empty(); - @Getter(AccessLevel.NONE) - private final Map, IntSet> multiSelectValuesCache = new ConcurrentHashMap<>(); - private static IntSet findIds(Column column, Bucket bucket, String[] values) { - final IntSet selectedValues = new 
IntOpenHashSet(); - - final StringStore type = (StringStore) bucket.getStore(column); - - for (final String select : values) { - final int parsed = type.getId(select); - - selectedValues.add(parsed); - } - - return selectedValues; - } - - public IntSet getIdsFor(Column column, Bucket bucket, String[] values) { - return multiSelectValuesCache.computeIfAbsent(new Tuple3<>(column, bucket.getImp(), values), (ignored) -> findIds(column, bucket, values)); - } - /** * Only set when in {@link com.bakdata.conquery.models.query.queryplan.SecondaryIdQueryPlan}, to the selected {@link SecondaryIdDescriptionId}. */ diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/RequiredEntities.java b/backend/src/main/java/com/bakdata/conquery/models/query/RequiredEntities.java index d3ddd9de0c..6d44d86cf4 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/RequiredEntities.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/RequiredEntities.java @@ -1,15 +1,12 @@ package com.bakdata.conquery.models.query; import java.util.Collection; -import java.util.Objects; +import java.util.HashSet; import java.util.Set; import java.util.stream.Collectors; import com.bakdata.conquery.models.events.BucketManager; import com.bakdata.conquery.models.query.entity.Entity; -import it.unimi.dsi.fastutil.ints.Int2ObjectMap; -import it.unimi.dsi.fastutil.ints.IntOpenHashSet; -import it.unimi.dsi.fastutil.ints.IntSet; import lombok.NonNull; import lombok.ToString; @@ -19,35 +16,36 @@ @ToString(onlyExplicitlyIncluded = true) public final class RequiredEntities { - private final IntSet entities; + private final Set entities; public RequiredEntities() { - this(new IntOpenHashSet()); + this(new HashSet<>()); } - public RequiredEntities(Collection entities) { - this.entities = new IntOpenHashSet(entities); + public RequiredEntities(Collection entities) { + this.entities = new HashSet<>(entities); } public RequiredEntities intersect(@NonNull RequiredEntities other) { - final IntOpenHashSet out = new IntOpenHashSet(entities); + final Set out = new HashSet<>(entities); out.retainAll(other.entities); return new RequiredEntities(out); } public RequiredEntities union(@NonNull RequiredEntities other) { - final IntOpenHashSet out = new IntOpenHashSet(entities); + final Set out = new HashSet<>(entities); out.addAll(other.entities); return new RequiredEntities(out); } public Set resolve(BucketManager bucketManager) { - final Int2ObjectMap all = bucketManager.getEntities(); - return entities.intStream() - .mapToObj(all::get) - .filter(Objects::nonNull) + final Set all = bucketManager.getEntities(); + return entities.stream() + .filter(all::contains) + // The following is just a wrapping, that is later unwrapped again in an execution + .map(Entity::new) .collect(Collectors.toSet()); } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java b/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java index 64fd65be1d..87f7c625d9 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/SingleTableResult.java @@ -18,13 +18,13 @@ public interface SingleTableResult { - default List generateColumnDescriptions(boolean isInitialized, Namespace namespace, ConqueryConfig config) { + default List generateColumnDescriptions(boolean isInitialized, ConqueryConfig config) { Preconditions.checkArgument(isInitialized, "The execution must have been initialized first"); 
List columnDescriptions = new ArrayList<>(); final Locale locale = I18n.LOCALE.get(); - PrintSettings settings = new PrintSettings(true, locale, namespace, config, null); + PrintSettings settings = new PrintSettings(true, locale, getNamespace(), config, null); UniqueNamer uniqNamer = new UniqueNamer(settings); @@ -53,4 +53,8 @@ default List generateColumnDescriptions(boolean isInitialized, @JsonIgnore long resultRowCount(); + @JsonIgnore + Namespace getNamespace(); + + } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/entity/Entity.java b/backend/src/main/java/com/bakdata/conquery/models/query/entity/Entity.java index f304ec1024..c27a01ef13 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/entity/Entity.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/entity/Entity.java @@ -13,12 +13,6 @@ @Data @RequiredArgsConstructor(onConstructor_ = @JsonCreator) public class Entity { - private final int id; + private final String id; - /** - * Calculate the bucket of the {@link Entity::getId}. Used for distributing partitions of the data to {@link com.bakdata.conquery.models.worker.Worker}s - */ - public static int getBucket(int entityId, int entityBucketSize) { - return entityId / entityBucketSize; - } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/MultiSelectFilterNode.java b/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/MultiSelectFilterNode.java index 4b9f778a90..0086e28c49 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/MultiSelectFilterNode.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/MultiSelectFilterNode.java @@ -8,11 +8,9 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.events.Bucket; -import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.query.QueryExecutionContext; import com.bakdata.conquery.models.query.entity.Entity; import com.bakdata.conquery.models.query.queryplan.filter.EventFilterNode; -import it.unimi.dsi.fastutil.ints.IntSet; import lombok.Getter; import lombok.Setter; import lombok.ToString; @@ -30,34 +28,31 @@ public class MultiSelectFilterNode extends EventFilterNode { @Setter private Column column; - private final boolean empty; - - private IntSet selectedValues; - private QueryExecutionContext context; + private boolean empty; + + private Set selectedValues; public MultiSelectFilterNode(Column column, String[] filterValue) { super(filterValue); this.column = column; - empty = Arrays.stream(filterValue).anyMatch(Strings::isEmpty); + setFilterValue(filterValue); } @Override public void init(Entity entity, QueryExecutionContext context) { super.init(entity, context); - this.context = context; - selectedValues = null; } @Override public void setFilterValue(String[] strings) { - selectedValues = null; super.setFilterValue(strings); + selectedValues = Set.of(strings); + empty = Arrays.stream(filterValue).anyMatch(Strings::isEmpty); } @Override public void nextBlock(Bucket bucket) { - selectedValues = context.getIdsFor(column, bucket, filterValue); } @@ -65,28 +60,27 @@ public void nextBlock(Bucket bucket) { @Override public boolean checkEvent(Bucket bucket, int event) { - if(selectedValues == null){ - throw new IllegalStateException("No selected values were set."); - } if (!bucket.has(event, getColumn())) { return empty; } - final int stringId = 
bucket.getString(event, getColumn()); + final String stringToken = bucket.getString(event, getColumn()); - return selectedValues.contains(stringId); + return selectedValues.contains(stringToken); } @Override public boolean isOfInterest(Bucket bucket) { - for (String selected : getFilterValue()) { - if(((StringStore) bucket.getStores()[getColumn().getPosition()]).getId(selected) != -1) { - return true; - } - } - - return false; + //TODO +// for (String selected : getFilterValue()) { +// StringStore stringStore = (StringStore) bucket.getStores()[getColumn().getPosition()]; +// if(stringStore.getId(selected) != -1) { +// return true; +// } +// } + + return true; } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/PrefixTextFilterNode.java b/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/PrefixTextFilterNode.java index ab1ac047a4..7a81eb4b8a 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/PrefixTextFilterNode.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/PrefixTextFilterNode.java @@ -42,10 +42,8 @@ public boolean checkEvent(Bucket bucket, int event) { return false; } - final int id = store.getString(event); - String value = store.getElement(id); + final String value = store.getString(event); - //if performance is a problem we could find the filterValue once in the dictionary and then only check the values return value.startsWith(filterValue); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/SelectFilterNode.java b/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/SelectFilterNode.java index 2af64b1942..fba41540d9 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/SelectFilterNode.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/filter/event/SelectFilterNode.java @@ -1,5 +1,6 @@ package com.bakdata.conquery.models.query.filter.event; +import java.util.Objects; import java.util.Set; import javax.validation.constraints.NotNull; @@ -7,7 +8,6 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.events.Bucket; -import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.query.queryplan.filter.EventFilterNode; import lombok.Getter; import lombok.Setter; @@ -22,7 +22,6 @@ public class SelectFilterNode extends EventFilterNode { private final boolean empty; - private int selectedId = -1; @NotNull @Getter @Setter @@ -38,7 +37,6 @@ public SelectFilterNode(Column column, String filterValue) { @Override public void nextBlock(Bucket bucket) { // You can skip the block if the id is -1 - selectedId = ((StringStore) bucket.getStore(getColumn())).getId(filterValue); } @Override @@ -49,19 +47,17 @@ public boolean checkEvent(Bucket bucket, int event) { return true; } - if (selectedId == -1 || !has) { - return false; - } - final int value = bucket.getString(event, getColumn()); + final String value = bucket.getString(event, getColumn()); - return value == selectedId; + return Objects.equals(value, filterValue); } - @Override - public boolean isOfInterest(Bucket bucket) { - return empty || ((StringStore) bucket.getStores()[getColumn().getPosition()]).getId(filterValue) != -1; - } + //TODO +// @Override +// public boolean isOfInterest(Bucket bucket) { +// return empty || ((StringStore) bucket.getStores()[getColumn().getPosition()]).getId(filterValue) != -1; 
+// } @Override public void collectRequiredTables(Set
requiredTables) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java b/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java index 40eb592659..df62608dfb 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/preview/EntityPreviewExecution.java @@ -42,7 +42,6 @@ import com.bakdata.conquery.models.query.results.MultilineEntityResult; import com.bakdata.conquery.models.types.ResultType; import com.bakdata.conquery.models.types.SemanticType; -import com.bakdata.conquery.models.worker.Namespace; import com.fasterxml.jackson.annotation.JacksonInject; import com.fasterxml.jackson.annotation.JsonIgnore; import com.fasterxml.jackson.annotation.OptBoolean; @@ -397,12 +396,12 @@ private Object[] getCompleteLine(EntityResult entityResult) { } protected void setAdditionalFieldsForStatusWithColumnDescription(Subject subject, FullExecutionStatus status) { - status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getNamespace(), getConfig())); + status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getConfig())); } @Override - public List generateColumnDescriptions(boolean isInitialized, Namespace namespace, ConqueryConfig config) { - final List descriptors = getValuesQuery().generateColumnDescriptions(isInitialized, namespace, config); + public List generateColumnDescriptions(boolean isInitialized, ConqueryConfig config) { + final List descriptors = getValuesQuery().generateColumnDescriptions(isInitialized, config); for (ColumnDescriptor descriptor : descriptors) { // Add grouping semantics to secondaryIds to group by @@ -432,7 +431,7 @@ private ManagedQuery getValuesQuery() { @Override protected void setAdditionalFieldsForStatusWithSource(Subject subject, FullExecutionStatus status) { - status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getNamespace(), getConfig())); + status.setColumnDescriptions(generateColumnDescriptions(isInitialized(), getConfig())); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/SecondaryIdQueryPlan.java b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/SecondaryIdQueryPlan.java index e1398e20df..26591d05f5 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/SecondaryIdQueryPlan.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/SecondaryIdQueryPlan.java @@ -110,7 +110,7 @@ private void executeQueriesWithSecondaryId(QueryExecutionContext ctx, Entity ent final List tableBuckets = ctx.getBucketManager().getEntityBucketsForTable(entity, currentTable); for (Bucket bucket : tableBuckets) { - final int entityId = entity.getId(); + String entityId = entity.getId(); nextBlock(bucket); @@ -165,7 +165,7 @@ private void executeQueriesWithoutSecondaryId(QueryExecutionContext ctx, Entity final List tableBuckets = ctx.getBucketManager().getEntityBucketsForTable(entity, currentTable); for (Bucket bucket : tableBuckets) { - final int entityId = entity.getId(); + String entityId = entity.getId(); nextBlock(bucket); if (!bucket.containsEntity(entityId) || !isOfInterest(bucket)) { continue; diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/TableExportQueryPlan.java b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/TableExportQueryPlan.java index 9288871085..10c870ce4e 100644 --- 
a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/TableExportQueryPlan.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/TableExportQueryPlan.java @@ -77,7 +77,7 @@ public Optional execute(QueryExecutionContext ctx, Entity final List results = new ArrayList<>(); final int totalColumns = positions.values().stream().mapToInt(i -> i).max().getAsInt() + 1; - final int entityId = entity.getId(); + final String entityId = entity.getId(); for (Map.Entry entry : tables.entrySet()) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/specific/MultiSelectAggregator.java b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/specific/MultiSelectAggregator.java index 3797d78e0f..6e5ab01dfe 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/specific/MultiSelectAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/aggregators/specific/MultiSelectAggregator.java @@ -3,10 +3,10 @@ import java.util.Arrays; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.events.Bucket; -import com.bakdata.conquery.models.events.stores.root.StringStore; import com.bakdata.conquery.models.query.QueryExecutionContext; import com.bakdata.conquery.models.query.entity.Entity; import com.bakdata.conquery.models.query.queryplan.aggregators.SingleColumnAggregator; @@ -21,12 +21,10 @@ public class MultiSelectAggregator extends SingleColumnAggregator { private final String selected; private long hits = 0; - private int selectedId = -1; public SelectAggregator(Column column, String selected) { super(column); @@ -32,22 +32,18 @@ public void init(Entity entity, QueryExecutionContext context) { @Override public void nextBlock(Bucket bucket) { - selectedId = ((StringStore) bucket.getStore(getColumn())).getId(selected); } @Override public void consumeEvent(Bucket bucket, int event) { - if (selectedId == -1) { - return; - } if (!bucket.has(event, getColumn())) { return; } - int value = bucket.getString(event, getColumn()); + final String value = bucket.getString(event, getColumn()); - if (value == selectedId) { + if (Objects.equals(value, selected)) { hits++; } } @@ -64,7 +60,7 @@ public ResultType getResultType() { @Override public boolean isOfInterest(Bucket bucket) { - return super.isOfInterest(bucket) && - ((StringStore) bucket.getStores()[getColumn().getPosition()]).getId(selected) != -1; + return super.isOfInterest(bucket); + //TODO && ((StringStore) bucket.getStores()[getColumn().getPosition()]).getId(selected) != -1; } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/specific/ExternalNode.java b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/specific/ExternalNode.java index f0a5f6a4cf..0135a0e894 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/specific/ExternalNode.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/queryplan/specific/ExternalNode.java @@ -31,14 +31,14 @@ public class ExternalNode extends QPNode { @NotEmpty @NonNull - private final Map includedEntities; + private final Map includedEntities; - private final Map> extraData; + private final Map> extraData; private final String[] extraColumns; private final Map> extraAggregators; private CDateSet contained; - public Set getEntities() { + public Set getEntities() { return 
includedEntities.keySet(); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/results/EntityResult.java b/backend/src/main/java/com/bakdata/conquery/models/query/results/EntityResult.java index 516578c520..616e4a1070 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/results/EntityResult.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/results/EntityResult.java @@ -10,7 +10,7 @@ @CPSBase public interface EntityResult { - int getEntityId(); + String getEntityId(); /** * Provides the number of columns this result contains. */ diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/results/FormShardResult.java b/backend/src/main/java/com/bakdata/conquery/models/query/results/FormShardResult.java index 1e1e1b9d05..a07989bb0b 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/results/FormShardResult.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/results/FormShardResult.java @@ -1,19 +1,54 @@ package com.bakdata.conquery.models.query.results; import com.bakdata.conquery.io.cps.CPSType; +import com.bakdata.conquery.models.execution.ExecutionState; +import com.bakdata.conquery.models.forms.managed.ManagedInternalForm; import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; +import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; +import com.bakdata.conquery.models.query.DistributedExecutionManager; +import com.bakdata.conquery.models.query.ManagedQuery; import lombok.EqualsAndHashCode; import lombok.Getter; -@CPSType(id = "FORM_SHARD_RESULT", base = ShardResult.class) +@CPSType(id = "FORM_SHARD_RESULT", base = NamespacedMessage.class) @EqualsAndHashCode(callSuper = true) @Getter public class FormShardResult extends ShardResult { - private final ManagedExecutionId subQueryId; - public FormShardResult(ManagedExecutionId queryId, ManagedExecutionId subQueryId, WorkerId workerId) { - super(queryId, workerId); - this.subQueryId = subQueryId; + private final ManagedExecutionId formId; + + public FormShardResult(ManagedExecutionId formId, ManagedExecutionId subQueryId, WorkerId workerId) { + super(subQueryId, workerId); + this.formId = formId; } + + /** + * Distribute the result to a sub query. 
+ * + * @param executionManager + */ + @Override + public void addResult(DistributedExecutionManager executionManager) { + final ManagedInternalForm managedInternalForm = (ManagedInternalForm) executionManager.getExecution(getFormId()); + final ManagedQuery subQuery = managedInternalForm.getSubQuery(getQueryId()); + + + executionManager.handleQueryResult(this, subQuery); + + // Fail the whole execution if a subquery fails + if (ExecutionState.FAILED.equals(subQuery.getState())) { + managedInternalForm.fail( + getError().orElseThrow( + () -> new IllegalStateException(String.format("Query[%s] failed but no error was set.", subQuery.getId())) + ) + ); + } + + if (managedInternalForm.allSubQueriesDone()) { + managedInternalForm.finish(ExecutionState.DONE); + } + + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/results/MultilineEntityResult.java b/backend/src/main/java/com/bakdata/conquery/models/query/results/MultilineEntityResult.java index 69fd03d190..08ce6911fc 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/results/MultilineEntityResult.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/results/MultilineEntityResult.java @@ -6,7 +6,6 @@ import java.util.function.UnaryOperator; import java.util.stream.Stream; -import javax.validation.constraints.Min; import javax.validation.constraints.NotNull; import com.bakdata.conquery.io.cps.CPSType; @@ -18,13 +17,12 @@ @CPSType(id="MULTI_LINE", base= EntityResult.class) public class MultilineEntityResult implements EntityResult { - @Min(0) - private final int entityId; + private final String entityId; @NotNull private final List values; //this is needed because of https://github.com/FasterXML/jackson-databind/issues/2024 - public MultilineEntityResult(int entityId, List values) { + public MultilineEntityResult(String entityId, List values) { this.entityId = entityId; this.values = Objects.requireNonNullElse(values, Collections.emptyList()); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/results/ShardResult.java b/backend/src/main/java/com/bakdata/conquery/models/query/results/ShardResult.java index 6dd533ca9d..8c5a192593 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/results/ShardResult.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/results/ShardResult.java @@ -10,7 +10,11 @@ import com.bakdata.conquery.models.error.ConqueryError; import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; -import com.bakdata.conquery.models.messages.namespaces.specific.CollectQueryResult; +import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; +import com.bakdata.conquery.models.messages.namespaces.NamespacedMessage; +import com.bakdata.conquery.models.query.DistributedExecutionManager; +import com.bakdata.conquery.models.query.ManagedQuery; +import com.bakdata.conquery.models.worker.DistributedNamespace; import com.bakdata.conquery.models.worker.Worker; import com.fasterxml.jackson.annotation.JsonTypeInfo; import lombok.Getter; @@ -22,13 +26,13 @@ @JsonTypeInfo(use = JsonTypeInfo.Id.CUSTOM, property = "type") @CPSBase -@CPSType(id = "SHARD_RESULT", base = ShardResult.class) +@CPSType(id = "SHARD_RESULT", base = NamespacedMessage.class) @Getter @Setter @Slf4j @ToString(onlyExplicitlyIncluded = true) @NoArgsConstructor -public class ShardResult { +public class ShardResult extends NamespaceMessage { @ToString.Include @@ -63,7 +67,7 
@@ public synchronized void finish(@NonNull List results, Optional
results, Optional quarterCounts = new TreeMap<>(); - private final SortedMap monthCounts = new TreeMap<>(); + private final Object2IntMap monthCounts = new Object2IntOpenHashMap<>(); - private final AtomicInteger totalCount = new AtomicInteger(); - private final AtomicLong nulls = new AtomicLong(0); + private int totalCount = 0; + private int nulls = 0; private final Function dateExtractor; private CDateRange span = null; @@ -49,10 +48,10 @@ private static Function getDateExtractor(ResultType dateType @Override public void consume(Object value) { - totalCount.incrementAndGet(); + totalCount++; if (value == null) { - nulls.incrementAndGet(); + nulls++; return; } @@ -75,24 +74,19 @@ private void handleDay(int day) { final int quarter = date.get(IsoFields.QUARTER_OF_YEAR); final int month = date.getMonthValue(); - final String yearQuarter = year + "-" + quarter; // This code is pretty hot, therefore I want to avoid String.format final String yearMonth = year + "-" + (month < 10 ? "0" : "") + month; - - quarterCounts.compute(yearQuarter, (ignored, current) -> current == null ? 1 : current + 1); monthCounts.compute(yearMonth, (ignored, current) -> current == null ? 1 : current + 1); - } @Override public ResultColumnStatistics describe() { return new ColumnDescription(getName(), getLabel(), getDescription(), - totalCount.get(), - getNulls().intValue(), - quarterCounts, - monthCounts, + totalCount, + nulls, + new TreeMap<>(monthCounts), span == null ? CDateRange.all().toSimpleRange() : span.toSimpleRange() ); } @@ -104,16 +98,14 @@ public static class ColumnDescription extends ResultColumnStatistics { private final int count; private final int nullValues; - private final SortedMap quarterCounts; private final SortedMap monthCounts; private final Range span; - public ColumnDescription(String name, String label, String description, int count, int nullValues, SortedMap quarterCounts, SortedMap monthCounts, Range span) { + public ColumnDescription(String name, String label, String description, int count, int nullValues, SortedMap monthCounts, Range span) { super(name, label, description); this.count = count; this.nullValues = nullValues; - this.quarterCounts = quarterCounts; this.monthCounts = monthCounts; this.span = span; } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java index 302bf7db59..b6640e96db 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/NumberColumnStatsCollector.java @@ -8,7 +8,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; import c10n.C10N; import com.bakdata.conquery.models.query.PrintSettings; @@ -25,7 +24,7 @@ public class NumberColumnStatsCollector> private final ResultType type; private final DescriptiveStatistics statistics = new DescriptiveStatistics(); - private final AtomicLong nulls = new AtomicLong(0); + private int nulls = 0; private final Comparator comparator; @@ -107,7 +106,7 @@ private static Range expandBounds(double lower, double upper, int expect @Override public void consume(Object value) { if (value == null) { - nulls.incrementAndGet(); + nulls++; return; } @@ -185,7 +184,7 @@ private Map getExtras() { out.put(labels.sum(), printValue(getStatistics().getSum())); out.put(labels.count(), 
getPrintSettings().getIntegerFormat().format(getStatistics().getN())); - out.put(labels.missing(), getPrintSettings().getIntegerFormat().format(getNulls().get())); + out.put(labels.missing(), getPrintSettings().getIntegerFormat().format(getNulls())); return out; } diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java index fdd7369abe..2e6b4894ff 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/ResultStatistics.java @@ -14,13 +14,13 @@ import com.bakdata.conquery.models.common.Range; import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.query.ManagedQuery; import com.bakdata.conquery.models.query.PrintSettings; import com.bakdata.conquery.models.query.SingleTableResult; import com.bakdata.conquery.models.query.resultinfo.ResultInfo; import com.bakdata.conquery.models.query.resultinfo.UniqueNamer; import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.types.ResultType; +import com.bakdata.conquery.models.types.SemanticType; import com.google.common.util.concurrent.Futures; import com.google.common.util.concurrent.ListenableFuture; import com.google.common.util.concurrent.ListeningExecutorService; @@ -34,25 +34,41 @@ public record ResultStatistics(int entities, int total, List statistics, Range dateRange) { @SneakyThrows @NotNull - public static ResultStatistics collectResultStatistics(ManagedQuery managedQuery, List resultInfos, Optional dateInfo, int dateIndex, PrintSettings printSettings, UniqueNamer uniqueNamer, ConqueryConfig conqueryConfig) { + public static ResultStatistics collectResultStatistics(SingleTableResult managedQuery, List resultInfos, Optional dateInfo, Optional dateIndex, PrintSettings printSettings, UniqueNamer uniqueNamer, ConqueryConfig conqueryConfig) { + //TODO pull inner executor service from ManagerNode - final ListeningExecutorService executorService = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1)); + final ListeningExecutorService + executorService = + MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors() - 1)); // Yes, we are actually iterating the result for every job. 
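// Illustrative, self-contained sketch (not part of this patch) of the fan-out/join pattern used by
// collectResultStatistics above: independent Callables (date span, line/entity counts, per-column
// statistics) are submitted to a listening executor and joined at the end, so the result stream is
// iterated once per job but the jobs run in parallel. Class and method names here are placeholders.
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;

import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;

class StatsFanOutSketch {
	static List<String> describeAll(List<Callable<String>> columnJobs) throws Exception {
		final ListeningExecutorService pool = MoreExecutors.listeningDecorator(
				Executors.newFixedThreadPool(Math.max(1, Runtime.getRuntime().availableProcessors() - 1)));
		try {
			final List<ListenableFuture<String>> futures = columnJobs.stream()
					.map(job -> pool.submit(job))
					.toList();
			// allAsList fails as soon as any job fails, mirroring the behaviour the code above relies on.
			return Futures.allAsList(futures).get();
		}
		finally {
			pool.shutdown();
		}
	}
}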
// Span date-column - final ListenableFuture> futureSpan = executorService.submit(() -> calculateDateSpan(managedQuery, dateInfo, dateIndex)); + final ListenableFuture> futureSpan; + + final boolean containsDates = dateInfo.isPresent(); + + if (containsDates) { + futureSpan = executorService.submit(() -> calculateDateSpan(managedQuery, dateInfo, dateIndex.get())); + } + else { + futureSpan = Futures.immediateFuture(CDateRange.all().toSimpleRange()); + } + + // Count result lines and entities (may differ in case of form or SecondaryIdQuery) + final ListenableFuture futureLines = executorService.submit(() -> (int) managedQuery.resultRowCount()); - // Count result lines (may differ in case of form or SecondaryIdQuery) - final ListenableFuture - futureLines = - executorService.submit(() -> managedQuery.streamResults(OptionalLong.empty()).mapToInt(result -> result.listResultLines().size()).sum()); + final ListenableFuture futureEntities = + executorService.submit(() -> (int) managedQuery.streamResults(OptionalLong.empty()).count()); // compute ResultColumnStatistics for each column final List> futureDescriptions = - IntStream.range(0, resultInfos.size()).mapToObj(col -> (Callable) () -> { + IntStream.range(0, resultInfos.size()) + // If the query doesn't contain dates, we can skip the dates-column. + .filter(col -> !resultInfos.get(col).getSemantics().contains(new SemanticType.EventDateT()) || containsDates) + .mapToObj(col -> (Callable) () -> { final StopWatch started = StopWatch.createStarted(); final ResultInfo info = resultInfos.get(col); @@ -61,7 +77,10 @@ public static ResultStatistics collectResultStatistics(ManagedQuery managedQuery log.trace("BEGIN stats collection for {}", info); - managedQuery.streamResults(OptionalLong.empty()).map(EntityResult::listResultLines).flatMap(List::stream).forEach(line -> statsCollector.consume(line[col])); + managedQuery.streamResults(OptionalLong.empty()) + .map(EntityResult::listResultLines) + .flatMap(List::stream) + .forEach(line -> statsCollector.consume(line[col])); log.trace("DONE collecting values for {}, in {}", info, started); @@ -77,7 +96,11 @@ public static ResultStatistics collectResultStatistics(ManagedQuery managedQuery final Range span = futureSpan.get(); final List descriptions = Futures.allAsList(futureDescriptions).get(); final int lines = futureLines.get(); - return new ResultStatistics(managedQuery.getLastResultCount().intValue(), lines, descriptions, span); + final int entities = futureEntities.get(); + + executorService.shutdown(); + + return new ResultStatistics(entities, lines, descriptions, span); } private static Range calculateDateSpan(SingleTableResult managedQuery, Optional dateInfo, int dateIndex) { diff --git a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/StringColumnStatsCollector.java b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/StringColumnStatsCollector.java index ab469df184..32520baa0d 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/query/statistics/StringColumnStatsCollector.java +++ b/backend/src/main/java/com/bakdata/conquery/models/query/statistics/StringColumnStatsCollector.java @@ -1,10 +1,9 @@ package com.bakdata.conquery.models.query.statistics; import java.util.ArrayList; -import java.util.Collections; +import java.util.HashMap; import java.util.List; import java.util.Map; -import java.util.concurrent.atomic.AtomicLong; import java.util.stream.StreamSupport; import c10n.C10N; @@ -19,10 +18,9 @@ public class StringColumnStatsCollector extends 
ColumnStatsCollector { private final Frequency frequencies = new Frequency(); - private final AtomicLong nulls = new AtomicLong(0); private final long limit; - private final ResultType.StringT type; + private int nulls = 0; public StringColumnStatsCollector(String name, String label, String description, ResultType.StringT type, PrintSettings printSettings, long limit) { super(name, label, description, printSettings); @@ -33,7 +31,7 @@ public StringColumnStatsCollector(String name, String label, String description, @Override public void consume(Object value) { if (value == null) { - nulls.incrementAndGet(); + nulls++; return; } @@ -66,13 +64,16 @@ public ResultColumnStatistics describe() { final StatisticsLabels statisticsLabels = C10N.get(StatisticsLabels.class, getPrintSettings().getLocale()); - final Map extras = - entriesSorted.size() <= limit - ? Collections.emptyMap() - : Map.of( - statisticsLabels.remainingValues(entriesSorted.size() - limit), - statisticsLabels.remainingEntries(frequencies.getSumFreq() - shownTotal) - ); + final Map extras = new HashMap<>(); + + if (entriesSorted.size() > limit) { + extras.put( + statisticsLabels.remainingValues(entriesSorted.size() - limit), + statisticsLabels.remainingEntries(frequencies.getSumFreq() - shownTotal) + ); + } + + extras.put(statisticsLabels.missing(), getPrintSettings().getIntegerFormat().format(getNulls())); return new HistogramColumnDescription(getName(), getLabel(), getDescription(), head, extras); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/worker/DatasetRegistry.java b/backend/src/main/java/com/bakdata/conquery/models/worker/DatasetRegistry.java index 4230316a7d..72eac99d22 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/worker/DatasetRegistry.java +++ b/backend/src/main/java/com/bakdata/conquery/models/worker/DatasetRegistry.java @@ -18,7 +18,6 @@ import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.mode.InternalObjectMapperCreator; import com.bakdata.conquery.mode.NamespaceHandler; -import com.bakdata.conquery.mode.StorageHandler; import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.PreviewConfig; @@ -57,12 +56,9 @@ public class DatasetRegistry extends IdResolveContext imple private final IndexService indexService; - @Getter - private final StorageHandler storageHandler; - public N createNamespace(Dataset dataset, Validator validator) throws IOException { // Prepare empty storage - NamespaceStorage datasetStorage = new NamespaceStorage(config.getStorage(), "dataset_" + dataset.getName(), validator, storageHandler); + NamespaceStorage datasetStorage = new NamespaceStorage(config.getStorage(), "dataset_" + dataset.getName(), validator); final ObjectMapper persistenceMapper = internalObjectMapperCreator.createInternalObjectMapper(View.Persistence.Manager.class); datasetStorage.openStores(persistenceMapper); diff --git a/backend/src/main/java/com/bakdata/conquery/models/worker/DistributedNamespace.java b/backend/src/main/java/com/bakdata/conquery/models/worker/DistributedNamespace.java index eb59d79385..19ecf66d1e 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/worker/DistributedNamespace.java +++ b/backend/src/main/java/com/bakdata/conquery/models/worker/DistributedNamespace.java @@ -1,12 +1,20 @@ package com.bakdata.conquery.models.worker; +import java.util.Arrays; +import java.util.Collection; import java.util.List; +import java.util.Set; +import 
java.util.stream.Collectors; import com.bakdata.conquery.io.jackson.Injectable; import com.bakdata.conquery.io.storage.NamespaceStorage; +import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Dataset; +import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.index.IndexService; import com.bakdata.conquery.models.jobs.JobManager; +import com.bakdata.conquery.models.messages.namespaces.specific.CollectColumnValuesJob; +import com.bakdata.conquery.models.messages.namespaces.specific.UpdateMatchingStatsMessage; import com.bakdata.conquery.models.query.DistributedExecutionManager; import com.bakdata.conquery.models.query.FilterSearch; import com.bakdata.conquery.models.query.entity.Entity; @@ -28,6 +36,7 @@ public class DistributedNamespace extends Namespace { private final WorkerHandler workerHandler; private final DistributedExecutionManager executionManager; + public DistributedNamespace( ObjectMapper preprocessMapper, ObjectMapper communicationMapper, @@ -44,4 +53,25 @@ public DistributedNamespace( this.workerHandler = workerHandler; } + public int getBucket(String entity, int bucketSize) { + final NamespaceStorage storage = getStorage(); + return storage.getEntityBucket(entity) + .orElseGet(() -> storage.assignEntityBucket(entity, bucketSize)); + } + + @Override + void updateMatchingStats() { + final Collection> concepts = this.getStorage().getAllConcepts() + .stream() + .filter(concept -> concept.getMatchingStats() == null) + .collect(Collectors.toSet()); + getWorkerHandler().sendToAll(new UpdateMatchingStatsMessage(concepts)); + } + + @Override + void registerColumnValuesInSearch(Set columns) { + log.trace("Sending columns to collect values on shards: {}", Arrays.toString(columns.toArray())); + getWorkerHandler().sendToAll(new CollectColumnValuesJob(columns, this)); + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/models/worker/LocalNamespace.java b/backend/src/main/java/com/bakdata/conquery/models/worker/LocalNamespace.java index 5bab2bad96..5663baedc5 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/worker/LocalNamespace.java +++ b/backend/src/main/java/com/bakdata/conquery/models/worker/LocalNamespace.java @@ -1,9 +1,14 @@ package com.bakdata.conquery.models.worker; import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; import com.bakdata.conquery.io.jackson.Injectable; import com.bakdata.conquery.io.storage.NamespaceStorage; +import com.bakdata.conquery.mode.local.SqlStorageHandler; +import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.index.IndexService; import com.bakdata.conquery.models.jobs.JobManager; import com.bakdata.conquery.models.query.ExecutionManager; @@ -17,6 +22,8 @@ public class LocalNamespace extends Namespace { private final SqlExecutionService sqlExecutionService; + private final SqlStorageHandler storageHandler; + public LocalNamespace( ObjectMapper preprocessMapper, ObjectMapper communicationMapper, @@ -30,5 +37,19 @@ public LocalNamespace( ) { super(preprocessMapper, communicationMapper, storage, executionManager, jobManager, filterSearch, indexService, injectables); this.sqlExecutionService = sqlExecutionService; + this.storageHandler = new SqlStorageHandler(sqlExecutionService); + } + + @Override + void updateMatchingStats() { + // TODO Build basic statistic on data + } + + @Override + void registerColumnValuesInSearch(Set columns) { + for (Column column : 
columns) { + final Stream stringStream = storageHandler.lookupColumnValues(getStorage(), column); + getFilterSearch().registerValues(column, stringStream.collect(Collectors.toSet())); + } } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/worker/Namespace.java b/backend/src/main/java/com/bakdata/conquery/models/worker/Namespace.java index bc4bd608a2..191265b8b2 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/worker/Namespace.java +++ b/backend/src/main/java/com/bakdata/conquery/models/worker/Namespace.java @@ -1,19 +1,24 @@ package com.bakdata.conquery.models.worker; import java.io.IOException; +import java.util.Collection; import java.util.List; import java.util.NoSuchElementException; +import java.util.Set; import com.bakdata.conquery.io.jackson.Injectable; import com.bakdata.conquery.io.storage.NamespaceStorage; +import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.PreviewConfig; +import com.bakdata.conquery.models.datasets.concepts.Searchable; import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.MappableSingleColumnSelect; import com.bakdata.conquery.models.identifiable.CentralRegistry; import com.bakdata.conquery.models.identifiable.ids.specific.DatasetId; import com.bakdata.conquery.models.index.IndexService; import com.bakdata.conquery.models.jobs.JobManager; import com.bakdata.conquery.models.jobs.SimpleJob; +import com.bakdata.conquery.models.jobs.UpdateFilterSearchJob; import com.bakdata.conquery.models.query.ExecutionManager; import com.bakdata.conquery.models.query.FilterSearch; import com.fasterxml.jackson.databind.ObjectMapper; @@ -85,7 +90,7 @@ public CentralRegistry getCentralRegistry() { } public int getNumberOfEntities() { - return getStorage().getPrimaryDictionary().getSize(); + return getStorage().getNumberOfEntities(); } public void updateInternToExternMappings() { @@ -122,4 +127,44 @@ public CentralRegistry getMetaRegistry() { throw new UnsupportedOperationException(); } + + /** + * Issues a job that initializes the search that is used by the frontend for recommendations in the filter interface of a concept. + */ + final void updateFilterSearch() { + getJobManager().addSlowJob(new UpdateFilterSearchJob(this, getFilterSearch().getIndexConfig(), this::registerColumnValuesInSearch)); + } + + /** + * Issues a job that collects basic metrics for every concept and its nodes. This information is displayed in the frontend. + */ + abstract void updateMatchingStats(); + + /** + * This collects the string values of the given {@link Column}s (each is a {@link com.bakdata.conquery.models.datasets.concepts.Searchable}) + * and registers them in the namespace's {@link FilterSearch#registerValues(Searchable, Collection)}. + * After value registration for a column is complete, {@link FilterSearch#shrinkSearch(Searchable)} should be called. + * + * @param columns + */ + abstract void registerColumnValuesInSearch(Set columns); + + /** + * Hook for actions that are best done after all data has been imported and is in a consistent state. + * Such actions are for example search initialization and collection of matching statistics. + * + * @implNote This intentionally submits a SlowJob so that it will be queued after all jobs that are already in the queue (usually import jobs). 
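// Illustrative fragment (not part of this patch) of how a concrete Namespace can satisfy the
// contract documented above: register the raw values of each Searchable column in the namespace's
// FilterSearch, then shrink the search once registration for that column is complete.
// registerValues/shrinkSearch are the methods named in the javadoc above; loadValues is a
// hypothetical placeholder for the actual value source.
@Override
void registerColumnValuesInSearch(Set<Column> columns) {
	for (Column column : columns) {
		final Collection<String> values = loadValues(column); // hypothetical lookup of raw column values
		getFilterSearch().registerValues(column, values);
		getFilterSearch().shrinkSearch(column);
	}
}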
+ */ + public void postprocessData() { + + getJobManager().addSlowJob(new SimpleJob( + "Initiate Update Matching Stats and FilterSearch", + () -> { + updateMatchingStats(); + updateFilterSearch(); + updateInternToExternMappings(); + } + )); + + } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/worker/Worker.java b/backend/src/main/java/com/bakdata/conquery/models/worker/Worker.java index 64c6873d59..048851a0a1 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/worker/Worker.java +++ b/backend/src/main/java/com/bakdata/conquery/models/worker/Worker.java @@ -2,9 +2,7 @@ import java.io.Closeable; import java.io.IOException; -import java.util.Set; import java.util.concurrent.ExecutorService; -import java.util.stream.Collectors; import javax.validation.Validator; @@ -17,16 +15,11 @@ import com.bakdata.conquery.models.config.ThreadPoolDefinition; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.Import; -import com.bakdata.conquery.models.datasets.ImportColumn; import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.BucketManager; -import com.bakdata.conquery.models.events.stores.root.ColumnStore; -import com.bakdata.conquery.models.events.stores.root.StringStore; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; import com.bakdata.conquery.models.identifiable.ids.specific.SecondaryIdDescriptionId; import com.bakdata.conquery.models.jobs.JobManager; import com.bakdata.conquery.models.messages.namespaces.NamespaceMessage; @@ -100,7 +93,7 @@ public static Worker newWorker( WorkerStorage workerStorage = new WorkerStorage(config, validator, directory); - // On the worker side we don't have to set the object writer vor ForwardToWorkerMessages in WorkerInformation + // On the worker side we don't have to set the object writer for ForwardToWorkerMessages in WorkerInformation WorkerInformation info = new WorkerInformation(); info.setDataset(dataset.getId()); info.setName(directory); @@ -177,11 +170,6 @@ public void addImport(Import imp) { } public void removeImport(Import imp) { - - for (DictionaryId dictionary : imp.getDictionaries()) { - storage.removeDictionary(dictionary); - } - bucketManager.removeImport(imp); } @@ -201,55 +189,6 @@ public void updateDataset(Dataset dataset) { storage.updateDataset(dataset); } - public void updateDictionary(Dictionary dictionary) { - storage.updateDictionary(dictionary); - - // Since we've updated a Dictionary, we also have to update the prior usages of that Dictionary in all Buckets and Imports - final DictionaryId dictionaryId = dictionary.getId(); - final Set relevantImports = - storage.getAllImports().stream() - .filter(imp -> imp.getDictionaries().contains(dictionaryId)) - .collect(Collectors.toSet()); - - // First replace in all Imports - for (Import imp : relevantImports) { - for (ImportColumn column : imp.getColumns()) { - final ColumnStore store = column.getTypeDescription(); - - if (!(store instanceof StringStore)) { - continue; - } - - StringStore strings = ((StringStore) store); - - if (!strings.isDictionaryHolding() || !strings.getUnderlyingDictionary().getId().equals(dictionaryId)) { - continue; - } - strings.setUnderlyingDictionary(dictionary); - } - } - - // Then replace in all 
Buckets of those Imports - for (Bucket bucket : getStorage().getAllBuckets()) { - if (!relevantImports.contains(bucket.getImp())) { - continue; - } - - for (ColumnStore store : bucket.getStores()) { - if (!(store instanceof StringStore)) { - continue; - } - - StringStore strings = ((StringStore) store); - - if (!strings.isDictionaryHolding() || !strings.getUnderlyingDictionary().getId().equals(dictionaryId)) { - continue; - } - strings.setUnderlyingDictionary(dictionary); - } - } - } - public void updateWorkerInfo(WorkerInformation info) { storage.updateWorker(info); } diff --git a/backend/src/main/java/com/bakdata/conquery/models/worker/WorkerHandler.java b/backend/src/main/java/com/bakdata/conquery/models/worker/WorkerHandler.java index a78b17c666..070e546974 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/worker/WorkerHandler.java +++ b/backend/src/main/java/com/bakdata/conquery/models/worker/WorkerHandler.java @@ -2,22 +2,31 @@ import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; import java.util.Objects; import java.util.Set; +import java.util.UUID; +import java.util.stream.Collectors; import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.models.datasets.Import; import com.bakdata.conquery.models.identifiable.IdMap; import com.bakdata.conquery.models.identifiable.ids.specific.BucketId; import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; +import com.bakdata.conquery.models.messages.ReactionMessage; +import com.bakdata.conquery.models.messages.namespaces.ActionReactionMessage; import com.bakdata.conquery.models.messages.namespaces.WorkerMessage; import com.bakdata.conquery.models.messages.namespaces.specific.UpdateWorkerBucket; import com.fasterxml.jackson.databind.ObjectMapper; import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap; import it.unimi.dsi.fastutil.ints.Int2ObjectMap; +import lombok.Getter; import lombok.NonNull; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; +import org.jetbrains.annotations.NotNull; /** * Handler for worker in a single namespace. @@ -30,6 +39,7 @@ public class WorkerHandler { /** * All known {@link Worker}s that are part of this Namespace. 
*/ + @Getter private final IdMap workers = new IdMap<>(); /** @@ -39,19 +49,48 @@ public class WorkerHandler { private final NamespaceStorage storage; - public IdMap getWorkers() { - return this.workers; + private final Map pendingReactions = new HashMap<>(); + + @NotNull + public Set getAllWorkerIds() { + return getWorkers().stream() + .map(WorkerInformation::getId) + .collect(Collectors.toSet()); } + public void sendToAll(WorkerMessage msg) { if (workers.isEmpty()) { throw new IllegalStateException("There are no workers yet"); } + + // Register tracker for pending reactions if applicable + if (msg instanceof ActionReactionMessage actionReactionMessage) { + final UUID callerId = actionReactionMessage.getMessageId(); + pendingReactions.put(callerId, new PendingReaction(callerId, new HashSet<>(workers.keySet()), actionReactionMessage)); + } + + // Send message to all workers for (WorkerInformation w : workers.values()) { w.send(msg); } } + public void handleReactionMessage(ReactionMessage message) { + final UUID callerId = message.getCallerId(); + final PendingReaction pendingReaction = pendingReactions.get(callerId); + + if (pendingReaction == null) { + throw new IllegalStateException(String.format("No pending action registered (anymore) for caller id %s from reaction message: %s", callerId, message)); + } + + if (pendingReaction.checkoffWorker(message)) { + log.debug("Removing pending reaction '{}' as last pending message was received.", callerId); + pendingReactions.remove(callerId); + } + + } + public synchronized void removeBucketAssignmentsForImportFormWorkers(@NonNull Import importId) { final WorkerToBucketsMap workerBuckets = storage.getWorkerBuckets(); if (workerBuckets == null) { @@ -65,21 +104,11 @@ public synchronized void removeBucketAssignmentsForImportFormWorkers(@NonNull Im } private synchronized void sendUpdatedWorkerInformation() { - for (WorkerInformation w : this.workers.values()) { + for (WorkerInformation w : workers.values()) { w.send(new UpdateWorkerBucket(w)); } } - private synchronized WorkerToBucketsMap createWorkerBucketsMap() { - // Ensure that only one map is created and populated in the storage - WorkerToBucketsMap workerBuckets = storage.getWorkerBuckets(); - if (workerBuckets != null) { - return workerBuckets; - } - storage.setWorkerToBucketsMap(new WorkerToBucketsMap()); - return storage.getWorkerBuckets(); - } - public synchronized void addBucketsToWorker(@NonNull WorkerId id, @NonNull Set bucketIds) { // Ensure that add and remove are not executed at the same time. 
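// Illustrative, self-contained sketch (not part of this patch) of the acknowledgement bookkeeping
// shown in sendToAll/handleReactionMessage above: a broadcast registers the set of expected
// responders under the message id, each final response checks its sender off, and a completion hook
// runs once the set is empty. Class and method names below are placeholders, not project API.
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;

final class BroadcastTracker {
	private final Map<UUID, Set<String>> pending = new ConcurrentHashMap<>();

	void broadcast(UUID messageId, Set<String> workerIds) {
		final Set<String> waiting = ConcurrentHashMap.newKeySet();
		waiting.addAll(workerIds);
		pending.put(messageId, waiting);
		// ... actually send the message to every worker here ...
	}

	/** Returns true when the last expected worker has responded and the completion hook was run. */
	boolean checkOff(UUID messageId, String workerId, Runnable afterAllReaction) {
		final Set<String> waiting = pending.get(messageId);
		if (waiting == null || !waiting.remove(workerId)) {
			throw new IllegalStateException("No pending broadcast for " + messageId + " / " + workerId);
		}
		if (!waiting.isEmpty()) {
			return false;
		}
		pending.remove(messageId);
		afterAllReaction.run();
		return true;
	}
}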
// We don't make assumptions about the underlying implementation regarding thread safety @@ -94,17 +123,27 @@ public synchronized void addBucketsToWorker(@NonNull WorkerId id, @NonNull Set si.getIncludedBuckets().size())) .orElseThrow(() -> new IllegalStateException("Unable to find minimum.")); @@ -116,6 +155,18 @@ public synchronized void addResponsibility(int bucket) { smallest.getIncludedBuckets().add(bucket); } + public void register(ShardNodeInformation node, WorkerInformation info) { + final WorkerInformation old = getWorkers().getOptional(info.getId()).orElse(null); + if (old != null) { + old.setIncludedBuckets(info.getIncludedBuckets()); + old.setConnectedShardNode(node); + } + else { + info.setConnectedShardNode(node); + } + addWorker(info); + } + public synchronized void addWorker(WorkerInformation info) { Objects.requireNonNull(info.getConnectedShardNode(), () -> String.format("No open connections found for Worker[%s]", info.getId())); @@ -133,18 +184,6 @@ public synchronized void addWorker(WorkerInformation info) { } } - public void register(ShardNodeInformation node, WorkerInformation info) { - WorkerInformation old = this.getWorkers().getOptional(info.getId()).orElse(null); - if (old != null) { - old.setIncludedBuckets(info.getIncludedBuckets()); - old.setConnectedShardNode(node); - } - else { - info.setConnectedShardNode(node); - } - this.addWorker(info); - } - public Set getBucketsForWorker(WorkerId workerId) { final WorkerToBucketsMap workerBuckets = storage.getWorkerBuckets(); if (workerBuckets == null) { @@ -152,4 +191,35 @@ public Set getBucketsForWorker(WorkerId workerId) { } return workerBuckets.getBucketsForWorker(workerId); } + + private record PendingReaction(UUID callerId, Set pendingWorkers, ActionReactionMessage parent) { + + /** + * Marks the given worker as not pending. If the last pending worker checks off the afterAllReaction is executed. + */ + public synchronized boolean checkoffWorker(ReactionMessage message) { + final WorkerId workerId = message.getWorkerId(); + + if (!message.lastMessageFromWorker()) { + log.trace("Received reacting message, but was not the last one: {}", message); + return false; + } + + if (!pendingWorkers.remove(workerId)) { + throw new IllegalStateException(String.format("Could not check off worker %s for action-reaction message '%s'. Worker was not checked in.", workerId, callerId)); + } + + log.debug("Checked off worker '{}' for action-reaction message '{}', still waiting for {}.", workerId, parent, pendingWorkers.size()); + + if (!pendingWorkers.isEmpty()) { + return false; + } + + log.debug("Checked off last worker '{}' for action-reaction message {}. 
Calling hook", workerId, parent); + + parent.afterAllReaction(); + return true; + + } + } } diff --git a/backend/src/main/java/com/bakdata/conquery/models/worker/Workers.java b/backend/src/main/java/com/bakdata/conquery/models/worker/Workers.java index f92900ecdb..5b2ed31281 100644 --- a/backend/src/main/java/com/bakdata/conquery/models/worker/Workers.java +++ b/backend/src/main/java/com/bakdata/conquery/models/worker/Workers.java @@ -56,6 +56,7 @@ public class Workers extends IdResolveContext { public Workers(ThreadPoolDefinition queryThreadPoolDefinition, Supplier persistenceMapperSupplier, Supplier communicationMapperSupplier, int entityBucketSize, int secondaryIdSubPlanRetention) { this.queryThreadPoolDefinition = queryThreadPoolDefinition; + // TODO This shouldn't be coupled to the query thread pool definition jobsThreadPool = queryThreadPoolDefinition.createService("Workers"); this.persistenceMapperSupplier = persistenceMapperSupplier; diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/AdminServlet.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/AdminServlet.java index e257225739..c92842009c 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/AdminServlet.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/AdminServlet.java @@ -73,6 +73,7 @@ public AdminServlet(ManagerNode manager) { jerseyConfigUI.setUrlPattern("/admin-ui"); RESTServer.configure(manager.getConfig(), jerseyConfig); + RESTServer.configure(manager.getConfig(), jerseyConfigUI); final AdminEnvironment admin = manager.getEnvironment().admin(); admin.addServlet(ADMIN_SERVLET_PATH, new ServletContainer(jerseyConfig)).addMapping("/" + ADMIN_SERVLET_PATH + "/*"); @@ -122,8 +123,7 @@ protected void configure() { .register(IdRefPathParamConverterProvider.class) .register(new MultiPartFeature()) .register(IdParamConverter.Provider.INSTANCE) - .register(authCookieFilter) - .register(manager.getAuthController().getAuthenticationFilter()); + .register(authCookieFilter); jerseyConfigUI.register(new ViewMessageBodyWriter(manager.getEnvironment().metrics(), Collections.singleton(Freemarker.HTML_RENDERER))) @@ -138,8 +138,7 @@ protected void configure() { }) .register(AdminPermissionFilter.class) .register(IdRefPathParamConverterProvider.class) - .register(authCookieFilter) - .register(manager.getAuthController().getRedirectingAuthFilter()); + .register(authCookieFilter); ; } diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetProcessor.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetProcessor.java index 8aa724f823..34b952dde5 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetProcessor.java @@ -39,7 +39,6 @@ import com.bakdata.conquery.models.index.InternToExternMapper; import com.bakdata.conquery.models.index.search.SearchIndex; import com.bakdata.conquery.models.jobs.JobManager; -import com.bakdata.conquery.models.jobs.SimpleJob; import com.bakdata.conquery.models.worker.DatasetRegistry; import com.bakdata.conquery.models.worker.Namespace; import com.univocity.parsers.csv.CsvParser; @@ -314,23 +313,12 @@ public synchronized void deleteConcept(Concept concept) { } /** - * Issues all Shards to do an UpdateMatchingStats. 
- * - * @implNote This intentionally submits a SlowJob so that it will be queued after all jobs that are already in the queue (usually import jobs). + * Issues postprocessing of the imported data to initialize certain internal modules that are either expensive or need the whole data present. */ - public void updateMatchingStats(Dataset dataset) { + public void postprocessNamespace(Dataset dataset) { final Namespace ns = getDatasetRegistry().get(dataset.getId()); - ns.getJobManager().addSlowJob(new SimpleJob( - "Initiate Update Matching Stats and FilterSearch", - () -> { - - - storageListener.onUpdateMatchingStats(dataset); - ns.getFilterSearch().updateSearch(); - ns.updateInternToExternMappings(); - } - )); + ns.postprocessData(); } public EntityIdMap getIdMapping(Namespace namespace) { diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetResource.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetResource.java index 09f62e02d8..d403f9ce7c 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminDatasetResource.java @@ -242,11 +242,15 @@ public void delete() { processor.deleteDataset(dataset); } + /** + * @param dataset the namespace to postprocess + * @implNote The path mapping is historically named. Renaming the path requires some coordination. + */ @POST @Path("/update-matching-stats") @Consumes(MediaType.WILDCARD) - public void updateMatchingStats(@PathParam(DATASET) Dataset dataset) { - processor.updateMatchingStats(dataset); + public void postprocessNamespace(@PathParam(DATASET) Dataset dataset) { + processor.postprocessNamespace(dataset); } @POST diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminResource.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminResource.java index 1a3d5ae8d6..67586819f5 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/AdminResource.java @@ -18,10 +18,7 @@ import javax.ws.rs.PathParam; import javax.ws.rs.Produces; import javax.ws.rs.QueryParam; -import javax.ws.rs.container.ContainerRequestContext; -import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; -import javax.ws.rs.core.NewCookie; import javax.ws.rs.core.Response; import javax.ws.rs.core.UriBuilder; @@ -29,13 +26,10 @@ import com.bakdata.conquery.io.jersey.ExtraMimeTypes; import com.bakdata.conquery.io.storage.MetaStorage; import com.bakdata.conquery.models.auth.entities.Subject; -import com.bakdata.conquery.models.config.auth.AuthenticationConfig; import com.bakdata.conquery.models.error.ConqueryError; import com.bakdata.conquery.models.execution.ExecutionState; import com.bakdata.conquery.models.jobs.JobManagerStatus; import com.bakdata.conquery.models.messages.network.specific.CancelJobMessage; -import com.bakdata.conquery.models.worker.DatasetRegistry; -import com.bakdata.conquery.models.worker.Namespace; import com.bakdata.conquery.models.worker.ShardNodeInformation; import com.bakdata.conquery.resources.admin.ui.AdminUIResource; import io.dropwizard.auth.Auth; @@ -101,14 +95,6 @@ public boolean isBusy() { return processor.isBusy(); } - @GET - @Path("logout") - public Response logout(@Context ContainerRequestContext requestContext) { - // Invalidate all cookies.
At the moment the adminEnd uses cookies only for authentication, so this does not interfere with other things - final NewCookie[] expiredCookies = requestContext.getCookies().keySet().stream().map(AuthenticationConfig::expireCookie).toArray(NewCookie[]::new); - return Response.ok().cookie(expiredCookies).build(); - } - @GET @Path("/queries") public FullExecutionStatus[] getQueries(@Auth Subject currentUser, @QueryParam("limit") OptionalLong maybeLimit, @QueryParam("since") Optional maybeSince) { @@ -117,7 +103,6 @@ public FullExecutionStatus[] getQueries(@Auth Subject currentUser, @QueryParam(" final long limit = maybeLimit.orElse(100); final MetaStorage storage = processor.getStorage(); - final DatasetRegistry datasetRegistry = processor.getDatasetRegistry(); return storage.getAllExecutions().stream() diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/UIProcessor.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/UIProcessor.java index 4dc7f7ada9..27797fef8b 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/UIProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/rest/UIProcessor.java @@ -31,7 +31,6 @@ import com.bakdata.conquery.models.datasets.concepts.Connector; import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeNode; import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.events.CBlock; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; import com.bakdata.conquery.models.index.IndexKey; @@ -198,13 +197,6 @@ public TableStatistics getTableStatistics(Table table) { return new TableStatistics( table, entries, - //total size of dictionaries - imports.stream() - .flatMap(imp -> imp.getDictionaries().stream()) - .filter(Objects::nonNull) - .map(storage::getDictionary) - .mapToLong(Dictionary::estimateMemoryConsumption) - .sum(), //total size of entries imports.stream() .mapToLong(Import::estimateMemoryConsumption) diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/AdminUIResource.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/AdminUIResource.java index cc5b1f31ca..4f68069536 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/AdminUIResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/AdminUIResource.java @@ -1,13 +1,24 @@ package com.bakdata.conquery.resources.admin.ui; +import java.net.URI; +import java.util.Objects; + import javax.inject.Inject; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.Produces; +import javax.ws.rs.container.ContainerRequestContext; +import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; +import javax.ws.rs.core.NewCookie; +import javax.ws.rs.core.Response; +import com.bakdata.conquery.models.auth.entities.Subject; +import com.bakdata.conquery.models.config.auth.AuthenticationConfig; +import com.bakdata.conquery.resources.ResourceConstants; import com.bakdata.conquery.resources.admin.rest.UIProcessor; import com.bakdata.conquery.resources.admin.ui.model.UIView; +import io.dropwizard.auth.Auth; import io.dropwizard.views.View; import lombok.RequiredArgsConstructor; @@ -41,4 +52,16 @@ public View getQueries() { return new UIView<>("queries.html.ftl", uiProcessor.getUIContext()); } + + @GET + @Path("logout") + public Response logout(@Context ContainerRequestContext requestContext, @Auth Subject user) 
{ + // Invalidate all cookies. At the moment the adminEnd uses cookies only for authentication, so this does not interfere with other things + final NewCookie[] expiredCookies = requestContext.getCookies().keySet().stream().map(AuthenticationConfig::expireCookie).toArray(NewCookie[]::new); + final URI logout = user.getAuthenticationInfo().getFrontChannelLogout(); + return Response.seeOther(Objects.requireNonNullElseGet(logout, () -> URI.create("/" + ResourceConstants.ADMIN_UI_SERVLET_PATH))) + .cookie(expiredCookies) + .build(); + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/DatasetsUIResource.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/DatasetsUIResource.java index afa4dc1b60..b68ebb84b5 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/DatasetsUIResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/DatasetsUIResource.java @@ -4,7 +4,6 @@ import static com.bakdata.conquery.resources.admin.rest.UIProcessor.calculateCBlocksSizeBytes; import java.util.Collection; -import java.util.Objects; import java.util.stream.Collectors; import javax.inject.Inject; @@ -18,7 +17,6 @@ import com.bakdata.conquery.models.datasets.Import; import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.datasets.concepts.Concept; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.identifiable.ids.specific.TableId; import com.bakdata.conquery.models.identifiable.mapping.EntityIdMap; import com.bakdata.conquery.models.index.InternToExternMapper; @@ -85,17 +83,6 @@ public View getDataset(@PathParam(DATASET) Dataset dataset) { )) .collect(Collectors.toList()), namespace.getStorage().getAllConcepts(), - // total size of dictionaries - namespace - .getStorage() - .getAllImports() - .stream() - .flatMap(i -> i.getDictionaries().stream()) - .filter(Objects::nonNull) - .map(namespace.getStorage()::getDictionary) - .distinct() - .mapToLong(Dictionary::estimateMemoryConsumption) - .sum(), // Total size of CBlocks namespace .getStorage().getTables() @@ -142,7 +129,6 @@ public static class DatasetInfos { private Collection searchIndices; private Collection tables; private Collection> concepts; - private long dictionariesSize; private long cBlocksSize; private long size; } diff --git a/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/model/TableStatistics.java b/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/model/TableStatistics.java index 2f62fa872b..d3039adceb 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/model/TableStatistics.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/admin/ui/model/TableStatistics.java @@ -14,7 +14,6 @@ public class TableStatistics { private final Table table; private final long numberOfEntries; - private final long dictionariesSize; private final long size; private final long cBlocksSize; private final List imports; diff --git a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java index a1a16e1696..02b13d50e2 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/api/ConceptsProcessor.java @@ -30,8 +30,6 @@ import com.bakdata.conquery.models.datasets.PreviewConfig; import com.bakdata.conquery.models.datasets.concepts.Concept; 
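// Illustrative, self-contained sketch (not part of this patch) of the cookie-expiry-and-redirect
// pattern used by the logout endpoint in AdminUIResource above: every request cookie is answered
// with a NewCookie whose maxAge is 0, which tells the browser to drop it, before redirecting the
// caller. The class and method names here are placeholders.
import java.net.URI;

import javax.ws.rs.container.ContainerRequestContext;
import javax.ws.rs.core.NewCookie;
import javax.ws.rs.core.Response;

final class LogoutSketch {
	static Response logout(ContainerRequestContext request, URI target) {
		final NewCookie[] expired = request.getCookies().keySet().stream()
				// name, value, path, domain, comment, maxAge, secure – maxAge 0 expires the cookie
				.map(name -> new NewCookie(name, null, "/", null, null, 0, false))
				.toArray(NewCookie[]::new);
		return Response.seeOther(target).cookie(expired).build();
	}
}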
import com.bakdata.conquery.models.datasets.concepts.FrontEndConceptBuilder; -import com.bakdata.conquery.models.datasets.concepts.Searchable; -import com.bakdata.conquery.models.datasets.concepts.filters.Filter; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeChild; import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; @@ -79,14 +77,14 @@ public FrontendList load(Concept concept) { /** * Cache of all search results on SelectFilters. */ - private final LoadingCache, String>, List> + private final LoadingCache, String>, List> searchResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { @Override - public List load(Pair, String> filterAndSearch) { + public List load(Pair, String> filterAndSearch) { final String searchTerm = filterAndSearch.getValue(); - final Searchable searchable = filterAndSearch.getKey(); + final SelectFilter searchable = filterAndSearch.getKey(); log.trace("Calculating a new search cache for the term \"{}\" on Searchable[{}]", searchTerm, searchable.getId()); @@ -98,9 +96,9 @@ public List load(Pair, String> filterAndSearch) { * Cache of raw listing of values on a filter. * We use Cursor here to reduce strain on memory and increase response time. */ - private final LoadingCache, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { + private final LoadingCache, CursorAndLength> listResults = CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() { @Override - public CursorAndLength load(Searchable searchable) { + public CursorAndLength load(SelectFilter searchable) { log.trace("Creating cursor for `{}`", searchable.getId()); return new CursorAndLength(listAllValues(searchable), countAllValues(searchable)); } @@ -160,7 +158,7 @@ public FrontendPreviewConfig getEntityPreviewFrontendConfig(Dataset dataset) { * Search for all search terms at once, with stricter scoring. * The user will upload a file and expect only well-corresponding resolutions. */ - public ResolvedFilterValues resolveFilterValues(Searchable searchable, List searchTerms) { + public ResolvedFilterValues resolveFilterValues(SelectFilter searchable, List searchTerms) { // search in the full text engine final Set openSearchTerms = new HashSet<>(searchTerms); @@ -184,13 +182,17 @@ public ResolvedFilterValues resolveFilterValues(Searchable searchable, List searchable, Optional maybeText, OptionalInt pageNumberOpt, OptionalInt itemsPerPageOpt) { + public AutoCompleteResult autocompleteTextFilter( + SelectFilter searchable, + Optional maybeText, + OptionalInt pageNumberOpt, + OptionalInt itemsPerPageOpt + ) { final int pageNumber = pageNumberOpt.orElse(0); final int itemsPerPage = itemsPerPageOpt.orElse(50); @@ -226,7 +228,7 @@ public AutoCompleteResult autocompleteTextFilter(Searchable searchable, Optio } } - private Cursor listAllValues(Searchable searchable) { + private Cursor listAllValues(SelectFilter searchable) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); /* Don't worry, I am as confused as you are! 
@@ -251,7 +253,7 @@ private Cursor listAllValues(Searchable searchable) { return new Cursor<>(Iterators.filter(iterators, seen::add)); } - private long countAllValues(Searchable searchable) { + private long countAllValues(SelectFilter searchable) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); return namespace.getFilterSearch().getTotal(searchable); @@ -261,7 +263,7 @@ private long countAllValues(Searchable searchable) { * Autocompletion for search terms. For values of {@link SelectFilter }. * Is used by the serach cache to load missing items */ - private List autocompleteTextFilter(Searchable searchable, String text) { + private List autocompleteTextFilter(SelectFilter searchable, String text) { final Namespace namespace = namespaces.get(searchable.getDataset().getId()); // Note that FEValues is equals/hashcode only on value: diff --git a/backend/src/main/java/com/bakdata/conquery/resources/api/ConfigResource.java b/backend/src/main/java/com/bakdata/conquery/resources/api/ConfigResource.java index deeb7e5d21..8e7193d3df 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/api/ConfigResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/api/ConfigResource.java @@ -36,6 +36,7 @@ public FrontendConfiguration getFrontendConfig() { final FrontendConfig frontendConfig = config.getFrontend(); return new FrontendConfiguration( VersionInfo.INSTANCE.getProjectVersion(), + VersionInfo.INSTANCE.getFormBackendVersions(), frontendConfig.getCurrency(), idColumns, frontendConfig.getManualUrl(), diff --git a/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java b/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java index 5fe05d8652..44790c6ee1 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/api/QueryResource.java @@ -29,7 +29,7 @@ import com.bakdata.conquery.models.auth.permissions.Ability; import com.bakdata.conquery.models.execution.ExecutionState; import com.bakdata.conquery.models.execution.ManagedExecution; -import com.bakdata.conquery.models.query.ManagedQuery; +import com.bakdata.conquery.models.query.SingleTableResult; import io.dropwizard.auth.Auth; import io.dropwizard.jersey.PATCH; import lombok.RequiredArgsConstructor; @@ -62,8 +62,8 @@ public FullExecutionStatus getStatus(@Auth Subject subject, @PathParam(QUERY) Ma @Path("{" + QUERY + "}/statistics") public Response getDescription(@Auth Subject subject, @PathParam(QUERY) ManagedExecution query) { - if (!(query instanceof ManagedQuery)) { - throw new BadRequestException("Statistics is only available for %s".formatted(ManagedQuery.class.getSimpleName())); + if (!(query instanceof SingleTableResult)) { + throw new BadRequestException("Statistics is only available for %s".formatted(SingleTableResult.class.getSimpleName())); } subject.authorize(query.getDataset(), Ability.READ); @@ -73,7 +73,7 @@ public Response getDescription(@Auth Subject subject, @PathParam(QUERY) ManagedE return Response.status(Response.Status.CONFLICT.getStatusCode(), "Query is still running.").build(); // Request was submitted too early. 
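// Illustrative, self-contained sketch (not part of this patch) of the soft-value LoadingCache
// pattern used for the ConceptsProcessor search cache above: results are cached per (filter, term)
// pair and may be dropped by the GC under memory pressure because of softValues(). The key and
// value types here are simplified placeholders for the project's own classes.
import java.util.List;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import org.apache.commons.lang3.tuple.Pair;

final class SearchCacheSketch {
	private final LoadingCache<Pair<String, String>, List<String>> searchResults =
			CacheBuilder.newBuilder().softValues().build(new CacheLoader<>() {
				@Override
				public List<String> load(Pair<String, String> filterAndTerm) {
					// Placeholder: a real implementation would query the search index here.
					return List.of(filterAndTerm.getValue());
				}
			});

	List<String> search(String filterId, String term) {
		return searchResults.getUnchecked(Pair.of(filterId, term));
	}
}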
} - return Response.ok((processor.getResultStatistics(((ManagedQuery) query)))).build(); + return Response.ok((processor.getResultStatistics(((SingleTableResult) query)))).build(); } @PATCH diff --git a/backend/src/main/java/com/bakdata/conquery/resources/unprotected/LoginResource.java b/backend/src/main/java/com/bakdata/conquery/resources/unprotected/LoginResource.java index fa1bb9281d..f3fbe5b41d 100644 --- a/backend/src/main/java/com/bakdata/conquery/resources/unprotected/LoginResource.java +++ b/backend/src/main/java/com/bakdata/conquery/resources/unprotected/LoginResource.java @@ -1,16 +1,20 @@ package com.bakdata.conquery.resources.unprotected; -import javax.ws.rs.*; +import java.net.URI; + +import javax.ws.rs.BadRequestException; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.Produces; +import javax.ws.rs.QueryParam; import javax.ws.rs.container.ContainerRequestContext; import javax.ws.rs.core.Context; import javax.ws.rs.core.MediaType; -import com.bakdata.conquery.models.config.auth.LocalAuthenticationConfig; +import com.bakdata.conquery.models.auth.web.RedirectingAuthFilter; import com.bakdata.conquery.resources.admin.ui.model.UIView; import io.dropwizard.views.View; -import java.net.URI; - @Path("/login") @Produces(MediaType.TEXT_HTML) public class LoginResource { @@ -19,7 +23,7 @@ public class LoginResource { private ContainerRequestContext request; @GET - public View getLoginPage(@QueryParam(LocalAuthenticationConfig.REDIRECT_URI) URI redirectUri){ + public View getLoginPage(@QueryParam(RedirectingAuthFilter.REDIRECT_URI) URI redirectUri) { final String requestAuthority = request.getUriInfo().getBaseUri().getAuthority(); final String redirectAuthority = redirectUri.getAuthority(); if (!requestAuthority.equals(redirectAuthority)){ diff --git a/backend/src/main/java/com/bakdata/conquery/sql/ConceptSqlQuery.java b/backend/src/main/java/com/bakdata/conquery/sql/ConceptSqlQuery.java deleted file mode 100644 index 9891a70047..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/sql/ConceptSqlQuery.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.bakdata.conquery.sql; - -import java.util.List; - -import com.bakdata.conquery.models.query.resultinfo.ResultInfo; -import com.bakdata.conquery.sql.conversion.model.SqlQuery; -import lombok.Value; -import org.jooq.Record; -import org.jooq.Select; -import org.jooq.conf.ParamType; - -@Value -public class ConceptSqlQuery implements SqlQuery { - - String sqlString; - List resultInfos; - - public ConceptSqlQuery(Select finalQuery, List resultInfos) { - this.sqlString = finalQuery.getSQL(ParamType.INLINED); - this.resultInfos = resultInfos; - } - - @Override - public String getSql() { - return this.sqlString; - } - -} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conquery/SqlExecutionManager.java b/backend/src/main/java/com/bakdata/conquery/sql/conquery/SqlExecutionManager.java index 680ad81dad..fde5116efb 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conquery/SqlExecutionManager.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conquery/SqlExecutionManager.java @@ -1,69 +1,54 @@ package com.bakdata.conquery.sql.conquery; -import java.util.stream.Stream; - -import com.bakdata.conquery.apiv1.query.Query; -import com.bakdata.conquery.apiv1.query.QueryDescription; import com.bakdata.conquery.io.storage.MetaStorage; -import com.bakdata.conquery.models.auth.entities.User; -import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.datasets.Dataset; 
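// Illustrative, self-contained sketch (not part of this patch) of the redirect-authority check in
// LoginResource above: the login page only honours redirect URIs pointing back to the same
// authority as the request, which guards against open redirects. Throwing BadRequestException on a
// mismatch is an assumption based on the imports shown above; names here are placeholders.
import java.net.URI;

import javax.ws.rs.BadRequestException;

final class RedirectGuardSketch {
	static URI requireSameAuthority(URI requestBaseUri, URI redirectUri) {
		final String requestAuthority = requestBaseUri.getAuthority();
		final String redirectAuthority = redirectUri.getAuthority();
		if (!requestAuthority.equals(redirectAuthority)) {
			throw new BadRequestException("Redirect URI points to a foreign authority: " + redirectAuthority);
		}
		return redirectUri;
	}
}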
+import com.bakdata.conquery.models.execution.ExecutionState; +import com.bakdata.conquery.models.execution.InternalExecution; import com.bakdata.conquery.models.execution.ManagedExecution; +import com.bakdata.conquery.models.forms.managed.ManagedInternalForm; import com.bakdata.conquery.models.query.ExecutionManager; -import com.bakdata.conquery.models.query.QueryResolveContext; -import com.bakdata.conquery.models.query.results.EntityResult; +import com.bakdata.conquery.models.query.ManagedQuery; import com.bakdata.conquery.models.worker.Namespace; import com.bakdata.conquery.sql.SqlContext; import com.bakdata.conquery.sql.conversion.SqlConverter; -import com.bakdata.conquery.sql.conversion.dialect.SqlDialect; import com.bakdata.conquery.sql.conversion.model.SqlQuery; import com.bakdata.conquery.sql.execution.SqlExecutionResult; import com.bakdata.conquery.sql.execution.SqlExecutionService; import lombok.extern.slf4j.Slf4j; @Slf4j -public class SqlExecutionManager implements ExecutionManager { - private final MetaStorage metaStorage; +public class SqlExecutionManager extends ExecutionManager { + private final SqlExecutionService executionService; private final SqlConverter converter; - public SqlExecutionManager(final SqlContext context, SqlExecutionService sqlExecutionService, MetaStorage metaStorage) { - SqlDialect sqlDialect = context.getSqlDialect(); - this.metaStorage = metaStorage; - this.executionService = sqlExecutionService; - this.converter = new SqlConverter(sqlDialect, context.getConfig()); + public SqlExecutionManager(final SqlContext context, SqlExecutionService sqlExecutionService, MetaStorage storage) { + super(storage); + executionService = sqlExecutionService; + converter = new SqlConverter(context.getSqlDialect(), context.getConfig()); } @Override - public SqlManagedQuery runQuery(Namespace namespace, QueryDescription query, User user, Dataset submittedDataset, ConqueryConfig config, boolean system) { - // required for properly setting date aggregation action in all nodes of the query graph - query.resolve(new QueryResolveContext(namespace, config, metaStorage, null)); - SqlManagedQuery execution = createExecution(query, user, submittedDataset, system); - execution.initExecutable(namespace, config); - execution.start(); - // todo(tm): Non-blocking execution - SqlExecutionResult result = this.executionService.execute(execution); - execution.finish(result); - return execution; - } + protected void doExecute(Namespace namespace, InternalExecution execution) { - @Override - public void execute(Namespace namespace, ManagedExecution execution, ConqueryConfig config) { - if (!(execution instanceof SqlManagedQuery)) { - throw new UnsupportedOperationException("The SQL execution manager can only execute SQL queries, but got a %s".formatted(execution.getClass())); + // todo(tm): Non-blocking execution + if (execution instanceof ManagedQuery managedQuery) { + SqlQuery sqlQuery = converter.convert(managedQuery.getQuery()); + SqlExecutionResult result = executionService.execute(sqlQuery); + addResult(managedQuery, result); + managedQuery.setLastResultCount(((long) result.getRowCount())); + managedQuery.finish(ExecutionState.DONE); + return; } - this.executionService.execute(((SqlManagedQuery) execution)); - } + if (execution instanceof ManagedInternalForm managedForm) { + managedForm.getSubQueries().values().forEach(subQuery -> doExecute(namespace, subQuery)); + managedForm.finish(ExecutionState.DONE); + return; + } - @Override - public SqlManagedQuery 
createExecution(QueryDescription query, User user, Dataset submittedDataset, boolean system) { - Query castQuery = (Query) query; - SqlQuery converted = this.converter.convert(castQuery); - SqlManagedQuery sqlManagedQuery = new SqlManagedQuery(castQuery, user, submittedDataset, metaStorage, converted); - metaStorage.addExecution(sqlManagedQuery); - return sqlManagedQuery; + throw new IllegalStateException("Unexpected type of execution: %s".formatted(execution.getClass())); } @Override @@ -71,14 +56,4 @@ public void cancelQuery(Dataset dataset, ManagedExecution query) { // unsupported for now } - @Override - public void clearQueryResults(ManagedExecution execution) { - // unsupported for now - } - - @Override - public Stream streamQueryResults(ManagedExecution execution) { - throw new UnsupportedOperationException("Streaming for now not supported"); - } - } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conquery/SqlManagedQuery.java b/backend/src/main/java/com/bakdata/conquery/sql/conquery/SqlManagedQuery.java deleted file mode 100644 index 3e6d6e7a4a..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/sql/conquery/SqlManagedQuery.java +++ /dev/null @@ -1,113 +0,0 @@ -package com.bakdata.conquery.sql.conquery; - -import java.util.List; -import java.util.OptionalLong; -import java.util.concurrent.atomic.AtomicLong; -import java.util.function.Consumer; -import java.util.stream.Stream; - -import com.bakdata.conquery.apiv1.execution.ExecutionStatus; -import com.bakdata.conquery.apiv1.query.EditorQuery; -import com.bakdata.conquery.apiv1.query.Query; -import com.bakdata.conquery.apiv1.query.QueryDescription; -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.io.storage.MetaStorage; -import com.bakdata.conquery.models.auth.entities.Subject; -import com.bakdata.conquery.models.auth.entities.User; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.execution.ExecutionState; -import com.bakdata.conquery.models.execution.ManagedExecution; -import com.bakdata.conquery.models.query.PrintSettings; -import com.bakdata.conquery.models.query.QueryResolveContext; -import com.bakdata.conquery.models.query.SingleTableResult; -import com.bakdata.conquery.models.query.Visitable; -import com.bakdata.conquery.models.query.resultinfo.ResultInfo; -import com.bakdata.conquery.models.query.results.EntityResult; -import com.bakdata.conquery.sql.conversion.model.SqlQuery; -import com.bakdata.conquery.sql.execution.SqlExecutionResult; -import com.bakdata.conquery.util.QueryUtils; -import lombok.Getter; -import lombok.NonNull; -import lombok.Setter; - -@Setter -@Getter -@CPSType(base = ManagedExecution.class, id = "SQL_QUERY") -public class SqlManagedQuery extends ManagedExecution implements EditorQuery, SingleTableResult { - - private Query query; - private SqlQuery sqlQuery; - private SqlExecutionResult result; - private Long lastResultCount; - - protected SqlManagedQuery(MetaStorage storage) { - super(storage); - } - - public SqlManagedQuery(Query query, User owner, Dataset dataset, MetaStorage storage, SqlQuery sqlQuery) { - super(owner, dataset, storage); - this.query = query; - this.sqlQuery = sqlQuery; - } - - @Override - protected void doInitExecutable() { - query.resolve(new QueryResolveContext(getNamespace(), getConfig(), getStorage(), null)); - } - - @Override - public QueryDescription getSubmitted() { - return query; - } - - @Override - protected String makeDefaultLabel(PrintSettings cfg) { - return 
QueryUtils.makeQueryLabel(query, cfg, getId()); - } - - @Override - public void cancel() { - //TODO when async is implemented. - } - - @Override - public void visit(Consumer visitor) { - visitor.accept(this); - } - - @Override - public List getResultInfos() { - return query.getResultInfos(); - } - - @Override - public Stream streamResults(OptionalLong maybeLimit) { - final Stream results = result.getTable().stream(); - - if(maybeLimit.isEmpty()){ - return results; - } - - final long limit = maybeLimit.getAsLong(); - final AtomicLong consumed = new AtomicLong(); - - return results.takeWhile(line -> consumed.addAndGet(line.length()) < limit); - } - - @Override - public long resultRowCount() { - return result.getRowCount(); - } - - @Override - public void setStatusBase(@NonNull Subject subject, @NonNull ExecutionStatus status) { - super.setStatusBase(subject, status); - enrichStatusBase(status); - } - - public void finish(final SqlExecutionResult result) { - this.result = result; - this.lastResultCount = (long) result.getRowCount(); - super.finish(ExecutionState.DONE); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/Context.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/Context.java index bd8e50aee2..3e1207fb84 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/Context.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/Context.java @@ -1,10 +1,22 @@ package com.bakdata.conquery.sql.conversion; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; +import com.bakdata.conquery.sql.conversion.dialect.SqlDialect; import com.bakdata.conquery.sql.conversion.model.NameGenerator; /** * Marker for a conversion context. */ public interface Context { - NameGenerator getNameGenerator(); + + ConversionContext getConversionContext(); + + default SqlDialect getSqlDialect() { + return getConversionContext().getSqlDialect(); + } + + default NameGenerator getNameGenerator() { + return getConversionContext().getNameGenerator(); + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/NodeConversions.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/NodeConversions.java index 03f8e22906..81b936e558 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/NodeConversions.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/NodeConversions.java @@ -6,7 +6,6 @@ import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.dialect.SqlDialect; import com.bakdata.conquery.sql.conversion.model.NameGenerator; -import org.jooq.impl.DSL; /** * Entry point for converting {@link QueryDescription} to an SQL query. 
@@ -28,7 +27,6 @@ public ConversionContext convert(QueryDescription queryDescription) { .nameGenerator(new NameGenerator(config.getDialect().getNameMaxLength())) .nodeConversions(this) .sqlDialect(this.dialect) - .primaryColumn(DSL.field(DSL.name(config.getPrimaryColumn()))) .build(); return convert(queryDescription, initialCtx); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/SharedAliases.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/SharedAliases.java index e365dd84aa..16374562d4 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/SharedAliases.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/SharedAliases.java @@ -7,7 +7,9 @@ @RequiredArgsConstructor public enum SharedAliases { - PRIMARY_COLUMN("pid"); + PRIMARY_COLUMN("primary_id"), + SECONDARY_ID("secondary_id"), + DATES_COLUMN("dates"); private final String alias; } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/CQNegationConverter.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/CQNegationConverter.java index 3022acb51b..8206694d45 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/CQNegationConverter.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/CQNegationConverter.java @@ -35,7 +35,7 @@ public ConversionContext convert(CQNegation negationNode, ConversionContext cont } QueryStep withInvertedValidityDate = converted.getSqlDialect() .getDateAggregator() - .invertAggregatedIntervals(queryStep, context.getNameGenerator()); + .invertAggregatedIntervals(queryStep, context); return context.toBuilder().queryStep(withInvertedValidityDate).build(); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/ConversionContext.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/ConversionContext.java index 1e8d32f988..b4a7b7bb68 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/ConversionContext.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/ConversionContext.java @@ -2,8 +2,13 @@ import java.util.List; +import javax.annotation.Nullable; + +import com.bakdata.conquery.apiv1.query.SecondaryIdQuery; +import com.bakdata.conquery.apiv1.query.concept.specific.CQDateRestriction; import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.config.SqlConnectorConfig; +import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.sql.conversion.Context; import com.bakdata.conquery.sql.conversion.NodeConversions; import com.bakdata.conquery.sql.conversion.dialect.SqlDialect; @@ -14,7 +19,6 @@ import lombok.Singular; import lombok.Value; import lombok.With; -import org.jooq.Field; @Value @With @@ -22,15 +26,33 @@ public class ConversionContext implements Context { SqlConnectorConfig config; + NodeConversions nodeConversions; + SqlDialect sqlDialect; + NameGenerator nameGenerator; + @Singular List querySteps; + + @Nullable SqlQuery finalQuery; - Field primaryColumn; + + /** + * An optional date restriction range. Is set when converting a {@link CQDateRestriction}. + */ + @Nullable CDateRange dateRestrictionRange; + + /** + * An optional secondary id to group results by in addition to the primary id. Only set when converting {@link SecondaryIdQuery}s. 
+ */ + @Nullable + SecondaryIdDescription secondaryIdDescription; + boolean negation; + boolean isGroupBy; public boolean dateRestrictionActive() { @@ -51,4 +73,8 @@ public ConversionContext createChildContext() { return this.toBuilder().clearQuerySteps().build(); } + @Override + public ConversionContext getConversionContext() { + return this; + } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/AnsiSqlDateAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/AnsiSqlDateAggregator.java index 1bbd58cf75..fe5c9c1920 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/AnsiSqlDateAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/AnsiSqlDateAggregator.java @@ -3,12 +3,11 @@ import java.util.List; import com.bakdata.conquery.models.query.queryplan.DateAggregationAction; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.cqelement.intervalpacking.IntervalPackingContext; import com.bakdata.conquery.sql.conversion.cqelement.intervalpacking.IntervalPackingCteStep; import com.bakdata.conquery.sql.conversion.dialect.IntervalPacker; import com.bakdata.conquery.sql.conversion.dialect.SqlDateAggregator; -import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; import com.bakdata.conquery.sql.conversion.model.SqlTables; @@ -16,11 +15,9 @@ public class AnsiSqlDateAggregator implements SqlDateAggregator { - private final SqlFunctionProvider functionProvider; private final IntervalPacker intervalPacker; - public AnsiSqlDateAggregator(SqlFunctionProvider functionProvider, IntervalPacker intervalPacker) { - this.functionProvider = functionProvider; + public AnsiSqlDateAggregator(IntervalPacker intervalPacker) { this.intervalPacker = intervalPacker; } @@ -30,7 +27,7 @@ public QueryStep apply( List carryThroughSelects, DateAggregationDates dateAggregationDates, DateAggregationAction dateAggregationAction, - NameGenerator nameGenerator + ConversionContext conversionContext ) { SqlAggregationAction aggregationAction = switch (dateAggregationAction) { case MERGE -> new MergeAggregateAction(joinedStep); @@ -43,11 +40,9 @@ public QueryStep apply( .sqlAggregationAction(aggregationAction) .carryThroughSelects(carryThroughSelects) .dateAggregationDates(dateAggregationDates) - .dateAggregationTables(aggregationAction.tableNames(nameGenerator)) - .primaryColumn(joinedStep.getQualifiedSelects().getPrimaryColumn()) - .functionProvider(this.functionProvider) - .intervalPacker(this.intervalPacker) - .nameGenerator(nameGenerator) + .dateAggregationTables(aggregationAction.tableNames(conversionContext.getNameGenerator())) + .ids(joinedStep.getQualifiedSelects().getIds()) + .conversionContext(conversionContext) .build(); QueryStep finalDateAggregationStep = convertSteps(joinedStep, aggregationAction.dateAggregationCtes(), context); @@ -57,24 +52,24 @@ public QueryStep apply( Selects predecessorSelects = finalDateAggregationStep.getSelects(); String joinedCteLabel = joinedStep.getCteName(); - SqlTables intervalPackingTables = IntervalPackingCteStep.getTables(joinedCteLabel, finalDateAggregationStep.getCteName(), context.getNameGenerator()); + SqlTables intervalPackingTables = 
IntervalPackingCteStep.createTables(finalDateAggregationStep, context); IntervalPackingContext intervalPackingContext = IntervalPackingContext.builder() .nodeLabel(joinedCteLabel) - .primaryColumn(predecessorSelects.getPrimaryColumn()) + .ids(predecessorSelects.getIds()) .validityDate(predecessorSelects.getValidityDate().get()) .predecessor(finalDateAggregationStep) .carryThroughSelects(carryThroughSelects) - .intervalPackingTables(intervalPackingTables) - .nameGenerator(nameGenerator) + .tables(intervalPackingTables) + .conversionContext(conversionContext) .build(); return this.intervalPacker.createIntervalPackingSteps(intervalPackingContext); } @Override - public QueryStep invertAggregatedIntervals(QueryStep baseStep, NameGenerator nameGenerator) { + public QueryStep invertAggregatedIntervals(QueryStep baseStep, ConversionContext conversionContext) { DateAggregationDates dateAggregationDates = DateAggregationDates.forSingleStep(baseStep); if (dateAggregationDates.dateAggregationImpossible()) { @@ -82,20 +77,18 @@ public QueryStep invertAggregatedIntervals(QueryStep baseStep, NameGenerator nam } Selects baseStepQualifiedSelects = baseStep.getQualifiedSelects(); - SqlTables dateAggregationTables = InvertCteStep.getTables(baseStep, nameGenerator); + SqlTables dateAggregationTables = DateAggregationCteStep.createInvertTables(baseStep, conversionContext.getNameGenerator()); DateAggregationContext context = DateAggregationContext.builder() .sqlAggregationAction(null) // when inverting, an aggregation has already been applied .carryThroughSelects(baseStepQualifiedSelects.getSqlSelects()) .dateAggregationDates(dateAggregationDates) .dateAggregationTables(dateAggregationTables) - .primaryColumn(baseStepQualifiedSelects.getPrimaryColumn()) - .functionProvider(this.functionProvider) - .intervalPacker(this.intervalPacker) - .nameGenerator(nameGenerator) + .ids(baseStepQualifiedSelects.getIds()) + .conversionContext(conversionContext) .build(); - return convertSteps(baseStep, InvertCteStep.requiredSteps(), context); + return convertSteps(baseStep, DateAggregationCteStep.createInvertCtes(), context); } private QueryStep convertSteps(QueryStep baseStep, List dateAggregationCTEs, DateAggregationContext context) { diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationContext.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationContext.java index 9430b046a4..a4cafe174e 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationContext.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationContext.java @@ -6,31 +6,28 @@ import java.util.Map; import com.bakdata.conquery.sql.conversion.Context; -import com.bakdata.conquery.sql.conversion.dialect.IntervalPacker; -import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; +import com.bakdata.conquery.sql.conversion.model.Qualifiable; import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import lombok.Builder; import lombok.Value; -import org.jooq.Field; 
@Value @Builder(toBuilder = true) -class DateAggregationContext implements Context { +class DateAggregationContext implements Context, Qualifiable { - Field primaryColumn; + SqlIdColumns ids; List carryThroughSelects; SqlTables dateAggregationTables; DateAggregationDates dateAggregationDates; @Builder.Default Map> intervalMergeSteps = new HashMap<>(); SqlAggregationAction sqlAggregationAction; - SqlFunctionProvider functionProvider; - IntervalPacker intervalPacker; - NameGenerator nameGenerator; + ConversionContext conversionContext; public DateAggregationContext withStep(DateAggregationCteStep dateAggregationCteStep, QueryStep queryStep) { this.intervalMergeSteps.computeIfAbsent(dateAggregationCteStep, k -> new ArrayList<>()) @@ -47,7 +44,7 @@ public QueryStep getStep(DateAggregationCteStep dateAggregationCteStep) { } public List getSteps(DateAggregationCteStep dateAggregationCteStep) { - if (dateAggregationCteStep != MergeCteStep.NODE_NO_OVERLAP) { + if (dateAggregationCteStep != DateAggregationCteStep.NODE_NO_OVERLAP) { throw new UnsupportedOperationException( "Only MergeCteStep.NODE_NO_OVERLAP has multiple steps. Use getStep() for all other DateAggregationSteps." ); @@ -55,9 +52,10 @@ public List getSteps(DateAggregationCteStep dateAggregationCteStep) { return this.intervalMergeSteps.get(dateAggregationCteStep); } + @Override public DateAggregationContext qualify(String qualifier) { return this.toBuilder() - .primaryColumn(QualifyingUtil.qualify(this.primaryColumn, qualifier)) + .ids(this.ids.qualify(qualifier)) .carryThroughSelects(QualifyingUtil.qualify(this.carryThroughSelects, qualifier)) .dateAggregationDates(this.dateAggregationDates.qualify(qualifier)) .build(); diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCte.java index 94d72ebfc2..ad67c34449 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCte.java @@ -23,11 +23,11 @@ public QueryStep convert(DateAggregationContext context, QueryStep previous) { QueryStep.QueryStepBuilder builder = this.convertStep(context); - if (cteStep != MergeCteStep.NODE_NO_OVERLAP) { + if (cteStep != DateAggregationCteStep.NODE_NO_OVERLAP) { builder = builder.cteName(dateAggregationTables.cteName(cteStep)) .predecessors(List.of(previous)); } - if (cteStep != InvertCteStep.INVERT) { + if (cteStep != DateAggregationCteStep.INVERT) { builder = builder.fromTable(QueryStep.toTableLike(dateAggregationTables.getPredecessor(cteStep))); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCteStep.java index 53520440f3..0c156762f0 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCteStep.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/DateAggregationCteStep.java @@ -1,7 +1,89 @@ package com.bakdata.conquery.sql.conversion.cqelement.aggregation; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + import com.bakdata.conquery.sql.conversion.model.CteStep; +import com.bakdata.conquery.sql.conversion.model.NameGenerator; +import 
com.bakdata.conquery.sql.conversion.model.QueryStep; +import com.bakdata.conquery.sql.conversion.model.SqlTables; +import lombok.Getter; +import lombok.RequiredArgsConstructor; + +@Getter +@RequiredArgsConstructor +enum DateAggregationCteStep implements CteStep { + + // merge or intersect + OVERLAP("overlap", OverlapCte::new, null), + INTERMEDIATE_TABLE("no_overlap", IntermediateTableCte::new, null), + NODE_NO_OVERLAP("node_no_overlap", NodeNoOverlapCte::new, INTERMEDIATE_TABLE), + MERGE("merge", MergeCte::new, OVERLAP), + + // invert + ROW_NUMBER("row_numbers", RowNumberCte::new, null), + INVERT("inverted_dates", InvertCte::new, ROW_NUMBER); + + private static final List MERGE_STEPS = List.of( + OVERLAP, + INTERMEDIATE_TABLE, + NODE_NO_OVERLAP, + MERGE + ); + + private static final List INTERSECT_STEPS = List.of( + OVERLAP, + INTERMEDIATE_TABLE, + MERGE + ); + + private static final List INVERT_STEPS = List.of( + ROW_NUMBER, + INVERT + ); + + private final String suffix; + private final DateAggregationCteConstructor stepConstructor; + private final CteStep predecessor; + + public static List createMergeCtes() { + return createCtes(MERGE_STEPS); + } + + public static List createIntersectCtes() { + return createCtes(INTERSECT_STEPS); + } + + public static List createInvertCtes() { + return createCtes(INVERT_STEPS); + } + + public static SqlTables createMergeTables(QueryStep joinedTable, NameGenerator nameGenerator) { + return createTables(MERGE_STEPS, joinedTable, nameGenerator); + } + + public static SqlTables createIntersectTables(QueryStep joinedTable, NameGenerator nameGenerator) { + return createTables(INTERSECT_STEPS, joinedTable, nameGenerator); + } + + public static SqlTables createInvertTables(QueryStep joinedTable, NameGenerator nameGenerator) { + return createTables(INVERT_STEPS, joinedTable, nameGenerator); + } + + private static List createCtes(List requiredSteps) { + return requiredSteps.stream() + .map(cteStep -> cteStep.getStepConstructor().create(cteStep)) + .toList(); + } + + private static SqlTables createTables(List requiredSteps, QueryStep joinedTable, NameGenerator nameGenerator) { + Set asSet = new HashSet<>(requiredSteps); + Map cteNameMap = CteStep.createCteNameMap(asSet, joinedTable.getCteName(), nameGenerator); + Map predecessorMap = CteStep.getDefaultPredecessorMap(asSet); + return new SqlTables(joinedTable.getCteName(), cteNameMap, predecessorMap); + } -interface DateAggregationCteStep extends CteStep { } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntermediateTableCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntermediateTableCte.java index a061bc6117..33790bbf14 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntermediateTableCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntermediateTableCte.java @@ -24,12 +24,13 @@ public IntermediateTableCte(DateAggregationCteStep cteStep) { @Override protected QueryStep.QueryStepBuilder convertStep(DateAggregationContext context) { + List intermediateTableSelects = context.getSqlAggregationAction().getIntermediateTableSelects( context.getDateAggregationDates(), context.getCarryThroughSelects() ); Selects selects = Selects.builder() - .primaryColumn(context.getPrimaryColumn()) + .ids(context.getIds()) .sqlSelects(intermediateTableSelects) .build(); @@ -37,7 +38,7 @@ protected QueryStep.QueryStepBuilder convertStep(DateAggregationContext 
context) List> allStarts = dateAggregationDates.allStarts(); List> allEnds = dateAggregationDates.allEnds(); - SqlFunctionProvider functionProvider = context.getFunctionProvider(); + SqlFunctionProvider functionProvider = context.getSqlDialect().getFunctionProvider(); Condition startBeforeEnd = functionProvider.greatest(allStarts).lessThan(functionProvider.least(allEnds)); Condition startIsNull = allStarts.stream() diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntersectAggregationAction.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntersectAggregationAction.java index 4dd3892863..70ee2023fb 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntersectAggregationAction.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntersectAggregationAction.java @@ -24,12 +24,12 @@ class IntersectAggregationAction implements SqlAggregationAction { @Override public SqlTables tableNames(NameGenerator nameGenerator) { - return IntersectCteStep.getTables(this.joinedStep, nameGenerator); + return DateAggregationCteStep.createIntersectTables(this.joinedStep, nameGenerator); } @Override public List dateAggregationCtes() { - return IntersectCteStep.requiredSteps(); + return DateAggregationCteStep.createIntersectCtes(); } @Override @@ -62,12 +62,12 @@ public List getIntermediateTableSelects(DateAggregationDates dateAggr @Override public List getNoOverlapSelects(DateAggregationContext dateAggregationContext) { - return List.of(dateAggregationContext.getStep(IntersectCteStep.INTERMEDIATE_TABLE)); + return List.of(dateAggregationContext.getStep(DateAggregationCteStep.INTERMEDIATE_TABLE)); } @Override public QueryStep getOverlapStep(DateAggregationContext dateAggregationContext) { - return dateAggregationContext.getStep(IntersectCteStep.OVERLAP); + return dateAggregationContext.getStep(DateAggregationCteStep.OVERLAP); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntersectCteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntersectCteStep.java deleted file mode 100644 index 73d06de0c6..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/IntersectCteStep.java +++ /dev/null @@ -1,36 +0,0 @@ -package com.bakdata.conquery.sql.conversion.cqelement.aggregation; - -import java.util.Arrays; -import java.util.List; -import java.util.Set; - -import com.bakdata.conquery.sql.conversion.model.NameGenerator; -import com.bakdata.conquery.sql.conversion.model.QueryStep; -import com.bakdata.conquery.sql.conversion.model.SqlTables; -import lombok.Getter; -import lombok.RequiredArgsConstructor; - -@Getter -@RequiredArgsConstructor -enum IntersectCteStep implements DateAggregationCteStep { - - OVERLAP("overlap", OverlapCte::new, null), - INTERMEDIATE_TABLE("no_overlap", IntermediateTableCte::new, null), - MERGE("merge", MergeCte::new, OVERLAP); - - private static final Set REQUIRED_STEPS = Set.of(values()); - private final String suffix; - private final DateAggregationCteConstructor stepConstructor; - private final IntersectCteStep predecessor; - - static List requiredSteps() { - return Arrays.stream(values()) - .map(cteStep -> cteStep.getStepConstructor().create(cteStep)) - .toList(); - } - - static SqlTables getTables(QueryStep joinedTable, NameGenerator nameGenerator) { - return new SqlTables(joinedTable.getCteName(), 
REQUIRED_STEPS, joinedTable.getCteName(), nameGenerator); - } - -} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/InvertCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/InvertCte.java index 88781c941b..787d5f9850 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/InvertCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/InvertCte.java @@ -1,14 +1,16 @@ package com.bakdata.conquery.sql.conversion.cqelement.aggregation; import java.sql.Date; +import java.util.List; import java.util.Optional; +import java.util.stream.Stream; -import com.bakdata.conquery.sql.conversion.SharedAliases; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import lombok.Getter; import org.jooq.Condition; import org.jooq.Field; @@ -35,31 +37,26 @@ public InvertCte(DateAggregationCteStep cteStep) { @Override protected QueryStep.QueryStepBuilder convertStep(DateAggregationContext context) { - QueryStep rowNumberStep = context.getStep(InvertCteStep.ROW_NUMBER); + QueryStep rowNumberStep = context.getStep(DateAggregationCteStep.ROW_NUMBER); - Field primaryColumn = context.getPrimaryColumn(); - Field leftPrimaryColumn = QualifyingUtil.qualify(primaryColumn, ROWS_LEFT_TABLE_NAME); - Field rightPrimaryColumn = QualifyingUtil.qualify(primaryColumn, ROWS_RIGHT_TABLE_NAME); - Field coalescedPrimaryColumn = DSL.coalesce(leftPrimaryColumn, rightPrimaryColumn) - .as(SharedAliases.PRIMARY_COLUMN.getAlias()); + SqlIdColumns ids = context.getIds(); + SqlIdColumns leftIds = ids.qualify(ROWS_LEFT_TABLE_NAME); + SqlIdColumns rightIds = ids.qualify(ROWS_RIGHT_TABLE_NAME); + SqlIdColumns coalescedIds = SqlIdColumns.coalesce(List.of(leftIds, rightIds)); - Selects invertSelects = getInvertSelects(rowNumberStep, coalescedPrimaryColumn, context); - TableOnConditionStep fromTable = selfJoinWithShiftedRows(leftPrimaryColumn, rightPrimaryColumn, rowNumberStep); + Selects invertSelects = getInvertSelects(rowNumberStep, coalescedIds, context); + TableOnConditionStep fromTable = selfJoinWithShiftedRows(leftIds, rightIds, rowNumberStep); return QueryStep.builder() .selects(invertSelects) .fromTable(fromTable); } - private Selects getInvertSelects( - QueryStep rowNumberStep, - Field coalescedPrimaryColumn, - DateAggregationContext context - ) { + private Selects getInvertSelects(QueryStep rowNumberStep, SqlIdColumns coalescedIds, DateAggregationContext context) { - SqlFunctionProvider functionProvider = context.getFunctionProvider(); + SqlFunctionProvider functionProvider = context.getSqlDialect().getFunctionProvider(); ColumnDateRange validityDate = rowNumberStep.getSelects().getValidityDate().get(); - + Field rangeStart = DSL.coalesce( QualifyingUtil.qualify(validityDate.getEnd(), ROWS_LEFT_TABLE_NAME), functionProvider.toDateField(functionProvider.getMinDateExpression()) @@ -71,26 +68,28 @@ private Selects getInvertSelects( ).as(DateAggregationCte.RANGE_END); return Selects.builder() - .primaryColumn(coalescedPrimaryColumn) + .ids(coalescedIds) .validityDate(Optional.of(ColumnDateRange.of(rangeStart, rangeEnd))) 
.sqlSelects(context.getCarryThroughSelects()) .build(); } - private TableOnConditionStep selfJoinWithShiftedRows(Field leftPrimaryColumn, Field rightPrimaryColumn, QueryStep rowNumberStep) { + private TableOnConditionStep selfJoinWithShiftedRows(SqlIdColumns leftPrimaryColumn, SqlIdColumns rightPrimaryColumn, QueryStep rowNumberStep) { Field leftRowNumber = DSL.field(DSL.name(ROWS_LEFT_TABLE_NAME, RowNumberCte.ROW_NUMBER_FIELD_NAME), Integer.class) .plus(1); Field rightRowNumber = DSL.field(DSL.name(ROWS_RIGHT_TABLE_NAME, RowNumberCte.ROW_NUMBER_FIELD_NAME), Integer.class); - Condition joinCondition = leftPrimaryColumn.eq(rightPrimaryColumn) - .and(leftRowNumber.eq(rightRowNumber)); + Condition[] joinConditions = Stream.concat( + Stream.of(leftRowNumber.eq(rightRowNumber)), + SqlIdColumns.join(leftPrimaryColumn, rightPrimaryColumn).stream() + ) + .toArray(Condition[]::new); TableLike rowNumberTable = QueryStep.toTableLike(rowNumberStep.getCteName()); return rowNumberTable.asTable(ROWS_LEFT_TABLE_NAME) .fullJoin(rowNumberTable.asTable(ROWS_RIGHT_TABLE_NAME)) - .on(joinCondition); + .on(joinConditions); } - } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/InvertCteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/InvertCteStep.java deleted file mode 100644 index adc9be3de6..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/InvertCteStep.java +++ /dev/null @@ -1,35 +0,0 @@ -package com.bakdata.conquery.sql.conversion.cqelement.aggregation; - -import java.util.Arrays; -import java.util.List; -import java.util.Set; - -import com.bakdata.conquery.sql.conversion.model.NameGenerator; -import com.bakdata.conquery.sql.conversion.model.QueryStep; -import com.bakdata.conquery.sql.conversion.model.SqlTables; -import lombok.Getter; -import lombok.RequiredArgsConstructor; - -@Getter -@RequiredArgsConstructor -enum InvertCteStep implements DateAggregationCteStep { - - ROW_NUMBER("row_numbers", RowNumberCte::new, null), - INVERT("inverted_dates", InvertCte::new, InvertCteStep.ROW_NUMBER); - - private static final Set REQUIRED_STEPS = Set.of(values()); - private final String suffix; - private final DateAggregationCteConstructor stepConstructor; - private final InvertCteStep predecessor; - - static List requiredSteps() { - return Arrays.stream(values()) - .map(cteStep -> cteStep.getStepConstructor().create(cteStep)) - .toList(); - } - - static SqlTables getTables(QueryStep joinedTable, NameGenerator nameGenerator) { - return new SqlTables(joinedTable.getCteName(), REQUIRED_STEPS, joinedTable.getCteName(), nameGenerator); - } - -} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/MergeAggregateAction.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/MergeAggregateAction.java index 9178576f5e..d469015d41 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/MergeAggregateAction.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/MergeAggregateAction.java @@ -22,12 +22,12 @@ class MergeAggregateAction implements SqlAggregationAction { @Override public SqlTables tableNames(NameGenerator nameGenerator) { - return MergeCteStep.tableNames(this.joinedStep, nameGenerator); + return DateAggregationCteStep.createMergeTables(this.joinedStep, nameGenerator); } @Override public List dateAggregationCtes() { - return 
MergeCteStep.requiredSteps(); + return DateAggregationCteStep.createMergeCtes(); } @Override @@ -51,12 +51,12 @@ public List getIntermediateTableSelects(DateAggregationDates dateAggr @Override public List getNoOverlapSelects(DateAggregationContext dateAggregationContext) { - return dateAggregationContext.getSteps(MergeCteStep.NODE_NO_OVERLAP); + return dateAggregationContext.getSteps(DateAggregationCteStep.NODE_NO_OVERLAP); } @Override public QueryStep getOverlapStep(DateAggregationContext dateAggregationContext) { - return dateAggregationContext.getStep(MergeCteStep.OVERLAP); + return dateAggregationContext.getStep(DateAggregationCteStep.OVERLAP); } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/MergeCteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/MergeCteStep.java deleted file mode 100644 index 9ba65b44cb..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/MergeCteStep.java +++ /dev/null @@ -1,37 +0,0 @@ -package com.bakdata.conquery.sql.conversion.cqelement.aggregation; - -import java.util.Arrays; -import java.util.List; -import java.util.Set; - -import com.bakdata.conquery.sql.conversion.model.NameGenerator; -import com.bakdata.conquery.sql.conversion.model.QueryStep; -import com.bakdata.conquery.sql.conversion.model.SqlTables; -import lombok.Getter; -import lombok.RequiredArgsConstructor; - -@Getter -@RequiredArgsConstructor -enum MergeCteStep implements DateAggregationCteStep { - - OVERLAP("overlap", OverlapCte::new, null), - INTERMEDIATE_TABLE("no_overlap", IntermediateTableCte::new, null), - NODE_NO_OVERLAP("node_no_overlap", NodeNoOverlapCte::new, INTERMEDIATE_TABLE), - MERGE("merge", MergeCte::new, OVERLAP); - - private static final Set REQUIRED_STEPS = Set.of(values()); - private final String suffix; - private final DateAggregationCteConstructor stepConstructor; - private final MergeCteStep predecessor; - - static List requiredSteps() { - return Arrays.stream(values()) - .map(cteStep -> cteStep.getStepConstructor().create(cteStep)) - .toList(); - } - - static SqlTables tableNames(QueryStep joinedTable, NameGenerator nameGenerator) { - return new SqlTables(joinedTable.getCteName(), REQUIRED_STEPS, joinedTable.getCteName(), nameGenerator); - } - -} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/NodeNoOverlapCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/NodeNoOverlapCte.java index cd290e4fcd..9af31b6d89 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/NodeNoOverlapCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/NodeNoOverlapCte.java @@ -29,7 +29,7 @@ protected QueryStep.QueryStepBuilder convertStep(DateAggregationContext context) // we create a no-overlap node for each query step we need to aggregate DateAggregationDates dateAggregationDates = context.getDateAggregationDates(); Iterator validityDates = dateAggregationDates.getValidityDates().iterator(); - QueryStep intermediateTableStep = context.getStep(MergeCteStep.INTERMEDIATE_TABLE); + QueryStep intermediateTableStep = context.getStep(DateAggregationCteStep.INTERMEDIATE_TABLE); // first no-overlap step has intermediate table as predecessor QueryStep.QueryStepBuilder noOverlapStep = createNoOverlapStep(validityDates.next(), context, intermediateTableStep); @@ -60,7 +60,7 @@ private 
QueryStep.QueryStepBuilder createNoOverlapStep( Field asRangeStart = start.as(DateAggregationCte.RANGE_START); String intermediateTableCteName = dateAggregationTables.getPredecessor(getCteStep()); Selects nodeNoOverlapSelects = Selects.builder() - .primaryColumn(context.getPrimaryColumn()) + .ids(context.getIds()) .validityDate(Optional.of(ColumnDateRange.of(asRangeStart, asRangeEnd))) .sqlSelects(context.getCarryThroughSelects()) .build(); @@ -68,7 +68,7 @@ private QueryStep.QueryStepBuilder createNoOverlapStep( Condition startNotNull = start.isNotNull(); return QueryStep.builder() - .cteName("%s_%s".formatted(dateAggregationTables.cteName(MergeCteStep.NODE_NO_OVERLAP), counter)) + .cteName("%s_%s".formatted(dateAggregationTables.cteName(DateAggregationCteStep.NODE_NO_OVERLAP), counter)) .selects(nodeNoOverlapSelects) .fromTable(QueryStep.toTableLike(intermediateTableCteName)) .conditions(List.of(startNotNull)) diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/OverlapCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/OverlapCte.java index 35761f0ef1..2d4306fd3f 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/OverlapCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/OverlapCte.java @@ -24,21 +24,19 @@ public OverlapCte(DateAggregationCteStep cteStep) { @Override protected QueryStep.QueryStepBuilder convertStep(DateAggregationContext context) { + SqlFunctionProvider functionProvider = context.getSqlDialect().getFunctionProvider(); + DateAggregationDates dateAggregationDates = context.getDateAggregationDates(); List> allStarts = dateAggregationDates.allStarts(); List> allEnds = dateAggregationDates.allEnds(); - ColumnDateRange overlapValidityDate = context.getSqlAggregationAction().getOverlapValidityDate( - context.getDateAggregationDates(), - context.getFunctionProvider() - ); + ColumnDateRange overlapValidityDate = context.getSqlAggregationAction().getOverlapValidityDate(context.getDateAggregationDates(), functionProvider); Selects overlapSelects = Selects.builder() - .primaryColumn(context.getPrimaryColumn()) + .ids(context.getIds()) .validityDate(Optional.of(overlapValidityDate)) .sqlSelects(context.getCarryThroughSelects()) .build(); - SqlFunctionProvider functionProvider = context.getFunctionProvider(); Condition startBeforeEnd = functionProvider.greatest(allStarts).lessThan(functionProvider.least(allEnds)); Condition allStartsNotNull = allStarts.stream() .map(Field::isNotNull) diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/PostgreSqlDateAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/PostgreSqlDateAggregator.java index d11e9eeb02..8386331d4f 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/PostgreSqlDateAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/PostgreSqlDateAggregator.java @@ -5,10 +5,11 @@ import java.util.stream.Collectors; import com.bakdata.conquery.models.query.queryplan.DateAggregationAction; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.dialect.SqlDateAggregator; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; -import 
com.bakdata.conquery.sql.conversion.model.NameGenerator; +import com.bakdata.conquery.sql.conversion.model.CteStep; import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; @@ -22,7 +23,7 @@ public class PostgreSqlDateAggregator implements SqlDateAggregator { @Getter @RequiredArgsConstructor - private enum PostgresDateAggregationCteStep implements DateAggregationCteStep { + private enum PostgresDateAggregationCteStep implements CteStep { DATE_AGGREGATED("dates_aggregated"), DATES_INVERTED("dates_inverted"); @@ -42,20 +43,20 @@ public QueryStep apply( List carryThroughSelects, DateAggregationDates dateAggregationDates, DateAggregationAction dateAggregationAction, - NameGenerator nameGenerator + ConversionContext conversionContext ) { String joinedStepCteName = joinedStep.getCteName(); ColumnDateRange aggregatedValidityDate = getAggregatedValidityDate(dateAggregationDates, dateAggregationAction, joinedStepCteName); Selects dateAggregationSelects = Selects.builder() - .primaryColumn(joinedStep.getQualifiedSelects().getPrimaryColumn()) + .ids(joinedStep.getQualifiedSelects().getIds()) .validityDate(Optional.ofNullable(aggregatedValidityDate)) .sqlSelects(QualifyingUtil.qualify(carryThroughSelects, joinedStepCteName)) .build(); return QueryStep.builder() - .cteName(nameGenerator.cteStepName(PostgresDateAggregationCteStep.DATE_AGGREGATED, joinedStepCteName)) + .cteName(conversionContext.getNameGenerator().cteStepName(PostgresDateAggregationCteStep.DATE_AGGREGATED, joinedStepCteName)) .selects(dateAggregationSelects) .fromTable(QueryStep.toTableLike(joinedStepCteName)) .predecessors(List.of(joinedStep)) @@ -63,7 +64,7 @@ public QueryStep apply( } @Override - public QueryStep invertAggregatedIntervals(QueryStep baseStep, NameGenerator nameGenerator) { + public QueryStep invertAggregatedIntervals(QueryStep baseStep, ConversionContext conversionContext) { Selects baseStepSelects = baseStep.getQualifiedSelects(); Optional validityDate = baseStepSelects.getValidityDate(); @@ -90,7 +91,7 @@ public QueryStep invertAggregatedIntervals(QueryStep baseStep, NameGenerator nam ).as(PostgresDateAggregationCteStep.DATES_INVERTED.getSuffix()); return QueryStep.builder() - .cteName(nameGenerator.cteStepName(PostgresDateAggregationCteStep.DATES_INVERTED, baseStep.getCteName())) + .cteName(conversionContext.getNameGenerator().cteStepName(PostgresDateAggregationCteStep.DATES_INVERTED, baseStep.getCteName())) .selects(baseStepSelects.withValidityDate(ColumnDateRange.of(invertedValidityDate))) .fromTable(QueryStep.toTableLike(baseStep.getCteName())) .predecessors(List.of(baseStep)) diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/RowNumberCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/RowNumberCte.java index b3387fdae6..53492f0982 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/RowNumberCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/aggregation/RowNumberCte.java @@ -6,6 +6,7 @@ import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.select.FieldWrapper; import 
com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import lombok.Getter; @@ -28,17 +29,17 @@ public RowNumberCte(DateAggregationCteStep cteStep) { @Override protected QueryStep.QueryStepBuilder convertStep(DateAggregationContext context) { - Field primaryColumn = context.getPrimaryColumn(); + SqlIdColumns ids = context.getIds(); ColumnDateRange aggregatedValidityDate = context.getDateAggregationDates().getValidityDates().get(0); - Field rowNumber = DSL.rowNumber().over(DSL.partitionBy(primaryColumn).orderBy(aggregatedValidityDate.getStart())) + Field rowNumber = DSL.rowNumber().over(DSL.partitionBy(ids.toFields()).orderBy(aggregatedValidityDate.getStart())) .as(ROW_NUMBER_FIELD_NAME); ArrayList selects = new ArrayList<>(context.getCarryThroughSelects()); selects.add(new FieldWrapper<>(rowNumber)); Selects rowNumberSelects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(Optional.of(aggregatedValidityDate)) .sqlSelects(selects) .build(); diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationFilterCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationFilterCte.java index c3be63a466..5638183758 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationFilterCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationFilterCte.java @@ -13,8 +13,8 @@ class AggregationFilterCte extends ConnectorCte { @Override - public ConnectorCteStep cteStep() { - return ConnectorCteStep.AGGREGATION_FILTER; + public ConceptCteStep cteStep() { + return ConceptCteStep.AGGREGATION_FILTER; } @Override @@ -44,7 +44,7 @@ private Selects getAggregationFilterSelects(CQTableContext tableContext) { .collect(Collectors.toList()); return Selects.builder() - .primaryColumn(previousSelects.getPrimaryColumn()) + .ids(previousSelects.getIds()) .validityDate(previousSelects.getValidityDate()) .sqlSelects(forAggregationFilterStep) .build(); diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationSelectCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationSelectCte.java index 33facee40c..769706d9ef 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationSelectCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/AggregationSelectCte.java @@ -2,37 +2,36 @@ import java.util.List; -import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; -import org.jooq.Field; class AggregationSelectCte extends ConnectorCte { @Override public QueryStep.QueryStepBuilder convertStep(CQTableContext tableContext) { - String predecessor = tableContext.getConnectorTables().getPredecessor(ConnectorCteStep.AGGREGATION_SELECT); - Field primaryColumn = QualifyingUtil.qualify(tableContext.getPrimaryColumn(), predecessor); + String predecessor = tableContext.getConnectorTables().getPredecessor(ConceptCteStep.AGGREGATION_SELECT); + SqlIdColumns ids = tableContext.getIds().qualify(predecessor); List requiredInAggregationFilterStep = tableContext.allSqlSelects().stream() .flatMap(sqlSelects -> 
sqlSelects.getAggregationSelects().stream()) .toList(); Selects aggregationSelectSelects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .sqlSelects(requiredInAggregationFilterStep) .build(); return QueryStep.builder() .selects(aggregationSelectSelects) - .groupBy(List.of(primaryColumn)); + .groupBy(ids.toFields()); } @Override - public ConnectorCteStep cteStep() { - return ConnectorCteStep.AGGREGATION_SELECT; + public ConceptCteStep cteStep() { + return ConceptCteStep.AGGREGATION_SELECT; } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQConceptConverter.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQConceptConverter.java index e9e09ab327..db000681be 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQConceptConverter.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQConceptConverter.java @@ -9,24 +9,25 @@ import com.bakdata.conquery.apiv1.query.concept.filter.CQTable; import com.bakdata.conquery.apiv1.query.concept.specific.CQConcept; +import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.ConceptElement; import com.bakdata.conquery.models.datasets.concepts.Connector; import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeChild; import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeNode; import com.bakdata.conquery.models.query.queryplan.DateAggregationAction; import com.bakdata.conquery.sql.conversion.NodeConverter; +import com.bakdata.conquery.sql.conversion.SharedAliases; import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.cqelement.intervalpacking.IntervalPackingContext; -import com.bakdata.conquery.sql.conversion.cqelement.intervalpacking.IntervalPackingCteStep; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; -import com.bakdata.conquery.sql.conversion.model.CteStep; +import com.bakdata.conquery.sql.conversion.model.ConceptConversionTables; import com.bakdata.conquery.sql.conversion.model.LogicalOperation; import com.bakdata.conquery.sql.conversion.model.NameGenerator; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.QueryStepJoiner; import com.bakdata.conquery.sql.conversion.model.Selects; -import com.bakdata.conquery.sql.conversion.model.SqlTables; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.filter.ConditionType; import com.bakdata.conquery.sql.conversion.model.filter.ConditionUtil; import com.bakdata.conquery.sql.conversion.model.filter.SqlFilters; @@ -36,21 +37,13 @@ import com.bakdata.conquery.sql.conversion.model.select.SelectContext; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import com.bakdata.conquery.sql.conversion.model.select.SqlSelects; -import lombok.Getter; -import lombok.RequiredArgsConstructor; +import com.google.common.base.Preconditions; import org.jooq.Condition; import org.jooq.Field; import org.jooq.impl.DSL; public class CQConceptConverter implements NodeConverter { - @Getter - @RequiredArgsConstructor - private enum ConceptCteStep implements CteStep { - UNIVERSAL_SELECTS("universal_selects"); - private final String suffix; - } - private final List connectorCTEs; public CQConceptConverter() { @@ -71,24 +64,24 
@@ public Class getConversionClass() { @Override public ConversionContext convert(CQConcept cqConcept, ConversionContext context) { - String label = context.getNameGenerator().conceptName(cqConcept); + TablePathGenerator pathGenerator = new TablePathGenerator(context); List convertedConnectorTables = cqConcept.getTables().stream() - .flatMap(cqTable -> convertCqTable(label, cqConcept, cqTable, context).stream()) + .flatMap(cqTable -> convertCqTable(pathGenerator, cqConcept, cqTable, context).stream()) .toList(); QueryStep lastConceptStep; if (convertedConnectorTables.size() == 1) { - lastConceptStep = finishConceptConversion(label, convertedConnectorTables.get(0), cqConcept, context); + lastConceptStep = finishConceptConversion(convertedConnectorTables.get(0), cqConcept, pathGenerator, context); } else { QueryStep joinedStep = QueryStepJoiner.joinSteps(convertedConnectorTables, LogicalOperation.OR, DateAggregationAction.MERGE, context); - lastConceptStep = finishConceptConversion(label, joinedStep, cqConcept, context); + lastConceptStep = finishConceptConversion(joinedStep, cqConcept, pathGenerator, context); } return context.withQueryStep(lastConceptStep); } - private Optional convertCqTable(String conceptLabel, CQConcept cqConcept, CQTable cqTable, ConversionContext context) { - CQTableContext tableContext = createTableContext(conceptLabel, cqConcept, cqTable, context); + private Optional convertCqTable(TablePathGenerator pathGenerator, CQConcept cqConcept, CQTable cqTable, ConversionContext context) { + CQTableContext tableContext = createTableContext(pathGenerator, cqConcept, cqTable, context); Optional lastQueryStep = Optional.empty(); for (ConnectorCte queryStep : connectorCTEs) { Optional convertedStep = queryStep.convert(tableContext, lastQueryStep); @@ -101,99 +94,122 @@ private Optional convertCqTable(String conceptLabel, CQConcept cqConc return lastQueryStep; } - private static QueryStep finishConceptConversion(String conceptLabel, QueryStep predecessor, CQConcept cqConcept, ConversionContext context) { + private static QueryStep finishConceptConversion(QueryStep predecessor, CQConcept cqConcept, TablePathGenerator pathGenerator, ConversionContext context) { + + ConceptConversionTables universalTables = pathGenerator.createUniversalTables(predecessor, cqConcept); Selects predecessorSelects = predecessor.getQualifiedSelects(); - SelectContext selectContext = SelectContext.forUniversalSelects(predecessorSelects.getPrimaryColumn(), predecessorSelects.getValidityDate(), context); - List universalSelects = cqConcept.getSelects().stream() - .map(select -> select.convertToSqlSelects(selectContext)) - .flatMap(sqlSelects -> sqlSelects.getFinalSelects().stream()) - .toList(); - - List allConceptSelects = Stream.of(universalSelects, predecessorSelects.getSqlSelects()) - .flatMap(List::stream) + SelectContext selectContext = new SelectContext(predecessorSelects.getIds(), predecessorSelects.getValidityDate(), universalTables, context); + List converted = cqConcept.getSelects().stream() + .map(select -> select.convertToSqlSelects(selectContext)) + .toList(); + + // combine all universal selects and connector selects from preceding step + List allConceptSelects = Stream.concat( + converted.stream().flatMap(sqlSelects -> sqlSelects.getFinalSelects().stream()), + predecessor.getQualifiedSelects().getSqlSelects().stream() + ) .toList(); - Selects finalSelects = predecessorSelects.toBuilder() - .clearSqlSelects() - .sqlSelects(allConceptSelects) - .build(); + Selects finalSelects = 
Selects.builder() + .ids(predecessorSelects.getIds()) + .validityDate(predecessorSelects.getValidityDate()) + .sqlSelects(allConceptSelects) + .build(); return QueryStep.builder() - .cteName(context.getNameGenerator().cteStepName(ConceptCteStep.UNIVERSAL_SELECTS, conceptLabel)) + .cteName(universalTables.cteName(ConceptCteStep.UNIVERSAL_SELECTS)) .selects(finalSelects) .fromTable(QueryStep.toTableLike(predecessor.getCteName())) .predecessors(List.of(predecessor)) .build(); } - private CQTableContext createTableContext(String conceptLabel, CQConcept cqConcept, CQTable cqTable, ConversionContext conversionContext) { + private CQTableContext createTableContext(TablePathGenerator pathGenerator, CQConcept cqConcept, CQTable cqTable, ConversionContext conversionContext) { NameGenerator nameGenerator = conversionContext.getNameGenerator(); SqlFunctionProvider functionProvider = conversionContext.getSqlDialect().getFunctionProvider(); Connector connector = cqTable.getConnector(); String conceptConnectorLabel = nameGenerator.conceptConnectorName(cqConcept, connector); - String tableName = connector.getTable().getName(); - Field primaryColumn = DSL.field(DSL.name(conversionContext.getConfig().getPrimaryColumn())); - Optional tablesValidityDate = convertValidityDate(cqTable, tableName, functionProvider); - SqlTables connectorTables = ConnectorCteStep.createTables(conceptConnectorLabel, tableName, nameGenerator); + SqlIdColumns ids = convertIds(cqConcept, cqTable, conversionContext); + Optional tablesValidityDate = convertValidityDate(cqTable, conceptConnectorLabel, conversionContext); + ConceptConversionTables connectorTables = pathGenerator.createConnectorTables(cqConcept, cqTable, conceptConnectorLabel); // validity date IntervalPackingContext intervalPackingContext = null; - if (intervalPackingRequired(tablesValidityDate, cqConcept)) { - String preprocessingCteName = connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT); - SqlTables intervalPackingTables = IntervalPackingCteStep.getTables(conceptConnectorLabel, preprocessingCteName, nameGenerator); + if (connectorTables.isWithIntervalPacking()) { intervalPackingContext = IntervalPackingContext.builder() .nodeLabel(conceptConnectorLabel) - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(tablesValidityDate.get()) - .intervalPackingTables(intervalPackingTables) + .tables(connectorTables) .build(); } // convert filters List allSqlFiltersForTable = new ArrayList<>(); cqTable.getFilters().stream() - .map(filterValue -> filterValue.convertToSqlFilter(conversionContext, connectorTables)) + .map(filterValue -> filterValue.convertToSqlFilter(ids, conversionContext, connectorTables)) .forEach(allSqlFiltersForTable::add); collectConditionFilters(cqConcept.getElements(), cqTable, functionProvider).ifPresent(allSqlFiltersForTable::add); getDateRestriction(conversionContext, tablesValidityDate).ifPresent(allSqlFiltersForTable::add); // convert selects - SelectContext selectContext = SelectContext.forConnectorSelects(primaryColumn, tablesValidityDate, connectorTables, conversionContext); + SelectContext selectContext = new SelectContext(ids, tablesValidityDate, connectorTables, conversionContext); List allSelectsForTable = cqTable.getSelects().stream() .map(select -> select.convertToSqlSelects(selectContext)) .toList(); return CQTableContext.builder() - .conceptLabel(conceptLabel) - .conceptConnectorLabel(conceptConnectorLabel) - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(tablesValidityDate) .sqlSelects(allSelectsForTable) 
.sqlFilters(allSqlFiltersForTable) .connectorTables(connectorTables) .intervalPackingContext(intervalPackingContext) - .parentContext(conversionContext) + .conversionContext(conversionContext) .build(); } - private static Optional convertValidityDate(CQTable cqTable, String label, SqlFunctionProvider functionProvider) { - if (Objects.isNull(cqTable.findValidityDate())) { - return Optional.empty(); + private static SqlIdColumns convertIds(CQConcept cqConcept, CQTable cqTable, ConversionContext conversionContext) { + + Field primaryColumn = DSL.field(DSL.name(conversionContext.getConfig().getPrimaryColumn())).as(SharedAliases.PRIMARY_COLUMN.getAlias()); + + if (cqConcept.isExcludeFromSecondaryId() + || conversionContext.getSecondaryIdDescription() == null + || !cqTable.hasSelectedSecondaryId(conversionContext.getSecondaryIdDescription()) + ) { + return new SqlIdColumns(primaryColumn); } - ColumnDateRange validityDate = functionProvider.daterange( - cqTable.findValidityDate(), - cqTable.getConnector().getTable().getName(), - label + + Column secondaryIdColumn = cqTable.getConnector().getTable().findSecondaryIdColumn(conversionContext.getSecondaryIdDescription()); + + Preconditions.checkArgument( + secondaryIdColumn != null, + "Expecting Table %s to have a matching secondary id for %s".formatted( + cqTable.getConnector().getTable(), + conversionContext.getSecondaryIdDescription() + ) ); - return Optional.of(validityDate); + + Field secondaryId = DSL.field(DSL.name(secondaryIdColumn.getName())).as(SharedAliases.SECONDARY_ID.getAlias()); + return new SqlIdColumns(primaryColumn, secondaryId); } - private static boolean intervalPackingRequired(Optional validityDate, CQConcept cqConcept) { - return validityDate.isPresent() && !cqConcept.isExcludeFromTimeAggregation(); + private static Optional convertValidityDate(CQTable cqTable, String connectorLabel, ConversionContext context) { + if (Objects.isNull(cqTable.findValidityDate())) { + return Optional.empty(); + } + ColumnDateRange validityDate; + if (context.getDateRestrictionRange() != null) { + validityDate = context.getSqlDialect().getFunctionProvider().forTablesValidityDate(cqTable, context.getDateRestrictionRange(), connectorLabel); + } + else { + validityDate = context.getSqlDialect().getFunctionProvider().forTablesValidityDate(cqTable, connectorLabel); + } + return Optional.of(validityDate); } private static boolean dateRestrictionApplicable(boolean dateRestrictionRequired, Optional validityDateSelect) { @@ -250,7 +266,7 @@ private static Optional getDateRestriction(ConversionContext context } SqlFunctionProvider functionProvider = context.getSqlDialect().getFunctionProvider(); - ColumnDateRange dateRestriction = functionProvider.daterange(context.getDateRestrictionRange()) + ColumnDateRange dateRestriction = functionProvider.forDateRestriction(context.getDateRestrictionRange()) .asDateRestrictionRange(); List dateRestrictionSelects = dateRestriction.toFields().stream() diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQTableContext.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQTableContext.java index 9dfc5a164b..2c7bc62d88 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQTableContext.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/CQTableContext.java @@ -10,15 +10,15 @@ import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import 
com.bakdata.conquery.sql.conversion.cqelement.intervalpacking.IntervalPackingContext; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; import com.bakdata.conquery.sql.conversion.model.QueryStep; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.filter.SqlFilters; import com.bakdata.conquery.sql.conversion.model.select.SqlSelects; import lombok.Builder; import lombok.Value; import lombok.With; -import org.jooq.Field; @Value @Builder @@ -26,13 +26,13 @@ class CQTableContext implements Context { String conceptLabel; String conceptConnectorLabel; - Field primaryColumn; + SqlIdColumns ids; Optional validityDate; List sqlSelects; List sqlFilters; SqlTables connectorTables; IntervalPackingContext intervalPackingContext; - ConversionContext parentContext; + ConversionContext conversionContext; @With QueryStep previous; @@ -43,19 +43,15 @@ public List allSqlSelects() { return Stream.concat(sqlSelects.stream(), sqlFilters.stream().map(SqlFilters::getSelects)).toList(); } - public Field getPrimaryColumn() { + public SqlIdColumns getIds() { if (previous == null) { - return this.primaryColumn; + return ids; } - return previous.getSelects().getPrimaryColumn(); + return previous.getQualifiedSelects().getIds(); } public Optional getIntervalPackingContext() { return Optional.ofNullable(intervalPackingContext); } - @Override - public NameGenerator getNameGenerator() { - return parentContext.getNameGenerator(); - } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConceptCteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConceptCteStep.java new file mode 100644 index 0000000000..ba8bc34caa --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConceptCteStep.java @@ -0,0 +1,39 @@ + +package com.bakdata.conquery.sql.conversion.cqelement.concept; + +import java.util.Set; + +import com.bakdata.conquery.sql.conversion.model.CteStep; +import lombok.AllArgsConstructor; +import lombok.Getter; + +@Getter +@AllArgsConstructor +public enum ConceptCteStep implements CteStep { + + // connector + PREPROCESSING("preprocessing", null), + EVENT_FILTER("event_filter", PREPROCESSING), + AGGREGATION_SELECT("group_select", EVENT_FILTER), + JOIN_BRANCHES("join_branches", AGGREGATION_SELECT), + AGGREGATION_FILTER("group_filter", JOIN_BRANCHES), + + // interval packing selects + UNNEST_DATE("unnested", null), + INTERVAL_PACKING_SELECTS("validity_date_selects", null), + + // universal selects / final step + UNIVERSAL_SELECTS("universal_selects", null); + + public static final Set MANDATORY_STEPS = Set.of( + PREPROCESSING, + EVENT_FILTER, + AGGREGATION_SELECT, + JOIN_BRANCHES, + AGGREGATION_FILTER + ); + + private final String suffix; + private final CteStep predecessor; + +} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorCte.java index 4f49039243..d64d8da2f9 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorCte.java @@ -31,9 +31,9 @@ else if 
(queryStepBuilder.build().getFromTable() == null && queryStepBuilder.bui } /** - * @return The {@link ConnectorCteStep} this instance belongs to. + * @return The {@link ConceptCteStep} this instance belongs to. */ - protected abstract ConnectorCteStep cteStep(); + protected abstract ConceptCteStep cteStep(); protected abstract QueryStep.QueryStepBuilder convertStep(CQTableContext tableContext); diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorCteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorCteStep.java deleted file mode 100644 index cb15c7edbd..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorCteStep.java +++ /dev/null @@ -1,30 +0,0 @@ -package com.bakdata.conquery.sql.conversion.cqelement.concept; - -import java.util.Set; - -import com.bakdata.conquery.sql.conversion.model.CteStep; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; -import com.bakdata.conquery.sql.conversion.model.SqlTables; -import lombok.Getter; -import lombok.RequiredArgsConstructor; - -@Getter -@RequiredArgsConstructor -public enum ConnectorCteStep implements CteStep { - - PREPROCESSING("preprocessing", null), - EVENT_FILTER("event_filter", PREPROCESSING), - AGGREGATION_SELECT("group_select", EVENT_FILTER), - JOIN_BRANCHES("join_branches", AGGREGATION_SELECT), - AGGREGATION_FILTER("group_filter", JOIN_BRANCHES); - - private static final Set STEPS = Set.of(values()); - - private final String suffix; - private final ConnectorCteStep predecessor; - - public static SqlTables createTables(String conceptConnectorLabel, String rootTable, NameGenerator nameGenerator) { - return new SqlTables(conceptConnectorLabel, STEPS, rootTable, nameGenerator); - } - -} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/EventFilterCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/EventFilterCte.java index 703933d196..7e15d3e0da 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/EventFilterCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/EventFilterCte.java @@ -5,9 +5,9 @@ import java.util.stream.Stream; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; -import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.aggregator.SumDistinctSqlAggregator; import com.bakdata.conquery.sql.conversion.model.filter.WhereCondition; import com.bakdata.conquery.sql.conversion.model.select.ExtractingSqlSelect; @@ -15,7 +15,6 @@ import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import com.bakdata.conquery.sql.conversion.model.select.SqlSelects; import org.jooq.Condition; -import org.jooq.Field; class EventFilterCte extends ConnectorCte { @@ -32,14 +31,13 @@ public QueryStep.QueryStepBuilder convertStep(CQTableContext tableContext) { } @Override - public ConnectorCteStep cteStep() { - return ConnectorCteStep.EVENT_FILTER; + public ConceptCteStep cteStep() { + return ConceptCteStep.EVENT_FILTER; } private Selects getEventFilterSelects(CQTableContext tableContext) { String predecessorTableName = tableContext.getConnectorTables().getPredecessor(cteStep()); - - Field primaryColumn 
= QualifyingUtil.qualify(tableContext.getPrimaryColumn(), predecessorTableName); + SqlIdColumns ids = tableContext.getIds().qualify(predecessorTableName); Optional validityDate = tableContext.getValidityDate(); if (validityDate.isPresent()) { @@ -53,14 +51,14 @@ private Selects getEventFilterSelects(CQTableContext tableContext) { .toList(); return Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(validityDate) .sqlSelects(eventFilterSelects) .build(); } /** - * Collects the columns required in {@link ConnectorCteStep#AGGREGATION_SELECT}, but also columns additional tables require (like the ones created by the + * Collects the columns required in {@link ConceptCteStep#AGGREGATION_SELECT}, but also columns additional tables require (like the ones created by the * {@link SumDistinctSqlAggregator}). An additional predecessor can contain an N-ary tree of predecessors itself (like all {@link QueryStep}s), so we want to * look for the deepest predeceasing QueryStep leafs and collect their {@link SqlSelects}, because they expect this CTE to contain all their * {@link SqlSelect#requiredColumns()}. diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/FilterContext.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/FilterContext.java index ca41d70864..d708b3089f 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/FilterContext.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/FilterContext.java @@ -3,7 +3,7 @@ import com.bakdata.conquery.apiv1.query.concept.filter.FilterValue; import com.bakdata.conquery.sql.conversion.Context; import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.SqlTables; import lombok.Value; @@ -13,13 +13,9 @@ public class FilterContext implements Context { /** * A filter value ({@link FilterValue#getValue()}) */ + SqlIdColumns ids; V value; - ConversionContext parentContext; - SqlTables connectorTables; - - @Override - public NameGenerator getNameGenerator() { - return this.parentContext.getNameGenerator(); - } + ConversionContext conversionContext; + SqlTables tables; } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/JoinBranchesCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/JoinBranchesCte.java index df8b175157..53babb57f1 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/JoinBranchesCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/JoinBranchesCte.java @@ -10,14 +10,14 @@ import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.QueryStepJoiner; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.aggregator.SumDistinctSqlAggregator; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; -import org.jooq.Field; import org.jooq.Record; import org.jooq.TableLike; /** - * Joins the {@link ConnectorCteStep#AGGREGATION_SELECT} with the interval packing branch for the aggregated validity date and optional additional predecessors. 
+ * Joins the {@link ConceptCteStep#AGGREGATION_SELECT} with the interval packing branch for the aggregated validity date and optional additional predecessors.
 *
* Joining is optional - if a validity date is not present, the node is excluded from time aggregation or if there is no additional predecessor, no join will * take place. See {@link SumDistinctSqlAggregator} for an example of additional predecessors. @@ -41,8 +41,8 @@ class JoinBranchesCte extends ConnectorCte { @Override - protected ConnectorCteStep cteStep() { - return ConnectorCteStep.JOIN_BRANCHES; + protected ConceptCteStep cteStep() { + return ConceptCteStep.JOIN_BRANCHES; } @Override @@ -56,7 +56,7 @@ protected QueryStep.QueryStepBuilder convertStep(CQTableContext tableContext) { validityDate = Optional.empty(); } else { - IntervalPacker intervalPacker = tableContext.getParentContext().getSqlDialect().getIntervalPacker(); + IntervalPacker intervalPacker = tableContext.getConversionContext().getSqlDialect().getIntervalPacker(); QueryStep lastIntervalPackingStep = intervalPacker.createIntervalPackingSteps(tableContext.getIntervalPackingContext().get()); queriesToJoin.add(lastIntervalPackingStep); validityDate = lastIntervalPackingStep.getQualifiedSelects().getValidityDate(); @@ -66,15 +66,15 @@ protected QueryStep.QueryStepBuilder convertStep(CQTableContext tableContext) { .flatMap(sqlSelects -> sqlSelects.getAdditionalPredecessor().stream()) .forEach(queriesToJoin::add); - Field primaryColumn = QueryStepJoiner.coalescePrimaryColumns(queriesToJoin); + SqlIdColumns ids = QueryStepJoiner.coalesceIds(queriesToJoin); List mergedSqlSelects = QueryStepJoiner.mergeSelects(queriesToJoin); Selects selects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(validityDate) .sqlSelects(mergedSqlSelects) .build(); - TableLike fromTable = QueryStepJoiner.constructJoinedTable(queriesToJoin, LogicalOperation.AND, tableContext.getParentContext()); + TableLike fromTable = QueryStepJoiner.constructJoinedTable(queriesToJoin, LogicalOperation.AND, tableContext.getConversionContext()); return QueryStep.builder() .selects(selects) diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/PreprocessingCte.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/PreprocessingCte.java index 36e1b35dfc..befd8b8e07 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/PreprocessingCte.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/PreprocessingCte.java @@ -2,13 +2,11 @@ import java.util.List; -import com.bakdata.conquery.sql.conversion.SharedAliases; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; import com.bakdata.conquery.sql.conversion.model.filter.WhereCondition; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import org.jooq.Condition; -import org.jooq.Field; class PreprocessingCte extends ConnectorCte { @@ -18,11 +16,8 @@ public QueryStep.QueryStepBuilder convertStep(CQTableContext tableContext) { .flatMap(sqlSelects -> sqlSelects.getPreprocessingSelects().stream()) .toList(); - // we alias the primary column, so we can rely upon in other places that it has a specific name - Field aliasesPrimaryColumn = tableContext.getPrimaryColumn().as(SharedAliases.PRIMARY_COLUMN.getAlias()); - Selects preprocessingSelects = Selects.builder() - .primaryColumn(aliasesPrimaryColumn) + .ids(tableContext.getIds()) .validityDate(tableContext.getValidityDate()) .sqlSelects(forPreprocessing) .build(); @@ -36,12 +31,12 @@ public QueryStep.QueryStepBuilder convertStep(CQTableContext 
tableContext) { return QueryStep.builder() .selects(preprocessingSelects) .conditions(conditions) - .fromTable(QueryStep.toTableLike(tableContext.getConnectorTables().getPredecessor(ConnectorCteStep.PREPROCESSING))); + .fromTable(QueryStep.toTableLike(tableContext.getConnectorTables().getPredecessor(ConceptCteStep.PREPROCESSING))); } @Override - public ConnectorCteStep cteStep() { - return ConnectorCteStep.PREPROCESSING; + public ConceptCteStep cteStep() { + return ConceptCteStep.PREPROCESSING; } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/SelectFilterUtil.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/SelectFilterUtil.java index c4127e7874..8b2862bc06 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/SelectFilterUtil.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/SelectFilterUtil.java @@ -1,10 +1,10 @@ package com.bakdata.conquery.sql.conversion.cqelement.concept; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; -import com.bakdata.conquery.sql.conversion.model.filter.WhereCondition; -import com.bakdata.conquery.sql.conversion.model.filter.WhereClauses; import com.bakdata.conquery.sql.conversion.model.filter.MultiSelectCondition; import com.bakdata.conquery.sql.conversion.model.filter.SqlFilters; +import com.bakdata.conquery.sql.conversion.model.filter.WhereClauses; +import com.bakdata.conquery.sql.conversion.model.filter.WhereCondition; import com.bakdata.conquery.sql.conversion.model.select.ExtractingSqlSelect; import com.bakdata.conquery.sql.conversion.model.select.SqlSelects; @@ -12,15 +12,15 @@ public class SelectFilterUtil { public static SqlFilters convert(SelectFilter selectFilter, FilterContext context, String[] values) { ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>( - context.getConnectorTables().getPredecessor(ConnectorCteStep.PREPROCESSING), + context.getTables().getPredecessor(ConceptCteStep.PREPROCESSING), selectFilter.getColumn().getName(), String.class ); WhereCondition condition = new MultiSelectCondition( - context.getConnectorTables().qualifyOnPredecessor(ConnectorCteStep.EVENT_FILTER, rootSelect.aliased()), + rootSelect.qualify(context.getTables().getPredecessor(ConceptCteStep.EVENT_FILTER)).select(), values, - context.getParentContext().getSqlDialect().getFunctionProvider() + context.getConversionContext().getSqlDialect().getFunctionProvider() ); return new SqlFilters( diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/TablePathGenerator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/TablePathGenerator.java new file mode 100644 index 0000000000..ea15999af2 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/concept/TablePathGenerator.java @@ -0,0 +1,137 @@ +package com.bakdata.conquery.sql.conversion.cqelement.concept; + +import static com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep.EVENT_FILTER; +import static com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep.JOIN_BRANCHES; +import static com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep.MANDATORY_STEPS; +import static com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep.UNIVERSAL_SELECTS; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import com.bakdata.conquery.apiv1.query.concept.filter.CQTable; 
+import com.bakdata.conquery.apiv1.query.concept.specific.CQConcept; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; +import com.bakdata.conquery.sql.conversion.cqelement.intervalpacking.IntervalPackingCteStep; +import com.bakdata.conquery.sql.conversion.dialect.SqlDialect; +import com.bakdata.conquery.sql.conversion.model.ConceptConversionTables; +import com.bakdata.conquery.sql.conversion.model.CteStep; +import com.bakdata.conquery.sql.conversion.model.NameGenerator; +import com.bakdata.conquery.sql.conversion.model.QueryStep; +import com.bakdata.conquery.sql.conversion.model.Selects; +import lombok.Data; +import lombok.Value; + +@Value +class TablePathGenerator { + + SqlDialect sqlDialect; + NameGenerator nameGenerator; + + public TablePathGenerator(ConversionContext context) { + this.sqlDialect = context.getSqlDialect(); + this.nameGenerator = context.getNameGenerator(); + } + + public ConceptConversionTables createConnectorTables(CQConcept cqConcept, CQTable cqTable, String label) { + TablePathInfo tableInfo = collectConnectorTables(cqConcept, cqTable); + return create(tableInfo, label); + } + + public ConceptConversionTables createUniversalTables(QueryStep predecessor, CQConcept cqConcept) { + TablePathInfo tableInfo = collectConceptTables(predecessor, cqConcept); + String conceptName = nameGenerator.conceptName(cqConcept); + return create(tableInfo, conceptName); + } + + private ConceptConversionTables create(TablePathInfo tableInfo, String label) { + + Map cteNameMap = CteStep.createCteNameMap(tableInfo.getMappings().keySet(), label, nameGenerator); + String lastPredecessorName = tableInfo.getLastPredecessor() != null + ? cteNameMap.get(tableInfo.getLastPredecessor()) + : tableInfo.getRootTable(); + + return new ConceptConversionTables( + tableInfo.getRootTable(), + cteNameMap, + tableInfo.getMappings(), + lastPredecessorName, + tableInfo.isContainsIntervalPacking() + ); + } + + private TablePathInfo collectConnectorTables(CQConcept cqConcept, CQTable cqTable) { + + TablePathInfo tableInfo = new TablePathInfo(); + tableInfo.setRootTable(cqTable.getConnector().getTable().getName()); + tableInfo.addRequiredSteps(MANDATORY_STEPS); + tableInfo.setLastPredecessor(JOIN_BRANCHES); + + // no validity date aggregation possible + if (cqTable.findValidityDate() == null || cqConcept.isExcludeFromTimeAggregation()) { + return tableInfo; + } + + // interval packing required + tableInfo.setContainsIntervalPacking(true); + tableInfo.addMappings(IntervalPackingCteStep.getMappings(EVENT_FILTER, sqlDialect)); + + // TODO handle event date selects + + return tableInfo; + } + + private TablePathInfo collectConceptTables(QueryStep predecessor, CQConcept cqConcept) { + + TablePathInfo tableInfo = new TablePathInfo(); + tableInfo.setRootTable(predecessor.getCteName()); // last table of a single connector or merged and aggregated table of multiple connectors + tableInfo.addRequiredStep(UNIVERSAL_SELECTS); + + // TODO handle event date selects + + return tableInfo; + } + + @Data + private static class TablePathInfo { + + /** + * Mapping of a CTE step to their respective preceding CTE step. + */ + private final Map mappings; + + /** + * The root table is the predecessor of all CteSteps from {@link TablePathInfo#mappings} which have a null-predecessor. + */ + private String rootTable; + + /** + * When converting {@link Selects}, we need to qualify the final references onto the predecessor of the last CTE that is part of the conversion. 
+ * It varies depending on the given {@link CQConcept}, thus we need to set it explicitly. + */ + private CteStep lastPredecessor; + + /** + * True if this path info contains CTEs from {@link IntervalPackingCteStep}. + */ + private boolean containsIntervalPacking; + + public TablePathInfo() { + this.mappings = new HashMap<>(); + } + + public void addMappings(Map mappings) { + this.mappings.putAll(mappings); + } + + public void addRequiredSteps(Set steps) { + this.mappings.putAll(CteStep.getDefaultPredecessorMap(steps)); + } + + public void addRequiredStep(CteStep step) { + this.mappings.put(step, step.getPredecessor()); + } + + } + +} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/AnsiSqlIntervalPacker.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/AnsiSqlIntervalPacker.java index 2db795a310..7cdc3c586f 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/AnsiSqlIntervalPacker.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/AnsiSqlIntervalPacker.java @@ -11,6 +11,7 @@ import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.select.FieldWrapper; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import lombok.RequiredArgsConstructor; @@ -29,12 +30,12 @@ public QueryStep createIntervalPackingSteps(IntervalPackingContext context) { private QueryStep createPreviousEndStep(IntervalPackingContext context) { - String sourceTableName = context.getIntervalPackingTables().getRootTable(); - Field primaryColumn = QualifyingUtil.qualify(context.getPrimaryColumn(), sourceTableName); + String sourceTableName = context.getTables().getPredecessor(IntervalPackingCteStep.PREVIOUS_END); + SqlIdColumns ids = context.getIds().qualify(sourceTableName); ColumnDateRange validityDate = context.getValidityDate().qualify(sourceTableName); Field previousEnd = DSL.max(validityDate.getEnd()) - .over(DSL.partitionBy(primaryColumn) + .over(DSL.partitionBy(ids.toFields()) .orderBy(validityDate.getStart(), validityDate.getEnd()) .rowsBetweenUnboundedPreceding() .andPreceding(1)) @@ -44,13 +45,13 @@ private QueryStep createPreviousEndStep(IntervalPackingContext context) { qualifiedSelects.add(new FieldWrapper<>(previousEnd)); Selects previousEndSelects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(Optional.of(validityDate)) .sqlSelects(qualifiedSelects) .build(); return QueryStep.builder() - .cteName(context.getIntervalPackingTables().cteName(IntervalPackingCteStep.PREVIOUS_END)) + .cteName(context.getTables().cteName(IntervalPackingCteStep.PREVIOUS_END)) .selects(previousEndSelects) .fromTable(QueryStep.toTableLike(sourceTableName)) .predecessors(Optional.ofNullable(context.getPredecessor()).stream().toList()) @@ -61,7 +62,7 @@ private QueryStep createRangeIndexStep(QueryStep previousEndStep, IntervalPackin String previousEndCteName = previousEndStep.getCteName(); Selects previousEndSelects = previousEndStep.getQualifiedSelects(); - Field primaryColumn = previousEndSelects.getPrimaryColumn(); + SqlIdColumns ids = previousEndSelects.getIds(); ColumnDateRange validityDate = previousEndSelects.getValidityDate().get(); Field previousEnd = 
DSL.field(DSL.name(previousEndCteName, IntervalPacker.PREVIOUS_END_FIELD_NAME), Date.class); @@ -69,7 +70,7 @@ private QueryStep createRangeIndexStep(QueryStep previousEndStep, IntervalPackin DSL.sum( DSL.when(validityDate.getStart().greaterThan(previousEnd), DSL.val(1)) .otherwise(DSL.inline(null, Integer.class))) - .over(DSL.partitionBy(primaryColumn) + .over(DSL.partitionBy(ids.toFields()) .orderBy(validityDate.getStart(), validityDate.getEnd()) .rowsUnboundedPreceding()) .as(IntervalPacker.RANGE_INDEX_FIELD_NAME); @@ -78,13 +79,13 @@ private QueryStep createRangeIndexStep(QueryStep previousEndStep, IntervalPackin qualifiedSelects.add(new FieldWrapper<>(rangeIndex)); Selects rangeIndexSelects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(Optional.of(validityDate)) .sqlSelects(qualifiedSelects) .build(); return QueryStep.builder() - .cteName(context.getIntervalPackingTables().cteName(IntervalPackingCteStep.RANGE_INDEX)) + .cteName(context.getTables().cteName(IntervalPackingCteStep.RANGE_INDEX)) .selects(rangeIndexSelects) .fromTable(QueryStep.toTableLike(previousEndCteName)) .predecessors(List.of(previousEndStep)) @@ -95,7 +96,7 @@ private QueryStep createIntervalCompleteStep(QueryStep rangeIndexStep, IntervalP String rangeIndexCteName = rangeIndexStep.getCteName(); Selects rangeIndexSelects = rangeIndexStep.getQualifiedSelects(); - Field primaryColumn = rangeIndexSelects.getPrimaryColumn(); + SqlIdColumns ids = rangeIndexSelects.getIds(); ColumnDateRange validityDate = rangeIndexSelects.getValidityDate().get(); Field rangeStart = DSL.min(validityDate.getStart()).as(IntervalPacker.RANGE_START_MIN_FIELD_NAME); @@ -104,19 +105,19 @@ private QueryStep createIntervalCompleteStep(QueryStep rangeIndexStep, IntervalP List qualifiedSelects = QualifyingUtil.qualify(context.getCarryThroughSelects(), rangeIndexCteName); Selects intervalCompleteSelects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(Optional.of(ColumnDateRange.of(rangeStart, rangeEnd))) .sqlSelects(qualifiedSelects) .build(); // we group range start and end by range index List> groupBySelects = new ArrayList<>(); - groupBySelects.add(primaryColumn); + groupBySelects.addAll(ids.toFields()); groupBySelects.add(rangeIndex); qualifiedSelects.stream().map(SqlSelect::select).forEach(groupBySelects::add); return QueryStep.builder() - .cteName(context.getIntervalPackingTables().cteName(IntervalPackingCteStep.INTERVAL_COMPLETE)) + .cteName(context.getTables().cteName(IntervalPackingCteStep.INTERVAL_COMPLETE)) .selects(intervalCompleteSelects) .fromTable(QueryStep.toTableLike(rangeIndexCteName)) .predecessors(List.of(rangeIndexStep)) diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingContext.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingContext.java index 22ef19e1f7..2660539bd4 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingContext.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingContext.java @@ -6,14 +6,14 @@ import javax.annotation.CheckForNull; import com.bakdata.conquery.sql.conversion.Context; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; import 
com.bakdata.conquery.sql.conversion.model.QueryStep; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import lombok.Builder; import lombok.Value; -import org.jooq.Field; @Value @Builder @@ -24,7 +24,7 @@ public class IntervalPackingContext implements Context { */ String nodeLabel; - Field primaryColumn; + SqlIdColumns ids; ColumnDateRange validityDate; @@ -33,7 +33,7 @@ public class IntervalPackingContext implements Context { */ QueryStep predecessor; - SqlTables intervalPackingTables; + SqlTables tables; /** * The selects you want to carry through all interval packing steps. They won't get touched besides qualifying. @@ -41,7 +41,7 @@ public class IntervalPackingContext implements Context { @Builder.Default List carryThroughSelects = Collections.emptyList(); - NameGenerator nameGenerator; + ConversionContext conversionContext; @CheckForNull public QueryStep getPredecessor() { diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingCteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingCteStep.java index 62fe83ef9b..84f7f150f7 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingCteStep.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/IntervalPackingCteStep.java @@ -1,15 +1,18 @@ package com.bakdata.conquery.sql.conversion.cqelement.intervalpacking; +import java.util.Map; import java.util.Set; +import com.bakdata.conquery.sql.conversion.Context; +import com.bakdata.conquery.sql.conversion.dialect.SqlDialect; import com.bakdata.conquery.sql.conversion.model.CteStep; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; +import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.SqlTables; +import lombok.AllArgsConstructor; import lombok.Getter; -import lombok.RequiredArgsConstructor; @Getter -@RequiredArgsConstructor +@AllArgsConstructor public enum IntervalPackingCteStep implements CteStep { PREVIOUS_END("previous_end", null), @@ -19,10 +22,35 @@ public enum IntervalPackingCteStep implements CteStep { private static final Set STEPS = Set.of(values()); private final String suffix; - private final IntervalPackingCteStep predecessor; + private final CteStep predecessor; - public static SqlTables getTables(String label, String rootTable, NameGenerator nameGenerator) { - return new SqlTables(label, STEPS, rootTable, nameGenerator); + /** + * Create {@link SqlTables} based on a preceding {@link QueryStep}, that must contain a validity date which shall be interval-packed. + */ + public static SqlTables createTables(QueryStep predecessor, Context context) { + + String rootTable = predecessor.getCteName(); + Set requiredSteps = context.getSqlDialect().supportsSingleColumnRanges() + ? Set.of(INTERVAL_COMPLETE) + : Set.of(values()); + + Map cteNameMap = CteStep.createCteNameMap(requiredSteps, rootTable, context.getNameGenerator()); + Map predecessorMap = CteStep.getDefaultPredecessorMap(requiredSteps); + + return new SqlTables(rootTable, cteNameMap, predecessorMap); + } + + /** + * Create predecessor mappings for these interval packing {@link CteStep}s based on a preceding root step that must contain a validity date which + * shall be interval-packed. 
+ */ + public static Map getMappings(CteStep root, SqlDialect dialect) { + if (dialect.supportsSingleColumnRanges()) { + return Map.of(INTERVAL_COMPLETE, root); + } + Map mappings = CteStep.getDefaultPredecessorMap(Set.of(values())); + mappings.put(PREVIOUS_END, root); + return mappings; } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/PostgreSqlIntervalPacker.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/PostgreSqlIntervalPacker.java index 0643315e3b..a358531c42 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/PostgreSqlIntervalPacker.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/cqelement/intervalpacking/PostgreSqlIntervalPacker.java @@ -1,16 +1,14 @@ package com.bakdata.conquery.sql.conversion.cqelement.intervalpacking; -import java.util.List; import java.util.Optional; import com.bakdata.conquery.sql.conversion.dialect.IntervalPacker; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; -import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import lombok.RequiredArgsConstructor; -import org.jooq.Field; /** * PostgreSql supports interval packing with a native range function. @@ -25,23 +23,23 @@ public class PostgreSqlIntervalPacker implements IntervalPacker { @Override public QueryStep createIntervalPackingSteps(IntervalPackingContext context) { - String sourceTableName = context.getIntervalPackingTables().getRootTable(); - Field primaryColumn = QualifyingUtil.qualify(context.getPrimaryColumn(), sourceTableName); + String sourceTableName = context.getTables().getPredecessor(IntervalPackingCteStep.INTERVAL_COMPLETE); + SqlIdColumns ids = context.getIds().qualify(sourceTableName); ColumnDateRange qualifiedValidityDate = context.getValidityDate().qualify(sourceTableName); ColumnDateRange aggregatedValidityDate = this.functionProvider.aggregated(qualifiedValidityDate) .asValidityDateRange(context.getNodeLabel()); Selects selectsWithAggregatedValidityDate = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .validityDate(Optional.of(aggregatedValidityDate)) .sqlSelects(context.getCarryThroughSelects()) .build(); return QueryStep.builder() - .cteName(context.getIntervalPackingTables().cteName(IntervalPackingCteStep.INTERVAL_COMPLETE)) + .cteName(context.getTables().cteName(IntervalPackingCteStep.INTERVAL_COMPLETE)) .selects(selectsWithAggregatedValidityDate) .fromTable(QueryStep.toTableLike(sourceTableName)) - .groupBy(List.of(primaryColumn)) + .groupBy(ids.toFields()) .predecessors(Optional.ofNullable(context.getPredecessor()).stream().toList()) .build(); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlDialect.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlDialect.java index ae773aa1c5..17e54c5d84 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlDialect.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlDialect.java @@ -22,7 +22,7 @@ public HanaSqlDialect(DSLContext dslContext) { this.dslContext = dslContext; this.hanaSqlFunctionProvider = new HanaSqlFunctionProvider(); this.hanaIntervalPacker = new 
AnsiSqlIntervalPacker(); - this.hanaSqlDateAggregator = new AnsiSqlDateAggregator(this.hanaSqlFunctionProvider, this.hanaIntervalPacker); + this.hanaSqlDateAggregator = new AnsiSqlDateAggregator(this.hanaIntervalPacker); this.defaultNotationParser = new DefaultSqlCDateSetParser(); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlFunctionProvider.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlFunctionProvider.java index 3de5aac2ae..e5b39af252 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlFunctionProvider.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/HanaSqlFunctionProvider.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.stream.Collectors; +import com.bakdata.conquery.apiv1.query.concept.filter.CQTable; import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.ValidityDate; @@ -61,50 +62,30 @@ public Condition dateRestriction(ColumnDateRange dateRestriction, ColumnDateRang } @Override - public ColumnDateRange daterange(CDateRange dateRestriction) { - - String startDateExpression = MIN_DATE_VALUE; - String endDateExpression = MAX_DATE_VALUE; - - if (dateRestriction.hasLowerBound()) { - startDateExpression = dateRestriction.getMin().toString(); - } - if (dateRestriction.hasUpperBound()) { - endDateExpression = dateRestriction.getMax().toString(); - } + public ColumnDateRange forDateRestriction(CDateRange dateRestriction) { + return toColumnDateRange(dateRestriction).asDateRestrictionRange(); + } - return ColumnDateRange.of(toDateField(startDateExpression), toDateField(endDateExpression)) - .asDateRestrictionRange(); + @Override + public ColumnDateRange forTablesValidityDate(CQTable cqTable, String alias) { + return toColumnDateRange(cqTable).asValidityDateRange(alias); } @Override - public ColumnDateRange daterange(ValidityDate validityDate, String qualifier, String label) { + public ColumnDateRange forTablesValidityDate(CQTable cqTable, CDateRange dateRestriction, String alias) { - Column startColumn; - Column endColumn; + ColumnDateRange validityDate = toColumnDateRange(cqTable); + ColumnDateRange restriction = toColumnDateRange(dateRestriction); - if (validityDate.getEndColumn() != null) { - startColumn = validityDate.getStartColumn(); - endColumn = validityDate.getEndColumn(); - } - else { - startColumn = validityDate.getColumn(); - endColumn = validityDate.getColumn(); - } + Field lowerBound = DSL.when(validityDate.getStart().lessThan(restriction.getStart()), restriction.getStart()) + .otherwise(validityDate.getStart()); - Field rangeStart = DSL.coalesce( - DSL.field(DSL.name(qualifier, startColumn.getName()), Date.class), - toDateField(MIN_DATE_VALUE) - ); - // when aggregating date ranges, we want to treat the last day of the range as excluded, - // so when using the date value of the end column, we add +1 day as end of the date range - Field rangeEnd = DSL.coalesce( - addDays(DSL.field(DSL.name(qualifier, endColumn.getName()), Date.class), 1), - toDateField(MAX_DATE_VALUE) - ); + Field maxDate = toDateField(MAX_DATE_VALUE); // we want to add +1 day to the end date - except when it's the max date already + Field restrictionUpperBound = DSL.when(restriction.getEnd().eq(maxDate), maxDate).otherwise(addDays(restriction.getEnd(), 1)); + Field upperBound = DSL.when(validityDate.getEnd().greaterThan(restriction.getEnd()), 
restrictionUpperBound) + .otherwise(validityDate.getEnd()); - return ColumnDateRange.of(rangeStart, rangeEnd) - .asValidityDateRange(label); + return ColumnDateRange.of(lowerBound, upperBound).as(alias); } @Override @@ -267,4 +248,51 @@ private Field toVarcharField(Field startDate, Param dateE ); } + private ColumnDateRange toColumnDateRange(CDateRange dateRestriction) { + + String startDateExpression = MIN_DATE_VALUE; + String endDateExpression = MAX_DATE_VALUE; + + if (dateRestriction.hasLowerBound()) { + startDateExpression = dateRestriction.getMin().toString(); + } + if (dateRestriction.hasUpperBound()) { + endDateExpression = dateRestriction.getMax().toString(); + } + + return ColumnDateRange.of(toDateField(startDateExpression), toDateField(endDateExpression)); + } + + private ColumnDateRange toColumnDateRange(CQTable cqTable) { + + ValidityDate validityDate = cqTable.findValidityDate(); + String tableName = cqTable.getConnector().getTable().getName(); + + Column startColumn; + Column endColumn; + + // if no end column is present, the only existing column is both start and end of the date range + if (validityDate.getEndColumn() == null) { + startColumn = validityDate.getColumn(); + endColumn = validityDate.getColumn(); + } + else { + startColumn = validityDate.getStartColumn(); + endColumn = validityDate.getEndColumn(); + } + + Field rangeStart = DSL.coalesce( + DSL.field(DSL.name(tableName, startColumn.getName()), Date.class), + toDateField(MIN_DATE_VALUE) + ); + // when aggregating date ranges, we want to treat the last day of the range as excluded, + // so when using the date value of the end column, we add +1 day as end of the date range + Field rangeEnd = DSL.coalesce( + addDays(DSL.field(DSL.name(tableName, endColumn.getName()), Date.class), 1), + toDateField(MAX_DATE_VALUE) + ); + + return ColumnDateRange.of(rangeStart, rangeEnd); + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlDialect.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlDialect.java index c0d695cdfa..706831e64f 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlDialect.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlDialect.java @@ -41,6 +41,11 @@ public boolean requiresAggregationInFinalStep() { return false; } + @Override + public boolean supportsSingleColumnRanges() { + return true; + } + @Override public List> getNodeConverters() { return getDefaultNodeConverters(); diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlFunctionProvider.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlFunctionProvider.java index 2f3256ea87..fd6e9df8ba 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlFunctionProvider.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/PostgreSqlFunctionProvider.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.stream.Collectors; +import com.bakdata.conquery.apiv1.query.concept.filter.CQTable; import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.ValidityDate; @@ -64,68 +65,28 @@ public Condition dateRestriction(ColumnDateRange dateRestriction, ColumnDateRang } @Override - public ColumnDateRange daterange(CDateRange dateRestriction) { - - String startDateExpression = 
MINUS_INFINITY_DATE_VALUE; - String endDateExpression = INFINITY_DATE_VALUE; - - if (dateRestriction.hasLowerBound()) { - startDateExpression = dateRestriction.getMin().toString(); - } - if (dateRestriction.hasUpperBound()) { - endDateExpression = dateRestriction.getMax().toString(); - } - - Field dateRestrictionRange = DSL.function( - "daterange", - Object.class, - toDateField(startDateExpression), - toDateField(endDateExpression), - DSL.val("[]") - ); - - return ColumnDateRange.of(dateRestrictionRange) - .asDateRestrictionRange(); + public ColumnDateRange forDateRestriction(CDateRange dateRestriction) { + return toColumnDateRange(dateRestriction).asDateRestrictionRange(); } @Override - public ColumnDateRange daterange(ValidityDate validityDate, String qualifier, String label) { + public ColumnDateRange forTablesValidityDate(CQTable cqTable, String alias) { + return toColumnDateRange(cqTable).asValidityDateRange(alias); + } - Field dateRange; + @Override + public ColumnDateRange forTablesValidityDate(CQTable cqTable, CDateRange dateRestriction, String alias) { - if (validityDate.getEndColumn() != null) { + ColumnDateRange validityDate = toColumnDateRange(cqTable); + ColumnDateRange restriction = toColumnDateRange(dateRestriction); - Field startColumn = DSL.coalesce( - DSL.field(DSL.name(qualifier, validityDate.getStartColumn().getName())), - toDateField(MINUS_INFINITY_DATE_VALUE) - ); - Field endColumn = DSL.coalesce( - DSL.field(DSL.name(qualifier, validityDate.getEndColumn().getName())), - toDateField(INFINITY_DATE_VALUE) - ); - - dateRange = daterange(startColumn, endColumn, "[]"); - } - else { - Column validityDateColumn = validityDate.getColumn(); - dateRange = switch (validityDateColumn.getType()) { - // if validityDateColumn is a DATE_RANGE we can make use of Postgres' integrated daterange type. 
- case DATE_RANGE -> DSL.field(validityDateColumn.getName()); - // if the validity date column is not of daterange type, we construct it manually - case DATE -> { - Field column = DSL.field(DSL.name(qualifier, validityDate.getColumn().getName()), Date.class); - Field startColumn = DSL.coalesce(column, toDateField(MINUS_INFINITY_DATE_VALUE)); - Field endColumn = DSL.coalesce(column, toDateField(INFINITY_DATE_VALUE)); - yield daterange(startColumn, endColumn, "[]"); - } - default -> throw new IllegalArgumentException( - "Given column type '%s' can't be converted to a proper date restriction.".formatted(validityDateColumn.getType()) - ); - }; - } + Field intersection = DSL.field( + "{0} * {1}", // intersection of both ranges + validityDate.getRange(), + restriction.getRange() + ); - return ColumnDateRange.of(dateRange) - .asValidityDateRange(label); + return ColumnDateRange.of(intersection).asValidityDateRange(alias); } @Override @@ -247,4 +208,76 @@ public Field toDateField(String dateValue) { return DSL.field("{0}::{1}", Date.class, DSL.val(dateValue), DSL.keyword("date")); } + private ColumnDateRange toColumnDateRange(CDateRange dateRestriction) { + String startDateExpression = MINUS_INFINITY_DATE_VALUE; + String endDateExpression = INFINITY_DATE_VALUE; + + if (dateRestriction.hasLowerBound()) { + startDateExpression = dateRestriction.getMin().toString(); + } + if (dateRestriction.hasUpperBound()) { + endDateExpression = dateRestriction.getMax().toString(); + } + + Field dateRestrictionRange = DSL.function( + "daterange", + Object.class, + toDateField(startDateExpression), + toDateField(endDateExpression), + DSL.val("[]") + ); + + return ColumnDateRange.of(dateRestrictionRange); + } + + private ColumnDateRange toColumnDateRange(CQTable cqTable) { + ValidityDate validityDate = cqTable.findValidityDate(); + String tableName = cqTable.getConnector().getTable().getName(); + + Field dateRange; + + if (validityDate.getEndColumn() != null) { + + Field startColumn = DSL.coalesce( + DSL.field(DSL.name(tableName, validityDate.getStartColumn().getName())), + toDateField(MINUS_INFINITY_DATE_VALUE) + ); + Field endColumn = DSL.coalesce( + DSL.field(DSL.name(tableName, validityDate.getEndColumn().getName())), + toDateField(INFINITY_DATE_VALUE) + ); + + return ColumnDateRange.of(daterange(startColumn, endColumn, "[]")); + } + + Column validityDateColumn = validityDate.getColumn(); + dateRange = switch (validityDateColumn.getType()) { + // if validityDateColumn is a DATE_RANGE we can make use of Postgres' integrated daterange type, but the upper bound is exclusive by default + case DATE_RANGE -> { + Field daterange = DSL.field(DSL.name(validityDateColumn.getName())); + Field startColumn = DSL.coalesce( + DSL.function("lower", Date.class, daterange), + toDateField(MINUS_INFINITY_DATE_VALUE) + ); + Field endColumn = DSL.coalesce( + DSL.function("upper", Date.class, daterange), + toDateField(INFINITY_DATE_VALUE) + ); + yield daterange(startColumn, endColumn, "[]"); + } + // if the validity date column is not of daterange type, we construct it manually + case DATE -> { + Field column = DSL.field(DSL.name(tableName, validityDate.getColumn().getName()), Date.class); + Field startColumn = DSL.coalesce(column, toDateField(MINUS_INFINITY_DATE_VALUE)); + Field endColumn = DSL.coalesce(column, toDateField(INFINITY_DATE_VALUE)); + yield daterange(startColumn, endColumn, "[]"); + } + default -> throw new IllegalArgumentException( + "Given column type '%s' can't be converted to a proper date 
restriction.".formatted(validityDateColumn.getType()) + ); + }; + + return ColumnDateRange.of(dateRange); + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDateAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDateAggregator.java index 7f96a0fd8f..6ae2919930 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDateAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDateAggregator.java @@ -3,8 +3,8 @@ import java.util.List; import com.bakdata.conquery.models.query.queryplan.DateAggregationAction; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.cqelement.aggregation.DateAggregationDates; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; @@ -20,12 +20,12 @@ QueryStep apply( List carryThroughSelects, DateAggregationDates dateAggregationDates, DateAggregationAction dateAggregationAction, - NameGenerator nameGenerator + ConversionContext conversionContext ); /** * Inverts the validity date of the given base step. */ - QueryStep invertAggregatedIntervals(QueryStep baseStep, NameGenerator nameGenerator); + QueryStep invertAggregatedIntervals(QueryStep baseStep, ConversionContext conversionContext); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDialect.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDialect.java index 7ff8be5ffe..a96283528e 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDialect.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlDialect.java @@ -15,6 +15,7 @@ import com.bakdata.conquery.sql.conversion.cqelement.concept.CQConceptConverter; import com.bakdata.conquery.sql.conversion.model.QueryStepTransformer; import com.bakdata.conquery.sql.conversion.query.ConceptQueryConverter; +import com.bakdata.conquery.sql.conversion.query.SecondaryIdQueryConverter; import com.bakdata.conquery.sql.conversion.supplier.DateNowSupplier; import com.bakdata.conquery.sql.conversion.supplier.SystemDateNowSupplier; import com.bakdata.conquery.sql.execution.SqlCDateSetParser; @@ -44,6 +45,10 @@ default boolean requiresAggregationInFinalStep() { return true; } + default boolean supportsSingleColumnRanges() { + return false; + } + default List> getDefaultNodeConverters() { return List.of( new CQDateRestrictionConverter(), @@ -51,7 +56,8 @@ default List> getDefaultNodeConverters() { new CQOrConverter(), new CQNegationConverter(), new CQConceptConverter(), - new ConceptQueryConverter(new QueryStepTransformer(getDSLContext())) + new ConceptQueryConverter(new QueryStepTransformer(getDSLContext())), + new SecondaryIdQueryConverter() ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlFunctionProvider.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlFunctionProvider.java index 8e952cec7a..0aad646a93 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlFunctionProvider.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/dialect/SqlFunctionProvider.java @@ -4,8 +4,8 @@ import java.time.temporal.ChronoUnit; import java.util.List; +import com.bakdata.conquery.apiv1.query.concept.filter.CQTable; import 
com.bakdata.conquery.models.common.daterange.CDateRange; -import com.bakdata.conquery.models.datasets.concepts.ValidityDate; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; import com.bakdata.conquery.sql.conversion.model.QueryStep; import org.jooq.Condition; @@ -46,9 +46,18 @@ public interface SqlFunctionProvider { */ Condition dateRestriction(ColumnDateRange dateRestrictionRange, ColumnDateRange validityFieldRange); - ColumnDateRange daterange(CDateRange dateRestriction); + ColumnDateRange forDateRestriction(CDateRange dateRestriction); - ColumnDateRange daterange(ValidityDate validityDate, String qualifier, String label); + /** + * Creates a {@link ColumnDateRange} for a tables {@link CQTable}s validity date. + */ + ColumnDateRange forTablesValidityDate(CQTable cqTable, String alias); + + /** + * Creates a {@link ColumnDateRange} for a tables {@link CQTable}s validity date. The validity dates bounds will be restricted by the given date + * restriction. + */ + ColumnDateRange forTablesValidityDate(CQTable cqTable, CDateRange dateRestriction, String alias); ColumnDateRange aggregated(ColumnDateRange columnDateRange); @@ -109,23 +118,21 @@ default Condition in(Field column, String[] values) { default TableOnConditionStep innerJoin( Table leftPartQueryBase, QueryStep rightPartQS, - Field leftPartPrimaryColumn, - Field rightPartPrimaryColumn + List joinConditions ) { return leftPartQueryBase .innerJoin(DSL.name(rightPartQS.getCteName())) - .on(leftPartPrimaryColumn.eq(rightPartPrimaryColumn)); + .on(joinConditions.toArray(Condition[]::new)); } default TableOnConditionStep fullOuterJoin( Table leftPartQueryBase, QueryStep rightPartQS, - Field leftPartPrimaryColumn, - Field rightPartPrimaryColumn + List joinConditions ) { return leftPartQueryBase .fullOuterJoin(DSL.name(rightPartQS.getCteName())) - .on(leftPartPrimaryColumn.eq(rightPartPrimaryColumn)); + .on(joinConditions.toArray(Condition[]::new)); } default Field toDateField(String dateExpression) { diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/ColumnDateRange.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/ColumnDateRange.java index 55013f2efa..1d4fb6a46a 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/ColumnDateRange.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/ColumnDateRange.java @@ -7,34 +7,48 @@ import lombok.Getter; import org.jooq.Field; -import org.jooq.impl.DSL; @Getter -public class ColumnDateRange { +public class ColumnDateRange implements Qualifiable { private static final String DATE_RESTRICTION_COLUMN_NAME = "date_restriction"; private static final String VALIDITY_DATE_COLUMN_NAME_SUFFIX = "_validity_date"; private static final String START_SUFFIX = "_start"; private static final String END_SUFFIX = "_end"; - private final boolean isEmpty; private final Field range; private final Field start; private final Field end; + private final String alias; - private ColumnDateRange(boolean isEmpty, Field range, Field startColumn, Field endColumn) { - this.isEmpty = isEmpty; - this.range = range; + protected ColumnDateRange(Field startColumn, Field endColumn, String alias) { + this.range = null; this.start = startColumn; this.end = endColumn; + this.alias = alias; + } + + protected ColumnDateRange(Field range, String alias) { + this.range = range; + this.start = null; + this.end = null; + this.alias = alias; + } + + public static ColumnDateRange of(Field rangeColumn, String alias) { + return new 
ColumnDateRange(rangeColumn, alias); } public static ColumnDateRange of(Field rangeColumn) { - return new ColumnDateRange(false, rangeColumn, null, null); + return new ColumnDateRange(rangeColumn, ""); } public static ColumnDateRange of(Field startColumn, Field endColumn) { - return new ColumnDateRange(true, null, startColumn, endColumn); + return new ColumnDateRange(startColumn, endColumn, ""); + } + + public static ColumnDateRange of(Field startColumn, Field endColumn, String alias) { + return new ColumnDateRange(startColumn, endColumn, alias); } public ColumnDateRange asDateRestrictionRange() { @@ -61,28 +75,27 @@ public List> toFields() { .collect(Collectors.toList()); } + @Override public ColumnDateRange qualify(String qualifier) { if (isSingleColumnRange()) { - return ColumnDateRange.of(mapFieldOntoQualifier(getRange(), Object.class, qualifier)); + return new ColumnDateRange(QualifyingUtil.qualify(getRange(), qualifier), getAlias()); } - return ColumnDateRange.of( - mapFieldOntoQualifier(getStart(), Date.class, qualifier), - mapFieldOntoQualifier(getEnd(), Date.class, qualifier) + return new ColumnDateRange( + QualifyingUtil.qualify(getStart(), qualifier), + QualifyingUtil.qualify(getEnd(), qualifier), + getAlias() ); } - private ColumnDateRange as(String alias) { + public ColumnDateRange as(String alias) { if (isSingleColumnRange()) { - return ColumnDateRange.of(this.range.as(alias)); + return new ColumnDateRange(this.range.as(alias), alias); } - return ColumnDateRange.of( + return new ColumnDateRange( this.start.as(alias + START_SUFFIX), - this.end.as(alias + END_SUFFIX) + this.end.as(alias + END_SUFFIX), + alias ); } - private Field mapFieldOntoQualifier(Field field, Class fieldType, String qualifier) { - return DSL.field(DSL.name(qualifier, field.getName()), fieldType); - } - } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/ConceptConversionTables.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/ConceptConversionTables.java new file mode 100644 index 0000000000..8855afc2a0 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/ConceptConversionTables.java @@ -0,0 +1,33 @@ +package com.bakdata.conquery.sql.conversion.model; + +import java.util.Map; + +import com.bakdata.conquery.sql.conversion.cqelement.intervalpacking.IntervalPackingCteStep; +import lombok.Getter; + +@Getter +public class ConceptConversionTables extends SqlTables { + + /** + * Stores the name of the predecessor of the last CTE these tables contain. + */ + private final String lastPredecessor; + + /** + * True if these tables contain interval packing CTEs {@link IntervalPackingCteStep}. 
+ */ + private final boolean withIntervalPacking; + + public ConceptConversionTables( + String rootTable, + Map cteNameMap, + Map predecessorMap, + String lastPredecessor, + boolean containsIntervalPacking + ) { + super(rootTable, cteNameMap, predecessorMap); + this.lastPredecessor = lastPredecessor; + this.withIntervalPacking = containsIntervalPacking; + } + +} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/CteStep.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/CteStep.java index 4b70212758..5f6833df01 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/CteStep.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/CteStep.java @@ -1,5 +1,11 @@ package com.bakdata.conquery.sql.conversion.model; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.function.Function; +import java.util.stream.Collectors; + import javax.annotation.Nullable; /** @@ -9,8 +15,27 @@ public interface CteStep { String getSuffix(); - default String cteName(String nodeLabel) { - return "%s-%s".formatted(nodeLabel, getSuffix()); + /** + * Maps each given required step to its default predecessor (see {@link CteStep#getPredecessor()}. The map will contain all given required steps as keys, + * but values might be null. + */ + static Map getDefaultPredecessorMap(Set requiredSteps) { + return requiredSteps.stream().collect( + HashMap::new, + (map, cteStep) -> map.put(cteStep, cteStep.getPredecessor()), // value might be null + Map::putAll + ); + } + + /** + * Generates a CTE name for each of the given required steps. Combines the given label with the CTE step suffix (@link CteStep#getSuffix()). + */ + static Map createCteNameMap(Set requiredSteps, String label, NameGenerator nameGenerator) { + return requiredSteps.stream().collect( + Collectors.toMap( + Function.identity(), + step -> nameGenerator.cteStepName(step, label) + )); } @Nullable @@ -18,4 +43,8 @@ default CteStep getPredecessor() { return null; } + default String cteName(String nodeLabel) { + return "%s-%s".formatted(nodeLabel, getSuffix()); + } + } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/Qualifiable.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/Qualifiable.java new file mode 100644 index 0000000000..a5c54fa5e2 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/Qualifiable.java @@ -0,0 +1,7 @@ +package com.bakdata.conquery.sql.conversion.model; + +public interface Qualifiable { + + T qualify(String qualifier); + +} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/QueryStepJoiner.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/QueryStepJoiner.java index 7b5389729e..620fe845b5 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/QueryStepJoiner.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/QueryStepJoiner.java @@ -7,13 +7,12 @@ import com.bakdata.conquery.apiv1.query.CQElement; import com.bakdata.conquery.models.query.queryplan.DateAggregationAction; -import com.bakdata.conquery.sql.conversion.SharedAliases; import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.cqelement.aggregation.DateAggregationDates; import com.bakdata.conquery.sql.conversion.dialect.SqlDateAggregator; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import 
com.bakdata.conquery.sql.conversion.model.select.SqlSelect; -import org.jooq.Field; +import org.jooq.Condition; import org.jooq.Record; import org.jooq.Table; import org.jooq.TableLike; @@ -45,7 +44,7 @@ public static QueryStep joinSteps( ConversionContext context ) { String joinedCteName = context.getNameGenerator().joinedNodeName(logicalOperation); - Field primaryColumn = coalescePrimaryColumns(queriesToJoin); + SqlIdColumns ids = coalesceIds(queriesToJoin); List mergedSelects = mergeSelects(queriesToJoin); TableLike joinedTable = constructJoinedTable(queriesToJoin, logicalOperation, context); @@ -57,15 +56,15 @@ public static QueryStep joinSteps( DateAggregationDates dateAggregationDates = DateAggregationDates.forSteps(queriesToJoin); if (dateAggregationAction == DateAggregationAction.BLOCK || dateAggregationDates.dateAggregationImpossible()) { - joinedStep = buildJoinedStep(primaryColumn, mergedSelects, Optional.empty(), joinedStepBuilder); + joinedStep = buildJoinedStep(ids, mergedSelects, Optional.empty(), joinedStepBuilder); } // if there is only 1 child node containing a validity date, we just keep it as overall validity date for the joined node else if (dateAggregationDates.getValidityDates().size() == 1) { ColumnDateRange validityDate = dateAggregationDates.getValidityDates().get(0); - joinedStep = buildJoinedStep(primaryColumn, mergedSelects, Optional.of(validityDate), joinedStepBuilder); + joinedStep = buildJoinedStep(ids, mergedSelects, Optional.of(validityDate), joinedStepBuilder); } else { - joinedStep = buildStepAndAggregateDates(primaryColumn, mergedSelects, joinedStepBuilder, dateAggregationDates, dateAggregationAction, context); + joinedStep = buildStepAndAggregateDates(ids, mergedSelects, joinedStepBuilder, dateAggregationDates, dateAggregationAction, context); } return joinedStep; } @@ -85,51 +84,40 @@ public static TableLike constructJoinedTable(List queriesToJo QueryStep leftPartQS = queriesToJoin.get(i); QueryStep rightPartQS = queriesToJoin.get(i + 1); - Field leftPartPrimaryColumn = leftPartQS.getQualifiedSelects().getPrimaryColumn(); - Field rightPartPrimaryColumn = rightPartQS.getQualifiedSelects().getPrimaryColumn(); + SqlIdColumns leftIds = leftPartQS.getQualifiedSelects().getIds(); + SqlIdColumns rightIds = rightPartQS.getQualifiedSelects().getIds(); - joinedQuery = joinType.join(joinedQuery, rightPartQS, leftPartPrimaryColumn, rightPartPrimaryColumn); + List joinConditions = SqlIdColumns.join(leftIds, rightIds); + + joinedQuery = joinType.join(joinedQuery, rightPartQS, joinConditions); } return joinedQuery; } - @FunctionalInterface - private interface JoinType { - TableOnConditionStep join( - Table leftPartQueryBase, - QueryStep rightPartQS, - Field leftPartPrimaryColumn, - Field rightPartPrimaryColumn - ); - } - - public static Field coalescePrimaryColumns(List querySteps) { - List> primaryColumns = querySteps.stream() - .map(queryStep -> queryStep.getQualifiedSelects().getPrimaryColumn()) - .collect(Collectors.toList()); - return DSL.coalesce(primaryColumns.get(0), primaryColumns.subList(1, primaryColumns.size()).toArray()) - .as(SharedAliases.PRIMARY_COLUMN.getAlias()); - } - public static List mergeSelects(List querySteps) { return querySteps.stream() .flatMap(queryStep -> queryStep.getQualifiedSelects().getSqlSelects().stream()) .collect(Collectors.toList()); } + public static SqlIdColumns coalesceIds(List querySteps) { + List ids = querySteps.stream().map(QueryStep::getQualifiedSelects).map(Selects::getIds).toList(); + return 
SqlIdColumns.coalesce(ids); + } + private static Table getIntitialJoinTable(List queriesToJoin) { return DSL.table(DSL.name(queriesToJoin.get(0).getCteName())); } private static QueryStep buildJoinedStep( - Field primaryColumn, + SqlIdColumns ids, List mergedSelects, Optional validityDate, QueryStep.QueryStepBuilder builder ) { Selects selects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .sqlSelects(mergedSelects) .validityDate(validityDate) .build(); @@ -137,7 +125,7 @@ private static QueryStep buildJoinedStep( } private static QueryStep buildStepAndAggregateDates( - Field primaryColumn, + SqlIdColumns ids, List mergedSelects, QueryStep.QueryStepBuilder builder, DateAggregationDates dateAggregationDates, @@ -146,7 +134,7 @@ private static QueryStep buildStepAndAggregateDates( ) { List withAllValidityDates = new ArrayList<>(mergedSelects); withAllValidityDates.addAll(dateAggregationDates.allStartsAndEnds()); - QueryStep joinedStep = buildJoinedStep(primaryColumn, withAllValidityDates, Optional.empty(), builder); + QueryStep joinedStep = buildJoinedStep(ids, withAllValidityDates, Optional.empty(), builder); SqlDateAggregator sqlDateAggregator = context.getSqlDialect().getDateAggregator(); return sqlDateAggregator.apply( @@ -154,7 +142,16 @@ private static QueryStep buildStepAndAggregateDates( mergedSelects, dateAggregationDates, dateAggregationAction, - context.getNameGenerator() + context + ); + } + + @FunctionalInterface + private interface JoinType { + TableOnConditionStep join( + Table leftPartQueryBase, + QueryStep rightPartQS, + List joinConditions ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/Selects.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/Selects.java index 02d7149535..5b0129d133 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/Selects.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/Selects.java @@ -11,13 +11,12 @@ import lombok.Singular; import lombok.Value; import org.jooq.Field; -import org.jooq.impl.DSL; @Value @Builder(toBuilder = true) public class Selects { - Field primaryColumn; + SqlIdColumns ids; @Builder.Default Optional validityDate = Optional.empty(); @Singular @@ -36,13 +35,11 @@ public Selects blockValidityDate() { } public Selects qualify(String qualifier) { - Field qualifiedPrimaryColumn = DSL.field(DSL.name(qualifier, this.primaryColumn.getName())); - List sqlSelects = this.sqlSelects.stream() - .map(sqlSelect -> sqlSelect.qualify(qualifier)) - .collect(Collectors.toList()); + SqlIdColumns ids = this.ids.qualify(qualifier); + List sqlSelects = this.sqlSelects.stream().map(sqlSelect -> sqlSelect.qualify(qualifier)).collect(Collectors.toList()); SelectsBuilder builder = Selects.builder() - .primaryColumn(qualifiedPrimaryColumn) + .ids(ids) .sqlSelects(sqlSelects); if (this.validityDate.isPresent()) { @@ -54,7 +51,7 @@ public Selects qualify(String qualifier) { public List> all() { return Stream.of( - Stream.of(this.primaryColumn), + this.ids.toFields().stream(), this.validityDate.stream().flatMap(range -> range.toFields().stream()), this.sqlSelects.stream().map(SqlSelect::select) ) diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/SqlIdColumns.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/SqlIdColumns.java new file mode 100644 index 0000000000..029c970b24 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/SqlIdColumns.java @@ -0,0 +1,94 @@ 
+package com.bakdata.conquery.sql.conversion.model; + +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +import javax.annotation.Nullable; + +import com.bakdata.conquery.sql.conversion.SharedAliases; +import lombok.Getter; +import lombok.NonNull; +import org.jooq.Condition; +import org.jooq.Field; +import org.jooq.impl.DSL; + +public class SqlIdColumns implements Qualifiable { + + @Getter + private final Field primaryColumn; + + @Nullable + private final Field secondaryId; + + private final List> ids; + + public SqlIdColumns(Field primaryColumn, @NonNull Field secondaryId) { + this.primaryColumn = primaryColumn; + this.secondaryId = secondaryId; + this.ids = Stream.concat(Stream.of(this.primaryColumn), Stream.ofNullable(this.secondaryId)).collect(Collectors.toList()); + } + + public SqlIdColumns(Field primaryColumn) { + this.primaryColumn = primaryColumn; + this.secondaryId = null; + this.ids = List.of(this.primaryColumn); + } + + @Override + public SqlIdColumns qualify(String qualifier) { + Field primaryColumn = QualifyingUtil.qualify(this.primaryColumn, qualifier); + if (this.secondaryId == null) { + return new SqlIdColumns(primaryColumn); + } + Field secondaryId = QualifyingUtil.qualify(this.secondaryId, qualifier); + return new SqlIdColumns(primaryColumn, secondaryId); + } + + public Optional> getSecondaryId() { + return Optional.ofNullable(this.secondaryId); + } + + public List> toFields() { + return this.ids; + } + + public static List join(SqlIdColumns leftIds, SqlIdColumns rightIds) { + Condition joinPrimariesCondition = leftIds.getPrimaryColumn().eq(rightIds.getPrimaryColumn()); + Condition joinSecondariesCondition; + if (leftIds.getSecondaryId().isPresent() && rightIds.getSecondaryId().isPresent()) { + joinSecondariesCondition = leftIds.getSecondaryId().get().eq(rightIds.getSecondaryId().get()); + } + else { + joinSecondariesCondition = DSL.noCondition(); + } + return List.of(joinPrimariesCondition, joinSecondariesCondition); + } + + public static SqlIdColumns coalesce(List selectsIds) { + + List> primaryColumns = new ArrayList<>(selectsIds.size()); + List> secondaryIds = new ArrayList<>(selectsIds.size()); + selectsIds.forEach(ids -> { + primaryColumns.add(ids.getPrimaryColumn()); + ids.getSecondaryId().ifPresent(secondaryIds::add); + }); + + Field coalescedPrimaryColumn = coalesceFields(primaryColumns).as(SharedAliases.PRIMARY_COLUMN.getAlias()); + if (secondaryIds.isEmpty()) { + return new SqlIdColumns(coalescedPrimaryColumn); + } + Field coalescedSecondaryIds = coalesceFields(secondaryIds).as(SharedAliases.SECONDARY_ID.getAlias()); + return new SqlIdColumns(coalescedPrimaryColumn, coalescedSecondaryIds); + } + + private static Field coalesceFields(List> fields) { + if (fields.size() == 1) { + return fields.get(0).coerce(Object.class); + } + return DSL.coalesce(fields.get(0), fields.subList(1, fields.size()).toArray()); + } + +} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/SqlTables.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/SqlTables.java index d677b8cd28..48f2e39fa2 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/SqlTables.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/SqlTables.java @@ -1,74 +1,46 @@ package com.bakdata.conquery.sql.conversion.model; import java.util.Map; -import java.util.Set; -import java.util.function.Function; -import 
java.util.stream.Collectors; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; import lombok.Getter; -import org.jooq.DataType; -import org.jooq.Field; -import org.jooq.impl.DSL; +import lombok.RequiredArgsConstructor; /** - * SqlTables provide a mapping from {@link CteStep}s to their respective table names/cte names in the generated SQL query. + * SqlTables provide a mapping from {@link CteStep}s to their respective table names/cte names and from a {@link CteStep} to the respective preceding step + * in the generated SQL query. */ -@Getter +@RequiredArgsConstructor public class SqlTables { - private final Map cteNames; + @Getter private final String rootTable; - - public SqlTables(String nodeLabel, Set requiredSteps, String rootTableName, NameGenerator nameGenerator) { - this.cteNames = requiredSteps.stream() - .collect(Collectors.toMap( - Function.identity(), - step -> nameGenerator.cteStepName(step, nodeLabel) - )); - this.rootTable = rootTableName; - } + private final Map cteNameMap; + private final Map predecessorMap; /** * @return The CTE name for a {@link CteStep}. */ public String cteName(CteStep cteStep) { - return this.cteNames.get(cteStep); + return cteNameMap.get(cteStep); } /** * @return True if the given {@link CteStep} is part of these {@link SqlTables}. */ public boolean isRequiredStep(CteStep cteStep) { - return this.cteNames.containsKey(cteStep); + return cteNameMap.containsKey(cteStep); } /** - * @return The name of the table the given {@link CteStep} will select from. + * @return The name of the table the given {@link CteStep} will select from. If there exists no mapped preceding {@link CteStep} for the given + * {@link CteStep}, the root table is returned. */ public String getPredecessor(CteStep cteStep) { - CteStep predecessor = cteStep.getPredecessor(); - while (!this.cteNames.containsKey(predecessor)) { - if (predecessor == null) { - return this.rootTable; - } - predecessor = predecessor.getPredecessor(); + CteStep predecessor = predecessorMap.get(cteStep); + if (predecessor == null) { + return rootTable; } - return this.cteNames.get(predecessor); + return cteNameMap.get(predecessor); } - /** - * Qualify a field for a {@link CteStep}. - *

- * For example, if you want to qualify a {@link Field} for the AGGREGATION_SELECT step of {@link ConnectorCteStep}, - * it's qualified on the EVENT_FILTER or PREPROCESSING_STEP depending on the presence of the respective step. - * See {@link SqlTables#getPredecessor(CteStep)} - * - * @param cteStep The {@link CteStep} you want to qualify the given field for. - * @param field The field you want to qualify. - */ - @SuppressWarnings("unchecked") - public Field qualifyOnPredecessor(CteStep cteStep, Field field) { - return DSL.field(DSL.name(getPredecessor(cteStep), field.getName()), (DataType) field.getDataType()); - } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountQuartersSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountQuartersSqlAggregator.java index 33bde566f9..e9195c10e2 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountQuartersSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountQuartersSqlAggregator.java @@ -7,7 +7,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.filters.specific.CountQuartersFilter; import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.CountQuartersSelect; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.cqelement.concept.FilterContext; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.SqlTables; @@ -33,7 +33,7 @@ private CountQuartersSqlAggregator( ) { ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>(connectorTables.getRootTable(), column.getName(), Date.class); - Field qualifiedRootSelect = rootSelect.qualify(connectorTables.cteName(ConnectorCteStep.EVENT_FILTER)).select(); + Field qualifiedRootSelect = rootSelect.qualify(connectorTables.cteName(ConceptCteStep.EVENT_FILTER)).select(); FieldWrapper countQuartersField = new FieldWrapper<>( DSL.countDistinct(functionProvider.yearQuarter(qualifiedRootSelect)).as(alias), column.getName() @@ -43,7 +43,7 @@ private CountQuartersSqlAggregator( .preprocessingSelect(rootSelect) .aggregationSelect(countQuartersField); - String aggregationFilterPredecessor = connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER); + String aggregationFilterPredecessor = connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER); if (filterValue == null) { ExtractingSqlSelect finalSelect = countQuartersField.qualify(aggregationFilterPredecessor); this.sqlSelects = builder.finalSelect(finalSelect).build(); @@ -63,8 +63,8 @@ public static CountQuartersSqlAggregator create(CountQuartersSelect countQuarter return new CountQuartersSqlAggregator( countQuartersSelect.getColumn(), selectContext.getNameGenerator().selectName(countQuartersSelect), - selectContext.getConnectorTables(), - selectContext.getParentContext().getSqlDialect().getFunctionProvider(), + selectContext.getTables(), + selectContext.getConversionContext().getSqlDialect().getFunctionProvider(), null ); } @@ -73,8 +73,8 @@ public static CountQuartersSqlAggregator create(CountQuartersFilter countQuarter return new CountQuartersSqlAggregator( countQuartersFilter.getColumn(), filterContext.getNameGenerator().selectName(countQuartersFilter), - 
filterContext.getConnectorTables(), - filterContext.getParentContext().getSqlDialect().getFunctionProvider(), + filterContext.getTables(), + filterContext.getConversionContext().getSqlDialect().getFunctionProvider(), filterContext.getValue() ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountSqlAggregator.java index 492d7eb2de..346a436c47 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/CountSqlAggregator.java @@ -5,7 +5,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.filters.specific.CountFilter; import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.CountSelect; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.cqelement.concept.FilterContext; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.filter.CountCondition; @@ -35,7 +35,7 @@ private CountSqlAggregator( ) { ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>(connectorTables.getRootTable(), countColumn.getName(), Object.class); - Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT)).select(); + Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT)).select(); Field countField = countType == CountType.DISTINCT ? 
DSL.countDistinct(qualifiedRootSelect) : DSL.count(qualifiedRootSelect); @@ -45,7 +45,7 @@ private CountSqlAggregator( .preprocessingSelect(rootSelect) .aggregationSelect(countGroupBy); - String finalPredecessor = connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER); + String finalPredecessor = connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER); if (filterValue == null) { ExtractingSqlSelect finalSelect = countGroupBy.qualify(finalPredecessor); this.sqlSelects = builder.finalSelect(finalSelect).build(); @@ -66,7 +66,7 @@ public static CountSqlAggregator create(CountSelect countSelect, SelectContext s countSelect.getColumn(), CountType.fromBoolean(countSelect.isDistinct()), selectContext.getNameGenerator().selectName(countSelect), - selectContext.getConnectorTables(), + selectContext.getTables(), null ); } @@ -76,7 +76,7 @@ public static CountSqlAggregator create(CountFilter countFilter, FilterContext qualifiedDateDistance = dateDistanceSelect.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT)) + Field qualifiedDateDistance = dateDistanceSelect.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT)) .select(); FieldWrapper minDateDistance = new FieldWrapper<>(DSL.min(qualifiedDateDistance).as(alias)); - String finalPredecessor = connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER); + String finalPredecessor = connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER); ExtractingSqlSelect finalSelect = minDateDistance.qualify(finalPredecessor); this.sqlSelects = builder.aggregationSelect(minDateDistance) @@ -69,7 +69,7 @@ private DateDistanceSqlAggregator( } else { this.sqlSelects = builder.build(); - String predecessorCte = connectorTables.getPredecessor(ConnectorCteStep.EVENT_FILTER); + String predecessorCte = connectorTables.getPredecessor(ConceptCteStep.EVENT_FILTER); Field qualifiedDateDistanceSelect = dateDistanceSelect.qualify(predecessorCte).select(); WhereCondition dateDistanceCondition = new DateDistanceCondition(qualifiedDateDistanceSelect, filterValue); this.whereClauses = WhereClauses.builder() @@ -85,12 +85,12 @@ public static DateDistanceSqlAggregator create( return new DateDistanceSqlAggregator( dateDistanceSelect.getColumn(), selectContext.getNameGenerator().selectName(dateDistanceSelect), - selectContext.getParentContext().getDateRestrictionRange(), + selectContext.getConversionContext().getDateRestrictionRange(), dateDistanceSelect.getTimeUnit(), - selectContext.getConnectorTables(), - selectContext.getParentContext().getSqlDialect().getDateNowSupplier(), + selectContext.getTables(), + selectContext.getConversionContext().getSqlDialect().getDateNowSupplier(), null, - selectContext.getParentContext().getSqlDialect().getFunctionProvider() + selectContext.getConversionContext().getSqlDialect().getFunctionProvider() ); } @@ -101,12 +101,12 @@ public static DateDistanceSqlAggregator create( return new DateDistanceSqlAggregator( dateDistanceFilter.getColumn(), filterContext.getNameGenerator().selectName(dateDistanceFilter), - filterContext.getParentContext().getDateRestrictionRange(), + filterContext.getConversionContext().getDateRestrictionRange(), dateDistanceFilter.getTimeUnit(), - filterContext.getConnectorTables(), - filterContext.getParentContext().getSqlDialect().getDateNowSupplier(), + filterContext.getTables(), + filterContext.getConversionContext().getSqlDialect().getDateNowSupplier(), filterContext.getValue(), - 
filterContext.getParentContext().getSqlDialect().getFunctionProvider() + filterContext.getConversionContext().getSqlDialect().getFunctionProvider() ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FirstValueSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FirstValueSqlAggregator.java index 108349c919..04cf59bdb9 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FirstValueSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FirstValueSqlAggregator.java @@ -6,7 +6,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.select.connector.FirstValueSelect; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; import com.bakdata.conquery.sql.conversion.model.SqlTables; @@ -36,13 +36,13 @@ private FirstValueSqlAggregator( ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>(rootTableName, columnName, Object.class); List> validityDateFields = - validityDate.map(_validityDate -> _validityDate.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT))) + validityDate.map(_validityDate -> _validityDate.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT))) .map(ColumnDateRange::toFields) .orElse(Collections.emptyList()); - Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT)).select(); + Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT)).select(); FieldWrapper firstGroupBy = new FieldWrapper<>(functionProvider.first(qualifiedRootSelect, validityDateFields).as(alias), columnName); - ExtractingSqlSelect finalSelect = firstGroupBy.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER)); + ExtractingSqlSelect finalSelect = firstGroupBy.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER)); this.sqlSelects = SqlSelects.builder() .preprocessingSelect(rootSelect) @@ -58,8 +58,8 @@ public static FirstValueSqlAggregator create(FirstValueSelect firstValueSelect, firstValueSelect.getColumn(), selectContext.getNameGenerator().selectName(firstValueSelect), selectContext.getValidityDate(), - selectContext.getConnectorTables(), - selectContext.getParentContext().getSqlDialect().getFunctionProvider() + selectContext.getTables(), + selectContext.getConversionContext().getSqlDialect().getFunctionProvider() ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FlagSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FlagSqlAggregator.java index f98414d508..48b13bda9e 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FlagSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/FlagSqlAggregator.java @@ -9,7 +9,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.filters.specific.FlagFilter; import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.FlagSelect; -import 
com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.cqelement.concept.FilterContext; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.SqlTables; @@ -18,7 +18,6 @@ import com.bakdata.conquery.sql.conversion.model.select.ExtractingSqlSelect; import com.bakdata.conquery.sql.conversion.model.select.FieldWrapper; import com.bakdata.conquery.sql.conversion.model.select.SelectContext; -import com.bakdata.conquery.sql.conversion.model.select.SqlSelect; import com.bakdata.conquery.sql.conversion.model.select.SqlSelects; import lombok.Value; import org.jooq.Condition; @@ -55,12 +54,12 @@ *

  * {@code
  * "event_filter" as (
- *		select "pid"
- *		from "preprocessing"
- *		where (
- *			"preprocessing"."b" = true
- *			or "preprocessing"."c" = true
- *		)
+ * 		select "pid"
+ * 		from "preprocessing"
+ * 		where (
+ * 			"preprocessing"."b" = true
+ * 			or "preprocessing"."c" = true
+ * 		)
  * )
  * }
  * 
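Aside, not part of the diff: the following hunk replaces the removed SqlTables#qualifyOnPredecessor helper with explicit qualification against the predecessor CTE name. A minimal sketch of that pattern, assuming the ExtractingSqlSelect, SqlTables and ConceptCteStep signatures introduced in this change; the class and method names of the sketch itself are hypothetical and for illustration only.

// Illustration only: re-reference a root-table column in the CTE that precedes a given step,
// assuming the generic signatures of ExtractingSqlSelect and SqlTables from this change.
import org.jooq.Field;

import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep;
import com.bakdata.conquery.sql.conversion.model.SqlTables;
import com.bakdata.conquery.sql.conversion.model.select.ExtractingSqlSelect;

class QualifyOnPredecessorSketch {

	// Reference a boolean flag column in the root table, then qualify it on the CTE that
	// directly precedes the EVENT_FILTER step, the pattern that replaces qualifyOnPredecessor.
	static Field<Boolean> flagField(SqlTables tables, String columnName) {
		ExtractingSqlSelect<Boolean> root = new ExtractingSqlSelect<>(tables.getRootTable(), columnName, Boolean.class);
		return root.qualify(tables.getPredecessor(ConceptCteStep.EVENT_FILTER)).select();
	}
}
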
@@ -75,15 +74,15 @@ public class FlagSqlAggregator implements SqlAggregator { public static FlagSqlAggregator create(FlagSelect flagSelect, SelectContext selectContext) { - SqlFunctionProvider functionProvider = selectContext.getParentContext().getSqlDialect().getFunctionProvider(); - SqlTables connectorTables = selectContext.getConnectorTables(); + SqlFunctionProvider functionProvider = selectContext.getConversionContext().getSqlDialect().getFunctionProvider(); + SqlTables connectorTables = selectContext.getTables(); - Map rootSelects = createFlagRootSelectMap(flagSelect, connectorTables.getRootTable()); + Map> rootSelects = createFlagRootSelectMap(flagSelect, connectorTables.getRootTable()); String alias = selectContext.getNameGenerator().selectName(flagSelect); FieldWrapper flagAggregation = createFlagSelect(alias, connectorTables, functionProvider, rootSelects); - ExtractingSqlSelect finalSelect = flagAggregation.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER)); + ExtractingSqlSelect finalSelect = flagAggregation.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER)); SqlSelects sqlSelects = SqlSelects.builder().preprocessingSelects(rootSelects.values()) .aggregationSelect(flagAggregation) @@ -94,10 +93,10 @@ public static FlagSqlAggregator create(FlagSelect flagSelect, SelectContext sele } public static FlagSqlAggregator create(FlagFilter flagFilter, FilterContext filterContext) { - SqlTables connectorTables = filterContext.getConnectorTables(); - String rootTable = connectorTables.getPredecessor(ConnectorCteStep.PREPROCESSING); + SqlTables connectorTables = filterContext.getTables(); + String rootTable = connectorTables.getPredecessor(ConceptCteStep.PREPROCESSING); - List rootSelects = + List> rootSelects = getRequiredColumnNames(flagFilter.getFlags(), filterContext.getValue()) .stream() .map(columnName -> new ExtractingSqlSelect<>(rootTable, columnName, Boolean.class)) @@ -107,7 +106,7 @@ public static FlagSqlAggregator create(FlagFilter flagFilter, FilterContext> flagFields = rootSelects.stream() - .map(sqlSelect -> connectorTables.qualifyOnPredecessor(ConnectorCteStep.EVENT_FILTER, sqlSelect.aliased())) + .map(sqlSelect -> sqlSelect.qualify(connectorTables.getPredecessor(ConceptCteStep.EVENT_FILTER)).select()) .toList(); FlagCondition flagCondition = new FlagCondition(flagFields); WhereClauses whereClauses = WhereClauses.builder() @@ -120,7 +119,7 @@ public static FlagSqlAggregator create(FlagFilter flagFilter, FilterContext createFlagRootSelectMap(FlagSelect flagSelect, String rootTable) { + private static Map> createFlagRootSelectMap(FlagSelect flagSelect, String rootTable) { return flagSelect.getFlags() .entrySet().stream() .collect(Collectors.toMap( @@ -133,7 +132,7 @@ private static FieldWrapper createFlagSelect( String alias, SqlTables connectorTables, SqlFunctionProvider functionProvider, - Map flagRootSelectMap + Map> flagRootSelectMap ) { Map> flagFieldsMap = createRootSelectReferences(connectorTables, flagRootSelectMap); @@ -155,12 +154,15 @@ private static FieldWrapper createFlagSelect( return new FieldWrapper<>(flagsArray, requiredColumns); } - private static Map> createRootSelectReferences(SqlTables connectorTables, Map flagRootSelectMap) { + private static Map> createRootSelectReferences( + SqlTables connectorTables, + Map> flagRootSelectMap + ) { return flagRootSelectMap.entrySet().stream() - .collect(Collectors.toMap( - Map.Entry::getKey, - entry -> 
connectorTables.qualifyOnPredecessor(ConnectorCteStep.AGGREGATION_SELECT, entry.getValue().aliased()) - )); + .collect(Collectors.toMap( + Map.Entry::getKey, + entry -> entry.getValue().qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT)).select() + )); } /** diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/LastValueSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/LastValueSqlAggregator.java index 25a7b4df8f..6d59271d71 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/LastValueSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/LastValueSqlAggregator.java @@ -6,7 +6,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.select.connector.LastValueSelect; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; import com.bakdata.conquery.sql.conversion.model.SqlTables; @@ -35,13 +35,13 @@ private LastValueSqlAggregator( ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>(connectorTables.getRootTable(), columnName, Object.class); List> validityDateFields = - validityDate.map(_validityDate -> _validityDate.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT))) + validityDate.map(_validityDate -> _validityDate.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT))) .map(ColumnDateRange::toFields) .orElse(Collections.emptyList()); - Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT)).select(); + Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT)).select(); FieldWrapper lastGroupBy = new FieldWrapper<>(functionProvider.last(qualifiedRootSelect, validityDateFields).as(alias), columnName); - ExtractingSqlSelect finalSelect = lastGroupBy.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER)); + ExtractingSqlSelect finalSelect = lastGroupBy.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER)); this.sqlSelects = SqlSelects.builder() .preprocessingSelect(rootSelect) @@ -57,8 +57,8 @@ public static LastValueSqlAggregator create(LastValueSelect lastValueSelect, Sel lastValueSelect.getColumn(), selectContext.getNameGenerator().selectName(lastValueSelect), selectContext.getValidityDate(), - selectContext.getConnectorTables(), - selectContext.getParentContext().getSqlDialect().getFunctionProvider() + selectContext.getTables(), + selectContext.getConversionContext().getSqlDialect().getFunctionProvider() ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/NumberSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/NumberSqlAggregator.java index 586d42c285..7b93747f81 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/NumberSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/NumberSqlAggregator.java @@ -8,7 +8,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.filters.specific.NumberFilter; 
import com.bakdata.conquery.models.events.MajorTypeId; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.cqelement.concept.FilterContext; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.filter.NumberCondition; @@ -32,7 +32,7 @@ public NumberSqlAggregator( Class numberClass = NumberMapUtil.NUMBER_MAP.get(column.getType()); ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>(connectorTables.getRootTable(), column.getName(), numberClass); - Field eventFilterCtePredecessor = connectorTables.qualifyOnPredecessor(ConnectorCteStep.EVENT_FILTER, rootSelect.aliased()); + Field eventFilterCtePredecessor = rootSelect.qualify(connectorTables.getPredecessor(ConceptCteStep.EVENT_FILTER)).select(); NumberCondition condition = new NumberCondition(eventFilterCtePredecessor, filterValue); this.sqlSelects = SqlSelects.builder() @@ -49,7 +49,7 @@ public static NumberSqlAggregator create( ) { return new NumberSqlAggregator( numberFilter.getColumn(), - filterContext.getConnectorTables(), + filterContext.getTables(), prepareFilterValue(numberFilter.getColumn(), filterContext.getValue()) ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/RandomValueSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/RandomValueSqlAggregator.java index b36bed0163..0aea7f0380 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/RandomValueSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/RandomValueSqlAggregator.java @@ -2,7 +2,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.select.connector.RandomValueSelect; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.filter.WhereClauses; @@ -27,10 +27,10 @@ private RandomValueSqlAggregator( ) { ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>(connectorTables.getRootTable(), column.getName(), Object.class); - Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT)).select(); + Field qualifiedRootSelect = rootSelect.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT)).select(); FieldWrapper randomGroupBy = new FieldWrapper<>(functionProvider.random(qualifiedRootSelect).as(alias), column.getName()); - ExtractingSqlSelect finalSelect = randomGroupBy.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER)); + ExtractingSqlSelect finalSelect = randomGroupBy.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER)); this.sqlSelects = SqlSelects.builder() .preprocessingSelect(rootSelect) @@ -45,8 +45,8 @@ public static RandomValueSqlAggregator create(RandomValueSelect randomValueSelec return new RandomValueSqlAggregator( randomValueSelect.getColumn(), selectContext.getNameGenerator().selectName(randomValueSelect), - selectContext.getConnectorTables(), - selectContext.getParentContext().getSqlDialect().getFunctionProvider() + 
selectContext.getTables(), + selectContext.getConversionContext().getSqlDialect().getFunctionProvider() ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumDistinctSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumDistinctSqlAggregator.java index e2ec302ade..0fad8b5e46 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumDistinctSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumDistinctSqlAggregator.java @@ -10,13 +10,13 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SumFilter; import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.SumSelect; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.cqelement.concept.FilterContext; import com.bakdata.conquery.sql.conversion.model.CteStep; import com.bakdata.conquery.sql.conversion.model.NameGenerator; -import com.bakdata.conquery.sql.conversion.model.QualifyingUtil; import com.bakdata.conquery.sql.conversion.model.QueryStep; import com.bakdata.conquery.sql.conversion.model.Selects; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.filter.SumCondition; import com.bakdata.conquery.sql.conversion.model.filter.WhereClauses; @@ -33,7 +33,7 @@ /** * Conversion of a {@link SumSelect} with {@link SumSelect#getDistinctByColumn()}. Sum's the values of a column for each row which is distinct by the - * distinct-by columns by creating 2 additional CTEs. We can't use our usual {@link ConnectorCteStep#PREPROCESSING} CTE for achieving distinctness, because + * distinct-by columns by creating 2 additional CTEs. We can't use our usual {@link ConceptCteStep#PREPROCESSING} CTE for achieving distinctness, because * it's used for the conversion of other selects where distinctness by distinct-by columns is not required and would cause wrong results. *

* @@ -93,7 +93,7 @@ public SumDistinctSqlAggregator( List distinctByColumns, String alias, IRange filterValue, - Field primaryColumn, + SqlIdColumns ids, SqlTables connectorTables, NameGenerator nameGenerator ) { @@ -107,10 +107,10 @@ public SumDistinctSqlAggregator( .toList(); // additional predecessors - QueryStep rowNumberCte = createRowNumberCte(primaryColumn, sumColumnRootSelect, distinctByRootSelects, alias, connectorTables, nameGenerator); + QueryStep rowNumberCte = createRowNumberCte(ids, sumColumnRootSelect, distinctByRootSelects, alias, connectorTables, nameGenerator); Field rootSelectQualified = sumColumnRootSelect.qualify(rowNumberCte.getCteName()).select(); FieldWrapper distinctSum = new FieldWrapper<>(DSL.sum(rootSelectQualified).as(alias)); - QueryStep rowNumberFilteredCte = createRowNumberFilteredCte(rowNumberCte, primaryColumn, distinctSum, alias, nameGenerator); + QueryStep rowNumberFilteredCte = createRowNumberFilteredCte(rowNumberCte, distinctSum, alias, nameGenerator); SqlSelects.SqlSelectsBuilder builder = SqlSelects.builder() .preprocessingSelect(sumColumnRootSelect) @@ -119,7 +119,7 @@ public SumDistinctSqlAggregator( if (filterValue != null) { this.sqlSelects = builder.build(); - String groupFilterPredecessor = connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER); + String groupFilterPredecessor = connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER); Field qualifiedSumSelect = distinctSum.qualify(groupFilterPredecessor).select(); SumCondition sumCondition = new SumCondition(qualifiedSumSelect, filterValue); this.whereClauses = WhereClauses.builder() @@ -127,7 +127,7 @@ public SumDistinctSqlAggregator( .build(); } else { - ExtractingSqlSelect finalSelect = distinctSum.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER)); + ExtractingSqlSelect finalSelect = distinctSum.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER)); this.sqlSelects = builder.finalSelect(finalSelect).build(); this.whereClauses = WhereClauses.empty(); } @@ -139,8 +139,8 @@ public static SumDistinctSqlAggregator create(SumSelect sumSelect, SelectContext sumSelect.getDistinctByColumn(), selectContext.getNameGenerator().selectName(sumSelect), null, - selectContext.getParentContext().getPrimaryColumn(), - selectContext.getConnectorTables(), + selectContext.getIds(), + selectContext.getTables(), selectContext.getNameGenerator() ); } @@ -151,8 +151,8 @@ public static SumDistinctSqlAggregator create(SumSelect sumSelect, SelectContext sumFilter.getDistinctByColumn(), filterContext.getNameGenerator().selectName(sumFilter), filterContext.getValue(), - filterContext.getParentContext().getPrimaryColumn(), - filterContext.getConnectorTables(), + filterContext.getIds(), + filterContext.getTables(), filterContext.getNameGenerator() ); } @@ -162,20 +162,19 @@ public static SumDistinctSqlAggregator create(SumSelect sumSelect, SelectContext * the row number will be incremented for each duplicated entry. 
*/ private static QueryStep createRowNumberCte( - Field primaryColumn, + SqlIdColumns ids, ExtractingSqlSelect sumColumnRootSelect, List> distinctByRootSelects, String alias, SqlTables connectorTables, NameGenerator nameGenerator ) { - String predecessor = connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_SELECT); - - Field qualifiedPrimaryColumn = QualifyingUtil.qualify(primaryColumn, predecessor); + String predecessor = connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_SELECT); + SqlIdColumns qualifiedIds = ids.qualify(predecessor); ExtractingSqlSelect qualifiedSumRootSelect = sumColumnRootSelect.qualify(predecessor); List> partitioningFields = Stream.concat( - Stream.of(qualifiedPrimaryColumn), + qualifiedIds.toFields().stream(), distinctByRootSelects.stream().map(sqlSelect -> sqlSelect.qualify(predecessor).select()) ) .collect(Collectors.toList()); @@ -185,7 +184,7 @@ private static QueryStep createRowNumberCte( ); Selects rowNumberAssignedSelects = Selects.builder() - .primaryColumn(qualifiedPrimaryColumn) + .ids(qualifiedIds) .sqlSelects(List.of(qualifiedSumRootSelect, rowNumber)) .build(); @@ -201,13 +200,14 @@ private static QueryStep createRowNumberCte( */ private static QueryStep createRowNumberFilteredCte( QueryStep rowNumberCte, - Field primaryColumn, FieldWrapper sumSelect, String alias, NameGenerator nameGenerator ) { + SqlIdColumns ids = rowNumberCte.getQualifiedSelects().getIds(); + Selects rowNumberFilteredSelects = Selects.builder() - .primaryColumn(primaryColumn) + .ids(ids) .sqlSelects(List.of(sumSelect)) .build(); @@ -220,7 +220,7 @@ private static QueryStep createRowNumberFilteredCte( .fromTable(QueryStep.toTableLike(rowNumberCte.getCteName())) .conditions(List.of(firstOccurrence)) .predecessors(List.of(rowNumberCte)) - .groupBy(List.of(primaryColumn)) + .groupBy(ids.toFields()) .build(); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumSqlAggregator.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumSqlAggregator.java index ef7044d250..0a0703fd4f 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumSqlAggregator.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/aggregator/SumSqlAggregator.java @@ -8,7 +8,7 @@ import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.concepts.filters.specific.SumFilter; import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.SumSelect; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import com.bakdata.conquery.sql.conversion.cqelement.concept.FilterContext; import com.bakdata.conquery.sql.conversion.model.SqlTables; import com.bakdata.conquery.sql.conversion.model.filter.SumCondition; @@ -40,7 +40,7 @@ private SumSqlAggregator( ExtractingSqlSelect rootSelect = new ExtractingSqlSelect<>(connectorTables.getRootTable(), sumColumn.getName(), numberClass); preprocessingSelects.add(rootSelect); - String eventFilterCte = connectorTables.cteName(ConnectorCteStep.EVENT_FILTER); + String eventFilterCte = connectorTables.cteName(ConceptCteStep.EVENT_FILTER); Field sumField = rootSelect.qualify(eventFilterCte).select(); FieldWrapper sumGroupBy; if (subtractColumn != null) { @@ -63,13 +63,13 @@ private SumSqlAggregator( .aggregationSelect(sumGroupBy); if (filterValue == null) { - ExtractingSqlSelect finalSelect = 
sumGroupBy.qualify(connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER)); + ExtractingSqlSelect finalSelect = sumGroupBy.qualify(connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER)); this.sqlSelects = builder.finalSelect(finalSelect).build(); this.whereClauses = WhereClauses.empty(); } else { this.sqlSelects = builder.build(); - String predecessor = connectorTables.getPredecessor(ConnectorCteStep.AGGREGATION_FILTER); + String predecessor = connectorTables.getPredecessor(ConceptCteStep.AGGREGATION_FILTER); Field qualifiedSumGroupBy = sumGroupBy.qualify(predecessor).select(); SumCondition sumCondition = new SumCondition(qualifiedSumGroupBy, filterValue); this.whereClauses = WhereClauses.builder() @@ -83,7 +83,7 @@ public static SumSqlAggregator create(SumSelect sumSelect, SelectContext selectC sumSelect.getColumn(), sumSelect.getSubtractColumn(), selectContext.getNameGenerator().selectName(sumSelect), - selectContext.getConnectorTables(), + selectContext.getTables(), null ); } @@ -93,7 +93,7 @@ public static SumSqlAggregator create(SumSelect sumSelect, SelectContext selectC sumFilter.getColumn(), sumFilter.getSubtractColumn(), filterContext.getNameGenerator().selectName(sumFilter), - filterContext.getConnectorTables(), + filterContext.getTables(), filterContext.getValue() ); } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SelectContext.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SelectContext.java index 82144768c0..fb06c4369e 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SelectContext.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SelectContext.java @@ -5,35 +5,16 @@ import com.bakdata.conquery.sql.conversion.Context; import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.model.ColumnDateRange; -import com.bakdata.conquery.sql.conversion.model.NameGenerator; +import com.bakdata.conquery.sql.conversion.model.SqlIdColumns; import com.bakdata.conquery.sql.conversion.model.SqlTables; import lombok.Value; -import org.jooq.Field; @Value public class SelectContext implements Context { - Field primaryColumn; + SqlIdColumns ids; Optional validityDate; - SqlTables connectorTables; - ConversionContext parentContext; - - public static SelectContext forUniversalSelects(Field primaryColumn, Optional validityDate, ConversionContext conversionContext) { - return new SelectContext(primaryColumn, validityDate, null, conversionContext); - } - - public static SelectContext forConnectorSelects( - Field primaryColumn, - Optional validityDate, - SqlTables connectorTables, - ConversionContext conversionContext - ) { - return new SelectContext(primaryColumn, validityDate, connectorTables, conversionContext); - } - - @Override - public NameGenerator getNameGenerator() { - return this.parentContext.getNameGenerator(); - } + SqlTables tables; + ConversionContext conversionContext; } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SqlSelect.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SqlSelect.java index d2f9ea7bdb..508545d7c0 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SqlSelect.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/model/select/SqlSelect.java @@ -2,7 +2,7 @@ import java.util.List; -import 
com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; +import com.bakdata.conquery.sql.conversion.cqelement.concept.ConceptCteStep; import org.jooq.Field; public interface SqlSelect { @@ -30,7 +30,7 @@ public interface SqlSelect { ExtractingSqlSelect qualify(String qualifier); /** - * @return Determines if this is only part of the {@link ConnectorCteStep#FINAL} CTE and has no predeceasing selects. + * @return Determines if this is only part of the {@link ConceptCteStep#FINAL} CTE and has no predeceasing selects. */ default boolean isUniversal() { return false; diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptQueryConverter.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptQueryConverter.java index 2cc84c0201..526d97383c 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptQueryConverter.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptQueryConverter.java @@ -6,8 +6,8 @@ import com.bakdata.conquery.apiv1.query.ConceptQuery; import com.bakdata.conquery.models.query.DateAggregationMode; -import com.bakdata.conquery.sql.ConceptSqlQuery; import com.bakdata.conquery.sql.conversion.NodeConverter; +import com.bakdata.conquery.sql.conversion.SharedAliases; import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; import com.bakdata.conquery.sql.conversion.dialect.SqlDialect; import com.bakdata.conquery.sql.conversion.dialect.SqlFunctionProvider; @@ -22,7 +22,6 @@ public class ConceptQueryConverter implements NodeConverter { - public static final String FINAL_VALIDITY_DATE_COLUMN_NAME = "dates"; private final QueryStepTransformer queryStepTransformer; public ConceptQueryConverter(QueryStepTransformer queryStepTransformer) { @@ -64,7 +63,7 @@ else if (preFinalSelects.getValidityDate().isEmpty()) { return preFinalSelects.withValidityDate(ColumnDateRange.of(emptyRange)); } Field validityDateStringAggregation = functionProvider.validityDateStringAggregation(preFinalSelects.getValidityDate().get()) - .as(FINAL_VALIDITY_DATE_COLUMN_NAME); + .as(SharedAliases.DATES_COLUMN.getAlias()); return preFinalSelects.withValidityDate(ColumnDateRange.of(validityDateStringAggregation)); } @@ -73,7 +72,7 @@ private List> getFinalGroupBySelects(Selects preFinalSelects, SqlDialec return Collections.emptyList(); } List> groupBySelects = new ArrayList<>(); - groupBySelects.add(preFinalSelects.getPrimaryColumn()); + groupBySelects.addAll(preFinalSelects.getIds().toFields()); groupBySelects.addAll(preFinalSelects.explicitSelects()); return groupBySelects; } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptSqlQuery.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptSqlQuery.java index a5a7e67517..b7e7c2aca3 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptSqlQuery.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/ConceptSqlQuery.java @@ -4,25 +4,30 @@ import com.bakdata.conquery.models.query.resultinfo.ResultInfo; import com.bakdata.conquery.sql.conversion.model.SqlQuery; -import lombok.Value; +import lombok.Getter; import org.jooq.Record; import org.jooq.Select; import org.jooq.conf.ParamType; -@Value +@Getter class ConceptSqlQuery implements SqlQuery { - String query; + String sqlString; List resultInfos; - public ConceptSqlQuery(Select query, List resultInfos) { - this.query = query.getSQL(ParamType.INLINED); + public ConceptSqlQuery(Select 
finalQuery, List resultInfos) { + this.sqlString = finalQuery.getSQL(ParamType.INLINED); + this.resultInfos = resultInfos; + } + + protected ConceptSqlQuery(String sqlString, List resultInfos) { + this.sqlString = sqlString; this.resultInfos = resultInfos; } @Override public String getSql() { - return this.query; + return this.sqlString; } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/SecondaryIdQueryConverter.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/SecondaryIdQueryConverter.java new file mode 100644 index 0000000000..2b10605fc4 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/SecondaryIdQueryConverter.java @@ -0,0 +1,29 @@ +package com.bakdata.conquery.sql.conversion.query; + +import com.bakdata.conquery.apiv1.query.SecondaryIdQuery; +import com.bakdata.conquery.sql.conversion.NodeConverter; +import com.bakdata.conquery.sql.conversion.cqelement.ConversionContext; +import com.google.common.base.Preconditions; + +public class SecondaryIdQueryConverter implements NodeConverter { + + @Override + public Class getConversionClass() { + return SecondaryIdQuery.class; + } + + @Override + public ConversionContext convert(SecondaryIdQuery query, ConversionContext context) { + + ConversionContext withConvertedQuery = context.getNodeConversions().convert( + query.getQuery(), + context.withSecondaryIdDescription(query.getSecondaryId()) + ); + + Preconditions.checkArgument(withConvertedQuery.getFinalQuery() != null, "The SecondaryIdQuery's query should be converted by now."); + SecondaryIdSqlQuery secondaryIdSqlQuery = SecondaryIdSqlQuery.overwriteResultInfos(withConvertedQuery.getFinalQuery(), query.getResultInfos()); + + return withConvertedQuery.withFinalQuery(secondaryIdSqlQuery); + } + +} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/SecondaryIdSqlQuery.java b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/SecondaryIdSqlQuery.java new file mode 100644 index 0000000000..aaa1e17399 --- /dev/null +++ b/backend/src/main/java/com/bakdata/conquery/sql/conversion/query/SecondaryIdSqlQuery.java @@ -0,0 +1,18 @@ +package com.bakdata.conquery.sql.conversion.query; + +import java.util.List; + +import com.bakdata.conquery.models.query.resultinfo.ResultInfo; +import com.bakdata.conquery.sql.conversion.model.SqlQuery; + +public class SecondaryIdSqlQuery extends ConceptSqlQuery { + + private SecondaryIdSqlQuery(String sqlString, List resultInfos) { + super(sqlString, resultInfos); + } + + public static SecondaryIdSqlQuery overwriteResultInfos(SqlQuery query, List resultInfos) { + return new SecondaryIdSqlQuery(query.getSql(), resultInfos); + } + +} diff --git a/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlEntityResult.java b/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlEntityResult.java index 91c7a0d1b2..d98d7f9bb0 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlEntityResult.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlEntityResult.java @@ -20,13 +20,12 @@ @CPSType(id="SQL_RESULT", base= EntityResult.class) public class SqlEntityResult implements EntityResult { - private final int entityId; private final String id; private Object[] values; @Override - public int getEntityId() { - return entityId; + public String getEntityId() { + return id; } @Override diff --git a/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionResult.java 
b/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionResult.java index 72393776de..fa9a313918 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionResult.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionResult.java @@ -1,12 +1,14 @@ package com.bakdata.conquery.sql.execution; import java.util.List; +import java.util.stream.Stream; +import com.bakdata.conquery.models.query.ExecutionManager; import com.bakdata.conquery.models.query.results.EntityResult; import lombok.Value; @Value -public class SqlExecutionResult { +public class SqlExecutionResult implements ExecutionManager.Result { List columnNames; List table; @@ -15,7 +17,11 @@ public class SqlExecutionResult { public SqlExecutionResult(List columnNames, List table) { this.columnNames = columnNames; this.table = table; - this.rowCount = table.size(); + rowCount = table.size(); } + @Override + public Stream streamQueryResults() { + return table.stream(); + } } diff --git a/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionService.java b/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionService.java index 311a59cfc6..a2a4ff7050 100644 --- a/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionService.java +++ b/backend/src/main/java/com/bakdata/conquery/sql/execution/SqlExecutionService.java @@ -15,10 +15,9 @@ import com.bakdata.conquery.models.query.resultinfo.ResultInfo; import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.types.ResultType; -import com.bakdata.conquery.sql.conquery.SqlManagedQuery; -import com.google.common.base.Stopwatch; +import com.bakdata.conquery.sql.conversion.model.SqlQuery; +import lombok.Data; import lombok.Getter; -import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.jooq.DSLContext; import org.jooq.Record; @@ -26,8 +25,9 @@ import org.jooq.Select; import org.jooq.exception.DataAccessException; -@RequiredArgsConstructor + @Slf4j +@Data public class SqlExecutionService { private static final int PID_COLUMN_INDEX = 1; @@ -38,55 +38,25 @@ public class SqlExecutionService { private final ResultSetProcessor resultSetProcessor; - public SqlExecutionResult execute(SqlManagedQuery sqlQuery) { - log.info("Starting SQL execution[{}]", sqlQuery.getQueryId()); - Stopwatch stopwatch = Stopwatch.createStarted(); - SqlExecutionResult result = dslContext.connectionResult(connection -> createStatementAndExecute(sqlQuery, connection)); - log.info("Finished SQL execution[{}] with {} results within {}", sqlQuery.getQueryId(), result.getRowCount(), stopwatch.elapsed()); - return result; - } + public SqlExecutionResult execute(SqlQuery sqlQuery) { - public Result fetch(Select query) { - log.debug("Executing query: \n{}", query); - try { - return dslContext.fetch(query); - } - catch (DataAccessException exception) { - throw new ConqueryError.SqlError(exception); - } - } + final SqlExecutionResult result = dslContext.connectionResult(connection -> createStatementAndExecute(sqlQuery, connection)); - /** - * Executes the query and returns the results as a Stream. - *

- * Note: The returned Stream is resourceful. It must be closed by the caller, because it contains a reference to an open {@link ResultSet} - * and {@link PreparedStatement}. - * - * @param query The query to be executed. - * @return A Stream of query results. - */ - public Stream fetchStream(Select query) { - log.debug("Executing query: \n{}", query); - try { - return dslContext.fetchStream(query); - } - catch (DataAccessException exception) { - throw new ConqueryError.SqlError(exception); - } + return result; } - private SqlExecutionResult createStatementAndExecute(SqlManagedQuery sqlQuery, Connection connection) { + private SqlExecutionResult createStatementAndExecute(SqlQuery sqlQuery, Connection connection) { - String sqlString = sqlQuery.getSqlQuery().getSql(); - List> resultTypes = sqlQuery.getSqlQuery().getResultInfos().stream().map(ResultInfo::getType).collect(Collectors.toList()); + final String sqlString = sqlQuery.getSql(); + final List> resultTypes = sqlQuery.getResultInfos().stream().map(ResultInfo::getType).collect(Collectors.toList()); log.info("Executing query: \n{}", sqlString); try (Statement statement = connection.createStatement(); ResultSet resultSet = statement.executeQuery(sqlString)) { - int columnCount = resultSet.getMetaData().getColumnCount(); - List columnNames = getColumnNames(resultSet, columnCount); - List resultTable = createResultTable(resultSet, resultTypes, columnCount); + final int columnCount = resultSet.getMetaData().getColumnCount(); + final List columnNames = getColumnNames(resultSet, columnCount); + final List resultTable = createResultTable(resultSet, resultTypes, columnCount); return new SqlExecutionResult(columnNames, resultTable); } @@ -96,15 +66,6 @@ private SqlExecutionResult createStatementAndExecute(SqlManagedQuery sqlQuery, C } } - private List createResultTable(ResultSet resultSet, List> resultTypes, int columnCount) throws SQLException { - List resultTable = new ArrayList<>(resultSet.getFetchSize()); - while (resultSet.next()) { - SqlEntityResult resultRow = getResultRow(resultSet, resultTypes, columnCount); - resultTable.add(resultRow); - } - return resultTable; - } - private List getColumnNames(ResultSet resultSet, int columnCount) { // JDBC ResultSet indices start with 1 return IntStream.rangeClosed(1, columnCount) @@ -112,6 +73,15 @@ private List getColumnNames(ResultSet resultSet, int columnCount) { .toList(); } + private List createResultTable(ResultSet resultSet, List> resultTypes, int columnCount) throws SQLException { + final List resultTable = new ArrayList<>(resultSet.getFetchSize()); + while (resultSet.next()) { + final SqlEntityResult resultRow = getResultRow(resultSet, resultTypes, columnCount); + resultTable.add(resultRow); + } + return resultTable; + } + private String getColumnName(ResultSet resultSet, int columnIndex) { try { return resultSet.getMetaData().getColumnName(columnIndex); @@ -123,16 +93,44 @@ private String getColumnName(ResultSet resultSet, int columnIndex) { private SqlEntityResult getResultRow(ResultSet resultSet, List> resultTypes, int columnCount) throws SQLException { - int rowNumber = resultSet.getRow(); - String id = resultSet.getString(PID_COLUMN_INDEX); - Object[] resultRow = new Object[columnCount - 1]; + final String id = resultSet.getString(PID_COLUMN_INDEX); + final Object[] resultRow = new Object[columnCount - 1]; for (int resultSetIndex = VALUES_OFFSET_INDEX; resultSetIndex <= columnCount; resultSetIndex++) { - int resultTypeIndex = resultSetIndex - VALUES_OFFSET_INDEX; - 
resultRow[resultTypeIndex] = resultTypes.get(resultTypeIndex).getFromResultSet(resultSet, resultSetIndex, this.resultSetProcessor); + final int resultTypeIndex = resultSetIndex - VALUES_OFFSET_INDEX; + resultRow[resultTypeIndex] = resultTypes.get(resultTypeIndex).getFromResultSet(resultSet, resultSetIndex, resultSetProcessor); + } + + return new SqlEntityResult(id, resultRow); + } + + public Result fetch(Select query) { + log.debug("Executing query: \n{}", query); + try { + return dslContext.fetch(query); + } + catch (DataAccessException exception) { + throw new ConqueryError.SqlError(exception); } + } - return new SqlEntityResult(rowNumber, id, resultRow); + /** + * Executes the query and returns the results as a Stream. + *

+ * Note: The returned Stream is resourceful. It must be closed by the caller, because it contains a reference to an open {@link ResultSet} + * and {@link PreparedStatement}. + * + * @param query The query to be executed. + * @return A Stream of query results. + */ + public Stream fetchStream(Select query) { + log.debug("Executing query: \n{}", query); + try { + return dslContext.fetchStream(query); + } + catch (DataAccessException exception) { + throw new ConqueryError.SqlError(exception); + } } } diff --git a/backend/src/main/java/com/bakdata/conquery/util/VersionInfo.java b/backend/src/main/java/com/bakdata/conquery/util/VersionInfo.java index 44031d5bdd..ae60711cf0 100644 --- a/backend/src/main/java/com/bakdata/conquery/util/VersionInfo.java +++ b/backend/src/main/java/com/bakdata/conquery/util/VersionInfo.java @@ -2,40 +2,50 @@ import java.io.BufferedReader; import java.time.ZonedDateTime; +import java.util.HashMap; +import java.util.Map; import java.util.Properties; import com.github.powerlibraries.io.In; - import lombok.Getter; import lombok.ToString; import lombok.extern.slf4j.Slf4j; -@ToString @Getter @Slf4j +@ToString +@Getter +@Slf4j public class VersionInfo { - + public final static VersionInfo INSTANCE = new VersionInfo(); - + private ZonedDateTime buildTime; private String projectVersion; - + + // Form backend id -> version + private final Map formBackendVersions = new HashMap<>(); + private VersionInfo() { try { Properties properties = new Properties(); - try(BufferedReader in = In.resource("/git.properties").withUTF8().asReader()) { + try (BufferedReader in = In.resource("/git.properties").withUTF8().asReader()) { properties.load(in); } - + String timeProp = properties.getProperty("build.time"); try { buildTime = ZonedDateTime.parse(timeProp); } - catch(Exception e) { + catch (Exception e) { log.error("Could not parse date time from git.properties", e); } - projectVersion =properties.getProperty("project.version"); + projectVersion = properties.getProperty("project.version"); } catch (Exception e) { throw new IllegalStateException("Could not read git properties information", e); } } + + public String setFormBackendVersion(String formBackendId, String version) { + return formBackendVersions.put(formBackendId, version); + } } \ No newline at end of file diff --git a/backend/src/main/java/com/bakdata/conquery/util/dict/SuccinctTrie.java b/backend/src/main/java/com/bakdata/conquery/util/dict/SuccinctTrie.java deleted file mode 100644 index 44e122a9df..0000000000 --- a/backend/src/main/java/com/bakdata/conquery/util/dict/SuccinctTrie.java +++ /dev/null @@ -1,377 +0,0 @@ -package com.bakdata.conquery.util.dict; - -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.List; - -import com.bakdata.conquery.io.cps.CPSType; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.DictionaryEntry; -import com.fasterxml.jackson.annotation.JsonCreator; -import com.fasterxml.jackson.annotation.JsonIgnore; -import com.google.common.collect.AbstractIterator; -import it.unimi.dsi.fastutil.bytes.Byte2ObjectArrayMap; -import it.unimi.dsi.fastutil.bytes.Byte2ObjectMap; -import it.unimi.dsi.fastutil.bytes.ByteArrayList; -import lombok.Data; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.ToString; - -/** - * Implementation of a succinct trie that maps stored strings (byte 
arrays) to an id (https://en.wikipedia.org/wiki/Succinct_data_structure). The id is the node index of the - * starting byte in the trie. To get all bytes of a string, all bytes towards the root must be collected. This means - * that every node in the trie can be the beginning of a string, and that the nodes closest to the root are the endings - * of the string. - *

- * Inserting the strings this way (reversed) into the trie allows lookups in either direction with little computational - * overhead. - */ -@CPSType(id = "SUCCINCT_TRIE", base = Dictionary.class) -@ToString(callSuper = true, onlyExplicitlyIncluded = true) -@Getter -public class SuccinctTrie extends Dictionary { - - @Getter - private int nodeCount; - @Getter - private int depth = 0; - @Getter - private int entryCount; - @Getter - private long totalBytesStored; - // Reverse lookup can be performed with an int array, because the values are - // consecutive increasing - private int[] reverseLookup; - private int[] parentIndex; - private int[] lookup; - - // keyPartArray[x] contains the byte stored in node x - private byte[] keyPartArray; - - @JsonIgnore - private HelpNode root; - - // caches the the access on select0 - private int[] selectZeroCache; - - // indicates whether compress() has been performed and if the trie is ready to - // query - @Getter - @JsonIgnore - private boolean compressed; - - public SuccinctTrie(Dataset dataset, String name) { - super(dataset, name); - this.root = new HelpNode(null, (byte) 0); - this.root.setPositionInArray(0); - this.nodeCount = 2; - entryCount = 0; - } - - @JsonCreator(mode = JsonCreator.Mode.PROPERTIES) - public SuccinctTrie(String name, - Dataset dataset, - int nodeCount, - int entryCount, - int[] reverseLookup, - int[] parentIndex, - int[] lookup, - byte[] keyPartArray, - int[] selectZeroCache, - long totalBytesStored, - int depth) { - super(dataset, name); - this.nodeCount = nodeCount; - this.entryCount = entryCount; - this.reverseLookup = reverseLookup; - this.parentIndex = parentIndex; - this.lookup = lookup; - this.keyPartArray = keyPartArray; - this.selectZeroCache = selectZeroCache; - this.totalBytesStored = totalBytesStored; - this.depth = depth; - - this.root = null; - this.compressed = true; - } - - @Override - public int add(byte[] bytes) { - return put(bytes, entryCount, true); - } - - @Override - public int put(byte[] key) { - return put(key, entryCount, false); - } - - public void checkCompressed(String errorMessage) { - if (!isCompressed()) { - throw new IllegalStateException(errorMessage); - } - } - - public void checkUncompressed(String errorMessage) { - if (isCompressed()) { - throw new IllegalStateException(errorMessage); - } - } - - private int put(byte[] key, int entryCount, boolean failOnDuplicate) { - checkUncompressed("No put allowed after compression"); - - // start at the end of the byte sequence and insert it reversed - int keyIndex = key.length - 1; - HelpNode current = root; - while (keyIndex >= 0) { - // check if a prefix node exists - HelpNode next = current.children.get(key[keyIndex]); - if (next == null) { - // no prefix node could be found, we add a new one - next = new HelpNode(current, key[keyIndex]); - next.setParent(current); - current.addChild(next); - nodeCount++; - - if (next.depth > depth) { - depth = next.depth; - } - - if (nodeCount > Integer.MAX_VALUE - 10) { - throw new IllegalStateException("This dictionary is too large " + nodeCount); - } - } - current = next; - keyIndex--; - } - - // end of key, write the value into current - if (current.getValue() == -1) { - current.setValue(entryCount); - totalBytesStored += key.length; - this.entryCount++; - } - else if (failOnDuplicate) { - throw new IllegalStateException(String.format("the key `%s` was already part of this trie", new String(key, StandardCharsets.UTF_8))); - } - - return current.getValue(); - } - - public void compress() { - if (compressed) { - 
return; - } - - // get the nodes in left right, top down order (level order) - List nodesInOrder = createNodesInOrder(); - - // write the bits - selectZeroCache = new int[nodeCount + 1]; - int position = 2; - int zeroesWritten = 1; - selectZeroCache[1] = 1; - - for (HelpNode node : nodesInOrder) { - position += node.children.size(); - zeroesWritten++; - selectZeroCache[zeroesWritten] = position; - position++; - } - - // free the helpTrie for GC - root = null; - compressed = true; - } - - private List createNodesInOrder() { - ArrayList nodesInOrder = new ArrayList(nodeCount - 1); - - // initialize arrays for rebuilding the data later on - reverseLookup = new int[entryCount]; - parentIndex = new int[nodeCount]; - Arrays.fill(parentIndex, -1); - - lookup = new int[nodeCount]; - Arrays.fill(lookup, -1); - - - keyPartArray = new byte[nodeCount]; - - nodesInOrder.add(root); - for (int index = 0; index < nodeCount - 1; index++) { - HelpNode node = nodesInOrder.get(index); - node.setPositionInArray(index); - if (node != root) { - keyPartArray[index] = node.getPartialKey(); - } - - if (node.getParent() != null) { - parentIndex[index] = node.getParent().getPositionInArray(); - } - - node.getChildren().values().forEach(nodesInOrder::add); - - if (node.getValue() != -1) { - reverseLookup[node.getValue()] = index; - lookup[index] = node.getValue(); - } - } - return nodesInOrder; - } - - @Override - @JsonIgnore - public int getId(byte[] value) { - if (!compressed) { - HelpNode node = root; - for (int i = value.length - 1; i >= 0; i--) { - node = findChildWithKey(node, value[i]); - if (node == null) { - return -1; - } - } - - return node.value; - } - - int node = 0; - // Traverse the tree along the byte[], exiting when we don't find a match - for (int i = value.length - 1; i >= 0; i--) { - - node = childIdWithKey(node, value[i]); - - if (node == -1) { - // no fitting child found - return -1; - } - } - // node has a value - return lookup[node]; - } - - public int findStart(int node) { - return selectZeroCache[node + 1] - node; - } - - private HelpNode findChildWithKey(HelpNode node, byte val) { - return node.children.get(val); - } - - private int childIdWithKey(int node, byte val) { - int firstChildNode = findStart(node); - // get the first child of the next node - int lastChildNode = findStart(node + 1); - - for (int i = firstChildNode; i < lastChildNode; i++) { - if (keyPartArray[i] == val) { - return i; - } - } - // no fitting child found - return -1; - } - - - /** - * The provided id for the string is the index of the trie node that holds the first byte of the sequence. - * From there on, the bytes of the parents until the root are collected to build byte sequence in forward order. 
- * - * @param id the id that references the search byte sequence - * @param buf the buffer into which the bytes are inserted - */ - public void get(int id, ByteArrayList buf) { - checkCompressed("use compress before performing getReverse on the trie"); - - if (id >= reverseLookup.length) { - throw new IllegalArgumentException(String.format("intValue %d too high, no such key in the trie (Have only %d values)", id, reverseLookup.length)); - } - - int nodeIndex = reverseLookup[id]; - int parentIndex = -1; - while ((parentIndex = this.parentIndex[nodeIndex]) != -1) { - buf.add(keyPartArray[nodeIndex]); - nodeIndex = parentIndex; - } - ; - } - - @Override - public int size() { - return entryCount; - } - - @JsonIgnore - public boolean isEmpty() { - return entryCount == 0; - } - - @Data - @RequiredArgsConstructor - public static class Entry { - private final int key; - private final String value; - } - - - @Override - public Iterator iterator() { - - return new AbstractIterator<>() { - - private final ByteArrayList buf = new ByteArrayList(depth); - private int index = 0; - - @Override - protected DictionaryEntry computeNext() { - if (index == entryCount) { - return endOfData(); - } - buf.clear(); - - final int id = index++; - - get(id, buf); - return new DictionaryEntry(id, buf.toByteArray()); - } - }; - } - - @Data - private class HelpNode { - - private final Byte2ObjectMap children = new Byte2ObjectArrayMap<>(); - private final byte partialKey; - private HelpNode parent; - private int value = -1; - private int positionInArray = -1; - private int depth = 0; - - public HelpNode(HelpNode parent, byte key) { - this.parent = parent; - this.partialKey = key; - } - - public void addChild(HelpNode child) { - child.setDepth(this.depth + 1); - this.children.put(child.partialKey, child); - } - - } - - @Override - public byte[] getElement(int id) { - ByteArrayList buf = new ByteArrayList(depth); - get(id, buf); - return buf.toByteArray(); - } - - @Override - public long estimateMemoryConsumption() { - return 13L * getNodeCount() + 4L * size(); - } -} diff --git a/backend/src/main/java/com/bakdata/conquery/util/io/IdColumnUtil.java b/backend/src/main/java/com/bakdata/conquery/util/io/IdColumnUtil.java index 20615f7d82..8c2832477c 100644 --- a/backend/src/main/java/com/bakdata/conquery/util/io/IdColumnUtil.java +++ b/backend/src/main/java/com/bakdata/conquery/util/io/IdColumnUtil.java @@ -12,12 +12,9 @@ import com.bakdata.conquery.models.execution.ManagedExecution; import com.bakdata.conquery.models.identifiable.mapping.AutoIncrementingPseudomizer; import com.bakdata.conquery.models.identifiable.mapping.EntityIdMap; -import com.bakdata.conquery.models.identifiable.mapping.EntityPrintId; import com.bakdata.conquery.models.identifiable.mapping.FullIdPrinter; import com.bakdata.conquery.models.identifiable.mapping.IdPrinter; import com.bakdata.conquery.models.worker.Namespace; -import com.bakdata.conquery.sql.conquery.SqlManagedQuery; -import com.bakdata.conquery.sql.execution.SqlEntityResult; import lombok.experimental.UtilityClass; @UtilityClass @@ -57,10 +54,7 @@ public static IdPrinter getIdPrinter(Subject owner, ManagedExecution execution, if (owner.isPermitted(execution.getDataset(), Ability.PRESERVE_ID)) { // todo(tm): The integration of ids in the sql connector needs to be properly managed - if (execution instanceof SqlManagedQuery) { - return entityResult -> EntityPrintId.from(((SqlEntityResult) entityResult).getId()); - } - return new FullIdPrinter(namespace.getStorage().getPrimaryDictionary(), 
namespace.getStorage().getIdMapping(), size, pos); + return new FullIdPrinter(namespace.getStorage().getIdMapping(), size, pos); } diff --git a/backend/src/main/java/com/bakdata/conquery/util/search/TrieSearch.java b/backend/src/main/java/com/bakdata/conquery/util/search/TrieSearch.java index d01693be0e..923d72186d 100644 --- a/backend/src/main/java/com/bakdata/conquery/util/search/TrieSearch.java +++ b/backend/src/main/java/com/bakdata/conquery/util/search/TrieSearch.java @@ -296,4 +296,4 @@ public Iterator iterator() { ); } -} +} \ No newline at end of file diff --git a/backend/src/main/resources/assets/custom/js/script.js b/backend/src/main/resources/assets/custom/js/script.js index d6dfab59ff..5e12ff0b29 100644 --- a/backend/src/main/resources/assets/custom/js/script.js +++ b/backend/src/main/resources/assets/custom/js/script.js @@ -118,26 +118,21 @@ function loginClickHandler() { }) .then((json) => { var searchParams = new URLSearchParams(window.location.search); - searchParams.set("access_token", json.access_token); + var redirect = new URL(searchParams.get("redirect_uri")) + redirect.searchParams.append("access_token", json.access_token) + // This triggers a page reload - window.location.search = searchParams.toString(); - } - ) + window.location = redirect.toString(); + }) .catch(function (error) { var p = document.createElement('p'); p.appendChild( document.createTextNode('Error: ' + error.message) ); - document.body.insertBefore(p, myImage); + document.getElementById('login-form').insertBefore(p, document.getElementById('login-button')); }); } -function logout() { - event.preventDefault(); - rest('/admin/logout') - .then(function () { location.reload() }); -} - function postFile(event, url) { event.preventDefault(); diff --git a/backend/src/main/resources/com/bakdata/conquery/external/openapi-form-backend.yaml b/backend/src/main/resources/com/bakdata/conquery/external/openapi-form-backend.yaml index 5d999797da..25d2e581a8 100644 --- a/backend/src/main/resources/com/bakdata/conquery/external/openapi-form-backend.yaml +++ b/backend/src/main/resources/com/bakdata/conquery/external/openapi-form-backend.yaml @@ -1,10 +1,10 @@ openapi: 3.0.0 info: title: Form Backend - version: 1.0.0 + version: 1.0.1 description: | API for generic external form backends in [Conquery](https://github.com/ingef/conquery). - + An external form backend implements this API as a server. A Conquery instance is then configured to act as a client. The configuration might look like this: ```json @@ -25,12 +25,12 @@ info: ] } ``` - + It is possible to override every path in this spec. The overrides must be configured in the above configuration accordingly (see [backend class](https://github.com/ingef/conquery/blob/develop/backend/src/main/java/com/bakdata/conquery/models/config/FormBackendConfig.java)). - + Caution: The examples in this spec are used by a mock server in the ExternalFormBackendTest. Changes here might fail that test. 
servers: - - url: "{protocol}://{serverAndPort}:{port}/{basePath}" + - url: '{protocol}://{serverAndPort}:{port}/{basePath}' variables: protocol: default: https @@ -42,7 +42,7 @@ servers: default: localhost description: Server address and optional port if it differs from the protocol's default basePath: - default: "" + default: '' tags: - name: Form Configuration - name: Task @@ -65,14 +65,13 @@ paths: schema: type: array items: - $ref: "#/components/schemas/formConfig" + $ref: '#/components/schemas/formConfig' default: description: Unexpected error content: application/json: schema: $ref: '#/components/schemas/error' - /task: post: summary: Create a new form task @@ -82,33 +81,31 @@ paths: - { } tags: - Task - requestBody: description: Represents the form that should be executed content: application/json: schema: - $ref: "#/components/schemas/form" + $ref: '#/components/schemas/form' responses: '201': description: Form task was successfully created content: application/json: schema: - $ref: "#/components/schemas/taskState" + $ref: '#/components/schemas/taskState' default: description: Unexpected error content: application/json: schema: $ref: '#/components/schemas/error' - /task/{id}: parameters: - in: path name: id schema: - $ref: "#/components/schemas/taskId" + $ref: '#/components/schemas/taskId' required: true description: The task id get: @@ -120,12 +117,12 @@ paths: tags: - Task responses: - "200": + '200': description: State of existing task content: application/json: schema: - $ref: "#/components/schemas/taskState" + $ref: '#/components/schemas/taskState' default: description: Unexpected error content: @@ -137,7 +134,7 @@ paths: - in: path name: id schema: - $ref: "#/components/schemas/taskId" + $ref: '#/components/schemas/taskId' required: true description: The task id post: @@ -149,33 +146,50 @@ paths: tags: - Task responses: - "200": + '200': description: State of the now cancelled task content: application/json: schema: - $ref: "#/components/schemas/taskState" + $ref: '#/components/schemas/taskState' default: description: Unexpected error content: application/json: schema: $ref: '#/components/schemas/error' - /health: get: - summary: Request health State + summary: Request health state operationId: healthCheck - # Cannot do gobal auth and local overrides yet with mocker-server: https://github.com/mock-server/mockserver/issues/1315 tags: - Operation responses: - "200": + '200': description: State of service health content: application/json: schema: - $ref: "#/components/schemas/health" + $ref: '#/components/schemas/health' + default: + description: Unexpected error + content: + application/json: + schema: + $ref: '#/components/schemas/error' + /version: + get: + summary: Request version information + operationId: version + tags: + - Operation + responses: + '200': + description: Version of the form backend + content: + application/json: + schema: + $ref: '#/components/schemas/version' default: description: Unexpected error content: @@ -191,8 +205,8 @@ components: title: type: object example: - "en": "External Form" - "de": "External Form" + en: External Form + de: External Form type: type: string example: SOME_EXTERNAL_FORM @@ -200,33 +214,31 @@ components: type: array items: type: object - required: - type - title additionalProperties: true example: - "title": - "en": "External Form" - "de": "External Form" - "type": "SOME_EXTERNAL_FORM" - "fields": - - "label": - "en": "Cohort" - "de": "Kohorte" - "style": - "size": "h1" - "type": "HEADLINE" + title: + en: External Form + de: 
External Form + type: SOME_EXTERNAL_FORM + fields: + - label: + en: Cohort + de: Kohorte + style: + size: h1 + type: HEADLINE taskId: type: string format: uuid - form: type: object properties: type: type: string - example: "SOME_EXTERNAL_FORM" + example: SOME_EXTERNAL_FORM additionalProperties: true required: - type @@ -234,9 +246,9 @@ components: type: object properties: id: - $ref: "#/components/schemas/taskId" + $ref: '#/components/schemas/taskId' status: - $ref: "#/components/schemas/taskStatus" + $ref: '#/components/schemas/taskStatus' progress: type: number nullable: true @@ -245,19 +257,18 @@ components: results: type: array items: - $ref: "#/components/schemas/resultAsset" + $ref: '#/components/schemas/resultAsset' error: - $ref: "#/components/schemas/error" + $ref: '#/components/schemas/error' example: - "id": "3fa85f64-5717-4562-b3fc-2c963f66afa6" - "status": "SUCCESS" - "progress": 1.0 - "results": - - label: "Result" - url: "/result.txt" - - label: "Another Result" - url: "/another_result.txt" - + id: 3fa85f64-5717-4562-b3fc-2c963f66afa6 + status: SUCCESS + progress: 1 + results: + - label: Result + url: /result.txt + - label: Another Result + url: /another_result.txt taskStatus: type: string enum: @@ -274,8 +285,8 @@ components: type: string format: url example: - label: "Result" - url: "/result.txt" + label: Result + url: /result.txt error: type: object properties: @@ -289,11 +300,11 @@ components: context: type: object example: - id: "3fa85f64-5717-4562-b3fc-2c963f66afa6" - code: "SOME_ERROR" - message: "This is a default template message, for a fallback. Use a template variable from the context like this: ${temp_var}" + id: 3fa85f64-5717-4562-b3fc-2c963f66afa6 + code: SOME_ERROR + message: 'This is a default template message, for a fallback. Use a template variable from the context like this: ${temp_var}' context: - temp_var: "resolved variable" + temp_var: resolved variable health: type: object properties: @@ -304,8 +315,14 @@ components: example: I'm good required: - healthy + version: + type: object + properties: + version: + type: string + example: "3.2.1-ge966c285" securitySchemes: ApiKeyAuth: type: apiKey name: X-API-KEY - in: header \ No newline at end of file + in: header diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/dataset.html.ftl b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/dataset.html.ftl index 4ce4c45e7b..51b1aa8e22 100644 --- a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/dataset.html.ftl +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/dataset.html.ftl @@ -40,8 +40,8 @@ <@infoCard.infoCard class="d-inline-flex" title="Dataset ${c.ds.label}" - labels=["ID", "Label", "Dictionaries", "Size", "IdMapping"] - values=[c.ds.id, label, layout.si(c.dictionariesSize)+"B", layout.si(c.size)+"B", idMapping] + labels=["ID", "Label", "Size", "IdMapping"] + values=[c.ds.id, label, layout.si(c.size)+"B", idMapping] />
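Aside on the two version-related pieces introduced above: the form-backend OpenAPI spec now declares a `/version` operation whose response is an object with a single `version` string, and `VersionInfo` (earlier in this patch) gained a `formBackendVersions` map plus `setFormBackendVersion(formBackendId, version)` to cache what each backend reports. A minimal, illustrative sketch of how the two fit together follows; the standalone `main`, the HTTP plumbing, and the `"form_backend"` id are assumptions for the example only and do not show Conquery's actual polling path.

```java
// Illustrative only: fetch the new /version endpoint of an external form backend and
// record the reported version in the map added to VersionInfo in this change set.
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

import com.bakdata.conquery.util.VersionInfo;
import com.fasterxml.jackson.databind.ObjectMapper;

public class FormBackendVersionProbe {

    public static void main(String[] args) throws Exception {
        // Port 1080 matches the mock form backend started in ExternalFormBackendTest
        URI versionUri = URI.create("http://localhost:1080/version");

        HttpResponse<String> response = HttpClient.newHttpClient()
                .send(HttpRequest.newBuilder(versionUri).GET().build(), HttpResponse.BodyHandlers.ofString());

        // Per the OpenAPI example above, the body looks like {"version": "3.2.1-ge966c285"}
        String version = new ObjectMapper().readTree(response.body()).path("version").asText();

        // New in this change set: VersionInfo keeps a form-backend-id -> version map
        VersionInfo.INSTANCE.setFormBackendVersion("form_backend", version);
        System.out.println("Form backend reports version " + version);
    }
}
```

The collected map is what `ExternalFormBackendTest` later asserts on via `FrontendConfiguration.formBackendVersions()`.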

diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/login.html.ftl b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/login.html.ftl index a0b47f36cb..24f4903cab 100644 --- a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/login.html.ftl +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/login.html.ftl @@ -1,40 +1,29 @@ - - - - - +<#import "templates/base.html.ftl" as base> +<@base.html "Conquery Admin Login"> - Conquery Admin Login - - - - - - -
-
-
-

Conquery

-

Admin Login

-

Please sign in

-
-
- - -
-
- - -
- -
-
- -
-
- - - - - - \ No newline at end of file + +
+
+
+

Conquery

+

Admin Login

+

Please sign in

+
+
+ + +
+
+ + +
+ +
+
+
+
+ + \ No newline at end of file diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/logins.html.ftl b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/logins.html.ftl index 62e7cd48be..b3baddc192 100644 --- a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/logins.html.ftl +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/logins.html.ftl @@ -1,15 +1,23 @@ - - - - +<#import "templates/base.html.ftl" as base> +<@base.html "Conquery Admin UI"> -Logins - -

Available Logins

-
    - <#list c as login_schema> -
  1. ${login_schema}
  2. - -
- - \ No newline at end of file + +
+
+
+

Conquery

+

Available Logins

+ +
+
+
+ + \ No newline at end of file diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/script.html.ftl b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/script.html.ftl index 908e4653ac..b8d0881c84 100644 --- a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/script.html.ftl +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/script.html.ftl @@ -11,45 +11,17 @@ var samples = { "null": "", "datasetLabel": "com.bakdata.conquery.models.worker.Namespace ns = namespaces.get(new DatasetId(\"demo\"));\nns.getDataset().setLabel(\"Demo\");\nns.getStorage().updateDataset(ns.getDataset());", - "translateId": `namespaces.get(new DatasetId("demo")).getStorage().getPrimaryDictionary().getId("3124")`, "addPermission": `namespaces.getMetaStorage().addPermission( new com.bakdata.conquery.models.auth.permissions.DatasetPermission(new com.bakdata.conquery.models.identifiable.ids.specific.UserId("demo@demo.com"), com.bakdata.conquery.models.auth.permissions.Ability.READ.asSet(), new com.bakdata.conquery.models.identifiable.ids.specific.DatasetId("test")) -)`, - "dictionaryOverview": `result = [] -; -for(def ns : namespaces.getNamespaces()) { - storage = ns.getStorage(); - result.add(new Tuple( - storage.getPrimaryDictionary().estimateTypeSize(), - storage.getPrimaryDictionary().size(), - "PID "+ns.getDataset().getName(), - storage.getPrimaryDictionary().getElement(0) - )); - for(def imp : storage.getAllImports()) { - if(imp.getTable().getTable()!="ALL_IDS_TABLE") { - for(def col : imp.getColumns()) { - if(col.getType() instanceof com.bakdata.conquery.models.events.stores.specific.AStringType) { - result.add(new Tuple( - col.getType().estimateTypeSize(), - col.getType().size(), - col.getId(), - col.getType().getElement(0) - )); - } - } - } - } -} -result.sort{-it[0]}; -String print ="size\tentries\tname\texample\\n"; -for(def t:result) { - print+="\${com.jakewharton.byteunits.BinaryByteUnit.format(t[0] -)}\t\${t[1]}\t\${t[2]}\t\${t[3]}\\n"; -} -return print;` - }; +)` }; + + + + + + Sample Scripts:
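Aside on the sample scripts above: the dictionary-related snippets (`translateId`, `dictionaryOverview`) are dropped because the primary dictionary no longer exists, while the `datasetLabel` sample is kept. Written out as plain Java with explicit imports, that remaining sample looks roughly like the sketch below; it assumes `namespaces` is the `DatasetRegistry` bound into the admin script console, and the import paths for classes not shown in this diff are assumptions as well.

```java
// The kept "datasetLabel" sample, expanded into a self-contained method.
// In the admin script console a registry is available as `namespaces`;
// here it is passed in explicitly so the snippet compiles on its own.
import com.bakdata.conquery.models.identifiable.ids.specific.DatasetId;
import com.bakdata.conquery.models.worker.DatasetRegistry;
import com.bakdata.conquery.models.worker.Namespace;

public class RelabelDatasetScript {

    public static void relabel(DatasetRegistry<? extends Namespace> namespaces) {
        Namespace ns = namespaces.get(new DatasetId("demo"));
        ns.getDataset().setLabel("Demo");
        ns.getStorage().updateDataset(ns.getDataset());
    }
}
```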
diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/table.html.ftl b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/table.html.ftl index 02e5a7e039..6ece489be3 100644 --- a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/table.html.ftl +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/table.html.ftl @@ -14,9 +14,6 @@ ?first />
    - <#if element.sharedDictionary??> -
  • Shared Dictionary ${(""?no_esc+element.sharedDictionary+""?no_esc)!}
  • - <#if element.secondaryId??>
  • ${element.secondaryId}
  • @@ -35,8 +32,8 @@ <@infoCard.infoCard class="d-inline-flex" title="Table ${c.table.label}" - labels=["ID", "Label", "Dictionaries", "CBlocks", "Size"] - values=[c.table.id, c.table.label, layout.si(c.dictionariesSize)+"B", layout.si(c.getCBlocksSize())+"B", layout.si(c.size)+"B"] + labels=["ID", "Label", "CBlocks", "Size"] + values=[c.table.id, c.table.label, layout.si(c.getCBlocksSize())+"B", layout.si(c.size)+"B"] /> <@accordion.accordionGroup class="mt-3"> diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/README.md b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/README.md new file mode 100644 index 0000000000..ab3e385a88 --- /dev/null +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/README.md @@ -0,0 +1,4 @@ +# Debuging FTL + +Sometimes FTL errors are printed nowhere, e.g. if an import fails. +In this case but a breakpoint in `freemarker.template.TemplateExceptionHandler#DEBUG_HANDLER`. \ No newline at end of file diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/base.html.ftl b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/base.html.ftl new file mode 100644 index 0000000000..e8e5a180b4 --- /dev/null +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/base.html.ftl @@ -0,0 +1,23 @@ +<#macro html title> + + + + + + + + + + + + + + + + + + ${title} + + <#nested /> + + \ No newline at end of file diff --git a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/template.html.ftl b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/template.html.ftl index be86153ece..c0fd512493 100644 --- a/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/template.html.ftl +++ b/backend/src/main/resources/com/bakdata/conquery/resources/admin/ui/templates/template.html.ftl @@ -1,100 +1,80 @@ +<#import "base.html.ftl" as base> <#macro layout> - - + <@base.html "Conquery Admin UI" > + + - Conquery Admin UI - +
    + <#nested /> +
    - - - - - +
    +
    +
    - - -
    - <#nested /> -
    - -
    -
    -
    - - - - + + <#-- General key-value macro --> diff --git a/backend/src/test/java/com/bakdata/conquery/api/StoredQueriesProcessorTest.java b/backend/src/test/java/com/bakdata/conquery/api/StoredQueriesProcessorTest.java index ceed909a25..69c3233289 100644 --- a/backend/src/test/java/com/bakdata/conquery/api/StoredQueriesProcessorTest.java +++ b/backend/src/test/java/com/bakdata/conquery/api/StoredQueriesProcessorTest.java @@ -35,7 +35,6 @@ import com.bakdata.conquery.models.config.CsvResultProvider; import com.bakdata.conquery.models.config.ExcelResultProvider; import com.bakdata.conquery.models.config.ParquetResultProvider; -import com.bakdata.conquery.models.config.auth.DevelopmentAuthorizationConfig; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.datasets.SecondaryIdDescription; import com.bakdata.conquery.models.execution.ExecutionState; @@ -51,16 +50,16 @@ import com.bakdata.conquery.models.worker.DistributedNamespace; import com.bakdata.conquery.util.NonPersistentStoreFactory; import com.google.common.collect.ImmutableList; +import io.dropwizard.setup.Environment; import lombok.SneakyThrows; +import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; public class StoredQueriesProcessorTest { private static final MetaStorage STORAGE = new NonPersistentStoreFactory().createMetaStorage(); - // Marked Unused, but does inject itself. - public static final AuthorizationController AUTHORIZATION_CONTROLLER = new AuthorizationController(STORAGE, new DevelopmentAuthorizationConfig()); public static final ConqueryConfig CONFIG = new ConqueryConfig(); - private static final DatasetRegistry datasetRegistry = new DatasetRegistry<>(0, CONFIG, null, null, null, null); + private static final DatasetRegistry datasetRegistry = new DatasetRegistry<>(0, CONFIG, null, null, null); private static final QueryProcessor processor = new QueryProcessor(datasetRegistry, STORAGE, CONFIG); private static final Dataset DATASET_0 = new Dataset() {{ @@ -117,6 +116,11 @@ private static ManagedExecutionId createExecutionId(Dataset dataset0, String s) ); + @BeforeAll + public static void beforeAll() { + new AuthorizationController(STORAGE, CONFIG, new Environment(StoredQueriesProcessorTest.class.getSimpleName()), null); + } + @Test public void getQueriesFiltered() { diff --git a/backend/src/test/java/com/bakdata/conquery/api/form/config/FormConfigTest.java b/backend/src/test/java/com/bakdata/conquery/api/form/config/FormConfigTest.java index d74b89ac95..6773eb3996 100644 --- a/backend/src/test/java/com/bakdata/conquery/api/form/config/FormConfigTest.java +++ b/backend/src/test/java/com/bakdata/conquery/api/form/config/FormConfigTest.java @@ -33,7 +33,6 @@ import com.bakdata.conquery.models.auth.permissions.FormConfigPermission; import com.bakdata.conquery.models.auth.permissions.FormPermission; import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.config.auth.DevelopmentAuthorizationConfig; import com.bakdata.conquery.models.datasets.Dataset; import com.bakdata.conquery.models.forms.configs.FormConfig; import com.bakdata.conquery.models.forms.configs.FormConfig.FormConfigFullRepresentation; @@ -54,6 +53,7 @@ import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.TextNode; import io.dropwizard.jersey.validation.Validators; +import io.dropwizard.setup.Environment; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.BeforeEach; 
@@ -123,7 +123,7 @@ else if (id.equals(datasetId1)) { ((MutableInjectableValues) FormConfigProcessor.getMAPPER().getInjectableValues()) .add(IdResolveContext.class, namespacesMock); processor = new FormConfigProcessor(validator, storage, namespacesMock); - controller = new AuthorizationController(storage, new DevelopmentAuthorizationConfig()); + controller = new AuthorizationController(storage, config, new Environment(this.getClass().getSimpleName()), null); controller.start(); } diff --git a/backend/src/test/java/com/bakdata/conquery/integration/ConqueryIntegrationTests.java b/backend/src/test/java/com/bakdata/conquery/integration/ConqueryIntegrationTests.java index 7c643bb4cd..89ddb15661 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/ConqueryIntegrationTests.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/ConqueryIntegrationTests.java @@ -22,6 +22,7 @@ public ConqueryIntegrationTests() { @Override @TestFactory + @Tag(TestTags.INTEGRATION_JSON) public List jsonTests() { return super.jsonTests(); diff --git a/backend/src/test/java/com/bakdata/conquery/integration/common/LoadingUtil.java b/backend/src/test/java/com/bakdata/conquery/integration/common/LoadingUtil.java index b3e26e9f9a..89837a6112 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/common/LoadingUtil.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/common/LoadingUtil.java @@ -376,7 +376,7 @@ private static void uploadSearchIndex(@NonNull StandaloneSupport support, @NonNu public static void updateMatchingStats(@NonNull StandaloneSupport support) { final URI matchingStatsUri = HierarchyHelper.hierarchicalPath(support.defaultAdminURIBuilder() - , AdminDatasetResource.class, "updateMatchingStats") + , AdminDatasetResource.class, "postprocessNamespace") .buildFromMap(Map.of(DATASET, support.getDataset().getId())); final Response post = support.getClient().target(matchingStatsUri) diff --git a/backend/src/test/java/com/bakdata/conquery/integration/common/RequiredColumn.java b/backend/src/test/java/com/bakdata/conquery/integration/common/RequiredColumn.java index 5cdd57ee3b..fbbd76227e 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/common/RequiredColumn.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/common/RequiredColumn.java @@ -23,7 +23,6 @@ public class RequiredColumn { private String name; @NotNull private MajorTypeId type; - private String sharedDictionary; @Nullable private String description; @@ -48,7 +47,6 @@ public Column toColumn(Table table, CentralRegistry storage) { Column col = new Column(); col.setName(name); col.setType(type); - col.setSharedDictionary(sharedDictionary); col.setTable(table); col.setDescription(description); diff --git a/backend/src/test/java/com/bakdata/conquery/integration/json/SqlTestDataImporter.java b/backend/src/test/java/com/bakdata/conquery/integration/json/SqlTestDataImporter.java index a7e945f33d..375fa9d140 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/json/SqlTestDataImporter.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/json/SqlTestDataImporter.java @@ -27,6 +27,7 @@ public class SqlTestDataImporter implements TestDataImporter { @Override public void importQueryTestData(StandaloneSupport support, QueryTest test) throws Exception { RequiredData content = test.getContent(); + importSecondaryIds(support, content.getSecondaryIds()); importTables(support, content.getTables(), true); importConcepts(support, test.getRawConcepts()); 
importTableContents(support, content.getTables()); diff --git a/backend/src/test/java/com/bakdata/conquery/integration/sql/CsvTableImporter.java b/backend/src/test/java/com/bakdata/conquery/integration/sql/CsvTableImporter.java index b8534cc997..bb5f40995b 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/sql/CsvTableImporter.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/sql/CsvTableImporter.java @@ -82,9 +82,8 @@ public void importTableIntoDatabase(RequiredTable requiredTable) { public List readExpectedEntities(Path csv) throws IOException { List rawEntities = this.csvReader.parseAll(Files.newInputStream(csv)); List results = new ArrayList<>(rawEntities.size()); - for (int i = 0; i < rawEntities.size(); i++) { - String[] row = rawEntities.get(i); - results.add(new SqlEntityResult(i + 1, row[0], Arrays.copyOfRange(row, 1, row.length))); + for (String[] row : rawEntities) { + results.add(new SqlEntityResult(row[0], Arrays.copyOfRange(row, 1, row.length))); } return results; } diff --git a/backend/src/test/java/com/bakdata/conquery/integration/sql/dialect/PostgreSqlIntegrationTests.java b/backend/src/test/java/com/bakdata/conquery/integration/sql/dialect/PostgreSqlIntegrationTests.java index 3053d6f9e1..614efe4ff7 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/sql/dialect/PostgreSqlIntegrationTests.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/sql/dialect/PostgreSqlIntegrationTests.java @@ -5,7 +5,6 @@ import java.util.stream.Stream; import com.bakdata.conquery.TestTags; -import com.bakdata.conquery.apiv1.query.ConceptQuery; import com.bakdata.conquery.integration.ConqueryIntegrationTests; import com.bakdata.conquery.integration.IntegrationTests; import com.bakdata.conquery.integration.json.SqlTestDataImporter; @@ -16,7 +15,6 @@ import com.bakdata.conquery.models.i18n.I18n; import com.bakdata.conquery.models.query.resultinfo.ResultInfo; import com.bakdata.conquery.sql.DslContextFactory; -import com.bakdata.conquery.sql.conquery.SqlManagedQuery; import com.bakdata.conquery.sql.conversion.dialect.PostgreSqlDialect; import com.bakdata.conquery.sql.conversion.model.SqlQuery; import com.bakdata.conquery.sql.conversion.supplier.DateNowSupplier; @@ -82,11 +80,11 @@ public void shouldThrowException() { // This can be removed as soon as we switch to a full integration test including the REST API I18n.init(); SqlExecutionService executionService = new SqlExecutionService(dslContext, ResultSetProcessorFactory.create(testSqlDialect)); - SqlManagedQuery validQuery = new SqlManagedQuery(new ConceptQuery(), null, null, null, toSqlQuery("SELECT 1")); + SqlQuery validQuery = toSqlQuery("SELECT 1"); Assertions.assertThatNoException().isThrownBy(() -> executionService.execute(validQuery)); // executing an empty query should throw an SQL error - SqlManagedQuery emptyQuery = new SqlManagedQuery(new ConceptQuery(), null, null, null, toSqlQuery("")); + SqlQuery emptyQuery = toSqlQuery(""); Assertions.assertThatThrownBy(() -> executionService.execute(emptyQuery)) .isInstanceOf(ConqueryError.SqlError.class) .hasMessageContaining("$org.postgresql.util.PSQLException"); diff --git a/backend/src/test/java/com/bakdata/conquery/integration/tests/ConceptResolutionTest.java b/backend/src/test/java/com/bakdata/conquery/integration/tests/ConceptResolutionTest.java index 3b41f04627..3472aa1f03 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/tests/ConceptResolutionTest.java +++ 
b/backend/src/test/java/com/bakdata/conquery/integration/tests/ConceptResolutionTest.java @@ -43,7 +43,7 @@ public void execute(StandaloneSupport conquery) throws Exception { test.importRequiredData(conquery); final URI matchingStatsUri = HierarchyHelper.hierarchicalPath(conquery.defaultAdminURIBuilder() - , AdminDatasetResource.class, "updateMatchingStats") + , AdminDatasetResource.class, "postprocessNamespace") .buildFromMap(Map.of(DATASET, conquery.getDataset().getId())); conquery.getClient().target(matchingStatsUri) diff --git a/backend/src/test/java/com/bakdata/conquery/integration/tests/ExternalFormBackendTest.java b/backend/src/test/java/com/bakdata/conquery/integration/tests/ExternalFormBackendTest.java index 93617d472f..fd4afd6863 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/tests/ExternalFormBackendTest.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/tests/ExternalFormBackendTest.java @@ -4,16 +4,18 @@ import static org.mockserver.model.HttpRequest.request; import java.io.File; -import java.io.IOException; import java.net.URI; import java.nio.file.Path; import java.util.Collections; import java.util.List; +import java.util.Map; +import javax.ws.rs.core.MediaType; import javax.ws.rs.core.UriBuilder; import com.bakdata.conquery.apiv1.execution.FullExecutionStatus; import com.bakdata.conquery.apiv1.execution.ResultAsset; +import com.bakdata.conquery.apiv1.frontend.FrontendConfiguration; import com.bakdata.conquery.integration.common.IntegrationUtils; import com.bakdata.conquery.io.result.ExternalResult; import com.bakdata.conquery.models.auth.entities.User; @@ -26,7 +28,9 @@ import com.bakdata.conquery.models.execution.ManagedExecution; import com.bakdata.conquery.models.forms.frontendconfiguration.FormScanner; import com.bakdata.conquery.models.identifiable.ids.specific.ManagedExecutionId; +import com.bakdata.conquery.resources.api.ConfigResource; import com.bakdata.conquery.resources.api.ResultExternalResource; +import com.bakdata.conquery.resources.hierarchies.HierarchyHelper; import com.bakdata.conquery.util.support.StandaloneSupport; import com.bakdata.conquery.util.support.TestConquery; import lombok.SneakyThrows; @@ -66,6 +70,18 @@ public void execute(String name, TestConquery testConquery) throws Exception { final String externalFormId = FormBackendConfig.createSubTypedId("SOME_EXTERNAL_FORM"); assertThat(FormScanner.FRONTEND_FORM_CONFIGS.keySet()).contains(externalFormId); + log.info("Get version info"); + final UriBuilder apiUriBuilder = testConquery.getSupport(name).defaultApiURIBuilder(); + final URI frontendConfigURI = HierarchyHelper.hierarchicalPath(apiUriBuilder.clone(), ConfigResource.class, "getFrontendConfig") + .build(); + final FrontendConfiguration + frontendConfiguration = + support.getClient().target(frontendConfigURI).request(MediaType.APPLICATION_JSON_TYPE).get().readEntity(FrontendConfiguration.class); + + assertThat(frontendConfiguration.formBackendVersions()) + .describedAs("Checking health of form backend") + .containsExactlyEntriesOf(Map.of(FORM_BACKEND_ID, "3.2.1-ge966c285")); // example value from OpenAPI Spec + log.info("Send an external form"); final User testUser = support.getTestUser(); final ManagedExecutionId @@ -73,11 +89,11 @@ public void execute(String name, TestConquery testConquery) throws Exception { IntegrationUtils.assertQueryResult(support, String.format("{\"type\": \"%s\", \"testProp\": \"testVal\"}", externalFormId), -1, ExecutionState.DONE, testUser, 201); log.info("Request state"); + assert 
managedExecutionId != null; final FullExecutionStatus executionStatus = IntegrationUtils.getExecutionStatus(support, managedExecutionId, testUser, 200); // Generate asset urls and check them in the status - final UriBuilder apiUriBuilder = testConquery.getSupport(name).defaultApiURIBuilder(); final ManagedExecution storedExecution = testConquery.getSupport(name).getMetaStorage().getExecution(managedExecutionId); final URI downloadURLasset1 = @@ -106,7 +122,6 @@ public void execute(String name, TestConquery testConquery) throws Exception { } @Override - @SneakyThrows(IOException.class) public ConqueryConfig overrideConfig(ConqueryConfig conf, File workdir) { // Prepare mock server final URI baseURI = createFormServer(); @@ -133,7 +148,7 @@ public ConqueryConfig overrideConfig(ConqueryConfig conf, File workdir) { @SneakyThrows @NotNull - private URI createFormServer() throws IOException { + private URI createFormServer() { log.info("Starting mock form backend server"); formBackend = ClientAndServer.startClientAndServer(1080); diff --git a/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterAutocompleteTest.java b/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterAutocompleteTest.java index 08cf5cc4b0..227d40e0e2 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterAutocompleteTest.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterAutocompleteTest.java @@ -13,6 +13,7 @@ import java.util.Set; import javax.ws.rs.client.Entity; +import javax.ws.rs.client.Invocation; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; @@ -91,7 +92,7 @@ public void execute(StandaloneSupport conquery) throws Exception { filter.setTemplate(new FilterTemplate(conquery.getDataset(), "test", tmpCSv.toUri(), "id", "{{label}}", "Hello this is {{option}}", 2, true, indexService)); final URI matchingStatsUri = HierarchyHelper.hierarchicalPath(conquery.defaultAdminURIBuilder() - , AdminDatasetResource.class, "updateMatchingStats") + , AdminDatasetResource.class, "postprocessNamespace") .buildFromMap(Map.of(DATASET, conquery.getDataset().getId())); conquery.getClient().target(matchingStatsUri) @@ -115,58 +116,60 @@ public void execute(StandaloneSupport conquery) throws Exception { ) ); + final Invocation.Builder autocompleteRequestBuilder = conquery.getClient().target(autocompleteUri) + .request(MediaType.APPLICATION_JSON_TYPE); // Data starting with a is in reference csv { - final Response fromCsvResponse = conquery.getClient().target(autocompleteUri) - .request(MediaType.APPLICATION_JSON_TYPE) - .post(Entity.entity(new FilterResource.AutocompleteRequest( - Optional.of("a"), - OptionalInt.empty(), - OptionalInt.empty() - ), MediaType.APPLICATION_JSON_TYPE)); + try (final Response fromCsvResponse = autocompleteRequestBuilder.post(Entity.entity(new FilterResource.AutocompleteRequest( + Optional.of("a"), + OptionalInt.empty(), + OptionalInt.empty() + ), MediaType.APPLICATION_JSON_TYPE))) { - final ConceptsProcessor.AutoCompleteResult resolvedFromCsv = fromCsvResponse.readEntity(ConceptsProcessor.AutoCompleteResult.class); + final ConceptsProcessor.AutoCompleteResult resolvedFromCsv = fromCsvResponse.readEntity(ConceptsProcessor.AutoCompleteResult.class); // "aaa" occurs after "aab" due to it consisting only of duplicate entries. 
// The empty string results from `No V*a*lue` and `..Def*au*lt..` - assertThat(resolvedFromCsv.values().stream().map(FrontendValue::getValue)) - .containsExactly("a", "aab", "aaa", "" /* `No V*a*lue` :^) */, "baaa"); + assertThat(resolvedFromCsv.values().stream().map(FrontendValue::getValue)) + .containsExactly("a", "aab", "aaa", "" /* `No V*a*lue` :^) */, "baaa"); + + } } // Data starting with f is in column values { - final Response fromCsvResponse = conquery.getClient().target(autocompleteUri) - .request(MediaType.APPLICATION_JSON_TYPE) - .post(Entity.entity(new FilterResource.AutocompleteRequest( - Optional.of("f"), - OptionalInt.empty(), - OptionalInt.empty() - ), MediaType.APPLICATION_JSON_TYPE)); - - final ConceptsProcessor.AutoCompleteResult resolvedFromValues = fromCsvResponse.readEntity(ConceptsProcessor.AutoCompleteResult.class); - - //check the resolved values - assertThat(resolvedFromValues.values().stream().map(FrontendValue::getValue)) - .containsExactly("", "f", "fm"); + try (final Response fromCsvResponse = autocompleteRequestBuilder + .post(Entity.entity(new FilterResource.AutocompleteRequest( + Optional.of("f"), + OptionalInt.empty(), + OptionalInt.empty() + ), MediaType.APPLICATION_JSON_TYPE))) { + + final ConceptsProcessor.AutoCompleteResult resolvedFromValues = fromCsvResponse.readEntity(ConceptsProcessor.AutoCompleteResult.class); + + //check the resolved values + assertThat(resolvedFromValues.values().stream().map(FrontendValue::getValue)) + .containsExactly("", "f", "fm"); + } } // Data starting with a is in reference csv { - final Response fromCsvResponse = conquery.getClient().target(autocompleteUri) - .request(MediaType.APPLICATION_JSON_TYPE) - .post(Entity.entity(new FilterResource.AutocompleteRequest( - Optional.of(""), - OptionalInt.empty(), - OptionalInt.empty() - ), MediaType.APPLICATION_JSON_TYPE)); - - final ConceptsProcessor.AutoCompleteResult resolvedFromCsv = fromCsvResponse.readEntity(ConceptsProcessor.AutoCompleteResult.class); + try (final Response fromCsvResponse = autocompleteRequestBuilder + .post(Entity.entity(new FilterResource.AutocompleteRequest( + Optional.of(""), + OptionalInt.empty(), + OptionalInt.empty() + ), MediaType.APPLICATION_JSON_TYPE))) { + + final ConceptsProcessor.AutoCompleteResult resolvedFromCsv = fromCsvResponse.readEntity(ConceptsProcessor.AutoCompleteResult.class); // This is probably the insertion order - assertThat(resolvedFromCsv.values().stream().map(FrontendValue::getValue)) + assertThat(resolvedFromCsv.values().stream().map(FrontendValue::getValue)) .containsExactlyInAnyOrder("", "a", "aab", "aaa", "baaa", "b", "f", "m", "mf", "fm"); + } } } } diff --git a/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterResolutionTest.java b/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterResolutionTest.java index bd39ba768d..f4a8983766 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterResolutionTest.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/tests/FilterResolutionTest.java @@ -82,7 +82,7 @@ public void execute(StandaloneSupport conquery) throws Exception { filter.setTemplate(new FilterTemplate(conquery.getDataset(), "test", tmpCSv.toUri(), "HEADER", "", "", 2, true, indexService)); final URI matchingStatsUri = HierarchyHelper.hierarchicalPath(conquery.defaultAdminURIBuilder() - , AdminDatasetResource.class, "updateMatchingStats") + , AdminDatasetResource.class, "postprocessNamespace") .buildFromMap(Map.of(DATASET, 
conquery.getDataset().getId())); final Response post = conquery.getClient().target(matchingStatsUri) diff --git a/backend/src/test/java/com/bakdata/conquery/integration/tests/QueryStatisticsTest.java b/backend/src/test/java/com/bakdata/conquery/integration/tests/QueryStatisticsTest.java index 2258fb9c34..2c94d39c10 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/tests/QueryStatisticsTest.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/tests/QueryStatisticsTest.java @@ -74,10 +74,6 @@ public void execute(String name, TestConquery testConquery) throws Exception { "", 6, 0, - new TreeMap<>(Map.of( - "2021-1", 5, - "2021-4", 1 - )), new TreeMap<>(Map.of( "2021-01", 5, "2021-10", 1 @@ -93,10 +89,6 @@ public void execute(String name, TestConquery testConquery) throws Exception { 6, 0, - new TreeMap<>(Map.of( - "2021-1", 5, - "2021-4", 1 - )), new TreeMap<>(Map.of( "2021-01", 5, "2021-10", 1 @@ -115,7 +107,9 @@ public void execute(String name, TestConquery testConquery) throws Exception { new HistogramColumnDescription.Entry("b", 1), new HistogramColumnDescription.Entry("d", 1) ), - Map.of() + Map.of( + labels.missing(), "1" + ) ), new HistogramColumnDescription( "concept mapped", @@ -127,7 +121,9 @@ public void execute(String name, TestConquery testConquery) throws Exception { new HistogramColumnDescription.Entry("BEH", 1), new HistogramColumnDescription.Entry("d", 1) ), - Map.of() + Map.of( + labels.missing(), "0" + ) ), new HistogramColumnDescription( "concept int", @@ -211,7 +207,9 @@ public void execute(String name, TestConquery testConquery) throws Exception { new HistogramColumnDescription.Entry("Yes", 4), new HistogramColumnDescription.Entry("No", 1) ), - Map.of() + Map.of( + labels.missing(), "1" + ) ) ), Range.of(LocalDate.of(2021, 1, 1), LocalDate.of(2021, 10, 1)) diff --git a/backend/src/test/java/com/bakdata/conquery/integration/tests/deletion/TableDeletionTest.java b/backend/src/test/java/com/bakdata/conquery/integration/tests/deletion/TableDeletionTest.java index 04b2a7bb84..c20ac871ec 100644 --- a/backend/src/test/java/com/bakdata/conquery/integration/tests/deletion/TableDeletionTest.java +++ b/backend/src/test/java/com/bakdata/conquery/integration/tests/deletion/TableDeletionTest.java @@ -177,9 +177,9 @@ public void execute(String name, TestConquery testConquery) throws Exception { } } - log.info("Executing query after deletion"); + log.info("Executing query after deletion. Expecting a failure here."); - // Issue a query and asseert that it has less content. + // Issue a query and assert that it has less content. 
IntegrationUtils.assertQueryResult(conquery, query, 0L, ExecutionState.FAILED, conquery.getTestUser(), 400); } diff --git a/backend/src/test/java/com/bakdata/conquery/io/AbstractSerializationTest.java b/backend/src/test/java/com/bakdata/conquery/io/AbstractSerializationTest.java index 2e0a9d36e7..e09081772f 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/AbstractSerializationTest.java +++ b/backend/src/test/java/com/bakdata/conquery/io/AbstractSerializationTest.java @@ -41,7 +41,7 @@ public void before() { InternalObjectMapperCreator creator = new InternalObjectMapperCreator(config, validator); final IndexService indexService = new IndexService(config.getCsv().createCsvParserSettings(), "emptyDefaultLabel"); final ClusterNamespaceHandler clusterNamespaceHandler = new ClusterNamespaceHandler(new ClusterState(), config, creator); - datasetRegistry = new DatasetRegistry<>(0, config, null, clusterNamespaceHandler, indexService, null); + datasetRegistry = new DatasetRegistry<>(0, config, null, clusterNamespaceHandler, indexService); metaStorage = new MetaStorage(new NonPersistentStoreFactory(), datasetRegistry); datasetRegistry.setMetaStorage(metaStorage); creator.init(datasetRegistry); diff --git a/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/IdRefrenceTest.java b/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/IdRefrenceTest.java index 86334a2f02..69f77b30f9 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/IdRefrenceTest.java +++ b/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/IdRefrenceTest.java @@ -39,7 +39,7 @@ public void testListReferences() throws IOException { registry.register(dataset); registry.register(table); - final DatasetRegistry datasetRegistry = new DatasetRegistry<>(0, null, null, null, null, null); + final DatasetRegistry datasetRegistry = new DatasetRegistry<>(0, null, null, null, null); final MetaStorage metaStorage = new MetaStorage(new NonPersistentStoreFactory(),datasetRegistry); diff --git a/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/SerializationTestUtil.java b/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/SerializationTestUtil.java index 1bc5d7d178..6b51838920 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/SerializationTestUtil.java +++ b/backend/src/test/java/com/bakdata/conquery/io/jackson/serializer/SerializationTestUtil.java @@ -7,6 +7,7 @@ import java.lang.ref.SoftReference; import java.lang.ref.WeakReference; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.UnaryOperator; import javax.validation.Validator; @@ -61,6 +62,8 @@ public class SerializationTestUtil { private boolean forceHashCodeEqual = false; + private UnaryOperator<RecursiveComparisonAssert<?>> assertCustomizer = UnaryOperator.identity(); + public static SerializationTestUtil forType(TypeReference type) { return new SerializationTestUtil<>(Jackson.MAPPER.getTypeFactory().constructType(type)); } @@ -69,6 +72,10 @@ public static SerializationTestUtil forType(Class type) { return new SerializationTestUtil<>(Jackson.MAPPER.copy().getTypeFactory().constructType(type)); } + public static SerializationTestUtil forArrayType(TypeReference elementType) { + return new SerializationTestUtil<>(Jackson.MAPPER.getTypeFactory().constructArrayType(Jackson.MAPPER.getTypeFactory().constructType(elementType))); + } + public SerializationTestUtil objectMappers(ObjectMapper...
objectMappers) { this.objectMappers = objectMappers; return this; @@ -84,6 +91,11 @@ public SerializationTestUtil checkHashCode() { return this; } + public SerializationTestUtil customizingAssertion(UnaryOperator<RecursiveComparisonAssert<?>> assertCustomizer) { + this.assertCustomizer = assertCustomizer; + return this; + } + public void test(T value, T expected) throws JSONException, IOException { if (objectMappers == null || objectMappers.length == 0) { fail("No objectmappers were set"); @@ -134,7 +146,11 @@ private void test(T value, T expected, ObjectMapper mapper) throws IOException { RecursiveComparisonAssert<?> ass = assertThat(copy) .as("Unequal after copy.") - .usingRecursiveComparison().ignoringFieldsOfTypes(TYPES_TO_IGNORE); + .usingRecursiveComparison() + .ignoringFieldsOfTypes(TYPES_TO_IGNORE); + + // Apply assertion customizations + ass = assertCustomizer.apply(ass); ass.isEqualTo(expected); } diff --git a/backend/src/test/java/com/bakdata/conquery/io/result/ResultTestUtil.java b/backend/src/test/java/com/bakdata/conquery/io/result/ResultTestUtil.java index 161c93b063..5f61310cbc 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/result/ResultTestUtil.java +++ b/backend/src/test/java/com/bakdata/conquery/io/result/ResultTestUtil.java @@ -54,10 +54,10 @@ public static List getResultTypes() { @NotNull public static List getTestEntityResults() { return List.of( - new SinglelineEntityResult(1, new Object[]{Boolean.TRUE, 2345634, 123423.34, 5646, List.of(345, 534), "test_string", 4521, List.of(true, false), List.of(List.of(345, 534), List.of(1, 2)), List.of("fizz", "buzz")}), - new SinglelineEntityResult(2, new Object[]{Boolean.FALSE, null, null, null, null, null, null, List.of(), List.of(List.of(1234, Integer.MAX_VALUE)), List.of()}), - new SinglelineEntityResult(2, new Object[]{Boolean.TRUE, null, null, null, null, null, null, List.of(false, false), null, null}), - new MultilineEntityResult(3, List.of( + new SinglelineEntityResult("1", new Object[]{Boolean.TRUE, 2345634, 123423.34, 5646, List.of(345, 534), "test_string", 4521, List.of(true, false), List.of(List.of(345, 534), List.of(1, 2)), List.of("fizz", "buzz")}), + new SinglelineEntityResult("2", new Object[]{Boolean.FALSE, null, null, null, null, null, null, List.of(), List.of(List.of(1234, Integer.MAX_VALUE)), List.of()}), + new SinglelineEntityResult("2", new Object[]{Boolean.TRUE, null, null, null, null, null, null, List.of(false, false), null, null}), + new MultilineEntityResult("3", List.of( new Object[]{Boolean.FALSE, null,null, null, null, null, null, List.of(false), null, null}, new Object[]{Boolean.TRUE, null, null, null, null, null, null, null, null, null}, new Object[]{Boolean.TRUE, null, null, null, null, null, 4, List.of(true, false, true, false), null, null} diff --git a/backend/src/test/java/com/bakdata/conquery/io/result/arrow/ArrowResultGenerationTest.java b/backend/src/test/java/com/bakdata/conquery/io/result/arrow/ArrowResultGenerationTest.java index 3c641e37a8..eefac9709a 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/result/arrow/ArrowResultGenerationTest.java +++ b/backend/src/test/java/com/bakdata/conquery/io/result/arrow/ArrowResultGenerationTest.java @@ -120,7 +120,7 @@ void writeAndRead() throws IOException { Locale.ROOT, null, CONFIG, - (cer) -> EntityPrintId.from(Integer.toString(cer.getEntityId()), Integer.toString(cer.getEntityId())), + (cer) -> EntityPrintId.from(cer.getEntityId(), cer.getEntityId()), (selectInfo) -> selectInfo.getSelect().getLabel()); // The Shard nodes send Object[] but since Jackson is
used for deserialization, nested collections are always a list because they are not further specialized List results = getTestEntityResults(); diff --git a/backend/src/test/java/com/bakdata/conquery/io/result/csv/CsvResultGenerationTest.java b/backend/src/test/java/com/bakdata/conquery/io/result/csv/CsvResultGenerationTest.java index e7d9e89183..60e23c85e2 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/result/csv/CsvResultGenerationTest.java +++ b/backend/src/test/java/com/bakdata/conquery/io/result/csv/CsvResultGenerationTest.java @@ -46,7 +46,7 @@ void writeAndRead() throws IOException { Locale.GERMAN, null, CONFIG, - (cer) -> EntityPrintId.from(Integer.toString(cer.getEntityId()), Integer.toString(cer.getEntityId())), + (cer) -> EntityPrintId.from(cer.getEntityId(), cer.getEntityId()), (selectInfo) -> selectInfo.getSelect().getLabel()); // The Shard nodes send Object[] but since Jackson is used for deserialization, nested collections are always a list because they are not further specialized List results = getTestEntityResults(); diff --git a/backend/src/test/java/com/bakdata/conquery/io/result/excel/ExcelResultRenderTest.java b/backend/src/test/java/com/bakdata/conquery/io/result/excel/ExcelResultRenderTest.java index b19db6dfc5..eff77b3c41 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/result/excel/ExcelResultRenderTest.java +++ b/backend/src/test/java/com/bakdata/conquery/io/result/excel/ExcelResultRenderTest.java @@ -60,7 +60,7 @@ void writeAndRead() throws IOException { Locale.GERMAN, null, CONFIG, - (cer) -> EntityPrintId.from(Integer.toString(cer.getEntityId()), Integer.toString(cer.getEntityId())), + (cer) -> EntityPrintId.from(cer.getEntityId(), cer.getEntityId()), (selectInfo) -> selectInfo.getSelect().getLabel()); // The Shard nodes send Object[] but since Jackson is used for deserialization, nested collections are always a list because they are not further specialized List results = getTestEntityResults(); diff --git a/backend/src/test/java/com/bakdata/conquery/io/result/parquet/ParquetResultGenerationTest.java b/backend/src/test/java/com/bakdata/conquery/io/result/parquet/ParquetResultGenerationTest.java index f991afa214..b4c3723a04 100644 --- a/backend/src/test/java/com/bakdata/conquery/io/result/parquet/ParquetResultGenerationTest.java +++ b/backend/src/test/java/com/bakdata/conquery/io/result/parquet/ParquetResultGenerationTest.java @@ -105,7 +105,7 @@ void writeAndRead() throws IOException { Locale.ROOT, null, CONFIG, - (cer) -> EntityPrintId.from(Integer.toString(cer.getEntityId()), Integer.toString(cer.getEntityId())), + (cer) -> EntityPrintId.from(cer.getEntityId(), cer.getEntityId()), (selectInfo) -> selectInfo.getSelect().getLabel() ); // The Shard nodes send Object[] but since Jackson is used for deserialization, nested collections are always a list because they are not further specialized diff --git a/backend/src/test/java/com/bakdata/conquery/io/storage/xodus/stores/BigStoreTest.java b/backend/src/test/java/com/bakdata/conquery/io/storage/xodus/stores/BigStoreTest.java deleted file mode 100644 index 4ce7bb25d7..0000000000 --- a/backend/src/test/java/com/bakdata/conquery/io/storage/xodus/stores/BigStoreTest.java +++ /dev/null @@ -1,134 +0,0 @@ -package com.bakdata.conquery.io.storage.xodus.stores; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.SequenceInputStream; -import java.nio.file.Files; -import 
java.util.concurrent.Executors; - -import com.bakdata.conquery.io.jackson.Jackson; -import com.bakdata.conquery.io.storage.StoreMappings; -import com.bakdata.conquery.models.config.XodusStoreFactory; -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.EncodedDictionary; -import com.bakdata.conquery.models.dictionary.MapDictionary; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; -import com.bakdata.conquery.models.exceptions.JSONException; -import com.bakdata.conquery.models.identifiable.CentralRegistry; -import com.bakdata.conquery.models.identifiable.ids.specific.DictionaryId; -import com.bakdata.conquery.models.worker.SingletonNamespaceCollection; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.Iterators; -import com.google.common.primitives.Ints; -import io.dropwizard.jersey.validation.Validators; -import io.dropwizard.util.DataSize; -import jetbrains.exodus.env.Environment; -import jetbrains.exodus.env.Environments; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.BeforeAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Test; - -public class BigStoreTest { - - private File tmpDir; - private Environment env; - - private static final CentralRegistry CENTRAL_REGISTRY = new CentralRegistry(); - private static SingletonNamespaceCollection NAMESPACE_COLLECTION = new SingletonNamespaceCollection(CENTRAL_REGISTRY); - private static ObjectMapper MAPPER = NAMESPACE_COLLECTION.injectIntoNew(Jackson.BINARY_MAPPER); - - @BeforeAll - public static void setupRegistry(){ - CENTRAL_REGISTRY.register(Dataset.PLACEHOLDER); - } - - @BeforeEach - public void init() throws IOException { - tmpDir = Files.createTempDirectory(BigStoreTest.class.getSimpleName()).toFile(); - tmpDir.mkdirs(); - env = Environments.newInstance(tmpDir); - - } - - @AfterEach - public void destroy() throws IOException { - env.close(); - tmpDir.delete(); - } - - @Test - public void testFull() throws JSONException, IOException { - - BigStore store = new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, - StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER, Executors.newSingleThreadExecutor() - ); - - - store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes())); - - Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER, "dict"); - - for (int v = 0; v < 1000000; v++) { - nDict.add(Integer.toHexString(v).getBytes()); - } - - // check if manual serialization deserialization works - byte[] bytes = Jackson.BINARY_MAPPER.writeValueAsBytes(nDict); - - - Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class); - - - for (int v = 0; v < 1000000; v++) { - assertThat(simpleCopy.getId(Integer.toHexString(v).getBytes())).isEqualTo(v); - } - - // check if store works - store.add(nDict.getId(), nDict); - - // check if the bytes in the store are the same as bytes - assertThat( - new SequenceInputStream(Iterators.asEnumeration( - store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()))) - .hasSameContentAs(new ByteArrayInputStream(bytes)); - - EncodedDictionary copy = new EncodedDictionary(store.get(nDict.getId()), EncodedStringStore.Encoding.UTF8); - for (int v = 0; v < 1000000; v++) { - assertThat(copy.getId(Integer.toHexString(v))).isEqualTo(v); - } - - } - - @Test - public void testEmpty() throws 
JSONException, IOException { - BigStore store = new BigStore<>(new XodusStoreFactory(), Validators.newValidator(), env, - StoreMappings.DICTIONARIES.storeInfo(), (e) -> {}, (e) -> {}, MAPPER, Executors.newSingleThreadExecutor() - ); - store.setChunkByteSize(Ints.checkedCast(DataSize.megabytes(1).toBytes())); - - Dictionary nDict = new MapDictionary(Dataset.PLACEHOLDER,"dict"); - - // check if manual serialization deserialization works - byte[] bytes = MAPPER.writeValueAsBytes(nDict); - Dictionary simpleCopy = MAPPER.readValue(bytes, Dictionary.class); - assertThat(simpleCopy).isEmpty(); - - // check if store works - store.add(nDict.getId(), nDict); - - // check if the bytes in the store are the same as bytes - assertThat( - new SequenceInputStream(Iterators.asEnumeration( - store.getMetaStore().get(nDict.getId()).loadData(store.getDataStore()).map(ByteArrayInputStream::new).iterator()))) - .hasSameContentAs(new ByteArrayInputStream(bytes)); - - Dictionary copy = store.get(nDict.getId()); - assertThat(copy).isEmpty(); - } - -} diff --git a/backend/src/test/java/com/bakdata/conquery/models/SerializationTests.java b/backend/src/test/java/com/bakdata/conquery/models/SerializationTests.java index 18e949e1b3..5f717a149d 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/SerializationTests.java +++ b/backend/src/test/java/com/bakdata/conquery/models/SerializationTests.java @@ -49,8 +49,6 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector; import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.MapDictionary; import com.bakdata.conquery.models.error.ConqueryError; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.CBlock; @@ -81,8 +79,6 @@ import com.bakdata.conquery.models.query.results.EntityResult; import com.bakdata.conquery.models.query.results.MultilineEntityResult; import com.bakdata.conquery.util.SerialisationObjectsUtil; -import com.bakdata.conquery.util.dict.SuccinctTrie; -import com.bakdata.conquery.util.dict.SuccinctTrieTest; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.core.type.TypeReference; import com.fasterxml.jackson.databind.JsonNode; @@ -90,18 +86,21 @@ import com.fasterxml.jackson.databind.node.JsonNodeFactory; import com.fasterxml.jackson.databind.node.ObjectNode; import com.fasterxml.jackson.databind.node.TextNode; -import com.github.powerlibraries.io.In; import com.google.common.collect.BiMap; import com.google.common.collect.HashBiMap; import io.dropwizard.jersey.validation.Validators; -import lombok.Getter; +import it.unimi.dsi.fastutil.objects.Object2IntMap; +import it.unimi.dsi.fastutil.objects.Object2IntMaps; +import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap; import lombok.extern.slf4j.Slf4j; +import org.assertj.core.api.RecursiveComparisonAssert; +import org.junit.jupiter.api.Tag; import org.junit.jupiter.api.Test; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; + @Slf4j -@Getter public class SerializationTests extends AbstractSerializationTest { @Test @@ -180,6 +179,7 @@ public void group() throws IOException, JSONException { @Test + @Tag("OBJECT_2_INT_MAP") // Bucket uses Object2IntMap public void bucketCompoundDateRange() throws JSONException, IOException { Dataset dataset = new Dataset(); 
dataset.setName("datasetName"); @@ -212,15 +212,14 @@ public void bucketCompoundDateRange() throws JSONException, IOException { imp.setName("importTest"); - CompoundDateRangeStore - compoundStore = + CompoundDateRangeStore compoundStore = new CompoundDateRangeStore(startCol.getName(), endCol.getName(), new BitSetStore(BitSet.valueOf(new byte[]{0b1000}), new BitSet(), 4)); //0b1000 is a binary representation of 8 so that the 4th is set to make sure that BitSet length is 4. ColumnStore startStore = new IntegerDateStore(new ShortArrayStore(new short[]{1, 2, 3, 4}, Short.MIN_VALUE)); ColumnStore endStore = new IntegerDateStore(new ShortArrayStore(new short[]{5, 6, 7, 8}, Short.MIN_VALUE)); - Bucket bucket = new Bucket(0, 1, 4, new ColumnStore[]{startStore, endStore, compoundStore}, Collections.emptySet(), new int[0], new int[0], imp); + Bucket bucket = new Bucket(0, 4, new ColumnStore[]{startStore, endStore, compoundStore}, Object2IntMaps.emptyMap(), Object2IntMaps.emptyMap(), imp); compoundStore.setParent(bucket); @@ -484,7 +483,7 @@ public void executionCreationResolveError() throws JSONException, IOException { @Test public void executionQueryJobError() throws JSONException, IOException { log.info("Beware, this test will print an ERROR message."); - ConqueryError error = new ConqueryError.ExecutionJobErrorWrapper(new Entity(5), new ConqueryError.UnknownError(null)); + ConqueryError error = new ConqueryError.ExecutionJobErrorWrapper(new Entity("5"), new ConqueryError.UnknownError(null)); SerializationTestUtil .forType(ConqueryError.class) @@ -558,24 +557,6 @@ public void testFormQuery() throws IOException, JSONException { .test(query); } - @Test - void testMapDictionary() throws IOException, JSONException { - - MapDictionary map = new MapDictionary(Dataset.PLACEHOLDER, "dictionary"); - - map.add("a".getBytes()); - map.add("b".getBytes()); - map.add("c".getBytes()); - - final CentralRegistry registry = getMetaStorage().getCentralRegistry(); - registry.register(Dataset.PLACEHOLDER); - - SerializationTestUtil - .forType(MapDictionary.class) - .objectMappers(getManagerInternalMapper(), getShardInternalMapper()) - .registry(registry) - .test(map); - } @Test public void serialize() throws IOException, JSONException { @@ -601,7 +582,7 @@ public void serialize() throws IOException, JSONException { final Import imp = new Import(table); imp.setName("import"); - final Bucket bucket = new Bucket(0, 0, 0, new ColumnStore[0], Collections.emptySet(), new int[10], new int[10], imp); + final Bucket bucket = new Bucket(0, 0, new ColumnStore[0], Object2IntMaps.emptyMap(), Object2IntMaps.emptyMap(), imp); final CBlock cBlock = CBlock.createCBlock(connector, bucket, 10); @@ -619,27 +600,6 @@ public void serialize() throws IOException, JSONException { .test(cBlock); } - @Test - public void testSuccinctTrie() - throws IOException, JSONException { - - final CentralRegistry registry = getMetaStorage().getCentralRegistry(); - registry.register(Dataset.PLACEHOLDER); - - SuccinctTrie dict = new SuccinctTrie(Dataset.PLACEHOLDER, "testDict"); - - In.resource(SuccinctTrieTest.class, "SuccinctTrieTest.data").streamLines() - .forEach(value -> dict.put(value.getBytes())); - - dict.compress(); - SerializationTestUtil - .forType(Dictionary.class) - .objectMappers(getManagerInternalMapper(), getShardInternalMapper()) - .registry(registry) - .test(dict); - } - - @Test public void testBiMapSerialization() throws JSONException, IOException { BiMap map = HashBiMap.create(); @@ -668,11 +628,11 @@ public void testNonStrictNumbers() 
throws JSONException, IOException { .forType(EntityResult.class) .objectMappers(getApiMapper(), getManagerInternalMapper()) .test( - new MultilineEntityResult(4, List.of( + new MultilineEntityResult("4", List.of( new Object[]{0, 1, 2}, new Object[]{Double.NaN, Double.NEGATIVE_INFINITY, Double.POSITIVE_INFINITY} )), - new MultilineEntityResult(4, List.of( + new MultilineEntityResult("4", List.of( new Object[]{0, 1, 2}, new Object[]{null, null, null} )) @@ -821,4 +781,57 @@ public void externalFormToFormBackend() throws JsonProcessingException { assertThat(actual).as("Result of mixin for form backend").isEqualTo(expected); } + @Test + @Tag("OBJECT_2_INT_MAP") + public void object2IntEmpty() throws JSONException, IOException { + Object2IntMap<String> empty = Object2IntMaps.emptyMap(); + + SerializationTestUtil.forType(new TypeReference<Object2IntMap<String>>() { + }) + .objectMappers(getApiMapper(), getShardInternalMapper(), getManagerInternalMapper()) + .customizingAssertion(RecursiveComparisonAssert::ignoringCollectionOrder) + .test(empty); + + } + + @Test + @Tag("OBJECT_2_INT_MAP") + public void object2IntString() throws JSONException, IOException { + Object2IntMap<String> map = new Object2IntOpenHashMap<>(); + + map.put("zero", 0); + map.put("one", 1); + map.put("two", 2); + SerializationTestUtil.forType(new TypeReference<Object2IntMap<String>>() { + }) + .objectMappers(getApiMapper(), getShardInternalMapper(), getManagerInternalMapper()) + .customizingAssertion(RecursiveComparisonAssert::ignoringCollectionOrder) + .test(map); + + } + + @Test + @Tag("OBJECT_2_INT_MAP") + public void arrayObject2Int() throws JSONException, IOException { + Object2IntMap<String>[] map = new Object2IntOpenHashMap[]{ + new Object2IntOpenHashMap<>() {{ + put("zero", 0); + }}, + new Object2IntOpenHashMap<>() {{ + put("zero", 0); + put("one", 1); + }}, + new Object2IntOpenHashMap<>() {{ + put("zero", 0); + put("one", 1); + put("two", 2); + }} + }; + SerializationTestUtil.forArrayType(new TypeReference<Object2IntMap<String>>() { + }).objectMappers(getApiMapper(), getShardInternalMapper(), getManagerInternalMapper()) + .customizingAssertion(RecursiveComparisonAssert::ignoringCollectionOrder) + .test(map); + + } + } diff --git a/backend/src/test/java/com/bakdata/conquery/models/auth/IntrospectionDelegatingRealmTest.java b/backend/src/test/java/com/bakdata/conquery/models/auth/IntrospectionDelegatingRealmTest.java index 7ca02946a9..5193bf847d 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/auth/IntrospectionDelegatingRealmTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/auth/IntrospectionDelegatingRealmTest.java @@ -9,6 +9,7 @@ import static org.mockserver.model.Parameter.param; import static org.mockserver.model.ParameterBody.params; +import java.net.URI; import java.util.Map; import java.util.Set; import java.util.UUID; @@ -93,6 +94,7 @@ public class IntrospectionDelegatingRealmTest { public static final KeycloakGroup KEYCLOAK_GROUP_2 = new KeycloakGroup(UUID.randomUUID().toString(), "Group2", "g2", Map.of(GROUP_ID_ATTRIBUTE, new GroupId(GROUPNAME_2).toString()), Set.of()); + public static final URI FRONT_CHANNEL_LOGOUT = URI.create("http://localhost:1080/realms/test_realm/protocol/openid-connect/logout"); private static OIDCMockServer OIDC_SERVER; private static TestRealm REALM; @@ -206,7 +208,7 @@ public void tokenIntrospectionSimpleUserNew() { .usingRecursiveComparison() .ignoringFields(ConqueryAuthenticationInfo.Fields.credentials) .ignoringFieldsOfTypes(User.ShiroUserAdapter.class) - .isEqualTo(new ConqueryAuthenticationInfo(USER_1, USER1_TOKEN_WRAPPED, REALM,
true)); + .isEqualTo(new ConqueryAuthenticationInfo(USER_1, USER1_TOKEN_WRAPPED, REALM, true, FRONT_CHANNEL_LOGOUT)); assertThat(STORAGE.getAllUsers()).containsOnly(new User(USER_1_NAME, USER_1_NAME, STORAGE)); } @@ -219,7 +221,7 @@ public void tokenIntrospectionSimpleUserExisting() { assertThat(info) .usingRecursiveComparison() .ignoringFields(ConqueryAuthenticationInfo.Fields.credentials) - .isEqualTo(new ConqueryAuthenticationInfo(USER_1, USER1_TOKEN_WRAPPED, REALM, true)); + .isEqualTo(new ConqueryAuthenticationInfo(USER_1, USER1_TOKEN_WRAPPED, REALM, true, FRONT_CHANNEL_LOGOUT)); assertThat(STORAGE.getAllUsers()).containsOnly(USER_1); } @@ -229,7 +231,7 @@ public void tokenIntrospectionGroupedUser() { AuthenticationInfo info = REALM.doGetAuthenticationInfo(USER_2_TOKEN_WRAPPED); - final ConqueryAuthenticationInfo expected = new ConqueryAuthenticationInfo(USER_2, USER_2_TOKEN_WRAPPED, REALM, true); + final ConqueryAuthenticationInfo expected = new ConqueryAuthenticationInfo(USER_2, USER_2_TOKEN_WRAPPED, REALM, true, FRONT_CHANNEL_LOGOUT); assertThat(info) .usingRecursiveComparison() .isEqualTo(expected); @@ -251,7 +253,7 @@ public void tokenIntrospectionGroupedUserRemoveGroupMapping() { assertThat(info) .usingRecursiveComparison() .ignoringFields(ConqueryAuthenticationInfo.Fields.credentials) - .isEqualTo(new ConqueryAuthenticationInfo(USER_3, USER_3_TOKEN_WRAPPED, REALM, true)); + .isEqualTo(new ConqueryAuthenticationInfo(USER_3, USER_3_TOKEN_WRAPPED, REALM, true, FRONT_CHANNEL_LOGOUT)); assertThat(STORAGE.getAllUsers()).containsOnly(USER_3); assertThat(STORAGE.getAllGroups()).hasSize(1); // Pre-existing group assertThat(STORAGE.getGroup(new GroupId(GROUPNAME_1)).getMembers()).doesNotContain(new UserId(USER_3_NAME)); diff --git a/backend/src/test/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealmTest.java b/backend/src/test/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealmTest.java index 6b544c3e2d..1c39b1ac37 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealmTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/auth/oidc/JwtPkceVerifyingRealmTest.java @@ -18,6 +18,7 @@ import com.auth0.jwt.JWT; import com.auth0.jwt.algorithms.Algorithm; import com.bakdata.conquery.io.storage.MetaStorage; +import com.bakdata.conquery.models.auth.entities.Role; import com.bakdata.conquery.models.auth.entities.User; import com.bakdata.conquery.models.config.auth.JwtPkceVerifyingRealmFactory; import com.bakdata.conquery.models.identifiable.ids.specific.UserId; @@ -34,13 +35,13 @@ class JwtPkceVerifyingRealmTest { + public static final int TOKEN_LEEWAY = 60; private static final MetaStorage STORAGE = new NonPersistentStoreFactory().createMetaStorage(); private static final String HTTP_REALM_URL = "http://realm.url"; private static final String AUDIENCE = "test_aud"; private static final String ALTERNATIVE_ID_CLAIM = "alternativeId"; - public static final int TOKEN_LEEWAY = 60; - private static JwtPkceVerifyingRealm REALM; private static final String KEY_ID = "valid_key_id"; + private static JwtPkceVerifyingRealm REALM; private static RSAPrivateKey PRIVATE_KEY; private static RSAPublicKey PUBLIC_KEY; @@ -56,7 +57,7 @@ static void setup() throws NoSuchAlgorithmException { // Create the realm REALM = new JwtPkceVerifyingRealm( - () -> Optional.of(new JwtPkceVerifyingRealmFactory.IdpConfiguration(Map.of(KEY_ID, PUBLIC_KEY), URI.create("auth"), URI.create("token"), HTTP_REALM_URL)), + () -> Optional.of(new 
JwtPkceVerifyingRealmFactory.IdpConfiguration(Map.of(KEY_ID, PUBLIC_KEY), URI.create("auth"), URI.create("token"), URI.create("logout"), HTTP_REALM_URL)), AUDIENCE, List.of(JwtPkceVerifyingRealmFactory.ScriptedTokenChecker.create("t.getOtherClaims().get(\"groups\").equals(\"conquery\")")), List.of(ALTERNATIVE_ID_CLAIM), @@ -85,12 +86,45 @@ void verifyToken() { .withIssuedAt(issueDate) .withExpiresAt(expDate) .withKeyId(KEY_ID) + .withJWTId(UUID.randomUUID().toString()) .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); BearerToken accessToken = new BearerToken(token); assertThat(REALM.doGetAuthenticationInfo(accessToken).getPrincipals().getPrimaryPrincipal()).isEqualTo(expected); } + @Test + void verifyTokenAndAddRole() { + + // Setup the expected user id + User expected = new User("Test", "Test", STORAGE); + Role role = new Role("admin", "admin", STORAGE); + + STORAGE.updateRole(role); + STORAGE.updateUser(expected); + + Date issueDate = new Date(); + Date expDate = DateUtils.addMinutes(issueDate, 1); + String token = JWT.create() + .withIssuer(HTTP_REALM_URL) + .withAudience(AUDIENCE) + .withSubject(expected.getName()) + .withIssuedAt(issueDate) + .withExpiresAt(expDate) + .withClaim("groups", "conquery") + .withClaim("resource_access", Map.of(AUDIENCE, Map.of("roles", List.of("admin", "unknown")))) // See structure of AccessToken.Access + .withIssuedAt(issueDate) + .withExpiresAt(expDate) + .withKeyId(KEY_ID) + .withJWTId(UUID.randomUUID().toString()) + .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); + + BearerToken accessToken = new BearerToken(token); + + assertThat(REALM.doGetAuthenticationInfo(accessToken).getPrincipals().getPrimaryPrincipal()).isEqualTo(expected); + assertThat(expected.getRoles()).contains(role.getId()); + } + @Test void verifyTokenInLeeway() { @@ -113,6 +147,7 @@ void verifyTokenInLeeway() { .withIssuedAt(issueDate) .withExpiresAt(expDate) .withKeyId(KEY_ID) + .withJWTId(UUID.randomUUID().toString()) .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); BearerToken accessToken = new BearerToken(token); @@ -139,7 +174,9 @@ void verifyTokenAlternativeId() { .withExpiresAt(expDate) .withClaim(ALTERNATIVE_ID_CLAIM, expected.getName()) .withKeyId(KEY_ID) + .withJWTId(UUID.randomUUID().toString()) .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); + BearerToken accessToken = new BearerToken(token); assertThat(REALM.doGetAuthenticationInfo(accessToken).getPrincipals().getPrimaryPrincipal()).isEqualTo(expected); @@ -183,6 +220,7 @@ void falsifyTokenWrongAudience() { .withIssuedAt(issueDate) .withExpiresAt(expDate) .withKeyId(KEY_ID) + .withJWTId(UUID.randomUUID().toString()) .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); BearerToken accessToken = new BearerToken(token); @@ -203,6 +241,7 @@ void falsifyTokenOutdated() { .withIssuedAt(issueDate) .withExpiresAt(expDate) .withKeyId(KEY_ID) + .withJWTId(UUID.randomUUID().toString()) .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); BearerToken accessToken = new BearerToken(token); @@ -227,6 +266,7 @@ void falsifyTokenWrongIssuer() { .withIssuedAt(issueDate) .withExpiresAt(expDate) .withKeyId(KEY_ID) + .withJWTId(UUID.randomUUID().toString()) .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); BearerToken accessToken = new BearerToken(token); @@ -252,6 +292,7 @@ void falsifyTokenUnknownKid() { .withIssuedAt(issueDate) .withExpiresAt(expDate) .withKeyId("unknown_key_id") + .withJWTId(UUID.randomUUID().toString()) .sign(Algorithm.RSA256(PUBLIC_KEY, PRIVATE_KEY)); BearerToken accessToken = new BearerToken(token); diff --git 
a/backend/src/test/java/com/bakdata/conquery/models/common/RangeTest.java b/backend/src/test/java/com/bakdata/conquery/models/common/RangeTest.java index 9ed314be18..0bfb7d2e64 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/common/RangeTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/common/RangeTest.java @@ -143,20 +143,20 @@ public void coveredQuartersNotAFullQuarter() { public static List deserialize() { return Arrays.asList( Arguments.of( - "{\"min\":\"2017-01-01\", \"max\":\"2017-01-01\"}", - new Range<>(LocalDate.of(2017, 1, 1), LocalDate.of(2017, 1, 1)), - CDateRange.of(LocalDate.of(2017, 1, 1), LocalDate.of(2017, 1, 1)) + "{\"min\":\"2017-01-01\", \"max\":\"2017-01-01\"}", + new Range<>(LocalDate.of(2017, 1, 1), LocalDate.of(2017, 1, 1)), + CDateRange.of(LocalDate.of(2017, 1, 1), LocalDate.of(2017, 1, 1)) ), Arguments.of( - "{\"min\":\"2017-01-01\"}", - new Range<>(LocalDate.of(2017, 1, 1), null), - CDateRange.atLeast(LocalDate.of(2017, 1, 1)) + "{\"min\":\"2017-01-01\"}", + new Range<>(LocalDate.of(2017, 1, 1), null), + CDateRange.atLeast(LocalDate.of(2017, 1, 1)) ) , Arguments.of( - "{\"max\":\"2017-01-01\"}", - new Range<>(null, LocalDate.of(2017, 1, 1)), - CDateRange.atMost(LocalDate.of(2017, 1, 1)) + "{\"max\":\"2017-01-01\"}", + new Range<>(null, LocalDate.of(2017, 1, 1)), + CDateRange.atMost(LocalDate.of(2017, 1, 1)) ) ); } diff --git a/backend/src/test/java/com/bakdata/conquery/models/datasets/concepts/tree/MatchingStatsTests.java b/backend/src/test/java/com/bakdata/conquery/models/datasets/concepts/tree/MatchingStatsTests.java index 0caa9fc8e8..8938a83e1c 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/datasets/concepts/tree/MatchingStatsTests.java +++ b/backend/src/test/java/com/bakdata/conquery/models/datasets/concepts/tree/MatchingStatsTests.java @@ -2,7 +2,6 @@ import static org.assertj.core.api.Assertions.assertThat; -import com.bakdata.conquery.models.common.daterange.CDateRange; import com.bakdata.conquery.models.datasets.Column; import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.MatchingStats; @@ -10,9 +9,6 @@ import com.bakdata.conquery.models.identifiable.ids.specific.WorkerId; import org.junit.jupiter.api.Test; -import java.util.HashMap; -import java.util.Map; - public class MatchingStatsTests { private final WorkerId workerId1 = new WorkerId(new DatasetId("sampleDataset"), "sampleWorker"); @@ -25,13 +21,13 @@ public void entitiesCountTest() { assertThat(stats.countEntities()).isEqualTo(0); - stats.putEntry(workerId1, new MatchingStats.Entry(5, 5, CDateRange.of(10, 20))); + stats.putEntry(workerId1, new MatchingStats.Entry(5, 5, 10, 20)); assertThat(stats.countEntities()).isEqualTo(5); - stats.putEntry(workerId1, new MatchingStats.Entry(5, 8, CDateRange.of(10, 20))); + stats.putEntry(workerId1, new MatchingStats.Entry(5, 8, 10, 20)); assertThat(stats.countEntities()).isEqualTo(8); - stats.putEntry(workerId2, new MatchingStats.Entry(5, 2, CDateRange.of(10, 20))); + stats.putEntry(workerId2, new MatchingStats.Entry(5, 2, 10, 20)); assertThat(stats.countEntities()).isEqualTo(10); @@ -48,17 +44,17 @@ public void addEventTest(){ MatchingStats.Entry entry1 = new MatchingStats.Entry(); - entry1.addEvent(table, null, 1, 1); - entry1.addEvent(table, null, 2, 1); + entry1.addEvent(table, null, 1, "1"); + entry1.addEvent(table, null, 2, "1"); - entry1.addEvent(table, null, 3, 2); - entry1.addEvent(table, null, 4, 2); + entry1.addEvent(table, null, 3, "2"); + 
entry1.addEvent(table, null, 4, "2"); - entry1.addEvent(table, null, 5, 3); - entry1.addEvent(table, null, 6, 3); + entry1.addEvent(table, null, 5, "3"); + entry1.addEvent(table, null, 6, "3"); - entry1.addEvent(table, null, 7, 4); - entry1.addEvent(table, null, 8, 4); + entry1.addEvent(table, null, 7, "4"); + entry1.addEvent(table, null, 8, "4"); @@ -69,20 +65,20 @@ public void addEventTest(){ MatchingStats.Entry entry2 = new MatchingStats.Entry(); - entry2.addEvent(table, null, 1, 1); - entry2.addEvent(table, null, 2, 2); + entry2.addEvent(table, null, 1, "1"); + entry2.addEvent(table, null, 2, "2"); - entry2.addEvent(table, null, 3, 3); - entry2.addEvent(table, null, 4, 4); + entry2.addEvent(table, null, 3, "3"); + entry2.addEvent(table, null, 4, "4"); - entry2.addEvent(table, null, 5, 5); - entry2.addEvent(table, null, 6, 6); + entry2.addEvent(table, null, 5, "5"); + entry2.addEvent(table, null, 6, "6"); - entry2.addEvent(table, null, 7, 7); - entry2.addEvent(table, null, 8, 8); + entry2.addEvent(table, null, 7, "7"); + entry2.addEvent(table, null, 8, "8"); - entry2.addEvent(table, null, 9, 9); - entry2.addEvent(table, null, 10, 10); + entry2.addEvent(table, null, 9, "9"); + entry2.addEvent(table, null, 10, "10"); stats.putEntry(workerId2, entry2); assertThat(stats.countEvents()).isEqualTo(18); diff --git a/backend/src/test/java/com/bakdata/conquery/models/events/stores/types/ColumnStoreSerializationTests.java b/backend/src/test/java/com/bakdata/conquery/models/events/stores/types/ColumnStoreSerializationTests.java index 62309bb1c8..caa533c839 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/events/stores/types/ColumnStoreSerializationTests.java +++ b/backend/src/test/java/com/bakdata/conquery/models/events/stores/types/ColumnStoreSerializationTests.java @@ -15,11 +15,8 @@ import com.bakdata.conquery.io.cps.CPSTypeIdResolver; import com.bakdata.conquery.io.jackson.View; import com.bakdata.conquery.io.jackson.serializer.SerializationTestUtil; -import com.bakdata.conquery.models.common.Range; import com.bakdata.conquery.models.config.ConqueryConfig; import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.dictionary.Dictionary; -import com.bakdata.conquery.models.dictionary.MapDictionary; import com.bakdata.conquery.models.events.EmptyStore; import com.bakdata.conquery.models.events.stores.primitive.BitSetStore; import com.bakdata.conquery.models.events.stores.primitive.ByteArrayStore; @@ -30,6 +27,7 @@ import com.bakdata.conquery.models.events.stores.primitive.IntegerDateStore; import com.bakdata.conquery.models.events.stores.primitive.LongArrayStore; import com.bakdata.conquery.models.events.stores.primitive.ShortArrayStore; +import com.bakdata.conquery.models.events.stores.primitive.StringStoreString; import com.bakdata.conquery.models.events.stores.root.ColumnStore; import com.bakdata.conquery.models.events.stores.specific.CompoundDateRangeStore; import com.bakdata.conquery.models.events.stores.specific.DirectDateRangeStore; @@ -37,12 +35,6 @@ import com.bakdata.conquery.models.events.stores.specific.QuarterDateRangeStore; import com.bakdata.conquery.models.events.stores.specific.RebasingIntegerStore; import com.bakdata.conquery.models.events.stores.specific.ScaledDecimalStore; -import com.bakdata.conquery.models.events.stores.specific.string.DictionaryStore; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore.Encoding; 
-import com.bakdata.conquery.models.events.stores.specific.string.NumberStringStore; -import com.bakdata.conquery.models.events.stores.specific.string.PrefixSuffixStringStore; -import com.bakdata.conquery.models.events.stores.specific.string.SingletonStringStore; import com.bakdata.conquery.models.exceptions.JSONException; import com.bakdata.conquery.models.identifiable.CentralRegistry; import com.fasterxml.jackson.databind.ObjectMapper; @@ -62,14 +54,12 @@ public class ColumnStoreSerializationTests { private static final Set> EXCLUDING = Set.of(CompoundDateRangeStore.class); private static final CentralRegistry CENTRAL_REGISTRY = new CentralRegistry(); - private static final Dictionary DICTIONARY = new MapDictionary(Dataset.PLACEHOLDER, "dictionary"); private static ObjectMapper shardInternalMapper; @BeforeAll public static void setupRegistry() { CENTRAL_REGISTRY.register(Dataset.PLACEHOLDER); - CENTRAL_REGISTRY.register(DICTIONARY); // Prepare shard node internal mapper @@ -105,16 +95,10 @@ public static List createCTypes() { return Arrays.asList( new ScaledDecimalStore(13, IntArrayStore.create(10)), new MoneyIntStore(IntArrayStore.create(10)), - new DictionaryStore(IntArrayStore.create(10), DICTIONARY), - new EncodedStringStore(new DictionaryStore(IntArrayStore.create(10), DICTIONARY), Encoding.Base16LowerCase), - new PrefixSuffixStringStore(new EncodedStringStore(new DictionaryStore(IntArrayStore.create(10), DICTIONARY), Encoding.Base16LowerCase), "a", "b"), - - new NumberStringStore(new Range.IntegerRange(0, 7), ByteArrayStore.create(10)), - new SingletonStringStore("a", BitSetStore.create(10)), new DirectDateRangeStore(IntegerDateStore.create(10), IntegerDateStore.create(10)), new QuarterDateRangeStore(LongArrayStore.create(10)), new IntegerDateStore(LongArrayStore.create(10)), - + StringStoreString.withInternedStrings(new String[]{"a", "b", "c"}), DecimalArrayStore.create(10), LongArrayStore.create(10), IntArrayStore.create(10), diff --git a/backend/src/test/java/com/bakdata/conquery/models/events/stores/types/StringEncodingTest.java b/backend/src/test/java/com/bakdata/conquery/models/events/stores/types/StringEncodingTest.java deleted file mode 100644 index 81a3bfd2f5..0000000000 --- a/backend/src/test/java/com/bakdata/conquery/models/events/stores/types/StringEncodingTest.java +++ /dev/null @@ -1,62 +0,0 @@ -package com.bakdata.conquery.models.events.stores.types; - - -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.UUID; -import java.util.stream.Stream; - -import com.bakdata.conquery.models.config.ConqueryConfig; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; -import com.bakdata.conquery.models.exceptions.ParsingException; -import com.bakdata.conquery.models.preproc.parser.specific.StringParser; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.DynamicTest; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.api.TestFactory; - - -@Slf4j -public class StringEncodingTest { - - @TestFactory - public Stream testEncodings() { - - EncodedStringStore.Encoding encoding = EncodedStringStore.Encoding.Base64; - - return Stream.generate(() -> UUID.randomUUID().toString().replace("-", "")) - .map(uuid -> DynamicTest.dynamicTest(uuid, () -> { - byte[] decoded = encoding.encode(uuid); - String encoded = encoding.decode(decoded); - - assertThat(encoded).isEqualTo(uuid); - assertThat(decoded.length).isLessThan(uuid.length()); - })) - .limit(100); - } - - @Test - public void testHexStreamStringType() { - 
StringParser parser = new StringParser(new ConqueryConfig()); - - Stream.generate(() -> UUID.randomUUID().toString().replace("-", "")) - .map(String::toUpperCase) - .mapToInt(v -> { - try { - return parser.parse(v); - } - catch (ParsingException e) { - return 0; // We know that StringTypeVarInt is able to parse our strings. - } - }) - .limit(100) - .forEach(parser::addLine); - - - EncodedStringStore subType = (EncodedStringStore) parser.findBestType(); - - assertThat(subType) - .isInstanceOf(EncodedStringStore.class); - assertThat(subType.getEncoding()).isEqualByComparingTo(EncodedStringStore.Encoding.Base16UpperCase); - } -} \ No newline at end of file diff --git a/backend/src/test/java/com/bakdata/conquery/models/forms/DateContextTest.java b/backend/src/test/java/com/bakdata/conquery/models/forms/DateContextTest.java index 6a07564d3e..758c08db78 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/forms/DateContextTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/forms/DateContextTest.java @@ -1,5 +1,13 @@ package com.bakdata.conquery.models.forms; +import static com.bakdata.conquery.models.forms.util.Alignment.*; +import static com.bakdata.conquery.models.forms.util.Resolution.*; +import static org.assertj.core.api.Assertions.assertThat; + +import java.time.LocalDate; +import java.util.Arrays; +import java.util.List; + import com.bakdata.conquery.apiv1.forms.FeatureGroup; import com.bakdata.conquery.apiv1.forms.IndexPlacement; import com.bakdata.conquery.apiv1.forms.export_form.ExportForm; @@ -9,14 +17,6 @@ import com.bakdata.conquery.models.forms.util.DateContext; import org.junit.jupiter.api.Test; -import java.time.LocalDate; -import java.util.Arrays; -import java.util.List; - -import static com.bakdata.conquery.models.forms.util.Alignment.*; -import static com.bakdata.conquery.models.forms.util.Resolution.*; -import static org.assertj.core.api.Assertions.assertThat; - public class DateContextTest { @Test @@ -36,12 +36,12 @@ public void rangeAbsYearTestWithCoarse() { List contexts = DateContext.generateAbsoluteContexts(mask, ExportForm.getResolutionAlignmentMap(YEARS.getThisAndCoarserSubdivisions(),YEAR)); assertThat(contexts).extracting(DateContext::getDateRange).containsExactly ( - mask, - CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 12, 31)), - CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 12, 31)), - CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 12, 31)), - CDateRange.of(LocalDate.of(2004, 1, 1), LocalDate.of(2004, 12, 31)), - CDateRange.of(LocalDate.of(2005, 1, 1), LocalDate.of(2005, 4, 21)) + mask, + CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 12, 31)), + CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 12, 31)), + CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 12, 31)), + CDateRange.of(LocalDate.of(2004, 1, 1), LocalDate.of(2004, 12, 31)), + CDateRange.of(LocalDate.of(2005, 1, 1), LocalDate.of(2005, 4, 21)) ); assertThat(contexts).extracting(DateContext::getFeatureGroup).containsOnly(FeatureGroup.SINGLE_GROUP); } @@ -52,11 +52,11 @@ public void rangeAbsYearTestWithoutCoarse() { List contexts = DateContext.generateAbsoluteContexts(mask, ExportForm.getResolutionAlignmentMap(List.of(YEARS),YEAR)); assertThat(contexts).extracting(DateContext::getDateRange).containsExactly ( - CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 12, 31)), - CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 12, 31)), - CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 12, 
31)), - CDateRange.of(LocalDate.of(2004, 1, 1), LocalDate.of(2004, 12, 31)), - CDateRange.of(LocalDate.of(2005, 1, 1), LocalDate.of(2005, 4, 21)) + CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 12, 31)), + CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 12, 31)), + CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 12, 31)), + CDateRange.of(LocalDate.of(2004, 1, 1), LocalDate.of(2004, 12, 31)), + CDateRange.of(LocalDate.of(2005, 1, 1), LocalDate.of(2005, 4, 21)) ); assertThat(contexts).extracting(DateContext::getFeatureGroup).containsOnly(FeatureGroup.SINGLE_GROUP); } @@ -67,22 +67,22 @@ public void rangeAbsQuarterTestWithCoarse() { List contexts = DateContext.generateAbsoluteContexts(mask, ExportForm.getResolutionAlignmentMap(QUARTERS.getThisAndCoarserSubdivisions(), QUARTER)); assertThat(contexts).extracting(DateContext::getDateRange).containsExactly ( - // Complete - mask, - // Years - CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2002, 3, 31)), - CDateRange.of(LocalDate.of(2002, 4, 1), LocalDate.of(2003, 3, 31)), - CDateRange.of(LocalDate.of(2003, 4, 1), LocalDate.of(2003, 4, 21)), - // Quarters - CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 6, 30)), - CDateRange.of(LocalDate.of(2001, 7, 1), LocalDate.of(2001, 9, 30)), - CDateRange.of(LocalDate.of(2001, 10, 1), LocalDate.of(2001, 12, 31)), - CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 3, 31)), - CDateRange.of(LocalDate.of(2002, 4, 1), LocalDate.of(2002, 6, 30)), - CDateRange.of(LocalDate.of(2002, 7, 1), LocalDate.of(2002, 9, 30)), - CDateRange.of(LocalDate.of(2002, 10, 1), LocalDate.of(2002, 12, 31)), - CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 3, 31)), - CDateRange.of(LocalDate.of(2003, 4, 1), LocalDate.of(2003, 4, 21)) + // Complete + mask, + // Years + CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2002, 3, 31)), + CDateRange.of(LocalDate.of(2002, 4, 1), LocalDate.of(2003, 3, 31)), + CDateRange.of(LocalDate.of(2003, 4, 1), LocalDate.of(2003, 4, 21)), + // Quarters + CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 6, 30)), + CDateRange.of(LocalDate.of(2001, 7, 1), LocalDate.of(2001, 9, 30)), + CDateRange.of(LocalDate.of(2001, 10, 1), LocalDate.of(2001, 12, 31)), + CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 3, 31)), + CDateRange.of(LocalDate.of(2002, 4, 1), LocalDate.of(2002, 6, 30)), + CDateRange.of(LocalDate.of(2002, 7, 1), LocalDate.of(2002, 9, 30)), + CDateRange.of(LocalDate.of(2002, 10, 1), LocalDate.of(2002, 12, 31)), + CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 3, 31)), + CDateRange.of(LocalDate.of(2003, 4, 1), LocalDate.of(2003, 4, 21)) ); assertThat(contexts).extracting(DateContext::getFeatureGroup).containsOnly(FeatureGroup.SINGLE_GROUP); } @@ -93,15 +93,15 @@ public void rangeAbsQuarterTestWithoutCoarse() { List contexts = DateContext.generateAbsoluteContexts(mask, List.of(ExportForm.ResolutionAndAlignment.of(QUARTERS, QUARTER))); assertThat(contexts).extracting(DateContext::getDateRange).containsExactly ( - CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 6, 30)), - CDateRange.of(LocalDate.of(2001, 7, 1), LocalDate.of(2001, 9, 30)), - CDateRange.of(LocalDate.of(2001, 10, 1), LocalDate.of(2001, 12, 31)), - CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 3, 31)), - CDateRange.of(LocalDate.of(2002, 4, 1), LocalDate.of(2002, 6, 30)), - CDateRange.of(LocalDate.of(2002, 7, 1), LocalDate.of(2002, 9, 30)), - CDateRange.of(LocalDate.of(2002, 10, 1), LocalDate.of(2002, 12, 
31)), - CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 3, 31)), - CDateRange.of(LocalDate.of(2003, 4, 1), LocalDate.of(2003, 4, 21)) + CDateRange.of(LocalDate.of(2001, 5, 23), LocalDate.of(2001, 6, 30)), + CDateRange.of(LocalDate.of(2001, 7, 1), LocalDate.of(2001, 9, 30)), + CDateRange.of(LocalDate.of(2001, 10, 1), LocalDate.of(2001, 12, 31)), + CDateRange.of(LocalDate.of(2002, 1, 1), LocalDate.of(2002, 3, 31)), + CDateRange.of(LocalDate.of(2002, 4, 1), LocalDate.of(2002, 6, 30)), + CDateRange.of(LocalDate.of(2002, 7, 1), LocalDate.of(2002, 9, 30)), + CDateRange.of(LocalDate.of(2002, 10, 1), LocalDate.of(2002, 12, 31)), + CDateRange.of(LocalDate.of(2003, 1, 1), LocalDate.of(2003, 3, 31)), + CDateRange.of(LocalDate.of(2003, 4, 1), LocalDate.of(2003, 4, 21)) ); assertThat(contexts).extracting(DateContext::getFeatureGroup).containsOnly(FeatureGroup.SINGLE_GROUP); } diff --git a/backend/src/test/java/com/bakdata/conquery/models/identifiable/mapping/PseudomizationTest.java b/backend/src/test/java/com/bakdata/conquery/models/identifiable/mapping/PseudomizationTest.java index 470024c659..74bfe9bd9e 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/identifiable/mapping/PseudomizationTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/identifiable/mapping/PseudomizationTest.java @@ -5,20 +5,20 @@ import org.junit.jupiter.api.Test; public class PseudomizationTest { - + @Test void pseudoIdGeneration() { AutoIncrementingPseudomizer pseudomizer = new AutoIncrementingPseudomizer(4,2); // Id changes from internal to external - assertThat(pseudomizer.getPseudoId(0)).isEqualTo(EntityPrintId.from(null, null, "anon_0", null)); - + assertThat(pseudomizer.getPseudoId("0")).isEqualTo(EntityPrintId.from(null, null, "anon_0", null)); + // Id mapping is constant - assertThat(pseudomizer.getPseudoId(0)).isEqualTo(pseudomizer.getPseudoId(0)); - + assertThat(pseudomizer.getPseudoId("0")).isEqualTo(pseudomizer.getPseudoId("0")); + // Mapping produces differing external ids - assertThat(pseudomizer.getPseudoId(1)).isNotEqualTo(pseudomizer.getPseudoId(0)); - + assertThat(pseudomizer.getPseudoId("1")).isNotEqualTo(pseudomizer.getPseudoId("0")); + } -} +} \ No newline at end of file diff --git a/backend/src/test/java/com/bakdata/conquery/models/preproc/parser/specific/DateRangeParserTest.java b/backend/src/test/java/com/bakdata/conquery/models/preproc/parser/specific/DateRangeParserTest.java index 28494d2d8d..ba126fb654 100644 --- a/backend/src/test/java/com/bakdata/conquery/models/preproc/parser/specific/DateRangeParserTest.java +++ b/backend/src/test/java/com/bakdata/conquery/models/preproc/parser/specific/DateRangeParserTest.java @@ -20,7 +20,7 @@ class DateRangeParserTest { public void onlyClosed() { final DateRangeParser parser = new DateRangeParser(new ConqueryConfig()); - List.of(CDateRange.of(10,11), CDateRange.exactly(10)) + List.of(CDateRange.of(10, 11), CDateRange.exactly(10)) .forEach(parser::addLine); final ColumnStore actual = parser.decideType(); @@ -36,7 +36,7 @@ public void onlyClosed() { public void notOnlyClosed() { final DateRangeParser parser = new DateRangeParser(new ConqueryConfig()); - List.of(CDateRange.of(10,11), CDateRange.exactly(10), CDateRange.atMost(10)) + List.of(CDateRange.of(10, 11), CDateRange.exactly(10), CDateRange.atMost(10)) .forEach(parser::addLine); assertThat(parser.decideType()).isInstanceOf(DirectDateRangeStore.class); @@ -46,7 +46,7 @@ public void notOnlyClosed() { public void onlyQuarters() { final DateRangeParser parser = new 
DateRangeParser(new ConqueryConfig()); - List.of(CDateRange.of(QuarterUtils.getFirstDayOfQuarter(2011,1), QuarterUtils.getLastDayOfQuarter(2011,1))) + List.of(CDateRange.of(QuarterUtils.getFirstDayOfQuarter(2011, 1), QuarterUtils.getLastDayOfQuarter(2011, 1))) .forEach(parser::addLine); assertThat(parser.decideType()).isInstanceOf(QuarterDateRangeStore.class); diff --git a/backend/src/test/java/com/bakdata/conquery/models/preproc/parser/specific/StringParserTest.java b/backend/src/test/java/com/bakdata/conquery/models/preproc/parser/specific/StringParserTest.java deleted file mode 100644 index 0043f8a20b..0000000000 --- a/backend/src/test/java/com/bakdata/conquery/models/preproc/parser/specific/StringParserTest.java +++ /dev/null @@ -1,14 +0,0 @@ -package com.bakdata.conquery.models.preproc.parser.specific; - -import static org.assertj.core.api.Assertions.assertThat; - -import org.junit.jupiter.api.Test; - - -class StringParserTest { - @Test - public void isOnlyDigits() { - assertThat(StringParser.isOnlyDigits("01")).isFalse(); - } - -} \ No newline at end of file diff --git a/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java new file mode 100644 index 0000000000..825c13df7c --- /dev/null +++ b/backend/src/test/java/com/bakdata/conquery/service/FilterSearchTest.java @@ -0,0 +1,70 @@ +package com.bakdata.conquery.service; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.List; +import java.util.Map; + +import com.bakdata.conquery.models.config.IndexConfig; +import com.bakdata.conquery.models.datasets.Column; +import com.bakdata.conquery.models.datasets.Dataset; +import com.bakdata.conquery.models.datasets.Table; +import com.bakdata.conquery.models.datasets.concepts.filters.specific.SelectFilter; +import com.bakdata.conquery.models.datasets.concepts.filters.specific.SingleSelectFilter; +import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector; +import com.bakdata.conquery.models.datasets.concepts.tree.TreeConcept; +import com.bakdata.conquery.models.query.FilterSearch; +import com.google.common.collect.ImmutableBiMap; +import org.junit.jupiter.api.Test; + +public class FilterSearchTest { + + @Test + public void totals() { + final IndexConfig indexConfig = new IndexConfig(); + FilterSearch search = new FilterSearch(indexConfig); + + // Column Searchable + SelectFilter filter = new SingleSelectFilter(); + ConceptTreeConnector connector = new ConceptTreeConnector(); + TreeConcept concept = new TreeConcept(); + Column column = new Column(); + Table table = new Table(); + Dataset dataset = new Dataset("test_dataset"); + + table.setName("test_table"); + table.setDataset(dataset); + concept.setDataset(dataset); + concept.setName("test_concept"); + concept.setConnectors(List.of(connector)); + connector.setName("test_connector"); + connector.setFilters(List.of(filter)); + connector.setConcept(concept); + column.setTable(table); + column.setName("test_column"); + filter.setColumn(column); + filter.setConnector(connector); + + + // Map Searchable + filter.setLabels(ImmutableBiMap.of( + "mm", "MM", + "nn", "NN" + )); + + // Register + filter.getSearchReferences().forEach(searchable -> { + search.addSearches(Map.of(searchable, searchable.createTrieSearch(indexConfig))); + }); + + search.registerValues(column, List.of( + "a", + "bb", + "cc", + "mm" + )); + search.shrinkSearch(column); + + assertThat(search.getTotal(filter)).isEqualTo(5); + } +} diff --git 
a/backend/src/test/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorTablesTest.java b/backend/src/test/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorTablesTest.java deleted file mode 100644 index 2b28824748..0000000000 --- a/backend/src/test/java/com/bakdata/conquery/sql/conversion/cqelement/concept/ConnectorTablesTest.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.bakdata.conquery.sql.conversion.cqelement.concept; - -import java.util.Set; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -import com.bakdata.conquery.sql.conversion.model.NameGenerator; -import com.bakdata.conquery.sql.conversion.model.SqlTables; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.Arguments; -import org.junit.jupiter.params.provider.MethodSource; - -class ConnectorTablesTest { - - private static final String CONCEPT_LABEL = "foo"; - private static final String ROOT_TABLE = "root"; - public static final int NAME_MAX_LENGTH = 127; - private static final NameGenerator NAME_GENERATOR = new NameGenerator(NAME_MAX_LENGTH); - - private static class TestSqlTables extends SqlTables { - - public static Set MANDATORY_STEPS = Set.of( - ConnectorCteStep.PREPROCESSING, - ConnectorCteStep.AGGREGATION_SELECT, - ConnectorCteStep.AGGREGATION_FILTER - ); - - public TestSqlTables(String nodeLabel, Set requiredSteps, String rootTableName, NameGenerator nameGenerator) { - super(nodeLabel, requiredSteps, rootTableName, nameGenerator); - } - } - - @ParameterizedTest - @MethodSource("requiredStepsProvider") - public void getPredecessorTableName(Set requiredSteps, ConnectorCteStep step, String expectedPredecessorTableName) { - TestSqlTables connectorTables = new TestSqlTables(CONCEPT_LABEL, requiredSteps, ROOT_TABLE, NAME_GENERATOR); - Assertions.assertEquals( - expectedPredecessorTableName, - connectorTables.getPredecessor(step) - ); - } - - public static Stream requiredStepsProvider() { - return Stream.of( - - // AGGREGATION_SELECT and FINAL direct predecessors missing - Arguments.of(TestSqlTables.MANDATORY_STEPS, ConnectorCteStep.PREPROCESSING, ROOT_TABLE), - Arguments.of(TestSqlTables.MANDATORY_STEPS, ConnectorCteStep.EVENT_FILTER, ConnectorCteStep.PREPROCESSING.cteName(CONCEPT_LABEL)), - Arguments.of(TestSqlTables.MANDATORY_STEPS, ConnectorCteStep.AGGREGATION_SELECT, ConnectorCteStep.PREPROCESSING.cteName(CONCEPT_LABEL)), - Arguments.of(TestSqlTables.MANDATORY_STEPS, ConnectorCteStep.AGGREGATION_FILTER, ConnectorCteStep.AGGREGATION_SELECT.cteName(CONCEPT_LABEL)), - - // only FINAL direct predecessor missing - Arguments.of( - withAdditionalSteps(Set.of(ConnectorCteStep.EVENT_FILTER)), - ConnectorCteStep.AGGREGATION_SELECT, - ConnectorCteStep.EVENT_FILTER.cteName(CONCEPT_LABEL) - ), - - // only AGGREGATION_SELECT direct predecessor missing - Arguments.of( - withAdditionalSteps(Set.of(ConnectorCteStep.AGGREGATION_FILTER)), - ConnectorCteStep.AGGREGATION_FILTER, - ConnectorCteStep.AGGREGATION_SELECT.cteName(CONCEPT_LABEL) - ), - - // more than 1 predecessor missing of FINAL - Arguments.of( - Set.of(ConnectorCteStep.PREPROCESSING, ConnectorCteStep.AGGREGATION_FILTER), - ConnectorCteStep.AGGREGATION_FILTER, - ConnectorCteStep.PREPROCESSING.cteName(CONCEPT_LABEL) - ), - - // all predecessors missing of FINAL - Arguments.of( - Set.of(ConnectorCteStep.AGGREGATION_FILTER), - ConnectorCteStep.AGGREGATION_FILTER, - ROOT_TABLE - ) - ); - } - - private static Set withAdditionalSteps(Set 
additionalSteps) { - return Stream.concat(TestSqlTables.MANDATORY_STEPS.stream(), additionalSteps.stream()).collect(Collectors.toSet()); - } - -} diff --git a/backend/src/test/java/com/bakdata/conquery/sql/conversion/model/NameGeneratorTest.java b/backend/src/test/java/com/bakdata/conquery/sql/conversion/model/NameGeneratorTest.java deleted file mode 100644 index c14fea6676..0000000000 --- a/backend/src/test/java/com/bakdata/conquery/sql/conversion/model/NameGeneratorTest.java +++ /dev/null @@ -1,138 +0,0 @@ -package com.bakdata.conquery.sql.conversion.model; - -import com.bakdata.conquery.apiv1.query.concept.specific.CQConcept; -import com.bakdata.conquery.models.datasets.concepts.filters.specific.SumFilter; -import com.bakdata.conquery.models.datasets.concepts.select.connector.specific.SumSelect; -import com.bakdata.conquery.models.datasets.concepts.tree.ConceptTreeConnector; -import com.bakdata.conquery.sql.conversion.cqelement.concept.ConnectorCteStep; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -class NameGeneratorTest { - - private static final int NAME_MAX_LENGTH = 63; - - @Test - void cteStepName() { - NameGenerator nameGenerator = new NameGenerator(NAME_MAX_LENGTH); - - Assertions.assertEquals( - "foo-group_filter", - nameGenerator.cteStepName(ConnectorCteStep.AGGREGATION_FILTER, "foo") - ); - - String veryLongCteName = "concept-foobar-fizzbuzz-123_aggregation_select_filter_date_aggregation_cte"; - String actual = nameGenerator.cteStepName(ConnectorCteStep.AGGREGATION_FILTER, veryLongCteName); - Assertions.assertEquals(NAME_MAX_LENGTH, actual.length()); - Assertions.assertEquals("123_aggregation_select_filter_date_aggregation_cte-group_filter", actual); - } - - @Test - void selectName() { - - NameGenerator nameGenerator = new NameGenerator(NAME_MAX_LENGTH); - - SumSelect sumSelect = new SumSelect(); - sumSelect.setName("sum_select"); - Assertions.assertEquals("sum_select-1", nameGenerator.selectName(sumSelect)); - - SumSelect secondSumSelect = new SumSelect(); - secondSumSelect.setName("sum_select"); - Assertions.assertEquals("sum_select-2", nameGenerator.selectName(secondSumSelect)); - - SumFilter sumFilter = new SumFilter<>(); - sumFilter.setName("sum_filter"); - Assertions.assertEquals("sum_filter-1", nameGenerator.selectName(sumFilter)); - - SumSelect sumSelectWithTooLongName = new SumSelect(); - String veryLongSelectLabel = "First Last Random Date of Birth Aggregation Select Distinct By Minus Sum Count"; - sumSelectWithTooLongName.setName(veryLongSelectLabel); - - String actual = nameGenerator.selectName(sumSelectWithTooLongName); - Assertions.assertEquals(NAME_MAX_LENGTH, actual.length()); - Assertions.assertEquals("_date_of_birth_aggregation_select_distinct_by_minus_sum_count-1", actual); - } - - @Test - void joinedNodeName() { - NameGenerator nameGenerator = new NameGenerator(NAME_MAX_LENGTH); - - String firstAnd = nameGenerator.joinedNodeName(LogicalOperation.AND); - Assertions.assertEquals("AND-1", firstAnd); - - String firstOr = nameGenerator.joinedNodeName(LogicalOperation.OR); - Assertions.assertEquals("OR-1", firstOr); - - String secondAnd = nameGenerator.joinedNodeName(LogicalOperation.AND); - Assertions.assertEquals("AND-2", secondAnd); - - String thirdAnd = nameGenerator.joinedNodeName(LogicalOperation.AND); - Assertions.assertEquals("AND-3", thirdAnd); - - String secondOr = nameGenerator.joinedNodeName(LogicalOperation.OR); - Assertions.assertEquals("OR-2", secondOr); - - String thirdOr = 
nameGenerator.joinedNodeName(LogicalOperation.OR); - Assertions.assertEquals("OR-3", thirdOr); - } - - @Test - void conceptConnectorName() { - - NameGenerator nameGenerator = new NameGenerator(NAME_MAX_LENGTH); - - CQConcept foo = new CQConcept(); - foo.setLabel("foo"); - Assertions.assertEquals( - "concept_foo-1", - nameGenerator.conceptName(foo), - "first concept, count is 1" - ); - - ConceptTreeConnector fooConnector = new ConceptTreeConnector(); - fooConnector.setName("foo connector"); - Assertions.assertEquals( - "concept_foo_foo_connector-1", - nameGenerator.conceptConnectorName(foo, fooConnector), - "first concept, first connector, count is still 1" - ); - - ConceptTreeConnector barConnector = new ConceptTreeConnector(); - barConnector.setName("bar connector"); - Assertions.assertEquals( - "concept_foo_bar_connector-1", - nameGenerator.conceptConnectorName(foo, barConnector), - "first concept, second connector, count is still 1" - ); - - CQConcept bar = new CQConcept(); - bar.setLabel("bar"); - Assertions.assertEquals( - "concept_bar-2", - nameGenerator.conceptName(bar), - "second concept, count is 2" - ); - Assertions.assertEquals( - "concept_bar_bar_connector-2", - nameGenerator.conceptConnectorName(bar, barConnector), - "second concept, second connector, count is 2" - ); - - CQConcept withTooLongLabel = new CQConcept(); - String veryLongConceptLabel = "Verbose Concept label which exceeds the max length and nobody would ever choose I mean seriously"; - withTooLongLabel.setLabel(veryLongConceptLabel); - ConceptTreeConnector connector = new ConceptTreeConnector(); - connector.setName("with-too-long-label-connector"); - Assertions.assertEquals( - "_the_max_length_and_nobody_would_ever_choose_i_mean_seriously-3", - nameGenerator.conceptName(withTooLongLabel), - "third concept, name should be truncated" - ); - Assertions.assertEquals( - "ld_ever_choose_i_mean_seriously_with-too-long-label-connector-3", - nameGenerator.conceptConnectorName(withTooLongLabel, connector), - "label way too long, name should be truncated, counter still 3" - ); - } - -} diff --git a/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStore.java b/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStore.java index c9f901007c..e0f2ed9363 100644 --- a/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStore.java +++ b/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStore.java @@ -49,7 +49,7 @@ public void remove(KEY key) { } @Override - public void fillCache() { + public void loadData() { } @@ -74,7 +74,7 @@ public void clear() { } @Override - public void deleteStore() { + public void removeStore() { clear(); } diff --git a/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStoreFactory.java b/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStoreFactory.java index ca9b74af6c..c9622d73e1 100644 --- a/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStoreFactory.java +++ b/backend/src/test/java/com/bakdata/conquery/util/NonPersistentStoreFactory.java @@ -11,8 +11,8 @@ import com.bakdata.conquery.io.storage.NamespaceStorage; import com.bakdata.conquery.io.storage.StoreMappings; import com.bakdata.conquery.io.storage.WorkerStorage; +import com.bakdata.conquery.io.storage.xodus.stores.CachedStore; import com.bakdata.conquery.io.storage.xodus.stores.SingletonStore; -import com.bakdata.conquery.mode.StorageHandler; import com.bakdata.conquery.models.auth.entities.Group; import com.bakdata.conquery.models.auth.entities.Role; import 
com.bakdata.conquery.models.auth.entities.User; @@ -24,7 +24,6 @@ import com.bakdata.conquery.models.datasets.Table; import com.bakdata.conquery.models.datasets.concepts.Concept; import com.bakdata.conquery.models.datasets.concepts.StructureNode; -import com.bakdata.conquery.models.dictionary.Dictionary; import com.bakdata.conquery.models.events.Bucket; import com.bakdata.conquery.models.events.CBlock; import com.bakdata.conquery.models.execution.ManagedExecution; @@ -45,7 +44,6 @@ public class NonPersistentStoreFactory implements StoreFactory { private final Map> datasetStores = new ConcurrentHashMap<>(); private final Map, SecondaryIdDescription>> secondaryIdDescriptionStore = new ConcurrentHashMap<>(); private final Map, Table>> tableStore = new ConcurrentHashMap<>(); - private final Map, Dictionary>> dictionaryStore = new ConcurrentHashMap<>(); private final Map>, Concept>> conceptStore = new ConcurrentHashMap<>(); private final Map, Import>> importStore = new ConcurrentHashMap<>(); private final Map, CBlock>> cBlockStore = new ConcurrentHashMap<>(); @@ -65,11 +63,11 @@ public class NonPersistentStoreFactory implements StoreFactory { private final Map, User>> userStore = new ConcurrentHashMap<>(); private final Map, Role>> roleStore = new ConcurrentHashMap<>(); private final Map, Group>> groupStore = new ConcurrentHashMap<>(); - private final Map> primaryDictionaryStoreStore = new ConcurrentHashMap<>(); + private final Map> entity2Bucket = new ConcurrentHashMap<>(); @Override - public Collection discoverNamespaceStorages(StorageHandler storageHandler) { + public Collection discoverNamespaceStorages() { return Collections.emptyList(); } @@ -105,13 +103,13 @@ public SingletonStore createPreviewStore(String pathName, Central } @Override - public IdentifiableStore createTableStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper) { - return StoreMappings.identifiable(tableStore.computeIfAbsent(pathName, n -> new NonPersistentStore<>()), centralRegistry); + public CachedStore createEntity2BucketStore(String pathName, ObjectMapper objectMapper) { + return StoreMappings.cached(entity2Bucket.computeIfAbsent(pathName, ignored -> new NonPersistentStore<>())); } @Override - public IdentifiableStore createDictionaryStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper) { - return StoreMappings.identifiable(dictionaryStore.computeIfAbsent(pathName, n -> new NonPersistentStore<>()), centralRegistry); + public IdentifiableStore
    createTableStore(CentralRegistry centralRegistry, String pathName, ObjectMapper objectMapper) { + return StoreMappings.identifiable(tableStore.computeIfAbsent(pathName, n -> new NonPersistentStore<>()), centralRegistry); } @Override @@ -179,11 +177,6 @@ public IdentifiableStore createGroupStore(CentralRegistry centralRegistry return StoreMappings.identifiable(groupStore.computeIfAbsent(pathName, n -> new NonPersistentStore<>()), centralRegistry); } - @Override - public SingletonStore createPrimaryDictionaryStore(String pathName, CentralRegistry centralRegistry, ObjectMapper objectMapper) { - return StoreMappings.singleton(primaryDictionaryStoreStore.computeIfAbsent(pathName, n -> new NonPersistentStore<>())); - } - /** * @implNote intended for Unit-tests */ diff --git a/backend/src/test/java/com/bakdata/conquery/util/dict/SuccinctTrieBenchmark.java b/backend/src/test/java/com/bakdata/conquery/util/dict/SuccinctTrieBenchmark.java deleted file mode 100644 index b5323813cc..0000000000 --- a/backend/src/test/java/com/bakdata/conquery/util/dict/SuccinctTrieBenchmark.java +++ /dev/null @@ -1,43 +0,0 @@ - -package com.bakdata.conquery.util.dict; - -import java.io.IOException; -import java.nio.charset.StandardCharsets; -import java.util.Arrays; -import java.util.List; -import java.util.stream.Collectors; - -import com.bakdata.conquery.models.datasets.Dataset; -import com.github.powerlibraries.io.In; -import lombok.extern.slf4j.Slf4j; -import org.apache.commons.lang3.ArrayUtils; -import org.junit.jupiter.params.provider.Arguments; - -@Slf4j -public class SuccinctTrieBenchmark { - - public static List data() throws IOException { - List list = In.resource(SuccinctTrieBenchmark.class, "SuccinctTrieTest.data").streamLines().map(v->v.getBytes(StandardCharsets.UTF_8)).collect(Collectors.toList()); - int size = list.size(); - for(int i=0;i<60;i++) { - for(int j=0;jasList( - - Arguments.of( - "succinct", - new SuccinctTrie(Dataset.PLACEHOLDER, "name"), - list - ) - ); - } - - //@ParameterizedTest(name = "{0}") @MethodSource("data") - public void test(String name, SuccinctTrie trie, List data) throws IOException { - data.forEach(trie::add); - trie.compress(); - } -} \ No newline at end of file diff --git a/backend/src/test/java/com/bakdata/conquery/util/dict/SuccinctTrieTest.java b/backend/src/test/java/com/bakdata/conquery/util/dict/SuccinctTrieTest.java deleted file mode 100644 index b24f45fe8e..0000000000 --- a/backend/src/test/java/com/bakdata/conquery/util/dict/SuccinctTrieTest.java +++ /dev/null @@ -1,106 +0,0 @@ -package com.bakdata.conquery.util.dict; - -import static org.assertj.core.api.Assertions.assertThat; - -import java.util.ArrayList; -import java.util.List; -import java.util.Random; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.stream.IntStream; - -import com.bakdata.conquery.models.datasets.Dataset; -import com.bakdata.conquery.models.dictionary.DictionaryEntry; -import com.bakdata.conquery.models.dictionary.EncodedDictionary; -import com.bakdata.conquery.models.events.stores.specific.string.EncodedStringStore; -import com.google.common.collect.BiMap; -import com.google.common.collect.HashBiMap; -import lombok.extern.slf4j.Slf4j; -import org.junit.jupiter.api.Test; -import org.junit.jupiter.params.ParameterizedTest; -import org.junit.jupiter.params.provider.MethodSource; - -@Slf4j -public class SuccinctTrieTest { - - public static long[] getSeeds() { - return new long[]{0L, 7L}; - } - - - @Test - public void assertionTest() { - List words = new ArrayList(); - 
words.add("hat"); - words.add("it"); - words.add("is"); - words.add("a"); - words.add("is"); - words.add("ha"); - words.add("hat"); - - SuccinctTrie direct = new SuccinctTrie(Dataset.PLACEHOLDER, "name"); - - - int distinctValues = 0; - for (String entry : words) { - int id = direct.put(entry.getBytes()); - if (id > distinctValues) { - distinctValues++; - } - } - - direct.compress(); - - assertThat(direct.iterator().next()).isEqualTo(new DictionaryEntry(0, "hat".getBytes())); - - assertThat(direct.getElement(0)).isEqualTo("hat".getBytes()); - assertThat(direct.getElement(1)).isEqualTo("it".getBytes()); - assertThat(direct.getElement(2)).isEqualTo("is".getBytes()); - assertThat(direct.getElement(3)).isEqualTo("a".getBytes()); - assertThat(direct.getId("is".getBytes())).isEqualTo(2); - assertThat(direct.getId("ha".getBytes())).isEqualTo(4); - assertThat(direct.getId("h".getBytes())).isEqualTo(-1); - } - - @ParameterizedTest(name = "seed: {0}") - @MethodSource("getSeeds") - public void valid(long seed) { - final SuccinctTrie dict = new SuccinctTrie(Dataset.PLACEHOLDER, "name"); - EncodedDictionary direct = new EncodedDictionary(dict, EncodedStringStore.Encoding.UTF8); - final BiMap reference = HashBiMap.create(); - - AtomicInteger count = new AtomicInteger(0); - - Random random = new Random(seed); - - IntStream - .range(0, 8192) - .boxed() - .sorted(TreeTestUtil.shuffle(random)) - .forEach(rep -> { - final String prefix = Integer.toString(rep, 26); - - reference.put(prefix, count.get()); - dict.add(prefix.getBytes()); - count.incrementAndGet(); - }); - - log.info("structure build"); - dict.compress(); - log.info("trie compressed"); - //assert key value lookup - assertThat(reference.entrySet().stream()).allSatisfy(entry -> { - assertThat(direct.getId(entry.getKey())) - .isEqualTo(entry.getValue()); - }); - - log.info("forward lookup done"); - - //assert reverse lookup - assertThat(reference.inverse().entrySet().stream()).allSatisfy(entry -> { - assertThat(dict.getElement(entry.getKey())) - .isEqualTo(entry.getValue().getBytes()); - }); - log.info("reverse lookup done"); - } -} \ No newline at end of file diff --git a/backend/src/test/resources/com/bakdata/conquery/util/dict/SuccinctTrieTest.data b/backend/src/test/resources/com/bakdata/conquery/util/dict/SuccinctTrieTest.data deleted file mode 100644 index 7d736f68e2..0000000000 --- a/backend/src/test/resources/com/bakdata/conquery/util/dict/SuccinctTrieTest.data +++ /dev/null @@ -1,625 +0,0 @@ -2 -1080 -&c -10-point -10th -11-point -12-point -16-point -18-point -1st -2,4,5-t -2,4-d -20-point -2D -2nd -30-30 -3D -3-D -3M -3rd -48-point -4-D -4GL -4H -4th -5-point -5-T -5th -6-point -6th -7-point -7th -8-point -8th -9-point -9th -a -a' -a- -A&M -A&P -A. -A.A.A. -A.B. -A.B.A. -A.C. -A.D. -A.D.C. -A.F. -A.F.A.M. -A.G. -A.H. -A.I. -A.I.A. -A.I.D. -A.L. -A.L.P. -A.M. -A.M.A. -A.M.D.G. -A.N. -a.p. -a.r. -A.R.C.S. -A.U. -A.U.C. -A.V. -a.w. -A.W.O.L. 
-A/C -A/F -A/O -A/P -A/V -A1 -A-1 -A4 -A5 -AA -AAA -AAAA -AAAAAA -AAAL -AAAS -Aaberg -Aachen -AAE -AAEE -AAF -AAG -aah -aahed -aahing -aahs -AAII -aal -Aalborg -Aalesund -aalii -aaliis -aals -Aalst -Aalto -AAM -AAMSI -Aandahl -A-and-R -Aani -AAO -AAP -AAPSS -Aaqbiye -Aar -Aara -Aarau -AARC -aardvark -aardvarks -aardwolf -aardwolves -Aaren -Aargau -aargh -Aarhus -Aarika -Aaron -Aaronic -Aaronical -Aaronite -Aaronitic -Aaron's-beard -Aaronsburg -Aaronson -AARP -aarrgh -aarrghh -Aaru -AAS -A'asia -aasvogel -aasvogels -AAU -AAUP -AAUW -AAVSO -AAX -A-axes -A-axis -AB -ab- -ABA -Ababa -Ababdeh -Ababua -abac -abaca -abacay -abacas -abacate -abacaxi -abaci -abacinate -abacination -abacisci -abaciscus -abacist -aback -abacli -Abaco -abacot -abacterial -abactinal -abactinally -abaction -abactor -abaculi -abaculus -abacus -abacuses -Abad -abada -Abadan -Abaddon -abadejo -abadengo -abadia -Abadite -abaff -abaft -Abagael -Abagail -Abagtha -abay -abayah -Abailard -abaisance -abaised -abaiser -abaisse -abaissed -abaka -Abakan -abakas -Abakumov -abalation -abalienate -abalienated -abalienating -abalienation -abalone -abalones -Abama -abamp -abampere -abamperes -abamps -Abana -aband -abandon -abandonable -abandoned -abandonedly -abandonee -abandoner -abandoners -abandoning -abandonment -abandonments -abandons -abandum -abanet -abanga -Abanic -abannition -Abantes -abapical -abaptiston -abaptistum -Abarambo -Abarbarea -Abaris -abarthrosis -abarticular -abarticulation -Abas -abase -abased -abasedly -abasedness -abasement -abasements -abaser -abasers -abases -Abasgi -abash -abashed -abashedly -abashedness -abashes -abashing -abashless -abashlessly -abashment -abashments -abasia -abasias -abasic -abasing -abasio -abask -abassi -Abassieh -Abassin -abastard -abastardize -abastral -abatable -abatage -Abate -abated -abatement -abatements -abater -abaters -abates -abatic -abating -abatis -abatised -abatises -abatjour -abatjours -abaton -abator -abators -ABATS -abattage -abattis -abattised -abattises -abattoir -abattoirs -abattu -abattue -Abatua -abature -abaue -abave -abaxial -abaxile -abaze -abb -Abba -abbacy -abbacies -abbacomes -Abbadide -Abbai -abbaye -abbandono -abbas -abbasi -Abbasid -abbassi -Abbassid -Abbasside -Abbate -abbatial -abbatical -abbatie -Abbe -Abbey -abbeys -abbey's -abbeystead -abbeystede -abbes -abbess -abbesses -abbest -Abbevilean -Abbeville -Abbevillian -Abbi -Abby -Abbie -Abbye -Abbyville -abboccato -abbogada -Abbot -abbotcy -abbotcies -abbotnullius -abbotric -abbots -abbot's -Abbotsen -Abbotsford -abbotship -abbotships -Abbotson -Abbotsun -Abbott -Abbottson -Abbottstown -Abboud -abbozzo -ABBR -abbrev -abbreviatable -abbreviate -abbreviated -abbreviately -abbreviates -abbreviating -abbreviation -abbreviations -abbreviator -abbreviatory -abbreviators -abbreviature -abbroachment -ABC -abcess -abcissa -abcoulomb -ABCs -abd -abdal -abdali -abdaria -abdat -Abdel -Abd-el-Kadir -Abd-el-Krim -Abdella -Abderhalden -Abderian -Abderite -Abderus -abdest -Abdias -abdicable -abdicant -abdicate -abdicated -abdicates -abdicating -abdication -abdications -abdicative -abdicator -Abdiel -abditive -abditory -abdom -abdomen -abdomens -abdomen's -abdomina -abdominal -Abdominales -abdominalia -abdominalian -abdominally -abdominals -abdominoanterior -abdominocardiac -abdominocentesis -abdominocystic -abdominogenital -abdominohysterectomy -abdominohysterotomy -abdominoposterior -abdominoscope -abdominoscopy -abdominothoracic -abdominous -abdomino-uterotomy -abdominovaginal -abdominovesical -Abdon -Abdu -abduce 
-abduced -abducens -abducent -abducentes -abduces -abducing -abduct -abducted -abducting -abduction -abductions -abduction's -abductor -abductores -abductors -abductor's -abducts -Abdul -Abdul-Aziz -Abdul-baha -Abdulla -Abe -a-be -abeam -abear -abearance -Abebi -abecedaire -abecedary -abecedaria -abecedarian -abecedarians -abecedaries -abecedarium -abecedarius -abed -abede -abedge -Abednego -abegge -Abey -abeyance -abeyances -abeyancy -abeyancies -abeyant -abeigh -ABEL -Abelard -abele -abeles -Abelia -Abelian -Abelicea -Abelite -Abell -Abelmoschus -abelmosk -abelmosks -abelmusk -Abelonian -Abelson -abeltree -Abencerrages -abend -abends -Abenezra -abenteric -Abeokuta -abepithymia -ABEPP -Abercromby -Abercrombie -Aberdare -aberdavine -Aberdeen -Aberdeenshire -aberdevine -Aberdonian -aberduvine -Aberfan -Aberglaube -Aberia -Aberystwyth -Abernant -Abernathy -abernethy -Abernon -aberr -aberrance -aberrancy -aberrancies -aberrant -aberrantly -aberrants -aberrate -aberrated -aberrating -aberration -aberrational -aberrations -aberrative -aberrator -aberrometer -aberroscope -Abert -aberuncate -aberuncator -abesse -abessive -abet -abetment -abetments -abets -abettal -abettals -abetted -abetter -abetters -abetting -abettor -abettors -Abeu -abevacuation -abfarad -abfarads -ABFM -Abgatha -ABHC -abhenry -abhenries -abhenrys -abhinaya -abhiseka -abhominable -abhor -abhorred -abhorrence -abhorrences -abhorrency -abhorrent -abhorrently -abhorrer -abhorrers -abhorrible -abhorring -abhors -Abhorson -ABI -aby -Abia -Abiathar -Abib -abichite -abidal -abidance -abidances -abidden -abide -abided -abider -abiders -abides -abidi -abiding -abidingly -abidingness -Abidjan -Abydos -Abie -abye -abied -abyed -abiegh -abience -abient -Abies -abyes -abietate \ No newline at end of file diff --git a/backend/src/test/resources/tests/endpoints/adminEndpointInfo.json b/backend/src/test/resources/tests/endpoints/adminEndpointInfo.json index ed247b1471..c591f1ef5a 100644 --- a/backend/src/test/resources/tests/endpoints/adminEndpointInfo.json +++ b/backend/src/test/resources/tests/endpoints/adminEndpointInfo.json @@ -284,11 +284,6 @@ "path": "/users/{userId}/roles/{roleId}", "clazz": "UserResource" }, - { - "method": "GET", - "path": "/logout", - "clazz": "AdminResource" - }, { "method": "GET", "path": "/jobs/", diff --git a/backend/src/test/resources/tests/endpoints/adminUIEndpointInfo.json b/backend/src/test/resources/tests/endpoints/adminUIEndpointInfo.json index 319b865e61..268380bc0f 100644 --- a/backend/src/test/resources/tests/endpoints/adminUIEndpointInfo.json +++ b/backend/src/test/resources/tests/endpoints/adminUIEndpointInfo.json @@ -93,5 +93,10 @@ "method": "GET", "path": "/index-service", "clazz": "IndexServiceUIResource" + }, + { + "method": "GET", + "path": "/logout", + "clazz": "AdminUIResource" } ] \ No newline at end of file diff --git a/backend/src/test/resources/tests/query/MULTIPLE_TABLES_ICD_QUERY2/MULTIPLE_TABLES_ICD_KH_AU_QUERY_ohneFilter.test.json b/backend/src/test/resources/tests/query/MULTIPLE_TABLES_ICD_QUERY2/MULTIPLE_TABLES_ICD_KH_AU_QUERY_ohneFilter.test.json index 5ab3b1c633..5236855788 100644 --- a/backend/src/test/resources/tests/query/MULTIPLE_TABLES_ICD_QUERY2/MULTIPLE_TABLES_ICD_KH_AU_QUERY_ohneFilter.test.json +++ b/backend/src/test/resources/tests/query/MULTIPLE_TABLES_ICD_QUERY2/MULTIPLE_TABLES_ICD_KH_AU_QUERY_ohneFilter.test.json @@ -102,8 +102,7 @@ "columns": [ { "name": "icd_code", - "type": "STRING", - "sharedDictionary": "icd" + "type": "STRING" }, { "name": "entlassungsdatum", @@ 
-121,8 +120,7 @@ "columns": [ { "name": "icd_code", - "type": "STRING", - "sharedDictionary": "icd" + "type": "STRING" }, { "name": "au_beginn", diff --git a/backend/src/test/resources/tests/sql/date_restriction/daterange/content.csv b/backend/src/test/resources/tests/sql/date_restriction/daterange/content.csv new file mode 100644 index 0000000000..978380e1e8 --- /dev/null +++ b/backend/src/test/resources/tests/sql/date_restriction/daterange/content.csv @@ -0,0 +1,9 @@ +pid,datum_start,datum_end +1,2012-06-30,2015-06-30 +2,2012-06-30,2015-06-30 +3,2012-02-03,2012-06-30 +4,2010-06-30,2015-06-30 +5,2011-04-30,2014-06-30 +6,2015-06-30,2016-06-30 +7,2014-04-30,2015-06-30 +8,2012-04-30,2014-06-30 diff --git a/backend/src/test/resources/tests/sql/date_restriction/daterange/daterange_column.spec.json b/backend/src/test/resources/tests/sql/date_restriction/daterange/daterange_column.spec.json new file mode 100644 index 0000000000..4cdf8a4176 --- /dev/null +++ b/backend/src/test/resources/tests/sql/date_restriction/daterange/daterange_column.spec.json @@ -0,0 +1,69 @@ +{ + "type": "QUERY_TEST", + "sqlSpec": { + "isEnabled": true + }, + "label": "Date restriction with a validity date with start and end column", + "expectedCsv": "tests/sql/date_restriction/daterange/expected.csv", + "query": { + "type": "CONCEPT_QUERY", + "root": { + "type": "DATE_RESTRICTION", + "dateRange": { + "min": "2012-01-01", + "max": "2012-12-31" + }, + "child": { + "ids": [ + "geschlecht_select" + ], + "type": "CONCEPT", + "label": "Geschlecht SELECT", + "tables": [ + { + "id": "geschlecht_select.geschlecht_connector" + } + ] + } + } + }, + "concepts": [ + { + "label": "geschlecht_select", + "type": "TREE", + "connectors": [ + { + "label": "geschlecht_connector", + "table": "table1", + "validityDates": { + "label": "datum", + "startColumn": "table1.datum_start", + "endColumn": "table1.datum_end" + } + } + ] + } + ], + "content": { + "tables": [ + { + "csv": "tests/sql/date_restriction/daterange/content.csv", + "name": "table1", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "datum_start", + "type": "DATE" + }, + { + "name": "datum_end", + "type": "DATE" + } + ] + } + ] + } +} diff --git a/backend/src/test/resources/tests/sql/date_restriction/daterange/expected.csv b/backend/src/test/resources/tests/sql/date_restriction/daterange/expected.csv new file mode 100644 index 0000000000..8561aaa17a --- /dev/null +++ b/backend/src/test/resources/tests/sql/date_restriction/daterange/expected.csv @@ -0,0 +1,7 @@ +result,dates +1,{2012-06-30/2012-12-31} +2,{2012-06-30/2012-12-31} +3,{2012-02-03/2012-06-30} +4,{2012-01-01/2012-12-31} +5,{2012-01-01/2012-12-31} +8,{2012-04-30/2012-12-31} diff --git a/backend/src/test/resources/tests/sql/date_restriction/postgres_daterange/expected.csv b/backend/src/test/resources/tests/sql/date_restriction/postgres_daterange/expected.csv index f382526dda..8561aaa17a 100644 --- a/backend/src/test/resources/tests/sql/date_restriction/postgres_daterange/expected.csv +++ b/backend/src/test/resources/tests/sql/date_restriction/postgres_daterange/expected.csv @@ -1,7 +1,7 @@ result,dates -2,{2012-06-30/2015-06-29} -4,{2010-06-30/2015-06-29} -3,{2012-02-03/2012-06-29} -5,{2011-04-30/2014-06-29} -1,{2012-06-30/2015-06-29} -8,{2012-04-30/2014-06-29} +1,{2012-06-30/2012-12-31} +2,{2012-06-30/2012-12-31} +3,{2012-02-03/2012-06-30} +4,{2012-01-01/2012-12-31} +5,{2012-01-01/2012-12-31} +8,{2012-04-30/2012-12-31} diff --git 
a/backend/src/test/resources/tests/sql/multiple_tables/multiple_tables.spec.json b/backend/src/test/resources/tests/sql/multiple_tables/multiple_tables.spec.json index 886be63183..f1d660abfc 100644 --- a/backend/src/test/resources/tests/sql/multiple_tables/multiple_tables.spec.json +++ b/backend/src/test/resources/tests/sql/multiple_tables/multiple_tables.spec.json @@ -105,8 +105,7 @@ "columns": [ { "name": "icd_code", - "type": "STRING", - "sharedDictionary": "icd" + "type": "STRING" }, { "name": "entlassungsdatum", @@ -124,8 +123,7 @@ "columns": [ { "name": "icd_code", - "type": "STRING", - "sharedDictionary": "icd" + "type": "STRING" }, { "name": "au_beginn", diff --git a/backend/src/test/resources/tests/sql/secondary_id/SECONDARY_IDS.test.json b/backend/src/test/resources/tests/sql/secondary_id/SECONDARY_IDS.test.json new file mode 100644 index 0000000000..bbdc441bd2 --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/SECONDARY_IDS.test.json @@ -0,0 +1,145 @@ +{ + "type": "QUERY_TEST", + "label": "SECONDARY_ID Test", + "expectedCsv": "tests/sql/secondary_id/expected.csv", + "query": { + "type": "SECONDARY_ID_QUERY", + "secondaryId": "secondary", + "root": { + "ids": [ + "number" + ], + "type": "CONCEPT", + "excludeFromSecondaryId": false, + "tables": [ + { + "id": "number.number_connector", + "filters": [ + { + "filter": "number.number_connector.value", + "type": "REAL_RANGE", + "value": { + "min": 1, + "max": 1 + } + } + ] + }, + { + "id": "number.number_connector2", + "filters": [ + { + "filter": "number.number_connector2.value", + "type": "REAL_RANGE", + "value": { + "min": 1, + "max": 1 + } + } + ] + } + ] + } + }, + "concepts": [ + { + "name": "number", + "type": "TREE", + "connectors": [ + { + "name": "number_connector", + "table": "table1", + "validityDates": { + "label": "datum", + "startColumn": "table1.datum_start", + "endColumn": "table1.datum_end" + }, + "filters": { + "label": "value", + "description": "xy", + "column": "table1.value", + "type": "NUMBER" + } + }, + { + "name": "number_connector2", + "table": "table2", + "validityDates": { + "label": "datum", + "startColumn": "table2.datum_start", + "endColumn": "table2.datum_end" + }, + "filters": { + "label": "value", + "description": "xy", + "column": "table2.value", + "type": "NUMBER" + } + } + ] + } + ], + "content": { + "secondaryIds": [ + { + "name": "secondary" + } + ], + "tables": [ + { + "csv": "tests/sql/secondary_id/content.csv", + "name": "table1", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "sid", + "type": "STRING", + "secondaryId": "secondary" + }, + { + "name": "value", + "type": "REAL" + }, + { + "name": "datum_start", + "type": "DATE" + }, + { + "name": "datum_end", + "type": "DATE" + } + ] + }, + { + "csv": "tests/sql/secondary_id/content2.csv", + "name": "table2", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "sid", + "type": "STRING", + "secondaryId": "secondary" + }, + { + "name": "value", + "type": "REAL" + }, + { + "name": "datum_start", + "type": "DATE" + }, + { + "name": "datum_end", + "type": "DATE" + } + ] + } + ] + } +} diff --git a/backend/src/test/resources/tests/sql/secondary_id/content.csv b/backend/src/test/resources/tests/sql/secondary_id/content.csv new file mode 100644 index 0000000000..2b33b32208 --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/content.csv @@ -0,0 +1,7 @@ +pid,sid,value,datum_start,datum_end +a,f_a1,1,2014-06-30,2015-06-30 
+a,f_a1,1,2016-06-30,2016-06-30 +a,f_a2,1,2014-06-30,2015-06-30 +a,,1,2010-06-30,2010-06-30 +a,f_a3,1.01,2014-06-30,2015-06-30 +b,f_b1,1,2015-02-03,2015-06-30 diff --git a/backend/src/test/resources/tests/sql/secondary_id/content2.csv b/backend/src/test/resources/tests/sql/secondary_id/content2.csv new file mode 100644 index 0000000000..fb989445e9 --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/content2.csv @@ -0,0 +1,7 @@ +pid,sid,value,datum_start,datum_end +a,f_a4,1,2024-06-30,2025-06-30 +a,f_a4,1,2026-06-30,2026-06-30 +a,f_a4,1,2024-06-30,2025-06-30 +a,,13,2020-06-30,2020-06-30 +a,f_a5,1.01,2024-06-30,2025-06-30 +b,f_b6,1,2025-02-03,2025-06-30 diff --git a/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/SECONDARY_IDS.test.json b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/SECONDARY_IDS.test.json new file mode 100644 index 0000000000..87ccc5a412 --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/SECONDARY_IDS.test.json @@ -0,0 +1,165 @@ +{ + "type": "QUERY_TEST", + "label": "SECONDARY_ID Test", + "expectedCsv": "tests/sql/secondary_id/date_mode_logical/expected.csv", + "query": { + "type": "SECONDARY_ID_QUERY", + "secondaryId": "secondary", + "dateAggregationMode": "LOGICAL", + "root": { + "type": "AND", + "children": [ + { + "ids": [ + "number" + ], + "type": "CONCEPT", + "excludeFromSecondaryId": false, + "label": "vs", + "tables": [ + { + "id": "number.number_connector", + "filters": [ + { + "filter": "number.number_connector.value", + "type": "REAL_RANGE", + "value": { + "min": 1, + "max": 1 + } + } + ] + } + ] + }, + { + "ids": [ + "number" + ], + "type": "CONCEPT", + "excludeFromSecondaryId": false, + "label": "vs", + "tables": [ + { + "id": "number.number_connector2", + "filters": [ + { + "filter": "number.number_connector2.value", + "type": "REAL_RANGE", + "value": { + "min": 1, + "max": 1 + } + } + ] + } + ] + } + ] + } + }, + "concepts": [ + { + "label": "number", + "type": "TREE", + "connectors": [ + { + "label": "number_connector", + "table": "table1", + "validityDates": { + "label": "datum", + "startColumn": "table1.datum_start", + "endColumn": "table1.datum_end" + }, + "filters": { + "label": "value", + "description": "xy", + "column": "table1.value", + "type": "NUMBER" + } + }, + { + "label": "number_connector2", + "table": "table12", + "validityDates": { + "label": "datum", + "startColumn": "table12.datum_start", + "endColumn": "table12.datum_end" + }, + "filters": { + "label": "value", + "description": "xy", + "column": "table12.value", + "type": "NUMBER" + } + } + ] + } + ], + "content": { + "secondaryIds": [ + { + "name": "secondary" + }, + { + "name": "ignored" + } + ], + "tables": [ + { + "csv": "tests/sql/secondary_id/date_mode_logical/content.csv", + "name": "table1", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "sid", + "type": "STRING", + "secondaryId": "secondary" + }, + { + "name": "value", + "type": "REAL" + }, + { + "name": "datum_start", + "type": "DATE" + }, + { + "name": "datum_end", + "type": "DATE" + } + ] + }, + { + "csv": "tests/sql/secondary_id/date_mode_logical/content2.csv", + "name": "table12", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "sid", + "type": "STRING", + "secondaryId": "secondary" + }, + { + "name": "value", + "type": "REAL" + }, + { + "name": "datum_start", + "type": "DATE" + }, + { + "name": "datum_end", + "type": "DATE" + } + ] + } + ] + } 
+} diff --git a/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/content.csv b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/content.csv new file mode 100644 index 0000000000..3081ae37de --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/content.csv @@ -0,0 +1,5 @@ +pid,sid,value,datum_start,datum_end +a,f_a1,1,2014-06-30,2015-06-30 +a,f_a1,1,2016-06-30,2027-06-30 +a,f_a2,1,2014-06-30,2015-06-30 +a,,1,2010-06-30,2010-06-30 diff --git a/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/content2.csv b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/content2.csv new file mode 100644 index 0000000000..670e9fc815 --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/content2.csv @@ -0,0 +1,5 @@ +pid,sid,value,datum_start,datum_end +a,f_a1,1,2024-06-30,2025-06-30 +a,f_a1,1,2026-06-30,2026-06-30 +a,f_a2,1,2024-06-30,2025-06-30 +a,,13,2020-06-30,2020-06-30 diff --git a/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/expected.csv b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/expected.csv new file mode 100644 index 0000000000..2834b221b9 --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/date_mode_logical/expected.csv @@ -0,0 +1,3 @@ +result,secondary,dates +a,f_a1,"{2024-06-30/2025-06-30,2026-06-30/2026-06-30}" +a,f_a2,{} diff --git a/backend/src/test/resources/tests/sql/secondary_id/excluded/SECONDARY_IDS_EXCLUDED.test.json b/backend/src/test/resources/tests/sql/secondary_id/excluded/SECONDARY_IDS_EXCLUDED.test.json new file mode 100644 index 0000000000..f9880afcae --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/excluded/SECONDARY_IDS_EXCLUDED.test.json @@ -0,0 +1,101 @@ +{ + "type": "QUERY_TEST", + "label": "SECONDARY_ID_EXCLUDED Test", + "expectedCsv": "tests/query/SECONDARY_ID_EXCLUDED/expected.csv", + "query": { + "type": "SECONDARY_ID_QUERY", + "secondaryId": "secondary", + "root": { + "type": "AND", + "children": [ + { + "ids": [ + "concept" + ], + "type": "CONCEPT", + "label": "vs", + "excludeFromSecondaryId": false, + "tables": [ + { + "id": "concept.connector1" + } + ] + }, + { + "ids": [ + "concept" + ], + "excludeFromSecondaryId": true, + "type": "CONCEPT", + "tables": [ + { + "id": "concept.connector1", + "filters": [ + { + "filter": "concept.connector1.value", + "type": "INTEGER_RANGE", + "value": { + "min": 2 + } + } + ] + } + ] + } + ] + } + }, + "concepts": [ + { + "name": "concept", + "type": "TREE", + "connectors": [ + { + "name": "connector1", + "table": "table1", + "validityDates": { + "name": "datum", + "column": "table1.datum" + }, + "filters": { + "name": "value", + "column": "table1.value", + "type": "COUNT" + } + } + ] + } + ], + "content": { + "secondaryIds": [ + { + "name": "secondary" + } + ], + "tables": [ + { + "csv": "tests/query/SECONDARY_ID_EXCLUDED/content.csv", + "name": "table1", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "sid", + "type": "STRING", + "secondaryId": "secondary" + }, + { + "name": "value", + "type": "STRING" + }, + { + "name": "datum", + "type": "DATE" + } + ] + } + ] + } +} diff --git a/backend/src/test/resources/tests/sql/secondary_id/expected.csv b/backend/src/test/resources/tests/sql/secondary_id/expected.csv new file mode 100644 index 0000000000..f325999afe --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/expected.csv @@ -0,0 +1,6 @@ 
+result,secondary,dates +a,f_a1,"{2014-06-30/2015-06-30,2016-06-30/2016-06-30}" +a,f_a2,{2014-06-30/2015-06-30} +b,f_b1,{2015-02-03/2015-06-30} +a,f_a4,"{2024-06-30/2025-06-30,2026-06-30/2026-06-30}" +b,f_b6,{2025-02-03/2025-06-30} \ No newline at end of file diff --git a/backend/src/test/resources/tests/sql/secondary_id/mixed/SECONDARY_IDS_MIXED.test.json b/backend/src/test/resources/tests/sql/secondary_id/mixed/SECONDARY_IDS_MIXED.test.json new file mode 100644 index 0000000000..28f013fb3a --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/mixed/SECONDARY_IDS_MIXED.test.json @@ -0,0 +1,152 @@ +{ + "type": "QUERY_TEST", + "label": "SECONDARY_ID_MIXED Test", + "expectedCsv": "tests/sql/secondary_id/mixed/expected.csv", + "query": { + "type": "SECONDARY_ID_QUERY", + "secondaryId": "secondary", + "root": { + "ids": [ + "concept" + ], + "type": "CONCEPT", + "excludeFromSecondaryId": false, + "tables": [ + { + "id": "concept.connector1", + "filters": [ + { + "filter": "concept.connector1.filter", + "type": "REAL_RANGE", + "value": { + "min": 1, + "max": 1 + } + } + ] + }, + { + "id": "concept.connector2", + "filters": [ + { + "filter": "concept.connector2.filter", + "type": "REAL_RANGE", + "value": { + "min": 1, + "max": 1 + } + } + ] + } + ] + } + }, + "concepts": [ + { + "name": "concept", + "type": "TREE", + "connectors": [ + { + "label": "connector1", + "table": "table", + "validityDates": { + "label": "datum", + "startColumn": "table.datum_start", + "endColumn": "table.datum_end" + }, + "filters": { + "label": "filter", + "description": "xy", + "column": "table.value", + "type": "NUMBER" + } + }, + { + "label": "connector2", + "table": "table2", + "validityDates": { + "label": "datum", + "startColumn": "table2.datum_start", + "endColumn": "table2.datum_end" + }, + "filters": { + "label": "filter", + "description": "xy", + "column": "table2.value", + "type": "NUMBER" + } + } + ] + } + ], + "content": { + "secondaryIds": [ + { + "name": "secondary" + }, + { + "name": "ignored" + } + ], + "tables": [ + { + "csv": "tests/sql/secondary_id/mixed/content.csv", + "name": "table", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "sid", + "type": "STRING", + "secondaryId": "secondary" + }, + { + "name": "value", + "type": "REAL" + }, + { + "name": "datum_start", + "type": "DATE" + }, + { + "name": "datum_end", + "type": "DATE" + }, + { + "name": "ignored", + "type": "STRING", + "secondaryId": "ignored" + } + ] + }, + { + "csv": "tests/sql/secondary_id/mixed/content2.csv", + "name": "table2", + "primaryColumn": { + "name": "pid", + "type": "STRING" + }, + "columns": [ + { + "name": "sid", + "type": "STRING" + }, + { + "name": "value", + "type": "REAL" + }, + { + "name": "datum_start", + "type": "DATE" + }, + { + "name": "datum_end", + "type": "DATE" + } + ] + } + ] + } +} diff --git a/backend/src/test/resources/tests/sql/secondary_id/mixed/content.csv b/backend/src/test/resources/tests/sql/secondary_id/mixed/content.csv new file mode 100644 index 0000000000..62918f9a9b --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/mixed/content.csv @@ -0,0 +1,7 @@ +pid,sid,value,datum_start,datum_end,ignored +a,f_a1,1,2014-06-30,2015-06-30,"a" +a,f_a1,1,2016-06-30,2016-06-30,"a" +a,f_a2,1,2014-06-30,2015-06-30,"a" +a,,1,2010-06-30,2010-06-30,"a" +a,f_a3,1.01,2014-06-30,2015-06-30,"a" +b,f_b1,1,2015-02-03,2015-06-30,"a" diff --git a/backend/src/test/resources/tests/sql/secondary_id/mixed/content2.csv 
b/backend/src/test/resources/tests/sql/secondary_id/mixed/content2.csv new file mode 100644 index 0000000000..fb989445e9 --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/mixed/content2.csv @@ -0,0 +1,7 @@ +pid,sid,value,datum_start,datum_end +a,f_a4,1,2024-06-30,2025-06-30 +a,f_a4,1,2026-06-30,2026-06-30 +a,f_a4,1,2024-06-30,2025-06-30 +a,,13,2020-06-30,2020-06-30 +a,f_a5,1.01,2024-06-30,2025-06-30 +b,f_b6,1,2025-02-03,2025-06-30 diff --git a/backend/src/test/resources/tests/sql/secondary_id/mixed/expected.csv b/backend/src/test/resources/tests/sql/secondary_id/mixed/expected.csv new file mode 100644 index 0000000000..c0ecc10b2c --- /dev/null +++ b/backend/src/test/resources/tests/sql/secondary_id/mixed/expected.csv @@ -0,0 +1,4 @@ +result,secondary,dates +a,f_a2,"{2014-06-30/2015-06-30,2024-06-30/2025-06-30,2026-06-30/2026-06-30}" +a,f_a1,"{2014-06-30/2015-06-30,2016-06-30/2016-06-30,2024-06-30/2025-06-30,2026-06-30/2026-06-30}" +b,f_b1,"{2015-02-03/2015-06-30,2025-02-03/2025-06-30}" diff --git a/frontend/Dockerfile b/frontend.Dockerfile similarity index 73% rename from frontend/Dockerfile rename to frontend.Dockerfile index 8b2e2c78b8..164312a625 100644 --- a/frontend/Dockerfile +++ b/frontend.Dockerfile @@ -1,11 +1,23 @@ +# Version Extractor +FROM bitnami/git:2.38.1 AS version-extractor + +WORKDIR /app +COPY .git . + +RUN git describe --tags | sed 's/^v//' > git_describe.txt + +# Builder FROM node:18-alpine AS builder WORKDIR /app -COPY ./package.json ./package-lock.json ./ +COPY ./frontend/package.json ./frontend/package-lock.json ./ RUN npm ci -COPY . . +COPY ./frontend . + +# Get the version from previous step +COPY --from=version-extractor /app/git_describe.txt . # Uses env variables from .env file (BUILD TIME) RUN PUBLIC_URL=/ npm run build @@ -28,7 +40,7 @@ ENV REACT_APP_IDP_CLIENT_ID=$REACT_APP_IDP_CLIENT_ID # Copy the build artifacts from the builder phase COPY --from=builder /app/dist /usr/share/nginx/html # Copy the env replacer -COPY ./scripts/replace-env-at-runtime.sh / +COPY ./frontend/scripts/replace-env-at-runtime.sh / # The default command replaces the environment variables and starts nginx as a subprocess CMD [ "/bin/sh", "-c", "/replace-env-at-runtime.sh /usr/share/nginx/html/index.html && nginx -g \"daemon off;\""] diff --git a/frontend/src/js/api/types.ts b/frontend/src/js/api/types.ts index 01a3458a31..e00d766843 100644 --- a/frontend/src/js/api/types.ts +++ b/frontend/src/js/api/types.ts @@ -293,6 +293,7 @@ export interface QueryUploadConfigT { export interface GetFrontendConfigResponseT { version: string; + formBackendVersions: Record; currency: CurrencyConfigT; queryUpload: QueryUploadConfigT; manualUrl?: string; diff --git a/frontend/src/js/app/About.tsx b/frontend/src/js/app/About.tsx index 54122dc829..4e4a7ae2c3 100644 --- a/frontend/src/js/app/About.tsx +++ b/frontend/src/js/app/About.tsx @@ -1,6 +1,13 @@ import styled from "@emotion/styled"; import { faCopy } from "@fortawesome/free-regular-svg-icons"; -import { memo, useState } from "react"; +import { + ReactNode, + createContext, + memo, + useCallback, + useContext, + useState, +} from "react"; import { useHotkeys } from "react-hotkeys-hook"; import { useSelector } from "react-redux"; @@ -9,6 +16,27 @@ import Modal from "../modal/Modal"; import { StateT } from "./reducers"; +const initialState = { + isOpen: false, + setOpen: () => {}, +}; + +const Context = createContext>(initialState); + +const useContextValue = () => { + const [isOpen, setOpen] = useState(false); + + return { isOpen, 
setOpen }; +}; + +export const AboutProvider = ({ children }: { children: ReactNode }) => ( + {children} +); + +export const useAbout = () => { + return useContext(Context); +}; + const Grid = styled("div")` display: grid; grid-template-columns: auto 1fr; @@ -22,7 +50,7 @@ const Version = styled("code")` `; const useVersion = () => { - const backendVersion = useSelector( + const backendGitDescribe = useSelector( (state) => state.startup.config.version, ); @@ -37,36 +65,42 @@ const useVersion = () => { // `; // THIS IS GETTING STATICALLY REPLACED USING "VITE DEFINE" - const frontendVersion = `__BUILD_TIMESTAMP__`.replace(/"/g, ""); + const frontendTimestamp = `__BUILD_TIMESTAMP__`.replace(/"/g, ""); + const frontendGitDescribe = `__BUILD_GIT_DESCRIBE__`.replace(/"/g, ""); return { - backendVersion, - frontendVersion, + backendGitDescribe, + frontendTimestamp, + frontendGitDescribe, }; }; export const About = memo(() => { - const [isOpen, setIsOpen] = useState(false); - const { backendVersion, frontendVersion } = useVersion(); + const { isOpen, setOpen } = useAbout(); + const toggleOpen = useCallback(() => setOpen((open) => !open), [setOpen]); + const { backendGitDescribe, frontendTimestamp, frontendGitDescribe } = + useVersion(); const copyVersionToClipboard = () => { navigator.clipboard.writeText( - `BE: ${backendVersion} FE: ${frontendVersion}`, + `BE: ${backendGitDescribe} FE: ${frontendGitDescribe}`, ); - setIsOpen(false); + setOpen(false); }; - useHotkeys("shift+?", () => setIsOpen((open) => !open)); + useHotkeys("shift+?", toggleOpen, [toggleOpen]); if (!isOpen) return null; return ( - setIsOpen(false)}> + setOpen(false)}>
    Backend
-    {backendVersion}
+    {backendGitDescribe}
    Frontend
-    {frontendVersion}
+
+    {frontendGitDescribe} – {frontendTimestamp}
+
    Copy version info diff --git a/frontend/src/js/app/AppRouter.tsx b/frontend/src/js/app/AppRouter.tsx index 370459ade0..18a59f953b 100644 --- a/frontend/src/js/app/AppRouter.tsx +++ b/frontend/src/js/app/AppRouter.tsx @@ -10,6 +10,7 @@ import LoginPage from "../authorization/LoginPage"; import WithAuthToken from "../authorization/WithAuthToken"; import { basename } from "../environment"; +import { AboutProvider } from "./About"; import App from "./App"; const ContextProviders = ({ children }: { children: ReactNode }) => { @@ -17,7 +18,9 @@ const ContextProviders = ({ children }: { children: ReactNode }) => { return ( - {children} + + {children} + ); }; diff --git a/frontend/src/js/app/Content.tsx b/frontend/src/js/app/Content.tsx index 633e9721fc..f002ce9ca7 100644 --- a/frontend/src/js/app/Content.tsx +++ b/frontend/src/js/app/Content.tsx @@ -33,10 +33,6 @@ const Content = () => { (state) => state.entityHistory.isOpen, ); - const disableDragHandles = useSelector( - (state) => state.panes.disableDragHandles, - ); - const collapsedStyles = useMemo(() => { if (displayTooltip) return {}; @@ -51,8 +47,6 @@ const Content = () => { return ( - {isHistoryOpen && } - {isPreviewOpen && } { > {displayTooltip ? : } - {!disableDragHandles && } + - {!disableDragHandles && } + + {isHistoryOpen && } + {isPreviewOpen && } ); diff --git a/frontend/src/js/external-forms/FormsNavigation.tsx b/frontend/src/js/external-forms/FormsNavigation.tsx index 5d5cc91c27..b2fe71b28f 100644 --- a/frontend/src/js/external-forms/FormsNavigation.tsx +++ b/frontend/src/js/external-forms/FormsNavigation.tsx @@ -20,7 +20,8 @@ const Root = styled("div")` box-shadow: 0 0 3px 0 rgba(0, 0, 0, 0.3); box-sizing: border-box; background-color: ${({ theme }) => theme.col.bg}; - z-index: 1; + position: relative; + z-index: 2; `; const Row = styled("div")` diff --git a/frontend/src/js/header/HelpMenu.tsx b/frontend/src/js/header/HelpMenu.tsx index 1632b199af..3d76e4ff24 100644 --- a/frontend/src/js/header/HelpMenu.tsx +++ b/frontend/src/js/header/HelpMenu.tsx @@ -1,12 +1,14 @@ import styled from "@emotion/styled"; import { faBook, + faInfoCircle, faPaperPlane, faQuestion, } from "@fortawesome/free-solid-svg-icons"; import { useMemo } from "react"; import { useTranslation } from "react-i18next"; +import { useAbout } from "../app/About"; import IconButton from "../button/IconButton"; import WithTooltip from "../tooltip/WithTooltip"; @@ -36,6 +38,7 @@ const dropdownOffset: [number, number] = [-47, 5]; // [skidding, distance] / def export const HelpMenu = ({ contactEmail, manualUrl }: Props) => { const { t } = useTranslation(); + const { setOpen } = useAbout(); const Dropdown = useMemo( () => ( @@ -59,9 +62,17 @@ export const HelpMenu = ({ contactEmail, manualUrl }: Props) => { {t("common.manual")} + setOpen(true)} + > + {t("common.version")} + ), - [t, manualUrl, contactEmail], + [t, manualUrl, contactEmail, setOpen], ); return ( { arrow={false} html={Dropdown} offset={dropdownOffset} + hideOnClick > diff --git a/frontend/src/js/pane/actions.ts b/frontend/src/js/pane/actions.ts index 1af88466fd..b86e534741 100644 --- a/frontend/src/js/pane/actions.ts +++ b/frontend/src/js/pane/actions.ts @@ -1,12 +1,8 @@ import { ActionType, createAction } from "typesafe-actions"; -export type PaneActions = - | ActionType - | ActionType; +export type PaneActions = ActionType; export const clickPaneTab = createAction("pane/CLICK_PANE_TAB")<{ paneType: "left" | "right"; tab: string; }>(); - -export const toggleDragHandles = 
createAction("pane/TOGGLE_DRAG_HANDLES")(); diff --git a/frontend/src/js/pane/reducer.ts b/frontend/src/js/pane/reducer.ts index 13d4769ecb..27cf4d6a66 100644 --- a/frontend/src/js/pane/reducer.ts +++ b/frontend/src/js/pane/reducer.ts @@ -2,13 +2,12 @@ import { getType } from "typesafe-actions"; import type { Action } from "../app/actions"; -import { clickPaneTab, toggleDragHandles } from "./actions"; +import { clickPaneTab } from "./actions"; export type LeftPaneTab = "conceptTrees" | "previousQueries" | "formConfigs"; export interface PanesStateT { left: { activeTab: LeftPaneTab }; right: { activeTab: string | null }; - disableDragHandles: boolean; } const initialState: PanesStateT = { @@ -18,7 +17,6 @@ const initialState: PanesStateT = { right: { activeTab: "queryEditor", }, - disableDragHandles: false, }; const reducer = ( @@ -36,11 +34,6 @@ const reducer = ( activeTab: tab, }, }; - case getType(toggleDragHandles): - return { - ...state, - disableDragHandles: !state.disableDragHandles, - }; default: return state; } diff --git a/frontend/src/js/preview/Preview.tsx b/frontend/src/js/preview/Preview.tsx index 2cdc7db457..9339722bcf 100644 --- a/frontend/src/js/preview/Preview.tsx +++ b/frontend/src/js/preview/Preview.tsx @@ -1,8 +1,8 @@ import styled from "@emotion/styled"; -import { useEffect, useState } from "react"; +import { useState } from "react"; import { useHotkeys } from "react-hotkeys-hook"; import { useTranslation } from "react-i18next"; -import { useDispatch, useSelector, useStore } from "react-redux"; +import { useDispatch, useSelector } from "react-redux"; import { StateT } from "../app/reducers"; @@ -11,7 +11,6 @@ import { TransparentButton } from "../button/TransparentButton"; import FaIcon from "../icon/FaIcon"; import { faSpinner } from "@fortawesome/free-solid-svg-icons"; -import { toggleDragHandles } from "../pane/actions"; import Charts from "./Charts"; import DiagramModal from "./DiagramModal"; import HeadlineStats from "./HeadlineStats"; @@ -92,16 +91,6 @@ export default function Preview() { if (!selectBoxOpen && !popOver) onClose(); }); - const store = useStore(); - useEffect(() => { - if (!(store.getState() as StateT).panes.disableDragHandles) { - dispatch(toggleDragHandles()); - return () => { - dispatch(toggleDragHandles()); - }; - } - }, [preview.statisticsData, dispatch, store]); - return ( diff --git a/frontend/src/js/preview/actions.ts b/frontend/src/js/preview/actions.ts index 7ca717c07f..cf856360c7 100644 --- a/frontend/src/js/preview/actions.ts +++ b/frontend/src/js/preview/actions.ts @@ -51,6 +51,7 @@ export function useLoadPreviewData() { queryData, statisticsData, } = useSelector((state) => state.preview); + const currentPreviewData: PreviewData | null = dataLoadedForQueryId && arrowReader && @@ -84,7 +85,7 @@ export function useLoadPreviewData() { ); const loadInitialData = async () => { await arrowReader.open(); - return await arrowReader.next(); + return arrowReader.next(); }; const awaitedData = await Promise.all([ diff --git a/frontend/src/js/query-node-editor/QueryNodeEditor.tsx b/frontend/src/js/query-node-editor/QueryNodeEditor.tsx index 743ac73b60..d94bdffc24 100644 --- a/frontend/src/js/query-node-editor/QueryNodeEditor.tsx +++ b/frontend/src/js/query-node-editor/QueryNodeEditor.tsx @@ -35,7 +35,7 @@ const Root = styled("div")` right: 0; bottom: 0; position: absolute; - z-index: 1; + z-index: 2; background-color: ${({ theme }) => theme.col.bg}; `; diff --git a/frontend/src/js/startup/reducer.ts b/frontend/src/js/startup/reducer.ts index 
e8cfb15a29..bd7d13f2ef 100644 --- a/frontend/src/js/startup/reducer.ts +++ b/frontend/src/js/startup/reducer.ts @@ -19,6 +19,7 @@ const initialState: StartupStateT = { queryUpload: { ids: [], }, + formBackendVersions: {}, currency: { unit: "€", thousandSeparator: ".", diff --git a/frontend/src/js/ui-components/InputMultiSelect/InputMultiSelect.tsx b/frontend/src/js/ui-components/InputMultiSelect/InputMultiSelect.tsx index 2d322ff2ac..6b78a9ca43 100644 --- a/frontend/src/js/ui-components/InputMultiSelect/InputMultiSelect.tsx +++ b/frontend/src/js/ui-components/InputMultiSelect/InputMultiSelect.tsx @@ -95,14 +95,18 @@ const InputMultiSelect = ({ onLoadMore, onLoadAndInsertAll, }: Props) => { + const { t } = useTranslation(); + useResolvableSelect({ defaultValue, onResolve, }); const menuContainerRef = useRef(null); + const [inputValue, setInputValue] = useState(""); - const { t } = useTranslation(); + + const inputRef = useRef(null); const { getSelectedItemProps, @@ -193,6 +197,7 @@ const InputMultiSelect = ({ if (isNotSelectedYet && hasItemHighlighted) { addSelectedItem(selectedItem); + inputRef.current?.select(); } return { @@ -247,8 +252,6 @@ const InputMultiSelect = ({ ); const labelProps = getLabelProps({}); - const inputRef = useRef(null); - const clickOutsideRef = useCloseOnClickOutside({ isOpen, toggleMenu }); const clearStaleSearch = () => { diff --git a/frontend/src/localization/de.json b/frontend/src/localization/de.json index 675e394acc..2ee053e84f 100644 --- a/frontend/src/localization/de.json +++ b/frontend/src/localization/de.json @@ -133,6 +133,7 @@ "unshare": "Freigabe aufheben" }, "common": { + "version": "Version", "manual": "Handbuch", "contact": "Kontakt", "confirm": "Bestätigen", diff --git a/frontend/src/localization/en.json b/frontend/src/localization/en.json index b7da2348ab..59f734e7df 100644 --- a/frontend/src/localization/en.json +++ b/frontend/src/localization/en.json @@ -133,6 +133,7 @@ "unshare": "Revoke sharing" }, "common": { + "version": "Version", "manual": "Manual", "contact": "Contact", "confirm": "Confirm", diff --git a/frontend/src/react-app-env.d.ts b/frontend/src/react-app-env.d.ts index 16604e8489..58c14a88a3 100644 --- a/frontend/src/react-app-env.d.ts +++ b/frontend/src/react-app-env.d.ts @@ -3,6 +3,9 @@ import "@emotion/react"; import type { ConceptIdT, ConceptT } from "./js/api/types"; +declare const __BUILD_GIT_DESCRIBE__: string; +declare const __BUILD_TIMESTAMP__: string; + declare namespace NodeJS { interface ProcessEnv { NODE_ENV: "development" | "production"; diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index fc65249fae..5ba892a6fd 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -1,4 +1,5 @@ import react from "@vitejs/plugin-react"; +import fs from "fs"; import { defineConfig } from "vite"; import eslint from "vite-plugin-eslint"; @@ -26,5 +27,8 @@ export default defineConfig({ __BUILD_TIMESTAMP__: JSON.stringify( new Date().toISOString().split(".")[0].split("T").join(" "), ), + __BUILD_GIT_DESCRIBE__: fs.existsSync("./git_describe.txt") + ? fs.readFileSync("./git_describe.txt", "utf-8").trim() + : '"__BUILD_GIT_DESCRIBE__"', }, }); diff --git a/openapi.yaml b/openapi.yaml index d1c019e358..4fa06a6fe1 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -115,8 +115,6 @@ components: type: boolean searchDisabled: type: boolean - sharedDictionary: - type: string secondaryId: $ref: "#/components/schemas/SecondaryIdId" Table: