Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Fixing Data Insights index mapping #19423

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,19 @@ private void deleteDataQualityDataIndex() {
private void createDataAssetsDataStream() {
DataInsightsSearchInterface searchInterface = getSearchInterface();

ElasticSearchConfiguration config = searchRepository.getElasticSearchConfiguration();
String language =
config != null && config.getSearchIndexMappingLanguage() != null
? config.getSearchIndexMappingLanguage().value()
: "en";

try {
for (String dataAssetType : dataAssetTypes) {
IndexMapping dataAssetIndex = searchRepository.getIndexMapping(dataAssetType);
String dataStreamName = getDataStreamName(dataAssetType);
if (!searchInterface.dataAssetDataStreamExists(dataStreamName)) {
searchInterface.createDataAssetsDataStream(dataStreamName);
searchInterface.createDataAssetsDataStream(
dataStreamName, dataAssetType, dataAssetIndex, language);
}
}
} catch (IOException ex) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import java.io.IOException;
import java.io.InputStream;
import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.search.models.IndexMapping;

public interface DataInsightsSearchInterface {

Expand All @@ -23,7 +24,9 @@ default String readResource(String resourceFile) {
}
}

void createDataAssetsDataStream(String name) throws IOException;
void createDataAssetsDataStream(
String name, String entityType, IndexMapping entityIndexMapping, String language)
throws IOException;

void deleteDataAssetDataStream(String name) throws IOException;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@
import es.org.elasticsearch.client.Response;
import es.org.elasticsearch.client.RestClient;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.openmetadata.service.apps.bundles.insights.search.DataInsightsSearchInterface;
import org.openmetadata.service.search.models.IndexMapping;
import org.openmetadata.service.util.JsonUtils;

public class ElasticSearchDataInsightsClient implements DataInsightsSearchInterface {
private final RestClient client;
Expand Down Expand Up @@ -51,8 +55,53 @@ public Boolean dataAssetDataStreamExists(String name) throws IOException {
return response.getStatusLine().getStatusCode() == 200;
}

/**
 * Builds the component-template JSON for a data assets data stream by merging parts of the
 * entity's search index mapping into the base mapping template.
 *
 * <p>The {@code analysis} settings of the entity index are written into the template settings,
 * and every field listed under {@code mappingFields} in {@code /dataInsights/config.json} (the
 * {@code common} fields plus the ones for {@code entityType}, if any) is copied from the entity
 * index mapping unless the template already defines a property with that name. Fields absent
 * from the entity index mapping are skipped.
 *
 * @param entityType entity type whose additional fields are looked up under {@code mappingFields}
 * @param entityIndexMapping index mapping whose mapping-file path is formatted with the language
 * @param language mapping language; lower-cased before being substituted into the file path
 * @param indexMappingTemplateStr JSON of the base mapping template
 * @return the merged template serialized as JSON
 */
private String buildMapping(
    String entityType,
    IndexMapping entityIndexMapping,
    String language,
    String indexMappingTemplateStr) {
  Map<String, Map<String, Map<String, Map<String, Object>>>> indexMappingTemplate =
      JsonUtils.readOrConvertValue(indexMappingTemplateStr, Map.class);
  Map<String, Map<String, List<String>>> entityConfig =
      JsonUtils.readOrConvertValue(readResource("/dataInsights/config.json"), Map.class);
  // Locale.ROOT keeps the resource path stable regardless of the JVM default locale.
  Map<String, Map<String, Map<String, Object>>> entityIndexMap =
      JsonUtils.readOrConvertValue(
          readResource(
              String.format(
                  entityIndexMapping.getIndexMappingFile(),
                  language.toLowerCase(java.util.Locale.ROOT))),
          Map.class);

  // Work on a copy so the parsed configuration is never mutated, and tolerate entity types
  // that have no dedicated entry (List.addAll(null) would throw a NullPointerException).
  Map<String, List<String>> mappingFields = entityConfig.get("mappingFields");
  List<String> entityAttributes = new java.util.ArrayList<>(mappingFields.get("common"));
  List<String> typeSpecificAttributes = mappingFields.get(entityType);
  if (typeSpecificAttributes != null) {
    entityAttributes.addAll(typeSpecificAttributes);
  }

  Map<String, Map<String, Map<String, Object>>> template = indexMappingTemplate.get("template");
  template.get("settings").put("analysis", entityIndexMap.get("settings").get("analysis"));

  // Resolve both property maps once instead of re-walking the nested maps per attribute.
  Map<String, Object> templateProperties = template.get("mappings").get("properties");
  Map<String, Object> entityProperties = entityIndexMap.get("mappings").get("properties");

  for (String attribute : entityAttributes) {
    // Properties already declared by the template take precedence over the entity mapping.
    if (templateProperties.containsKey(attribute)) {
      continue;
    }
    Object value = entityProperties.get(attribute);
    if (value != null) {
      templateProperties.put(attribute, value);
    }
  }

  return JsonUtils.pojoToJson(indexMappingTemplate);
}

@Override
public void createDataAssetsDataStream(String name) throws IOException {
public void createDataAssetsDataStream(
String name, String entityType, IndexMapping entityIndexMapping, String language)
throws IOException {
String resourcePath = "/dataInsights/elasticsearch";
createLifecyclePolicy(
"di-data-assets-lifecycle",
Expand All @@ -62,7 +111,11 @@ public void createDataAssetsDataStream(String name) throws IOException {
readResource(String.format("%s/indexSettingsTemplate.json", resourcePath)));
createComponentTemplate(
"di-data-assets-mapping",
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath)));
buildMapping(
entityType,
entityIndexMapping,
language,
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath))));
createIndexTemplate(
"di-data-assets", readResource(String.format("%s/indexTemplate.json", resourcePath)));
createDataStream(name);
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
package org.openmetadata.service.apps.bundles.insights.search.opensearch;

import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.openmetadata.service.apps.bundles.insights.search.DataInsightsSearchInterface;
import org.openmetadata.service.search.models.IndexMapping;
import org.openmetadata.service.util.JsonUtils;
import os.org.opensearch.client.Request;
import os.org.opensearch.client.Response;
import os.org.opensearch.client.ResponseException;
Expand Down Expand Up @@ -62,15 +66,64 @@ public Boolean dataAssetDataStreamExists(String name) throws IOException {
return response.getStatusLine().getStatusCode() == 200;
}

/**
 * Builds the component-template JSON for a data assets data stream by merging parts of the
 * entity's search index mapping into the base mapping template.
 *
 * <p>The {@code analysis} settings of the entity index are written into the template settings,
 * and every field listed under {@code mappingFields} in {@code /dataInsights/config.json} (the
 * {@code common} fields plus the ones for {@code entityType}, if any) is copied from the entity
 * index mapping unless the template already defines a property with that name. Fields absent
 * from the entity index mapping are skipped.
 *
 * @param entityType entity type whose additional fields are looked up under {@code mappingFields}
 * @param entityIndexMapping index mapping whose mapping-file path is formatted with the language
 * @param language mapping language; lower-cased before being substituted into the file path
 * @param indexMappingTemplateStr JSON of the base mapping template
 * @return the merged template serialized as JSON
 */
private String buildMapping(
    String entityType,
    IndexMapping entityIndexMapping,
    String language,
    String indexMappingTemplateStr) {
  Map<String, Map<String, Map<String, Map<String, Object>>>> indexMappingTemplate =
      JsonUtils.readOrConvertValue(indexMappingTemplateStr, Map.class);
  Map<String, Map<String, List<String>>> entityConfig =
      JsonUtils.readOrConvertValue(readResource("/dataInsights/config.json"), Map.class);
  // Locale.ROOT keeps the resource path stable regardless of the JVM default locale.
  Map<String, Map<String, Map<String, Object>>> entityIndexMap =
      JsonUtils.readOrConvertValue(
          readResource(
              String.format(
                  entityIndexMapping.getIndexMappingFile(),
                  language.toLowerCase(java.util.Locale.ROOT))),
          Map.class);

  // Work on a copy so the parsed configuration is never mutated, and tolerate entity types
  // that have no dedicated entry (List.addAll(null) would throw a NullPointerException).
  Map<String, List<String>> mappingFields = entityConfig.get("mappingFields");
  List<String> entityAttributes = new java.util.ArrayList<>(mappingFields.get("common"));
  List<String> typeSpecificAttributes = mappingFields.get(entityType);
  if (typeSpecificAttributes != null) {
    entityAttributes.addAll(typeSpecificAttributes);
  }

  Map<String, Map<String, Map<String, Object>>> template = indexMappingTemplate.get("template");
  template.get("settings").put("analysis", entityIndexMap.get("settings").get("analysis"));

  // Resolve both property maps once instead of re-walking the nested maps per attribute.
  Map<String, Object> templateProperties = template.get("mappings").get("properties");
  Map<String, Object> entityProperties = entityIndexMap.get("mappings").get("properties");

  for (String attribute : entityAttributes) {
    // Properties already declared by the template take precedence over the entity mapping.
    if (templateProperties.containsKey(attribute)) {
      continue;
    }
    Object value = entityProperties.get(attribute);
    if (value != null) {
      templateProperties.put(attribute, value);
    }
  }

  return JsonUtils.pojoToJson(indexMappingTemplate);
}

@Override
public void createDataAssetsDataStream(String name) throws IOException {
public void createDataAssetsDataStream(
String name, String entityType, IndexMapping entityIndexMapping, String language)
throws IOException {
String resourcePath = "/dataInsights/opensearch";
createLifecyclePolicy(
"di-data-assets-lifecycle",
readResource(String.format("%s/indexLifecyclePolicy.json", resourcePath)));
createComponentTemplate(
"di-data-assets-mapping",
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath)));
buildMapping(
entityType,
entityIndexMapping,
language,
readResource(String.format("%s/indexMappingsTemplate.json", resourcePath))));
createIndexTemplate(
"di-data-assets", readResource(String.format("%s/indexTemplate.json", resourcePath)));
createDataStream(name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.ENTITY_TYPE_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getInitialStatsForEntities;

import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
Expand All @@ -29,11 +30,13 @@
import org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.processors.DataInsightsEntityEnricherProcessor;
import org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.processors.DataInsightsOpenSearchProcessor;
import org.openmetadata.service.exception.SearchIndexException;
import org.openmetadata.service.exception.UnhandledServerException;
import org.openmetadata.service.jdbi3.CollectionDAO;
import org.openmetadata.service.jdbi3.ListFilter;
import org.openmetadata.service.search.SearchRepository;
import org.openmetadata.service.search.elasticsearch.ElasticSearchIndexSink;
import org.openmetadata.service.search.opensearch.OpenSearchIndexSink;
import org.openmetadata.service.util.JsonUtils;
import org.openmetadata.service.util.ResultList;
import org.openmetadata.service.workflows.interfaces.Processor;
import org.openmetadata.service.workflows.interfaces.Sink;
Expand All @@ -43,6 +46,7 @@
@Slf4j
public class DataAssetsWorkflow {
public static final String DATA_STREAM_KEY = "DataStreamKey";
public static final String ENTITY_TYPE_FIELDS_KEY = "EnityTypeFields";
private final int retentionDays = 30;
private final Long startTimestamp;
private final Long endTimestamp;
Expand All @@ -51,6 +55,7 @@ public class DataAssetsWorkflow {
private final CollectionDAO collectionDAO;
private final List<PaginatedEntitiesSource> sources = new ArrayList<>();
private final Set<String> entityTypes;
private final Map<String, Map<String, List<String>>> entityTypeFields;

private DataInsightsEntityEnricherProcessor entityEnricher;
private Processor entityProcessor;
Expand Down Expand Up @@ -91,10 +96,20 @@ public DataAssetsWorkflow(
TimestampUtils.getStartOfDayTimestamp(TimestampUtils.subtractDays(timestamp, 1));
}

Map<String, Map<String, List<String>>> entityTypeFields = null;

try (InputStream in = getClass().getResourceAsStream("/dataInsights/config.json")) {
assert in != null;
entityTypeFields = JsonUtils.readOrConvertValue(new String(in.readAllBytes()), Map.class);
} catch (Exception e) {
throw new UnhandledServerException("Failed to load DataInsight Search Configurations.");
}

this.batchSize = batchSize;
this.searchRepository = searchRepository;
this.collectionDAO = collectionDAO;
this.entityTypes = entityTypes;
this.entityTypeFields = entityTypeFields;
}

private void initialize() {
Expand Down Expand Up @@ -146,6 +161,7 @@ public void process() throws SearchIndexException {
deleteDataBeforeInserting(getDataStreamName(source.getEntityType()));
contextData.put(DATA_STREAM_KEY, getDataStreamName(source.getEntityType()));
contextData.put(ENTITY_TYPE_KEY, source.getEntityType());
contextData.put(ENTITY_TYPE_FIELDS_KEY, getEntityTypeFields(source.getEntityType()));

while (!source.isDone().get()) {
try {
Expand All @@ -163,6 +179,12 @@ public void process() throws SearchIndexException {
}
}

/**
 * Returns the configured field names for an entity type: the {@code common} fields followed by
 * the fields listed for {@code entityType} under {@code mappingFields}.
 *
 * <p>A fresh list is returned on every call. The loaded configuration is held in a final field
 * and shared across calls, so appending to its {@code common} list directly would leak one
 * entity type's fields into every later lookup and grow the list with duplicates.
 *
 * @param entityType entity type to look up; types without a dedicated entry get only the common
 *     fields
 * @return a new mutable list of field names
 */
private List<String> getEntityTypeFields(String entityType) {
  Map<String, List<String>> mappingFields = entityTypeFields.get("mappingFields");
  List<String> fields = new ArrayList<>(mappingFields.get("common"));
  List<String> typeSpecificFields = mappingFields.get(entityType);
  if (typeSpecificFields != null) {
    fields.addAll(typeSpecificFields);
  }
  return fields;
}

private void processEntity(
ResultList<? extends EntityInterface> resultList,
Map<String, Object> contextData,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import static org.openmetadata.schema.EntityInterface.ENTITY_TYPE_TO_CLASS_MAP;
import static org.openmetadata.service.apps.bundles.insights.utils.TimestampUtils.END_TIMESTAMP_KEY;
import static org.openmetadata.service.apps.bundles.insights.utils.TimestampUtils.START_TIMESTAMP_KEY;
import static org.openmetadata.service.apps.bundles.insights.workflows.dataAssets.DataAssetsWorkflow.ENTITY_TYPE_FIELDS_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.ENTITY_TYPE_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.TIMESTAMP_KEY;
import static org.openmetadata.service.workflows.searchIndex.ReindexingUtil.getUpdatedStats;
Expand Down Expand Up @@ -141,6 +142,8 @@ private Map<String, Object> enrichEntity(
Long endTimestamp = (Long) entityVersionMap.get("endTimestamp");

Map<String, Object> entityMap = JsonUtils.getMap(entity);
entityMap.keySet().retainAll((List<String>) contextData.get(ENTITY_TYPE_FIELDS_KEY));

String entityType = (String) contextData.get(ENTITY_TYPE_KEY);
List<Class<?>> interfaces = List.of(entity.getClass().getInterfaces());

Expand Down
Loading
Loading