From 449bf2f97680a6f500c8d1655f2d28952f307856 Mon Sep 17 00:00:00 2001 From: Chenyang Ji Date: Fri, 23 Feb 2024 17:51:03 -0800 Subject: [PATCH 1/7] Query-level resource usages tracking Signed-off-by: Chenyang Ji --- .../resourcetracker/ResourceUsageInfo.java | 4 + .../resourcetracker/TaskResourceInfo.java | 151 ++++++++++++++++++ .../common/CommonAnalysisModulePlugin.java | 4 +- .../painless/PainlessModulePlugin.java | 4 +- .../index/reindex/ReindexModulePlugin.java | 4 +- .../systemd/SystemdModulePlugin.java | 4 +- .../correlation/EventsCorrelationPlugin.java | 4 +- .../listener/ResourceTrackingListener.java | 62 +++++++ .../insights/rules/model/MetricType.java | 12 +- .../rules/model/SearchQueryRecord.java | 10 ++ .../insights/QueryInsightsPluginTests.java | 1 + .../insights/QueryInsightsTestUtils.java | 2 +- .../listener/QueryInsightsListenerTests.java | 2 +- .../service/QueryInsightsServiceTests.java | 2 +- .../rules/model/SearchQueryRecordTests.java | 2 +- .../repositories/s3/S3RepositoryPlugin.java | 4 +- .../action/ingest/AsyncIngestProcessorIT.java | 4 +- .../cluster/SimpleClusterStateIT.java | 4 +- .../metadata/TemplateUpgradeServiceIT.java | 7 +- .../org/opensearch/index/FinalPipelineIT.java | 4 +- .../search/AbstractSearchAsyncAction.java | 11 ++ .../action/search/DfsQueryPhase.java | 10 ++ .../action/search/ExpandSearchPhase.java | 11 ++ .../action/search/FetchSearchPhase.java | 12 ++ .../opensearch/action/search/SearchPhase.java | 6 + .../search/TransportCreatePitAction.java | 2 + .../java/org/opensearch/plugins/Plugin.java | 4 +- .../opensearch/search/SearchPhaseResult.java | 49 ++++++ .../org/opensearch/search/SearchService.java | 31 +++- .../search/dfs/DfsSearchResult.java | 2 + .../search/fetch/FetchSearchResult.java | 2 + .../search/fetch/QueryFetchSearchResult.java | 2 + .../fetch/ScrollQueryFetchSearchResult.java | 2 + .../search/internal/SearchContext.java | 5 + .../search/query/QuerySearchResult.java | 2 + .../search/query/ScrollQuerySearchResult.java | 2 + .../main/java/org/opensearch/tasks/Task.java | 12 ++ .../tasks/TaskResourceTrackingService.java | 29 +++- 38 files changed, 457 insertions(+), 28 deletions(-) create mode 100644 libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java create mode 100644 plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java diff --git a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java index a278b61894a65..e7b51c3389b52 100644 --- a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java +++ b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/ResourceUsageInfo.java @@ -104,6 +104,10 @@ public long getTotalValue() { return endValue.get() - startValue; } + public long getStartValue() { + return startValue; + } + @Override public String toString() { return String.valueOf(getTotalValue()); diff --git a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java new file mode 100644 index 0000000000000..1c1b3d9448c07 --- /dev/null +++ b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java @@ -0,0 +1,151 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.core.tasks.resourcetracker; + +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Objects; + +/** + * Task resource usage information with minimal information about the task + *

+ * Writeable TaskResourceInfo objects are used to represent resource usage + * information of running tasks, which can be propagated to coordinator node + * to infer query-level resource usage + * + * @opensearch.api + */ +@PublicApi(since = "2.15.0") +public class TaskResourceInfo implements Writeable { + private TaskResourceUsage taskResourceUsage; + private String action; + private long taskId; + private long parentTaskId; + + public TaskResourceInfo() { + this.action = ""; + this.taskId = -1L; + this.taskResourceUsage = new TaskResourceUsage(0, 0); + } + + public TaskResourceInfo(String action, long taskId, long parentTaskId, TaskResourceUsage taskResourceUsage) { + this.action = action; + this.taskId = taskId; + this.parentTaskId = parentTaskId; + this.taskResourceUsage = taskResourceUsage; + } + + /** + * Read task info from a stream. + * + * @param in StreamInput to read + * @return {@link TaskResourceInfo} + * @throws IOException IOException + */ + public static TaskResourceInfo readFromStream(StreamInput in) throws IOException { + TaskResourceInfo info = new TaskResourceInfo(); + info.action = in.readString(); + info.taskId = in.readLong(); + info.taskResourceUsage = TaskResourceUsage.readFromStream(in); + info.parentTaskId = in.readLong(); + return info; + } + + /** + * Get taskResourceUsage + * + * @return taskResourceUsage + */ + public TaskResourceUsage getTaskResourceUsage() { + return taskResourceUsage; + } + + /** + * Set taskResourceUsage + * @param taskResourceUsage the updated taskResourceUsage + */ + public void setTaskResourceUsage(TaskResourceUsage taskResourceUsage) { + this.taskResourceUsage = taskResourceUsage; + } + + /** + * Get parent task id + * + * @return parent task id + */ + public long getParentTaskId() { + return parentTaskId; + } + + /** + * Set parent task id + * @param parentTaskId parent task id + */ + public void setParentTaskId(long parentTaskId) { + this.parentTaskId = parentTaskId; + } + + /** + * Get task id + * @return task id + */ + public long getTaskId() { + return taskId; + } + + /** + * Set task id + * @param taskId task id + */ + public void setTaskId(long taskId) { + this.taskId = taskId; + } + + /** + * Get task action + * @return task action + */ + public String getAction() { + return action; + } + + /** + * Set task action + * @param action task action + */ + public void setAction(String action) { + this.action = action; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(action); + out.writeLong(taskId); + taskResourceUsage.writeTo(out); + out.writeLong(parentTaskId); + } + + @Override + public boolean equals(Object obj) { + if (obj == null || obj.getClass() != TaskResourceInfo.class) { + return false; + } + TaskResourceInfo other = (TaskResourceInfo) obj; + return action.equals(other.action) && taskId == other.taskId && taskResourceUsage.equals(other.taskResourceUsage); + } + + @Override + public int hashCode() { + return Objects.hash(action, taskId, taskResourceUsage); + } +} diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java index cf2736a8583d2..a28a4f93f2826 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java @@ -153,6 +153,7 @@ import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptContext; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -187,7 +188,8 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier + Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { this.scriptService.set(scriptService); return Collections.emptyList(); diff --git a/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java b/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java index 55dc23f665d2e..555fbb28b393c 100644 --- a/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java +++ b/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java @@ -67,6 +67,7 @@ import org.opensearch.script.ScriptEngine; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.pipeline.MovingFunctionScript; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -146,7 +147,8 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier + Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { // this is a hack to bind the painless script engine in guice (all components are added to guice), so that // the painless context api. this is a temporary measure until transport actions do no require guice diff --git a/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java b/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java index aa48da4cb2421..d25dde80f1cbf 100644 --- a/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java +++ b/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java @@ -58,6 +58,7 @@ import org.opensearch.rest.RestHandler; import org.opensearch.script.ScriptService; import org.opensearch.tasks.Task; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -122,7 +123,8 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier + Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { return Collections.singletonList(new ReindexSslConfig(environment.settings(), environment, resourceWatcherService)); } diff --git a/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java b/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java index 6e291027fa35f..a6acf4f3797c0 100644 --- a/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java +++ b/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java @@ -47,6 +47,7 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -100,7 +101,8 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier + final Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { if (enabled == false) { extender.set(null); diff --git a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java index 9637042974d03..81035feea074b 100644 --- a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java +++ b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java @@ -44,6 +44,7 @@ import org.opensearch.rest.RestController; import org.opensearch.rest.RestHandler; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -87,7 +88,8 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver indexNameExpressionResolver, - Supplier repositoriesServiceSupplier + Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { correlationRuleIndices = new CorrelationRuleIndices(client, clusterService); return List.of(correlationRuleIndices); diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java new file mode 100644 index 0000000000000..1409ac8f92e5b --- /dev/null +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java @@ -0,0 +1,62 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugin.insights.core.listener; + +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.plugin.insights.core.service.QueryInsightsService; +import org.opensearch.tasks.Task; +import org.opensearch.tasks.TaskResourceTrackingService; +import org.opensearch.tasks.TaskResourceTrackingService.TaskCompletionListener; +import org.opensearch.tasks.TaskResourceTrackingService.TaskStartListener; + +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Listener for task level resource usage + */ +public class ResourceTrackingListener implements TaskCompletionListener, TaskStartListener { + private final QueryInsightsService queryInsightsService; + + /** + * Constructor of ResourceTrackingListener + * @param queryInsightsService queryInsightsService + * @param taskResourceTrackingService taskResourceTrackingService + */ + public ResourceTrackingListener(QueryInsightsService queryInsightsService, TaskResourceTrackingService taskResourceTrackingService) { + this.queryInsightsService = queryInsightsService; + taskResourceTrackingService.addTaskCompletionListener(this); + taskResourceTrackingService.addTaskStartListener(this); + } + + @Override + public void onTaskCompleted(Task task) { + TaskResourceInfo info = new TaskResourceInfo( + task.getAction(), + task.getId(), + task.getParentTaskId().getId(), + task.getTotalResourceStats() + ); + long parentTaskId = task.getParentTaskId().getId(); + if (parentTaskId == -1) { + parentTaskId = task.getId(); + } + this.queryInsightsService.taskStatusMap.get(parentTaskId).decrementAndGet(); + queryInsightsService.taskRecordsQueue.add(info); + } + + @Override + public void onTaskStarts(Task task) { + long parentTaskId = task.getParentTaskId().getId(); + if (parentTaskId == -1) { + parentTaskId = task.getId(); + } + this.queryInsightsService.taskStatusMap.putIfAbsent(parentTaskId, new AtomicInteger(0)); + this.queryInsightsService.taskStatusMap.get(parentTaskId).incrementAndGet(); + } +} diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java index cdd090fbf4804..4694c757f4ef2 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java @@ -35,7 +35,7 @@ public enum MetricType implements Comparator { /** * JVM heap usage metric type */ - JVM; + MEMORY; /** * Read a MetricType from a StreamInput @@ -93,10 +93,9 @@ public static Set allMetricTypes() { public int compare(final Number a, final Number b) { switch (this) { case LATENCY: - return Long.compare(a.longValue(), b.longValue()); - case JVM: case CPU: - return Double.compare(a.doubleValue(), b.doubleValue()); + case MEMORY: + return Long.compare(a.longValue(), b.longValue()); } return -1; } @@ -110,10 +109,9 @@ public int compare(final Number a, final Number b) { Number parseValue(final Object o) { switch (this) { case LATENCY: - return (Long) o; - case JVM: case CPU: - return (Double) o; + case MEMORY: + return (Long) o; default: return (Number) o; } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java index fec00a680ae58..279c05fefed33 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java @@ -111,6 +111,16 @@ public void addAttribute(final Attribute attribute, final Object value) { attributes.put(attribute, value); } + /** + * Add a measurement to this record + * + * @param metricType Type of measurement to add + * @param value the value associated with the measurement + */ + public void addMeasurement(final MetricType metricType, final Number value) { + measurements.put(metricType, value); + } + @Override public XContentBuilder toXContent(final XContentBuilder builder, final ToXContent.Params params) throws IOException { builder.startObject(); diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java index 8b8856e3e305c..b3a4c1ee541cc 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java @@ -79,6 +79,7 @@ public void testCreateComponent() { null, null, null, + null, null ); assertEquals(2, components.size()); diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java index 870ef5b9c8be9..eccef659f0558 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java @@ -64,7 +64,7 @@ public static List generateQueryInsightRecords(int lower, int randomLongBetween(1000, 10000), MetricType.CPU, randomDouble(), - MetricType.JVM, + MetricType.MEMORY, randomDouble() ); diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java index b794a2e4b8608..8daddebb06940 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java @@ -184,7 +184,7 @@ public void testSetEnabled() { when(queryInsightsService.isCollectionEnabled(MetricType.LATENCY)).thenReturn(false); when(queryInsightsService.isCollectionEnabled(MetricType.CPU)).thenReturn(false); - when(queryInsightsService.isCollectionEnabled(MetricType.JVM)).thenReturn(false); + when(queryInsightsService.isCollectionEnabled(MetricType.MEMORY)).thenReturn(false); queryInsightsListener.setEnableTopQueries(MetricType.LATENCY, false); assertFalse(queryInsightsListener.isEnabled()); } diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java index 428f615ce2f90..114b7ccea7a65 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java @@ -38,7 +38,7 @@ public void setup() { queryInsightsService = new QueryInsightsService(clusterSettings, threadPool, client); queryInsightsService.enableCollection(MetricType.LATENCY, true); queryInsightsService.enableCollection(MetricType.CPU, true); - queryInsightsService.enableCollection(MetricType.JVM, true); + queryInsightsService.enableCollection(MetricType.MEMORY, true); } public void testAddRecordToLimitAndDrain() { diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java index 793d5878e2300..ad45b53ec5363 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java @@ -39,7 +39,7 @@ public void testSerializationAndEquals() throws Exception { public void testAllMetricTypes() { Set allMetrics = MetricType.allMetricTypes(); - Set expected = new HashSet<>(Arrays.asList(MetricType.LATENCY, MetricType.CPU, MetricType.JVM)); + Set expected = new HashSet<>(Arrays.asList(MetricType.LATENCY, MetricType.CPU, MetricType.MEMORY)); assertEquals(expected, allMetrics); } diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index 110d91bfbd822..ab2b45e78d61c 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -59,6 +59,7 @@ import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; import org.opensearch.repositories.s3.async.TransferSemaphoresHolder; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; import org.opensearch.threadpool.ScalingExecutorBuilder; @@ -211,7 +212,8 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier + final Supplier repositoriesServiceSupplier, + final TaskResourceTrackingService taskResourceTrackingService ) { int urgentEventLoopThreads = urgentPoolCount(clusterService.getSettings()); int priorityEventLoopThreads = priorityPoolCount(clusterService.getSettings()); diff --git a/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java b/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java index aefabcb9bc14f..70de07a524b6f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java @@ -53,6 +53,7 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -120,7 +121,8 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier + Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { this.threadPool = threadPool; return Collections.emptyList(); diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java index af5900b1cba6c..c598be563cc27 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java @@ -63,6 +63,7 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -469,7 +470,8 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier + final Supplier repositoriesServiceSupplier, + final TaskResourceTrackingService taskResourceTrackingService ) { clusterService.addListener(event -> { final ClusterState state = event.state(); diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java index ba1679d873bf4..cc77fb16ccb45 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java @@ -45,6 +45,7 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -97,7 +98,8 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier + Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { clusterService.getClusterSettings().addSettingsUpdateConsumer(UPDATE_TEMPLATE_DUMMY_SETTING, integer -> { logger.debug("the template dummy setting was updated to {}", integer); @@ -113,7 +115,8 @@ public Collection createComponents( nodeEnvironment, namedWriteableRegistry, expressionResolver, - repositoriesServiceSupplier + repositoriesServiceSupplier, + taskResourceTrackingService ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java b/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java index 03b8fb5ff7afc..c0aecae7127a1 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java @@ -62,6 +62,7 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -373,7 +374,8 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier + final Supplier repositoriesServiceSupplier, + final TaskResourceTrackingService taskResourceTrackingService ) { return Collections.emptyList(); } diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 9bf4a4b1e18f1..b54c20cbbb828 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -51,6 +51,7 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.core.action.ShardOperationFailedException; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.AliasFilter; @@ -844,6 +845,16 @@ public final ShardSearchRequest buildShardSearchRequest(SearchShardIterator shar return shardRequest; } + @Override + public List getPhaseResourceUsageFromResults() { + return results.getAtomicArray() + .asList() + .stream() + .filter(a -> a.remoteAddress() != null) + .map(SearchPhaseResult::getTaskResourceInfo) + .collect(Collectors.toList()); + } + /** * Returns the next phase based on the results of the initial search phase * @param results the results of the initial search phase. Each non null element in the result array represent a successfully diff --git a/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java index 6fe4eaabd6d17..6e0e705df1a45 100644 --- a/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java @@ -32,6 +32,7 @@ package org.opensearch.action.search; import org.apache.logging.log4j.message.ParameterizedMessage; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.dfs.AggregatedDfs; @@ -43,6 +44,7 @@ import java.io.IOException; import java.util.List; import java.util.function.Function; +import java.util.stream.Collectors; /** * This search phase fans out to every shards to execute a distributed search with a pre-collected distributed frequencies for all @@ -83,6 +85,14 @@ final class DfsQueryPhase extends SearchPhase { context.addReleasable(queryResult); } + @Override + public List getPhaseResourceUsageFromResults() { + return searchResults.stream() + .filter(a -> a.remoteAddress() != null) + .map(SearchPhaseResult::getTaskResourceInfo) + .collect(Collectors.toList()); + } + @Override public void run() throws IOException { // TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs diff --git a/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java b/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java index e249fb239dae9..b65c96f0757d7 100644 --- a/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java @@ -34,6 +34,7 @@ import org.opensearch.common.util.concurrent.AtomicArray; import org.opensearch.core.action.ActionListener; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.InnerHitBuilder; import org.opensearch.index.query.QueryBuilder; @@ -48,6 +49,7 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.stream.Collectors; /** * This search phase is an optional phase that will be executed once all hits are fetched from the shards that executes @@ -78,6 +80,15 @@ private boolean isCollapseRequest() { && searchRequest.source().collapse().getInnerHits().isEmpty() == false; } + @Override + public List getPhaseResourceUsageFromResults() { + return queryResults.asList() + .stream() + .filter(a -> a.remoteAddress() != null) + .map(SearchPhaseResult::getTaskResourceInfo) + .collect(Collectors.toList()); + } + @Override public void run() { if (isCollapseRequest() && searchResponse.hits().getHits().length > 0) { diff --git a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java index ebb2f33f8f37d..4729ab0def195 100644 --- a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java @@ -37,6 +37,7 @@ import org.opensearch.action.OriginalIndices; import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.AtomicArray; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.RescoreDocIds; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; @@ -51,6 +52,7 @@ import java.util.List; import java.util.function.BiFunction; +import java.util.stream.Collectors; /** * This search phase merges the query results from the previous phase together and calculates the topN hits for this search. @@ -111,6 +113,16 @@ final class FetchSearchPhase extends SearchPhase { this.progressListener = context.getTask().getProgressListener(); } + @Override + public List getPhaseResourceUsageFromResults() { + return fetchResults.getAtomicArray() + .asList() + .stream() + .filter(a -> a.remoteAddress() != null) + .map(SearchPhaseResult::getTaskResourceInfo) + .collect(Collectors.toList()); + } + @Override public void run() { context.execute(new AbstractRunnable() { diff --git a/server/src/main/java/org/opensearch/action/search/SearchPhase.java b/server/src/main/java/org/opensearch/action/search/SearchPhase.java index 0890e9f5de8d4..1f47f6f35548e 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/SearchPhase.java @@ -33,8 +33,10 @@ import org.opensearch.common.CheckedRunnable; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import java.io.IOException; +import java.util.List; import java.util.Locale; import java.util.Objects; @@ -48,6 +50,10 @@ public abstract class SearchPhase implements CheckedRunnable { private final String name; private long startTimeInNanos; + public List getPhaseResourceUsageFromResults() { + return List.of(); + } + protected SearchPhase(String name) { this.name = Objects.requireNonNull(name, "name must not be null"); } diff --git a/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java b/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java index baa113997f243..81e8f7db47615 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java @@ -121,12 +121,14 @@ public CreateReaderContextResponse(ShardSearchContextId shardSearchContextId) { public CreateReaderContextResponse(StreamInput in) throws IOException { super(in); contextId = new ShardSearchContextId(in); + readResourceUsage(in); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); contextId.writeTo(out); + writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/plugins/Plugin.java b/server/src/main/java/org/opensearch/plugins/Plugin.java index 33c4155d12c25..8af338d299867 100644 --- a/server/src/main/java/org/opensearch/plugins/Plugin.java +++ b/server/src/main/java/org/opensearch/plugins/Plugin.java @@ -56,6 +56,7 @@ import org.opensearch.index.shard.IndexSettingProvider; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -150,7 +151,8 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver indexNameExpressionResolver, - Supplier repositoriesServiceSupplier + Supplier repositoriesServiceSupplier, + TaskResourceTrackingService taskResourceTrackingService ) { return Collections.emptyList(); } diff --git a/server/src/main/java/org/opensearch/search/SearchPhaseResult.java b/server/src/main/java/org/opensearch/search/SearchPhaseResult.java index a351b3bd2dda6..dffe49a5d8889 100644 --- a/server/src/main/java/org/opensearch/search/SearchPhaseResult.java +++ b/server/src/main/java/org/opensearch/search/SearchPhaseResult.java @@ -36,13 +36,22 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; import org.opensearch.core.transport.TransportResponse; import org.opensearch.search.fetch.FetchSearchResult; +import org.opensearch.search.internal.SearchContext; import org.opensearch.search.internal.ShardSearchContextId; import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.search.query.QuerySearchResult; +import org.opensearch.tasks.TaskResourceTrackingService; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; /** * This class is a base class for all search related results. It contains the shard target it @@ -62,6 +71,8 @@ public abstract class SearchPhaseResult extends TransportResponse { protected ShardSearchContextId contextId; private ShardSearchRequest shardSearchRequest; private RescoreDocIds rescoreDocIds = RescoreDocIds.EMPTY; + private TaskResourceInfo taskResourceInfo = new TaskResourceInfo(); + private final Map taskResourceUsageStartValues = new HashMap<>(); protected SearchPhaseResult() { @@ -137,4 +148,42 @@ public void setRescoreDocIds(RescoreDocIds rescoreDocIds) { public void writeTo(StreamOutput out) throws IOException { // TODO: this seems wrong, SearchPhaseResult should have a writeTo? } + + /** + * Get task resource usage info + * @return TaskResourceInfo + */ + public TaskResourceInfo getTaskResourceInfo() { + return taskResourceInfo; + } + + protected void readResourceUsage(StreamInput in) throws IOException { + taskResourceInfo = TaskResourceInfo.readFromStream(in); + } + + protected void writeResourceUsage(StreamOutput out) throws IOException { + ResourceUsageMetric[] usages = TaskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); + long cpu = 0, mem = 0; + for (ResourceUsageMetric usage : usages) { + if (usage.getStats() == ResourceStats.MEMORY && taskResourceUsageStartValues.containsKey(ResourceStats.MEMORY)) { + mem = usage.getValue() - taskResourceUsageStartValues.get(ResourceStats.MEMORY); + } else if (usage.getStats() == ResourceStats.CPU && taskResourceUsageStartValues.containsKey(ResourceStats.CPU)) { + cpu = usage.getValue() - taskResourceUsageStartValues.get(ResourceStats.CPU); + } + } + taskResourceInfo.setTaskResourceUsage(new TaskResourceUsage(cpu, mem)); + taskResourceInfo.writeTo(out); + } + + protected void injectInitialResourceUsage(SearchContext context) { + Map usageInfo = context.usageInfo; + // initial resource usage when the task starts + for (Map.Entry entry : usageInfo.entrySet()) { + taskResourceUsageStartValues.put(entry.getKey(), entry.getValue().getStartValue()); + } + // inject metadata related to the task + taskResourceInfo.setAction(context.getTask().getAction()); + taskResourceInfo.setTaskId(context.getTask().getId()); + taskResourceInfo.setParentTaskId(context.getTask().getParentTaskId().getId()); + } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index d371d69a57804..8d2f4ff699479 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -73,6 +73,7 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.indices.breaker.CircuitBreakerService; +import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; @@ -553,6 +554,7 @@ private DfsSearchResult executeDfsPhase(ShardSearchRequest request, SearchShardT SearchContext context = createContext(readerContext, request, task, true) ) { dfsPhase.execute(context); + context.dfsResult().injectInitialResourceUsage(context); return context.dfsResult(); } catch (Exception e) { logger.trace("Dfs phase failed", e); @@ -648,6 +650,7 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh final RescoreDocIds rescoreDocIds = context.rescoreDocIds(); context.queryResult().setRescoreDocIds(rescoreDocIds); readerContext.setRescoreDocIds(rescoreDocIds); + context.queryResult().injectInitialResourceUsage(context); return context.queryResult(); } } catch (Exception e) { @@ -672,7 +675,9 @@ private QueryFetchSearchResult executeFetchPhase(ReaderContext reader, SearchCon } executor.success(); } - return new QueryFetchSearchResult(context.queryResult(), context.fetchResult()); + QueryFetchSearchResult result = new QueryFetchSearchResult(context.queryResult(), context.fetchResult()); + result.injectInitialResourceUsage(context); + return result; } public void executeQueryPhase( @@ -700,7 +705,12 @@ public void executeQueryPhase( queryPhase.execute(searchContext); executor.success(); readerContext.setRescoreDocIds(searchContext.rescoreDocIds()); - return new ScrollQuerySearchResult(searchContext.queryResult(), searchContext.shardTarget()); + ScrollQuerySearchResult scrollQuerySearchResult = new ScrollQuerySearchResult( + searchContext.queryResult(), + searchContext.shardTarget() + ); + scrollQuerySearchResult.injectInitialResourceUsage(searchContext); + return scrollQuerySearchResult; } catch (Exception e) { logger.trace("Query phase failed", e); // we handle the failure in the failure listener below @@ -731,6 +741,7 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, final RescoreDocIds rescoreDocIds = searchContext.rescoreDocIds(); searchContext.queryResult().setRescoreDocIds(rescoreDocIds); readerContext.setRescoreDocIds(rescoreDocIds); + searchContext.queryResult().injectInitialResourceUsage(searchContext); return searchContext.queryResult(); } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); @@ -780,7 +791,12 @@ public void executeFetchPhase( queryPhase.execute(searchContext); final long afterQueryTime = executor.success(); QueryFetchSearchResult fetchSearchResult = executeFetchPhase(readerContext, searchContext, afterQueryTime); - return new ScrollQueryFetchSearchResult(fetchSearchResult, searchContext.shardTarget()); + ScrollQueryFetchSearchResult scrollQueryFetchSearchResult = new ScrollQueryFetchSearchResult( + fetchSearchResult, + searchContext.shardTarget() + ); + scrollQueryFetchSearchResult.injectInitialResourceUsage(searchContext); + return scrollQueryFetchSearchResult; } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); logger.trace("Fetch phase failed", e); @@ -811,6 +827,7 @@ public void executeFetchPhase(ShardFetchRequest request, SearchShardTask task, A } executor.success(); } + searchContext.fetchResult().injectInitialResourceUsage(searchContext); return searchContext.fetchResult(); } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); @@ -1032,9 +1049,11 @@ final SearchContext createContext( context.size(DEFAULT_SIZE); } context.setTask(task); - // pre process queryPhase.preProcess(context); + context.usageInfo = task.getActiveThreadResourceInfo(Thread.currentThread().getId(), ResourceStatsType.WORKER_STATS) + .getResourceUsageInfo() + .getStatsInfo(); } catch (Exception e) { context.close(); throw e; @@ -1740,6 +1759,7 @@ public CanMatchResponse(StreamInput in) throws IOException { super(in); this.canMatch = in.readBoolean(); this.estimatedMinAndMax = in.readOptionalWriteable(MinAndMax::new); + readResourceUsage(in); } public CanMatchResponse(boolean canMatch, MinAndMax estimatedMinAndMax) { @@ -1749,8 +1769,11 @@ public CanMatchResponse(boolean canMatch, MinAndMax estimatedMinAndMax) { @Override public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); out.writeBoolean(canMatch); out.writeOptionalWriteable(estimatedMinAndMax); + + writeResourceUsage(out); } public boolean canMatch() { diff --git a/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java b/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java index 2338a47435012..3cdd078c2d700 100644 --- a/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java +++ b/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java @@ -81,6 +81,7 @@ public DfsSearchResult(StreamInput in) throws IOException { maxDoc = in.readVInt(); setShardSearchRequest(in.readOptionalWriteable(ShardSearchRequest::new)); + readResourceUsage(in); } public DfsSearchResult(ShardSearchContextId contextId, SearchShardTarget shardTarget, ShardSearchRequest shardSearchRequest) { @@ -133,6 +134,7 @@ public void writeTo(StreamOutput out) throws IOException { writeFieldStats(out, fieldStatistics); out.writeVInt(maxDoc); out.writeOptionalWriteable(getShardSearchRequest()); + writeResourceUsage(out); } public static void writeFieldStats(StreamOutput out, final Map fieldStatistics) throws IOException { diff --git a/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java b/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java index 26fa90141c2a9..f6be61dbb7659 100644 --- a/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java +++ b/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java @@ -62,6 +62,7 @@ public FetchSearchResult(StreamInput in) throws IOException { super(in); contextId = new ShardSearchContextId(in); hits = new SearchHits(in); + readResourceUsage(in); } public FetchSearchResult(ShardSearchContextId id, SearchShardTarget shardTarget) { @@ -108,5 +109,6 @@ public int counterGetAndIncrement() { public void writeTo(StreamOutput out) throws IOException { contextId.writeTo(out); hits.writeTo(out); + writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java b/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java index ce4c59fc77489..626fc3b408918 100644 --- a/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java +++ b/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java @@ -55,6 +55,7 @@ public QueryFetchSearchResult(StreamInput in) throws IOException { super(in); queryResult = new QuerySearchResult(in); fetchResult = new FetchSearchResult(in); + readResourceUsage(in); } public QueryFetchSearchResult(QuerySearchResult queryResult, FetchSearchResult fetchResult) { @@ -100,5 +101,6 @@ public FetchSearchResult fetchResult() { public void writeTo(StreamOutput out) throws IOException { queryResult.writeTo(out); fetchResult.writeTo(out); + writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java b/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java index 415350b4c5dc7..2e2f0914c0d9c 100644 --- a/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java +++ b/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java @@ -54,6 +54,7 @@ public ScrollQueryFetchSearchResult(StreamInput in) throws IOException { SearchShardTarget searchShardTarget = new SearchShardTarget(in); result = new QueryFetchSearchResult(in); setSearchShardTarget(searchShardTarget); + readResourceUsage(in); } public ScrollQueryFetchSearchResult(QueryFetchSearchResult result, SearchShardTarget shardTarget) { @@ -91,5 +92,6 @@ public FetchSearchResult fetchResult() { public void writeTo(StreamOutput out) throws IOException { getSearchShardTarget().writeTo(out); result.writeTo(out); + writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index 0c8240d3a8322..3564322c8e349 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -43,6 +43,8 @@ import org.opensearch.common.lease.Releasables; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.BigArrays; +import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; import org.opensearch.index.cache.bitset.BitsetFilterCache; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; @@ -79,6 +81,7 @@ import org.opensearch.search.suggest.SuggestionSearchContext; import java.util.Collection; +import java.util.EnumMap; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -127,6 +130,8 @@ public List toInternalAggregations(Collection co private volatile boolean searchTimedOut; + public Map usageInfo = new EnumMap<>(ResourceStats.class); + protected SearchContext() {} public abstract void setTask(SearchShardTask task); diff --git a/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java b/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java index f3ac953ab9d1d..a2a97c9a3c374 100644 --- a/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java +++ b/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java @@ -101,6 +101,7 @@ public QuerySearchResult(StreamInput in) throws IOException { ShardSearchContextId id = new ShardSearchContextId(in); readFromWithId(id, in); } + readResourceUsage(in); } public QuerySearchResult(ShardSearchContextId contextId, SearchShardTarget shardTarget, ShardSearchRequest shardSearchRequest) { @@ -375,6 +376,7 @@ public void writeTo(StreamOutput out) throws IOException { contextId.writeTo(out); writeToNoId(out); } + writeResourceUsage(out); } public void writeToNoId(StreamOutput out) throws IOException { diff --git a/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java b/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java index 0cdc8749253f0..bfc8e94e50e13 100644 --- a/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java +++ b/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java @@ -53,6 +53,7 @@ public ScrollQuerySearchResult(StreamInput in) throws IOException { SearchShardTarget shardTarget = new SearchShardTarget(in); result = new QuerySearchResult(in); setSearchShardTarget(shardTarget); + readResourceUsage(in); } public ScrollQuerySearchResult(QuerySearchResult result, SearchShardTarget shardTarget) { @@ -81,5 +82,6 @@ public QuerySearchResult queryResult() { public void writeTo(StreamOutput out) throws IOException { getSearchShardTarget().writeTo(out); result.writeTo(out); + writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/tasks/Task.java b/server/src/main/java/org/opensearch/tasks/Task.java index a21a454a65d0e..0fa65bc16516f 100644 --- a/server/src/main/java/org/opensearch/tasks/Task.java +++ b/server/src/main/java/org/opensearch/tasks/Task.java @@ -476,6 +476,18 @@ public void stopThreadResourceTracking(long threadId, ResourceStatsType statsTyp throw new IllegalStateException("cannot update final values if active thread resource entry is not present"); } + public ThreadResourceInfo getActiveThreadResourceInfo(long threadId, ResourceStatsType statsType) { + final List threadResourceInfoList = resourceStats.get(threadId); + if (threadResourceInfoList != null) { + for (ThreadResourceInfo threadResourceInfo : threadResourceInfoList) { + if (threadResourceInfo.getStatsType() == statsType && threadResourceInfo.isActive()) { + return threadResourceInfo; + } + } + } + return null; + } + /** * Individual tasks can override this if they want to support task resource tracking. We just need to make sure that * the ThreadPool on which the task runs on have runnable wrapper similar to diff --git a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java index f32559f6314c0..d843948071368 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java @@ -15,6 +15,7 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.ExceptionsHelper; import org.opensearch.common.SuppressForbidden; +import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.inject.Inject; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; @@ -39,6 +40,7 @@ /** * Service that helps track resource usage of tasks running on a node. */ +@PublicApi(since = "2.2.0") @SuppressForbidden(reason = "ThreadMXBean#getThreadAllocatedBytes") public class TaskResourceTrackingService implements RunnableTaskExecutionListener { @@ -56,6 +58,7 @@ public class TaskResourceTrackingService implements RunnableTaskExecutionListene private final ConcurrentMapLong resourceAwareTasks = ConcurrentCollections.newConcurrentMapLongWithAggressiveConcurrency(); private final List taskCompletionListeners = new ArrayList<>(); + private final List taskStartListeners = new ArrayList<>(); private final ThreadPool threadPool; private volatile boolean taskResourceTrackingEnabled; @@ -96,6 +99,17 @@ public ThreadContext.StoredContext startTracking(Task task) { logger.debug("Starting resource tracking for task: {}", task.getId()); resourceAwareTasks.put(task.getId(), task); + + List exceptions = new ArrayList<>(); + for (TaskStartListener listener : taskStartListeners) { + try { + listener.onTaskStarts(task); + } catch (Exception e) { + exceptions.add(e); + } + } + ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); + return addTaskIdToThreadContext(task); } @@ -211,7 +225,7 @@ public Map getResourceAwareTasks() { return Collections.unmodifiableMap(resourceAwareTasks); } - private ResourceUsageMetric[] getResourceUsageMetricsForThread(long threadId) { + public static ResourceUsageMetric[] getResourceUsageMetricsForThread(long threadId) { ResourceUsageMetric currentMemoryUsage = new ResourceUsageMetric( ResourceStats.MEMORY, threadMXBean.getThreadAllocatedBytes(threadId) @@ -264,11 +278,24 @@ private ThreadContext.StoredContext addTaskIdToThreadContext(Task task) { /** * Listener that gets invoked when a task execution completes. */ + @PublicApi(since = "2.2.0") public interface TaskCompletionListener { void onTaskCompleted(Task task); } + /** + * Listener that gets invoked when a task execution starts. + */ + @PublicApi(since = "2.2.0") + public interface TaskStartListener { + void onTaskStarts(Task task); + } + public void addTaskCompletionListener(TaskCompletionListener listener) { this.taskCompletionListeners.add(listener); } + + public void addTaskStartListener(TaskStartListener listener) { + this.taskStartListeners.add(listener); + } } From 4314dd4938f624f72a6197fae3f22afbdf588bee Mon Sep 17 00:00:00 2001 From: Chenyang Ji Date: Tue, 16 Apr 2024 14:27:34 -0700 Subject: [PATCH 2/7] Moving TaskResourceTrackingService to clusterService Signed-off-by: Chenyang Ji --- .../common/CommonAnalysisModulePlugin.java | 4 +--- .../painless/PainlessModulePlugin.java | 4 +--- .../index/reindex/ReindexModulePlugin.java | 4 +--- .../systemd/SystemdModulePlugin.java | 4 +--- .../correlation/EventsCorrelationPlugin.java | 4 +--- .../insights/QueryInsightsPluginTests.java | 1 - .../repositories/s3/S3RepositoryPlugin.java | 4 +--- .../action/ingest/AsyncIngestProcessorIT.java | 4 +--- .../cluster/SimpleClusterStateIT.java | 4 +--- .../metadata/TemplateUpgradeServiceIT.java | 7 ++----- .../org/opensearch/index/FinalPipelineIT.java | 4 +--- .../cluster/service/ClusterService.java | 20 +++++++++++++++++++ .../java/org/opensearch/plugins/Plugin.java | 4 +--- 13 files changed, 32 insertions(+), 36 deletions(-) diff --git a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java index a28a4f93f2826..cf2736a8583d2 100644 --- a/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java +++ b/modules/analysis-common/src/main/java/org/opensearch/analysis/common/CommonAnalysisModulePlugin.java @@ -153,7 +153,6 @@ import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptContext; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -188,8 +187,7 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + Supplier repositoriesServiceSupplier ) { this.scriptService.set(scriptService); return Collections.emptyList(); diff --git a/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java b/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java index 555fbb28b393c..55dc23f665d2e 100644 --- a/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java +++ b/modules/lang-painless/src/main/java/org/opensearch/painless/PainlessModulePlugin.java @@ -67,7 +67,6 @@ import org.opensearch.script.ScriptEngine; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.pipeline.MovingFunctionScript; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -147,8 +146,7 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + Supplier repositoriesServiceSupplier ) { // this is a hack to bind the painless script engine in guice (all components are added to guice), so that // the painless context api. this is a temporary measure until transport actions do no require guice diff --git a/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java b/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java index d25dde80f1cbf..aa48da4cb2421 100644 --- a/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java +++ b/modules/reindex/src/main/java/org/opensearch/index/reindex/ReindexModulePlugin.java @@ -58,7 +58,6 @@ import org.opensearch.rest.RestHandler; import org.opensearch.script.ScriptService; import org.opensearch.tasks.Task; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -123,8 +122,7 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + Supplier repositoriesServiceSupplier ) { return Collections.singletonList(new ReindexSslConfig(environment.settings(), environment, resourceWatcherService)); } diff --git a/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java b/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java index a6acf4f3797c0..6e291027fa35f 100644 --- a/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java +++ b/modules/systemd/src/main/java/org/opensearch/systemd/SystemdModulePlugin.java @@ -47,7 +47,6 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.Scheduler; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -101,8 +100,7 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + final Supplier repositoriesServiceSupplier ) { if (enabled == false) { extender.set(null); diff --git a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java index 81035feea074b..9637042974d03 100644 --- a/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java +++ b/plugins/events-correlation-engine/src/main/java/org/opensearch/plugin/correlation/EventsCorrelationPlugin.java @@ -44,7 +44,6 @@ import org.opensearch.rest.RestController; import org.opensearch.rest.RestHandler; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -88,8 +87,7 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver indexNameExpressionResolver, - Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + Supplier repositoriesServiceSupplier ) { correlationRuleIndices = new CorrelationRuleIndices(client, clusterService); return List.of(correlationRuleIndices); diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java index b3a4c1ee541cc..8b8856e3e305c 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsPluginTests.java @@ -79,7 +79,6 @@ public void testCreateComponent() { null, null, null, - null, null ); assertEquals(2, components.size()); diff --git a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java index ab2b45e78d61c..110d91bfbd822 100644 --- a/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java +++ b/plugins/repository-s3/src/main/java/org/opensearch/repositories/s3/S3RepositoryPlugin.java @@ -59,7 +59,6 @@ import org.opensearch.repositories.s3.async.SizeBasedBlockingQ; import org.opensearch.repositories.s3.async.TransferSemaphoresHolder; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.FixedExecutorBuilder; import org.opensearch.threadpool.ScalingExecutorBuilder; @@ -212,8 +211,7 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier, - final TaskResourceTrackingService taskResourceTrackingService + final Supplier repositoriesServiceSupplier ) { int urgentEventLoopThreads = urgentPoolCount(clusterService.getSettings()); int priorityEventLoopThreads = priorityPoolCount(clusterService.getSettings()); diff --git a/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java b/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java index 70de07a524b6f..aefabcb9bc14f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/action/ingest/AsyncIngestProcessorIT.java @@ -53,7 +53,6 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchSingleNodeTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -121,8 +120,7 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + Supplier repositoriesServiceSupplier ) { this.threadPool = threadPool; return Collections.emptyList(); diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java index c598be563cc27..af5900b1cba6c 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/SimpleClusterStateIT.java @@ -63,7 +63,6 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -470,8 +469,7 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier, - final TaskResourceTrackingService taskResourceTrackingService + final Supplier repositoriesServiceSupplier ) { clusterService.addListener(event -> { final ClusterState state = event.state(); diff --git a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java index cc77fb16ccb45..ba1679d873bf4 100644 --- a/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/cluster/metadata/TemplateUpgradeServiceIT.java @@ -45,7 +45,6 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -98,8 +97,7 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver expressionResolver, - Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + Supplier repositoriesServiceSupplier ) { clusterService.getClusterSettings().addSettingsUpdateConsumer(UPDATE_TEMPLATE_DUMMY_SETTING, integer -> { logger.debug("the template dummy setting was updated to {}", integer); @@ -115,8 +113,7 @@ public Collection createComponents( nodeEnvironment, namedWriteableRegistry, expressionResolver, - repositoriesServiceSupplier, - taskResourceTrackingService + repositoriesServiceSupplier ); } diff --git a/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java b/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java index c0aecae7127a1..03b8fb5ff7afc 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/FinalPipelineIT.java @@ -62,7 +62,6 @@ import org.opensearch.plugins.Plugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.test.OpenSearchIntegTestCase; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -374,8 +373,7 @@ public Collection createComponents( final NodeEnvironment nodeEnvironment, final NamedWriteableRegistry namedWriteableRegistry, final IndexNameExpressionResolver expressionResolver, - final Supplier repositoriesServiceSupplier, - final TaskResourceTrackingService taskResourceTrackingService + final Supplier repositoriesServiceSupplier ) { return Collections.emptyList(); } diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java index fa61375e85c25..8898ae340f3af 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java @@ -54,6 +54,7 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexingPressureService; import org.opensearch.node.Node; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import java.util.Collections; @@ -91,6 +92,7 @@ public class ClusterService extends AbstractLifecycleComponent { private RerouteService rerouteService; private IndexingPressureService indexingPressureService; + private TaskResourceTrackingService taskResourceTrackingService; public ClusterService( Settings settings, @@ -260,6 +262,24 @@ public IndexingPressureService getIndexingPressureService() { return indexingPressureService; } + /** + * Getter for {@link TaskResourceTrackingService}, This method exposes task level resource usage for other plugins to use. + * + * @return TaskResourceTrackingService + */ + public TaskResourceTrackingService getTaskResourceTrackingService() { + return taskResourceTrackingService; + } + + /** + * Setter for {@link TaskResourceTrackingService} + * + * @param taskResourceTrackingService taskResourceTrackingService + */ + public void setTaskResourceTrackingService(TaskResourceTrackingService taskResourceTrackingService) { + this.taskResourceTrackingService = taskResourceTrackingService; + } + public ClusterApplierService getClusterApplierService() { return clusterApplierService; } diff --git a/server/src/main/java/org/opensearch/plugins/Plugin.java b/server/src/main/java/org/opensearch/plugins/Plugin.java index 8af338d299867..33c4155d12c25 100644 --- a/server/src/main/java/org/opensearch/plugins/Plugin.java +++ b/server/src/main/java/org/opensearch/plugins/Plugin.java @@ -56,7 +56,6 @@ import org.opensearch.index.shard.IndexSettingProvider; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ExecutorBuilder; import org.opensearch.threadpool.ThreadPool; import org.opensearch.watcher.ResourceWatcherService; @@ -151,8 +150,7 @@ public Collection createComponents( NodeEnvironment nodeEnvironment, NamedWriteableRegistry namedWriteableRegistry, IndexNameExpressionResolver indexNameExpressionResolver, - Supplier repositoriesServiceSupplier, - TaskResourceTrackingService taskResourceTrackingService + Supplier repositoriesServiceSupplier ) { return Collections.emptyList(); } From 51b37dc2b5cd86b3c6eb92ed3858cfd354cced1c Mon Sep 17 00:00:00 2001 From: Chenyang Ji Date: Fri, 17 May 2024 15:15:23 -0700 Subject: [PATCH 3/7] use shard response header to piggyback task resource usages Signed-off-by: Chenyang Ji --- .../resourcetracker/TaskResourceInfo.java | 178 +++++++++++++----- .../listener/ResourceTrackingListener.java | 62 ------ .../rules/model/SearchQueryRecord.java | 10 - .../TransportTopQueriesAction.java | 37 ++-- .../search/AbstractSearchAsyncAction.java | 23 +-- .../action/search/DfsQueryPhase.java | 10 - .../action/search/ExpandSearchPhase.java | 11 -- .../action/search/FetchSearchPhase.java | 13 +- .../opensearch/action/search/SearchPhase.java | 6 - .../action/search/SearchPhaseContext.java | 5 + .../action/search/SearchRequestContext.java | 51 ++++- .../search/TransportCreatePitAction.java | 2 - .../action/search/TransportSearchAction.java | 6 +- .../cluster/service/ClusterService.java | 18 -- .../common/util/concurrent/ThreadContext.java | 12 ++ .../opensearch/search/SearchPhaseResult.java | 49 ----- .../org/opensearch/search/SearchService.java | 66 +++++-- .../search/dfs/DfsSearchResult.java | 2 - .../search/fetch/FetchSearchResult.java | 2 - .../search/fetch/QueryFetchSearchResult.java | 2 - .../fetch/ScrollQueryFetchSearchResult.java | 2 - .../search/internal/SearchContext.java | 5 - .../search/query/QuerySearchResult.java | 2 - .../search/query/ScrollQuerySearchResult.java | 2 - .../tasks/TaskResourceTrackingService.java | 25 +-- .../AbstractSearchAsyncActionTests.java | 9 +- .../CanMatchPreFilterSearchPhaseTests.java | 15 +- .../action/search/MockSearchPhaseContext.java | 8 + .../action/search/SearchAsyncActionTests.java | 12 +- .../SearchQueryThenFetchAsyncActionTests.java | 3 +- ...earchRequestOperationsListenerSupport.java | 3 +- .../search/SearchRequestSlowLogTests.java | 15 +- .../search/SearchRequestStatsTests.java | 6 +- .../search/SearchResponseMergerTests.java | 36 ++-- .../search/TransportSearchActionTests.java | 18 +- 35 files changed, 362 insertions(+), 364 deletions(-) delete mode 100644 plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java diff --git a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java index 1c1b3d9448c07..26ef8673a3283 100644 --- a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java +++ b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java @@ -9,13 +9,22 @@ package org.opensearch.core.tasks.resourcetracker; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.core.ParseField; +import org.opensearch.core.common.Strings; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; +import org.opensearch.core.xcontent.ConstructingObjectParser; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.core.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; +import static org.opensearch.core.xcontent.ConstructingObjectParser.constructorArg; + /** * Task resource usage information with minimal information about the task *

@@ -26,25 +35,106 @@ * @opensearch.api */ @PublicApi(since = "2.15.0") -public class TaskResourceInfo implements Writeable { - private TaskResourceUsage taskResourceUsage; - private String action; - private long taskId; - private long parentTaskId; - - public TaskResourceInfo() { - this.action = ""; - this.taskId = -1L; - this.taskResourceUsage = new TaskResourceUsage(0, 0); - } - - public TaskResourceInfo(String action, long taskId, long parentTaskId, TaskResourceUsage taskResourceUsage) { +public class TaskResourceInfo implements Writeable, ToXContentObject { + private final String action; + private final long taskId; + private final long parentTaskId; + private final String nodeId; + private final TaskResourceUsage taskResourceUsage; + + private static final ParseField ACTION = new ParseField("action"); + private static final ParseField TASK_ID = new ParseField("taskId"); + private static final ParseField PARENT_TASK_ID = new ParseField("parentTaskId"); + private static final ParseField NODE_ID = new ParseField("nodeId"); + private static final ParseField TASK_RESOURCE_USAGE = new ParseField("taskResourceUsage"); + + public TaskResourceInfo( + final String action, + final long taskId, + final long parentTaskId, + final String nodeId, + final TaskResourceUsage taskResourceUsage + ) { this.action = action; this.taskId = taskId; this.parentTaskId = parentTaskId; + this.nodeId = nodeId; this.taskResourceUsage = taskResourceUsage; } + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "task_resource_usage", + a -> new Builder().setAction((String) a[0]) + .setTaskId((Long) a[1]) + .setParentTaskId((Long) a[2]) + .setNodeId((String) a[3]) + .setTaskResourceUsage((TaskResourceUsage) a[4]) + .build() + ); + + static { + PARSER.declareString(constructorArg(), ACTION); + PARSER.declareLong(constructorArg(), TASK_ID); + PARSER.declareLong(constructorArg(), PARENT_TASK_ID); + PARSER.declareString(constructorArg(), NODE_ID); + PARSER.declareObject(constructorArg(), TaskResourceUsage.PARSER, TASK_RESOURCE_USAGE); + } + + public static TaskResourceInfo fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(ACTION.getPreferredName(), this.action); + builder.field(TASK_ID.getPreferredName(), this.taskId); + builder.field(PARENT_TASK_ID.getPreferredName(), this.parentTaskId); + builder.field(NODE_ID.getPreferredName(), this.nodeId); + builder.startObject(TASK_RESOURCE_USAGE.getPreferredName()); + this.taskResourceUsage.toXContent(builder, params); + builder.endObject(); + builder.endObject(); + return builder; + } + + public static class Builder { + private TaskResourceUsage taskResourceUsage; + private String action; + private long taskId; + private long parentTaskId; + private String nodeId; + + public Builder setTaskResourceUsage(final TaskResourceUsage taskResourceUsage) { + this.taskResourceUsage = taskResourceUsage; + return this; + } + + public Builder setAction(final String action) { + this.action = action; + return this; + } + + public Builder setTaskId(final long taskId) { + this.taskId = taskId; + return this; + } + + public Builder setParentTaskId(final long parentTaskId) { + this.parentTaskId = parentTaskId; + return this; + } + + public Builder setNodeId(final String nodeId) { + this.nodeId = nodeId; + return this; + } + + public TaskResourceInfo build() { + return new TaskResourceInfo(action, taskId, parentTaskId, nodeId, taskResourceUsage); + } + } + /** * Read task info from a stream. * @@ -53,16 +143,16 @@ public TaskResourceInfo(String action, long taskId, long parentTaskId, TaskResou * @throws IOException IOException */ public static TaskResourceInfo readFromStream(StreamInput in) throws IOException { - TaskResourceInfo info = new TaskResourceInfo(); - info.action = in.readString(); - info.taskId = in.readLong(); - info.taskResourceUsage = TaskResourceUsage.readFromStream(in); - info.parentTaskId = in.readLong(); - return info; + return new TaskResourceInfo.Builder().setAction(in.readString()) + .setTaskId(in.readLong()) + .setParentTaskId(in.readLong()) + .setNodeId(in.readString()) + .setTaskResourceUsage(TaskResourceUsage.readFromStream(in)) + .build(); } /** - * Get taskResourceUsage + * Get TaskResourceUsage * * @return taskResourceUsage */ @@ -70,14 +160,6 @@ public TaskResourceUsage getTaskResourceUsage() { return taskResourceUsage; } - /** - * Set taskResourceUsage - * @param taskResourceUsage the updated taskResourceUsage - */ - public void setTaskResourceUsage(TaskResourceUsage taskResourceUsage) { - this.taskResourceUsage = taskResourceUsage; - } - /** * Get parent task id * @@ -87,14 +169,6 @@ public long getParentTaskId() { return parentTaskId; } - /** - * Set parent task id - * @param parentTaskId parent task id - */ - public void setParentTaskId(long parentTaskId) { - this.parentTaskId = parentTaskId; - } - /** * Get task id * @return task id @@ -104,11 +178,11 @@ public long getTaskId() { } /** - * Set task id - * @param taskId task id + * Get node id + * @return node id */ - public void setTaskId(long taskId) { - this.taskId = taskId; + public String getNodeId() { + return nodeId; } /** @@ -119,20 +193,18 @@ public String getAction() { return action; } - /** - * Set task action - * @param action task action - */ - public void setAction(String action) { - this.action = action; - } - @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(action); out.writeLong(taskId); - taskResourceUsage.writeTo(out); out.writeLong(parentTaskId); + out.writeString(nodeId); + taskResourceUsage.writeTo(out); + } + + @Override + public String toString() { + return Strings.toString(MediaTypeRegistry.JSON, this); } @Override @@ -141,11 +213,15 @@ public boolean equals(Object obj) { return false; } TaskResourceInfo other = (TaskResourceInfo) obj; - return action.equals(other.action) && taskId == other.taskId && taskResourceUsage.equals(other.taskResourceUsage); + return action.equals(other.action) + && taskId == other.taskId + && parentTaskId == other.parentTaskId + && Objects.equals(nodeId, other.nodeId) + && taskResourceUsage.equals(other.taskResourceUsage); } @Override public int hashCode() { - return Objects.hash(action, taskId, taskResourceUsage); + return Objects.hash(action, taskId, parentTaskId, nodeId, taskResourceUsage); } } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java deleted file mode 100644 index 1409ac8f92e5b..0000000000000 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/core/listener/ResourceTrackingListener.java +++ /dev/null @@ -1,62 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.plugin.insights.core.listener; - -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; -import org.opensearch.plugin.insights.core.service.QueryInsightsService; -import org.opensearch.tasks.Task; -import org.opensearch.tasks.TaskResourceTrackingService; -import org.opensearch.tasks.TaskResourceTrackingService.TaskCompletionListener; -import org.opensearch.tasks.TaskResourceTrackingService.TaskStartListener; - -import java.util.concurrent.atomic.AtomicInteger; - -/** - * Listener for task level resource usage - */ -public class ResourceTrackingListener implements TaskCompletionListener, TaskStartListener { - private final QueryInsightsService queryInsightsService; - - /** - * Constructor of ResourceTrackingListener - * @param queryInsightsService queryInsightsService - * @param taskResourceTrackingService taskResourceTrackingService - */ - public ResourceTrackingListener(QueryInsightsService queryInsightsService, TaskResourceTrackingService taskResourceTrackingService) { - this.queryInsightsService = queryInsightsService; - taskResourceTrackingService.addTaskCompletionListener(this); - taskResourceTrackingService.addTaskStartListener(this); - } - - @Override - public void onTaskCompleted(Task task) { - TaskResourceInfo info = new TaskResourceInfo( - task.getAction(), - task.getId(), - task.getParentTaskId().getId(), - task.getTotalResourceStats() - ); - long parentTaskId = task.getParentTaskId().getId(); - if (parentTaskId == -1) { - parentTaskId = task.getId(); - } - this.queryInsightsService.taskStatusMap.get(parentTaskId).decrementAndGet(); - queryInsightsService.taskRecordsQueue.add(info); - } - - @Override - public void onTaskStarts(Task task) { - long parentTaskId = task.getParentTaskId().getId(); - if (parentTaskId == -1) { - parentTaskId = task.getId(); - } - this.queryInsightsService.taskStatusMap.putIfAbsent(parentTaskId, new AtomicInteger(0)); - this.queryInsightsService.taskStatusMap.get(parentTaskId).incrementAndGet(); - } -} diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java index 279c05fefed33..fec00a680ae58 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecord.java @@ -111,16 +111,6 @@ public void addAttribute(final Attribute attribute, final Object value) { attributes.put(attribute, value); } - /** - * Add a measurement to this record - * - * @param metricType Type of measurement to add - * @param value the value associated with the measurement - */ - public void addMeasurement(final MetricType metricType, final Number value) { - measurements.put(metricType, value); - } - @Override public XContentBuilder toXContent(final XContentBuilder builder, final ToXContent.Params params) throws IOException { builder.startObject(); diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java index ddf614211bc41..7949b70a16db6 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java @@ -8,7 +8,6 @@ package org.opensearch.plugin.insights.rules.transport.top_queries; -import org.opensearch.OpenSearchException; import org.opensearch.action.FailedNodeException; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.nodes.TransportNodesAction; @@ -21,7 +20,6 @@ import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesAction; import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesRequest; import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesResponse; -import org.opensearch.plugin.insights.rules.model.MetricType; import org.opensearch.plugin.insights.settings.QueryInsightsSettings; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportRequest; @@ -29,7 +27,6 @@ import java.io.IOException; import java.util.List; -import java.util.Locale; /** * Transport action for cluster/node level top queries information. @@ -81,17 +78,18 @@ protected TopQueriesResponse newResponse( final List responses, final List failures ) { - if (topQueriesRequest.getMetricType() == MetricType.LATENCY) { - return new TopQueriesResponse( - clusterService.getClusterName(), - responses, - failures, - clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE), - MetricType.LATENCY - ); - } else { - throw new OpenSearchException(String.format(Locale.ROOT, "invalid metric type %s", topQueriesRequest.getMetricType())); + int size; + switch (topQueriesRequest.getMetricType()) { + case CPU: + size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_CPU_QUERIES_SIZE); + break; + case MEMORY: + size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_MEMORY_QUERIES_SIZE); + break; + default: + size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE); } + return new TopQueriesResponse(clusterService.getClusterName(), responses, failures, size, topQueriesRequest.getMetricType()); } @Override @@ -107,15 +105,10 @@ protected TopQueries newNodeResponse(final StreamInput in) throws IOException { @Override protected TopQueries nodeOperation(final NodeRequest nodeRequest) { final TopQueriesRequest request = nodeRequest.request; - if (request.getMetricType() == MetricType.LATENCY) { - return new TopQueries( - clusterService.localNode(), - queryInsightsService.getTopQueriesService(MetricType.LATENCY).getTopQueriesRecords(true) - ); - } else { - throw new OpenSearchException(String.format(Locale.ROOT, "invalid metric type %s", request.getMetricType())); - } - + return new TopQueries( + clusterService.localNode(), + queryInsightsService.getTopQueriesService(request.getMetricType()).getTopQueriesRecords(true) + ); } /** diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index b54c20cbbb828..2e234e89e3f0e 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -48,10 +48,10 @@ import org.opensearch.common.lease.Releasables; import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.AtomicArray; +import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.action.ActionListener; import org.opensearch.core.action.ShardOperationFailedException; import org.opensearch.core.index.shard.ShardId; -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.AliasFilter; @@ -79,6 +79,8 @@ import java.util.function.BiFunction; import java.util.stream.Collectors; +import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; + /** * This is an abstract base class that encapsulates the logic to fan out to all shards in provided {@link GroupShardsIterator} * and collect the results. If a shard request returns a failure this class handles the advance to the next replica of the shard until @@ -619,9 +621,18 @@ protected void onShardResult(Result result, SearchShardIterator shardIt) { if (logger.isTraceEnabled()) { logger.trace("got first-phase result from {}", result != null ? result.getSearchShardTarget() : null); } + this.setPhaseResourceUsages(); results.consumeResult(result, () -> onShardResultConsumed(result, shardIt)); } + public void setPhaseResourceUsages() { + ThreadContext threadContext = searchRequestContext.getThreadContextSupplier().get(); + List taskResourceUsages = threadContext.getResponseHeaders().get(TASK_RESOURCE_USAGE); + if (taskResourceUsages != null && taskResourceUsages.size() > 0) { + searchRequestContext.recordPhaseResourceUsage(taskResourceUsages.get(0)); + } + } + private void onShardResultConsumed(Result result, SearchShardIterator shardIt) { successfulOps.incrementAndGet(); // clean a previous error on this shard group (note, this code will be serialized on the same shardIndex value level @@ -845,16 +856,6 @@ public final ShardSearchRequest buildShardSearchRequest(SearchShardIterator shar return shardRequest; } - @Override - public List getPhaseResourceUsageFromResults() { - return results.getAtomicArray() - .asList() - .stream() - .filter(a -> a.remoteAddress() != null) - .map(SearchPhaseResult::getTaskResourceInfo) - .collect(Collectors.toList()); - } - /** * Returns the next phase based on the results of the initial search phase * @param results the results of the initial search phase. Each non null element in the result array represent a successfully diff --git a/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java index 6e0e705df1a45..6fe4eaabd6d17 100644 --- a/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/opensearch/action/search/DfsQueryPhase.java @@ -32,7 +32,6 @@ package org.opensearch.action.search; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.dfs.AggregatedDfs; @@ -44,7 +43,6 @@ import java.io.IOException; import java.util.List; import java.util.function.Function; -import java.util.stream.Collectors; /** * This search phase fans out to every shards to execute a distributed search with a pre-collected distributed frequencies for all @@ -85,14 +83,6 @@ final class DfsQueryPhase extends SearchPhase { context.addReleasable(queryResult); } - @Override - public List getPhaseResourceUsageFromResults() { - return searchResults.stream() - .filter(a -> a.remoteAddress() != null) - .map(SearchPhaseResult::getTaskResourceInfo) - .collect(Collectors.toList()); - } - @Override public void run() throws IOException { // TODO we can potentially also consume the actual per shard results from the initial phase here in the aggregateDfs diff --git a/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java b/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java index b65c96f0757d7..e249fb239dae9 100644 --- a/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/ExpandSearchPhase.java @@ -34,7 +34,6 @@ import org.opensearch.common.util.concurrent.AtomicArray; import org.opensearch.core.action.ActionListener; -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.index.query.BoolQueryBuilder; import org.opensearch.index.query.InnerHitBuilder; import org.opensearch.index.query.QueryBuilder; @@ -49,7 +48,6 @@ import java.util.HashMap; import java.util.Iterator; import java.util.List; -import java.util.stream.Collectors; /** * This search phase is an optional phase that will be executed once all hits are fetched from the shards that executes @@ -80,15 +78,6 @@ private boolean isCollapseRequest() { && searchRequest.source().collapse().getInnerHits().isEmpty() == false; } - @Override - public List getPhaseResourceUsageFromResults() { - return queryResults.asList() - .stream() - .filter(a -> a.remoteAddress() != null) - .map(SearchPhaseResult::getTaskResourceInfo) - .collect(Collectors.toList()); - } - @Override public void run() { if (isCollapseRequest() && searchResponse.hits().getHits().length > 0) { diff --git a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java index 4729ab0def195..eb71a302e8e78 100644 --- a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java @@ -37,7 +37,6 @@ import org.opensearch.action.OriginalIndices; import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.AtomicArray; -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.RescoreDocIds; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; @@ -52,7 +51,6 @@ import java.util.List; import java.util.function.BiFunction; -import java.util.stream.Collectors; /** * This search phase merges the query results from the previous phase together and calculates the topN hits for this search. @@ -113,16 +111,6 @@ final class FetchSearchPhase extends SearchPhase { this.progressListener = context.getTask().getProgressListener(); } - @Override - public List getPhaseResourceUsageFromResults() { - return fetchResults.getAtomicArray() - .asList() - .stream() - .filter(a -> a.remoteAddress() != null) - .map(SearchPhaseResult::getTaskResourceInfo) - .collect(Collectors.toList()); - } - @Override public void run() { context.execute(new AbstractRunnable() { @@ -252,6 +240,7 @@ private void executeFetch( public void innerOnResponse(FetchSearchResult result) { try { progressListener.notifyFetchResult(shardIndex); + context.setPhaseResourceUsages(); counter.onResult(result); } catch (Exception e) { context.onPhaseFailure(FetchSearchPhase.this, "", e); diff --git a/server/src/main/java/org/opensearch/action/search/SearchPhase.java b/server/src/main/java/org/opensearch/action/search/SearchPhase.java index 1f47f6f35548e..0890e9f5de8d4 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/SearchPhase.java @@ -33,10 +33,8 @@ import org.opensearch.common.CheckedRunnable; import org.opensearch.common.annotation.PublicApi; -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import java.io.IOException; -import java.util.List; import java.util.Locale; import java.util.Objects; @@ -50,10 +48,6 @@ public abstract class SearchPhase implements CheckedRunnable { private final String name; private long startTimeInNanos; - public List getPhaseResourceUsageFromResults() { - return List.of(); - } - protected SearchPhase(String name) { this.name = Objects.requireNonNull(name, "name must not be null"); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java b/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java index df451e0745e3c..55f2a22749e70 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchPhaseContext.java @@ -150,4 +150,9 @@ default void sendReleaseSearchContext( * Registers a {@link Releasable} that will be closed when the search request finishes or fails. */ void addReleasable(Releasable releasable); + + /** + * Set the resource usage info for this phase + */ + void setPhaseResourceUsages(); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java index 5b133ba0554f4..389109c1860ac 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java @@ -8,13 +8,27 @@ package org.opensearch.action.search; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.lucene.search.TotalHits; import org.opensearch.common.annotation.InternalApi; - +import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; + +import java.io.IOException; +import java.util.ArrayList; import java.util.EnumMap; import java.util.HashMap; +import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Supplier; /** * This class holds request-level context for search queries at the coordinator node @@ -23,6 +37,7 @@ */ @InternalApi public class SearchRequestContext { + private static final Logger logger = LogManager.getLogger(); private final SearchRequestOperationsListener searchRequestOperationsListener; private long absoluteStartNanos; private final Map phaseTookMap; @@ -30,13 +45,21 @@ public class SearchRequestContext { private final EnumMap shardStats; private final SearchRequest searchRequest; - - SearchRequestContext(final SearchRequestOperationsListener searchRequestOperationsListener, final SearchRequest searchRequest) { + private final List phaseResourceUsage; + private final Supplier threadContextSupplier; + + SearchRequestContext( + final SearchRequestOperationsListener searchRequestOperationsListener, + final SearchRequest searchRequest, + final Supplier threadContextSupplier + ) { this.searchRequestOperationsListener = searchRequestOperationsListener; this.absoluteStartNanos = System.nanoTime(); this.phaseTookMap = new HashMap<>(); this.shardStats = new EnumMap<>(ShardStatsFieldNames.class); this.searchRequest = searchRequest; + this.phaseResourceUsage = new ArrayList<>(); + this.threadContextSupplier = threadContextSupplier; } SearchRequestOperationsListener getSearchRequestOperationsListener() { @@ -111,6 +134,28 @@ String formattedShardStats() { public SearchRequest getRequest() { return searchRequest; } + + public Supplier getThreadContextSupplier() { + return threadContextSupplier; + } + + public void recordPhaseResourceUsage(String usage) { + try { + XContentParser parser = XContentHelper.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + new BytesArray(usage), + MediaTypeRegistry.JSON + ); + this.phaseResourceUsage.add(TaskResourceInfo.PARSER.apply(parser, null)); + } catch (IOException e) { + logger.debug("fail to parse phase resource usages: ", e); + } + } + + public List getPhaseResourceUsage() { + return phaseResourceUsage; + } } enum ShardStatsFieldNames { diff --git a/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java b/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java index 81e8f7db47615..baa113997f243 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportCreatePitAction.java @@ -121,14 +121,12 @@ public CreateReaderContextResponse(ShardSearchContextId shardSearchContextId) { public CreateReaderContextResponse(StreamInput in) throws IOException { super(in); contextId = new ShardSearchContextId(in); - readResourceUsage(in); } @Override public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); contextId.writeTo(out); - writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 143b01af3f62f..9ae1dde5312fd 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -451,7 +451,11 @@ private void executeRequest( logger, TraceableSearchRequestOperationsListener.create(tracer, requestSpan) ); - SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest); + SearchRequestContext searchRequestContext = new SearchRequestContext( + requestOperationsListeners, + originalSearchRequest, + threadPool::getThreadContext + ); searchRequestContext.getSearchRequestOperationsListener().onRequestStart(searchRequestContext); PipelinedRequest searchRequest; diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java index 8898ae340f3af..fec0fc074b3f1 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java @@ -262,24 +262,6 @@ public IndexingPressureService getIndexingPressureService() { return indexingPressureService; } - /** - * Getter for {@link TaskResourceTrackingService}, This method exposes task level resource usage for other plugins to use. - * - * @return TaskResourceTrackingService - */ - public TaskResourceTrackingService getTaskResourceTrackingService() { - return taskResourceTrackingService; - } - - /** - * Setter for {@link TaskResourceTrackingService} - * - * @param taskResourceTrackingService taskResourceTrackingService - */ - public void setTaskResourceTrackingService(TaskResourceTrackingService taskResourceTrackingService) { - this.taskResourceTrackingService = taskResourceTrackingService; - } - public ClusterApplierService getClusterApplierService() { return clusterApplierService; } diff --git a/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java b/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java index 6580b0e0085ef..89bed9894790c 100644 --- a/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java +++ b/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java @@ -483,6 +483,18 @@ public void addResponseHeader(final String key, final String value) { addResponseHeader(key, value, v -> v); } + /** + * Remove the {@code value} for the specified {@code key}. + * + * @param key the header name + */ + public void removeResponseHeader(final String key) { + ThreadContextStruct threadContextStruct = threadLocal.get(); + if (threadContextStruct.responseHeaders != null) { + threadContextStruct.responseHeaders.remove(key); + } + } + /** * Add the {@code value} for the specified {@code key} with the specified {@code uniqueValue} used for de-duplication. Any duplicate * {@code value} after applying {@code uniqueValue} is ignored. diff --git a/server/src/main/java/org/opensearch/search/SearchPhaseResult.java b/server/src/main/java/org/opensearch/search/SearchPhaseResult.java index dffe49a5d8889..a351b3bd2dda6 100644 --- a/server/src/main/java/org/opensearch/search/SearchPhaseResult.java +++ b/server/src/main/java/org/opensearch/search/SearchPhaseResult.java @@ -36,22 +36,13 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; -import org.opensearch.core.tasks.resourcetracker.ResourceStats; -import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; -import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; -import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; import org.opensearch.core.transport.TransportResponse; import org.opensearch.search.fetch.FetchSearchResult; -import org.opensearch.search.internal.SearchContext; import org.opensearch.search.internal.ShardSearchContextId; import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.search.query.QuerySearchResult; -import org.opensearch.tasks.TaskResourceTrackingService; import java.io.IOException; -import java.util.HashMap; -import java.util.Map; /** * This class is a base class for all search related results. It contains the shard target it @@ -71,8 +62,6 @@ public abstract class SearchPhaseResult extends TransportResponse { protected ShardSearchContextId contextId; private ShardSearchRequest shardSearchRequest; private RescoreDocIds rescoreDocIds = RescoreDocIds.EMPTY; - private TaskResourceInfo taskResourceInfo = new TaskResourceInfo(); - private final Map taskResourceUsageStartValues = new HashMap<>(); protected SearchPhaseResult() { @@ -148,42 +137,4 @@ public void setRescoreDocIds(RescoreDocIds rescoreDocIds) { public void writeTo(StreamOutput out) throws IOException { // TODO: this seems wrong, SearchPhaseResult should have a writeTo? } - - /** - * Get task resource usage info - * @return TaskResourceInfo - */ - public TaskResourceInfo getTaskResourceInfo() { - return taskResourceInfo; - } - - protected void readResourceUsage(StreamInput in) throws IOException { - taskResourceInfo = TaskResourceInfo.readFromStream(in); - } - - protected void writeResourceUsage(StreamOutput out) throws IOException { - ResourceUsageMetric[] usages = TaskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); - long cpu = 0, mem = 0; - for (ResourceUsageMetric usage : usages) { - if (usage.getStats() == ResourceStats.MEMORY && taskResourceUsageStartValues.containsKey(ResourceStats.MEMORY)) { - mem = usage.getValue() - taskResourceUsageStartValues.get(ResourceStats.MEMORY); - } else if (usage.getStats() == ResourceStats.CPU && taskResourceUsageStartValues.containsKey(ResourceStats.CPU)) { - cpu = usage.getValue() - taskResourceUsageStartValues.get(ResourceStats.CPU); - } - } - taskResourceInfo.setTaskResourceUsage(new TaskResourceUsage(cpu, mem)); - taskResourceInfo.writeTo(out); - } - - protected void injectInitialResourceUsage(SearchContext context) { - Map usageInfo = context.usageInfo; - // initial resource usage when the task starts - for (Map.Entry entry : usageInfo.entrySet()) { - taskResourceUsageStartValues.put(entry.getKey(), entry.getValue().getStartValue()); - } - // inject metadata related to the task - taskResourceInfo.setAction(context.getTask().getAction()); - taskResourceInfo.setTaskId(context.getTask().getId()); - taskResourceInfo.setParentTaskId(context.getTask().getParentTaskId().getId()); - } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 8d2f4ff699479..4f01cecf51326 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -73,7 +73,12 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.indices.breaker.CircuitBreakerService; +import org.opensearch.core.tasks.resourcetracker.ResourceStats; import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; @@ -138,6 +143,7 @@ import org.opensearch.search.sort.SortOrder; import org.opensearch.search.suggest.Suggest; import org.opensearch.search.suggest.completion.CompletionSuggestion; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.Scheduler.Cancellable; import org.opensearch.threadpool.ThreadPool; import org.opensearch.threadpool.ThreadPool.Names; @@ -161,6 +167,7 @@ import static org.opensearch.common.unit.TimeValue.timeValueHours; import static org.opensearch.common.unit.TimeValue.timeValueMillis; import static org.opensearch.common.unit.TimeValue.timeValueMinutes; +import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; /** * The main search service @@ -554,7 +561,7 @@ private DfsSearchResult executeDfsPhase(ShardSearchRequest request, SearchShardT SearchContext context = createContext(readerContext, request, task, true) ) { dfsPhase.execute(context); - context.dfsResult().injectInitialResourceUsage(context); + writeTaskResourceUsage(task); return context.dfsResult(); } catch (Exception e) { logger.trace("Dfs phase failed", e); @@ -650,7 +657,7 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh final RescoreDocIds rescoreDocIds = context.rescoreDocIds(); context.queryResult().setRescoreDocIds(rescoreDocIds); readerContext.setRescoreDocIds(rescoreDocIds); - context.queryResult().injectInitialResourceUsage(context); + writeTaskResourceUsage(task); return context.queryResult(); } } catch (Exception e) { @@ -676,7 +683,7 @@ private QueryFetchSearchResult executeFetchPhase(ReaderContext reader, SearchCon executor.success(); } QueryFetchSearchResult result = new QueryFetchSearchResult(context.queryResult(), context.fetchResult()); - result.injectInitialResourceUsage(context); + writeTaskResourceUsage(context.getTask()); return result; } @@ -709,7 +716,7 @@ public void executeQueryPhase( searchContext.queryResult(), searchContext.shardTarget() ); - scrollQuerySearchResult.injectInitialResourceUsage(searchContext); + writeTaskResourceUsage(task); return scrollQuerySearchResult; } catch (Exception e) { logger.trace("Query phase failed", e); @@ -741,7 +748,7 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, final RescoreDocIds rescoreDocIds = searchContext.rescoreDocIds(); searchContext.queryResult().setRescoreDocIds(rescoreDocIds); readerContext.setRescoreDocIds(rescoreDocIds); - searchContext.queryResult().injectInitialResourceUsage(searchContext); + writeTaskResourceUsage(task); return searchContext.queryResult(); } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); @@ -795,7 +802,7 @@ public void executeFetchPhase( fetchSearchResult, searchContext.shardTarget() ); - scrollQueryFetchSearchResult.injectInitialResourceUsage(searchContext); + writeTaskResourceUsage(task); return scrollQueryFetchSearchResult; } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); @@ -827,7 +834,7 @@ public void executeFetchPhase(ShardFetchRequest request, SearchShardTask task, A } executor.success(); } - searchContext.fetchResult().injectInitialResourceUsage(searchContext); + writeTaskResourceUsage(task); return searchContext.fetchResult(); } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); @@ -1051,9 +1058,6 @@ final SearchContext createContext( context.setTask(task); // pre process queryPhase.preProcess(context); - context.usageInfo = task.getActiveThreadResourceInfo(Thread.currentThread().getId(), ResourceStatsType.WORKER_STATS) - .getResourceUsageInfo() - .getStatsInfo(); } catch (Exception e) { context.close(); throw e; @@ -1134,6 +1138,45 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear return searchContext; } + protected void writeTaskResourceUsage(SearchShardTask task) { + // Get resource usages when task starts + Map startValues = task.getActiveThreadResourceInfo( + Thread.currentThread().getId(), + ResourceStatsType.WORKER_STATS + ).getResourceUsageInfo().getStatsInfo(); + + // Get current resource usages + ResourceUsageMetric[] endValues = TaskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); + long cpu = -1, mem = -1; + for (ResourceUsageMetric endValue : endValues) { + if (endValue.getStats() == ResourceStats.MEMORY) { + mem = endValue.getValue(); + } else if (endValue.getStats() == ResourceStats.CPU) { + cpu = endValue.getValue(); + } + } + if (cpu == -1 || mem == -1) { + logger.debug("Invalid resource usage value, cpu [{}], memory [{}]: ", cpu, mem); + return; + } + + // initial resource usages when the task started + TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setAction(task.getAction()) + .setTaskId(task.getId()) + .setParentTaskId(task.getParentTaskId().getId()) + .setNodeId(clusterService.localNode().getId()) + .setTaskResourceUsage( + new TaskResourceUsage( + cpu - startValues.get(ResourceStats.CPU).getStartValue(), + mem - startValues.get(ResourceStats.MEMORY).getStartValue() + ) + ) + .build(); + + threadPool.getThreadContext().removeResponseHeader(TASK_RESOURCE_USAGE); + threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); + } + private void freeAllContextForIndex(Index index) { assert index != null; for (ReaderContext ctx : activeReaders.values()) { @@ -1759,7 +1802,6 @@ public CanMatchResponse(StreamInput in) throws IOException { super(in); this.canMatch = in.readBoolean(); this.estimatedMinAndMax = in.readOptionalWriteable(MinAndMax::new); - readResourceUsage(in); } public CanMatchResponse(boolean canMatch, MinAndMax estimatedMinAndMax) { @@ -1772,8 +1814,6 @@ public void writeTo(StreamOutput out) throws IOException { super.writeTo(out); out.writeBoolean(canMatch); out.writeOptionalWriteable(estimatedMinAndMax); - - writeResourceUsage(out); } public boolean canMatch() { diff --git a/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java b/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java index 3cdd078c2d700..2338a47435012 100644 --- a/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java +++ b/server/src/main/java/org/opensearch/search/dfs/DfsSearchResult.java @@ -81,7 +81,6 @@ public DfsSearchResult(StreamInput in) throws IOException { maxDoc = in.readVInt(); setShardSearchRequest(in.readOptionalWriteable(ShardSearchRequest::new)); - readResourceUsage(in); } public DfsSearchResult(ShardSearchContextId contextId, SearchShardTarget shardTarget, ShardSearchRequest shardSearchRequest) { @@ -134,7 +133,6 @@ public void writeTo(StreamOutput out) throws IOException { writeFieldStats(out, fieldStatistics); out.writeVInt(maxDoc); out.writeOptionalWriteable(getShardSearchRequest()); - writeResourceUsage(out); } public static void writeFieldStats(StreamOutput out, final Map fieldStatistics) throws IOException { diff --git a/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java b/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java index f6be61dbb7659..26fa90141c2a9 100644 --- a/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java +++ b/server/src/main/java/org/opensearch/search/fetch/FetchSearchResult.java @@ -62,7 +62,6 @@ public FetchSearchResult(StreamInput in) throws IOException { super(in); contextId = new ShardSearchContextId(in); hits = new SearchHits(in); - readResourceUsage(in); } public FetchSearchResult(ShardSearchContextId id, SearchShardTarget shardTarget) { @@ -109,6 +108,5 @@ public int counterGetAndIncrement() { public void writeTo(StreamOutput out) throws IOException { contextId.writeTo(out); hits.writeTo(out); - writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java b/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java index 626fc3b408918..ce4c59fc77489 100644 --- a/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java +++ b/server/src/main/java/org/opensearch/search/fetch/QueryFetchSearchResult.java @@ -55,7 +55,6 @@ public QueryFetchSearchResult(StreamInput in) throws IOException { super(in); queryResult = new QuerySearchResult(in); fetchResult = new FetchSearchResult(in); - readResourceUsage(in); } public QueryFetchSearchResult(QuerySearchResult queryResult, FetchSearchResult fetchResult) { @@ -101,6 +100,5 @@ public FetchSearchResult fetchResult() { public void writeTo(StreamOutput out) throws IOException { queryResult.writeTo(out); fetchResult.writeTo(out); - writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java b/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java index 2e2f0914c0d9c..415350b4c5dc7 100644 --- a/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java +++ b/server/src/main/java/org/opensearch/search/fetch/ScrollQueryFetchSearchResult.java @@ -54,7 +54,6 @@ public ScrollQueryFetchSearchResult(StreamInput in) throws IOException { SearchShardTarget searchShardTarget = new SearchShardTarget(in); result = new QueryFetchSearchResult(in); setSearchShardTarget(searchShardTarget); - readResourceUsage(in); } public ScrollQueryFetchSearchResult(QueryFetchSearchResult result, SearchShardTarget shardTarget) { @@ -92,6 +91,5 @@ public FetchSearchResult fetchResult() { public void writeTo(StreamOutput out) throws IOException { getSearchShardTarget().writeTo(out); result.writeTo(out); - writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index 3564322c8e349..0c8240d3a8322 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -43,8 +43,6 @@ import org.opensearch.common.lease.Releasables; import org.opensearch.common.unit.TimeValue; import org.opensearch.common.util.BigArrays; -import org.opensearch.core.tasks.resourcetracker.ResourceStats; -import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; import org.opensearch.index.cache.bitset.BitsetFilterCache; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; @@ -81,7 +79,6 @@ import org.opensearch.search.suggest.SuggestionSearchContext; import java.util.Collection; -import java.util.EnumMap; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -130,8 +127,6 @@ public List toInternalAggregations(Collection co private volatile boolean searchTimedOut; - public Map usageInfo = new EnumMap<>(ResourceStats.class); - protected SearchContext() {} public abstract void setTask(SearchShardTask task); diff --git a/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java b/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java index a2a97c9a3c374..f3ac953ab9d1d 100644 --- a/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java +++ b/server/src/main/java/org/opensearch/search/query/QuerySearchResult.java @@ -101,7 +101,6 @@ public QuerySearchResult(StreamInput in) throws IOException { ShardSearchContextId id = new ShardSearchContextId(in); readFromWithId(id, in); } - readResourceUsage(in); } public QuerySearchResult(ShardSearchContextId contextId, SearchShardTarget shardTarget, ShardSearchRequest shardSearchRequest) { @@ -376,7 +375,6 @@ public void writeTo(StreamOutput out) throws IOException { contextId.writeTo(out); writeToNoId(out); } - writeResourceUsage(out); } public void writeToNoId(StreamOutput out) throws IOException { diff --git a/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java b/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java index bfc8e94e50e13..0cdc8749253f0 100644 --- a/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java +++ b/server/src/main/java/org/opensearch/search/query/ScrollQuerySearchResult.java @@ -53,7 +53,6 @@ public ScrollQuerySearchResult(StreamInput in) throws IOException { SearchShardTarget shardTarget = new SearchShardTarget(in); result = new QuerySearchResult(in); setSearchShardTarget(shardTarget); - readResourceUsage(in); } public ScrollQuerySearchResult(QuerySearchResult result, SearchShardTarget shardTarget) { @@ -82,6 +81,5 @@ public QuerySearchResult queryResult() { public void writeTo(StreamOutput out) throws IOException { getSearchShardTarget().writeTo(out); result.writeTo(out); - writeResourceUsage(out); } } diff --git a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java index d843948071368..0ea27b16f19ae 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java @@ -53,12 +53,12 @@ public class TaskResourceTrackingService implements RunnableTaskExecutionListene Setting.Property.NodeScope ); public static final String TASK_ID = "TASK_ID"; + public static final String TASK_RESOURCE_USAGE = "TASK_RESOURCE_USAGE"; private static final ThreadMXBean threadMXBean = (ThreadMXBean) ManagementFactory.getThreadMXBean(); private final ConcurrentMapLong resourceAwareTasks = ConcurrentCollections.newConcurrentMapLongWithAggressiveConcurrency(); private final List taskCompletionListeners = new ArrayList<>(); - private final List taskStartListeners = new ArrayList<>(); private final ThreadPool threadPool; private volatile boolean taskResourceTrackingEnabled; @@ -99,17 +99,6 @@ public ThreadContext.StoredContext startTracking(Task task) { logger.debug("Starting resource tracking for task: {}", task.getId()); resourceAwareTasks.put(task.getId(), task); - - List exceptions = new ArrayList<>(); - for (TaskStartListener listener : taskStartListeners) { - try { - listener.onTaskStarts(task); - } catch (Exception e) { - exceptions.add(e); - } - } - ExceptionsHelper.maybeThrowRuntimeAndSuppress(exceptions); - return addTaskIdToThreadContext(task); } @@ -283,19 +272,7 @@ public interface TaskCompletionListener { void onTaskCompleted(Task task); } - /** - * Listener that gets invoked when a task execution starts. - */ - @PublicApi(since = "2.2.0") - public interface TaskStartListener { - void onTaskStarts(Task task); - } - public void addTaskCompletionListener(TaskCompletionListener listener) { this.taskCompletionListeners.add(listener); } - - public void addTaskStartListener(TaskStartListener listener) { - this.taskStartListeners.add(listener); - } } diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 7dcbf213d6c9d..551842b9fcb7a 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -186,7 +186,8 @@ private AbstractSearchAsyncAction createAction( SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - request + request, + () -> null ), NoopTracer.INSTANCE ) { @@ -771,7 +772,8 @@ private SearchDfsQueryThenFetchAsyncAction createSearchDfsQueryThenFetchAsyncAct SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(searchRequestOperationsListeners, logger), - searchRequest + searchRequest, + () -> null ), NoopTracer.INSTANCE ); @@ -825,7 +827,8 @@ private SearchQueryThenFetchAsyncAction createSearchQueryThenFetchAsyncAction( SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(searchRequestOperationsListeners, logger), - searchRequest + searchRequest, + () -> null ), NoopTracer.INSTANCE ) { diff --git a/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java b/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java index 1881c705fe6b3..bb51aeaeee9dd 100644 --- a/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java +++ b/server/src/test/java/org/opensearch/action/search/CanMatchPreFilterSearchPhaseTests.java @@ -170,7 +170,7 @@ public void run() throws IOException { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -268,7 +268,7 @@ public void run() throws IOException { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -366,7 +366,7 @@ public void sendCanMatch( new ArraySearchPhaseResults<>(iter.size()), randomIntBetween(1, 32), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ) { @Override @@ -396,7 +396,7 @@ protected void executePhaseOnShard( ); }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -488,7 +488,7 @@ public void run() { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -595,7 +595,7 @@ public void run() { } }, SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, searchRequest), + new SearchRequestContext(searchRequestOperationsListener, searchRequest, () -> null), NoopTracer.INSTANCE ); @@ -658,7 +658,8 @@ public void sendCanMatch( ExecutorService executor = OpenSearchExecutors.newDirectExecutorService(); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); SearchPhaseController controller = new SearchPhaseController( diff --git a/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java b/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java index cc10da8fc1f12..2f3e462f741b8 100644 --- a/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java +++ b/server/src/test/java/org/opensearch/action/search/MockSearchPhaseContext.java @@ -182,6 +182,14 @@ public void addReleasable(Releasable releasable) { // Noop } + /** + * Set the resource usage info for this phase + */ + @Override + public void setPhaseResourceUsages() { + // Noop + } + @Override public void execute(Runnable command) { command.run(); diff --git a/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java index 35e90ff662b19..8fe2d9af217d5 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchAsyncActionTests.java @@ -162,7 +162,7 @@ public void testSkipSearchShards() throws InterruptedException { new ArraySearchPhaseResults<>(shardsIter.size()), request.getMaxConcurrentShardRequests(), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, request), + new SearchRequestContext(searchRequestOperationsListener, request, () -> null), NoopTracer.INSTANCE ) { @@ -287,7 +287,7 @@ public void testLimitConcurrentShardRequests() throws InterruptedException { new ArraySearchPhaseResults<>(shardsIter.size()), request.getMaxConcurrentShardRequests(), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, request), + new SearchRequestContext(searchRequestOperationsListener, request, () -> null), NoopTracer.INSTANCE ) { @@ -409,7 +409,8 @@ public void sendFreeContext(Transport.Connection connection, ShardSearchContextI SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - request + request, + () -> null ), NoopTracer.INSTANCE ) { @@ -537,7 +538,8 @@ public void sendFreeContext(Transport.Connection connection, ShardSearchContextI SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - request + request, + () -> null ), NoopTracer.INSTANCE ) { @@ -657,7 +659,7 @@ public void testAllowPartialResults() throws InterruptedException { new ArraySearchPhaseResults<>(shardsIter.size()), request.getMaxConcurrentShardRequests(), SearchResponse.Clusters.EMPTY, - new SearchRequestContext(searchRequestOperationsListener, request), + new SearchRequestContext(searchRequestOperationsListener, request, () -> null), NoopTracer.INSTANCE ) { @Override diff --git a/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java index aefbbe80d5fa1..f6a06a51c7b43 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchQueryThenFetchAsyncActionTests.java @@ -240,7 +240,8 @@ public void sendExecuteQuery( SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ), NoopTracer.INSTANCE ) { diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java index 0f737e00478cb..fdac91a0e3124 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestOperationsListenerSupport.java @@ -25,7 +25,8 @@ default void onPhaseEnd(SearchRequestOperationsListener listener, SearchPhaseCon context, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); } diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java index 91a2552ac3f04..453fc6cd8a74c 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestSlowLogTests.java @@ -178,7 +178,8 @@ public void testConcurrentOnRequestEnd() throws InterruptedException { for (int i = 0; i < numRequests; i++) { SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(searchListenersList, logger), - searchRequest + searchRequest, + () -> null ); searchRequestContext.setAbsoluteStartNanos((i < numRequestsLogged) ? 0 : System.nanoTime()); searchRequestContexts.add(searchRequestContext); @@ -209,7 +210,8 @@ public void testSearchRequestSlowLogHasJsonFields_EmptySearchRequestContext() th SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); SearchRequestSlowLog.SearchRequestSlowLogMessage p = new SearchRequestSlowLog.SearchRequestSlowLogMessage( searchPhaseContext, @@ -233,7 +235,8 @@ public void testSearchRequestSlowLogHasJsonFields_NotEmptySearchRequestContext() SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); searchRequestContext.updatePhaseTookMap(SearchPhaseName.FETCH.getName(), 10L); searchRequestContext.updatePhaseTookMap(SearchPhaseName.QUERY.getName(), 50L); @@ -262,7 +265,8 @@ public void testSearchRequestSlowLogHasJsonFields_PartialContext() throws IOExce SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); searchRequestContext.updatePhaseTookMap(SearchPhaseName.FETCH.getName(), 10L); searchRequestContext.updatePhaseTookMap(SearchPhaseName.QUERY.getName(), 50L); @@ -291,7 +295,8 @@ public void testSearchRequestSlowLogSearchContextPrinterToLog() throws IOExcepti SearchPhaseContext searchPhaseContext = new MockSearchPhaseContext(1, searchRequest); SearchRequestContext searchRequestContext = new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ); searchRequestContext.updatePhaseTookMap(SearchPhaseName.FETCH.getName(), 10L); searchRequestContext.updatePhaseTookMap(SearchPhaseName.QUERY.getName(), 50L); diff --git a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java index fb9b26e3f3ad1..1af3eb2738a58 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchRequestStatsTests.java @@ -60,7 +60,8 @@ public void testSearchRequestStats() { ctx, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(0, testRequestStats.getPhaseCurrent(searchPhaseName)); @@ -120,7 +121,8 @@ public void testSearchRequestStatsOnPhaseEndConcurrently() throws InterruptedExc ctx, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); countDownLatch.countDown(); diff --git a/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java b/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java index ce4d5ca4f7091..0eefa413c1864 100644 --- a/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java +++ b/server/src/test/java/org/opensearch/action/search/SearchResponseMergerTests.java @@ -137,7 +137,8 @@ public void testMergeTookInMillis() throws InterruptedException { SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(TimeUnit.NANOSECONDS.toMillis(currentRelativeTime), searchResponse.getTook().millis()); @@ -195,7 +196,8 @@ public void testMergeShardFailures() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -252,7 +254,8 @@ public void testMergeShardFailuresNullShardTarget() throws InterruptedException clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -304,7 +307,8 @@ public void testMergeShardFailuresNullShardId() throws InterruptedException { SearchResponse.Clusters.EMPTY, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ).getShardFailures(); assertThat(Arrays.asList(shardFailures), containsInAnyOrder(expectedFailures.toArray(ShardSearchFailure.EMPTY_ARRAY))); @@ -344,7 +348,8 @@ public void testMergeProfileResults() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -412,7 +417,8 @@ public void testMergeCompletionSuggestions() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -490,7 +496,8 @@ public void testMergeCompletionSuggestionsTieBreak() throws InterruptedException clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -570,7 +577,8 @@ public void testMergeAggs() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, mergedResponse.getClusters()); @@ -733,7 +741,8 @@ public void testMergeSearchHits() throws InterruptedException { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); @@ -799,7 +808,8 @@ public void testMergeNoResponsesAdded() { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertSame(clusters, response.getClusters()); @@ -878,7 +888,8 @@ public void testMergeEmptySearchHitsWithNonEmpty() { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(10, mergedResponse.getHits().getTotalHits().value); @@ -926,7 +937,8 @@ public void testMergeOnlyEmptyHits() { clusters, new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - new SearchRequest() + new SearchRequest(), + () -> null ) ); assertEquals(expectedTotalHits, mergedResponse.getHits().getTotalHits()); diff --git a/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java b/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java index da19c839f3826..84955d01a59ce 100644 --- a/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/TransportSearchActionTests.java @@ -487,7 +487,8 @@ public void testCCSRemoteReduceMergeFails() throws Exception { (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -549,7 +550,8 @@ public void testCCSRemoteReduce() throws Exception { (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -590,7 +592,8 @@ public void testCCSRemoteReduce() throws Exception { (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -652,7 +655,8 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -696,7 +700,8 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { @@ -751,7 +756,8 @@ public void onNodeDisconnected(DiscoveryNode node, Transport.Connection connecti (r, l) -> setOnce.set(Tuple.tuple(r, l)), new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(), LogManager.getLogger()), - searchRequest + searchRequest, + () -> null ) ); if (localIndices == null) { From c0900ab9f1ca6a0b418fa20a01da81dd587d8b44 Mon Sep 17 00:00:00 2001 From: Chenyang Ji Date: Mon, 20 May 2024 13:56:06 -0700 Subject: [PATCH 4/7] split changes for query insights plugin Signed-off-by: Chenyang Ji --- CHANGELOG.md | 1 + .../resourcetracker/TaskResourceInfo.java | 3 + .../insights/rules/model/MetricType.java | 12 +- .../TransportTopQueriesAction.java | 37 +++--- .../insights/QueryInsightsTestUtils.java | 2 +- .../listener/QueryInsightsListenerTests.java | 2 +- .../service/QueryInsightsServiceTests.java | 2 +- .../rules/model/SearchQueryRecordTests.java | 2 +- .../search/AbstractSearchAsyncAction.java | 8 +- .../org/opensearch/search/SearchService.java | 77 +++++++------ .../tasks/TaskResourceTrackingService.java | 3 - .../AbstractSearchAsyncActionTests.java | 87 ++++++++++++-- .../tasks/TaskResourceInfoTests.java | 106 ++++++++++++++++++ 13 files changed, 269 insertions(+), 73 deletions(-) create mode 100644 server/src/test/java/org/opensearch/tasks/TaskResourceInfoTests.java diff --git a/CHANGELOG.md b/CHANGELOG.md index db0e26375cbfb..549ead2b587cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add support to disable flush based on translog reader count ([#14027](https://github.com/opensearch-project/OpenSearch/pull/14027)) - [Query Insights] Add exporter support for top n queries ([#12982](https://github.com/opensearch-project/OpenSearch/pull/12982)) - [Query Insights] Add X-Opaque-Id to search request metadata for top n queries ([#13374](https://github.com/opensearch-project/OpenSearch/pull/13374)) +- Add support for query-level resource usage tracking ([#13172](https://github.com/opensearch-project/OpenSearch/pull/13172)) ### Dependencies - Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), [#13559](https://github.com/opensearch-project/OpenSearch/pull/13559)) diff --git a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java index 26ef8673a3283..b23abae175cec 100644 --- a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java +++ b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java @@ -98,6 +98,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + /** + * Builder for {@link TaskResourceInfo} + */ public static class Builder { private TaskResourceUsage taskResourceUsage; private String action; diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java index 4694c757f4ef2..cdd090fbf4804 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/model/MetricType.java @@ -35,7 +35,7 @@ public enum MetricType implements Comparator { /** * JVM heap usage metric type */ - MEMORY; + JVM; /** * Read a MetricType from a StreamInput @@ -93,9 +93,10 @@ public static Set allMetricTypes() { public int compare(final Number a, final Number b) { switch (this) { case LATENCY: - case CPU: - case MEMORY: return Long.compare(a.longValue(), b.longValue()); + case JVM: + case CPU: + return Double.compare(a.doubleValue(), b.doubleValue()); } return -1; } @@ -109,9 +110,10 @@ public int compare(final Number a, final Number b) { Number parseValue(final Object o) { switch (this) { case LATENCY: - case CPU: - case MEMORY: return (Long) o; + case JVM: + case CPU: + return (Double) o; default: return (Number) o; } diff --git a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java index 7949b70a16db6..ddf614211bc41 100644 --- a/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java +++ b/plugins/query-insights/src/main/java/org/opensearch/plugin/insights/rules/transport/top_queries/TransportTopQueriesAction.java @@ -8,6 +8,7 @@ package org.opensearch.plugin.insights.rules.transport.top_queries; +import org.opensearch.OpenSearchException; import org.opensearch.action.FailedNodeException; import org.opensearch.action.support.ActionFilters; import org.opensearch.action.support.nodes.TransportNodesAction; @@ -20,6 +21,7 @@ import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesAction; import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesRequest; import org.opensearch.plugin.insights.rules.action.top_queries.TopQueriesResponse; +import org.opensearch.plugin.insights.rules.model.MetricType; import org.opensearch.plugin.insights.settings.QueryInsightsSettings; import org.opensearch.threadpool.ThreadPool; import org.opensearch.transport.TransportRequest; @@ -27,6 +29,7 @@ import java.io.IOException; import java.util.List; +import java.util.Locale; /** * Transport action for cluster/node level top queries information. @@ -78,18 +81,17 @@ protected TopQueriesResponse newResponse( final List responses, final List failures ) { - int size; - switch (topQueriesRequest.getMetricType()) { - case CPU: - size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_CPU_QUERIES_SIZE); - break; - case MEMORY: - size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_MEMORY_QUERIES_SIZE); - break; - default: - size = clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE); + if (topQueriesRequest.getMetricType() == MetricType.LATENCY) { + return new TopQueriesResponse( + clusterService.getClusterName(), + responses, + failures, + clusterService.getClusterSettings().get(QueryInsightsSettings.TOP_N_LATENCY_QUERIES_SIZE), + MetricType.LATENCY + ); + } else { + throw new OpenSearchException(String.format(Locale.ROOT, "invalid metric type %s", topQueriesRequest.getMetricType())); } - return new TopQueriesResponse(clusterService.getClusterName(), responses, failures, size, topQueriesRequest.getMetricType()); } @Override @@ -105,10 +107,15 @@ protected TopQueries newNodeResponse(final StreamInput in) throws IOException { @Override protected TopQueries nodeOperation(final NodeRequest nodeRequest) { final TopQueriesRequest request = nodeRequest.request; - return new TopQueries( - clusterService.localNode(), - queryInsightsService.getTopQueriesService(request.getMetricType()).getTopQueriesRecords(true) - ); + if (request.getMetricType() == MetricType.LATENCY) { + return new TopQueries( + clusterService.localNode(), + queryInsightsService.getTopQueriesService(MetricType.LATENCY).getTopQueriesRecords(true) + ); + } else { + throw new OpenSearchException(String.format(Locale.ROOT, "invalid metric type %s", request.getMetricType())); + } + } /** diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java index eccef659f0558..870ef5b9c8be9 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/QueryInsightsTestUtils.java @@ -64,7 +64,7 @@ public static List generateQueryInsightRecords(int lower, int randomLongBetween(1000, 10000), MetricType.CPU, randomDouble(), - MetricType.MEMORY, + MetricType.JVM, randomDouble() ); diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java index 8daddebb06940..b794a2e4b8608 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/listener/QueryInsightsListenerTests.java @@ -184,7 +184,7 @@ public void testSetEnabled() { when(queryInsightsService.isCollectionEnabled(MetricType.LATENCY)).thenReturn(false); when(queryInsightsService.isCollectionEnabled(MetricType.CPU)).thenReturn(false); - when(queryInsightsService.isCollectionEnabled(MetricType.MEMORY)).thenReturn(false); + when(queryInsightsService.isCollectionEnabled(MetricType.JVM)).thenReturn(false); queryInsightsListener.setEnableTopQueries(MetricType.LATENCY, false); assertFalse(queryInsightsListener.isEnabled()); } diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java index 114b7ccea7a65..428f615ce2f90 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/core/service/QueryInsightsServiceTests.java @@ -38,7 +38,7 @@ public void setup() { queryInsightsService = new QueryInsightsService(clusterSettings, threadPool, client); queryInsightsService.enableCollection(MetricType.LATENCY, true); queryInsightsService.enableCollection(MetricType.CPU, true); - queryInsightsService.enableCollection(MetricType.MEMORY, true); + queryInsightsService.enableCollection(MetricType.JVM, true); } public void testAddRecordToLimitAndDrain() { diff --git a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java index ad45b53ec5363..793d5878e2300 100644 --- a/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java +++ b/plugins/query-insights/src/test/java/org/opensearch/plugin/insights/rules/model/SearchQueryRecordTests.java @@ -39,7 +39,7 @@ public void testSerializationAndEquals() throws Exception { public void testAllMetricTypes() { Set allMetrics = MetricType.allMetricTypes(); - Set expected = new HashSet<>(Arrays.asList(MetricType.LATENCY, MetricType.CPU, MetricType.MEMORY)); + Set expected = new HashSet<>(Arrays.asList(MetricType.LATENCY, MetricType.CPU, MetricType.JVM)); assertEquals(expected, allMetrics); } diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 2e234e89e3f0e..80e3d28c2df64 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -627,9 +627,11 @@ protected void onShardResult(Result result, SearchShardIterator shardIt) { public void setPhaseResourceUsages() { ThreadContext threadContext = searchRequestContext.getThreadContextSupplier().get(); - List taskResourceUsages = threadContext.getResponseHeaders().get(TASK_RESOURCE_USAGE); - if (taskResourceUsages != null && taskResourceUsages.size() > 0) { - searchRequestContext.recordPhaseResourceUsage(taskResourceUsages.get(0)); + if (threadContext != null) { + List taskResourceUsages = threadContext.getResponseHeaders().get(TASK_RESOURCE_USAGE); + if (taskResourceUsages != null && taskResourceUsages.size() > 0) { + searchRequestContext.recordPhaseResourceUsage(taskResourceUsages.get(0)); + } } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index 4f01cecf51326..d2a5020ae68d7 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -79,6 +79,7 @@ import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; +import org.opensearch.core.tasks.resourcetracker.ThreadResourceInfo; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; @@ -1138,43 +1139,53 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear return searchContext; } - protected void writeTaskResourceUsage(SearchShardTask task) { - // Get resource usages when task starts - Map startValues = task.getActiveThreadResourceInfo( - Thread.currentThread().getId(), - ResourceStatsType.WORKER_STATS - ).getResourceUsageInfo().getStatsInfo(); - - // Get current resource usages - ResourceUsageMetric[] endValues = TaskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); - long cpu = -1, mem = -1; - for (ResourceUsageMetric endValue : endValues) { - if (endValue.getStats() == ResourceStats.MEMORY) { - mem = endValue.getValue(); - } else if (endValue.getStats() == ResourceStats.CPU) { - cpu = endValue.getValue(); + private void writeTaskResourceUsage(SearchShardTask task) { + try { + // Get resource usages when task starts + ThreadResourceInfo threadResourceInfo = task.getActiveThreadResourceInfo( + Thread.currentThread().getId(), + ResourceStatsType.WORKER_STATS + ); + if (threadResourceInfo == null) { + return; + } + Map startValues = threadResourceInfo.getResourceUsageInfo().getStatsInfo(); + if (startValues.containsKey(ResourceStats.CPU) && startValues.containsKey(ResourceStats.MEMORY)) { + return; + } + // Get current resource usages + ResourceUsageMetric[] endValues = TaskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); + long cpu = -1, mem = -1; + for (ResourceUsageMetric endValue : endValues) { + if (endValue.getStats() == ResourceStats.MEMORY) { + mem = endValue.getValue(); + } else if (endValue.getStats() == ResourceStats.CPU) { + cpu = endValue.getValue(); + } + } + if (cpu == -1 || mem == -1) { + logger.debug("Invalid resource usage value, cpu [{}], memory [{}]: ", cpu, mem); + return; } - } - if (cpu == -1 || mem == -1) { - logger.debug("Invalid resource usage value, cpu [{}], memory [{}]: ", cpu, mem); - return; - } - // initial resource usages when the task started - TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setAction(task.getAction()) - .setTaskId(task.getId()) - .setParentTaskId(task.getParentTaskId().getId()) - .setNodeId(clusterService.localNode().getId()) - .setTaskResourceUsage( - new TaskResourceUsage( - cpu - startValues.get(ResourceStats.CPU).getStartValue(), - mem - startValues.get(ResourceStats.MEMORY).getStartValue() + // Build task resource usage info + TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setAction(task.getAction()) + .setTaskId(task.getId()) + .setParentTaskId(task.getParentTaskId().getId()) + .setNodeId(clusterService.localNode().getId()) + .setTaskResourceUsage( + new TaskResourceUsage( + cpu - startValues.get(ResourceStats.CPU).getStartValue(), + mem - startValues.get(ResourceStats.MEMORY).getStartValue() + ) ) - ) - .build(); + .build(); - threadPool.getThreadContext().removeResponseHeader(TASK_RESOURCE_USAGE); - threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); + threadPool.getThreadContext().removeResponseHeader(TASK_RESOURCE_USAGE); + threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); + } catch (Exception e) { + logger.debug("Error during writing task resource usage: ", e); + } } private void freeAllContextForIndex(Index index) { diff --git a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java index 0ea27b16f19ae..0abe8cbe061d4 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java @@ -15,7 +15,6 @@ import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.ExceptionsHelper; import org.opensearch.common.SuppressForbidden; -import org.opensearch.common.annotation.PublicApi; import org.opensearch.common.inject.Inject; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Setting; @@ -40,7 +39,6 @@ /** * Service that helps track resource usage of tasks running on a node. */ -@PublicApi(since = "2.2.0") @SuppressForbidden(reason = "ThreadMXBean#getThreadAllocatedBytes") public class TaskResourceTrackingService implements RunnableTaskExecutionListener { @@ -267,7 +265,6 @@ private ThreadContext.StoredContext addTaskIdToThreadContext(Task task) { /** * Listener that gets invoked when a task execution completes. */ - @PublicApi(since = "2.2.0") public interface TaskCompletionListener { void onTaskCompleted(Task task); } diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 551842b9fcb7a..d4b405ef3ddd9 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -49,6 +49,8 @@ import org.opensearch.core.common.breaker.NoopCircuitBreaker; import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; import org.opensearch.index.query.MatchAllQueryBuilder; import org.opensearch.index.shard.ShardNotFoundException; import org.opensearch.search.SearchPhaseResult; @@ -87,6 +89,7 @@ import java.util.function.BiFunction; import java.util.stream.IntStream; +import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.instanceOf; @@ -123,7 +126,8 @@ private AbstractSearchAsyncAction createAction( ArraySearchPhaseResults results, ActionListener listener, final boolean controlled, - final AtomicLong expected + final AtomicLong expected, + final TaskResourceUsage resourceUsage ) { return createAction( request, @@ -133,6 +137,7 @@ private AbstractSearchAsyncAction createAction( false, false, expected, + resourceUsage, new SearchShardIterator(null, null, Collections.emptyList(), null) ); } @@ -145,6 +150,7 @@ private AbstractSearchAsyncAction createAction( final boolean failExecutePhaseOnShard, final boolean catchExceptionWhenExecutePhaseOnShard, final AtomicLong expected, + final TaskResourceUsage resourceUsage, final SearchShardIterator... shards ) { @@ -166,6 +172,14 @@ private AbstractSearchAsyncAction createAction( return null; }; + TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setTaskResourceUsage(resourceUsage) + .setTaskId(randomLong()) + .setParentTaskId(randomLong()) + .setAction(randomAlphaOfLengthBetween(1, 5)) + .setNodeId(randomAlphaOfLengthBetween(1, 5)) + .build(); + threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); + return new AbstractSearchAsyncAction( "test", logger, @@ -187,7 +201,7 @@ private AbstractSearchAsyncAction createAction( new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), request, - () -> null + () -> threadPool.getThreadContext() ), NoopTracer.INSTANCE ) { @@ -249,7 +263,8 @@ private void runTestTook(final boolean controlled) { new ArraySearchPhaseResults<>(10), null, controlled, - expected + expected, + new TaskResourceUsage(0, 0) ); final long actual = action.buildTookInMillis(); if (controlled) { @@ -269,7 +284,8 @@ public void testBuildShardSearchTransportRequest() { new ArraySearchPhaseResults<>(10), null, false, - expected + expected, + new TaskResourceUsage(randomLong(), randomLong()) ); String clusterAlias = randomBoolean() ? null : randomAlphaOfLengthBetween(5, 10); SearchShardIterator iterator = new SearchShardIterator( @@ -292,19 +308,39 @@ public void testBuildShardSearchTransportRequest() { public void testBuildSearchResponse() { SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(randomBoolean()); ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(10); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, null, false, new AtomicLong()); + TaskResourceUsage taskResourceUsage = new TaskResourceUsage(randomLong(), randomLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + null, + false, + new AtomicLong(), + taskResourceUsage + ); InternalSearchResponse internalSearchResponse = InternalSearchResponse.empty(); SearchResponse searchResponse = action.buildSearchResponse(internalSearchResponse, action.buildShardFailures(), null, null); assertSame(searchResponse.getAggregations(), internalSearchResponse.aggregations()); assertSame(searchResponse.getSuggest(), internalSearchResponse.suggest()); assertSame(searchResponse.getProfileResults(), internalSearchResponse.profile()); assertSame(searchResponse.getHits(), internalSearchResponse.hits()); + List resourceUsages = threadPool.getThreadContext().getResponseHeaders().get(TASK_RESOURCE_USAGE); + assertNotNull(resourceUsages); + assertEquals(1, resourceUsages.size()); + assertTrue(resourceUsages.get(0).contains(Long.toString(taskResourceUsage.getCpuTimeInNanos()))); + assertTrue(resourceUsages.get(0).contains(Long.toString(taskResourceUsage.getMemoryInBytes()))); } public void testBuildSearchResponseAllowPartialFailures() { SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true); final ArraySearchPhaseResults queryResult = new ArraySearchPhaseResults<>(10); - AbstractSearchAsyncAction action = createAction(searchRequest, queryResult, null, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + queryResult, + null, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); action.onShardFailure( 0, new SearchShardTarget("node", new ShardId("index", "index-uuid", 0), null, OriginalIndices.NONE), @@ -326,7 +362,14 @@ public void testSendSearchResponseDisallowPartialFailures() { List> nodeLookups = new ArrayList<>(); int numFailures = randomIntBetween(1, 5); ArraySearchPhaseResults phaseResults = phaseResults(requestIds, nodeLookups, numFailures); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); for (int i = 0; i < numFailures; i++) { ShardId failureShardId = new ShardId("index", "index-uuid", i); String failureClusterAlias = randomBoolean() ? null : randomAlphaOfLengthBetween(5, 10); @@ -405,7 +448,14 @@ public void testOnPhaseFailure() { Set requestIds = new HashSet<>(); List> nodeLookups = new ArrayList<>(); ArraySearchPhaseResults phaseResults = phaseResults(requestIds, nodeLookups, 0); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); action.onPhaseFailure(new SearchPhase("test") { @Override @@ -429,7 +479,14 @@ public void testShardNotAvailableWithDisallowPartialFailures() { ActionListener listener = ActionListener.wrap(response -> fail("onResponse should not be called"), exception::set); int numShards = randomIntBetween(2, 10); ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(numShards); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); // skip one to avoid the "all shards failed" failure. SearchShardIterator skipIterator = new SearchShardIterator(null, null, Collections.emptyList(), null); skipIterator.resetAndSkip(); @@ -451,7 +508,14 @@ public void testShardNotAvailableWithIgnoreUnavailable() { ActionListener listener = ActionListener.wrap(response -> {}, exception::set); int numShards = randomIntBetween(2, 10); ArraySearchPhaseResults phaseResults = new ArraySearchPhaseResults<>(numShards); - AbstractSearchAsyncAction action = createAction(searchRequest, phaseResults, listener, false, new AtomicLong()); + AbstractSearchAsyncAction action = createAction( + searchRequest, + phaseResults, + listener, + false, + new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()) + ); // skip one to avoid the "all shards failed" failure. SearchShardIterator skipIterator = new SearchShardIterator(null, null, Collections.emptyList(), null); skipIterator.resetAndSkip(); @@ -522,6 +586,7 @@ public void onFailure(Exception e) { true, false, new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()), shards ); action.run(); @@ -569,6 +634,7 @@ public void onFailure(Exception e) { false, false, new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()), shards ); action.run(); @@ -621,6 +687,7 @@ public void onFailure(Exception e) { false, catchExceptionWhenExecutePhaseOnShard, new AtomicLong(), + new TaskResourceUsage(randomLong(), randomLong()), shards ); action.run(); diff --git a/server/src/test/java/org/opensearch/tasks/TaskResourceInfoTests.java b/server/src/test/java/org/opensearch/tasks/TaskResourceInfoTests.java new file mode 100644 index 0000000000000..e0bfb8710bbaa --- /dev/null +++ b/server/src/test/java/org/opensearch/tasks/TaskResourceInfoTests.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.tasks; + +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.ToXContent; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.test.OpenSearchTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Locale; + +/** + * Test cases for TaskResourceInfo + */ +public class TaskResourceInfoTests extends OpenSearchTestCase { + private final Long cpuUsage = randomNonNegativeLong(); + private final Long memoryUsage = randomNonNegativeLong(); + private final String action = randomAlphaOfLengthBetween(1, 10); + private final Long taskId = randomNonNegativeLong(); + private final Long parentTaskId = randomNonNegativeLong(); + private final String nodeId = randomAlphaOfLengthBetween(1, 10); + private TaskResourceInfo taskResourceInfo; + private TaskResourceUsage taskResourceUsage; + + @Before + public void setUpVariables() { + taskResourceUsage = new TaskResourceUsage(cpuUsage, memoryUsage); + taskResourceInfo = new TaskResourceInfo(action, taskId, parentTaskId, nodeId, taskResourceUsage); + } + + public void testGetters() { + assertEquals(action, taskResourceInfo.getAction()); + assertEquals(taskId.longValue(), taskResourceInfo.getTaskId()); + assertEquals(parentTaskId.longValue(), taskResourceInfo.getParentTaskId()); + assertEquals(nodeId, taskResourceInfo.getNodeId()); + assertEquals(taskResourceUsage, taskResourceInfo.getTaskResourceUsage()); + } + + public void testEqualsAndHashCode() { + TaskResourceInfo taskResourceInfoCopy = new TaskResourceInfo(action, taskId, parentTaskId, nodeId, taskResourceUsage); + assertEquals(taskResourceInfo, taskResourceInfoCopy); + assertEquals(taskResourceInfo.hashCode(), taskResourceInfoCopy.hashCode()); + TaskResourceInfo differentTaskResourceInfo = new TaskResourceInfo( + "differentAction", + taskId, + parentTaskId, + nodeId, + taskResourceUsage + ); + assertNotEquals(taskResourceInfo, differentTaskResourceInfo); + assertNotEquals(taskResourceInfo.hashCode(), differentTaskResourceInfo.hashCode()); + } + + public void testSerialization() throws IOException { + BytesStreamOutput output = new BytesStreamOutput(); + taskResourceInfo.writeTo(output); + StreamInput input = StreamInput.wrap(output.bytes().toBytesRef().bytes); + TaskResourceInfo deserializedTaskResourceInfo = TaskResourceInfo.readFromStream(input); + assertEquals(taskResourceInfo, deserializedTaskResourceInfo); + } + + public void testToString() { + String expectedString = String.format( + Locale.ROOT, + "{\"action\":\"%s\",\"taskId\":%s,\"parentTaskId\":%s,\"nodeId\":\"%s\",\"taskResourceUsage\":{\"cpu_time_in_nanos\":%s,\"memory_in_bytes\":%s}}", + action, + taskId, + parentTaskId, + nodeId, + taskResourceUsage.getCpuTimeInNanos(), + taskResourceUsage.getMemoryInBytes() + ); + assertTrue(expectedString.equals(taskResourceInfo.toString())); + } + + public void testToXContent() throws IOException { + char[] expectedXcontent = String.format( + Locale.ROOT, + "{\"action\":\"%s\",\"taskId\":%s,\"parentTaskId\":%s,\"nodeId\":\"%s\",\"taskResourceUsage\":{\"cpu_time_in_nanos\":%s,\"memory_in_bytes\":%s}}", + action, + taskId, + parentTaskId, + nodeId, + taskResourceUsage.getCpuTimeInNanos(), + taskResourceUsage.getMemoryInBytes() + ).toCharArray(); + + XContentBuilder builder = MediaTypeRegistry.contentBuilder(MediaTypeRegistry.JSON); + char[] xContent = BytesReference.bytes(taskResourceInfo.toXContent(builder, ToXContent.EMPTY_PARAMS)).utf8ToString().toCharArray(); + assertEquals(Arrays.hashCode(expectedXcontent), Arrays.hashCode(xContent)); + } +} From cdfa84ae82050e63a6adf20ef7233abf12c1db71 Mon Sep 17 00:00:00 2001 From: Chenyang Ji Date: Wed, 29 May 2024 16:38:01 -0700 Subject: [PATCH 5/7] improve the supplier logic and other misc items Signed-off-by: Chenyang Ji --- .../resourcetracker/TaskResourceInfo.java | 7 +---- .../search/AbstractSearchAsyncAction.java | 12 ++----- .../action/search/SearchRequestContext.java | 31 ++++++++++--------- .../action/search/TransportSearchAction.java | 13 +++++--- .../cluster/service/ClusterService.java | 2 -- .../common/util/concurrent/ThreadContext.java | 5 +-- .../main/java/org/opensearch/node/Node.java | 9 ++++-- .../org/opensearch/search/SearchService.java | 9 ++++-- .../tasks/TaskResourceTrackingService.java | 2 +- .../AbstractSearchAsyncActionTests.java | 2 +- .../snapshots/SnapshotResiliencyTests.java | 1 + .../java/org/opensearch/node/MockNode.java | 10 ++++-- .../opensearch/search/MockSearchService.java | 7 +++-- 13 files changed, 56 insertions(+), 54 deletions(-) diff --git a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java index b23abae175cec..373cdbfa7e9a1 100644 --- a/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java +++ b/libs/core/src/main/java/org/opensearch/core/tasks/resourcetracker/TaskResourceInfo.java @@ -18,7 +18,6 @@ import org.opensearch.core.xcontent.MediaTypeRegistry; import org.opensearch.core.xcontent.ToXContentObject; import org.opensearch.core.xcontent.XContentBuilder; -import org.opensearch.core.xcontent.XContentParser; import java.io.IOException; import java.util.Objects; @@ -63,7 +62,7 @@ public TaskResourceInfo( } public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( - "task_resource_usage", + "task_resource_info", a -> new Builder().setAction((String) a[0]) .setTaskId((Long) a[1]) .setParentTaskId((Long) a[2]) @@ -80,10 +79,6 @@ public TaskResourceInfo( PARSER.declareObject(constructorArg(), TaskResourceUsage.PARSER, TASK_RESOURCE_USAGE); } - public static TaskResourceInfo fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); - } - @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 80e3d28c2df64..412d077716e33 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -48,7 +48,6 @@ import org.opensearch.common.lease.Releasables; import org.opensearch.common.util.concurrent.AbstractRunnable; import org.opensearch.common.util.concurrent.AtomicArray; -import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.action.ActionListener; import org.opensearch.core.action.ShardOperationFailedException; import org.opensearch.core.index.shard.ShardId; @@ -79,8 +78,6 @@ import java.util.function.BiFunction; import java.util.stream.Collectors; -import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; - /** * This is an abstract base class that encapsulates the logic to fan out to all shards in provided {@link GroupShardsIterator} * and collect the results. If a shard request returns a failure this class handles the advance to the next replica of the shard until @@ -626,13 +623,8 @@ protected void onShardResult(Result result, SearchShardIterator shardIt) { } public void setPhaseResourceUsages() { - ThreadContext threadContext = searchRequestContext.getThreadContextSupplier().get(); - if (threadContext != null) { - List taskResourceUsages = threadContext.getResponseHeaders().get(TASK_RESOURCE_USAGE); - if (taskResourceUsages != null && taskResourceUsages.size() > 0) { - searchRequestContext.recordPhaseResourceUsage(taskResourceUsages.get(0)); - } - } + String taskResourceUsage = searchRequestContext.getTaskResourceUsageSupplier().get(); + searchRequestContext.recordPhaseResourceUsage(taskResourceUsage); } private void onShardResultConsumed(Result result, SearchShardIterator shardIt) { diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java index 389109c1860ac..45bb10b989ca7 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java @@ -12,7 +12,6 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.search.TotalHits; import org.opensearch.common.annotation.InternalApi; -import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.common.xcontent.XContentHelper; import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; @@ -46,12 +45,12 @@ public class SearchRequestContext { private final SearchRequest searchRequest; private final List phaseResourceUsage; - private final Supplier threadContextSupplier; + private final Supplier taskResourceUsageSupplier; SearchRequestContext( final SearchRequestOperationsListener searchRequestOperationsListener, final SearchRequest searchRequest, - final Supplier threadContextSupplier + final Supplier taskResourceUsageSupplier ) { this.searchRequestOperationsListener = searchRequestOperationsListener; this.absoluteStartNanos = System.nanoTime(); @@ -59,7 +58,7 @@ public class SearchRequestContext { this.shardStats = new EnumMap<>(ShardStatsFieldNames.class); this.searchRequest = searchRequest; this.phaseResourceUsage = new ArrayList<>(); - this.threadContextSupplier = threadContextSupplier; + this.taskResourceUsageSupplier = taskResourceUsageSupplier; } SearchRequestOperationsListener getSearchRequestOperationsListener() { @@ -131,23 +130,25 @@ String formattedShardStats() { } } - public SearchRequest getRequest() { - return searchRequest; + public Supplier getTaskResourceUsageSupplier() { + return taskResourceUsageSupplier; } - public Supplier getThreadContextSupplier() { - return threadContextSupplier; + public SearchRequest getRequest() { + return searchRequest; } public void recordPhaseResourceUsage(String usage) { try { - XContentParser parser = XContentHelper.createParser( - NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - new BytesArray(usage), - MediaTypeRegistry.JSON - ); - this.phaseResourceUsage.add(TaskResourceInfo.PARSER.apply(parser, null)); + if (usage != null && !usage.isEmpty()) { + XContentParser parser = XContentHelper.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + new BytesArray(usage), + MediaTypeRegistry.JSON + ); + this.phaseResourceUsage.add(TaskResourceInfo.PARSER.apply(parser, null)); + } } catch (IOException e) { logger.debug("fail to parse phase resource usages: ", e); } diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 9ae1dde5312fd..09da8d03a0aeb 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -125,6 +125,7 @@ import static org.opensearch.action.search.SearchType.DFS_QUERY_THEN_FETCH; import static org.opensearch.action.search.SearchType.QUERY_THEN_FETCH; import static org.opensearch.search.sort.FieldSortBuilder.hasPrimaryFieldSort; +import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; /** * Perform search action @@ -451,10 +452,14 @@ private void executeRequest( logger, TraceableSearchRequestOperationsListener.create(tracer, requestSpan) ); - SearchRequestContext searchRequestContext = new SearchRequestContext( - requestOperationsListeners, - originalSearchRequest, - threadPool::getThreadContext + SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest, () -> { + List taskResourceUsages = threadPool.getThreadContext().getResponseHeaders().get(TASK_RESOURCE_USAGE); + if (taskResourceUsages != null && taskResourceUsages.size() > 0) { + return taskResourceUsages.get(0); + } + return null; + } + ); searchRequestContext.getSearchRequestOperationsListener().onRequestStart(searchRequestContext); diff --git a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java index fec0fc074b3f1..fa61375e85c25 100644 --- a/server/src/main/java/org/opensearch/cluster/service/ClusterService.java +++ b/server/src/main/java/org/opensearch/cluster/service/ClusterService.java @@ -54,7 +54,6 @@ import org.opensearch.common.settings.Settings; import org.opensearch.index.IndexingPressureService; import org.opensearch.node.Node; -import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import java.util.Collections; @@ -92,7 +91,6 @@ public class ClusterService extends AbstractLifecycleComponent { private RerouteService rerouteService; private IndexingPressureService indexingPressureService; - private TaskResourceTrackingService taskResourceTrackingService; public ClusterService( Settings settings, diff --git a/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java b/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java index 89bed9894790c..0b1aa9a4a759a 100644 --- a/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java +++ b/server/src/main/java/org/opensearch/common/util/concurrent/ThreadContext.java @@ -489,10 +489,7 @@ public void addResponseHeader(final String key, final String value) { * @param key the header name */ public void removeResponseHeader(final String key) { - ThreadContextStruct threadContextStruct = threadLocal.get(); - if (threadContextStruct.responseHeaders != null) { - threadContextStruct.responseHeaders.remove(key); - } + threadLocal.get().responseHeaders.remove(key); } /** diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index cb1f2caa082fc..f7a901335f34a 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -1261,7 +1261,8 @@ protected Node( searchModule.getFetchPhase(), responseCollectorService, circuitBreakerService, - searchModule.getIndexSearcherExecutor(threadPool) + searchModule.getIndexSearcherExecutor(threadPool), + taskResourceTrackingService ); final List> tasksExecutors = pluginsService.filterPlugins(PersistentTaskPlugin.class) @@ -1905,7 +1906,8 @@ protected SearchService newSearchService( FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { return new SearchService( clusterService, @@ -1917,7 +1919,8 @@ protected SearchService newSearchService( fetchPhase, responseCollectorService, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index d2a5020ae68d7..b9207fcb66cac 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -347,6 +347,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private final AtomicInteger openPitContexts = new AtomicInteger(); private final String sessionId = UUIDs.randomBase64UUID(); private final Executor indexSearcherExecutor; + private final TaskResourceTrackingService taskResourceTrackingService; public SearchService( ClusterService clusterService, @@ -358,7 +359,8 @@ public SearchService( FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { Settings settings = clusterService.getSettings(); this.threadPool = threadPool; @@ -375,6 +377,7 @@ public SearchService( circuitBreakerService.getBreaker(CircuitBreaker.REQUEST) ); this.indexSearcherExecutor = indexSearcherExecutor; + this.taskResourceTrackingService = taskResourceTrackingService; TimeValue keepAliveInterval = KEEPALIVE_INTERVAL_SETTING.get(settings); setKeepAlives(DEFAULT_KEEPALIVE_SETTING.get(settings), MAX_KEEPALIVE_SETTING.get(settings)); setPitKeepAlives(DEFAULT_KEEPALIVE_SETTING.get(settings), MAX_PIT_KEEPALIVE_SETTING.get(settings)); @@ -1150,11 +1153,11 @@ private void writeTaskResourceUsage(SearchShardTask task) { return; } Map startValues = threadResourceInfo.getResourceUsageInfo().getStatsInfo(); - if (startValues.containsKey(ResourceStats.CPU) && startValues.containsKey(ResourceStats.MEMORY)) { + if (!(startValues.containsKey(ResourceStats.CPU) && startValues.containsKey(ResourceStats.MEMORY))) { return; } // Get current resource usages - ResourceUsageMetric[] endValues = TaskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); + ResourceUsageMetric[] endValues = taskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); long cpu = -1, mem = -1; for (ResourceUsageMetric endValue : endValues) { if (endValue.getStats() == ResourceStats.MEMORY) { diff --git a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java index 0abe8cbe061d4..59e719a3c3250 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java @@ -212,7 +212,7 @@ public Map getResourceAwareTasks() { return Collections.unmodifiableMap(resourceAwareTasks); } - public static ResourceUsageMetric[] getResourceUsageMetricsForThread(long threadId) { + public ResourceUsageMetric[] getResourceUsageMetricsForThread(long threadId) { ResourceUsageMetric currentMemoryUsage = new ResourceUsageMetric( ResourceStats.MEMORY, threadMXBean.getThreadAllocatedBytes(threadId) diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index d4b405ef3ddd9..730f0569f8bc5 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -201,7 +201,7 @@ private AbstractSearchAsyncAction createAction( new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), request, - () -> threadPool.getThreadContext() + () -> "" ), NoopTracer.INSTANCE ) { diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index 86de008b5dee5..bbd1bcdc35c82 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2291,6 +2291,7 @@ public void onFailure(final Exception e) { new FetchPhase(Collections.emptyList()), responseCollectorService, new NoneCircuitBreakerService(), + null, null ); SearchPhaseController searchPhaseController = new SearchPhaseController( diff --git a/test/framework/src/main/java/org/opensearch/node/MockNode.java b/test/framework/src/main/java/org/opensearch/node/MockNode.java index e6c7e21d5b3ea..19c65ec169d3c 100644 --- a/test/framework/src/main/java/org/opensearch/node/MockNode.java +++ b/test/framework/src/main/java/org/opensearch/node/MockNode.java @@ -60,6 +60,7 @@ import org.opensearch.search.SearchService; import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.query.QueryPhase; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.telemetry.tracing.Tracer; import org.opensearch.test.MockHttpTransport; import org.opensearch.test.transport.MockTransportService; @@ -155,7 +156,8 @@ protected SearchService newSearchService( FetchPhase fetchPhase, ResponseCollectorService responseCollectorService, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { if (getPluginsService().filterPlugins(MockSearchService.TestPlugin.class).isEmpty()) { return super.newSearchService( @@ -168,7 +170,8 @@ protected SearchService newSearchService( fetchPhase, responseCollectorService, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } return new MockSearchService( @@ -180,7 +183,8 @@ protected SearchService newSearchService( queryPhase, fetchPhase, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } diff --git a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java index a0bbcb7be05f9..6c9ace06c8219 100644 --- a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java +++ b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java @@ -42,6 +42,7 @@ import org.opensearch.search.fetch.FetchPhase; import org.opensearch.search.internal.ReaderContext; import org.opensearch.search.query.QueryPhase; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.threadpool.ThreadPool; import java.util.HashMap; @@ -96,7 +97,8 @@ public MockSearchService( QueryPhase queryPhase, FetchPhase fetchPhase, CircuitBreakerService circuitBreakerService, - Executor indexSearcherExecutor + Executor indexSearcherExecutor, + TaskResourceTrackingService taskResourceTrackingService ) { super( clusterService, @@ -108,7 +110,8 @@ public MockSearchService( fetchPhase, null, circuitBreakerService, - indexSearcherExecutor + indexSearcherExecutor, + taskResourceTrackingService ); } From d671dd25291151251d73f3c4a4156b0142e78ad5 Mon Sep 17 00:00:00 2001 From: Chenyang Ji Date: Thu, 30 May 2024 15:21:36 -0700 Subject: [PATCH 6/7] track resource usage for failed requests Signed-off-by: Chenyang Ji --- .../search/AbstractSearchAsyncAction.java | 7 ++++ .../action/search/FetchSearchPhase.java | 1 + .../SearchRequestOperationsListener.java | 13 +++++++ .../org/opensearch/search/SearchService.java | 38 +++++++++---------- 4 files changed, 39 insertions(+), 20 deletions(-) diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index 412d077716e33..af84422df7067 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -469,6 +469,10 @@ private void onRequestEnd(SearchRequestContext searchRequestContext) { this.searchRequestContext.getSearchRequestOperationsListener().onRequestEnd(this, searchRequestContext); } + private void onRequestFailure(SearchRequestContext searchRequestContext) { + this.searchRequestContext.getSearchRequestOperationsListener().onRequestFailure(this, searchRequestContext); + } + private void executePhase(SearchPhase phase) { Span phaseSpan = tracer.startSpan(SpanCreationContext.server().name("[phase/" + phase.getName() + "]")); try (final SpanScope scope = tracer.withSpanInScope(phaseSpan)) { @@ -507,6 +511,7 @@ ShardSearchFailure[] buildShardFailures() { private void onShardFailure(final int shardIndex, @Nullable SearchShardTarget shard, final SearchShardIterator shardIt, Exception e) { // we always add the shard failure for a specific shard instance // we do make sure to clean it on a successful response from a shard + setPhaseResourceUsages(); onShardFailure(shardIndex, shard, e); SearchShardTarget nextShard = FailAwareWeightedRouting.getInstance() .findNext(shardIt, clusterState, e, () -> totalOps.incrementAndGet()); @@ -757,6 +762,7 @@ public void sendSearchResponse(InternalSearchResponse internalSearchResponse, At @Override public final void onPhaseFailure(SearchPhase phase, String msg, Throwable cause) { + setPhaseResourceUsages(); if (currentPhaseHasLifecycle) { this.searchRequestContext.getSearchRequestOperationsListener().onPhaseFailure(this, cause); } @@ -786,6 +792,7 @@ private void raisePhaseFailure(SearchPhaseExecutionException exception) { }); } Releasables.close(releasables); + onRequestFailure(searchRequestContext); listener.onFailure(exception); } diff --git a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java index eb71a302e8e78..2ad7f8a29896c 100644 --- a/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java +++ b/server/src/main/java/org/opensearch/action/search/FetchSearchPhase.java @@ -255,6 +255,7 @@ public void onFailure(Exception e) { e ); progressListener.notifyFetchFailure(shardIndex, shardTarget, e); + context.setPhaseResourceUsages(); counter.onFailure(shardIndex, shardTarget, e); } finally { // the search context might not be cleared on the node where the fetch was executed for example diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java b/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java index b944572cef122..61f19977ae5ce 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestOperationsListener.java @@ -51,6 +51,8 @@ protected void onRequestStart(SearchRequestContext searchRequestContext) {} protected void onRequestEnd(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} + protected void onRequestFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) {} + protected boolean isEnabled(SearchRequest searchRequest) { return isEnabled(); } @@ -133,6 +135,17 @@ public void onRequestEnd(SearchPhaseContext context, SearchRequestContext search } } + @Override + public void onRequestFailure(SearchPhaseContext context, SearchRequestContext searchRequestContext) { + for (SearchRequestOperationsListener listener : listeners) { + try { + listener.onRequestFailure(context, searchRequestContext); + } catch (Exception e) { + logger.warn(() -> new ParameterizedMessage("onRequestFailure listener [{}] failed", listener), e); + } + } + } + public List getListeners() { return listeners; } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index b9207fcb66cac..d5c2b13eb5041 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -565,12 +565,13 @@ private DfsSearchResult executeDfsPhase(ShardSearchRequest request, SearchShardT SearchContext context = createContext(readerContext, request, task, true) ) { dfsPhase.execute(context); - writeTaskResourceUsage(task); return context.dfsResult(); } catch (Exception e) { logger.trace("Dfs phase failed", e); processFailure(readerContext, e); throw e; + } finally { + writeTaskResourceUsage(task); } } @@ -661,7 +662,6 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh final RescoreDocIds rescoreDocIds = context.rescoreDocIds(); context.queryResult().setRescoreDocIds(rescoreDocIds); readerContext.setRescoreDocIds(rescoreDocIds); - writeTaskResourceUsage(task); return context.queryResult(); } } catch (Exception e) { @@ -674,6 +674,8 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh logger.trace("Query phase failed", e); processFailure(readerContext, e); throw e; + } finally { + writeTaskResourceUsage(task); } } @@ -686,9 +688,7 @@ private QueryFetchSearchResult executeFetchPhase(ReaderContext reader, SearchCon } executor.success(); } - QueryFetchSearchResult result = new QueryFetchSearchResult(context.queryResult(), context.fetchResult()); - writeTaskResourceUsage(context.getTask()); - return result; + return new QueryFetchSearchResult(context.queryResult(), context.fetchResult()); } public void executeQueryPhase( @@ -716,16 +716,13 @@ public void executeQueryPhase( queryPhase.execute(searchContext); executor.success(); readerContext.setRescoreDocIds(searchContext.rescoreDocIds()); - ScrollQuerySearchResult scrollQuerySearchResult = new ScrollQuerySearchResult( - searchContext.queryResult(), - searchContext.shardTarget() - ); - writeTaskResourceUsage(task); - return scrollQuerySearchResult; + return new ScrollQuerySearchResult(searchContext.queryResult(), searchContext.shardTarget()); } catch (Exception e) { logger.trace("Query phase failed", e); // we handle the failure in the failure listener below throw e; + } finally { + writeTaskResourceUsage(task); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -752,13 +749,14 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, final RescoreDocIds rescoreDocIds = searchContext.rescoreDocIds(); searchContext.queryResult().setRescoreDocIds(rescoreDocIds); readerContext.setRescoreDocIds(rescoreDocIds); - writeTaskResourceUsage(task); return searchContext.queryResult(); } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); logger.trace("Query phase failed", e); // we handle the failure in the failure listener below throw e; + } finally { + writeTaskResourceUsage(task); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -802,17 +800,14 @@ public void executeFetchPhase( queryPhase.execute(searchContext); final long afterQueryTime = executor.success(); QueryFetchSearchResult fetchSearchResult = executeFetchPhase(readerContext, searchContext, afterQueryTime); - ScrollQueryFetchSearchResult scrollQueryFetchSearchResult = new ScrollQueryFetchSearchResult( - fetchSearchResult, - searchContext.shardTarget() - ); - writeTaskResourceUsage(task); - return scrollQueryFetchSearchResult; + return new ScrollQueryFetchSearchResult(fetchSearchResult, searchContext.shardTarget()); } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); logger.trace("Fetch phase failed", e); // we handle the failure in the failure listener below throw e; + } finally { + writeTaskResourceUsage(task); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -838,12 +833,13 @@ public void executeFetchPhase(ShardFetchRequest request, SearchShardTask task, A } executor.success(); } - writeTaskResourceUsage(task); return searchContext.fetchResult(); } catch (Exception e) { assert TransportActions.isShardNotAvailableException(e) == false : new AssertionError(e); // we handle the failure in the failure listener below throw e; + } finally { + writeTaskResourceUsage(task); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -1060,6 +1056,7 @@ final SearchContext createContext( context.size(DEFAULT_SIZE); } context.setTask(task); + // pre process queryPhase.preProcess(context); } catch (Exception e) { @@ -1144,7 +1141,7 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear private void writeTaskResourceUsage(SearchShardTask task) { try { - // Get resource usages when task starts + // Get resource usages from when the task started ThreadResourceInfo threadResourceInfo = task.getActiveThreadResourceInfo( Thread.currentThread().getId(), ResourceStatsType.WORKER_STATS @@ -1184,6 +1181,7 @@ private void writeTaskResourceUsage(SearchShardTask task) { ) .build(); + // Remove the existing TASK_RESOURCE_USAGE header since it would have come from an earlier phase in the same request. threadPool.getThreadContext().removeResponseHeader(TASK_RESOURCE_USAGE); threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); } catch (Exception e) { From 69629ff2b5801f3ea16cdd3585b9a322febcf032 Mon Sep 17 00:00:00 2001 From: Chenyang Ji Date: Tue, 4 Jun 2024 15:23:51 -0700 Subject: [PATCH 7/7] move resource usages interactions into TaskResourceTrackingService Signed-off-by: Chenyang Ji --- CHANGELOG.md | 2 +- .../search/AbstractSearchAsyncAction.java | 3 +- .../action/search/SearchRequestContext.java | 44 +++------ .../action/search/TransportSearchAction.java | 19 ++-- .../org/opensearch/search/SearchService.java | 70 ++----------- .../tasks/TaskResourceTrackingService.java | 97 ++++++++++++++++++- .../AbstractSearchAsyncActionTests.java | 2 +- .../snapshots/SnapshotResiliencyTests.java | 5 +- .../TaskResourceTrackingServiceTests.java | 35 +++++++ 9 files changed, 167 insertions(+), 110 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 549ead2b587cf..4b714d882ea65 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,7 +20,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - [Remote Store] Add support to disable flush based on translog reader count ([#14027](https://github.com/opensearch-project/OpenSearch/pull/14027)) - [Query Insights] Add exporter support for top n queries ([#12982](https://github.com/opensearch-project/OpenSearch/pull/12982)) - [Query Insights] Add X-Opaque-Id to search request metadata for top n queries ([#13374](https://github.com/opensearch-project/OpenSearch/pull/13374)) -- Add support for query-level resource usage tracking ([#13172](https://github.com/opensearch-project/OpenSearch/pull/13172)) +- Add support for query level resource usage tracking ([#13172](https://github.com/opensearch-project/OpenSearch/pull/13172)) ### Dependencies - Bump `com.github.spullara.mustache.java:compiler` from 0.9.10 to 0.9.13 ([#13329](https://github.com/opensearch-project/OpenSearch/pull/13329), [#13559](https://github.com/opensearch-project/OpenSearch/pull/13559)) diff --git a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java index af84422df7067..f0fc05c595d6f 100644 --- a/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java +++ b/server/src/main/java/org/opensearch/action/search/AbstractSearchAsyncAction.java @@ -51,6 +51,7 @@ import org.opensearch.core.action.ActionListener; import org.opensearch.core.action.ShardOperationFailedException; import org.opensearch.core.index.shard.ShardId; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.search.SearchPhaseResult; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.AliasFilter; @@ -628,7 +629,7 @@ protected void onShardResult(Result result, SearchShardIterator shardIt) { } public void setPhaseResourceUsages() { - String taskResourceUsage = searchRequestContext.getTaskResourceUsageSupplier().get(); + TaskResourceInfo taskResourceUsage = searchRequestContext.getTaskResourceUsageSupplier().get(); searchRequestContext.recordPhaseResourceUsage(taskResourceUsage); } diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java index 45bb10b989ca7..111d9c64550b3 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequestContext.java @@ -12,21 +12,15 @@ import org.apache.logging.log4j.Logger; import org.apache.lucene.search.TotalHits; import org.opensearch.common.annotation.InternalApi; -import org.opensearch.common.xcontent.XContentHelper; -import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; -import org.opensearch.core.xcontent.DeprecationHandler; -import org.opensearch.core.xcontent.MediaTypeRegistry; -import org.opensearch.core.xcontent.NamedXContentRegistry; -import org.opensearch.core.xcontent.XContentParser; -import java.io.IOException; import java.util.ArrayList; import java.util.EnumMap; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.concurrent.LinkedBlockingQueue; import java.util.function.Supplier; /** @@ -44,20 +38,20 @@ public class SearchRequestContext { private final EnumMap shardStats; private final SearchRequest searchRequest; - private final List phaseResourceUsage; - private final Supplier taskResourceUsageSupplier; + private final LinkedBlockingQueue phaseResourceUsage; + private final Supplier taskResourceUsageSupplier; SearchRequestContext( final SearchRequestOperationsListener searchRequestOperationsListener, final SearchRequest searchRequest, - final Supplier taskResourceUsageSupplier + final Supplier taskResourceUsageSupplier ) { this.searchRequestOperationsListener = searchRequestOperationsListener; this.absoluteStartNanos = System.nanoTime(); this.phaseTookMap = new HashMap<>(); this.shardStats = new EnumMap<>(ShardStatsFieldNames.class); this.searchRequest = searchRequest; - this.phaseResourceUsage = new ArrayList<>(); + this.phaseResourceUsage = new LinkedBlockingQueue<>(); this.taskResourceUsageSupplier = taskResourceUsageSupplier; } @@ -130,32 +124,22 @@ String formattedShardStats() { } } - public Supplier getTaskResourceUsageSupplier() { + public Supplier getTaskResourceUsageSupplier() { return taskResourceUsageSupplier; } - public SearchRequest getRequest() { - return searchRequest; - } - - public void recordPhaseResourceUsage(String usage) { - try { - if (usage != null && !usage.isEmpty()) { - XContentParser parser = XContentHelper.createParser( - NamedXContentRegistry.EMPTY, - DeprecationHandler.THROW_UNSUPPORTED_OPERATION, - new BytesArray(usage), - MediaTypeRegistry.JSON - ); - this.phaseResourceUsage.add(TaskResourceInfo.PARSER.apply(parser, null)); - } - } catch (IOException e) { - logger.debug("fail to parse phase resource usages: ", e); + public void recordPhaseResourceUsage(TaskResourceInfo usage) { + if (usage != null) { + this.phaseResourceUsage.add(usage); } } public List getPhaseResourceUsage() { - return phaseResourceUsage; + return new ArrayList<>(phaseResourceUsage); + } + + public SearchRequest getRequest() { + return searchRequest; } } diff --git a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java index 09da8d03a0aeb..6e380775355a2 100644 --- a/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/opensearch/action/search/TransportSearchAction.java @@ -87,6 +87,7 @@ import org.opensearch.search.profile.SearchProfileShardResults; import org.opensearch.tasks.CancellableTask; import org.opensearch.tasks.Task; +import org.opensearch.tasks.TaskResourceTrackingService; import org.opensearch.telemetry.metrics.MetricsRegistry; import org.opensearch.telemetry.tracing.Span; import org.opensearch.telemetry.tracing.SpanBuilder; @@ -125,7 +126,6 @@ import static org.opensearch.action.search.SearchType.DFS_QUERY_THEN_FETCH; import static org.opensearch.action.search.SearchType.QUERY_THEN_FETCH; import static org.opensearch.search.sort.FieldSortBuilder.hasPrimaryFieldSort; -import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; /** * Perform search action @@ -187,6 +187,7 @@ public class TransportSearchAction extends HandledTransportAction) SearchRequest::new); this.client = client; @@ -225,6 +227,7 @@ public TransportSearchAction( clusterService.getClusterSettings() .addSettingsUpdateConsumer(SEARCH_QUERY_METRICS_ENABLED_SETTING, this::setSearchQueryMetricsEnabled); this.tracer = tracer; + this.taskResourceTrackingService = taskResourceTrackingService; } private void setSearchQueryMetricsEnabled(boolean searchQueryMetricsEnabled) { @@ -452,14 +455,10 @@ private void executeRequest( logger, TraceableSearchRequestOperationsListener.create(tracer, requestSpan) ); - SearchRequestContext searchRequestContext = new SearchRequestContext(requestOperationsListeners, originalSearchRequest, () -> { - List taskResourceUsages = threadPool.getThreadContext().getResponseHeaders().get(TASK_RESOURCE_USAGE); - if (taskResourceUsages != null && taskResourceUsages.size() > 0) { - return taskResourceUsages.get(0); - } - return null; - } - + SearchRequestContext searchRequestContext = new SearchRequestContext( + requestOperationsListeners, + originalSearchRequest, + taskResourceTrackingService::getTaskResourceUsageFromThreadContext ); searchRequestContext.getSearchRequestOperationsListener().onRequestStart(searchRequestContext); diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index d5c2b13eb5041..45f111d889522 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -73,13 +73,6 @@ import org.opensearch.core.index.Index; import org.opensearch.core.index.shard.ShardId; import org.opensearch.core.indices.breaker.CircuitBreakerService; -import org.opensearch.core.tasks.resourcetracker.ResourceStats; -import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; -import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; -import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; -import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; -import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; -import org.opensearch.core.tasks.resourcetracker.ThreadResourceInfo; import org.opensearch.index.IndexNotFoundException; import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; @@ -168,7 +161,6 @@ import static org.opensearch.common.unit.TimeValue.timeValueHours; import static org.opensearch.common.unit.TimeValue.timeValueMillis; import static org.opensearch.common.unit.TimeValue.timeValueMinutes; -import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; /** * The main search service @@ -571,7 +563,7 @@ private DfsSearchResult executeDfsPhase(ShardSearchRequest request, SearchShardT processFailure(readerContext, e); throw e; } finally { - writeTaskResourceUsage(task); + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } } @@ -675,7 +667,7 @@ private SearchPhaseResult executeQueryPhase(ShardSearchRequest request, SearchSh processFailure(readerContext, e); throw e; } finally { - writeTaskResourceUsage(task); + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } } @@ -722,7 +714,7 @@ public void executeQueryPhase( // we handle the failure in the failure listener below throw e; } finally { - writeTaskResourceUsage(task); + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -756,7 +748,7 @@ public void executeQueryPhase(QuerySearchRequest request, SearchShardTask task, // we handle the failure in the failure listener below throw e; } finally { - writeTaskResourceUsage(task); + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -807,7 +799,7 @@ public void executeFetchPhase( // we handle the failure in the failure listener below throw e; } finally { - writeTaskResourceUsage(task); + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -839,7 +831,7 @@ public void executeFetchPhase(ShardFetchRequest request, SearchShardTask task, A // we handle the failure in the failure listener below throw e; } finally { - writeTaskResourceUsage(task); + taskResourceTrackingService.writeTaskResourceUsage(task, clusterService.localNode().getId()); } }, wrapFailureListener(listener, readerContext, markAsUsed)); } @@ -1139,56 +1131,6 @@ private DefaultSearchContext createSearchContext(ReaderContext reader, ShardSear return searchContext; } - private void writeTaskResourceUsage(SearchShardTask task) { - try { - // Get resource usages from when the task started - ThreadResourceInfo threadResourceInfo = task.getActiveThreadResourceInfo( - Thread.currentThread().getId(), - ResourceStatsType.WORKER_STATS - ); - if (threadResourceInfo == null) { - return; - } - Map startValues = threadResourceInfo.getResourceUsageInfo().getStatsInfo(); - if (!(startValues.containsKey(ResourceStats.CPU) && startValues.containsKey(ResourceStats.MEMORY))) { - return; - } - // Get current resource usages - ResourceUsageMetric[] endValues = taskResourceTrackingService.getResourceUsageMetricsForThread(Thread.currentThread().getId()); - long cpu = -1, mem = -1; - for (ResourceUsageMetric endValue : endValues) { - if (endValue.getStats() == ResourceStats.MEMORY) { - mem = endValue.getValue(); - } else if (endValue.getStats() == ResourceStats.CPU) { - cpu = endValue.getValue(); - } - } - if (cpu == -1 || mem == -1) { - logger.debug("Invalid resource usage value, cpu [{}], memory [{}]: ", cpu, mem); - return; - } - - // Build task resource usage info - TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setAction(task.getAction()) - .setTaskId(task.getId()) - .setParentTaskId(task.getParentTaskId().getId()) - .setNodeId(clusterService.localNode().getId()) - .setTaskResourceUsage( - new TaskResourceUsage( - cpu - startValues.get(ResourceStats.CPU).getStartValue(), - mem - startValues.get(ResourceStats.MEMORY).getStartValue() - ) - ) - .build(); - - // Remove the existing TASK_RESOURCE_USAGE header since it would have come from an earlier phase in the same request. - threadPool.getThreadContext().removeResponseHeader(TASK_RESOURCE_USAGE); - threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); - } catch (Exception e) { - logger.debug("Error during writing task resource usage: ", e); - } - } - private void freeAllContextForIndex(Index index) { assert index != null; for (ReaderContext ctx : activeReaders.values()) { diff --git a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java index 59e719a3c3250..564eff6c10df6 100644 --- a/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java +++ b/server/src/main/java/org/opensearch/tasks/TaskResourceTrackingService.java @@ -14,6 +14,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; import org.opensearch.ExceptionsHelper; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.common.SuppressForbidden; import org.opensearch.common.inject.Inject; import org.opensearch.common.settings.ClusterSettings; @@ -22,12 +23,23 @@ import org.opensearch.common.util.concurrent.ConcurrentCollections; import org.opensearch.common.util.concurrent.ConcurrentMapLong; import org.opensearch.common.util.concurrent.ThreadContext; +import org.opensearch.common.xcontent.XContentHelper; +import org.opensearch.core.common.bytes.BytesArray; import org.opensearch.core.tasks.resourcetracker.ResourceStats; +import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageInfo; import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; +import org.opensearch.core.tasks.resourcetracker.TaskResourceUsage; import org.opensearch.core.tasks.resourcetracker.ThreadResourceInfo; +import org.opensearch.core.xcontent.DeprecationHandler; +import org.opensearch.core.xcontent.MediaTypeRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.core.xcontent.XContentParser; import org.opensearch.threadpool.RunnableTaskExecutionListener; import org.opensearch.threadpool.ThreadPool; +import java.io.IOException; import java.lang.management.ManagementFactory; import java.util.ArrayList; import java.util.Collections; @@ -212,7 +224,7 @@ public Map getResourceAwareTasks() { return Collections.unmodifiableMap(resourceAwareTasks); } - public ResourceUsageMetric[] getResourceUsageMetricsForThread(long threadId) { + private ResourceUsageMetric[] getResourceUsageMetricsForThread(long threadId) { ResourceUsageMetric currentMemoryUsage = new ResourceUsageMetric( ResourceStats.MEMORY, threadMXBean.getThreadAllocatedBytes(threadId) @@ -262,6 +274,89 @@ private ThreadContext.StoredContext addTaskIdToThreadContext(Task task) { return storedContext; } + /** + * Get the current task level resource usage. + * + * @param task {@link SearchShardTask} + * @param nodeId the local nodeId + */ + public void writeTaskResourceUsage(SearchShardTask task, String nodeId) { + try { + // Get resource usages from when the task started + ThreadResourceInfo threadResourceInfo = task.getActiveThreadResourceInfo( + Thread.currentThread().getId(), + ResourceStatsType.WORKER_STATS + ); + if (threadResourceInfo == null) { + return; + } + Map startValues = threadResourceInfo.getResourceUsageInfo().getStatsInfo(); + if (!(startValues.containsKey(ResourceStats.CPU) && startValues.containsKey(ResourceStats.MEMORY))) { + return; + } + // Get current resource usages + ResourceUsageMetric[] endValues = getResourceUsageMetricsForThread(Thread.currentThread().getId()); + long cpu = -1, mem = -1; + for (ResourceUsageMetric endValue : endValues) { + if (endValue.getStats() == ResourceStats.MEMORY) { + mem = endValue.getValue(); + } else if (endValue.getStats() == ResourceStats.CPU) { + cpu = endValue.getValue(); + } + } + if (cpu == -1 || mem == -1) { + logger.debug("Invalid resource usage value, cpu [{}], memory [{}]: ", cpu, mem); + return; + } + + // Build task resource usage info + TaskResourceInfo taskResourceInfo = new TaskResourceInfo.Builder().setAction(task.getAction()) + .setTaskId(task.getId()) + .setParentTaskId(task.getParentTaskId().getId()) + .setNodeId(nodeId) + .setTaskResourceUsage( + new TaskResourceUsage( + cpu - startValues.get(ResourceStats.CPU).getStartValue(), + mem - startValues.get(ResourceStats.MEMORY).getStartValue() + ) + ) + .build(); + // Remove the existing TASK_RESOURCE_USAGE header since it would have come from an earlier phase in the same request. + synchronized (this) { + threadPool.getThreadContext().removeResponseHeader(TASK_RESOURCE_USAGE); + threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceInfo.toString()); + } + } catch (Exception e) { + logger.debug("Error during writing task resource usage: ", e); + } + } + + /** + * Get the task resource usages from {@link ThreadContext} + * + * @return {@link TaskResourceInfo} + */ + public TaskResourceInfo getTaskResourceUsageFromThreadContext() { + List taskResourceUsages = threadPool.getThreadContext().getResponseHeaders().get(TASK_RESOURCE_USAGE); + if (taskResourceUsages != null && taskResourceUsages.size() > 0) { + String usage = taskResourceUsages.get(0); + try { + if (usage != null && !usage.isEmpty()) { + XContentParser parser = XContentHelper.createParser( + NamedXContentRegistry.EMPTY, + DeprecationHandler.THROW_UNSUPPORTED_OPERATION, + new BytesArray(usage), + MediaTypeRegistry.JSON + ); + return TaskResourceInfo.PARSER.apply(parser, null); + } + } catch (IOException e) { + logger.debug("fail to parse phase resource usages: ", e); + } + } + return null; + } + /** * Listener that gets invoked when a task execution completes. */ diff --git a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java index 730f0569f8bc5..27336e86e52b0 100644 --- a/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java +++ b/server/src/test/java/org/opensearch/action/search/AbstractSearchAsyncActionTests.java @@ -201,7 +201,7 @@ private AbstractSearchAsyncAction createAction( new SearchRequestContext( new SearchRequestOperationsListener.CompositeListener(List.of(assertingListener), LogManager.getLogger()), request, - () -> "" + () -> null ), NoopTracer.INSTANCE ) { diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index bbd1bcdc35c82..622507f885814 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2292,7 +2292,7 @@ public void onFailure(final Exception e) { responseCollectorService, new NoneCircuitBreakerService(), null, - null + new TaskResourceTrackingService(settings, clusterSettings, threadPool) ); SearchPhaseController searchPhaseController = new SearchPhaseController( writableRegistry(), @@ -2327,7 +2327,8 @@ public void onFailure(final Exception e) { ), NoopMetricsRegistry.INSTANCE, searchRequestOperationsCompositeListenerFactory, - NoopTracer.INSTANCE + NoopTracer.INSTANCE, + new TaskResourceTrackingService(settings, clusterSettings, threadPool) ) ); actions.put( diff --git a/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java b/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java index 45d438f8d04c9..0c19c331e1510 100644 --- a/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java +++ b/server/src/test/java/org/opensearch/tasks/TaskResourceTrackingServiceTests.java @@ -9,11 +9,15 @@ package org.opensearch.tasks; import org.opensearch.action.admin.cluster.node.tasks.TransportTasksActionTests; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.action.search.SearchTask; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; import org.opensearch.common.util.concurrent.ThreadContext; import org.opensearch.core.tasks.TaskId; +import org.opensearch.core.tasks.resourcetracker.ResourceStatsType; +import org.opensearch.core.tasks.resourcetracker.ResourceUsageMetric; +import org.opensearch.core.tasks.resourcetracker.TaskResourceInfo; import org.opensearch.core.tasks.resourcetracker.ThreadResourceInfo; import org.opensearch.test.OpenSearchTestCase; import org.opensearch.threadpool.TestThreadPool; @@ -31,6 +35,7 @@ import static org.opensearch.core.tasks.resourcetracker.ResourceStats.CPU; import static org.opensearch.core.tasks.resourcetracker.ResourceStats.MEMORY; import static org.opensearch.tasks.TaskResourceTrackingService.TASK_ID; +import static org.opensearch.tasks.TaskResourceTrackingService.TASK_RESOURCE_USAGE; public class TaskResourceTrackingServiceTests extends OpenSearchTestCase { @@ -142,6 +147,36 @@ public void testStartingTrackingHandlesMultipleThreadsPerTask() throws Interrupt assertEquals(numTasks, numExecutions); } + public void testWriteTaskResourceUsage() { + SearchShardTask task = new SearchShardTask(1, "test", "test", "task", TaskId.EMPTY_TASK_ID, new HashMap<>()); + taskResourceTrackingService.setTaskResourceTrackingEnabled(true); + taskResourceTrackingService.startTracking(task); + task.startThreadResourceTracking( + Thread.currentThread().getId(), + ResourceStatsType.WORKER_STATS, + new ResourceUsageMetric(CPU, 100), + new ResourceUsageMetric(MEMORY, 100) + ); + taskResourceTrackingService.writeTaskResourceUsage(task, "node_1"); + Map> headers = threadPool.getThreadContext().getResponseHeaders(); + assertEquals(1, headers.size()); + assertTrue(headers.containsKey(TASK_RESOURCE_USAGE)); + } + + public void testGetTaskResourceUsageFromThreadContext() { + String taskResourceUsageJson = + "{\"action\":\"testAction\",\"taskId\":1,\"parentTaskId\":2,\"nodeId\":\"nodeId\",\"taskResourceUsage\":{\"cpu_time_in_nanos\":1000,\"memory_in_bytes\":2000}}"; + threadPool.getThreadContext().addResponseHeader(TASK_RESOURCE_USAGE, taskResourceUsageJson); + TaskResourceInfo result = taskResourceTrackingService.getTaskResourceUsageFromThreadContext(); + assertNotNull(result); + assertEquals("testAction", result.getAction()); + assertEquals(1L, result.getTaskId()); + assertEquals(2L, result.getParentTaskId()); + assertEquals("nodeId", result.getNodeId()); + assertEquals(1000L, result.getTaskResourceUsage().getCpuTimeInNanos()); + assertEquals(2000L, result.getTaskResourceUsage().getMemoryInBytes()); + } + private void verifyThreadContextFixedHeaders(String key, String value) { assertEquals(threadPool.getThreadContext().getHeader(key), value); assertEquals(threadPool.getThreadContext().getTransient(key), value);