Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport to 2.x] Refinement of Forecasting and AD Precision/Recall Improvements #1214

Merged
merged 1 commit into from
May 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
40 changes: 31 additions & 9 deletions .github/workflows/benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,19 @@ jobs:
with:
product: opensearch

Build-ad:
Run-Tests:
needs: Get-CI-Image-Tag
runs-on: ubuntu-latest
strategy:
matrix:
java: [17]
# each test scenario (rule, hc, single_stream) is treated as a separate job.
test: [rule, hc, single_stream]
fail-fast: false

concurrency:
# The concurrency setting is used to limit the concurrency of each test scenario group to ensure they do not run concurrently on the same machine.
group: ${{ github.workflow }}-${{ matrix.test }}
name: Run Anomaly detection model performance benchmark
runs-on: ubuntu-latest

container:
# using the same image which is used by opensearch-build team to build the OpenSearch Distribution
# this image tag is subject to change as more dependencies and updates will arrive over time
Expand All @@ -30,18 +34,36 @@ jobs:
options: --user root

steps:
- name: Setup Java ${{ matrix.java }}
- name: Setup Java
uses: actions/setup-java@v3
with:
distribution: 'temurin'
java-version: ${{ matrix.java }}
java-version: 21

# anomaly-detection
- name: Checkout AD
uses: actions/checkout@v3

- name: Build and Run Tests
run: |
chown -R 1000:1000 `pwd`
su `id -un 1000` -c "./gradlew ':test' --tests 'org.opensearch.ad.ml.HCADModelPerfTests' -Dtests.seed=2AEBDBBAE75AC5E0 -Dtests.security.manager=false -Dtests.locale=es-CU -Dtests.timezone=Chile/EasterIsland -Dtest.logs=true -Dmodel-benchmark=true"
su `id -un 1000` -c "./gradlew integTest --tests 'org.opensearch.ad.e2e.SingleStreamModelPerfIT' -Dtests.seed=60CDDB34427ACD0C -Dtests.security.manager=false -Dtests.locale=kab-DZ -Dtests.timezone=Asia/Hebron -Dtest.logs=true -Dmodel-benchmark=true"
case ${{ matrix.test }} in
rule)
su `id -un 1000` -c "./gradlew integTest --tests 'org.opensearch.ad.e2e.RuleModelPerfIT' \
-Dtests.seed=B4BA12CCF1D9E825 -Dtests.security.manager=false \
-Dtests.jvm.argline='-XX:TieredStopAtLevel=1 -XX:ReservedCodeCacheSize=64m' \
-Dtests.locale=ar-JO -Dtests.timezone=Asia/Samarkand -Dmodel-benchmark=true \
-Dtests.timeoutSuite=3600000! -Dtests.logs=true"
;;
hc)
su `id -un 1000` -c "./gradlew ':test' --tests 'org.opensearch.ad.ml.HCADModelPerfTests' \
-Dtests.seed=2AEBDBBAE75AC5E0 -Dtests.security.manager=false \
-Dtests.locale=es-CU -Dtests.timezone=Chile/EasterIsland -Dtest.logs=true \
-Dmodel-benchmark=true"
;;
single_stream)
su `id -un 1000` -c "./gradlew integTest --tests 'org.opensearch.ad.e2e.SingleStreamModelPerfIT' \
-Dtests.seed=60CDDB34427ACD0C -Dtests.security.manager=false \
-Dtests.locale=kab-DZ -Dtests.timezone=Asia/Hebron -Dtest.logs=true \
-Dmodel-benchmark=true"
;;
esac
19 changes: 17 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -203,8 +203,8 @@ ext {
}

opensearchplugin {
name 'opensearch-time-series-analytics'
description 'OpenSearch time series analytics plugin'
name 'opensearch-anomaly-detection'
description 'OpenSearch anomaly detector plugin'
classname 'org.opensearch.timeseries.TimeSeriesAnalyticsPlugin'
extendedPlugins = ['lang-painless', 'opensearch-job-scheduler']
}
Expand Down Expand Up @@ -357,6 +357,7 @@ integTest {
if (System.getProperty("model-benchmark") == null || System.getProperty("model-benchmark") == "false") {
filter {
excludeTestsMatching "org.opensearch.ad.e2e.SingleStreamModelPerfIT"
excludeTestsMatching "org.opensearch.ad.e2e.RuleModelPerfIT"
}
}

Expand Down Expand Up @@ -692,6 +693,10 @@ List<String> jacocoExclusions = [
// https://github.com/opensearch-project/anomaly-detection/issues/241
'org.opensearch.ad.task.ADBatchTaskRunner',
'org.opensearch.ad.task.ADTaskManager',
// TODO: add forecast test coverage before release
'org.opensearch.forecast.*',
'org.opensearch.timeseries.*',
'org.opensearch.ad.*',
]


Expand Down Expand Up @@ -829,3 +834,13 @@ task updateVersion {
ant.replaceregexp(file:'build.gradle', match: '"opensearch.version", "\\d.*"', replace: '"opensearch.version", "' + newVersion.tokenize('-')[0] + '-SNAPSHOT"', flags:'g', byline:true)
}
}

// https://github.com/opensearch-project/flow-framework/pull/226
tasks.withType(AbstractPublishToMaven) {
def predicate = provider {
publication.name == "pluginZip"
}
onlyIf("Publishing only ZIP distributions") {
predicate.get()
}
}
4 changes: 3 additions & 1 deletion dataGeneration/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# indirect dependency of opensearch_py. Lower than 2.30 can cause CVE-2024-35195.
requests>=2.32.0
numpy==1.23.0
opensearch_py==2.0.0
retry==0.9.2
scipy==1.10.0
urllib3==1.26.18
urllib3==1.26.18
46 changes: 46 additions & 0 deletions src/main/java/org/opensearch/ad/ADEntityProfileRunner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/

package org.opensearch.ad;

import org.opensearch.ad.constant.ADCommonName;
import org.opensearch.ad.model.AnomalyDetector;
import org.opensearch.ad.model.AnomalyResult;
import org.opensearch.ad.settings.ADNumericSetting;
import org.opensearch.ad.transport.ADEntityProfileAction;
import org.opensearch.client.Client;
import org.opensearch.core.xcontent.NamedXContentRegistry;
import org.opensearch.timeseries.AnalysisType;
import org.opensearch.timeseries.EntityProfileRunner;
import org.opensearch.timeseries.util.SecurityClientUtil;

public class ADEntityProfileRunner extends EntityProfileRunner<ADEntityProfileAction> {

public ADEntityProfileRunner(
Client client,
SecurityClientUtil clientUtil,
NamedXContentRegistry xContentRegistry,
long requiredSamples
) {
super(
client,
clientUtil,
xContentRegistry,
requiredSamples,
AnomalyDetector::parse,
ADNumericSetting.maxCategoricalFields(),
AnalysisType.AD,
ADEntityProfileAction.INSTANCE,
ADCommonName.ANOMALY_RESULT_INDEX_ALIAS,
AnomalyResult.DETECTOR_ID_FIELD
);
}
}
98 changes: 98 additions & 0 deletions src/main/java/org/opensearch/ad/ADJobProcessor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.ad;

import java.time.Instant;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.ad.indices.ADIndex;
import org.opensearch.ad.indices.ADIndexManagement;
import org.opensearch.ad.model.ADTask;
import org.opensearch.ad.model.ADTaskType;
import org.opensearch.ad.model.AnomalyResult;
import org.opensearch.ad.rest.handler.ADIndexJobActionHandler;
import org.opensearch.ad.settings.AnomalyDetectorSettings;
import org.opensearch.ad.task.ADTaskCacheManager;
import org.opensearch.ad.task.ADTaskManager;
import org.opensearch.ad.transport.ADProfileAction;
import org.opensearch.ad.transport.AnomalyResultAction;
import org.opensearch.ad.transport.AnomalyResultRequest;
import org.opensearch.common.settings.Settings;
import org.opensearch.core.action.ActionListener;
import org.opensearch.jobscheduler.spi.LockModel;
import org.opensearch.jobscheduler.spi.utils.LockService;
import org.opensearch.timeseries.AnalysisType;
import org.opensearch.timeseries.JobProcessor;
import org.opensearch.timeseries.TimeSeriesAnalyticsPlugin;
import org.opensearch.timeseries.common.exception.EndRunException;
import org.opensearch.timeseries.model.Config;
import org.opensearch.timeseries.model.Job;
import org.opensearch.timeseries.transport.ResultRequest;

public class ADJobProcessor extends
JobProcessor<ADIndex, ADIndexManagement, ADTaskCacheManager, ADTaskType, ADTask, ADTaskManager, AnomalyResult, ADProfileAction, ExecuteADResultResponseRecorder, ADIndexJobActionHandler> {

private static final Logger log = LogManager.getLogger(ADJobProcessor.class);

private static ADJobProcessor INSTANCE;

public static ADJobProcessor getInstance() {
if (INSTANCE != null) {
return INSTANCE;
}
synchronized (ADJobProcessor.class) {
if (INSTANCE != null) {
return INSTANCE;
}
INSTANCE = new ADJobProcessor();
return INSTANCE;
}
}

private ADJobProcessor() {
// Singleton class, use getJobRunnerInstance method instead of constructor
super(AnalysisType.AD, TimeSeriesAnalyticsPlugin.AD_THREAD_POOL_NAME, AnomalyResultAction.INSTANCE);
}

public void registerSettings(Settings settings) {
super.registerSettings(settings, AnomalyDetectorSettings.AD_MAX_RETRY_FOR_END_RUN_EXCEPTION);
}

@Override
protected ResultRequest createResultRequest(String configId, long start, long end) {
return new AnomalyResultRequest(configId, start, end);
}

@Override
protected void validateResultIndexAndRunJob(
Job jobParameter,
LockService lockService,
LockModel lock,
Instant executionStartTime,
Instant executionEndTime,
String configId,
String user,
List<String> roles,
ExecuteADResultResponseRecorder recorder,
Config detector
) {
String resultIndex = jobParameter.getCustomResultIndex();
if (resultIndex == null) {
runJob(jobParameter, lockService, lock, executionStartTime, executionEndTime, configId, user, roles, recorder, detector);
return;
}
ActionListener<Boolean> listener = ActionListener.wrap(r -> { log.debug("Custom index is valid"); }, e -> {
Exception exception = new EndRunException(configId, e.getMessage(), false);
handleException(jobParameter, lockService, lock, executionStartTime, executionEndTime, exception, recorder, detector);
});
indexManagement.validateCustomIndexForBackendJob(resultIndex, configId, user, roles, () -> {
listener.onResponse(true);
runJob(jobParameter, lockService, lock, executionStartTime, executionEndTime, configId, user, roles, recorder, detector);
}, listener);
}
}
85 changes: 85 additions & 0 deletions src/main/java/org/opensearch/ad/ADTaskProfileRunner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.ad;

import java.util.ArrayList;
import java.util.List;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.opensearch.ad.model.ADTask;
import org.opensearch.ad.model.ADTaskProfile;
import org.opensearch.ad.transport.ADTaskProfileAction;
import org.opensearch.ad.transport.ADTaskProfileNodeResponse;
import org.opensearch.ad.transport.ADTaskProfileRequest;
import org.opensearch.client.Client;
import org.opensearch.core.action.ActionListener;
import org.opensearch.timeseries.TaskProfileRunner;
import org.opensearch.timeseries.cluster.HashRing;
import org.opensearch.timeseries.model.EntityTaskProfile;

public class ADTaskProfileRunner implements TaskProfileRunner<ADTask, ADTaskProfile> {
public final Logger logger = LogManager.getLogger(ADTaskProfileRunner.class);

private final HashRing hashRing;
private final Client client;

public ADTaskProfileRunner(HashRing hashRing, Client client) {
this.hashRing = hashRing;
this.client = client;
}

@Override
public void getTaskProfile(ADTask configLevelTask, ActionListener<ADTaskProfile> listener) {
String detectorId = configLevelTask.getConfigId();

hashRing.getAllEligibleDataNodesWithKnownVersion(dataNodes -> {
ADTaskProfileRequest adTaskProfileRequest = new ADTaskProfileRequest(detectorId, dataNodes);
client.execute(ADTaskProfileAction.INSTANCE, adTaskProfileRequest, ActionListener.wrap(response -> {
if (response.hasFailures()) {
listener.onFailure(response.failures().get(0));
return;
}

List<EntityTaskProfile> adEntityTaskProfiles = new ArrayList<>();
ADTaskProfile detectorTaskProfile = new ADTaskProfile(configLevelTask);
for (ADTaskProfileNodeResponse node : response.getNodes()) {
ADTaskProfile taskProfile = node.getAdTaskProfile();
if (taskProfile != null) {
if (taskProfile.getNodeId() != null) {
// HC detector: task profile from coordinating node
// Single entity detector: task profile from worker node
detectorTaskProfile.setTaskId(taskProfile.getTaskId());
detectorTaskProfile.setRcfTotalUpdates(taskProfile.getRcfTotalUpdates());
detectorTaskProfile.setThresholdModelTrained(taskProfile.getThresholdModelTrained());
detectorTaskProfile.setThresholdModelTrainingDataSize(taskProfile.getThresholdModelTrainingDataSize());
detectorTaskProfile.setModelSizeInBytes(taskProfile.getModelSizeInBytes());
detectorTaskProfile.setNodeId(taskProfile.getNodeId());
detectorTaskProfile.setTotalEntitiesCount(taskProfile.getTotalEntitiesCount());
detectorTaskProfile.setDetectorTaskSlots(taskProfile.getDetectorTaskSlots());
detectorTaskProfile.setPendingEntitiesCount(taskProfile.getPendingEntitiesCount());
detectorTaskProfile.setRunningEntitiesCount(taskProfile.getRunningEntitiesCount());
detectorTaskProfile.setRunningEntities(taskProfile.getRunningEntities());
detectorTaskProfile.setTaskType(taskProfile.getTaskType());
}
if (taskProfile.getEntityTaskProfiles() != null) {
adEntityTaskProfiles.addAll(taskProfile.getEntityTaskProfiles());
}
}
}
if (adEntityTaskProfiles != null && adEntityTaskProfiles.size() > 0) {
detectorTaskProfile.setEntityTaskProfiles(adEntityTaskProfiles);
}
listener.onResponse(detectorTaskProfile);
}, e -> {
logger.error("Failed to get task profile for task " + configLevelTask.getTaskId(), e);
listener.onFailure(e);
}));
}, listener);

}

}
Loading
Loading