Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimise memory consumption, CPU usage and disk writes #381

Merged
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
7edec11
Rework CI Visibility spans batching
nikita-tkachenko-datadog Jan 19, 2024
5c3ffb3
Submit pipeline stages and steps without waiting until the pipeline f…
nikita-tkachenko-datadog Jan 23, 2024
36d59f5
Preserve node-name population logic
nikita-tkachenko-datadog Jan 24, 2024
1419368
Fix webhook tests
nikita-tkachenko-datadog Jan 24, 2024
ddfef46
Address review comments
nikita-tkachenko-datadog Jan 24, 2024
7bf5c20
Fix SpotBugs warning
nikita-tkachenko-datadog Jan 24, 2024
7eefea6
Fix integration tests
nikita-tkachenko-datadog Jan 25, 2024
c751e83
Properly convert Windows path to URL
nikita-tkachenko-datadog Jan 25, 2024
ae44b1d
Fix git repo URL asserts in integration tests
nikita-tkachenko-datadog Jan 25, 2024
e7566bd
Replace custom shutdown hook with Jenkins @Terminator mechanism
nikita-tkachenko-datadog Jan 25, 2024
5668099
Merge branch 'nikita-tkachenko/ci-visibility-batching' into nikita-tk…
nikita-tkachenko-datadog Jan 25, 2024
52371d7
Fix integration tests on Windows
nikita-tkachenko-datadog Jan 26, 2024
c793cf8
Address review comments
nikita-tkachenko-datadog Jan 26, 2024
eeeb4c2
Fix NPE
nikita-tkachenko-datadog Jan 26, 2024
1756c79
Renamed some classes and methods
nikita-tkachenko-datadog Jan 26, 2024
d18d872
Merge branch 'nikita-tkachenko/ci-visibility-batching' into nikita-tk…
nikita-tkachenko-datadog Jan 26, 2024
9f40662
Address review comments
nikita-tkachenko-datadog Jan 30, 2024
8a654da
Merge branch 'master' into nikita-tkachenko/memory-consumption-optimi…
nikita-tkachenko-datadog Jan 30, 2024
88b5e10
Add max payload size check for webhook requests
nikita-tkachenko-datadog Jan 29, 2024
d511656
Fix SpotBugs warnings
nikita-tkachenko-datadog Jan 30, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,11 @@
<artifactId>workflow-basic-steps</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jenkins-ci.plugins</groupId>
<artifactId>git</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jenkins-ci.plugins.workflow</groupId>
<artifactId>workflow-durable-task-step</artifactId>
Expand Down
251 changes: 129 additions & 122 deletions src/main/java/org/datadog/jenkins/plugins/datadog/DatadogUtilities.java

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,6 @@ of this software and associated documentation files (the "Software"), to deal
*/
public class DatadogAgentClient implements DatadogClient {

private static final int PAYLOAD_SIZE_LIMIT = 5 * 1024 * 1024; // 5 MB

private static volatile DatadogAgentClient instance = null;
// Used to determine if the instance failed last validation last time, so
// we do not keep retrying to create the instance and logging the same error
Expand Down Expand Up @@ -142,11 +140,15 @@ public static DatadogClient getInstance(String hostname, Integer port, Integer l
}

protected DatadogAgentClient(String hostname, Integer port, Integer logCollectionPort, Integer traceCollectionPort) {
this(hostname, port, logCollectionPort, traceCollectionPort, HTTP_TIMEOUT_EVP_PROXY_MS);
}

protected DatadogAgentClient(String hostname, Integer port, Integer logCollectionPort, Integer traceCollectionPort, long evpProxyTimeoutMillis) {
this.hostname = hostname;
this.port = port;
this.logCollectionPort = logCollectionPort;
this.traceCollectionPort = traceCollectionPort;
this.client = new HttpClient(HTTP_TIMEOUT_EVP_PROXY_MS);
this.client = new HttpClient(evpProxyTimeoutMillis);
}

public static ConnectivityResult checkConnectivity(final String host, final int port) {
Expand Down Expand Up @@ -554,10 +556,6 @@ private void sendSpansToWebhook(Collection<Payload> spans) {
}

byte[] body = span.getJson().toString().getBytes(StandardCharsets.UTF_8);
if (body.length > PAYLOAD_SIZE_LIMIT) {
logger.severe("Dropping span because payload size (" + body.length + ") exceeds the allowed limit of " + PAYLOAD_SIZE_LIMIT);
continue;
}

// webhook intake does not support batch requests
logger.fine("Sending webhook");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ of this software and associated documentation files (the "Software"), to deal
*/
public class DatadogApiClient implements DatadogClient {

private static final int PAYLOAD_SIZE_LIMIT = 5 * 1024 * 1024; // 5 MB

private static volatile DatadogApiClient instance = null;
// Used to determine if the instance failed last validation last time, so
// we do not keep retrying to create the instance and logging the same error
Expand Down Expand Up @@ -517,10 +515,6 @@ private void sendSpans(Collection<Payload> spans) {
}

byte[] body = span.getJson().toString().getBytes(StandardCharsets.UTF_8);
if (body.length > PAYLOAD_SIZE_LIMIT) {
logger.severe("Dropping span because payload size (" + body.length + ") exceeds the allowed limit of " + PAYLOAD_SIZE_LIMIT);
continue;
}

// webhook intake does not support batch requests
logger.fine("Sending webhook");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ public abstract class AbstractDatadogBuildEvent extends AbstractDatadogEvent {

public AbstractDatadogBuildEvent(BuildData buildData) {
this.buildData = buildData;
setHost(buildData.getHostname("unknown"));
setHost(buildData.getHostname(DatadogUtilities.getHostname(null)));
setJenkinsUrl(buildData.getJenkinsUrl("unknown"));
setAggregationKey(buildData.getJobName("unknown"));
setDate(buildData.getEndTime(DatadogUtilities.currentTimeMillis()) / 1000);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ of this software and associated documentation files (the "Software"), to deal
package org.datadog.jenkins.plugins.datadog.listeners;

import static org.datadog.jenkins.plugins.datadog.DatadogUtilities.cleanUpTraceActions;
import static org.datadog.jenkins.plugins.datadog.DatadogUtilities.isPipeline;
import static org.datadog.jenkins.plugins.datadog.traces.TracerConstants.SPAN_ID_ENVVAR_KEY;
import static org.datadog.jenkins.plugins.datadog.traces.TracerConstants.TRACE_ID_ENVVAR_KEY;

Expand Down Expand Up @@ -58,13 +57,12 @@ of this software and associated documentation files (the "Software"), to deal
import org.datadog.jenkins.plugins.datadog.events.BuildFinishedEventImpl;
import org.datadog.jenkins.plugins.datadog.events.BuildStartedEventImpl;
import org.datadog.jenkins.plugins.datadog.model.BuildData;
import org.datadog.jenkins.plugins.datadog.model.CIGlobalTagsAction;
import org.datadog.jenkins.plugins.datadog.model.GitCommitAction;
import org.datadog.jenkins.plugins.datadog.model.GitRepositoryAction;
import org.datadog.jenkins.plugins.datadog.model.PipelineQueueInfoAction;
import org.datadog.jenkins.plugins.datadog.model.StageBreakdownAction;
import org.datadog.jenkins.plugins.datadog.model.TraceInfoAction;
import org.datadog.jenkins.plugins.datadog.traces.BuildSpanAction;
import org.datadog.jenkins.plugins.datadog.traces.BuildSpanManager;
import org.datadog.jenkins.plugins.datadog.traces.StepDataAction;
import org.datadog.jenkins.plugins.datadog.traces.StepTraceDataAction;
import org.datadog.jenkins.plugins.datadog.traces.message.TraceSpan;
import org.datadog.jenkins.plugins.datadog.traces.write.TraceWriter;
import org.datadog.jenkins.plugins.datadog.traces.write.TraceWriterFactory;
Expand Down Expand Up @@ -115,45 +113,20 @@ public void onInitialize(Run run) {
final TraceSpan buildSpan = new TraceSpan("jenkins.build", TimeUnit.MILLISECONDS.toNanos(buildData.getStartTime(0L)));
BuildSpanManager.get().put(buildData.getBuildTag(""), buildSpan);

// The buildData object is stored in the BuildSpanAction to be updated
// by the information that will be calculated when the pipeline listeners
// were executed. This is needed because if the user build is based on
// Jenkins Pipelines, there are many information that is missing when the
// root span is created, such as Git info (this is calculated in an inner step
// of the pipeline)
final BuildSpanAction buildSpanAction = new BuildSpanAction(buildData, buildSpan.context());
final BuildSpanAction buildSpanAction = new BuildSpanAction(buildSpan.context());
run.addAction(buildSpanAction);

final StepDataAction stepDataAction = new StepDataAction();
run.addAction(stepDataAction);
run.addAction(new GitCommitAction());
run.addAction(new GitRepositoryAction());
run.addAction(new TraceInfoAction());
run.addAction(new PipelineQueueInfoAction());

// Traces
startBuildTrace(buildData, run);
logger.fine("End DatadogBuildListener#onInitialize");
} catch (Exception e) {
DatadogUtilities.severe(logger, e, "Failed to process build initialization");
}
}

private void startBuildTrace(final BuildData buildData, Run run) {
if (!DatadogUtilities.getDatadogGlobalDescriptor().getEnableCiVisibility()) {
logger.fine("CI Visibility is disabled");
return;
}

final StepTraceDataAction stepTraceDataAction = new StepTraceDataAction();
run.addAction(stepTraceDataAction);

final StageBreakdownAction stageBreakdownAction = new StageBreakdownAction();
run.addAction(stageBreakdownAction);

final PipelineQueueInfoAction pipelineQueueInfoAction = new PipelineQueueInfoAction();
run.addAction(pipelineQueueInfoAction);

final CIGlobalTagsAction ciGlobalTags = new CIGlobalTagsAction(buildData.getTagsForTraces());
run.addAction(ciGlobalTags);
}

/**
* Called before the SCMCheckout is run in a Jenkins build.
* This method is called after onInitialize callback.
Expand All @@ -164,7 +137,7 @@ public Environment setUpEnvironment(AbstractBuild build, Launcher launcher, Buil
logger.fine("Start DatadogBuildListener#setUpEnvironment");

final BuildSpanAction buildSpanAction = build.getAction(BuildSpanAction.class);
if(buildSpanAction == null || buildSpanAction.getBuildData() == null) {
if(buildSpanAction == null || buildSpanAction.getBuildSpanContext() == null) {
return new Environment() {
};
}
Expand Down Expand Up @@ -233,15 +206,16 @@ public void onStarted(Run run, TaskListener listener) {
Queue queue = getQueue();
Queue.Item item = queue.getItem(run.getQueueId());
Map<String, Set<String>> tags = buildData.getTags();
String hostname = buildData.getHostname("unknown");
String hostname = buildData.getHostname(DatadogUtilities.getHostname(null));
try (Metrics metrics = client.metrics()) {
long waitingMs = (DatadogUtilities.currentTimeMillis() - item.getInQueueSince());
metrics.gauge("jenkins.job.waiting", TimeUnit.MILLISECONDS.toSeconds(waitingMs), hostname, tags);

final BuildSpanAction buildSpanAction = run.getAction(BuildSpanAction.class);
if(buildSpanAction != null && buildSpanAction.getBuildData() != null) {
buildSpanAction.getBuildData().setMillisInQueue(waitingMs);
PipelineQueueInfoAction queueInfoAction = run.getAction(PipelineQueueInfoAction.class);
if (queueInfoAction != null) {
queueInfoAction.setQueueTimeMillis(waitingMs);
}

} catch (NullPointerException e) {
logger.warning("Unable to compute 'waiting' metric. " +
"item.getInQueueSince() unavailable, possibly due to worker instance provisioning");
Expand Down Expand Up @@ -303,7 +277,7 @@ public void onCompleted(Run run, @Nonnull TaskListener listener) {

// Send a metric
Map<String, Set<String>> tags = buildData.getTags();
String hostname = buildData.getHostname("unknown");
String hostname = buildData.getHostname(DatadogUtilities.getHostname(null));
metrics.gauge("jenkins.job.duration", buildData.getDuration(0L) / 1000, hostname, tags);
logger.fine(String.format("[%s]: Duration: %s", buildData.getJobName(null), toTimeString(buildData.getDuration(0L))));

Expand Down Expand Up @@ -415,15 +389,8 @@ public void onFinalized(Run run) {
} catch (Exception e) {
DatadogUtilities.severe(logger, e, "Failed to process build finalization");
} finally {
// If the run belongs to a Jenkins pipeline (based on FlowNodes),
// the `onFinalized` method is executed before processing the last node.
// This means we cannot clean up trace actions at this point if the run is a Jenkins pipeline.
// The trace actions will be removed after last FlowNode has been processed.
// (See DatadogTracePipelineLogic.execute(...) method)
if(!isPipeline(run)) {
// Explicit removal of InvisibleActions used to collect Traces when the Run finishes.
cleanUpTraceActions(run);
}
// Explicit removal of InvisibleActions used to collect Traces when the Run finishes.
cleanUpTraceActions(run);
}
}

Expand Down Expand Up @@ -467,7 +434,7 @@ public void onDeleted(Run run) {
}

// Get the list of global tags to apply
String hostname = buildData.getHostname("unknown");
String hostname = buildData.getHostname(DatadogUtilities.getHostname(null));

// Send an event
final boolean shouldSendEvent = DatadogUtilities.shouldSendEvent(BuildAbortedEventImpl.BUILD_ABORTED_EVENT_NAME);
Expand Down
Loading
Loading