Skip to content

Commit

Permalink
4.x Memory leak cleanup code - for component pipeline (#980)
Browse files Browse the repository at this point in the history
  • Loading branch information
clemensutschig authored Mar 21, 2023
1 parent f9aef74 commit 59ab2a1
Show file tree
Hide file tree
Showing 7 changed files with 102 additions and 40 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

## Unreleased

### Fixed
- Memory leak fixes for component pipeline ([#857](https://github.com/opendevstack/ods-jenkins-shared-library/issues/857))

## [4.2.0] - 2023-02-21

### Added
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ Load the shared library in your `Jenkinsfile` like this:

[source,groovy]
----
@Library('ods-jenkins-shared-library@3.x') _
@Library('ods-jenkins-shared-library@4.x') _
odsOrchestrationPipeline(
debug: true,
odsImageTag: '3.x'
odsImageTag: '4.x'
)
----

Expand Down Expand Up @@ -158,6 +158,13 @@ cluster node setup, this may decrease execution performance. In order to re-use
alwaysPullImage: true
----

By default the orchestration pipeline will create a pod based on the jenkins-base-agent image to do much of its work.
In rare cases, usually with a lot of repositories, one may hit an out-of-memory error on the pod named 'mro-XX'. In this case the below
memory limit should be adjusted (it defaults to '1Gi')
----
mroAgentMemoryLimit = "1Gi"
----

== Automated Generation of Compliance Documents

The library automatically generates Lean Validation (LeVA) compliance reports based on data in your Jira project, as well as data generated along the automated build, deploy, test, and release process by the release manager component.
Expand Down
24 changes: 6 additions & 18 deletions src/org/ods/component/Pipeline.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,12 @@ class Pipeline implements Serializable {
}
}

// check if there is a skipped previous run - if so - delete (to save memory)
if (!script.env.MULTI_REPO_BUILD) {
jenkinsService.deleteNotBuiltBuilds(
script.currentBuild.getPreviousBuild())
}

skipCi = isCiSkip()
if (skipCi) {
script.stage('odsPipeline (ci skip) finished') {
Expand Down Expand Up @@ -289,30 +295,12 @@ class Pipeline implements Serializable {
"ODS Build Artifacts '${context.componentId}': " +
"\r${JsonOutput.prettyPrint(JsonOutput.toJson(context.getBuildArtifactURIs()))}"
)
if (!!!script.env.MULTI_REPO_BUILD) {
cleanUp()
}
}
}
}
}
}

private void cleanUp() {
logger.debug('-- SHUTTING DOWN RM (..) --')
logger.resetStopwatch()
this.script = null
this.steps = null
this.logger = null

this.gitService = null
this.openShiftService = null
this.jenkinsService = null
this.bitbucketService = null

ServiceRegistry.removeInstance()
}

def setupForMultiRepoBuild(def config) {
logger.info '-> Detected multirepo orchestration pipeline build'
config.localCheckoutEnabled = false
Expand Down
27 changes: 12 additions & 15 deletions src/org/ods/orchestration/BuildStage.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -58,31 +58,28 @@ class BuildStage extends Stage {
util.warnBuildIfTestResultsContainFailure(data.tests.unit.testResults)
}

logger.info("levaDocScheduler.run start")
logger.debug("levaDocScheduler.run start")
levaDocScheduler.run(
phase,
PipelinePhaseLifecycleStage.POST_EXECUTE_REPO,
repo,
data
)
logger.info("levaDocScheduler.run end")
logger.debug("levaDocScheduler.run end")
}
}

Closure generateDocuments = {
levaDocScheduler.run(phase, PipelinePhaseLifecycleStage.POST_START)
}
// (cut) the reason to NOT go parallel here is a Jenkins issue where too many
// parallels cause ArrayList$Itr serialization errors
levaDocScheduler.run(phase, PipelinePhaseLifecycleStage.POST_START)

util.prepareExecutePhaseForReposNamedJob(phase, repos, preExecuteRepo, postExecuteRepo)
.each { group ->
// FailFast only if not WIP
group.failFast = !project.isWorkInProgress
script.parallel(group)
}

// Execute phase for each repository
Closure executeRepos = {
util.prepareExecutePhaseForReposNamedJob(phase, repos, preExecuteRepo, postExecuteRepo)
.each { group ->
// FailFast only if not WIP
group.failFast = !project.isWorkInProgress
script.parallel(group)
}
}
executeInParallel(executeRepos, generateDocuments)
levaDocScheduler.run(phase, PipelinePhaseLifecycleStage.PRE_END)

// in case of WIP we fail AFTER all pieces have been executed - so we can report as many
Expand Down
36 changes: 36 additions & 0 deletions src/org/ods/services/JenkinsService.groovy
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.ods.services

import org.ods.util.ILogger
import hudson.scm.ChangeLogSet

class JenkinsService {

Expand Down Expand Up @@ -125,4 +126,39 @@ class JenkinsService {
}
}

// MEM leak saver! delete the previous run, if it was triggered by an RM skip commit.
// Walks the whole previous-build chain iteratively instead of recursing: Groovy has no
// tail-call elimination for method recursion, so a long build history could otherwise
// end in a StackOverflowError.
void deleteNotBuiltBuilds (def previousBuild = null) {
    def build = previousBuild
    while (build != null) {
        // we need to grab the predecessor super early - similar to the id, because
        // once this build is deleted it can no longer be navigated from
        def olderBuild = build.getPreviousBuild()
        if (build?.getResult()?.toString() == 'NOT_BUILT') {
            def buildId = "${build.getId()}"
            logger.debug("Found CI SKIP run: ${buildId}, ${build.getDescription()}")
            // get the change set(s) and look for the first (== last commit and its message)
            if (!build.getChangeSets()?.isEmpty()) {
                ChangeLogSet changes = build.getChangeSets().get(0)
                if (!changes.isEmptySet()) {
                    ChangeLogSet.Entry change = changes.getItems()[0]
                    logger.debug("Changlog message: ${change.getMsg()}")
                    // only builds auto-generated by the release manager (RM export commits)
                    // are deleted; anything with a human commit message is preserved
                    if (change.getMsg()?.startsWith('ODS: Export OpenShift configuration') ||
                        change.getMsg()?.startsWith('ODS: Export Openshift deployment state')) {
                        try {
                            build.getRawBuild().delete()
                            logger.info("Deleted (CI SKIP) build: '${buildId}' because it was autogenerated by RM")
                        } catch (err) {
                            // best effort - e.g. missing permissions must not fail the pipeline
                            logger.warn ("Could not delete build with id: '${buildId}', ${err}")
                        }
                    } else {
                        logger.debug("Found human changelog: \n${change.getMsg()}, " +
                            "hence build '${buildId}' will not be deleted")
                    }
                }
            }
        }
        // move on to the next-older build to clean up all the RM-created builds
        build = olderBuild
    }
}
}
32 changes: 30 additions & 2 deletions vars/odsComponentPipeline.groovy
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
import org.ods.component.Pipeline
import org.ods.util.Logger

import org.ods.services.ServiceRegistry
import org.ods.util.ClassLoaderCleaner
import org.ods.util.UnirestConfig
import java.lang.reflect.Method

def call(Map config, Closure body) {
def debug = env.DEBUG
if (debug != null) {
Expand All @@ -11,8 +16,31 @@ def call(Map config, Closure body) {

def logger = new Logger(this, config.debug)
def pipeline = new Pipeline(this, logger)

pipeline.execute(config, body)
String processId = "${env.JOB_NAME}/${env.BUILD_NUMBER}"
try {
pipeline.execute(config, body)
} finally {
if (env.MULTI_REPO_BUILD) {
logger.debug('-- in RM mode, shutdown skipped --')
}
if (!env.MULTI_REPO_BUILD) {
logger.warn('-- SHUTTING DOWN Component Pipeline (..) --')
logger.resetStopwatch()
ServiceRegistry.removeInstance()
UnirestConfig.shutdown()
try {
new ClassLoaderCleaner().clean(logger, processId)
// use the jenkins INTERNAL cleanupHeap method - attention NOTHING can happen after this method!
logger.debug("forceClean via jenkins internals....")
Method cleanupHeap = currentBuild.getRawBuild().getExecution().class.getDeclaredMethod("cleanUpHeap")
cleanupHeap.setAccessible(true)
cleanupHeap.invoke(currentBuild.getRawBuild().getExecution(), null)
} catch (Exception e) {
logger.debug("cleanupHeap err: ${e}")
}
logger = null
}
}
}

return this
9 changes: 6 additions & 3 deletions vars/odsOrchestrationPipeline.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ def call(Map config) {
boolean startAgentEarly = config.get('startOrchestrationAgentOnInit', true)
def startAgentStage = startAgentEarly ? MROPipelineUtil.PipelinePhases.INIT : null

resourceLimitMemory = config.get('mroAgentMemoryLimit', '1Gi')

logger.debug("Start agent stage: ${startAgentStage}")
Project project = new Project(steps, logger)
def repos = []
Expand All @@ -57,7 +59,7 @@ def call(Map config) {

logger.startClocked('pod-template')
def envs = Project.getBuildEnvironment(steps, debug, versionedDevEnvsEnabled)
withPodTemplate(odsImageTag, steps, alwaysPullImage) {
withPodTemplate(odsImageTag, steps, alwaysPullImage, resourceLimitMemory) {
logger.debugClocked('pod-template')
withEnv(envs) {
def result
Expand Down Expand Up @@ -146,7 +148,8 @@ private void checkOutLocalBranch(GitService git, scm, ILogger logger) {
}

@SuppressWarnings('GStringExpressionWithinString')
private withPodTemplate(String odsImageTag, IPipelineSteps steps, boolean alwaysPullImage, Closure block) {
private withPodTemplate(String odsImageTag, IPipelineSteps steps, boolean alwaysPullImage,
String mroAgentLimit, Closure block) {
ILogger logger = ServiceRegistry.instance.get(Logger)
def dockerRegistry = steps.env.DOCKER_REGISTRY ?: 'docker-registry.default.svc:5000'
def podLabel = "mro-jenkins-agent-${env.BUILD_NUMBER}"
Expand All @@ -165,7 +168,7 @@ private withPodTemplate(String odsImageTag, IPipelineSteps steps, boolean always
image: "${dockerRegistry}/${odsNamespace}/jenkins-agent-base:${odsImageTag}",
workingDir: '/tmp',
resourceRequestMemory: '512Mi',
resourceLimitMemory: '1Gi',
resourceLimitMemory: "${mroAgentLimit}",
resourceRequestCpu: '200m',
resourceLimitCpu: '1',
alwaysPullImage: "${alwaysPullImage}",
Expand Down

0 comments on commit 59ab2a1

Please sign in to comment.