diff --git a/pom.xml b/pom.xml index 32e52899a..3bf8b6f46 100644 --- a/pom.xml +++ b/pom.xml @@ -35,7 +35,7 @@ THE SOFTWARE. ec2 - ${revision}${changelist} + ${revision}-oc${changelist} hpi Amazon EC2 plugin @@ -244,7 +244,7 @@ THE SOFTWARE. org.testcontainers testcontainers - 1.8.3 + 1.15.2 test diff --git a/src/main/java/hudson/plugins/ec2/EC2RetentionStrategy.java b/src/main/java/hudson/plugins/ec2/EC2RetentionStrategy.java index 2be14b206..2aba4fcb6 100644 --- a/src/main/java/hudson/plugins/ec2/EC2RetentionStrategy.java +++ b/src/main/java/hudson/plugins/ec2/EC2RetentionStrategy.java @@ -32,6 +32,7 @@ import hudson.model.ExecutorListener; import hudson.model.Queue; import hudson.plugins.ec2.util.MinimumInstanceChecker; +import hudson.model.Label; import hudson.slaves.RetentionStrategy; import jenkins.model.Jenkins; @@ -191,7 +192,8 @@ private long internalCheck(EC2Computer computer) { // TODO: really think about the right strategy here, see // JENKINS-23792 - if (idleMilliseconds > TimeUnit.MINUTES.toMillis(idleTerminationMinutes)) { + if (idleMilliseconds > TimeUnit.MINUTES.toMillis(idleTerminationMinutes) && + !itemsInQueueForThisSlave(computer)){ LOGGER.info("Idle timeout of " + computer.getName() + " after " + TimeUnit.MILLISECONDS.toMinutes(idleMilliseconds) + @@ -210,7 +212,7 @@ private long internalCheck(EC2Computer computer) { // if we have less "free" (aka already paid for) time left than // our idle time, stop/terminate the instance // See JENKINS-23821 - if (freeSecondsLeft <= TimeUnit.MINUTES.toSeconds(Math.abs(idleTerminationMinutes))) { + if (freeSecondsLeft <= TimeUnit.MINUTES.toSeconds(Math.abs(idleTerminationMinutes)) && !itemsInQueueForThisSlave(computer)) { LOGGER.info("Idle timeout of " + computer.getName() + " after " + TimeUnit.MILLISECONDS.toMinutes(idleMilliseconds) + " idle minutes, with " + TimeUnit.SECONDS.toMinutes(freeSecondsLeft) @@ -225,6 +227,28 @@ private long internalCheck(EC2Computer computer) { return 1; } + /* + * Checks if there are any items in the queue that are waiting for this node explicitly. + * This prevents a node from being taken offline while there are Ivy/Maven Modules waiting to build. + * Need to check entire queue as some modules may be blocked by upstream dependencies. + * Accessing the queue in this way can block other threads, so only perform this check just prior + * to timing out the slave. + */ + private boolean itemsInQueueForThisSlave(EC2Computer c) { + final Label selfLabel = c.getNode().getSelfLabel(); + Queue.Item[] items = Jenkins.getInstance().getQueue().getItems(); + for (int i = 0; i < items.length; i++) { + Queue.Item item = items[i]; + final Label assignedLabel = item.getAssignedLabel(); + if (assignedLabel == selfLabel) { + LOGGER.fine("Preventing idle timeout of " + c.getName() + + " as there is at least one item in the queue explicitly waiting for this slave"); + return true; + } + } + return false; + } + /** * Called when a new {@link EC2Computer} object is introduced (such as when Hudson started, or when * a new agent is added.) diff --git a/src/test/java/hudson/plugins/ec2/EC2RetentionStrategyTest.java b/src/test/java/hudson/plugins/ec2/EC2RetentionStrategyTest.java index 18aefd8a8..bd809c82a 100644 --- a/src/test/java/hudson/plugins/ec2/EC2RetentionStrategyTest.java +++ b/src/test/java/hudson/plugins/ec2/EC2RetentionStrategyTest.java @@ -7,20 +7,33 @@ import hudson.plugins.ec2.util.MinimumInstanceChecker; import hudson.plugins.ec2.util.MinimumNumberOfInstancesTimeRangeConfig; import hudson.plugins.ec2.util.PrivateKeyHelper; +import hudson.security.ACL; +import hudson.security.AccessControlled; +import hudson.security.Permission; import hudson.slaves.NodeProperty; import hudson.model.Executor; import hudson.model.Node; +import hudson.model.Label; +import hudson.model.Queue; +import hudson.model.ResourceList; +import hudson.model.Queue.Executable; +import hudson.model.Queue.Task; +import hudson.model.queue.CauseOfBlockage; import net.sf.json.JSONObject; +import org.acegisecurity.Authentication; import org.junit.Rule; import org.junit.Test; import org.jvnet.hudson.test.JenkinsRule; +import jenkins.model.Jenkins; +import javax.annotation.Nonnull; import java.time.Clock; import java.time.Instant; import java.time.LocalDateTime; import java.time.Month; import java.time.ZoneId; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -30,6 +43,7 @@ import java.util.stream.Collectors; import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; public class EC2RetentionStrategyTest { @@ -67,6 +81,106 @@ public void testOnBillingHourRetention() throws Exception { } } + @Test + public void testRetentionWhenQueueHasWaitingItemForThisNode() throws Exception { + EC2RetentionStrategy rs = new EC2RetentionStrategy("-2"); + EC2Computer computer = computerWithIdleTime(59, 0); + final Label selfLabel = computer.getNode().getSelfLabel(); + final Queue queue = Jenkins.get().getQueue(); + final Task task = taskForLabel(selfLabel, false); + queue.schedule(task, 500); + checkRetentionStrategy(rs, computer); + assertFalse("Expected computer to be left running", idleTimeoutCalled.get()); + queue.cancel(task); + EC2RetentionStrategy rs2 = new EC2RetentionStrategy("-2"); + checkRetentionStrategy(rs2, computer); + assertTrue("Expected computer to be idled", idleTimeoutCalled.get()); + } + + @Test + public void testRetentionWhenQueueHasBlockedItemForThisNode() throws Exception { + EC2RetentionStrategy rs = new EC2RetentionStrategy("-2"); + EC2Computer computer = computerWithIdleTime(59, 0); + final Label selfLabel = computer.getNode().getSelfLabel(); + final Queue queue = Jenkins.get().getQueue(); + final Task task = taskForLabel(selfLabel, true); + queue.schedule(task, 0); + checkRetentionStrategy(rs, computer); + assertFalse("Expected computer to be left running", idleTimeoutCalled.get()); + queue.cancel(task); + EC2RetentionStrategy rs2 = new EC2RetentionStrategy("-2"); + checkRetentionStrategy(rs2, computer); + assertTrue("Expected computer to be idled", idleTimeoutCalled.get()); + } + + private interface AccessControlledTask extends Queue.Task, AccessControlled { + } + + private Queue.Task taskForLabel(final Label label, boolean blocked) { + final CauseOfBlockage cob = blocked ? new CauseOfBlockage() { + @Override + public String getShortDescription() { + return "Blocked"; + } + } : null; + return new AccessControlledTask() { + @Nonnull + public ACL getACL() { + return new ACL() { + public boolean hasPermission(@Nonnull Authentication a, @Nonnull Permission permission) { + return true; + } + }; + } + public ResourceList getResourceList() { + return null; + } + + public Node getLastBuiltOn() { + return null; + } + + public long getEstimatedDuration() { + return -1; + } + + public Label getAssignedLabel() { + return label; + } + + public Executable createExecutable() { + return null; + } + + public String getDisplayName() { + return null; + } + + @Override + public CauseOfBlockage getCauseOfBlockage() { + return cob; + } + + public boolean hasAbortPermission() { + return false; + } + + public String getUrl() { + return null; + } + + public String getName() { + return null; + } + + public String getFullDisplayName() { + return null; + } + + public void checkAbortPermission() { + } + }; + } private EC2Computer computerWithIdleTime(final int minutes, final int seconds) throws Exception { final EC2AbstractSlave slave = new EC2AbstractSlave("name", "id", "description", "fs", 1, null, "label", null, null, "init", "tmpDir", new ArrayList>(), "remote", "jvm", false, "idle", null, "cloud", false, Integer.MAX_VALUE, null, ConnectionStrategy.PRIVATE_IP, -1) { @Override