Skip to content

Commit

Permalink
core: Changed waiting for reboot from 10 minutes to check connectivit…
Browse files Browse the repository at this point in the history
…y every 30 seconds

Altered sleepOnReboot to try to reach the host at a fixed interval, using ServerRebootSleepTime as the waiting time between tries.
When the host is back online after reboot, cancel sleep timeout and continue with operations.
ServerRebootTimeout is no longer a fixed wait after which the host is contacted; it is now the maximum time the host has to come back online before operations are broken off.
Uses the stats it gets from the vdsproxy to check the state of the host and reports back that operations with this host can be continued.
Signed-off-by: Jasper Berton <[email protected]>
  • Loading branch information
JasperB-TeamBlue committed Dec 17, 2024
1 parent 89b2cd1 commit cf0f996
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@

import java.util.Collections;
import java.util.List;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

import javax.enterprise.concurrent.ManagedScheduledExecutorService;
import javax.inject.Inject;

import org.apache.commons.lang.StringUtils;
Expand Down Expand Up @@ -37,14 +42,18 @@
import org.ovirt.engine.core.dao.VdsStaticDao;
import org.ovirt.engine.core.dao.gluster.GlusterDBUtils;
import org.ovirt.engine.core.utils.EngineLocalConfig;
import org.ovirt.engine.core.utils.ThreadUtils;
import org.ovirt.engine.core.utils.lock.EngineLock;
import org.ovirt.engine.core.utils.threadpool.ThreadPoolUtil;
import org.ovirt.engine.core.utils.threadpool.ThreadPools;
import org.ovirt.engine.core.vdsbroker.ResourceManager;
import org.ovirt.engine.core.vdsbroker.vdsbroker.IVdsServer;
import org.ovirt.engine.core.vdsbroker.vdsbroker.VDSInfoReturn;


public abstract class VdsCommand<T extends VdsActionParameters> extends CommandBase<T> {

protected String _failureMessage = null;
private ScheduledFuture<?> reachableFuture;

@Inject
protected AuditLogDirector auditLogDirector;
Expand All @@ -68,6 +77,9 @@ public abstract class VdsCommand<T extends VdsActionParameters> extends CommandB
private AlertDirector alertDirector;
@Inject
private VdsStaticDao vdsStaticDao;
@Inject
@ThreadPools(ThreadPools.ThreadPoolType.EngineScheduledThreadPool)
private ManagedScheduledExecutorService executor;

/**
* Constructor for command creation when compensation is applied on startup
Expand Down Expand Up @@ -112,14 +124,46 @@ protected void runSleepOnReboot(boolean synchronous, final VDSStatus status) {
}
}

/**
 * Waits for a rebooting host to become reachable again.
 * <p>
 * The host is flagged as being in its reboot timeout and is then polled through {@link #isReachable()} every
 * {@code ServerRebootSleepTime} seconds. Waiting ends as soon as a poll cancels the scheduled future or once
 * {@code ServerRebootTimeout} seconds have elapsed, whichever happens first. In every case the periodic poll is
 * stopped, the reboot-timeout flag is cleared and the host is moved to {@code status}.
 *
 * @param status
 *            the status to set on the host once waiting is over
 */
private void sleepOnReboot(final VDSStatus status) {
    final int serverRebootMax = Config.<Integer> getValue(ConfigValues.ServerRebootTimeout);
    // scheduleAtFixedRate rejects a non-positive period, so never poll more often than once a second.
    final int retryTime = Math.max(1, Config.<Integer> getValue(ConfigValues.ServerRebootSleepTime));
    log.info("Waiting up to {} seconds for server to finish reboot process, checking every {} seconds.",
            serverRebootMax,
            retryTime);
    resourceManager.getVdsManager(getVdsId()).setInServerRebootTimeout(true);

    ScheduledFuture<?> poll = null;
    try {
        poll = executor.scheduleAtFixedRate(() -> isReachable(), retryTime, retryTime, TimeUnit.SECONDS);
        // isReachable() cancels through this field; the first run is delayed by retryTime seconds.
        reachableFuture = poll;
        // A periodic future never completes normally: get() only returns via cancellation, failure or timeout.
        poll.get(serverRebootMax, TimeUnit.SECONDS);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up the stack can still observe the interruption.
        Thread.currentThread().interrupt();
        log.info("Trying to reconnect with host {} after reboot failed due to {}", getVdsId(), e.toString());
    } catch (ExecutionException e) {
        log.info("Problem during execution of reconnection with host {} after reboot due to {}",
                getVdsId(),
                e.toString());
    } catch (TimeoutException e) {
        log.info("Unable to connect to host {} after {} seconds", getVdsId(), serverRebootMax);
    } catch (CancellationException e) {
        log.info("Future cancelled due to ability to connect to host {} after reboot.", getVdsId());
    } finally {
        // Stop polling on timeout/interrupt as well; otherwise the task keeps running on the shared pool.
        if (poll != null) {
            poll.cancel(false);
        }
        resourceManager.getVdsManager(getVdsId()).setInServerRebootTimeout(false);
        setVdsStatus(status);
    }
}

/**
 * Polls the host once to find out whether it is ready to reconnect.
 * <p>
 * Requests the host statistics through the VDS proxy. A status code of 0 means the host is done rebooting and
 * ready to reconnect; in that case the periodic polling future is cancelled (without interrupting a running
 * poll), which releases the thread waiting on it. Any exception raised while polling is logged and the next
 * scheduled run tries again. Errors are deliberately not caught: they end the periodic schedule and are
 * reported to whoever waits on the future.
 */
private void isReachable() {
    try {
        IVdsServer serv = resourceManager.getVdsManager(getVdsId()).getVdsProxy();
        VDSInfoReturn info = serv.getVdsStats();
        if (info == null || info.status == null) {
            log.info("Status of host {} is not available yet", getVdsId());
            return;
        }
        log.info("Status of host {} is {}", getVdsId(), info.status.toString());
        // Read the field once; it is assigned by the waiting thread only after the task has been scheduled.
        final ScheduledFuture<?> pending = reachableFuture;
        if (info.status.code == 0 && pending != null) {
            pending.cancel(false);
        }
    } catch (Exception e) {
        log.error("Error encountered while checking host {} after reboot: {}", getVdsId(), e.toString());
    }
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -234,6 +234,8 @@ public enum ConfigValues {
@Reloadable
@TypeConverterAttribute(Integer.class)
ServerRebootTimeout,
@TypeConverterAttribute(Integer.class)
ServerRebootSleepTime,
@Reloadable
@TypeConverterAttribute(Integer.class)
VmGracefulShutdownTimeout,
Expand Down
1 change: 1 addition & 0 deletions packaging/dbscripts/upgrade/pre_upgrade/0000_config.sql
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,7 @@ select fn_db_add_config_value('ServerCPUList',
'4.8');

select fn_db_add_config_value('ServerRebootTimeout','600','general');
select fn_db_add_config_value('ServerRebootSleepTime','30','general');
select fn_db_add_config_value('SetupNetworksPollingTimeout','3','general');
select fn_db_add_config_value('SignCertTimeoutInSeconds','30','general');
--Handling Script name for signing
Expand Down
4 changes: 3 additions & 1 deletion packaging/etc/engine-config/engine-config.properties
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,10 @@ SANWipeAfterDelete.description="Initializing disk image is more secure but it is
SANWipeAfterDelete.validValues=true,false
SearchResultsLimit.description="Max Quantity of Search Results"
SearchResultsLimit.type=Integer
ServerRebootTimeout.description="Host Reboot Timeout (in seconds)"
ServerRebootTimeout.description="Max Host Reboot Timeout (in seconds)"
ServerRebootTimeout.type=Integer
# Polling interval used while waiting for a rebooting host; the type is declared with the ".type" suffix like every other key.
ServerRebootSleepTime.description="Interval between each try to connect to host while in reboot (in seconds)"
ServerRebootSleepTime.type=Integer
ConsoleReleaseCursorKeys.description="Keyboard keys combination that causes the mouse cursor to be released from its grab on console client window"
SpiceSecureChannels.description="SPICE Secure Channels"
SpiceSecureChannels.type=StringMultiple
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,4 +101,4 @@ def _customization(self):
] = True


# vim: expandtab tabstop=4 shiftwidth=4
# vim: expandtab tabstop=4 shiftwidth=4

0 comments on commit cf0f996

Please sign in to comment.