Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Job fail on alloc failed #1135

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,9 @@ public static class AllocatorProperties {
/** Name of user that system-generated reports are done by. */
private String systemReportUser;

/** Whether an allocation that fails should immediately fail a job. */
private boolean failJobOnFailAllocation;

/**
* @param period
* Time between runs of the main allocation algorithm.
Expand All @@ -603,17 +606,22 @@ public static class AllocatorProperties {
* taken out of service.
* @param systemReportUser
* Name of user that system-generated reports are done by.
* @param failJobOnFailAllocation
* Fail jobs immediately if the allocation fails, rather than
* queuing the job. Avoids system locking up.
*/
public AllocatorProperties(@DefaultValue("5s") Duration period,
@DefaultValue("10000") int importanceSpan,
@DefaultValue PriorityScale priorityScale,
@DefaultValue("2") int reportActionThreshold,
@DefaultValue("") String systemReportUser) {
@DefaultValue("") String systemReportUser,
@DefaultValue("true") boolean failJobOnFailAllocation) {
this.period = period;
this.importanceSpan = importanceSpan;
this.priorityScale = priorityScale;
this.reportActionThreshold = reportActionThreshold;
this.systemReportUser = systemReportUser;
this.failJobOnFailAllocation = failJobOnFailAllocation;
Christian-B marked this conversation as resolved.
Show resolved Hide resolved
}

/**
Expand Down Expand Up @@ -683,6 +691,15 @@ public String getSystemReportUser() {
void setSystemReportUser(String systemReportUser) {
this.systemReportUser = systemReportUser;
}

/**
* Reports whether an allocation that fails should immediately fail its
* job, rather than the job remaining queued.
*
* @return true if jobs should be failed if allocation fails.
*/
public boolean isFailJobOnFailAllocation() {
return failJobOnFailAllocation;
}

/**
* Sets whether an allocation that fails should immediately fail its job.
*
* @param failJobOnFailAllocation
* true if jobs should be failed when their allocation fails.
*/
void setFailJobOnFailAllocation(boolean failJobOnFailAllocation) {
this.failJobOnFailAllocation = failJobOnFailAllocation;
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -387,9 +387,6 @@ private final class AllocSQL extends PowerSQL {
/** Get the list of allocation tasks for jobs in a given state. */
private final Query getTasks;

/** Delete an allocation task. */
private final Update delete;

/** Find a single free board. */
private final Query findFreeBoard;

Expand Down Expand Up @@ -439,7 +436,6 @@ private final class AllocSQL extends PowerSQL {
super(conn);
bumpImportance = conn.update(BUMP_IMPORTANCE);
getTasks = conn.query(getAllocationTasks);
delete = conn.update(DELETE_TASK);
findFreeBoard = conn.query(FIND_FREE_BOARD);
getRectangles = conn.query(findRectangle);
getRectangleAt = conn.query(findRectangleAt);
Expand All @@ -455,7 +451,6 @@ public void close() {
super.close();
bumpImportance.close();
getTasks.close();
delete.close();
findFreeBoard.close();
getRectangles.close();
getRectangleAt.close();
Expand Down Expand Up @@ -651,13 +646,18 @@ private Allocations allocate(Connection conn) {
continue;
}
var handled = task.allocate(sql);
// If we handled it, delete the request
if (handled.size() > 0) {
sql.delete.call(task.id);
Christian-B marked this conversation as resolved.
Show resolved Hide resolved

if (handled.size() == 0
&& allocProps.isFailJobOnFailAllocation()) {
Christian-B marked this conversation as resolved.
Show resolved Hide resolved
destroyJob(conn, task.jobId,
"Unable to allocate - please try again");
log.debug("allocate for {} (job {}) failed",
task.id, task.jobId);
} else {
allocations.addAll(task.jobId, handled);
log.debug("allocate for {} (job {}): {}", task.id,
task.jobId, handled);
}
allocations.addAll(task.jobId, handled);
log.debug("allocate for {} (job {}): {}", task.id,
task.jobId, handled);
}
/*
* Those tasks which weren't allocated get their importance bumped
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -659,11 +659,6 @@ public abstract class SQLQueries {
+ "FROM board_reports JOIN user_info ON reporter = user_id "
+ "WHERE board_id = :board_id";

/**
* Delete an allocation task. Removes the row(s) of {@code job_request}
* whose {@code req_id} equals the {@code request_id} parameter.
*/
@Parameter("request_id")
protected static final String DELETE_TASK =
"DELETE FROM job_request WHERE req_id = :request_id";

/** Find a single free board. */
@Parameter("machine_id")
@ResultColumn("x")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -173,17 +173,6 @@ void noteDestroyReason() {
}
}

/**
* Checks the DELETE_TASK statement: it must declare exactly one parameter,
* request_id, and calling it with NO_JOB must return 0.
*/
@Test
void deleteTask() {
// NOTE(review): presumably skips the test when the database is read-only - confirm.
assumeWritable(c);
try (var u = c.update(DELETE_TASK)) {
c.transaction(() -> {
assertEquals(List.of("request_id"), u.getParameters());
// NOTE(review): presumably call() returns the number of rows changed and NO_JOB matches no request - confirm.
assertEquals(0, u.call(NO_JOB));
});
}
}

@Test
void allocateBoardsJob() {
assumeWritable(c);
Expand Down
Loading