Skip to content

Commit

Permalink
1. Raise the maximum number of task failures to 10.
Browse files Browse the repository at this point in the history
2. If a task fails to obtain locks, retry it without limit.
3. Return an explicit error to the user.
  • Loading branch information
qidaye committed Oct 30, 2024
1 parent 173bb21 commit fca28fe
Showing 1 changed file with 8 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@

public class IndexChangeJob implements Writable {
private static final Logger LOG = LogManager.getLogger(IndexChangeJob.class);

private static final int MAX_FAILED_NUM = 10;

public enum JobState {
// CHECKSTYLE OFF
Expand Down Expand Up @@ -372,8 +372,13 @@ protected void runRunningJob() throws AlterCancelException {
LOG.info("inverted index tasks not finished. job: {}, partitionId: {}", jobId, partitionId);
List<AgentTask> tasks = invertedIndexBatchTask.getUnfinishedTasks(2000);
for (AgentTask task : tasks) {
if (task.getFailedTimes() > 3) {
if (task.getFailedTimes() >= MAX_FAILED_NUM) {
LOG.warn("alter inverted index task failed: " + task.getErrorMsg());
// If the error is E-216, it indicates that obtaining the lock failed,
// so we should retry this task.
if (task.getErrorMsg().contains("E-216")) {
continue;
}
Set<Long> failedBackends = failedTabletBackends.computeIfAbsent(task.getTabletId(),
k -> new HashSet<>());
failedBackends.add(task.getBackendId());
Expand All @@ -382,7 +387,7 @@ protected void runRunningJob() throws AlterCancelException {
int failedTaskCount = failedBackends.size();
if (expectSucceedTaskNum - failedTaskCount < expectSucceedTaskNum / 2 + 1) {
throw new AlterCancelException("inverted index tasks failed on same tablet reach threshold "
+ failedTaskCount);
+ failedTaskCount + ", error: " + task.getErrorMsg());
}
}
}
Expand Down

0 comments on commit fca28fe

Please sign in to comment.