Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat:优化dispatch-sdk调度逻辑对其他服务的依赖 #10882 #10886

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,10 @@ import com.tencent.devops.process.engine.pojo.PipelineBuildContainer
import com.tencent.devops.process.engine.pojo.PipelineBuildTask
import com.tencent.devops.process.pojo.mq.PipelineAgentShutdownEvent
import com.tencent.devops.process.pojo.mq.PipelineAgentStartupEvent
import feign.RetryableException
import org.slf4j.LoggerFactory
import java.util.Date
import java.util.concurrent.TimeUnit
import org.slf4j.LoggerFactory

@Suppress("LongParameterList", "TooManyFunctions")
class DispatchService constructor(
Expand Down Expand Up @@ -172,7 +173,7 @@ class DispatchService constructor(
executeCount: Int?,
logTag: String?
): Boolean {
val (startBuildTask, buildContainer) = getContainerStartupInfo(
val (startBuildTask, buildContainer) = getContainerStartupInfoWithRetry(
projectId = projectId,
buildId = buildId,
containerId = containerId,
Expand Down Expand Up @@ -251,7 +252,7 @@ class DispatchService constructor(
) {
logger.warn("[$buildId|$vmSeqId] Container startup failure")
try {
val (startBuildTask, buildContainer) = getContainerStartupInfo(
val (startBuildTask, buildContainer) = getContainerStartupInfoWithRetry(
projectId = projectId,
buildId = buildId,
containerId = vmSeqId,
Expand Down Expand Up @@ -318,6 +319,41 @@ class DispatchService constructor(
}
}

/**
 * Fetches the container startup info via [getContainerStartupInfo], retrying when the
 * call fails with a [RetryableException] (intended for inter-service calls that fail
 * with errors such as "Connection refused").
 *
 * The call is attempted once and then retried up to [retryTimes] more times, pausing
 * one second before each retry. Any other exception type propagates immediately.
 *
 * @param projectId forwarded unchanged to [getContainerStartupInfo]
 * @param buildId forwarded unchanged to [getContainerStartupInfo]; also used in the log prefix
 * @param containerId forwarded unchanged to [getContainerStartupInfo]; also used in the log prefix
 * @param logTag forwarded unchanged to [getContainerStartupInfo]
 * @param retryTimes number of retries still allowed after a failed attempt; a value of
 * zero or less means the first failure is rethrown without retrying
 * @return the build task / build container pair returned by [getContainerStartupInfo]
 * @throws RetryableException the last failure, once the retries are exhausted
 */
private fun getContainerStartupInfoWithRetry(
    projectId: String,
    buildId: String,
    containerId: String,
    logTag: String?,
    retryTimes: Int = RETRY_TIMES
): Pair<PipelineBuildTask, PipelineBuildContainer> {
    // Pause between two attempts, in milliseconds.
    val retryIntervalMillis = 1000L
    var remainingRetries = retryTimes
    while (true) {
        try {
            return getContainerStartupInfo(
                projectId = projectId,
                buildId = buildId,
                containerId = containerId,
                logTag = logTag
            )
        } catch (e: RetryableException) {
            if (remainingRetries <= 0) {
                throw e
            }
            // The failure reason is embedded in the message itself: passing it as a
            // format argument without a "{}" placeholder makes SLF4J drop it silently.
            logger.warn(
                "[$buildId]|[$containerId]| getContainerStartupInfo failed, " +
                    "retryTimes=$remainingRetries, error=${e.message}"
            )
            Thread.sleep(retryIntervalMillis)
            remainingRetries--
        }
    }
}

private fun getContainerStartupInfo(
projectId: String,
buildId: String,
Expand Down Expand Up @@ -416,5 +452,7 @@ class DispatchService constructor(

companion object {
    /** Default value of the `retryTimes` parameter of `getContainerStartupInfoWithRetry`. */
    private const val RETRY_TIMES = 3

    /** SLF4J logger shared by all instances of this class. */
    private val logger = LoggerFactory.getLogger(DispatchService::class.java)
}
}
Loading