Skip to content

Commit

Permalink
Timeout when scaling down services should be configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
m90 committed Jan 28, 2024
1 parent e6ca4ac commit 8535967
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 4 deletions.
1 change: 1 addition & 0 deletions cmd/backup/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ type Config struct {
BackupPruningLeeway time.Duration `split_words:"true" default:"1m"`
BackupPruningPrefix string `split_words:"true"`
BackupStopContainerLabel string `split_words:"true" default:"true"`
BackupStopServiceTimeout time.Duration `split_words:"true" default:"5m"`
BackupFromSnapshot bool `split_words:"true"`
BackupExcludeRegexp RegexpDecoder `split_words:"true"`
BackupSkipBackendsFromPrune []string `split_words:"true"`
Expand Down
9 changes: 5 additions & 4 deletions cmd/backup/docker.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,17 +41,18 @@ func scaleService(cli *client.Client, serviceID string, replicas uint64) ([]stri
return response.Warnings, nil
}

func awaitContainerCountForService(cli *client.Client, serviceID string, count int) error {
func awaitContainerCountForService(cli *client.Client, serviceID string, count int, timeoutAfter time.Duration) error {
poll := time.NewTicker(time.Second)
timeout := time.NewTimer(5 * time.Minute)
timeout := time.NewTimer(timeoutAfter)
defer timeout.Stop()
defer poll.Stop()

for {
select {
case <-timeout.C:
return fmt.Errorf(
"awaitContainerCount: timed out after waiting 5 minutes for service %s to reach desired container count of %d",
"awaitContainerCount: timed out after waiting %s for service %s to reach desired container count of %d",
timeoutAfter,
serviceID,
count,
)
Expand Down Expand Up @@ -196,7 +197,7 @@ func (s *script) stopContainersAndServices() (func() error, error) {
}
// progress.ServiceProgress returns too early, so we need to manually check
// whether all containers belonging to the service have actually been removed
if err := awaitContainerCountForService(s.cli, svc.serviceID, 0); err != nil {
if err := awaitContainerCountForService(s.cli, svc.serviceID, 0, s.c.BackupStopServiceTimeout); err != nil {
scaleDownErrors.append(err)
}
}(svc)
Expand Down
8 changes: 8 additions & 0 deletions docs/reference/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,14 @@ You can populate below template according to your requirements and use it as you
# BACKUP_STOP_CONTAINER_LABEL="service1"
# When scaling down Docker Swarm services, give up after the specified
# amount of time if the service has not converged by then.
# To adjust this timeout, set `BACKUP_STOP_SERVICE_TIMEOUT` to a duration
# value in the format accepted by https://pkg.go.dev/time#ParseDuration
# (for example "90s" or "10m"). Defaults to 5 minutes.
# BACKUP_STOP_SERVICE_TIMEOUT="5m"
########### EXECUTING COMMANDS IN CONTAINERS PRE/POST BACKUP
# It is possible to define commands to be run in any container before and after
Expand Down

0 comments on commit 8535967

Please sign in to comment.