From aabac1cb86cd3fa5605ecfb764452f11517ebe11 Mon Sep 17 00:00:00 2001 From: xjxia Date: Thu, 28 Nov 2024 13:07:14 +0800 Subject: [PATCH] fix(dbm-services): dbha do ssh detect should with timeout close #8254 --- .../dbha/ha-module/agent/monitor_agent.go | 3 +- .../dbmodule/dbmysql/MySQL_detect.go | 40 ++++++++++++++++++- 2 files changed, 40 insertions(+), 3 deletions(-) diff --git a/dbm-services/common/dbha/ha-module/agent/monitor_agent.go b/dbm-services/common/dbha/ha-module/agent/monitor_agent.go index 384187773b..229a30417e 100644 --- a/dbm-services/common/dbha/ha-module/agent/monitor_agent.go +++ b/dbm-services/common/dbha/ha-module/agent/monitor_agent.go @@ -147,12 +147,13 @@ func (a *MonitorAgent) RefreshInstanceCache() { // DoDetectSingle do single instance detect func (a *MonitorAgent) DoDetectSingle(ins dbutil.DataBaseDetect) { ip, port := ins.GetAddress() - log.Logger.Debugf("begin to detect instance:%s#%d", ip, port) + log.Logger.Debugf("begin detect [%s] instance:%s#%d", ins.GetClusterType(), ip, port) err := ins.Detection() if err != nil { log.Logger.Warnf("Detect db instance failed. ins:[%s:%d],dbType:%s status:%s,DeteckErr=%s", ip, port, ins.GetDBType(), ins.GetStatus(), err.Error()) } + log.Logger.Debugf("finish detect [%s] instance:%s#%d", ins.GetClusterType(), ip, port) a.reportMonitor(ins, err) if ins.NeedReportAgent() { diff --git a/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_detect.go b/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_detect.go index 6e48060366..81bce51c37 100644 --- a/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_detect.go +++ b/dbm-services/common/dbha/ha-module/dbmodule/dbmysql/MySQL_detect.go @@ -1,6 +1,7 @@ package dbmysql import ( + "context" "encoding/json" "fmt" "math/rand" @@ -155,11 +156,12 @@ func (m *MySQLDetectInstance) Detection() error { return nil } case <-time.After(time.Second * time.Duration(m.Timeout)): - mysqlErr = fmt.Errorf("connect MySQL timeout recheck:%d", recheck) + mysqlErr = fmt.Errorf("connect MySQL[%s#%d] timeout recheck:%d", m.Ip, m.Port, recheck) log.Logger.Warnf(mysqlErr.Error()) m.Status = constvar.DBCheckFailed } } + log.Logger.Debugf("detect mysql[%s#%d] finish, try to detect ssh", m.Ip, m.Port) sshErr := m.CheckSSH() if sshErr != nil { @@ -230,7 +232,7 @@ func (m *MySQLDetectInstance) CheckMySQL(errChan chan error) { } // CheckSSH use ssh check whether machine alived -func (m *MySQLDetectInstance) CheckSSH() error { +func (m *MySQLDetectInstance) detectSSH() error { touchFile := fmt.Sprintf("%s_%s_%d", m.SshInfo.Dest, "agent", m.Port) touchStr := fmt.Sprintf("touch %s && if [ -d \"/data1/dbha/\" ]; then touch /data1/dbha/%s ; fi "+ @@ -243,6 +245,40 @@ func (m *MySQLDetectInstance) CheckSSH() error { return nil } +func (m *MySQLDetectInstance) CheckSSH() error { + sshCtx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(m.SshInfo.Timeout)) + defer cancel() + + errChan := make(chan error, 1) + go func() { + errChan <- m.detectSSH() + }() + + select { + case <-sshCtx.Done(): + err := fmt.Errorf("check ssh timeout for ip:%s, port:%d", m.Ip, m.Port) + log.Logger.Warnf(err.Error()) + m.Status = constvar.SSHCheckFailed + return err + case sshErr := <-errChan: + if sshErr != nil { + if util.CheckSSHErrIsAuthFail(sshErr) { + m.Status = constvar.SSHAuthFailed + log.Logger.Warnf("check ssh auth failed. ip:%s, port:%d, app:%s, status:%s", + m.Ip, m.Port, m.App, m.Status) + } else { + m.Status = constvar.SSHCheckFailed + log.Logger.Warnf("check ssh failed. ip:%s, port:%d, app:%s, status:%s", + m.Ip, m.Port, m.App, m.Status) + } + return sshErr + } + log.Logger.Infof("check ssh success. ip:%s, port:%d, app:%s", m.Ip, m.Port, m.App) + m.Status = constvar.SSHCheckSuccess + return nil + } +} + // Serialization serialize mysql instance info func (m *MySQLDetectInstance) Serialization() ([]byte, error) { response := MySQLDetectResponse{