Skip to content

Commit

Permalink
fix(dbm-services): dbha do ssh detect should with timeout close #8254
Browse files Browse the repository at this point in the history
  • Loading branch information
xjxia committed Nov 28, 2024
1 parent eeb744f commit 045ab4a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 3 deletions.
3 changes: 2 additions & 1 deletion dbm-services/common/dbha/ha-module/agent/monitor_agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,12 +147,13 @@ func (a *MonitorAgent) RefreshInstanceCache() {
// DoDetectSingle do single instance detect
func (a *MonitorAgent) DoDetectSingle(ins dbutil.DataBaseDetect) {
ip, port := ins.GetAddress()
log.Logger.Debugf("begin to detect instance:%s#%d", ip, port)
log.Logger.Debugf("begin detect [%s] instance:%s#%d", ins.GetClusterType(), ip, port)
err := ins.Detection()
if err != nil {
log.Logger.Warnf("Detect db instance failed. ins:[%s:%d],dbType:%s status:%s,DeteckErr=%s",
ip, port, ins.GetDBType(), ins.GetStatus(), err.Error())
}
log.Logger.Debugf("finish detect [%s] instance:%s#%d", ins.GetClusterType(), ip, port)

a.reportMonitor(ins, err)
if ins.NeedReportAgent() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package dbmysql

import (
"context"
"encoding/json"
"fmt"
"math/rand"
Expand Down Expand Up @@ -155,11 +156,12 @@ func (m *MySQLDetectInstance) Detection() error {
return nil
}
case <-time.After(time.Second * time.Duration(m.Timeout)):
mysqlErr = fmt.Errorf("connect MySQL timeout recheck:%d", recheck)
mysqlErr = fmt.Errorf("connect MySQL[%s#%d] timeout recheck:%d", m.Ip, m.Port, recheck)
log.Logger.Warnf(mysqlErr.Error())
m.Status = constvar.DBCheckFailed
}
}
log.Logger.Debugf("detect mysql[%s#%d] finish, try to detect ssh", m.Ip, m.Port)

sshErr := m.CheckSSH()
if sshErr != nil {
Expand Down Expand Up @@ -230,7 +232,7 @@ func (m *MySQLDetectInstance) CheckMySQL(errChan chan error) {
}

// CheckSSH use ssh check whether machine alived
func (m *MySQLDetectInstance) CheckSSH() error {
func (m *MySQLDetectInstance) detectSSH() error {
touchFile := fmt.Sprintf("%s_%s_%d", m.SshInfo.Dest, "agent", m.Port)

touchStr := fmt.Sprintf("touch %s && if [ -d \"/data1/dbha/\" ]; then touch /data1/dbha/%s ; fi "+
Expand All @@ -243,6 +245,40 @@ func (m *MySQLDetectInstance) CheckSSH() error {
return nil
}

// CheckSSH runs detectSSH in a separate goroutine and waits for its result for
// at most m.SshInfo.Timeout seconds, recording the outcome in m.Status.
//
// Outcomes:
//   - the timeout fires first: m.Status is set to constvar.SSHCheckFailed and a
//     timeout error is returned;
//   - detectSSH fails and util.CheckSSHErrIsAuthFail reports true: m.Status is
//     set to constvar.SSHAuthFailed and the detectSSH error is returned;
//   - detectSSH fails for any other reason: m.Status is set to
//     constvar.SSHCheckFailed and the detectSSH error is returned;
//   - detectSSH succeeds: m.Status is set to constvar.SSHCheckSuccess and nil
//     is returned.
//
// The context is not handed to detectSSH, so on timeout the probe goroutine is
// not interrupted; it keeps running until detectSSH returns on its own.
func (m *MySQLDetectInstance) CheckSSH() error {
	// NOTE(review): a non-positive m.SshInfo.Timeout yields a context that is
	// already expired, so the timeout branch would be taken almost always —
	// confirm the configuration always provides a positive value.
	sshCtx, cancel := context.WithTimeout(context.Background(), time.Second*time.Duration(m.SshInfo.Timeout))
	defer cancel()

	// Capacity 1 lets the probe goroutine deliver its result and exit even when
	// the timeout branch has already returned and nobody receives any more.
	errChan := make(chan error, 1)
	go func() {
		errChan <- m.detectSSH()
	}()

	select {
	case <-sshCtx.Done():
		err := fmt.Errorf("check ssh timeout for ip:%s, port:%d", m.Ip, m.Port)
		// Log through a constant format: the message embeds m.Ip, which may
		// contain '%' (e.g. an IPv6 zone identifier) and must not be parsed as
		// formatting verbs.
		log.Logger.Warnf("%s", err.Error())
		m.Status = constvar.SSHCheckFailed
		return err
	case sshErr := <-errChan:
		if sshErr == nil {
			log.Logger.Infof("check ssh success. ip:%s, port:%d, app:%s", m.Ip, m.Port, m.App)
			m.Status = constvar.SSHCheckSuccess
			return nil
		}

		// Distinguish authentication failures from every other SSH failure so
		// that the recorded status reflects the cause.
		if util.CheckSSHErrIsAuthFail(sshErr) {
			m.Status = constvar.SSHAuthFailed
			log.Logger.Warnf("check ssh auth failed. ip:%s, port:%d, app:%s, status:%s",
				m.Ip, m.Port, m.App, m.Status)
		} else {
			m.Status = constvar.SSHCheckFailed
			log.Logger.Warnf("check ssh failed. ip:%s, port:%d, app:%s, status:%s",
				m.Ip, m.Port, m.App, m.Status)
		}
		return sshErr
	}
}

// Serialization serialize mysql instance info
func (m *MySQLDetectInstance) Serialization() ([]byte, error) {
response := MySQLDetectResponse{
Expand Down

0 comments on commit 045ab4a

Please sign in to comment.