diff --git a/pkg/backend/backend.go b/pkg/backend/backend.go index 2732282..f0e2103 100644 --- a/pkg/backend/backend.go +++ b/pkg/backend/backend.go @@ -9,6 +9,7 @@ import ( type BackendOperations interface { CreateSession(context.Context, *ConfigurationSettings, *CreateSessionRequest) (*CreateSessionResponse, error) + CheckSession(context.Context) bool DeleteSession(context.Context, *ConfigurationSettings, *DeleteSessionRequest) (*DeleteSessionResponse, error) GetMemoryResourceBlocks(context.Context, *ConfigurationSettings, *MemoryResourceBlocksRequest) (*MemoryResourceBlocksResponse, error) GetMemoryResourceBlockById(context.Context, *ConfigurationSettings, *MemoryResourceBlockByIdRequest) (*MemoryResourceBlockByIdResponse, error) diff --git a/pkg/backend/httpfish.go b/pkg/backend/httpfish.go index 42c1541..c4bde8f 100644 --- a/pkg/backend/httpfish.go +++ b/pkg/backend/httpfish.go @@ -321,19 +321,6 @@ func (session *Session) queryWithJSON(operation HTTPOperationType, path string, return response } - // check for error due to session timeout ( service would return error code 401) - if session.xToken != "" && response.StatusCode == http.StatusUnauthorized { - // Re-authenticate - fmt.Print("Redfish session might have timed out. Re-authenticate. Warning! infinite loop might occur if the issue is from the redfish server.\n") - session.client = nil - session.xToken = "" - err := session.auth() - if err == nil { - path = session.buildPath(SessionServiceKey, session.RedfishSessionId) - response = session.queryWithJSON(operation, path, jsonData) - } - } - return response } @@ -544,6 +531,18 @@ func (service *httpfishService) CreateSession(ctx context.Context, settings *Con return &CreateSessionResponse{SessionId: session.SessionId, Status: "Success", ServiceError: nil, ChassisSN: session.BladeSN, EnclosureSN: session.ApplianceSN}, nil } +// CheckSession: Check if the redfish session is still alive +func (service *httpfishService) CheckSession(ctx context.Context) bool { + logger := klog.FromContext(ctx) + logger.V(4).Info("====== CheckSession ======") + session := service.service.session.(*Session) + logger.V(4).Info("check session", "session id", session.SessionId, "redfish session id", session.RedfishSessionId) + + response := session.query(HTTPOperation.GET, session.buildPath(SessionServiceKey, session.RedfishSessionId)) + + return response.err == nil +} + // DeleteSession: Delete a session previously established with an endpoint service func (service *httpfishService) DeleteSession(ctx context.Context, settings *ConfigurationSettings, req *DeleteSessionRequest) (*DeleteSessionResponse, error) { logger := klog.FromContext(ctx) diff --git a/pkg/common/parameters.go b/pkg/common/parameters.go index 533c96e..70ed12b 100644 --- a/pkg/common/parameters.go +++ b/pkg/common/parameters.go @@ -11,7 +11,8 @@ import ( ) const ( - NumUuidCharsForId = 4 // Number of chars to strip from an interally generated uuid (starting from the right) for use in the internally generated ID's for appliance, blade and host + NumUuidCharsForId = 4 // Number of chars to strip from an interally generated uuid (starting from the right) for use in the internally generated ID's for appliance, blade and host + SyncChekTimeoutSeconds = 30.0 // Number of seconds to check session timeout ) const ( DefaultBackend = "httpfish" // Default backend interface diff --git a/pkg/manager/appliance.go b/pkg/manager/appliance.go index c1b256b..91105da 100644 --- a/pkg/manager/appliance.go +++ b/pkg/manager/appliance.go @@ -200,7 +200,7 @@ func (a *Appliance) DeleteBladeById(ctx context.Context, bladeId string) (*Blade logger.V(2).Info("force blade deletion after backend session failure", "bladeId", blade.Id, "applianceId", a.Id) delete(a.Blades, blade.Id) - return nil, &common.RequestError{StatusCode: common.StatusBladeDeleteSessionFailure, Err: newErr} + return blade, &common.RequestError{StatusCode: common.StatusBladeDeleteSessionFailure, Err: newErr} // Still return the blade for recovery } // delete blade @@ -224,6 +224,7 @@ func (a *Appliance) GetAllBladeIds() []string { func (a *Appliance) GetBladeById(ctx context.Context, bladeId string) (*Blade, error) { logger := klog.FromContext(ctx) logger.V(4).Info(">>>>>> GetBladeById: ", "bladeId", bladeId, "applianceId", a.Id) + var err error blade, ok := a.Blades[bladeId] if !ok { @@ -232,6 +233,24 @@ func (a *Appliance) GetBladeById(ctx context.Context, bladeId string) (*Blade, e return nil, &common.RequestError{StatusCode: common.StatusBladeIdDoesNotExist, Err: newErr} } + // Check for resync + if !blade.CheckSync(ctx) { + logger.V(2).Info("GetBladeById: blade might be out of sync", "bladeId", bladeId) + ok := blade.backendOps.CheckSession(ctx) + if !ok { + blade, err = a.ResyncBladeById(ctx, bladeId) + if err != nil { + newErr := fmt.Errorf("failed to resync host(add): host [%s]: %w", bladeId, err) + logger.Error(newErr, "failure: resync host") + return nil, &common.RequestError{StatusCode: err.(*common.RequestError).StatusCode, Err: newErr} + } else { + logger.V(2).Info("success: auto resync host", "bladeId", bladeId) + } + } else { + blade.SetSync(ctx) + } + } + logger.V(2).Info("success: get blade by id", "bladeId", blade.Id, "applianceId", a.Id) return blade, nil diff --git a/pkg/manager/blade.go b/pkg/manager/blade.go index fbfb1ea..36de097 100644 --- a/pkg/manager/blade.go +++ b/pkg/manager/blade.go @@ -8,6 +8,7 @@ import ( "math" "sort" "strings" + "time" "cfm/pkg/backend" "cfm/pkg/common" @@ -33,8 +34,9 @@ type Blade struct { ResourceSizeMib int32 // Backend access data - backendOps backend.BackendOperations - creds *openapi.Credentials // Used during resync + backendOps backend.BackendOperations + creds *openapi.Credentials // Used during resync + lastSyncTimeStamp time.Time } type RequestNewBlade struct { @@ -69,11 +71,31 @@ func NewBlade(ctx context.Context, r *RequestNewBlade) (*Blade, error) { return nil, newErr } + b.SetSync(ctx) + logger.V(2).Info("success: new blade", "bladeId", b.Id, "applianceId", b.ApplianceId) return &b, nil } +func (b *Blade) SetSync(ctx context.Context) { + logger := klog.FromContext(ctx) + logger.V(3).Info(">>>>>> SetSyncFlag(Blade): ", "bladeId", b.Id) + b.lastSyncTimeStamp = time.Now() +} + +func (b *Blade) CheckSync(ctx context.Context) bool { + logger := klog.FromContext(ctx) + logger.V(2).Info(">>>>>> CheckSyncFlag(Blade): ", "bladeId", b.Id) + + if time.Since(b.lastSyncTimeStamp).Seconds() > common.SyncChekTimeoutSeconds { + return false + } else { + b.SetSync(ctx) // renew the timestamp + return true + } +} + type RequestAssignMemory struct { MemoryId string PortId string diff --git a/pkg/manager/host.go b/pkg/manager/host.go index 2913577..07e4d4e 100644 --- a/pkg/manager/host.go +++ b/pkg/manager/host.go @@ -6,6 +6,7 @@ import ( "context" "fmt" "strings" + "time" "cfm/pkg/backend" "cfm/pkg/common" @@ -26,8 +27,9 @@ type Host struct { Memory map[string]*HostMemory // Backend access data - backendOps backend.BackendOperations - creds *openapi.Credentials // Used during resync + backendOps backend.BackendOperations + creds *openapi.Credentials // Used during resync + lastSyncTimeStamp time.Time } var HostMemoryDomain = map[string]openapi.MemoryType{ @@ -65,11 +67,31 @@ func NewHost(ctx context.Context, r *RequestNewHost) (*Host, error) { return nil, newErr } + h.SetSync(ctx) + logger.V(2).Info("success: new host", "hostId", h.Id) return &h, nil } +func (h *Host) SetSync(ctx context.Context) { + logger := klog.FromContext(ctx) + logger.V(3).Info(">>>>>> SetSyncFlag(Host): ", "hostId", h.Id) + h.lastSyncTimeStamp = time.Now() +} + +func (h *Host) CheckSync(ctx context.Context) bool { + logger := klog.FromContext(ctx) + logger.V(2).Info(">>>>>> CheckSyncFlag(Host): ", "hostId", h.Id) + + if time.Since(h.lastSyncTimeStamp).Seconds() > common.SyncChekTimeoutSeconds { + return false + } else { + h.SetSync(ctx) // renew the timestamp + return true + } +} + func (h *Host) ComposeMemory(ctx context.Context, r *RequestComposeMemory) (*openapi.MemoryRegion, error) { logger := klog.FromContext(ctx) logger.V(4).Info(">>>>>> ComposeMemory(Host): ", "request", r, "hostId", h.Id) diff --git a/pkg/manager/manager.go b/pkg/manager/manager.go index e010b70..80ce725 100644 --- a/pkg/manager/manager.go +++ b/pkg/manager/manager.go @@ -294,7 +294,7 @@ func DeleteHostById(ctx context.Context, hostId string) (*Host, error) { logger.V(2).Info("force host deletion after backend session failure", "hostId", host.Id) deviceCache.DeleteHostById(host.Id) - return nil, &common.RequestError{StatusCode: common.StatusHostDeleteSessionFailure, Err: newErr} + return host, &common.RequestError{StatusCode: common.StatusHostDeleteSessionFailure, Err: newErr} // Still return the host for recovery } // delete host from cache @@ -326,6 +326,24 @@ func GetHostById(ctx context.Context, hostId string) (*Host, error) { return nil, &common.RequestError{StatusCode: err.(*common.RequestError).StatusCode, Err: newErr} } + // Check for resync + if !host.CheckSync(ctx) { + logger.V(2).Info("GetHostById: host might be out of sync", "hostId", hostId) + ok := host.backendOps.CheckSession(ctx) + if !ok { + host, err = ResyncHostById(ctx, hostId) + if err != nil { + newErr := fmt.Errorf("failed to resync host(add): host [%s]: %w", hostId, err) + logger.Error(newErr, "failure: resync host") + return nil, &common.RequestError{StatusCode: err.(*common.RequestError).StatusCode, Err: newErr} + } else { + logger.V(2).Info("success: auto resync host", "hostId", hostId) + } + } else { + host.SetSync(ctx) + } + } + logger.V(2).Info("success: get host by id", "hostId", hostId) return host, nil diff --git a/pkg/manager/memory.go b/pkg/manager/memory.go index 65d6db1..7412ba7 100644 --- a/pkg/manager/memory.go +++ b/pkg/manager/memory.go @@ -248,8 +248,7 @@ func (m *HostMemory) InvalidateCache() { } func (m *HostMemory) ValidateCache() { - // m.cacheUpdated = true - m.cacheUpdated = false // Temporarily disable host cache usage + m.cacheUpdated = true } func (m *HostMemory) init(ctx context.Context) error { diff --git a/pkg/manager/memory_device.go b/pkg/manager/memory_device.go index 97f1328..34829af 100644 --- a/pkg/manager/memory_device.go +++ b/pkg/manager/memory_device.go @@ -109,8 +109,7 @@ func (d *HostMemoryDevice) InvalidateCache() { } func (d *HostMemoryDevice) ValidateCache() { - // d.cacheUpdated = true - d.cacheUpdated = false // Temporarily disable host cache usage + d.cacheUpdated = true } func (d *HostMemoryDevice) init(ctx context.Context) error {