Skip to content

Commit

Permalink
Add monitoring of the servers time drifting (#44489)
Browse files Browse the repository at this point in the history
* Add monitoring of the servers time drifting

* Refactor monitor to use inventory stream

* Add command for inventory control stream for time reconciliation

* Reuse ping/pong logic for time reconciliation

* Add integration test to check if global notification is created

* Limits notification length, renaming

* Return system clock instead of time difference
Add parallel requests to ping the nodes
Fix issue with locking inventory store while making ping request

* Refactor to make time reconciliation part of the inventory control stream

* Fix tests

* Rewrite tests
Return inventory store iterator

* Naming adjustments

* Fix test

* Drop timeReconciliation in order to use direct handler functions

* Make time reconciliation with variable duration

* Replace with duration type in proto
Add test cleanup for possible goroutine leak
Add comment about half request duration

* Format notification message

* Add UpstreamInventoryPong.SystemClock check
  • Loading branch information
vapopov authored Nov 28, 2024
1 parent d521af2 commit af6a6ea
Show file tree
Hide file tree
Showing 13 changed files with 4,398 additions and 3,501 deletions.
1,958 changes: 1,005 additions & 953 deletions api/client/proto/authservice.pb.go

Large diffs are not rendered by default.

11 changes: 8 additions & 3 deletions api/proto/teleport/legacy/client/proto/authservice.proto
Original file line number Diff line number Diff line change
Expand Up @@ -2349,16 +2349,21 @@ message DownstreamInventoryOneOf {
}
}

// DownstreamInventoryPing is sent down the inventory control stream for testing/debug
// purposes.
// DownstreamInventoryPing is sent down the inventory control stream.
message DownstreamInventoryPing {
// ID identifies this ping.
// NOTE(review): presumably echoed back in UpstreamInventoryPong.ID so the
// sender can match a pong to its ping; confirm against the stream handler.
uint64 ID = 1;
}

// UpstreamInventoryPong is sent up the inventory control stream in response to a downstream
// ping, including the system clock of the downstream.
//
// NOTE(review): this header says the clock belongs to "the downstream" while the
// SystemClock field comment below says "the upstream"; confirm which side's clock
// is reported and align the two comments.
message UpstreamInventoryPong {
// ID identifies this pong.
// NOTE(review): presumably matches the ID of the DownstreamInventoryPing being
// answered; confirm against the stream handler.
uint64 ID = 1;
// SystemClock advertises the system clock of the upstream.
// Generated as a non-nullable standard time value (gogoproto stdtime, nullable=false)
// and serialized to JSON under the key "system_clock".
google.protobuf.Timestamp SystemClock = 2 [
(gogoproto.stdtime) = true,
(gogoproto.nullable) = false,
(gogoproto.jsontag) = "system_clock,omitempty"
];
}

// UpstreamInventoryHello is the hello message sent up the inventory control stream.
Expand Down
25 changes: 25 additions & 0 deletions api/proto/teleport/legacy/types/types.proto
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,31 @@ message InstanceSpecV1 {

// ExternalUpgraderVersion identifies the external upgrader version. Empty if no upgrader is defined.
string ExternalUpgraderVersion = 8 [(gogoproto.jsontag) = "ext_upgrader_version,omitempty"];

// LastMeasurement stores information about the latest measurement between services.
SystemClockMeasurement LastMeasurement = 9;
}

// SystemClockMeasurement represents the state of a system clock difference measurement:
// the two clock readings that were observed and the duration of the request used to
// obtain them.
message SystemClockMeasurement {
// ControllerSystemClock is the system clock of the inventory controller.
// Serialized to JSON under the key "controller_system_clock".
google.protobuf.Timestamp ControllerSystemClock = 1 [
(gogoproto.stdtime) = true,
(gogoproto.nullable) = false,
(gogoproto.jsontag) = "controller_system_clock,omitempty"
];
// SystemClock is the system clock of the upstream.
// Serialized to JSON under the key "system_clock".
google.protobuf.Timestamp SystemClock = 2 [
(gogoproto.stdtime) = true,
(gogoproto.nullable) = false,
(gogoproto.jsontag) = "system_clock,omitempty"
];
// RequestDuration stores information about the request duration between auth and remote service.
// NOTE(review): unlike the two timestamp fields above, this JSON tag does not include
// "omitempty"; confirm that always emitting "request_duration" is intentional.
google.protobuf.Duration RequestDuration = 3 [
(gogoproto.jsontag) = "request_duration",
(gogoproto.nullable) = false,
(gogoproto.stdduration) = true
];
}

// InstanceControlLogEntry represents an entry in a given instance's control log. The control log of
Expand Down
8 changes: 8 additions & 0 deletions api/types/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ type Instance interface {
// so appends do not need to be performed in any particular order.
AppendControlLog(entries ...InstanceControlLogEntry)

// GetLastMeasurement returns information about the system clocks of the auth service and
// another instance.
GetLastMeasurement() *SystemClockMeasurement

// Clone performs a deep copy on this instance.
Clone() Instance
}
Expand Down Expand Up @@ -299,6 +303,10 @@ func (i *InstanceV1) AppendControlLog(entries ...InstanceControlLogEntry) {
})
}

// GetLastMeasurement returns the system clock measurement stored in
// Spec.LastMeasurement. The field is returned as-is (no copy is made), so the
// result is nil when the field has not been set.
func (i *InstanceV1) GetLastMeasurement() *SystemClockMeasurement {
return i.Spec.LastMeasurement
}

// expireControlLog removes expired entries from the control log relative to the supplied
// "now" value. The supplied ttl is used as the default ttl for entries that do not specify
// a custom ttl value. The returned timestamp is the observed expiry that was furthest in
Expand Down
Loading

0 comments on commit af6a6ea

Please sign in to comment.