Merge branch 'release-7.5' into update-tidb-dashboard/release-7.5-v7.5.5-a2486d76-1730702798
baurine authored Dec 12, 2024
2 parents ec0998e + 21a31ab commit 79cc304
Showing 37 changed files with 599 additions and 236 deletions.
9 changes: 5 additions & 4 deletions client/resource_group/controller/controller.go
@@ -32,6 +32,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
pd "github.com/tikv/pd/client"
"github.com/tikv/pd/client/errs"
"github.com/tikv/pd/client/timerutil"
atomicutil "go.uber.org/atomic"
"go.uber.org/zap"
"golang.org/x/exp/slices"
@@ -289,7 +290,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
watchMetaChannel, err = c.provider.Watch(ctx, pd.GroupSettingsPathPrefixBytes, pd.WithRev(metaRevision), pd.WithPrefix(), pd.WithPrevKV())
if err != nil {
log.Warn("watch resource group meta failed", zap.Error(err))
watchRetryTimer.Reset(watchRetryInterval)
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval)
failpoint.Inject("watchStreamError", func() {
watchRetryTimer.Reset(20 * time.Millisecond)
})
@@ -299,7 +300,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
watchConfigChannel, err = c.provider.Watch(ctx, pd.ControllerConfigPathPrefixBytes, pd.WithRev(cfgRevision), pd.WithPrefix())
if err != nil {
log.Warn("watch resource group config failed", zap.Error(err))
watchRetryTimer.Reset(watchRetryInterval)
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval)
}
}
case <-emergencyTokenAcquisitionTicker.C:
@@ -333,7 +334,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
})
if !ok {
watchMetaChannel = nil
watchRetryTimer.Reset(watchRetryInterval)
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval)
failpoint.Inject("watchStreamError", func() {
watchRetryTimer.Reset(20 * time.Millisecond)
})
@@ -369,7 +370,7 @@ func (c *ResourceGroupsController) Start(ctx context.Context) {
case resp, ok := <-watchConfigChannel:
if !ok {
watchConfigChannel = nil
watchRetryTimer.Reset(watchRetryInterval)
timerutil.SafeResetTimer(watchRetryTimer, watchRetryInterval)
failpoint.Inject("watchStreamError", func() {
watchRetryTimer.Reset(20 * time.Millisecond)
})
2 changes: 1 addition & 1 deletion client/timerpool/pool.go → client/timerutil/pool.go
@@ -4,7 +4,7 @@

// Note: This file is copied from https://go-review.googlesource.com/c/go/+/276133

package timerpool
package timerutil

import (
"sync"
client/timerpool/pool_test.go → client/timerutil/pool_test.go
@@ -4,7 +4,7 @@

// Note: This file is copied from https://go-review.googlesource.com/c/go/+/276133

package timerpool
package timerutil

import (
"testing"
32 changes: 32 additions & 0 deletions client/timerutil/util.go
@@ -0,0 +1,32 @@
// Copyright 2024 TiKV Project Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package timerutil

import "time"

// SafeResetTimer is used to reset timer safely.
// Before Go 1.23, the only safe way to use Reset was to call Timer.Stop and explicitly drain the timer first.
// We need to be careful here; see more details in the comments of Timer.Reset.
// https://pkg.go.dev/time@master#Timer.Reset
func SafeResetTimer(t *time.Timer, d time.Duration) {
// Stop the timer if it's not stopped.
if !t.Stop() {
select {
case <-t.C: // try to drain from the channel
default:
}
}
t.Reset(d)
}
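
For context, SafeResetTimer centralizes the stop/drain/reset idiom that the call sites in this commit (controller.go above, tso_dispatcher.go and lease.go below) previously inlined. Here is a minimal, self-contained sketch of a retry loop built on this idiom; the helper is re-inlined so the example runs standalone, and the example itself is not part of the commit:

```go
package main

import (
	"fmt"
	"time"
)

// safeResetTimer mirrors timerutil.SafeResetTimer above: stop the timer
// and drain any pending expiration before resetting, so a stale tick
// cannot be received immediately after the reset.
func safeResetTimer(t *time.Timer, d time.Duration) {
	if !t.Stop() {
		select {
		case <-t.C: // drain a pending expiration, if any
		default:
		}
	}
	t.Reset(d)
}

func main() {
	const retryInterval = 50 * time.Millisecond
	retry := time.NewTimer(retryInterval)
	defer retry.Stop()

	for attempt := 1; attempt <= 3; attempt++ {
		<-retry.C // wait for the retry interval to elapse
		fmt.Println("retrying watch, attempt", attempt)
		// Safe regardless of whether the timer has already fired.
		safeResetTimer(retry, retryInterval)
	}
}
```

Before Go 1.23, calling Reset on a timer that had fired but not been drained could leave a stale value in t.C; draining first ensures the next receive corresponds to the new deadline.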
21 changes: 6 additions & 15 deletions client/tso_dispatcher.go
@@ -29,7 +29,7 @@ import (
"github.com/tikv/pd/client/errs"
"github.com/tikv/pd/client/grpcutil"
"github.com/tikv/pd/client/retry"
"github.com/tikv/pd/client/timerpool"
"github.com/tikv/pd/client/timerutil"
"github.com/tikv/pd/client/tsoutil"
"go.uber.org/zap"
"google.golang.org/grpc"
@@ -155,7 +155,7 @@ func newTSDeadline(
done chan struct{},
cancel context.CancelFunc,
) *deadline {
timer := timerpool.GlobalTimerPool.Get(timeout)
timer := timerutil.GlobalTimerPool.Get(timeout)
return &deadline{
timer: timer,
done: done,
@@ -201,11 +201,11 @@ func (c *tsoClient) watchTSDeadline(ctx context.Context, dcLocation string) {
case <-d.timer.C:
log.Error("[tso] tso request is canceled due to timeout", zap.String("dc-location", dc), errs.ZapError(errs.ErrClientGetTSOTimeout))
d.cancel()
timerpool.GlobalTimerPool.Put(d.timer)
timerutil.GlobalTimerPool.Put(d.timer)
case <-d.done:
timerpool.GlobalTimerPool.Put(d.timer)
timerutil.GlobalTimerPool.Put(d.timer)
case <-ctx.Done():
timerpool.GlobalTimerPool.Put(d.timer)
timerutil.GlobalTimerPool.Put(d.timer)
return
}
case <-ctx.Done():
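
GlobalTimerPool comes from the pool file renamed above from timerpool to timerutil (copied, per its note, from go-review CL 276133). Below is a rough sketch of the Get/Put recycling pattern, assuming a sync.Pool-backed design like that CL; it is illustrative, not the repo's exact code:

```go
package main

import (
	"fmt"
	"sync"
	"time"
)

// timerPool recycles timers: Get hands out a freshly reset timer, and
// Put stops and drains it so the next user starts from a clean state.
type timerPool struct {
	pool sync.Pool
}

func (p *timerPool) Get(d time.Duration) *time.Timer {
	if t, _ := p.pool.Get().(*time.Timer); t != nil {
		t.Reset(d)
		return t
	}
	return time.NewTimer(d)
}

func (p *timerPool) Put(t *time.Timer) {
	if !t.Stop() {
		select {
		case <-t.C: // drain so the recycled timer cannot deliver a stale tick
		default:
		}
	}
	p.pool.Put(t)
}

func main() {
	var p timerPool
	t := p.Get(10 * time.Millisecond)
	<-t.C
	fmt.Println("deadline fired")
	p.Put(t) // return for reuse, as every branch of watchTSDeadline does
}
```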
@@ -419,16 +419,7 @@ tsoBatchLoop:
if maxBatchWaitInterval >= 0 {
tbc.adjustBestBatchSize()
}
// Stop the timer if it's not stopped.
if !streamLoopTimer.Stop() {
select {
case <-streamLoopTimer.C: // try to drain from the channel
default:
}
}
// We need to be careful here; see more details in the comments of Timer.Reset.
// https://pkg.go.dev/time@master#Timer.Reset
streamLoopTimer.Reset(c.option.timeout)
timerutil.SafeResetTimer(streamLoopTimer, c.option.timeout)
// Choose a stream to send the TSO gRPC request.
streamChoosingLoop:
for {
2 changes: 1 addition & 1 deletion pkg/cluster/cluster.go
@@ -49,7 +49,7 @@ func HandleOverlaps(c Cluster, overlaps []*core.RegionInfo) {
if c.GetRegionStats() != nil {
c.GetRegionStats().ClearDefunctRegion(item.GetID())
}
c.GetLabelStats().ClearDefunctRegion(item.GetID())
c.GetLabelStats().MarkDefunctRegion(item.GetID())
c.GetRuleManager().InvalidCache(item.GetID())
}
}
12 changes: 2 additions & 10 deletions pkg/election/lease.go
@@ -23,6 +23,7 @@ import (
"github.com/tikv/pd/pkg/errs"
"github.com/tikv/pd/pkg/utils/etcdutil"
"github.com/tikv/pd/pkg/utils/logutil"
"github.com/tikv/pd/pkg/utils/timerutil"
"github.com/tikv/pd/pkg/utils/typeutil"
"go.etcd.io/etcd/clientv3"
"go.uber.org/zap"
@@ -124,16 +125,7 @@ func (l *lease) KeepAlive(ctx context.Context) {
l.expireTime.Store(t)
}
}
// Stop the timer if it's not stopped.
if !timer.Stop() {
select {
case <-timer.C: // try to drain from the channel
default:
}
}
// We need to be careful here; see more details in the comments of Timer.Reset.
// https://pkg.go.dev/time@master#Timer.Reset
timer.Reset(l.leaseTimeout)
timerutil.SafeResetTimer(timer, l.leaseTimeout)
case <-timer.C:
log.Info("lease timeout", zap.Time("expire", l.expireTime.Load().(time.Time)), zap.String("purpose", l.Purpose))
return
5 changes: 2 additions & 3 deletions pkg/mcs/scheduling/server/cluster.go
@@ -68,7 +68,7 @@ func NewCluster(parentCtx context.Context, persistConfig *config.PersistConfig,
ruleManager: ruleManager,
labelerManager: labelerManager,
persistConfig: persistConfig,
hotStat: statistics.NewHotStat(ctx),
hotStat: statistics.NewHotStat(ctx, basicCluster),
labelStats: statistics.NewLabelStatistics(),
regionStats: statistics.NewRegionStatistics(basicCluster, persistConfig, ruleManager),
storage: storage,
@@ -329,13 +329,12 @@ func (c *Cluster) waitSchedulersInitialized() {
}
}

// TODO: implement the following methods

// UpdateRegionsLabelLevelStats updates the status of the region label level by types.
func (c *Cluster) UpdateRegionsLabelLevelStats(regions []*core.RegionInfo) {
for _, region := range regions {
c.labelStats.Observe(region, c.getStoresWithoutLabelLocked(region, core.EngineKey, core.EngineTiFlash), c.persistConfig.GetLocationLabels())
}
c.labelStats.ClearDefunctRegions()
}

func (c *Cluster) getStoresWithoutLabelLocked(region *core.RegionInfo, key, value string) []*core.StoreInfo {
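
Read together with the MarkDefunctRegion call in pkg/cluster/cluster.go above, this looks like a switch from immediate per-region cleanup to mark-then-sweep: HandleOverlaps marks overlapped regions, and UpdateRegionsLabelLevelStats sweeps them in one batch. Here is a minimal sketch under that assumption; labelStats and its methods are hypothetical, not the actual statistics.LabelStatistics implementation:

```go
package main

import (
	"fmt"
	"sync"
)

// labelStats sketches a mark-then-sweep store of per-region label levels:
// marking is a cheap map insert; deletion happens in one later sweep.
type labelStats struct {
	mu      sync.Mutex
	level   map[uint64]string   // region ID -> label level
	defunct map[uint64]struct{} // region IDs marked for removal
}

func newLabelStats() *labelStats {
	return &labelStats{
		level:   make(map[uint64]string),
		defunct: make(map[uint64]struct{}),
	}
}

func (s *labelStats) Observe(regionID uint64, lvl string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.level[regionID] = lvl
	delete(s.defunct, regionID) // a re-observed region is live again
}

func (s *labelStats) MarkDefunctRegion(regionID uint64) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.defunct[regionID] = struct{}{}
}

func (s *labelStats) ClearDefunctRegions() {
	s.mu.Lock()
	defer s.mu.Unlock()
	for id := range s.defunct {
		delete(s.level, id)
	}
	s.defunct = make(map[uint64]struct{})
}

func main() {
	s := newLabelStats()
	s.Observe(1, "zone")
	s.MarkDefunctRegion(1)    // as HandleOverlaps now does
	s.ClearDefunctRegions()   // swept on the next stats update pass
	fmt.Println(len(s.level)) // 0
}
```

If that reading is right, HandleOverlaps no longer shrinks the stats map itself; the deletions ride along with the periodic label-level refresh.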
5 changes: 3 additions & 2 deletions pkg/mock/mockcluster/mockcluster.go
@@ -63,11 +63,12 @@ type Cluster struct {

// NewCluster creates a new Cluster
func NewCluster(ctx context.Context, opts *config.PersistOptions) *Cluster {
bc := core.NewBasicCluster()
c := &Cluster{
ctx: ctx,
BasicCluster: core.NewBasicCluster(),
BasicCluster: bc,
IDAllocator: mockid.NewIDAllocator(),
HotStat: statistics.NewHotStat(ctx),
HotStat: statistics.NewHotStat(ctx, bc),
HotBucketCache: buckets.NewBucketsCache(ctx),
PersistOptions: opts,
suspectRegions: map[uint64]struct{}{},
88 changes: 88 additions & 0 deletions pkg/mock/mockserver/mockserver.go
@@ -0,0 +1,88 @@
// Copyright 2024 TiKV Project Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package mockserver

import (
"context"

"github.com/pingcap/kvproto/pkg/pdpb"
"github.com/tikv/pd/pkg/core"
"github.com/tikv/pd/pkg/storage"
"github.com/tikv/pd/pkg/utils/grpcutil"
)

// MockServer is used to mock Server for test use.
type MockServer struct {
ctx context.Context
member, leader *pdpb.Member
storage storage.Storage
bc *core.BasicCluster
}

// NewMockServer creates a new MockServer.
func NewMockServer(ctx context.Context, member, leader *pdpb.Member, storage storage.Storage, bc *core.BasicCluster) *MockServer {
return &MockServer{
ctx: ctx,
member: member,
leader: leader,
storage: storage,
bc: bc,
}
}

// LoopContext returns the context of the server.
func (s *MockServer) LoopContext() context.Context {
return s.ctx
}

// ClusterID returns the cluster ID of the server.
func (*MockServer) ClusterID() uint64 {
return 1
}

// GetMemberInfo returns the member info of the server.
func (s *MockServer) GetMemberInfo() *pdpb.Member {
return s.member
}

// GetLeader returns the leader of the server.
func (s *MockServer) GetLeader() *pdpb.Member {
return s.leader
}

// GetStorage returns the storage of the server.
func (s *MockServer) GetStorage() storage.Storage {
return s.storage
}

// Name returns the name of the server.
func (*MockServer) Name() string {
return "mock-server"
}

// GetRegions returns the regions of the server.
func (s *MockServer) GetRegions() []*core.RegionInfo {
return s.bc.GetRegions()
}

// GetTLSConfig returns the TLS config of the server.
func (*MockServer) GetTLSConfig() *grpcutil.TLSConfig {
return &grpcutil.TLSConfig{}
}

// GetBasicCluster returns the basic cluster of the server.
func (s *MockServer) GetBasicCluster() *core.BasicCluster {
return s.bc
}
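
A hypothetical smoke test for the new mock, exercising only the methods defined in the file above; storage.NewStorageWithMemoryBackend is assumed to be the repo's existing in-memory storage constructor:

```go
package mockserver_test

import (
	"context"
	"testing"

	"github.com/pingcap/kvproto/pkg/pdpb"
	"github.com/tikv/pd/pkg/core"
	"github.com/tikv/pd/pkg/mock/mockserver"
	"github.com/tikv/pd/pkg/storage"
)

func TestMockServerSmoke(t *testing.T) {
	member := &pdpb.Member{Name: "pd-1", MemberId: 1}
	srv := mockserver.NewMockServer(
		context.Background(),
		member, member, // the lone member doubles as the leader
		storage.NewStorageWithMemoryBackend(),
		core.NewBasicCluster(),
	)
	if srv.ClusterID() != 1 {
		t.Fatalf("unexpected cluster ID: %d", srv.ClusterID())
	}
	if got := srv.GetLeader().GetName(); got != "pd-1" {
		t.Fatalf("unexpected leader: %s", got)
	}
	if n := len(srv.GetRegions()); n != 0 {
		t.Fatalf("expected no regions in a fresh cluster, got %d", n)
	}
}
```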
15 changes: 13 additions & 2 deletions pkg/replication/replication_mode.go
@@ -212,6 +212,7 @@ const (
type drAutoSyncStatus struct {
State string `json:"state,omitempty"`
StateID uint64 `json:"state_id,omitempty"`
AsyncStartTime *time.Time `json:"async_start,omitempty"`
RecoverStartTime *time.Time `json:"recover_start,omitempty"`
TotalRegions int `json:"total_regions,omitempty"`
SyncedRegions int `json:"synced_regions,omitempty"`
@@ -262,7 +263,8 @@ func (m *ModeManager) drSwitchToAsyncWithLock(availableStores []uint64) error {
log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
return err
}
dr := drAutoSyncStatus{State: drStateAsync, StateID: id, AvailableStores: availableStores}
now := time.Now()
dr := drAutoSyncStatus{State: drStateAsync, StateID: id, AvailableStores: availableStores, AsyncStartTime: &now}
if err := m.storage.SaveReplicationStatus(modeDRAutoSync, dr); err != nil {
log.Warn("failed to switch to async state", zap.String("replicate-mode", modeDRAutoSync), errs.ZapError(err))
return err
@@ -272,6 +274,15 @@ func (m *ModeManager) drSwitchToAsyncWithLock(availableStores []uint64) error {
return nil
}

func (m *ModeManager) drDurationSinceAsyncStart() time.Duration {
m.RLock()
defer m.RUnlock()
if m.drAutoSync.AsyncStartTime == nil {
return 0
}
return time.Since(*m.drAutoSync.AsyncStartTime)
}

func (m *ModeManager) drSwitchToSyncRecover() error {
m.Lock()
defer m.Unlock()
@@ -477,7 +488,7 @@ func (m *ModeManager) tickUpdateState() {
m.drSwitchToAsync(storeIDs[primaryUp])
}
case drStateAsync:
if canSync {
if canSync && m.drDurationSinceAsyncStart() > m.config.DRAutoSync.WaitRecoverTimeout.Duration {
m.drSwitchToSyncRecover()
break
}
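
Net effect of the new gate: once the cluster drops to async, it will not begin recovering to sync until WaitRecoverTimeout has elapsed since AsyncStartTime, even if canSync flips back sooner, which should damp rapid flapping between states. A minimal sketch of that condition with hypothetical names (drState and its methods are not the PD types):

```go
package main

import (
	"fmt"
	"time"
)

// drState models just the changed condition: leaving async for
// sync-recover requires both canSync and a minimum dwell time in async.
type drState struct {
	asyncStart         *time.Time    // set on entering async (cf. AsyncStartTime)
	waitRecoverTimeout time.Duration // cf. DRAutoSync.WaitRecoverTimeout
}

func (s *drState) enterAsync() {
	now := time.Now()
	s.asyncStart = &now
}

func (s *drState) durationSinceAsyncStart() time.Duration {
	if s.asyncStart == nil {
		return 0
	}
	return time.Since(*s.asyncStart)
}

func (s *drState) mayStartSyncRecover(canSync bool) bool {
	return canSync && s.durationSinceAsyncStart() > s.waitRecoverTimeout
}

func main() {
	s := &drState{waitRecoverTimeout: 30 * time.Millisecond}
	s.enterAsync()
	fmt.Println("immediately:", s.mayStartSyncRecover(true)) // false: dwell time not met
	time.Sleep(40 * time.Millisecond)
	fmt.Println("after wait: ", s.mayStartSyncRecover(true)) // true
}
```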
(Diffs for the remaining changed files are not shown.)
