From c39f17ce594d79a8687c42701af8e20c66538a8b Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 14 Nov 2024 14:13:22 -0500 Subject: [PATCH 01/31] wip, start pebble persister --- events/events.go | 6 +++++ events/pebblepersist.go | 59 +++++++++++++++++++++++++++++++++++++++++ go.mod | 23 +++++++++++----- go.sum | 51 ++++++++++++++++++++++------------- 4 files changed, 114 insertions(+), 25 deletions(-) create mode 100644 events/pebblepersist.go diff --git a/events/events.go b/events/events.go index 915beaf19..46fbb96df 100644 --- a/events/events.go +++ b/events/events.go @@ -219,6 +219,8 @@ func (evt *XRPCStreamEvent) Serialize(wc io.Writer) error { return obj.MarshalCBOR(cborWriter) } +var ErrNoSeq = errors.New("event has no sequence number") + // serialize content into Preserialized cache func (evt *XRPCStreamEvent) Preserialize() error { if evt.Preserialized != nil { @@ -352,6 +354,10 @@ func (em *EventManager) Subscribe(ctx context.Context, ident string, filter func } func sequenceForEvent(evt *XRPCStreamEvent) int64 { + return evt.Sequence() +} + +func (evt *XRPCStreamEvent) Sequence() int64 { switch { case evt == nil: return -1 diff --git a/events/pebblepersist.go b/events/pebblepersist.go new file mode 100644 index 000000000..3564297d1 --- /dev/null +++ b/events/pebblepersist.go @@ -0,0 +1,59 @@ +package events + +import ( + "context" + "encoding/binary" + "fmt" + "github.com/bluesky-social/indigo/models" + "github.com/cockroachdb/pebble" +) + +type PebblePersist struct { + broadcast func(*XRPCStreamEvent) + db *pebble.DB +} + +func NewPebblePersistance(path string) (*PebblePersist, error) { + db, err := pebble.Open(path, &pebble.Options{}) + if err != nil { + return nil, fmt.Errorf("%s: %w", path, err) + } + pp := new(PebblePersist) + pp.db = db + return pp, nil +} + +func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error { + err := e.Preserialize() + if err != nil { + return err + } + blob := e.Preserialized + + seq := e.Se + + var key [8]byte + binary.BigEndian.PutUint64(key, seq) + + return nil +} +func (pp *PebblePersist) Playback(ctx context.Context, since int64, cb func(*XRPCStreamEvent) error) error { + return nil +} +func (pp *PebblePersist) TakeDownRepo(ctx context.Context, usr models.Uid) error { + return nil +} +func (pp *PebblePersist) Flush(context.Context) error { + return nil +} +func (pp *PebblePersist) Shutdown(context.Context) error { + return nil +} + +func (pp *PebblePersist) SetEventBroadcaster(broadcast func(*XRPCStreamEvent)) { + pp.broadcast = broadcast +} + +func (pp *PebblePersist) GetLast(ctx context.Context) (*XRPCStreamEvent, error) { + +} diff --git a/go.mod b/go.mod index 66391db61..da1a77d84 100644 --- a/go.mod +++ b/go.mod @@ -10,11 +10,11 @@ require ( github.com/araddon/dateparse v0.0.0-20210429162001-6b43995a97de github.com/brianvoe/gofakeit/v6 v6.25.0 github.com/carlmjohnson/versioninfo v0.22.5 + github.com/cockroachdb/pebble v1.1.2 github.com/dustinkirkland/golang-petname v0.0.0-20231002161417-6a283f1aaaf2 github.com/flosch/pongo2/v6 v6.0.0 github.com/go-redis/cache/v9 v9.0.0 github.com/goccy/go-json v0.10.2 - github.com/gocql/gocql v1.7.0 github.com/golang-jwt/jwt v3.2.2+incompatible github.com/gorilla/websocket v1.5.1 github.com/hashicorp/go-retryablehttp v0.7.5 @@ -66,7 +66,7 @@ require ( go.opentelemetry.io/otel/trace v1.21.0 go.uber.org/automaxprocs v1.5.3 golang.org/x/crypto v0.21.0 - golang.org/x/sync v0.5.0 + golang.org/x/sync v0.7.0 golang.org/x/text v0.14.0 golang.org/x/time v0.3.0 golang.org/x/tools v0.15.0 @@ -78,23 +78,32 @@ require ( ) require ( + github.com/DataDog/zstd v1.4.5 // indirect + github.com/cockroachdb/errors v1.11.3 // indirect + github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce // indirect + github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b // indirect + github.com/cockroachdb/redact v1.1.5 // indirect + github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 // indirect github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f // indirect + github.com/getsentry/sentry-go v0.27.0 // indirect github.com/go-redis/redis v6.15.9+incompatible // indirect - github.com/golang/snappy v0.0.3 // indirect - github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed // indirect + github.com/golang/snappy v0.0.4 // indirect github.com/hashicorp/golang-lru v1.0.2 // indirect github.com/jackc/puddle/v2 v2.2.1 // indirect github.com/klauspost/compress v1.17.3 // indirect + github.com/kr/pretty v0.3.1 // indirect + github.com/kr/text v0.2.0 // indirect github.com/labstack/gommon v0.4.1 // indirect github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 // indirect + github.com/pkg/errors v0.9.1 // indirect + github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/vmihailenco/go-tinylfu v0.2.2 // indirect github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect github.com/whyrusleeping/cbor v0.0.0-20171005072247-63513f603b11 // indirect go.uber.org/zap v1.26.0 // indirect golang.org/x/exp v0.0.0-20231110203233-9a3e6036ecaa // indirect - gopkg.in/inf.v0 v0.9.1 // indirect ) require ( @@ -167,12 +176,12 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/mod v0.14.0 // indirect - golang.org/x/net v0.21.0 // indirect + golang.org/x/net v0.23.0 // indirect golang.org/x/sys v0.22.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20231120223509-83a465c0220f // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect google.golang.org/grpc v1.59.0 // indirect - google.golang.org/protobuf v1.31.0 // indirect + google.golang.org/protobuf v1.33.0 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect lukechampine.com/blake3 v1.2.1 // indirect diff --git a/go.sum b/go.sum index 8cd2edd60..446851e37 100644 --- a/go.sum +++ b/go.sum @@ -35,6 +35,8 @@ contrib.go.opencensus.io/exporter/prometheus v0.4.2/go.mod h1:dvEHbiKmgvbr5pjaF9 dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/DataDog/zstd v1.4.5 h1:EndNeuB0l9syBZhut0wns3gV1hL8zX8LIu6ZiVHWLIQ= +github.com/DataDog/zstd v1.4.5/go.mod h1:1jcaCB/ufaK+sKp1NBhlGmpz41jOoPQ35bpF36t7BBo= github.com/PuerkitoBio/purell v1.2.1 h1:QsZ4TjvwiMpat6gBCBxEQI0rcS9ehtkKtSpiUnd9N28= github.com/PuerkitoBio/purell v1.2.1/go.mod h1:ZwHcC/82TOaovDi//J/804umJFFmbOHPngi8iYYv/Eo= github.com/RussellLuo/slidingwindow v0.0.0-20200528002341-535bb99d338b h1:5/++qT1/z812ZqBvqQt6ToRswSuPZ/B33m6xVHRzADU= @@ -71,10 +73,6 @@ github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24 github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= -github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932 h1:mXoPYz/Ul5HYEDvkta6I8/rnYM5gSdSV2tJ6XbZuEtY= -github.com/bitly/go-hostpool v0.0.0-20171023180738-a3a6125de932/go.mod h1:NOuUCSz6Q9T7+igc/hlvDOUdtWKryOrtFyIVABv/p7k= -github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869 h1:DDGfHa7BWjL4YnC6+E63dPcxHo2sUxDIu8g3QgEJdRY= -github.com/bmizerany/assert v0.0.0-20160611221934-b7ed37b82869/go.mod h1:Ekp36dRnpXw/yCqJaO+ZrUyxD+3VXMFFr56k5XYrpB4= github.com/brianvoe/gofakeit/v6 v6.25.0 h1:ZpFjktOpLZUeF8q223o0rUuXtA+m5qW5srjvVi+JkXk= github.com/brianvoe/gofakeit/v6 v6.25.0/go.mod h1:Xj58BMSnFqcn/fAQeSK+/PLtC5kSb7FJIq4JyGa8vEs= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= @@ -95,6 +93,20 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5P github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f h1:otljaYPt5hWxV3MUfO5dFPFiOXg9CyG5/kCfayTqsJ4= +github.com/cockroachdb/datadriven v1.0.3-0.20230413201302-be42291fc80f/go.mod h1:a9RdTaap04u637JoCzcUoIcDmvwSUtcUFtT/C3kJlTU= +github.com/cockroachdb/errors v1.11.3 h1:5bA+k2Y6r+oz/6Z/RFlNeVCesGARKuC6YymtcDrbC/I= +github.com/cockroachdb/errors v1.11.3/go.mod h1:m4UIW4CDjx+R5cybPsNrRbreomiFqt8o1h1wUVazSd8= +github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce h1:giXvy4KSc/6g/esnpM7Geqxka4WSqI1SZc7sMJFd3y4= +github.com/cockroachdb/fifo v0.0.0-20240606204812-0bbfbd93a7ce/go.mod h1:9/y3cnZ5GKakj/H4y9r9GTjCvAFta7KLgSHPJJYc52M= +github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b h1:r6VH0faHjZeQy818SGhaone5OnYfxFR/+AzdY3sf5aE= +github.com/cockroachdb/logtags v0.0.0-20230118201751-21c54148d20b/go.mod h1:Vz9DsVWQQhf3vs21MhPMZpMGSht7O/2vFW2xusFUVOs= +github.com/cockroachdb/pebble v1.1.2 h1:CUh2IPtR4swHlEj48Rhfzw6l/d0qA31fItcIszQVIsA= +github.com/cockroachdb/pebble v1.1.2/go.mod h1:4exszw1r40423ZsmkG/09AFEG83I0uDgfujJdbL6kYU= +github.com/cockroachdb/redact v1.1.5 h1:u1PMllDkdFfPWaNGMyLD1+so+aq3uUItthCFqzwPJ30= +github.com/cockroachdb/redact v1.1.5/go.mod h1:BVNblN9mBWFyMyqK1k3AAiSxhvhfK2oOZZ2lK+dpvRg= +github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06 h1:zuQyyAKVxetITBuuhv3BI9cMrmStnpT18zmgmTxunpo= +github.com/cockroachdb/tokenbucket v0.0.0-20230807174530-cc333fc44b06/go.mod h1:7nc4anLGjupUW/PeY5qiNYsdNXj7zopG+eqsS7To5IQ= github.com/corpix/uarand v0.2.0 h1:U98xXwud/AVuCpkpgfPF7J5TQgr7R5tqT8VZP5KWbzE= github.com/corpix/uarand v0.2.0/go.mod h1:/3Z1QIqWkDIhf6XWn/08/uMHoQ8JUoTIKc2iPchBOmM= github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= @@ -126,6 +138,10 @@ github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7z github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4= github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/getsentry/sentry-go v0.27.0 h1:Pv98CIbtB3LkMWmXi4Joa5OOcwbmnX88sF5qbK3r3Ps= +github.com/getsentry/sentry-go v0.27.0/go.mod h1:lc76E2QywIyW8WuBnwl8Lc4bkmQH4+w1gwTf25trprY= +github.com/go-errors/errors v1.4.2 h1:J6MZopCL4uSllY1OfXM374weqZFFItUbrImctkmUxIA= +github.com/go-errors/errors v1.4.2/go.mod h1:sIVyrIiJhuEF+Pj9Ebtd6P/rEYROXFi3BopGUQ5a5Og= github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8= @@ -156,8 +172,6 @@ github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg78 github.com/go-yaml/yaml v2.1.0+incompatible/go.mod h1:w2MrLa16VYP0jy6N7M5kHaCkaLENm+P+Tv+MfurjSw0= github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= -github.com/gocql/gocql v1.7.0 h1:O+7U7/1gSN7QTEAaMEsJc1Oq2QHXvCWoF3DFK9HDHus= -github.com/gocql/gocql v1.7.0/go.mod h1:vnlvXyFZeLBF0Wy+RS8hrOdbn0UWsWtdg07XJnFxZ+4= github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ= github.com/gogo/protobuf v1.2.1/go.mod h1:hp+jE20tsWTFYpLwKvXlhS1hjn+gTNwPg2I6zVXpSg4= github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= @@ -195,8 +209,8 @@ github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaS github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.3 h1:KhyjKVUg7Usr/dYsdSqoFveMYd5ko72D+zANwlG1mmg= github.com/golang/protobuf v1.5.3/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= -github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= -github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= +github.com/golang/snappy v0.0.4 h1:yAGX7huGHXlcLOEtBnF4w7FQwA26wojNCwOYAEhLjQM= +github.com/golang/snappy v0.0.4/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/btree v0.0.0-20180813153112-4030bb1f1f0c/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -239,8 +253,6 @@ github.com/gorilla/websocket v1.5.1 h1:gmztn0JnHVt9JZquRuzLw3g4wouNVzKL15iLr/zn/ github.com/gorilla/websocket v1.5.1/go.mod h1:x3kM2JMyaluk02fnUJpQuwD2dCS5NDG2ZHL0uE0tcaY= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1 h1:6UKoz5ujsI55KNpsJH3UwCq3T8kKbZwNZBNPuTTje8U= github.com/grpc-ecosystem/grpc-gateway/v2 v2.18.1/go.mod h1:YvJ2f6MplWDhfxiUC3KpyTy76kYUZA4W3pTv/wdKQ9Y= -github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed h1:5upAirOpQc1Q53c0bnx2ufif5kANL7bfZWcc6VJWJd8= -github.com/hailocab/go-hostpool v0.0.0-20160125115350-e80d13ce29ed/go.mod h1:tMWxXQ9wFIaZeTI9F+hmhFiGpFmhOHzyShyFUhRm0H4= github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= github.com/hashicorp/go-hclog v0.9.2 h1:CG6TE5H9/JXsFWJCfoIVpKFIkFe6ysEuHirp4DxCsHI= @@ -512,8 +524,12 @@ github.com/orandin/slog-gorm v1.3.2 h1:C0lKDQPAx/pF+8K2HL7bdShPwOEJpPM0Bn80zTzxU github.com/orandin/slog-gorm v1.3.2/go.mod h1:MoZ51+b7xE9lwGNPYEhxcUtRNrYzjdcKvA8QXQQGEPA= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9 h1:1/WtZae0yGtPq+TI6+Tv1WTxkukpXeMlviSxvL7SRgk= github.com/petar/GoLLRB v0.0.0-20210522233825-ae3b015fd3e9/go.mod h1:x3N5drFsm2uilKKuuYo6LdyD8vZAW55sH/9w+pbo1sw= +github.com/pingcap/errors v0.11.4 h1:lFuQV/oaUMGcD2tqt+01ROSmJs75VG1ToEOkZIZ4nE4= +github.com/pingcap/errors v0.11.4/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= +github.com/pkg/diff v0.0.0-20210226163009-20ebb0f2a09e/go.mod h1:pJLUxLENpZxwdsKMEsNbx1VGcRFpLqf3715MtcvvzbA= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -564,6 +580,7 @@ github.com/rivo/uniseg v0.1.0 h1:+2KBaVoUmb9XzDsrx/Ct0W/EYOSFf/nWTauy++DprtY= github.com/rivo/uniseg v0.1.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.6.1/go.mod h1:xXDCJY+GAPziupqXw64V24skbSoqbTEfhy4qGm1nDQc= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= @@ -795,8 +812,8 @@ golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= -golang.org/x/net v0.21.0 h1:AQyQV4dYCvJ7vGmJyKki9+PBdyvhkSd8EIx/qb0AYv4= -golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= +golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -818,8 +835,8 @@ golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20220601150217-0de741cfad7f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.5.0 h1:60k92dhOjHxJkrqnwsfl8KuaHbn/5dl0lUPUklKo3qE= -golang.org/x/sync v0.5.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= @@ -1061,8 +1078,8 @@ google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp0 google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8= -google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI= +google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= @@ -1071,8 +1088,6 @@ gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntN gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/errgo.v2 v2.1.0/go.mod h1:hNsd1EY+bozCKY1Ytp96fpM3vjJbqLJn88ws8XvfDNI= gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= -gopkg.in/inf.v0 v0.9.1 h1:73M5CoZyi3ZLMOyDlQh031Cx6N9NDJ2Vvfl76EDAgDc= -gopkg.in/inf.v0 v0.9.1/go.mod h1:cWUDdTG/fYaXco+Dcufb5Vnc6Gp2YChqWtbxRZE0mXw= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= From 61c836c974f5087277b31d9f2ff75875fca6ee27 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 14 Nov 2024 15:28:38 -0500 Subject: [PATCH 02/31] pebble persister --- events/events.go | 70 +++++++++++++++++++++++++++++++++++++++++ events/pebblepersist.go | 69 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 133 insertions(+), 6 deletions(-) diff --git a/events/events.go b/events/events.go index 46fbb96df..60bd2363b 100644 --- a/events/events.go +++ b/events/events.go @@ -219,6 +219,76 @@ func (evt *XRPCStreamEvent) Serialize(wc io.Writer) error { return obj.MarshalCBOR(cborWriter) } +func (xevt *XRPCStreamEvent) Deserialize(r io.Reader) error { + var header EventHeader + if err := header.UnmarshalCBOR(r); err != nil { + return fmt.Errorf("reading header: %w", err) + } + switch header.Op { + case EvtKindMessage: + switch header.MsgType { + case "#commit": + var evt comatproto.SyncSubscribeRepos_Commit + if err := evt.UnmarshalCBOR(r); err != nil { + return fmt.Errorf("reading repoCommit event: %w", err) + } + xevt.RepoCommit = &evt + case "#handle": + var evt comatproto.SyncSubscribeRepos_Handle + if err := evt.UnmarshalCBOR(r); err != nil { + return err + } + xevt.RepoHandle = &evt + case "#identity": + var evt comatproto.SyncSubscribeRepos_Identity + if err := evt.UnmarshalCBOR(r); err != nil { + return err + } + xevt.RepoIdentity = &evt + case "#account": + var evt comatproto.SyncSubscribeRepos_Account + if err := evt.UnmarshalCBOR(r); err != nil { + return err + } + xevt.RepoAccount = &evt + case "#info": + // TODO: this might also be a LabelInfo (as opposed to RepoInfo) + var evt comatproto.SyncSubscribeRepos_Info + if err := evt.UnmarshalCBOR(r); err != nil { + return err + } + xevt.RepoInfo = &evt + case "#migrate": + var evt comatproto.SyncSubscribeRepos_Migrate + if err := evt.UnmarshalCBOR(r); err != nil { + return err + } + xevt.RepoMigrate = &evt + case "#tombstone": + var evt comatproto.SyncSubscribeRepos_Tombstone + if err := evt.UnmarshalCBOR(r); err != nil { + return err + } + xevt.RepoTombstone = &evt + case "#labels": + var evt comatproto.LabelSubscribeLabels_Labels + if err := evt.UnmarshalCBOR(r); err != nil { + return fmt.Errorf("reading Labels event: %w", err) + } + xevt.LabelLabels = &evt + } + case EvtKindErrorFrame: + var errframe ErrorFrame + if err := errframe.UnmarshalCBOR(r); err != nil { + return err + } + xevt.Error = &errframe + default: + return fmt.Errorf("unrecognized event stream type: %d", header.Op) + } + return nil +} + var ErrNoSeq = errors.New("event has no sequence number") // serialize content into Preserialized cache diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 3564297d1..2d87f5135 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -1,6 +1,7 @@ package events import ( + "bytes" "context" "encoding/binary" "fmt" @@ -30,24 +31,71 @@ func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error } blob := e.Preserialized - seq := e.Se + seq := e.Sequence() + if seq < 0 { + // drop event + // TODO: persist with longer key? {prev 8 byte key}{int32 extra counter} + return nil + } var key [8]byte - binary.BigEndian.PutUint64(key, seq) + binary.BigEndian.PutUint64(key[:], uint64(seq)) - return nil + err = pp.db.Set(key[:], blob, pebble.Sync) + + return err } + +func eventFromPebbleIter(iter *pebble.Iterator) (*XRPCStreamEvent, error) { + blob, err := iter.ValueAndErr() + if err != nil { + return nil, err + } + br := bytes.NewReader(blob) + evt := new(XRPCStreamEvent) + err = evt.Deserialize(br) + if err != nil { + return nil, err + } + evt.Preserialized = bytes.Clone(blob) + return evt, nil +} + func (pp *PebblePersist) Playback(ctx context.Context, since int64, cb func(*XRPCStreamEvent) error) error { + var key [8]byte + binary.BigEndian.PutUint64(key[:], uint64(since)) + + iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{LowerBound: key[:]}) + if err != nil { + return err + } + defer iter.Close() + + for iter.First(); iter.Valid(); iter.Next() { + evt, err := eventFromPebbleIter(iter) + if err != nil { + return err + } + + err = cb(evt) + if err != nil { + return err + } + } + return nil } func (pp *PebblePersist) TakeDownRepo(ctx context.Context, usr models.Uid) error { + // TODO: implement filter on playback to ignore taken-down-repos? return nil } func (pp *PebblePersist) Flush(context.Context) error { - return nil + return pp.db.Flush() } func (pp *PebblePersist) Shutdown(context.Context) error { - return nil + err := pp.db.Close() + pp.db = nil + return err } func (pp *PebblePersist) SetEventBroadcaster(broadcast func(*XRPCStreamEvent)) { @@ -55,5 +103,14 @@ func (pp *PebblePersist) SetEventBroadcaster(broadcast func(*XRPCStreamEvent)) { } func (pp *PebblePersist) GetLast(ctx context.Context) (*XRPCStreamEvent, error) { - + iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) + if err != nil { + return nil, err + } + ok := iter.Last() + if !ok { + return nil, nil + } + evt, err := eventFromPebbleIter(iter) + return evt, nil } From 44ce34e876e0823e83446ff181cb3c8ab2d237a6 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 14 Nov 2024 16:16:51 -0500 Subject: [PATCH 03/31] fix events tests add pebble test --- events/dbpersist_test.go | 29 +++-- events/diskpersist_test.go | 204 ++++++++++++++++++++++++++++++----- events/pebblepersist.go | 26 +++-- events/pebblepersist_test.go | 14 +++ pds/data/types.go | 27 +++++ pds/server.go | 21 +--- 6 files changed, 252 insertions(+), 69 deletions(-) create mode 100644 events/pebblepersist_test.go create mode 100644 pds/data/types.go diff --git a/events/dbpersist_test.go b/events/dbpersist_test.go index c299569da..7feced26d 100644 --- a/events/dbpersist_test.go +++ b/events/dbpersist_test.go @@ -1,4 +1,4 @@ -package events_test +package events import ( "context" @@ -11,19 +11,18 @@ import ( atproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/carstore" - "github.com/bluesky-social/indigo/events" lexutil "github.com/bluesky-social/indigo/lex/util" "github.com/bluesky-social/indigo/models" - "github.com/bluesky-social/indigo/pds" + pds "github.com/bluesky-social/indigo/pds/data" "github.com/bluesky-social/indigo/repomgr" "github.com/bluesky-social/indigo/util" - "github.com/ipfs/go-log/v2" + logging "github.com/ipfs/go-log/v2" "gorm.io/driver/sqlite" "gorm.io/gorm" ) func init() { - log.SetAllLoggers(log.LevelDebug) + logging.SetAllLoggers(logging.LevelDebug) } func BenchmarkDBPersist(b *testing.B) { @@ -61,24 +60,24 @@ func BenchmarkDBPersist(b *testing.B) { defer os.RemoveAll(tempPath) // Initialize a DBPersister - dbp, err := events.NewDbPersistence(db, cs, nil) + dbp, err := NewDbPersistence(db, cs, nil) if err != nil { b.Fatal(err) } // Create a bunch of events - evtman := events.NewEventManager(dbp) + evtman := NewEventManager(dbp) userRepoHead, err := mgr.GetRepoRoot(ctx, 1) if err != nil { b.Fatal(err) } - inEvts := make([]*events.XRPCStreamEvent, b.N) + inEvts := make([]*XRPCStreamEvent, b.N) for i := 0; i < b.N; i++ { cidLink := lexutil.LexLink(cid) headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &events.XRPCStreamEvent{ + inEvts[i] = &XRPCStreamEvent{ RepoCommit: &atproto.SyncSubscribeRepos_Commit{ Repo: "did:example:123", Commit: headLink, @@ -136,7 +135,7 @@ func BenchmarkDBPersist(b *testing.B) { b.StopTimer() - dbp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { + dbp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { outEvtCount++ return nil }) @@ -183,24 +182,24 @@ func BenchmarkPlayback(b *testing.B) { defer os.RemoveAll(tempPath) // Initialize a DBPersister - dbp, err := events.NewDbPersistence(db, cs, nil) + dbp, err := NewDbPersistence(db, cs, nil) if err != nil { b.Fatal(err) } // Create a bunch of events - evtman := events.NewEventManager(dbp) + evtman := NewEventManager(dbp) userRepoHead, err := mgr.GetRepoRoot(ctx, 1) if err != nil { b.Fatal(err) } - inEvts := make([]*events.XRPCStreamEvent, n) + inEvts := make([]*XRPCStreamEvent, n) for i := 0; i < n; i++ { cidLink := lexutil.LexLink(cid) headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &events.XRPCStreamEvent{ + inEvts[i] = &XRPCStreamEvent{ RepoCommit: &atproto.SyncSubscribeRepos_Commit{ Repo: "did:example:123", Commit: headLink, @@ -256,7 +255,7 @@ func BenchmarkPlayback(b *testing.B) { b.ResetTimer() - dbp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { + dbp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { outEvtCount++ return nil }) diff --git a/events/diskpersist_test.go b/events/diskpersist_test.go index 5d09c0fc2..4867d1cbd 100644 --- a/events/diskpersist_test.go +++ b/events/diskpersist_test.go @@ -1,4 +1,4 @@ -package events_test +package events import ( "context" @@ -14,16 +14,162 @@ import ( atproto "github.com/bluesky-social/indigo/api/atproto" "github.com/bluesky-social/indigo/api/bsky" "github.com/bluesky-social/indigo/carstore" - "github.com/bluesky-social/indigo/events" lexutil "github.com/bluesky-social/indigo/lex/util" "github.com/bluesky-social/indigo/models" - "github.com/bluesky-social/indigo/pds" + pds "github.com/bluesky-social/indigo/pds/data" "github.com/bluesky-social/indigo/repomgr" "github.com/bluesky-social/indigo/util" "gorm.io/gorm" ) +func testPersister(t *testing.T, perisistenceFactory func(path string, db *gorm.DB) (EventPersistence, error)) { + ctx := context.Background() + + db, _, cs, tempPath, err := setupDBs(t) + if err != nil { + t.Fatal(err) + } + + db.AutoMigrate(&pds.User{}) + db.AutoMigrate(&pds.Peering{}) + db.AutoMigrate(&models.ActorInfo{}) + + db.Create(&models.ActorInfo{ + Uid: 1, + Did: "did:example:123", + }) + + mgr := repomgr.NewRepoManager(cs, &util.FakeKeyManager{}) + + err = mgr.InitNewActor(ctx, 1, "alice", "did:example:123", "Alice", "", "") + if err != nil { + t.Fatal(err) + } + + _, cid, err := mgr.CreateRecord(ctx, 1, "app.bsky.feed.post", &bsky.FeedPost{ + Text: "hello world", + CreatedAt: time.Now().Format(util.ISO8601), + }) + if err != nil { + t.Fatal(err) + } + + defer os.RemoveAll(tempPath) + + // Initialize a persister + dp, err := perisistenceFactory(tempPath, db) + if err != nil { + t.Fatal(err) + } + + // Create a bunch of events + evtman := NewEventManager(dp) + + userRepoHead, err := mgr.GetRepoRoot(ctx, 1) + if err != nil { + t.Fatal(err) + } + + n := 100 + inEvts := make([]*XRPCStreamEvent, n) + for i := 0; i < n; i++ { + cidLink := lexutil.LexLink(cid) + headLink := lexutil.LexLink(userRepoHead) + inEvts[i] = &XRPCStreamEvent{ + RepoCommit: &atproto.SyncSubscribeRepos_Commit{ + Repo: "did:example:123", + Commit: headLink, + Ops: []*atproto.SyncSubscribeRepos_RepoOp{ + { + Action: "add", + Cid: &cidLink, + Path: "path1", + }, + }, + Time: time.Now().Format(util.ISO8601), + Seq: int64(i), + }, + } + } + + // Add events in parallel + for i := 0; i < n; i++ { + err = evtman.AddEvent(ctx, inEvts[i]) + if err != nil { + t.Fatal(err) + } + } + + if err := dp.Flush(ctx); err != nil { + t.Fatal(err) + } + + outEvtCount := 0 + expectedEvtCount := n + + dp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { + outEvtCount++ + return nil + }) + + if outEvtCount != expectedEvtCount { + t.Fatalf("expected %d events, got %d", expectedEvtCount, outEvtCount) + } + + dp.Shutdown(ctx) + + time.Sleep(time.Millisecond * 100) + + dp2, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ + EventsPerFile: 10, + UIDCacheSize: 100000, + DIDCacheSize: 100000, + }) + if err != nil { + t.Fatal(err) + } + + evtman2 := NewEventManager(dp2) + + inEvts = make([]*XRPCStreamEvent, n) + for i := 0; i < n; i++ { + cidLink := lexutil.LexLink(cid) + headLink := lexutil.LexLink(userRepoHead) + inEvts[i] = &XRPCStreamEvent{ + RepoCommit: &atproto.SyncSubscribeRepos_Commit{ + Repo: "did:example:123", + Commit: headLink, + Ops: []*atproto.SyncSubscribeRepos_RepoOp{ + { + Action: "add", + Cid: &cidLink, + Path: "path1", + }, + }, + Time: time.Now().Format(util.ISO8601), + }, + } + } + + for i := 0; i < n; i++ { + err = evtman2.AddEvent(ctx, inEvts[i]) + if err != nil { + t.Fatal(err) + } + } +} func TestDiskPersist(t *testing.T) { + factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) { + return NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ + EventsPerFile: 10, + UIDCacheSize: 100000, + DIDCacheSize: 100000, + }) + } + testPersister(t, factory) +} + +func XTestDiskPersist(t *testing.T) { ctx := context.Background() db, _, cs, tempPath, err := setupDBs(t) @@ -59,7 +205,7 @@ func TestDiskPersist(t *testing.T) { // Initialize a DBPersister - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ + dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ EventsPerFile: 10, UIDCacheSize: 100000, DIDCacheSize: 100000, @@ -69,7 +215,7 @@ func TestDiskPersist(t *testing.T) { } // Create a bunch of events - evtman := events.NewEventManager(dp) + evtman := NewEventManager(dp) userRepoHead, err := mgr.GetRepoRoot(ctx, 1) if err != nil { @@ -77,11 +223,11 @@ func TestDiskPersist(t *testing.T) { } n := 100 - inEvts := make([]*events.XRPCStreamEvent, n) + inEvts := make([]*XRPCStreamEvent, n) for i := 0; i < n; i++ { cidLink := lexutil.LexLink(cid) headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &events.XRPCStreamEvent{ + inEvts[i] = &XRPCStreamEvent{ RepoCommit: &atproto.SyncSubscribeRepos_Commit{ Repo: "did:example:123", Commit: headLink, @@ -112,7 +258,7 @@ func TestDiskPersist(t *testing.T) { outEvtCount := 0 expectedEvtCount := n - dp.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { + dp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { outEvtCount++ return nil }) @@ -125,7 +271,7 @@ func TestDiskPersist(t *testing.T) { time.Sleep(time.Millisecond * 100) - dp2, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ + dp2, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ EventsPerFile: 10, UIDCacheSize: 100000, DIDCacheSize: 100000, @@ -134,13 +280,13 @@ func TestDiskPersist(t *testing.T) { t.Fatal(err) } - evtman2 := events.NewEventManager(dp2) + evtman2 := NewEventManager(dp2) - inEvts = make([]*events.XRPCStreamEvent, n) + inEvts = make([]*XRPCStreamEvent, n) for i := 0; i < n; i++ { cidLink := lexutil.LexLink(cid) headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &events.XRPCStreamEvent{ + inEvts[i] = &XRPCStreamEvent{ RepoCommit: &atproto.SyncSubscribeRepos_Commit{ Repo: "did:example:123", Commit: headLink, @@ -174,7 +320,7 @@ func BenchmarkDiskPersist(b *testing.B) { // Initialize a DBPersister - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ + dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ EventsPerFile: 5000, UIDCacheSize: 100000, DIDCacheSize: 100000, @@ -187,7 +333,7 @@ func BenchmarkDiskPersist(b *testing.B) { } -func runPersisterBenchmark(b *testing.B, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) { +func runPersisterBenchmark(b *testing.B, cs carstore.CarStore, db *gorm.DB, p EventPersistence) { ctx := context.Background() db.AutoMigrate(&pds.User{}) @@ -215,18 +361,18 @@ func runPersisterBenchmark(b *testing.B, cs carstore.CarStore, db *gorm.DB, p ev } // Create a bunch of events - evtman := events.NewEventManager(p) + evtman := NewEventManager(p) userRepoHead, err := mgr.GetRepoRoot(ctx, 1) if err != nil { b.Fatal(err) } - inEvts := make([]*events.XRPCStreamEvent, b.N) + inEvts := make([]*XRPCStreamEvent, b.N) for i := 0; i < b.N; i++ { cidLink := lexutil.LexLink(cid) headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &events.XRPCStreamEvent{ + inEvts[i] = &XRPCStreamEvent{ RepoCommit: &atproto.SyncSubscribeRepos_Commit{ Repo: "did:example:123", Commit: headLink, @@ -290,7 +436,7 @@ func TestDiskPersister(t *testing.T) { // Initialize a DBPersister - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ + dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ EventsPerFile: 20, UIDCacheSize: 100000, DIDCacheSize: 100000, @@ -302,7 +448,7 @@ func TestDiskPersister(t *testing.T) { runEventManagerTest(t, cs, db, dp) } -func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) { +func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p EventPersistence) { ctx := context.Background() db.AutoMigrate(&pds.User{}) @@ -329,7 +475,7 @@ func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p even t.Fatal(err) } - evtman := events.NewEventManager(p) + evtman := NewEventManager(p) userRepoHead, err := mgr.GetRepoRoot(ctx, 1) if err != nil { @@ -337,11 +483,11 @@ func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p even } testSize := 100 // you can adjust this number as needed - inEvts := make([]*events.XRPCStreamEvent, testSize) + inEvts := make([]*XRPCStreamEvent, testSize) for i := 0; i < testSize; i++ { cidLink := lexutil.LexLink(cid) headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &events.XRPCStreamEvent{ + inEvts[i] = &XRPCStreamEvent{ RepoCommit: &atproto.SyncSubscribeRepos_Commit{ Repo: "did:example:123", Commit: headLink, @@ -368,7 +514,7 @@ func runEventManagerTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p even } outEvtCount := 0 - p.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { + p.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { // Check that the contents of the output events match the input events // Clear cache, don't care if one has it and not the other inEvts[outEvtCount].Preserialized = nil @@ -397,7 +543,7 @@ func TestDiskPersisterTakedowns(t *testing.T) { // Initialize a DBPersister - dp, err := events.NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &events.DiskPersistOptions{ + dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ EventsPerFile: 10, UIDCacheSize: 100000, DIDCacheSize: 100000, @@ -409,7 +555,7 @@ func TestDiskPersisterTakedowns(t *testing.T) { runTakedownTest(t, cs, db, dp) } -func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.EventPersistence) { +func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p EventPersistence) { ctx := context.TODO() db.AutoMigrate(&pds.User{}) @@ -439,10 +585,10 @@ func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.E } } - evtman := events.NewEventManager(p) + evtman := NewEventManager(p) testSize := 100 // you can adjust this number as needed - inEvts := make([]*events.XRPCStreamEvent, testSize*userCount) + inEvts := make([]*XRPCStreamEvent, testSize*userCount) for i := 0; i < testSize*userCount; i++ { user := users[i%userCount] _, cid, err := mgr.CreateRecord(ctx, user.Uid, "app.bsky.feed.post", &bsky.FeedPost{ @@ -460,7 +606,7 @@ func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.E cidLink := lexutil.LexLink(cid) headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &events.XRPCStreamEvent{ + inEvts[i] = &XRPCStreamEvent{ RepoCommit: &atproto.SyncSubscribeRepos_Commit{ Repo: user.Did, Commit: headLink, @@ -495,7 +641,7 @@ func runTakedownTest(t *testing.T, cs carstore.CarStore, db *gorm.DB, p events.E // Verify that the events of the user have been removed from the event stream var evtsCount int - if err := p.Playback(ctx, 0, func(evt *events.XRPCStreamEvent) error { + if err := p.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { evtsCount++ if evt.RepoCommit.Repo == takeDownUser.Did { t.Fatalf("found event for user %d after takedown", takeDownUser.Uid) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 2d87f5135..272fe8837 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -12,6 +12,9 @@ import ( type PebblePersist struct { broadcast func(*XRPCStreamEvent) db *pebble.DB + + prevSeq int64 + prevSeqExtra uint32 } func NewPebblePersistance(path string) (*PebblePersist, error) { @@ -32,16 +35,25 @@ func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error blob := e.Preserialized seq := e.Sequence() + log.Infof("persist %d", seq) + if seq < 0 { - // drop event - // TODO: persist with longer key? {prev 8 byte key}{int32 extra counter} - return nil - } + // persist with longer key {prev 8 byte key}{int32 extra counter} + pp.prevSeqExtra++ + var key [12]byte + binary.BigEndian.PutUint64(key[:8], uint64(pp.prevSeq)) + binary.BigEndian.PutUint32(key[8:], pp.prevSeqExtra) - var key [8]byte - binary.BigEndian.PutUint64(key[:], uint64(seq)) + err = pp.db.Set(key[:], blob, pebble.Sync) + return nil + } else { + pp.prevSeq = seq + pp.prevSeqExtra = 0 + var key [8]byte + binary.BigEndian.PutUint64(key[:], uint64(seq)) - err = pp.db.Set(key[:], blob, pebble.Sync) + err = pp.db.Set(key[:], blob, pebble.Sync) + } return err } diff --git a/events/pebblepersist_test.go b/events/pebblepersist_test.go new file mode 100644 index 000000000..e838f9306 --- /dev/null +++ b/events/pebblepersist_test.go @@ -0,0 +1,14 @@ +package events + +import ( + "gorm.io/gorm" + "path/filepath" + "testing" +) + +func TestPebblePersist(t *testing.T) { + factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) { + return NewPebblePersistance(filepath.Join(tempPath, "pebble.db")) + } + testPersister(t, factory) +} diff --git a/pds/data/types.go b/pds/data/types.go new file mode 100644 index 000000000..f7ff96e54 --- /dev/null +++ b/pds/data/types.go @@ -0,0 +1,27 @@ +package data + +import ( + "github.com/bluesky-social/indigo/models" + "gorm.io/gorm" + "time" +) + +type User struct { + ID models.Uid `gorm:"primarykey"` + CreatedAt time.Time + UpdatedAt time.Time + DeletedAt gorm.DeletedAt `gorm:"index"` + Handle string `gorm:"uniqueIndex"` + Password string + RecoveryKey string + Email string + Did string `gorm:"uniqueIndex"` + PDS uint +} + +type Peering struct { + gorm.Model + Host string + Did string + Approved bool +} diff --git a/pds/server.go b/pds/server.go index b9d1c903b..54f1dfed1 100644 --- a/pds/server.go +++ b/pds/server.go @@ -21,6 +21,7 @@ import ( lexutil "github.com/bluesky-social/indigo/lex/util" "github.com/bluesky-social/indigo/models" "github.com/bluesky-social/indigo/notifs" + pdsdata "github.com/bluesky-social/indigo/pds/data" "github.com/bluesky-social/indigo/plc" "github.com/bluesky-social/indigo/repomgr" "github.com/bluesky-social/indigo/util" @@ -456,18 +457,7 @@ func (s *Server) HandleResolveDid(c echo.Context) error { return c.String(200, u.Did) } -type User struct { - ID models.Uid `gorm:"primarykey"` - CreatedAt time.Time - UpdatedAt time.Time - DeletedAt gorm.DeletedAt `gorm:"index"` - Handle string `gorm:"uniqueIndex"` - Password string - RecoveryKey string - Email string - Did string `gorm:"uniqueIndex"` - PDS uint -} +type User = pdsdata.User type RefreshToken struct { gorm.Model @@ -636,12 +626,7 @@ func (s *Server) invalidateToken(ctx context.Context, u *User, tok *jwt.Token) e panic("nyi") } -type Peering struct { - gorm.Model - Host string - Did string - Approved bool -} +type Peering = pdsdata.Peering func (s *Server) EventsHandler(c echo.Context) error { conn, err := websocket.Upgrade(c.Response().Writer, c.Request(), c.Response().Header(), 1<<10, 1<<10) From 3f77d63a94af1fd249a423fbc5433118e8b61097 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 14 Nov 2024 17:05:07 -0500 Subject: [PATCH 04/31] GC thread with retention and period --- events/pebblepersist.go | 66 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 60 insertions(+), 6 deletions(-) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 272fe8837..63bb0cf1c 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -7,6 +7,7 @@ import ( "fmt" "github.com/bluesky-social/indigo/models" "github.com/cockroachdb/pebble" + "time" ) type PebblePersist struct { @@ -15,6 +16,8 @@ type PebblePersist struct { prevSeq int64 prevSeqExtra uint32 + + cancel func() } func NewPebblePersistance(path string) (*PebblePersist, error) { @@ -35,22 +38,24 @@ func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error blob := e.Preserialized seq := e.Sequence() - log.Infof("persist %d", seq) + nowMillis := uint64(time.Now().UnixMilli()) if seq < 0 { - // persist with longer key {prev 8 byte key}{int32 extra counter} + // persist with longer key {prev 8 byte key}{time}{int32 extra counter} pp.prevSeqExtra++ - var key [12]byte + var key [20]byte binary.BigEndian.PutUint64(key[:8], uint64(pp.prevSeq)) - binary.BigEndian.PutUint32(key[8:], pp.prevSeqExtra) + binary.BigEndian.PutUint64(key[8:16], nowMillis) + binary.BigEndian.PutUint32(key[16:], pp.prevSeqExtra) err = pp.db.Set(key[:], blob, pebble.Sync) return nil } else { pp.prevSeq = seq pp.prevSeqExtra = 0 - var key [8]byte - binary.BigEndian.PutUint64(key[:], uint64(seq)) + var key [16]byte + binary.BigEndian.PutUint64(key[:8], uint64(seq)) + binary.BigEndian.PutUint64(key[8:16], nowMillis) err = pp.db.Set(key[:], blob, pebble.Sync) } @@ -105,6 +110,9 @@ func (pp *PebblePersist) Flush(context.Context) error { return pp.db.Flush() } func (pp *PebblePersist) Shutdown(context.Context) error { + if pp.cancel != nil { + pp.cancel() + } err := pp.db.Close() pp.db = nil return err @@ -126,3 +134,49 @@ func (pp *PebblePersist) GetLast(ctx context.Context) (*XRPCStreamEvent, error) evt, err := eventFromPebbleIter(iter) return evt, nil } + +// example; +// ``` +// pp := NewPebblePersistance("/tmp/foo.pebble") +// go pp.GCThread(context.TODO(), 48 * time.Hour, 5 * time.Minute) +// ``` +func (pp *PebblePersist) GCThread(ctx context.Context, retention, gcPeriod time.Duration) { + ctx, cancel := context.WithCancel(ctx) + pp.cancel = cancel + ticker := time.NewTicker(gcPeriod) + defer ticker.Stop() + for { + select { + case <-ticker.C: + pp.GarbageCollect(ctx, retention) + case <-ctx.Done(): + return + } + } +} +func (pp *PebblePersist) GarbageCollect(ctx context.Context, retention time.Duration) error { + nowMillis := time.Now().UnixMilli() + expired := uint64(nowMillis - retention.Milliseconds()) + iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) + if err != nil { + return err + } + defer iter.Close() + todel := make(chan []byte, 100) + go func() { + for xkey := range todel { + pp.db.Delete(xkey, nil) + } + }() + defer close(todel) + for iter.First(); iter.Valid(); iter.Next() { + keyblob := iter.Key() + keyTime := binary.BigEndian.Uint64(keyblob[8:16]) + if keyTime < expired { + todel <- bytes.Clone(keyblob) + } else { + break + } + } + return nil +} From 81a9ce8370c64b3543f8401c8315c5d69d72bd98 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 14 Nov 2024 17:18:07 -0500 Subject: [PATCH 05/31] connect rainbow to pebble persister --- cmd/rainbow/main.go | 21 ++++++++++++++++++++- splitter/splitter.go | 18 +++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/cmd/rainbow/main.go b/cmd/rainbow/main.go index 5d6ba14c6..2cb34abb6 100644 --- a/cmd/rainbow/main.go +++ b/cmd/rainbow/main.go @@ -52,6 +52,11 @@ func run(args []string) { Name: "splitter-host", Value: "bsky.network", }, + &cli.StringFlag{ + Name: "persist-db", + Value: "", + Usage: "path to persistence db", + }, &cli.StringFlag{ Name: "api-listen", Value: ":2480", @@ -110,7 +115,21 @@ func Splitter(cctx *cli.Context) error { otel.SetTracerProvider(tp) } - spl := splitter.NewSplitter(cctx.String("splitter-host")) + persistPath := cctx.String("persist-db") + upstreamHost := cctx.String("splitter-host") + var spl *splitter.Splitter + var err error + if persistPath != "" { + log.Infof("building splitter with storage at: %s", persistPath) + spl, err = splitter.NewDiskSplitter(upstreamHost, persistPath) + if err != nil { + log.Fatalw("failed to create splitter", "path", persistPath, "error", err) + return err + } + } else { + log.Info("building in-memory splitter") + spl = splitter.NewMemSplitter(upstreamHost) + } // set up metrics endpoint go func() { diff --git a/splitter/splitter.go b/splitter/splitter.go index 0f4a6cf8c..fb6f2fcfa 100644 --- a/splitter/splitter.go +++ b/splitter/splitter.go @@ -30,6 +30,7 @@ var log = logging.Logger("splitter") type Splitter struct { Host string erb *EventRingBuffer + pp *events.PebblePersist events *events.EventManager // cursor storage @@ -41,7 +42,7 @@ type Splitter struct { consumers map[uint64]*SocketConsumer } -func NewSplitter(host string) *Splitter { +func NewMemSplitter(host string) *Splitter { erb := NewEventRingBuffer(20_000, 10_000) em := events.NewEventManager(erb) @@ -53,6 +54,21 @@ func NewSplitter(host string) *Splitter { consumers: make(map[uint64]*SocketConsumer), } } +func NewDiskSplitter(host, path string) (*Splitter, error) { + pp, err := events.NewPebblePersistance(path) + if err != nil { + return nil, err + } + + em := events.NewEventManager(pp) + return &Splitter{ + cursorFile: "cursor-file", + Host: host, + pp: pp, + events: em, + consumers: make(map[uint64]*SocketConsumer), + }, nil +} func (s *Splitter) Start(addr string) error { var lc net.ListenConfig From a71ce5a4aab8f5d70f3f1af8b2ab56c26deb03c6 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Thu, 14 Nov 2024 19:30:00 -0500 Subject: [PATCH 06/31] delete dead code --- events/diskpersist_test.go | 141 ------------------------------------- 1 file changed, 141 deletions(-) diff --git a/events/diskpersist_test.go b/events/diskpersist_test.go index 4867d1cbd..74d0d62a5 100644 --- a/events/diskpersist_test.go +++ b/events/diskpersist_test.go @@ -169,147 +169,6 @@ func TestDiskPersist(t *testing.T) { testPersister(t, factory) } -func XTestDiskPersist(t *testing.T) { - ctx := context.Background() - - db, _, cs, tempPath, err := setupDBs(t) - if err != nil { - t.Fatal(err) - } - - db.AutoMigrate(&pds.User{}) - db.AutoMigrate(&pds.Peering{}) - db.AutoMigrate(&models.ActorInfo{}) - - db.Create(&models.ActorInfo{ - Uid: 1, - Did: "did:example:123", - }) - - mgr := repomgr.NewRepoManager(cs, &util.FakeKeyManager{}) - - err = mgr.InitNewActor(ctx, 1, "alice", "did:example:123", "Alice", "", "") - if err != nil { - t.Fatal(err) - } - - _, cid, err := mgr.CreateRecord(ctx, 1, "app.bsky.feed.post", &bsky.FeedPost{ - Text: "hello world", - CreatedAt: time.Now().Format(util.ISO8601), - }) - if err != nil { - t.Fatal(err) - } - - defer os.RemoveAll(tempPath) - - // Initialize a DBPersister - - dp, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ - EventsPerFile: 10, - UIDCacheSize: 100000, - DIDCacheSize: 100000, - }) - if err != nil { - t.Fatal(err) - } - - // Create a bunch of events - evtman := NewEventManager(dp) - - userRepoHead, err := mgr.GetRepoRoot(ctx, 1) - if err != nil { - t.Fatal(err) - } - - n := 100 - inEvts := make([]*XRPCStreamEvent, n) - for i := 0; i < n; i++ { - cidLink := lexutil.LexLink(cid) - headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &XRPCStreamEvent{ - RepoCommit: &atproto.SyncSubscribeRepos_Commit{ - Repo: "did:example:123", - Commit: headLink, - Ops: []*atproto.SyncSubscribeRepos_RepoOp{ - { - Action: "add", - Cid: &cidLink, - Path: "path1", - }, - }, - Time: time.Now().Format(util.ISO8601), - }, - } - } - - // Add events in parallel - for i := 0; i < n; i++ { - err = evtman.AddEvent(ctx, inEvts[i]) - if err != nil { - t.Fatal(err) - } - } - - if err := dp.Flush(ctx); err != nil { - t.Fatal(err) - } - - outEvtCount := 0 - expectedEvtCount := n - - dp.Playback(ctx, 0, func(evt *XRPCStreamEvent) error { - outEvtCount++ - return nil - }) - - if outEvtCount != expectedEvtCount { - t.Fatalf("expected %d events, got %d", expectedEvtCount, outEvtCount) - } - - dp.Shutdown(ctx) - - time.Sleep(time.Millisecond * 100) - - dp2, err := NewDiskPersistence(filepath.Join(tempPath, "diskPrimary"), filepath.Join(tempPath, "diskArchive"), db, &DiskPersistOptions{ - EventsPerFile: 10, - UIDCacheSize: 100000, - DIDCacheSize: 100000, - }) - if err != nil { - t.Fatal(err) - } - - evtman2 := NewEventManager(dp2) - - inEvts = make([]*XRPCStreamEvent, n) - for i := 0; i < n; i++ { - cidLink := lexutil.LexLink(cid) - headLink := lexutil.LexLink(userRepoHead) - inEvts[i] = &XRPCStreamEvent{ - RepoCommit: &atproto.SyncSubscribeRepos_Commit{ - Repo: "did:example:123", - Commit: headLink, - Ops: []*atproto.SyncSubscribeRepos_RepoOp{ - { - Action: "add", - Cid: &cidLink, - Path: "path1", - }, - }, - Time: time.Now().Format(util.ISO8601), - }, - } - } - - for i := 0; i < n; i++ { - err = evtman2.AddEvent(ctx, inEvts[i]) - if err != nil { - t.Fatal(err) - } - } -} - func BenchmarkDiskPersist(b *testing.B) { db, _, cs, tempPath, err := setupDBs(b) if err != nil { From 096fc7cd8b012d18fc7d9a2ab141df7d6c2ebbf9 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 08:38:12 -0500 Subject: [PATCH 07/31] PR feedback, use pebble DeleteRange --- events/pebblepersist.go | 55 ++++++++++++++++++++++++++--------------- 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 63bb0cf1c..f366894ec 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -30,6 +30,11 @@ func NewPebblePersistance(path string) (*PebblePersist, error) { return pp, nil } +func setKeySeqMillis(key []byte, seq, millis int64) { + binary.BigEndian.PutUint64(key[:8], uint64(seq)) + binary.BigEndian.PutUint64(key[8:16], uint64(millis)) +} + func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error { err := e.Preserialize() if err != nil { @@ -38,24 +43,21 @@ func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error blob := e.Preserialized seq := e.Sequence() - nowMillis := uint64(time.Now().UnixMilli()) + nowMillis := time.Now().UnixMilli() if seq < 0 { // persist with longer key {prev 8 byte key}{time}{int32 extra counter} pp.prevSeqExtra++ var key [20]byte - binary.BigEndian.PutUint64(key[:8], uint64(pp.prevSeq)) - binary.BigEndian.PutUint64(key[8:16], nowMillis) + setKeySeqMillis(key[:], seq, nowMillis) binary.BigEndian.PutUint32(key[16:], pp.prevSeqExtra) err = pp.db.Set(key[:], blob, pebble.Sync) - return nil } else { pp.prevSeq = seq pp.prevSeqExtra = 0 var key [16]byte - binary.BigEndian.PutUint64(key[:8], uint64(seq)) - binary.BigEndian.PutUint64(key[8:16], nowMillis) + setKeySeqMillis(key[:], seq, nowMillis) err = pp.db.Set(key[:], blob, pebble.Sync) } @@ -138,7 +140,7 @@ func (pp *PebblePersist) GetLast(ctx context.Context) (*XRPCStreamEvent, error) // example; // ``` // pp := NewPebblePersistance("/tmp/foo.pebble") -// go pp.GCThread(context.TODO(), 48 * time.Hour, 5 * time.Minute) +// go pp.GCThread(context.Background(), 48 * time.Hour, 5 * time.Minute) // ``` func (pp *PebblePersist) GCThread(ctx context.Context, retention, gcPeriod time.Duration) { ctx, cancel := context.WithCancel(ctx) @@ -148,35 +150,48 @@ func (pp *PebblePersist) GCThread(ctx context.Context, retention, gcPeriod time. for { select { case <-ticker.C: - pp.GarbageCollect(ctx, retention) + err := pp.GarbageCollect(ctx, retention) + log.Error("GC err", "err", err) case <-ctx.Done(): return } } } + +var zeroKey [16]byte + +func init() { + setKeySeqMillis(zeroKey[:], 0, 0) +} + func (pp *PebblePersist) GarbageCollect(ctx context.Context, retention time.Duration) error { nowMillis := time.Now().UnixMilli() - expired := uint64(nowMillis - retention.Milliseconds()) + expired := nowMillis - retention.Milliseconds() iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) if err != nil { return err } defer iter.Close() - todel := make(chan []byte, 100) - go func() { - for xkey := range todel { - pp.db.Delete(xkey, nil) - } - }() - defer close(todel) + // scan keys to find last expired, then delete range + var seq int64 = int64(-1) + var lastKeyTime int64 for iter.First(); iter.Valid(); iter.Next() { keyblob := iter.Key() - keyTime := binary.BigEndian.Uint64(keyblob[8:16]) - if keyTime < expired { - todel <- bytes.Clone(keyblob) + + keyTime := int64(binary.BigEndian.Uint64(keyblob[8:16])) + if keyTime <= expired { + lastKeyTime = keyTime + seq = int64(binary.BigEndian.Uint64(keyblob[:8])) } else { break } } - return nil + if seq == -1 { + // nothing to delete + return nil + } + var key [16]byte + setKeySeqMillis(key[:], seq, lastKeyTime) + err = pp.db.DeleteRange(zeroKey[:], key[:], pebble.Sync) + return err } From e8c9d2e788367b4716c9eb4c82fc5488c348a83c Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 15 Nov 2024 14:04:34 +0000 Subject: [PATCH 08/31] identity: default dir with 100 max idle conns, and 1sec idle --- atproto/identity/identity.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/atproto/identity/identity.go b/atproto/identity/identity.go index 02e66f22c..2e67e0a5c 100644 --- a/atproto/identity/identity.go +++ b/atproto/identity/identity.go @@ -66,6 +66,11 @@ func DefaultDirectory() Directory { PLCURL: DefaultPLCURL, HTTPClient: http.Client{ Timeout: time.Second * 15, + Transport: &http.Transport{ + // would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad. + IdleConnTimeout: time.Millisecond * 1000, + MaxIdleConns: 100, + }, }, Resolver: net.Resolver{ Dial: func(ctx context.Context, network, address string) (net.Conn, error) { From ed0a5c6480a725d6fb1153723c49810f25ebad3d Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 15 Nov 2024 14:05:05 +0000 Subject: [PATCH 09/31] identity: drop default HTTP timeout from 15s to 10s --- atproto/identity/identity.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/identity/identity.go b/atproto/identity/identity.go index 2e67e0a5c..c8192e6d4 100644 --- a/atproto/identity/identity.go +++ b/atproto/identity/identity.go @@ -65,7 +65,7 @@ func DefaultDirectory() Directory { base := BaseDirectory{ PLCURL: DefaultPLCURL, HTTPClient: http.Client{ - Timeout: time.Second * 15, + Timeout: time.Second * 10, Transport: &http.Transport{ // would want this around 100ms for services doing lots of handle resolution. Impacts PLC connections as well, but not too bad. IdleConnTimeout: time.Millisecond * 1000, From d88346ab5df72a2a1809e27d9994403ced006c71 Mon Sep 17 00:00:00 2001 From: bryan newbold Date: Fri, 15 Nov 2024 14:05:20 +0000 Subject: [PATCH 10/31] identity: drop default DNS timeout from 5s to 3s --- atproto/identity/identity.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atproto/identity/identity.go b/atproto/identity/identity.go index c8192e6d4..c0453b2af 100644 --- a/atproto/identity/identity.go +++ b/atproto/identity/identity.go @@ -74,7 +74,7 @@ func DefaultDirectory() Directory { }, Resolver: net.Resolver{ Dial: func(ctx context.Context, network, address string) (net.Conn, error) { - d := net.Dialer{Timeout: time.Second * 5} + d := net.Dialer{Timeout: time.Second * 3} return d.DialContext(ctx, network, address) }, }, From 5dc06c0ccd47dadc579f4df6c40dd1dac3e56182 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 09:27:35 -0500 Subject: [PATCH 11/31] add gauge spl_active_clients --- splitter/metrics.go | 5 +++++ splitter/splitter.go | 2 ++ 2 files changed, 7 insertions(+) diff --git a/splitter/metrics.go b/splitter/metrics.go index a2173a639..76161ce45 100644 --- a/splitter/metrics.go +++ b/splitter/metrics.go @@ -9,3 +9,8 @@ var eventsSentCounter = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "spl_events_sent_counter", Help: "The total number of events sent to consumers", }, []string{"remote_addr", "user_agent"}) + +var activeClientGauge = promauto.NewGauge(prometheus.GaugeOpts{ + Name: "spl_active_clients", + Help: "Current number of active clients", +}) diff --git a/splitter/splitter.go b/splitter/splitter.go index c13d5e482..8f335063a 100644 --- a/splitter/splitter.go +++ b/splitter/splitter.go @@ -269,6 +269,8 @@ func (s *Splitter) EventsHandler(c echo.Context) error { "cursor", since, "consumer_id", consumerID, ) + activeClientGauge.Inc() + defer activeClientGauge.Dec() for { select { From 586cc75211949857eb704b954a514e1697c009dc Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 10:05:07 -0500 Subject: [PATCH 12/31] --persist-hours --- cmd/rainbow/main.go | 8 +++++++- splitter/splitter.go | 3 ++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/cmd/rainbow/main.go b/cmd/rainbow/main.go index 2cb34abb6..03d8be6b5 100644 --- a/cmd/rainbow/main.go +++ b/cmd/rainbow/main.go @@ -66,6 +66,12 @@ func run(args []string) { Value: ":2481", EnvVars: []string{"SPLITTER_METRICS_LISTEN"}, }, + &cli.Float64Flag{ + Name: "persist-hours", + Value: 24 * 7, + EnvVars: []string{"SPLITTER_PERSIST_HOURS"}, + Usage: "hours to buffer (float, may be fractional)", + }, } app.Action = Splitter @@ -121,7 +127,7 @@ func Splitter(cctx *cli.Context) error { var err error if persistPath != "" { log.Infof("building splitter with storage at: %s", persistPath) - spl, err = splitter.NewDiskSplitter(upstreamHost, persistPath) + spl, err = splitter.NewDiskSplitter(upstreamHost, persistPath, cctx.Float64("persist-hours")) if err != nil { log.Fatalw("failed to create splitter", "path", persistPath, "error", err) return err diff --git a/splitter/splitter.go b/splitter/splitter.go index 8f335063a..0a86dc757 100644 --- a/splitter/splitter.go +++ b/splitter/splitter.go @@ -54,12 +54,13 @@ func NewMemSplitter(host string) *Splitter { consumers: make(map[uint64]*SocketConsumer), } } -func NewDiskSplitter(host, path string) (*Splitter, error) { +func NewDiskSplitter(host, path string, persistHours float64) (*Splitter, error) { pp, err := events.NewPebblePersistance(path) if err != nil { return nil, err } + go pp.GCThread(context.Background(), time.Duration(float64(time.Hour)*persistHours), 5*time.Minute) em := events.NewEventManager(pp) return &Splitter{ cursorFile: "cursor-file", From 43c1d8e3def81128bd07c1bd66c5579c60a8bb24 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 11:29:27 -0500 Subject: [PATCH 13/31] fix err log --- events/pebblepersist.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index f366894ec..83d17b610 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -151,7 +151,9 @@ func (pp *PebblePersist) GCThread(ctx context.Context, retention, gcPeriod time. select { case <-ticker.C: err := pp.GarbageCollect(ctx, retention) - log.Error("GC err", "err", err) + if err != nil { + log.Errorw("GC err", "err", err) + } case <-ctx.Done(): return } From f5a301ba8acff65df1c369717340ba340d5b6e52 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 11:51:55 -0500 Subject: [PATCH 14/31] fix broadcast --- events/pebblepersist.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 83d17b610..bbf4d58e4 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -62,6 +62,11 @@ func (pp *PebblePersist) Persist(ctx context.Context, e *XRPCStreamEvent) error err = pp.db.Set(key[:], blob, pebble.Sync) } + if err != nil { + return err + } + pp.broadcast(e) + return err } From 97806912da267d103f1c2d3c97b2a644f3a1f21e Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 13:58:19 -0500 Subject: [PATCH 15/31] last seq from pebble --- events/pebblepersist.go | 13 ++++++++----- splitter/splitter.go | 11 +++++++++++ 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index bbf4d58e4..aeadf8cc3 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -129,17 +129,20 @@ func (pp *PebblePersist) SetEventBroadcaster(broadcast func(*XRPCStreamEvent)) { pp.broadcast = broadcast } -func (pp *PebblePersist) GetLast(ctx context.Context) (*XRPCStreamEvent, error) { +func (pp *PebblePersist) GetLast(ctx context.Context) (seq, millis int64, evt *XRPCStreamEvent, err error) { iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) if err != nil { - return nil, err + return 0, 0, nil, err } ok := iter.Last() if !ok { - return nil, nil + return 0, 0, nil, nil } - evt, err := eventFromPebbleIter(iter) - return evt, nil + evt, err = eventFromPebbleIter(iter) + keyblob := iter.Key() + seq = int64(binary.BigEndian.Uint64(keyblob[:8])) + millis = int64(binary.BigEndian.Uint64(keyblob[8:16])) + return seq, millis, evt, nil } // example; diff --git a/splitter/splitter.go b/splitter/splitter.go index 0a86dc757..2da2f0a43 100644 --- a/splitter/splitter.go +++ b/splitter/splitter.go @@ -413,6 +413,7 @@ func (s *Splitter) handleConnection(ctx context.Context, host string, con *webso } if seq%5000 == 0 { + // TODO: don't need this after we move to getting seq from pebble if err := s.writeCursor(seq); err != nil { log.Errorf("write cursor failed: %s", err) } @@ -426,6 +427,16 @@ func (s *Splitter) handleConnection(ctx context.Context, host string, con *webso } func (s *Splitter) getLastCursor() (int64, error) { + if s.pp != nil { + seq, millis, _, err := s.pp.GetLast(context.Background()) + if err == nil { + log.Debugw("got last cursor from pebble", "seq", seq, "millis", millis) + return seq, nil + } else { + log.Errorw("pebble seq fail", "err", err) + } + } + fi, err := os.Open(s.cursorFile) if err != nil { if os.IsNotExist(err) { From 766dc86f3757e45d19e72209459c5082bcdda4d3 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 14:11:07 -0500 Subject: [PATCH 16/31] log gc sizes --- events/pebblepersist.go | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index aeadf8cc3..5452a7696 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -196,6 +196,7 @@ func (pp *PebblePersist) GarbageCollect(ctx context.Context, retention time.Dura break } } + sizeBefore, _ := pp.db.EstimateDiskUsage(nil, nil) if seq == -1 { // nothing to delete return nil @@ -203,5 +204,10 @@ func (pp *PebblePersist) GarbageCollect(ctx context.Context, retention time.Dura var key [16]byte setKeySeqMillis(key[:], seq, lastKeyTime) err = pp.db.DeleteRange(zeroKey[:], key[:], pebble.Sync) - return err + if err != nil { + return err + } + sizeAfter, _ := pp.db.EstimateDiskUsage(nil, nil) + log.Infow("pebble gc", "before", sizeBefore, "after", sizeAfter) + return nil } From 1b7e54e42e5dc9aa4a84671d52a0d3bd83969b15 Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Fri, 15 Nov 2024 11:57:09 -0800 Subject: [PATCH 17/31] allow carstore to use multiple directories, round robin style --- carstore/bs.go | 33 ++++++++++++++++++++------------- carstore/repo_test.go | 11 ++++++++--- cmd/bigsky/main.go | 17 +++++++++++++++-- 3 files changed, 43 insertions(+), 18 deletions(-) diff --git a/carstore/bs.go b/carstore/bs.go index e7af35d12..dac62e5d8 100644 --- a/carstore/bs.go +++ b/carstore/bs.go @@ -62,21 +62,23 @@ type CarStore interface { } type FileCarStore struct { - meta *CarStoreGormMeta - rootDir string + meta *CarStoreGormMeta + rootDirs []string lscLk sync.Mutex lastShardCache map[models.Uid]*CarShard } -func NewCarStore(meta *gorm.DB, root string) (CarStore, error) { - if _, err := os.Stat(root); err != nil { - if !os.IsNotExist(err) { - return nil, err - } +func NewCarStore(meta *gorm.DB, roots []string) (CarStore, error) { + for _, root := range roots { + if _, err := os.Stat(root); err != nil { + if !os.IsNotExist(err) { + return nil, err + } - if err := os.Mkdir(root, 0775); err != nil { - return nil, err + if err := os.Mkdir(root, 0775); err != nil { + return nil, err + } } } if err := meta.AutoMigrate(&CarShard{}, &blockRef{}); err != nil { @@ -88,7 +90,7 @@ func NewCarStore(meta *gorm.DB, root string) (CarStore, error) { return &FileCarStore{ meta: &CarStoreGormMeta{meta: meta}, - rootDir: root, + rootDirs: roots, lastShardCache: make(map[models.Uid]*CarShard), }, nil } @@ -541,9 +543,14 @@ func (ds *DeltaSession) GetSize(ctx context.Context, c cid.Cid) (int, error) { func fnameForShard(user models.Uid, seq int) string { return fmt.Sprintf("sh-%d-%d", user, seq) } + +func (cs *FileCarStore) dirForUser(user models.Uid) string { + return cs.rootDirs[int(user)%len(cs.rootDirs)] +} + func (cs *FileCarStore) openNewShardFile(ctx context.Context, user models.Uid, seq int) (*os.File, string, error) { // TODO: some overwrite protections - fname := filepath.Join(cs.rootDir, fnameForShard(user, seq)) + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) fi, err := os.Create(fname) if err != nil { return nil, "", err @@ -557,7 +564,7 @@ func (cs *FileCarStore) writeNewShardFile(ctx context.Context, user models.Uid, defer span.End() // TODO: some overwrite protections - fname := filepath.Join(cs.rootDir, fnameForShard(user, seq)) + fname := filepath.Join(cs.dirForUser(user), fnameForShard(user, seq)) if err := os.WriteFile(fname, data, 0664); err != nil { return "", err } @@ -982,7 +989,7 @@ func (cs *FileCarStore) openNewCompactedShardFile(ctx context.Context, user mode // TODO: some overwrite protections // NOTE CreateTemp is used for creating a non-colliding file, but we keep it and don't delete it so don't think of it as "temporary". // This creates "sh-%d-%d%s" with some random stuff in the last position - fi, err := os.CreateTemp(cs.rootDir, fnameForShard(user, seq)) + fi, err := os.CreateTemp(cs.dirForUser(user), fnameForShard(user, seq)) if err != nil { return nil, "", err } diff --git a/carstore/repo_test.go b/carstore/repo_test.go index a4d2c8cb8..8366cab95 100644 --- a/carstore/repo_test.go +++ b/carstore/repo_test.go @@ -30,8 +30,13 @@ func testCarStore() (CarStore, func(), error) { return nil, nil, err } - sharddir := filepath.Join(tempdir, "shards") - if err := os.MkdirAll(sharddir, 0775); err != nil { + sharddir1 := filepath.Join(tempdir, "shards1") + if err := os.MkdirAll(sharddir1, 0775); err != nil { + return nil, nil, err + } + + sharddir2 := filepath.Join(tempdir, "shards2") + if err := os.MkdirAll(sharddir2, 0775); err != nil { return nil, nil, err } @@ -45,7 +50,7 @@ func testCarStore() (CarStore, func(), error) { return nil, nil, err } - cs, err := NewCarStore(db, sharddir) + cs, err := NewCarStore(db, []string{sharddir1, sharddir2}) if err != nil { return nil, nil, err } diff --git a/cmd/bigsky/main.go b/cmd/bigsky/main.go index 540796f51..459b1fc20 100644 --- a/cmd/bigsky/main.go +++ b/cmd/bigsky/main.go @@ -200,6 +200,11 @@ func run(args []string) error { EnvVars: []string{"RELAY_NUM_COMPACTION_WORKERS"}, Value: 2, }, + &cli.StringSliceFlag{ + Name: "carstore-shard-dirs", + Usage: "specify list of shard directories for carstore storage, overrides default storage within datadir", + EnvVars: []string{"RELAY_CARSTORE_SHARD_DIRS"}, + }, } app.Action = runBigsky @@ -312,8 +317,16 @@ func runBigsky(cctx *cli.Context) error { } } - os.MkdirAll(filepath.Dir(csdir), os.ModePerm) - cstore, err := carstore.NewCarStore(csdb, csdir) + csdirs := []string{csdir} + if paramDirs := cctx.StringSlice("carstore-shard-dirs"); len(paramDirs) > 0 { + csdirs = paramDirs + } + + for _, csd := range csdirs { + os.MkdirAll(filepath.Dir(csd), os.ModePerm) + } + + cstore, err := carstore.NewCarStore(csdb, csdirs) if err != nil { return err } From fd6ae473a49f459441060f5563ccbc89ac086bf2 Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Fri, 15 Nov 2024 12:32:27 -0800 Subject: [PATCH 18/31] fixup build --- cmd/bigsky/main.go | 4 +++- cmd/laputa/main.go | 2 +- testing/utils.go | 4 ++-- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/cmd/bigsky/main.go b/cmd/bigsky/main.go index 459b1fc20..54d56735d 100644 --- a/cmd/bigsky/main.go +++ b/cmd/bigsky/main.go @@ -323,7 +323,9 @@ func runBigsky(cctx *cli.Context) error { } for _, csd := range csdirs { - os.MkdirAll(filepath.Dir(csd), os.ModePerm) + if err := os.MkdirAll(filepath.Dir(csd), os.ModePerm); err != nil { + return err + } } cstore, err := carstore.NewCarStore(csdb, csdirs) diff --git a/cmd/laputa/main.go b/cmd/laputa/main.go index 2cedb393a..d91edfc62 100644 --- a/cmd/laputa/main.go +++ b/cmd/laputa/main.go @@ -158,7 +158,7 @@ func run(args []string) { } } - cstore, err := carstore.NewCarStore(csdb, csdir) + cstore, err := carstore.NewCarStore(csdb, []string{csdir}) if err != nil { return err } diff --git a/testing/utils.go b/testing/utils.go index 9b076ef17..7af6e1adc 100644 --- a/testing/utils.go +++ b/testing/utils.go @@ -117,7 +117,7 @@ func SetupPDS(ctx context.Context, suffix string, plc plc.PLCClient) (*TestPDS, return nil, err } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, err } @@ -550,7 +550,7 @@ func SetupRelay(ctx context.Context, didr plc.PLCClient) (*TestRelay, error) { return nil, err } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, err } From ffe7fb61bb6d7d889cf263422d7a306ef55187df Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Fri, 15 Nov 2024 12:46:03 -0800 Subject: [PATCH 19/31] more test fixups --- cmd/supercollider/main.go | 2 +- events/dbpersist_test.go | 2 +- indexer/posts_test.go | 2 +- pds/handlers_test.go | 2 +- repomgr/bench_test.go | 2 +- repomgr/ingest_test.go | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmd/supercollider/main.go b/cmd/supercollider/main.go index 49499ef69..1c64e71d3 100644 --- a/cmd/supercollider/main.go +++ b/cmd/supercollider/main.go @@ -565,7 +565,7 @@ func initSpeedyRepoMan(key *godid.PrivKey) (*repomgr.RepoManager, *godid.PrivKey return nil, nil, err } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, nil, err } diff --git a/events/dbpersist_test.go b/events/dbpersist_test.go index c299569da..ad8d266b6 100644 --- a/events/dbpersist_test.go +++ b/events/dbpersist_test.go @@ -301,7 +301,7 @@ func setupDBs(t testing.TB) (*gorm.DB, *gorm.DB, carstore.CarStore, string, erro return nil, nil, nil, "", err } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { return nil, nil, nil, "", err } diff --git a/indexer/posts_test.go b/indexer/posts_test.go index ed21ab666..aa6fc99b3 100644 --- a/indexer/posts_test.go +++ b/indexer/posts_test.go @@ -50,7 +50,7 @@ func testIndexer(t *testing.T) *testIx { t.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { t.Fatal(err) } diff --git a/pds/handlers_test.go b/pds/handlers_test.go index fe2bb14b8..9cecd3f91 100644 --- a/pds/handlers_test.go +++ b/pds/handlers_test.go @@ -29,7 +29,7 @@ func testCarStore(t *testing.T, db *gorm.DB) (carstore.CarStore, func()) { t.Fatal(err) } - cs, err := carstore.NewCarStore(db, sharddir) + cs, err := carstore.NewCarStore(db, []string{sharddir}) if err != nil { t.Fatal(err) } diff --git a/repomgr/bench_test.go b/repomgr/bench_test.go index 271813909..c01789422 100644 --- a/repomgr/bench_test.go +++ b/repomgr/bench_test.go @@ -54,7 +54,7 @@ func BenchmarkRepoMgrCreates(b *testing.B) { b.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { b.Fatal(err) } diff --git a/repomgr/ingest_test.go b/repomgr/ingest_test.go index dcb9097ac..38a8562e5 100644 --- a/repomgr/ingest_test.go +++ b/repomgr/ingest_test.go @@ -50,7 +50,7 @@ func TestLoadNewRepo(t *testing.T) { t.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { t.Fatal(err) } @@ -80,7 +80,7 @@ func testCarstore(t *testing.T, dir string) carstore.CarStore { t.Fatal(err) } - cs, err := carstore.NewCarStore(cardb, cspath) + cs, err := carstore.NewCarStore(cardb, []string{cspath}) if err != nil { t.Fatal(err) } From 9e941dbb7d9c68ba2c5327b063902e8ccfb68ae3 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Fri, 15 Nov 2024 17:05:22 -0500 Subject: [PATCH 20/31] reorg config --- cmd/rainbow/main.go | 12 ++++++++- events/pebblepersist.go | 52 +++++++++++++++++++++++++++++------- events/pebblepersist_test.go | 2 +- splitter/splitter.go | 11 +++++--- 4 files changed, 63 insertions(+), 14 deletions(-) diff --git a/cmd/rainbow/main.go b/cmd/rainbow/main.go index 03d8be6b5..6f556ad01 100644 --- a/cmd/rainbow/main.go +++ b/cmd/rainbow/main.go @@ -72,6 +72,12 @@ func run(args []string) { EnvVars: []string{"SPLITTER_PERSIST_HOURS"}, Usage: "hours to buffer (float, may be fractional)", }, + &cli.Int64Flag{ + Name: "persist-bytes", + Value: 1_000_000_000, + Usage: "max bytes target for event cache", + EnvVars: []string{"SPLITTER_PERSIST_BYTES"}, + }, } app.Action = Splitter @@ -127,7 +133,11 @@ func Splitter(cctx *cli.Context) error { var err error if persistPath != "" { log.Infof("building splitter with storage at: %s", persistPath) - spl, err = splitter.NewDiskSplitter(upstreamHost, persistPath, cctx.Float64("persist-hours")) + spl, err = splitter.NewDiskSplitter( + upstreamHost, + persistPath, + cctx.Float64("persist-hours"), + cctx.Int64("persist-bytes")) if err != nil { log.Fatalw("failed to create splitter", "path", persistPath, "error", err) return err diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 5452a7696..f483b4bb4 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -4,10 +4,12 @@ import ( "bytes" "context" "encoding/binary" + "encoding/hex" "fmt" + "time" + "github.com/bluesky-social/indigo/models" "github.com/cockroachdb/pebble" - "time" ) type PebblePersist struct { @@ -18,15 +20,41 @@ type PebblePersist struct { prevSeqExtra uint32 cancel func() + + options PebblePersistOptions } -func NewPebblePersistance(path string) (*PebblePersist, error) { +type PebblePersistOptions struct { + // Throw away posts older than some time ago + PersistDuration time.Duration + + // Throw away old posts every so often + GCPeriod time.Duration + + // MaxBytes is what we _try_ to keep disk usage under + MaxBytes uint64 +} + +var DefaultPebblePersistOptions = PebblePersistOptions{ + PersistDuration: time.Minute * 20, + GCPeriod: time.Minute * 5, + MaxBytes: 1024 * 1024 * 1024, // 1 GiB +} + +// Create a new EventPersistence which stores data in pebbledb +// nil opts is ok +func NewPebblePersistance(path string, opts *PebblePersistOptions) (*PebblePersist, error) { db, err := pebble.Open(path, &pebble.Options{}) if err != nil { return nil, fmt.Errorf("%s: %w", path, err) } pp := new(PebblePersist) pp.db = db + if opts == nil { + pp.options = DefaultPebblePersistOptions + } else { + pp.options = *opts + } return pp, nil } @@ -150,15 +178,15 @@ func (pp *PebblePersist) GetLast(ctx context.Context) (seq, millis int64, evt *X // pp := NewPebblePersistance("/tmp/foo.pebble") // go pp.GCThread(context.Background(), 48 * time.Hour, 5 * time.Minute) // ``` -func (pp *PebblePersist) GCThread(ctx context.Context, retention, gcPeriod time.Duration) { +func (pp *PebblePersist) GCThread(ctx context.Context) { ctx, cancel := context.WithCancel(ctx) pp.cancel = cancel - ticker := time.NewTicker(gcPeriod) + ticker := time.NewTicker(pp.options.GCPeriod) defer ticker.Stop() for { select { case <-ticker.C: - err := pp.GarbageCollect(ctx, retention) + err := pp.GarbageCollect(ctx) if err != nil { log.Errorw("GC err", "err", err) } @@ -169,14 +197,18 @@ func (pp *PebblePersist) GCThread(ctx context.Context, retention, gcPeriod time. } var zeroKey [16]byte +var ffffKey [16]byte func init() { setKeySeqMillis(zeroKey[:], 0, 0) + for i := range ffffKey { + ffffKey[i] = 0xff + } } -func (pp *PebblePersist) GarbageCollect(ctx context.Context, retention time.Duration) error { +func (pp *PebblePersist) GarbageCollect(ctx context.Context) error { nowMillis := time.Now().UnixMilli() - expired := nowMillis - retention.Milliseconds() + expired := nowMillis - pp.options.PersistDuration.Milliseconds() iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) if err != nil { return err @@ -196,18 +228,20 @@ func (pp *PebblePersist) GarbageCollect(ctx context.Context, retention time.Dura break } } - sizeBefore, _ := pp.db.EstimateDiskUsage(nil, nil) + sizeBefore, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:]) if seq == -1 { // nothing to delete + log.Infow("pebble gc nop", "size", sizeBefore) return nil } var key [16]byte setKeySeqMillis(key[:], seq, lastKeyTime) + log.Infow("pebble gc start", "to", hex.EncodeToString(key[:])) err = pp.db.DeleteRange(zeroKey[:], key[:], pebble.Sync) if err != nil { return err } - sizeAfter, _ := pp.db.EstimateDiskUsage(nil, nil) + sizeAfter, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:]) log.Infow("pebble gc", "before", sizeBefore, "after", sizeAfter) return nil } diff --git a/events/pebblepersist_test.go b/events/pebblepersist_test.go index e838f9306..6495ce24f 100644 --- a/events/pebblepersist_test.go +++ b/events/pebblepersist_test.go @@ -8,7 +8,7 @@ import ( func TestPebblePersist(t *testing.T) { factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) { - return NewPebblePersistance(filepath.Join(tempPath, "pebble.db")) + return NewPebblePersistance(filepath.Join(tempPath, "pebble.db"), nil) } testPersister(t, factory) } diff --git a/splitter/splitter.go b/splitter/splitter.go index 2da2f0a43..75d363785 100644 --- a/splitter/splitter.go +++ b/splitter/splitter.go @@ -54,13 +54,18 @@ func NewMemSplitter(host string) *Splitter { consumers: make(map[uint64]*SocketConsumer), } } -func NewDiskSplitter(host, path string, persistHours float64) (*Splitter, error) { - pp, err := events.NewPebblePersistance(path) +func NewDiskSplitter(host, path string, persistHours float64, maxBytes int64) (*Splitter, error) { + ppopts := events.PebblePersistOptions{ + PersistDuration: time.Duration(float64(time.Hour) * persistHours), + GCPeriod: 5 * time.Minute, + MaxBytes: uint64(maxBytes), + } + pp, err := events.NewPebblePersistance(path, &ppopts) if err != nil { return nil, err } - go pp.GCThread(context.Background(), time.Duration(float64(time.Hour)*persistHours), 5*time.Minute) + go pp.GCThread(context.Background()) em := events.NewEventManager(pp) return &Splitter{ cursorFile: "cursor-file", From 0fb485d03383a7dbf4f610062471737a1a871b38 Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Sat, 16 Nov 2024 13:12:04 -0800 Subject: [PATCH 21/31] add a user cache on the bgs --- bgs/bgs.go | 78 ++++++++++++++++++++++++++++++++++++++++++++----- bgs/handlers.go | 33 +++++++++++---------- 2 files changed, 88 insertions(+), 23 deletions(-) diff --git a/bgs/bgs.go b/bgs/bgs.go index 35dfab9d9..8e584cb5f 100644 --- a/bgs/bgs.go +++ b/bgs/bgs.go @@ -27,6 +27,7 @@ import ( "github.com/bluesky-social/indigo/models" "github.com/bluesky-social/indigo/repomgr" "github.com/bluesky-social/indigo/xrpc" + lru "github.com/hashicorp/golang-lru/v2" "golang.org/x/sync/semaphore" "golang.org/x/time/rate" @@ -87,6 +88,9 @@ type BGS struct { // Management of Compaction compactor *Compactor + + // User cache + userCache *lru.Cache[string, *User] } type PDSResync struct { @@ -136,6 +140,8 @@ func NewBGS(db *gorm.DB, ix *indexer.Indexer, repoman *repomgr.RepoManager, evtm db.AutoMigrate(models.PDS{}) db.AutoMigrate(models.DomainBan{}) + uc, _ := lru.New[string, *User](1_000_000) + bgs := &BGS{ Index: ix, db: db, @@ -151,6 +157,8 @@ func NewBGS(db *gorm.DB, ix *indexer.Indexer, repoman *repomgr.RepoManager, evtm consumers: make(map[uint64]*SocketConsumer), pdsResyncs: make(map[uint]*PDSResync), + + userCache: uc, } ix.CreateExternalUser = bgs.createExternalUser @@ -520,7 +528,45 @@ type User struct { Tombstoned bool // UpstreamStatus is the state of the user as reported by the upstream PDS - UpstreamStatus string `gorm:"index"` + UpstreamStatusT string `gorm:"index"` + + lk sync.Mutex +} + +func (u *User) SetTakenDown(v bool) { + u.lk.Lock() + defer u.lk.Unlock() + u.TakenDown = v +} + +func (u *User) GetTakenDown() bool { + u.lk.Lock() + defer u.lk.Unlock() + return u.TakenDown +} + +func (u *User) SetTombstoned(v bool) { + u.lk.Lock() + defer u.lk.Unlock() + u.Tombstoned = v +} + +func (u *User) GetTombstoned() bool { + u.lk.Lock() + defer u.lk.Unlock() + return u.Tombstoned +} + +func (u *User) SetUpstreamStatus(v string) { + u.lk.Lock() + defer u.lk.Unlock() + u.UpstreamStatusT = v +} + +func (u *User) GetUpstreamStatus() string { + u.lk.Lock() + defer u.lk.Unlock() + return u.UpstreamStatusT } type addTargetBody struct { @@ -771,6 +817,11 @@ func (bgs *BGS) lookupUserByDid(ctx context.Context, did string) (*User, error) ctx, span := tracer.Start(ctx, "lookupUserByDid") defer span.End() + cu, ok := bgs.userCache.Get(did) + if ok { + return cu, nil + } + var u User if err := bgs.db.Find(&u, "did = ?", did).Error; err != nil { return nil, err @@ -780,6 +831,8 @@ func (bgs *BGS) lookupUserByDid(ctx context.Context, did string) (*User, error) return nil, gorm.ErrRecordNotFound } + bgs.userCache.Add(did, &u) + return &u, nil } @@ -840,20 +893,21 @@ func (bgs *BGS) handleFedEvent(ctx context.Context, host *models.PDS, env *event u.Did = evt.Repo } - span.SetAttributes(attribute.String("upstream_status", u.UpstreamStatus)) + ustatus := u.GetUpstreamStatus() + span.SetAttributes(attribute.String("upstream_status", ustatus)) - if u.TakenDown || u.UpstreamStatus == events.AccountStatusTakendown { - span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.TakenDown)) + if u.GetTakenDown() || ustatus == events.AccountStatusTakendown { + span.SetAttributes(attribute.Bool("taken_down_by_relay_admin", u.GetTakenDown())) log.Debugw("dropping commit event from taken down user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) return nil } - if u.UpstreamStatus == events.AccountStatusSuspended { + if ustatus == events.AccountStatusSuspended { log.Debugw("dropping commit event from suspended user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) return nil } - if u.UpstreamStatus == events.AccountStatusDeactivated { + if ustatus == events.AccountStatusDeactivated { log.Debugw("dropping commit event from deactivated user", "did", evt.Repo, "seq", evt.Seq, "pdsHost", host.Host) return nil } @@ -877,12 +931,13 @@ func (bgs *BGS) handleFedEvent(ctx context.Context, host *models.PDS, env *event } } - if u.Tombstoned { + if u.GetTombstoned() { span.SetAttributes(attribute.Bool("tombstoned", true)) // we've checked the authority of the users PDS, so reinstate the account if err := bgs.db.Model(&User{}).Where("id = ?", u.ID).UpdateColumn("tombstoned", false).Error; err != nil { return fmt.Errorf("failed to un-tombstone a user: %w", err) } + u.SetTombstoned(false) ai, err := bgs.Index.LookupUser(ctx, u.ID) if err != nil { @@ -1041,7 +1096,7 @@ func (bgs *BGS) handleFedEvent(ctx context.Context, host *models.PDS, env *event return fmt.Errorf("failed to look up user by did: %w", err) } - if u.TakenDown { + if u.GetTakenDown() { shouldBeActive = false status = &events.AccountStatusTakendown } @@ -1370,18 +1425,22 @@ func (bgs *BGS) UpdateAccountStatus(ctx context.Context, did string, status stri if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusActive).Error; err != nil { return fmt.Errorf("failed to set user active status: %w", err) } + u.SetUpstreamStatus(events.AccountStatusActive) case events.AccountStatusDeactivated: if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusDeactivated).Error; err != nil { return fmt.Errorf("failed to set user deactivation status: %w", err) } + u.SetUpstreamStatus(events.AccountStatusDeactivated) case events.AccountStatusSuspended: if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusSuspended).Error; err != nil { return fmt.Errorf("failed to set user suspension status: %w", err) } + u.SetUpstreamStatus(events.AccountStatusSuspended) case events.AccountStatusTakendown: if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("upstream_status", events.AccountStatusTakendown).Error; err != nil { return fmt.Errorf("failed to set user taken down status: %w", err) } + u.SetUpstreamStatus(events.AccountStatusTakendown) if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{ "handle": nil, @@ -1396,6 +1455,7 @@ func (bgs *BGS) UpdateAccountStatus(ctx context.Context, did string, status stri }).Error; err != nil { return err } + u.SetUpstreamStatus(events.AccountStatusDeleted) if err := bgs.db.Model(&models.ActorInfo{}).Where("uid = ?", u.ID).UpdateColumns(map[string]any{ "handle": nil, @@ -1422,6 +1482,7 @@ func (bgs *BGS) TakeDownRepo(ctx context.Context, did string) error { if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("taken_down", true).Error; err != nil { return err } + u.SetTakenDown(true) if err := bgs.repoman.TakeDownRepo(ctx, u.ID); err != nil { return err @@ -1443,6 +1504,7 @@ func (bgs *BGS) ReverseTakedown(ctx context.Context, did string) error { if err := bgs.db.Model(User{}).Where("id = ?", u.ID).Update("taken_down", false).Error; err != nil { return err } + u.SetTakenDown(false) return nil } diff --git a/bgs/handlers.go b/bgs/handlers.go index da87c9521..28cf1f0f2 100644 --- a/bgs/handlers.go +++ b/bgs/handlers.go @@ -34,23 +34,24 @@ func (s *BGS) handleComAtprotoSyncGetRecord(ctx context.Context, collection stri return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user") } - if u.Tombstoned { + if u.GetTombstoned() { return nil, fmt.Errorf("account was deleted") } - if u.TakenDown { + if u.GetTakenDown() { return nil, fmt.Errorf("account was taken down by the Relay") } - if u.UpstreamStatus == events.AccountStatusTakendown { + ustatus := u.GetUpstreamStatus() + if ustatus == events.AccountStatusTakendown { return nil, fmt.Errorf("account was taken down by its PDS") } - if u.UpstreamStatus == events.AccountStatusDeactivated { + if ustatus == events.AccountStatusDeactivated { return nil, fmt.Errorf("account is temporarily deactivated") } - if u.UpstreamStatus == events.AccountStatusSuspended { + if ustatus == events.AccountStatusSuspended { return nil, fmt.Errorf("account is suspended by its PDS") } @@ -91,23 +92,24 @@ func (s *BGS) handleComAtprotoSyncGetRepo(ctx context.Context, did string, since return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user") } - if u.Tombstoned { + if u.GetTombstoned() { return nil, fmt.Errorf("account was deleted") } - if u.TakenDown { + if u.GetTakenDown() { return nil, fmt.Errorf("account was taken down by the Relay") } - if u.UpstreamStatus == events.AccountStatusTakendown { + ustatus := u.GetUpstreamStatus() + if ustatus == events.AccountStatusTakendown { return nil, fmt.Errorf("account was taken down by its PDS") } - if u.UpstreamStatus == events.AccountStatusDeactivated { + if ustatus == events.AccountStatusDeactivated { return nil, fmt.Errorf("account is temporarily deactivated") } - if u.UpstreamStatus == events.AccountStatusSuspended { + if ustatus == events.AccountStatusSuspended { return nil, fmt.Errorf("account is suspended by its PDS") } @@ -253,23 +255,24 @@ func (s *BGS) handleComAtprotoSyncGetLatestCommit(ctx context.Context, did strin return nil, echo.NewHTTPError(http.StatusInternalServerError, "failed to lookup user") } - if u.Tombstoned { + if u.GetTombstoned() { return nil, fmt.Errorf("account was deleted") } - if u.TakenDown { + if u.GetTakenDown() { return nil, fmt.Errorf("account was taken down by the Relay") } - if u.UpstreamStatus == events.AccountStatusTakendown { + ustatus := u.GetUpstreamStatus() + if ustatus == events.AccountStatusTakendown { return nil, fmt.Errorf("account was taken down by its PDS") } - if u.UpstreamStatus == events.AccountStatusDeactivated { + if ustatus == events.AccountStatusDeactivated { return nil, fmt.Errorf("account is temporarily deactivated") } - if u.UpstreamStatus == events.AccountStatusSuspended { + if ustatus == events.AccountStatusSuspended { return nil, fmt.Errorf("account is suspended by its PDS") } From 70575776b378329dd386ecb171c804e42b979afb Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Sat, 16 Nov 2024 13:17:30 -0800 Subject: [PATCH 22/31] fixup refactor --- bgs/bgs.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bgs/bgs.go b/bgs/bgs.go index 8e584cb5f..bf1eae6f5 100644 --- a/bgs/bgs.go +++ b/bgs/bgs.go @@ -528,7 +528,7 @@ type User struct { Tombstoned bool // UpstreamStatus is the state of the user as reported by the upstream PDS - UpstreamStatusT string `gorm:"index"` + UpstreamStatus string `gorm:"index"` lk sync.Mutex } @@ -560,13 +560,13 @@ func (u *User) GetTombstoned() bool { func (u *User) SetUpstreamStatus(v string) { u.lk.Lock() defer u.lk.Unlock() - u.UpstreamStatusT = v + u.UpstreamStatus = v } func (u *User) GetUpstreamStatus() string { u.lk.Lock() defer u.lk.Unlock() - return u.UpstreamStatusT + return u.UpstreamStatus } type addTargetBody struct { From 4a15b387b31fa230cdf74f91db9c8de80475d606 Mon Sep 17 00:00:00 2001 From: whyrusleeping Date: Sat, 16 Nov 2024 13:25:05 -0800 Subject: [PATCH 23/31] fix lint --- bgs/compactor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bgs/compactor.go b/bgs/compactor.go index bca4d8e02..dd4ec4211 100644 --- a/bgs/compactor.go +++ b/bgs/compactor.go @@ -349,7 +349,7 @@ func (c *Compactor) compactNext(ctx context.Context, bgs *BGS, strategy NextStra return state, nil } -func (c *Compactor) EnqueueRepo(ctx context.Context, user User, fast bool) { +func (c *Compactor) EnqueueRepo(ctx context.Context, user *User, fast bool) { ctx, span := otel.Tracer("compactor").Start(ctx, "EnqueueRepo") defer span.End() log.Infow("enqueueing compaction for repo", "repo", user.Did, "uid", user.ID, "fast", fast) From 75d29f8b81e7d8ef264ec8bbb66a9d1bef3719df Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Sun, 17 Nov 2024 17:06:22 -0500 Subject: [PATCH 24/31] add compaction to gc --- events/pebblepersist.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/events/pebblepersist.go b/events/pebblepersist.go index f483b4bb4..0dc8a0001 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -243,5 +243,12 @@ func (pp *PebblePersist) GarbageCollect(ctx context.Context) error { } sizeAfter, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:]) log.Infow("pebble gc", "before", sizeBefore, "after", sizeAfter) + start := time.Now() + err = pp.db.Compact(zeroKey[:], key[:], true) + if err != nil { + log.Warnw("pebble gc compact", "err", err) + } + dt := time.Since(start) + log.Infow("pebble gc compact ok", "dt", dt) return nil } From 30a2725499865d2c0d58927dd40de1ca2dec527e Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:22:34 +0000 Subject: [PATCH 25/31] Add more metrics to the relay --- bgs/bgs.go | 5 +++++ bgs/metrics.go | 12 ++++++++++++ did/metrics.go | 6 ++++++ did/multi.go | 6 ++++++ repomgr/metrics.go | 18 ++++++++++++++++++ repomgr/repomgr.go | 8 +++++++- 6 files changed, 54 insertions(+), 1 deletion(-) diff --git a/bgs/bgs.go b/bgs/bgs.go index bf1eae6f5..b64df715e 100644 --- a/bgs/bgs.go +++ b/bgs/bgs.go @@ -876,14 +876,19 @@ func (bgs *BGS) handleFedEvent(ctx context.Context, host *models.PDS, env *event repoCommitsReceivedCounter.WithLabelValues(host.Host).Add(1) evt := env.RepoCommit log.Debugw("bgs got repo append event", "seq", evt.Seq, "pdsHost", host.Host, "repo", evt.Repo) + + s := time.Now() u, err := bgs.lookupUserByDid(ctx, evt.Repo) + userLookupDuration.Observe(time.Since(s).Seconds()) if err != nil { if !errors.Is(err, gorm.ErrRecordNotFound) { return fmt.Errorf("looking up event user: %w", err) } newUsersDiscovered.Inc() + start := time.Now() subj, err := bgs.createExternalUser(ctx, evt.Repo) + newUserDiscoveryDuration.Observe(time.Since(start).Seconds()) if err != nil { return fmt.Errorf("fed event create external user: %w", err) } diff --git a/bgs/metrics.go b/bgs/metrics.go index b33677e6e..edd687596 100644 --- a/bgs/metrics.go +++ b/bgs/metrics.go @@ -81,6 +81,18 @@ var resSz = promauto.NewHistogramVec(prometheus.HistogramOpts{ Buckets: prometheus.ExponentialBuckets(100, 10, 8), }, []string{"code", "method", "path"}) +var userLookupDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "relay_user_lookup_duration", + Help: "A histogram of user lookup latencies", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var newUserDiscoveryDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "relay_new_user_discovery_duration", + Help: "A histogram of new user discovery latencies", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + // MetricsMiddleware defines handler function for metrics middleware func MetricsMiddleware(next echo.HandlerFunc) echo.HandlerFunc { return func(c echo.Context) error { diff --git a/did/metrics.go b/did/metrics.go index e7bea3b56..67f14e434 100644 --- a/did/metrics.go +++ b/did/metrics.go @@ -9,3 +9,9 @@ var mrResolvedDidsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ Name: "multiresolver_resolved_dids_total", Help: "Total number of DIDs resolved", }, []string{"resolver"}) + +var mrResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ + Name: "indigo_multiresolver_resolve_duration", + Help: "A histogram of resolve latencies", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}, []string{"resolver"}) diff --git a/did/multi.go b/did/multi.go index e871ff454..112394bee 100644 --- a/did/multi.go +++ b/did/multi.go @@ -3,6 +3,7 @@ package did import ( "context" "fmt" + "time" "github.com/whyrusleeping/go-did" ) @@ -43,6 +44,11 @@ func (mr *MultiResolver) FlushCacheFor(didstr string) { } func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) { + s := time.Now() + defer func() { + mrResolveDuration.WithLabelValues(didstr).Observe(time.Since(s).Seconds()) + }() + pdid, err := did.ParseDID(didstr) if err != nil { return nil, err diff --git a/repomgr/metrics.go b/repomgr/metrics.go index a92e3091d..df3e4bea1 100644 --- a/repomgr/metrics.go +++ b/repomgr/metrics.go @@ -9,3 +9,21 @@ var repoOpsImported = promauto.NewCounter(prometheus.CounterOpts{ Name: "repomgr_repo_ops_imported", Help: "Number of repo ops imported", }) + +var openAndSigCheckDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "repomgr_open_and_sig_check_duration", + Help: "Duration of opening and signature check", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var calcDiffDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "repomgr_calc_diff_duration", + Help: "Duration of calculating diff", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var writeCarSliceDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "repomgr_write_car_slice_duration", + Help: "Duration of writing car slice", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) diff --git a/repomgr/repomgr.go b/repomgr/repomgr.go index ad90a43b2..bdf20d784 100644 --- a/repomgr/repomgr.go +++ b/repomgr/repomgr.go @@ -8,6 +8,7 @@ import ( "io" "strings" "sync" + "time" atproto "github.com/bluesky-social/indigo/api/atproto" bsky "github.com/bluesky-social/indigo/api/bsky" @@ -538,6 +539,7 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, unlock := rm.lockUser(ctx, uid) defer unlock() + start := time.Now() root, ds, err := rm.cs.ImportSlice(ctx, uid, since, carslice) if err != nil { return fmt.Errorf("importing external carslice: %w", err) @@ -551,6 +553,7 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, if err := rm.CheckRepoSig(ctx, r, did); err != nil { return err } + openAndSigCheckDuration.Observe(time.Since(start).Seconds()) var skipcids map[cid.Cid]bool if ds.BaseCid().Defined() { @@ -571,10 +574,11 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, } } + start = time.Now() if err := ds.CalcDiff(ctx, skipcids); err != nil { return fmt.Errorf("failed while calculating mst diff (since=%v): %w", since, err) - } + calcDiffDuration.Observe(time.Since(start).Seconds()) evtops := make([]RepoOp, 0, len(ops)) @@ -631,10 +635,12 @@ func (rm *RepoManager) HandleExternalUserEvent(ctx context.Context, pdsid uint, } } + start = time.Now() rslice, err := ds.CloseWithRoot(ctx, root, nrev) if err != nil { return fmt.Errorf("close with root: %w", err) } + writeCarSliceDuration.Observe(time.Since(start).Seconds()) if rm.events != nil { rm.events(ctx, &RepoEvent{ From bd727d1672de6dbde1a95ea8b97c24967220b1df Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Sun, 17 Nov 2024 17:25:45 -0500 Subject: [PATCH 26/31] dockerize --- .github/workflows/container-rainbow-aws.yaml | 52 ++++++++++++++++++++ cmd/rainbow/Dockerfile | 43 ++++++++++++++++ 2 files changed, 95 insertions(+) create mode 100644 .github/workflows/container-rainbow-aws.yaml create mode 100644 cmd/rainbow/Dockerfile diff --git a/.github/workflows/container-rainbow-aws.yaml b/.github/workflows/container-rainbow-aws.yaml new file mode 100644 index 000000000..412be454a --- /dev/null +++ b/.github/workflows/container-rainbow-aws.yaml @@ -0,0 +1,52 @@ +name: container-rainbow-aws +on: [push] +env: + REGISTRY: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_REGISTRY }} + USERNAME: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_USERNAME }} + PASSWORD: ${{ secrets.AWS_ECR_REGISTRY_USEAST2_PACKAGES_PASSWORD }} + # github.repository as / + IMAGE_NAME: rainbow + +jobs: + container-rainbow-aws: + if: github.repository == 'bluesky-social/indigo' + runs-on: ubuntu-latest + permissions: + contents: read + packages: write + id-token: write + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + - name: Setup Docker buildx + uses: docker/setup-buildx-action@v1 + + - name: Log into registry ${{ env.REGISTRY }} + uses: docker/login-action@v2 + with: + registry: ${{ env.REGISTRY }} + username: ${{ env.USERNAME }} + password: ${{ env.PASSWORD }} + + - name: Extract Docker metadata + id: meta + uses: docker/metadata-action@v4 + with: + images: | + ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=sha,enable=true,priority=100,prefix=,suffix=,format=long + + - name: Build and push Docker image + id: build-and-push + uses: docker/build-push-action@v4 + with: + context: . + file: ./cmd/rainbow/Dockerfile + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/cmd/rainbow/Dockerfile b/cmd/rainbow/Dockerfile new file mode 100644 index 000000000..72bfc3572 --- /dev/null +++ b/cmd/rainbow/Dockerfile @@ -0,0 +1,43 @@ +FROM golang:1.22-bullseye AS build-env + +ENV DEBIAN_FRONTEND=noninteractive +ENV TZ=Etc/UTC +ENV GODEBUG="netdns=go" +ENV GOOS="linux" +ENV GOARCH="amd64" +ENV CGO_ENABLED="1" + +WORKDIR /usr/src/rainbow + +COPY . . + +RUN go mod download && \ + go mod verify + +RUN go build \ + -v \ + -trimpath \ + -tags timetzdata \ + -o /rainbow-bin \ + ./cmd/rainbow + +FROM debian:bullseye-slim + +ENV DEBIAN_FRONTEND="noninteractive" +ENV TZ=Etc/UTC +ENV GODEBUG="netdns=go" + +RUN apt-get update && apt-get install --yes \ + dumb-init \ + ca-certificates \ + runit + +WORKDIR /rainbow +COPY --from=build-env /rainbow-bin /usr/bin/rainbow + +ENTRYPOINT ["/usr/bin/dumb-init", "--"] +CMD ["/usr/bin/rainbow"] + +LABEL org.opencontainers.image.source=https://github.com/bluesky-social/indigo +LABEL org.opencontainers.image.description="bsky.app rainbow" +LABEL org.opencontainers.image.licenses=MIT From 15004ab6dffb9acf206d2704db367fc128366c83 Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:32:31 +0000 Subject: [PATCH 27/31] whoops --- did/multi.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/did/multi.go b/did/multi.go index 112394bee..2c67781fe 100644 --- a/did/multi.go +++ b/did/multi.go @@ -45,9 +45,6 @@ func (mr *MultiResolver) FlushCacheFor(didstr string) { func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.Document, error) { s := time.Now() - defer func() { - mrResolveDuration.WithLabelValues(didstr).Observe(time.Since(s).Seconds()) - }() pdid, err := did.ParseDID(didstr) if err != nil { @@ -55,6 +52,9 @@ func (mr *MultiResolver) GetDocument(ctx context.Context, didstr string) (*did.D } method := pdid.Protocol() + defer func() { + mrResolveDuration.WithLabelValues(method).Observe(time.Since(s).Seconds()) + }() res, ok := mr.handlers[method] if !ok { From 3a4136fbc994f7e7311cd4ca367c37bbbe382ed9 Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:35:06 +0000 Subject: [PATCH 28/31] Rename series --- did/metrics.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/did/metrics.go b/did/metrics.go index 67f14e434..d3c8f0ecd 100644 --- a/did/metrics.go +++ b/did/metrics.go @@ -11,7 +11,7 @@ var mrResolvedDidsTotal = promauto.NewCounterVec(prometheus.CounterOpts{ }, []string{"resolver"}) var mrResolveDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Name: "indigo_multiresolver_resolve_duration", + Name: "indigo_multiresolver_resolve_duration_seconds", Help: "A histogram of resolve latencies", Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), }, []string{"resolver"}) From 1eeb49688e356fc29b42da6174593129ab39c677 Mon Sep 17 00:00:00 2001 From: Jaz Volpert Date: Sun, 17 Nov 2024 22:55:19 +0000 Subject: [PATCH 29/31] Track disk vs meta write durations --- carstore/bs.go | 4 ++++ carstore/metrics.go | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 carstore/metrics.go diff --git a/carstore/bs.go b/carstore/bs.go index dac62e5d8..2cf0a5093 100644 --- a/carstore/bs.go +++ b/carstore/bs.go @@ -645,10 +645,12 @@ func (cs *FileCarStore) writeNewShard(ctx context.Context, root cid.Cid, rev str offset += nw } + start := time.Now() path, err := cs.writeNewShardFile(ctx, user, seq, buf.Bytes()) if err != nil { return nil, fmt.Errorf("failed to write shard file: %w", err) } + writeShardFileDuration.Observe(time.Since(start).Seconds()) shard := CarShard{ Root: models.DbCID{CID: root}, @@ -659,9 +661,11 @@ func (cs *FileCarStore) writeNewShard(ctx context.Context, root cid.Cid, rev str Rev: rev, } + start = time.Now() if err := cs.putShard(ctx, &shard, brefs, rmcids, false); err != nil { return nil, err } + writeShardMetadataDuration.Observe(time.Since(start).Seconds()) return buf.Bytes(), nil } diff --git a/carstore/metrics.go b/carstore/metrics.go new file mode 100644 index 000000000..0d2a0794a --- /dev/null +++ b/carstore/metrics.go @@ -0,0 +1,18 @@ +package carstore + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var writeShardFileDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "carstore_write_shard_file_duration", + Help: "Duration of writing shard file to disk", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) + +var writeShardMetadataDuration = promauto.NewHistogram(prometheus.HistogramOpts{ + Name: "carstore_write_shard_metadata_duration", + Help: "Duration of writing shard metadata to DB", + Buckets: prometheus.ExponentialBuckets(0.001, 2, 15), +}) From bf3ffd54899473c4d9d6090d7e78451671aec004 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Sun, 17 Nov 2024 18:59:16 -0500 Subject: [PATCH 30/31] reorg config, make --cursor-file --- cmd/rainbow/main.go | 35 +++++++++++----- events/pebblepersist.go | 18 +++++---- events/pebblepersist_test.go | 4 +- splitter/splitter.go | 77 +++++++++++++++++++++++++++--------- 4 files changed, 98 insertions(+), 36 deletions(-) diff --git a/cmd/rainbow/main.go b/cmd/rainbow/main.go index 6f556ad01..47a7fe5bd 100644 --- a/cmd/rainbow/main.go +++ b/cmd/rainbow/main.go @@ -2,6 +2,7 @@ package main import ( "context" + "github.com/bluesky-social/indigo/events" "os" "os/signal" "syscall" @@ -57,6 +58,11 @@ func run(args []string) { Value: "", Usage: "path to persistence db", }, + &cli.StringFlag{ + Name: "cursor-file", + Value: "", + Usage: "write upstream cursor number to this file", + }, &cli.StringFlag{ Name: "api-listen", Value: ":2480", @@ -133,18 +139,29 @@ func Splitter(cctx *cli.Context) error { var err error if persistPath != "" { log.Infof("building splitter with storage at: %s", persistPath) - spl, err = splitter.NewDiskSplitter( - upstreamHost, - persistPath, - cctx.Float64("persist-hours"), - cctx.Int64("persist-bytes")) - if err != nil { - log.Fatalw("failed to create splitter", "path", persistPath, "error", err) - return err + ppopts := events.PebblePersistOptions{ + DbPath: persistPath, + PersistDuration: time.Duration(float64(time.Hour) * cctx.Float64("persist-hours")), + GCPeriod: 5 * time.Minute, + MaxBytes: uint64(cctx.Int64("persist-bytes")), } + conf := splitter.SplitterConfig{ + UpstreamHost: upstreamHost, + CursorFile: cctx.String("cursor-file"), + PebbleOptions: &ppopts, + } + spl, err = splitter.NewSplitter(conf) } else { log.Info("building in-memory splitter") - spl = splitter.NewMemSplitter(upstreamHost) + conf := splitter.SplitterConfig{ + UpstreamHost: upstreamHost, + CursorFile: cctx.String("cursor-file"), + } + spl, err = splitter.NewSplitter(conf) + } + if err != nil { + log.Fatalw("failed to create splitter", "path", persistPath, "error", err) + return err } // set up metrics endpoint diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 0dc8a0001..164208c4c 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -25,6 +25,9 @@ type PebblePersist struct { } type PebblePersistOptions struct { + // path where pebble will create a directory full of files + DbPath string + // Throw away posts older than some time ago PersistDuration time.Duration @@ -43,18 +46,17 @@ var DefaultPebblePersistOptions = PebblePersistOptions{ // Create a new EventPersistence which stores data in pebbledb // nil opts is ok -func NewPebblePersistance(path string, opts *PebblePersistOptions) (*PebblePersist, error) { - db, err := pebble.Open(path, &pebble.Options{}) +func NewPebblePersistance(opts *PebblePersistOptions) (*PebblePersist, error) { + if opts == nil { + opts = &DefaultPebblePersistOptions + } + db, err := pebble.Open(opts.DbPath, &pebble.Options{}) if err != nil { - return nil, fmt.Errorf("%s: %w", path, err) + return nil, fmt.Errorf("%s: %w", opts.DbPath, err) } pp := new(PebblePersist) + pp.options = *opts pp.db = db - if opts == nil { - pp.options = DefaultPebblePersistOptions - } else { - pp.options = *opts - } return pp, nil } diff --git a/events/pebblepersist_test.go b/events/pebblepersist_test.go index 6495ce24f..901365c5d 100644 --- a/events/pebblepersist_test.go +++ b/events/pebblepersist_test.go @@ -8,7 +8,9 @@ import ( func TestPebblePersist(t *testing.T) { factory := func(tempPath string, db *gorm.DB) (EventPersistence, error) { - return NewPebblePersistance(filepath.Join(tempPath, "pebble.db"), nil) + opts := DefaultPebblePersistOptions + opts.DbPath = filepath.Join(tempPath, "pebble.db") + return NewPebblePersistance(&opts) } testPersister(t, factory) } diff --git a/splitter/splitter.go b/splitter/splitter.go index ccbccbbc5..3cad7f793 100644 --- a/splitter/splitter.go +++ b/splitter/splitter.go @@ -28,39 +28,81 @@ import ( var log = logging.Logger("splitter") type Splitter struct { - Host string erb *EventRingBuffer pp *events.PebblePersist events *events.EventManager - // cursor storage - cursorFile string - // Management of Socket Consumers consumersLk sync.RWMutex nextConsumerID uint64 consumers map[uint64]*SocketConsumer + + conf SplitterConfig +} + +type SplitterConfig struct { + UpstreamHost string + CursorFile string + PebbleOptions *events.PebblePersistOptions } func NewMemSplitter(host string) *Splitter { + conf := SplitterConfig{ + UpstreamHost: host, + CursorFile: "cursor-file", + } + erb := NewEventRingBuffer(20_000, 10_000) em := events.NewEventManager(erb) return &Splitter{ - cursorFile: "cursor-file", - Host: host, - erb: erb, - events: em, - consumers: make(map[uint64]*SocketConsumer), + conf: conf, + erb: erb, + events: em, + consumers: make(map[uint64]*SocketConsumer), + } +} +func NewSplitter(conf SplitterConfig) (*Splitter, error) { + if conf.PebbleOptions == nil { + // mem splitter + erb := NewEventRingBuffer(20_000, 10_000) + + em := events.NewEventManager(erb) + return &Splitter{ + conf: conf, + erb: erb, + events: em, + consumers: make(map[uint64]*SocketConsumer), + }, nil + } else { + pp, err := events.NewPebblePersistance(conf.PebbleOptions) + if err != nil { + return nil, err + } + + go pp.GCThread(context.Background()) + em := events.NewEventManager(pp) + return &Splitter{ + conf: conf, + pp: pp, + events: em, + consumers: make(map[uint64]*SocketConsumer), + }, nil } } func NewDiskSplitter(host, path string, persistHours float64, maxBytes int64) (*Splitter, error) { ppopts := events.PebblePersistOptions{ + DbPath: path, PersistDuration: time.Duration(float64(time.Hour) * persistHours), GCPeriod: 5 * time.Minute, MaxBytes: uint64(maxBytes), } - pp, err := events.NewPebblePersistance(path, &ppopts) + conf := SplitterConfig{ + UpstreamHost: host, + CursorFile: "cursor-file", + PebbleOptions: &ppopts, + } + pp, err := events.NewPebblePersistance(&ppopts) if err != nil { return nil, err } @@ -68,11 +110,10 @@ func NewDiskSplitter(host, path string, persistHours float64, maxBytes int64) (* go pp.GCThread(context.Background()) em := events.NewEventManager(pp) return &Splitter{ - cursorFile: "cursor-file", - Host: host, - pp: pp, - events: em, - consumers: make(map[uint64]*SocketConsumer), + conf: conf, + pp: pp, + events: em, + consumers: make(map[uint64]*SocketConsumer), }, nil } @@ -86,7 +127,7 @@ func (s *Splitter) Start(addr string) error { return fmt.Errorf("loading cursor failed: %w", err) } - go s.subscribeWithRedialer(context.Background(), s.Host, curs) + go s.subscribeWithRedialer(context.Background(), s.conf.UpstreamHost, curs) li, err := lc.Listen(ctx, "tcp", addr) if err != nil { @@ -442,7 +483,7 @@ func (s *Splitter) getLastCursor() (int64, error) { } } - fi, err := os.Open(s.cursorFile) + fi, err := os.Open(s.conf.CursorFile) if err != nil { if os.IsNotExist(err) { return 0, nil @@ -464,5 +505,5 @@ func (s *Splitter) getLastCursor() (int64, error) { } func (s *Splitter) writeCursor(curs int64) error { - return os.WriteFile(s.cursorFile, []byte(fmt.Sprint(curs)), 0664) + return os.WriteFile(s.conf.CursorFile, []byte(fmt.Sprint(curs)), 0664) } From 93497b7eba15354a1f0a575a3bec6ea95ac0c7b8 Mon Sep 17 00:00:00 2001 From: Brian Olson Date: Sun, 17 Nov 2024 20:57:45 -0500 Subject: [PATCH 31/31] no-cursor subscribe --- cmd/rainbow/main.go | 4 ++-- events/pebblepersist.go | 8 +++++++- splitter/splitter.go | 18 +++++++++++++----- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/cmd/rainbow/main.go b/cmd/rainbow/main.go index 47a7fe5bd..398f56387 100644 --- a/cmd/rainbow/main.go +++ b/cmd/rainbow/main.go @@ -80,8 +80,8 @@ func run(args []string) { }, &cli.Int64Flag{ Name: "persist-bytes", - Value: 1_000_000_000, - Usage: "max bytes target for event cache", + Value: 0, + Usage: "max bytes target for event cache, 0 to disable size target trimming", EnvVars: []string{"SPLITTER_PERSIST_BYTES"}, }, } diff --git a/events/pebblepersist.go b/events/pebblepersist.go index 164208c4c..2c1c787e5 100644 --- a/events/pebblepersist.go +++ b/events/pebblepersist.go @@ -5,6 +5,7 @@ import ( "context" "encoding/binary" "encoding/hex" + "errors" "fmt" "time" @@ -159,6 +160,8 @@ func (pp *PebblePersist) SetEventBroadcaster(broadcast func(*XRPCStreamEvent)) { pp.broadcast = broadcast } +var ErrNoLast = errors.New("no last event") + func (pp *PebblePersist) GetLast(ctx context.Context) (seq, millis int64, evt *XRPCStreamEvent, err error) { iter, err := pp.db.NewIterWithContext(ctx, &pebble.IterOptions{}) if err != nil { @@ -166,7 +169,7 @@ func (pp *PebblePersist) GetLast(ctx context.Context) (seq, millis int64, evt *X } ok := iter.Last() if !ok { - return 0, 0, nil, nil + return 0, 0, nil, ErrNoLast } evt, err = eventFromPebbleIter(iter) keyblob := iter.Key() @@ -230,6 +233,9 @@ func (pp *PebblePersist) GarbageCollect(ctx context.Context) error { break } } + + // TODO: use pp.options.MaxBytes + sizeBefore, _ := pp.db.EstimateDiskUsage(zeroKey[:], ffffKey[:]) if seq == -1 { // nothing to delete diff --git a/splitter/splitter.go b/splitter/splitter.go index 3cad7f793..e167b7757 100644 --- a/splitter/splitter.go +++ b/splitter/splitter.go @@ -2,6 +2,7 @@ package splitter import ( "context" + "errors" "fmt" "io" "math/rand" @@ -425,7 +426,12 @@ func (s *Splitter) subscribeWithRedialer(ctx context.Context, host string, curso "User-Agent": []string{"bgs-rainbow-v0"}, } - url := fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host, cursor) + var url string + if cursor < 0 { + url = fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos", protocol, host) + } else { + url = fmt.Sprintf("%s://%s/xrpc/com.atproto.sync.subscribeRepos?cursor=%d", protocol, host, cursor) + } con, res, err := d.DialContext(ctx, url, header) if err != nil { log.Warnw("dialing failed", "host", host, "err", err, "backoff", backoff) @@ -478,6 +484,8 @@ func (s *Splitter) getLastCursor() (int64, error) { if err == nil { log.Debugw("got last cursor from pebble", "seq", seq, "millis", millis) return seq, nil + } else if errors.Is(err, events.ErrNoLast) { + log.Info("pebble no last") } else { log.Errorw("pebble seq fail", "err", err) } @@ -486,19 +494,19 @@ func (s *Splitter) getLastCursor() (int64, error) { fi, err := os.Open(s.conf.CursorFile) if err != nil { if os.IsNotExist(err) { - return 0, nil + return -1, nil } - return 0, err + return -1, err } b, err := io.ReadAll(fi) if err != nil { - return 0, err + return -1, err } v, err := strconv.ParseInt(string(b), 10, 64) if err != nil { - return 0, err + return -1, err } return v, nil