Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(storage): AWS backend using thanos.io/objstore #11221

Merged
merged 26 commits into from
Oct 28, 2024
Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
43bba36
Groundwork for azure implementation
JoaoBraveCoding Nov 24, 2023
b93fbcd
Fixed configuration for bucket/azure
JoaoBraveCoding Nov 24, 2023
a259abd
Added support for BlobStorage using thanos/objstore
JoaoBraveCoding Nov 24, 2023
42f37ba
Azure CLI review
JoaoBraveCoding Nov 27, 2023
40aa2f9
Fixes from testing
JoaoBraveCoding Dec 7, 2023
e394042
Merge branch 'main' into log-4550-azure
JoaoBraveCoding Mar 14, 2024
02218a4
Merge branch 'main' into log-4550-azure
JoaoBraveCoding Mar 18, 2024
6db3b13
Merge branch 'main' into log-4550-azure
JoaoBraveCoding Apr 22, 2024
4399687
Merge branch 'main' into log-4550-azure
JoaoBraveCoding Jul 1, 2024
1386834
Merge branch 'main' into log-4550-azure
ashwanthgoli Oct 21, 2024
679c2a8
clean-up deleted files from main
ashwanthgoli Oct 21, 2024
06f2280
config parity
ashwanthgoli Oct 21, 2024
8d92fbd
add missing methods to azure thanos adapter
ashwanthgoli Oct 21, 2024
76c41fe
use objectclient adapter
ashwanthgoli Oct 22, 2024
0557d69
lint
ashwanthgoli Oct 22, 2024
a57c140
make format
ashwanthgoli Oct 22, 2024
a7da0f4
remove gcs comment
ashwanthgoli Oct 22, 2024
4337f02
s3: add support for thanos client
ashwanthgoli Oct 22, 2024
b032654
make format
ashwanthgoli Oct 22, 2024
f99e5e7
Update pkg/storage/chunk/client/aws/s3_storage_client.go
ashwanthgoli Oct 24, 2024
c9b0c10
review suggestions
ashwanthgoli Oct 24, 2024
65a0aeb
fixup! review suggestions
ashwanthgoli Oct 25, 2024
5c52572
Merge branch 'main' into log-4550-aws
ashwanthgoli Oct 25, 2024
3cd8596
use retryfunc option
ashwanthgoli Oct 25, 2024
cf4efc4
fix config_test
ashwanthgoli Oct 25, 2024
4752c12
remove signature version. not configurable anymore, defaults to v4
ashwanthgoli Oct 25, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions pkg/storage/bucket/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,9 @@ func (cfg *StorageBackendConfig) RegisterFlagsWithPrefix(prefix string, f *flag.
}

func (cfg *StorageBackendConfig) Validate() error {
// TODO: enable validation when s3 flags are registered
// if err := cfg.S3.Validate(); err != nil {
// return err
//}
if err := cfg.S3.Validate(); err != nil {
return err
}

return nil
}
Expand Down
42 changes: 32 additions & 10 deletions pkg/storage/bucket/s3/bucket_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"github.com/go-kit/log"
"github.com/prometheus/common/model"
"github.com/thanos-io/objstore"
"github.com/thanos-io/objstore/exthttp"
"github.com/thanos-io/objstore/providers/s3"
)

Expand Down Expand Up @@ -38,17 +39,28 @@ func newS3Config(cfg Config) (s3.Config, error) {
return s3.Config{}, err
}

putUserMetadata := map[string]string{}

if cfg.StorageClass != "" {
putUserMetadata[awsStorageClassHeader] = cfg.StorageClass
}

return s3.Config{
Bucket: cfg.BucketName,
Endpoint: cfg.Endpoint,
Region: cfg.Region,
AccessKey: cfg.AccessKeyID,
SecretKey: cfg.SecretAccessKey.String(),
SessionToken: cfg.SessionToken.String(),
Insecure: cfg.Insecure,
DisableDualstack: cfg.DisableDualstack,
SSEConfig: sseCfg,
PutUserMetadata: map[string]string{awsStorageClassHeader: cfg.StorageClass},
Bucket: cfg.BucketName,
Endpoint: cfg.Endpoint,
Region: cfg.Region,
AccessKey: cfg.AccessKeyID,
SecretKey: cfg.SecretAccessKey.String(),
SessionToken: cfg.SessionToken.String(),
Insecure: cfg.Insecure,
PutUserMetadata: putUserMetadata,
SendContentMd5: cfg.SendContentMd5,
SSEConfig: sseCfg,
DisableDualstack: !cfg.DualstackEnabled,
ListObjectsVersion: cfg.ListObjectsVersion,
BucketLookupType: cfg.BucketLookupType,
AWSSDKAuth: cfg.NativeAWSAuthEnabled,
PartSize: cfg.PartSize,
HTTPConfig: s3.HTTPConfig{
IdleConnTimeout: model.Duration(cfg.HTTP.IdleConnTimeout),
ResponseHeaderTimeout: model.Duration(cfg.HTTP.ResponseHeaderTimeout),
Expand All @@ -59,6 +71,16 @@ func newS3Config(cfg Config) (s3.Config, error) {
MaxIdleConnsPerHost: cfg.HTTP.MaxIdleConnsPerHost,
MaxConnsPerHost: cfg.HTTP.MaxConnsPerHost,
Transport: cfg.HTTP.Transport,
TLSConfig: exthttp.TLSConfig{
CAFile: cfg.HTTP.TLSConfig.CAPath,
CertFile: cfg.HTTP.TLSConfig.CertPath,
KeyFile: cfg.HTTP.TLSConfig.KeyPath,
ServerName: cfg.HTTP.TLSConfig.ServerName,
},
},
TraceConfig: s3.TraceConfig{
Enable: cfg.TraceConfig.Enabled,
},
STSEndpoint: cfg.STSEndpoint,
}, nil
}
161 changes: 134 additions & 27 deletions pkg/storage/bucket/s3/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,20 @@ import (
"flag"
"fmt"
"net/http"
"slices"
"strings"
"time"

s3_service "github.com/aws/aws-sdk-go/service/s3"
"github.com/grafana/dskit/flagext"
"github.com/minio/minio-go/v7/pkg/encrypt"
"github.com/pkg/errors"
"github.com/thanos-io/objstore/providers/s3"

bucket_http "github.com/grafana/loki/v3/pkg/storage/bucket/http"
"github.com/grafana/loki/v3/pkg/storage/common/aws"
"github.com/grafana/loki/v3/pkg/util"
)

const (
// Signature Version 2 is being turned off (deprecated) in Amazon S3. Amazon S3 will then only accept API requests that are signed using Signature Version 4.
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/UsingAWSSDK.html#UsingAWSSDK-sig2-deprecation
SignatureVersionV4 = "v4"

// SSEKMS config type constant to configure S3 server side encryption using KMS
Expand All @@ -32,41 +31,102 @@ const (
)

var (
supportedSignatureVersions = []string{SignatureVersionV4}
supportedSSETypes = []string{SSEKMS, SSES3}
errUnsupportedSignatureVersion = errors.New("unsupported signature version")
supportedSignatureVersions = []string{SignatureVersionV4}
supportedSSETypes = []string{SSEKMS, SSES3}
supportedStorageClasses = s3_service.ObjectStorageClass_Values()
supportedBucketLookupTypes = thanosS3BucketLookupTypesValues()

errUnsupportedSignatureVersion = fmt.Errorf("unsupported signature version (supported values: %s)", strings.Join(supportedSignatureVersions, ", "))
errUnsupportedSSEType = errors.New("unsupported S3 SSE type")
errUnsupportedStorageClass = fmt.Errorf("unsupported S3 storage class (supported values: %s)", strings.Join(supportedStorageClasses, ", "))
errInvalidSSEContext = errors.New("invalid S3 SSE encryption context")
errInvalidEndpointPrefix = errors.New("the endpoint must not prefixed with the bucket name")
errInvalidSTSEndpoint = errors.New("sts-endpoint must be a valid url")
)

var thanosS3BucketLookupTypes = map[string]s3.BucketLookupType{
s3.AutoLookup.String(): s3.AutoLookup,
s3.VirtualHostLookup.String(): s3.VirtualHostLookup,
s3.PathLookup.String(): s3.PathLookup,
}

func thanosS3BucketLookupTypesValues() (list []string) {
for k := range thanosS3BucketLookupTypes {
list = append(list, k)
}
// sort the list for consistent output in help, where it's used
slices.Sort(list)
return list
}

// HTTPConfig stores the http.Transport configuration for the s3 minio client.
type HTTPConfig struct {
bucket_http.Config `yaml:",inline"`
IdleConnTimeout time.Duration `yaml:"idle_conn_timeout" category:"advanced"`
ResponseHeaderTimeout time.Duration `yaml:"response_header_timeout" category:"advanced"`
InsecureSkipVerify bool `yaml:"insecure_skip_verify" category:"advanced"`
TLSHandshakeTimeout time.Duration `yaml:"tls_handshake_timeout" category:"advanced"`
ExpectContinueTimeout time.Duration `yaml:"expect_continue_timeout" category:"advanced"`
MaxIdleConns int `yaml:"max_idle_connections" category:"advanced"`
MaxIdleConnsPerHost int `yaml:"max_idle_connections_per_host" category:"advanced"`
MaxConnsPerHost int `yaml:"max_connections_per_host" category:"advanced"`

// Allow upstream callers to inject a round tripper
Transport http.RoundTripper `yaml:"-"`

TLSConfig TLSConfig `yaml:",inline"`
}

// TLSConfig configures the options for TLS connections.
type TLSConfig struct {
CAPath string `yaml:"tls_ca_path" category:"advanced"`
CertPath string `yaml:"tls_cert_path" category:"advanced"`
KeyPath string `yaml:"tls_key_path" category:"advanced"`
ServerName string `yaml:"tls_server_name" category:"advanced"`
}

// RegisterFlagsWithPrefix registers the flags for s3 storage with the provided prefix
func (cfg *HTTPConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
cfg.Config.RegisterFlagsWithPrefix(prefix+"s3.", f)
f.DurationVar(&cfg.IdleConnTimeout, prefix+"s3.http.idle-conn-timeout", 90*time.Second, "The time an idle connection will remain idle before closing.")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
f.DurationVar(&cfg.IdleConnTimeout, prefix+"s3.http.idle-conn-timeout", 90*time.Second, "The time an idle connection will remain idle before closing.")
s3prefix := prefix + "s3."
f.DurationVar(&cfg.IdleConnTimeout, s3prefix+"http.idle-conn-timeout", 90*time.Second, "The time an idle connection will remain idle before closing.")

f.DurationVar(&cfg.ResponseHeaderTimeout, prefix+"s3.http.response-header-timeout", 2*time.Minute, "The amount of time the client will wait for a servers response headers.")
f.BoolVar(&cfg.InsecureSkipVerify, prefix+"s3.http.insecure-skip-verify", false, "If the client connects to S3 via HTTPS and this option is enabled, the client will accept any certificate and hostname.")
f.DurationVar(&cfg.TLSHandshakeTimeout, prefix+"s3.tls-handshake-timeout", 10*time.Second, "Maximum time to wait for a TLS handshake. 0 means no limit.")
f.DurationVar(&cfg.ExpectContinueTimeout, prefix+"s3.expect-continue-timeout", 1*time.Second, "The time to wait for a server's first response headers after fully writing the request headers if the request has an Expect header. 0 to send the request body immediately.")
f.IntVar(&cfg.MaxIdleConns, prefix+"s3.max-idle-connections", 100, "Maximum number of idle (keep-alive) connections across all hosts. 0 means no limit.")
f.IntVar(&cfg.MaxIdleConnsPerHost, prefix+"s3.max-idle-connections-per-host", 100, "Maximum number of idle (keep-alive) connections to keep per-host. If 0, a built-in default value is used.")
f.IntVar(&cfg.MaxConnsPerHost, prefix+"s3.max-connections-per-host", 0, "Maximum number of connections per host. 0 means no limit.")
cfg.TLSConfig.RegisterFlagsWithPrefix(prefix, f)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could also pass the prefix + "s3." as prefix here.

}

// RegisterFlagsWithPrefix registers the flags for s3 storage with the provided prefix.
func (cfg *TLSConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.StringVar(&cfg.CAPath, prefix+"s3.http.tls-ca-path", "", "Path to the CA certificates to validate server certificate against. If not set, the host's root CA certificates are used.")
f.StringVar(&cfg.CertPath, prefix+"s3.http.tls-cert-path", "", "Path to the client certificate, which will be used for authenticating with the server. Also requires the key path to be configured.")
f.StringVar(&cfg.KeyPath, prefix+"s3.http.tls-key-path", "", "Path to the key for the client certificate. Also requires the client certificate to be configured.")
f.StringVar(&cfg.ServerName, prefix+"s3.http.tls-server-name", "", "Override the expected name on the server certificate.")
}

// Config holds the config options for an S3 backend
type Config struct {
JoaoBraveCoding marked this conversation as resolved.
Show resolved Hide resolved
Endpoint string `yaml:"endpoint"`
Region string `yaml:"region"`
BucketName string `yaml:"bucket_name"`
SecretAccessKey flagext.Secret `yaml:"secret_access_key"`
SessionToken flagext.Secret `yaml:"session_token"`
AccessKeyID string `yaml:"access_key_id"`
Insecure bool `yaml:"insecure"`
DisableDualstack bool `yaml:"disable_dualstack"`
SignatureVersion string `yaml:"signature_version"`
StorageClass string `yaml:"storage_class"`
Endpoint string `yaml:"endpoint"`
Region string `yaml:"region"`
BucketName string `yaml:"bucket_name"`
SecretAccessKey flagext.Secret `yaml:"secret_access_key"`
AccessKeyID string `yaml:"access_key_id"`
SessionToken flagext.Secret `yaml:"session_token"`
Insecure bool `yaml:"insecure" category:"advanced"`
SignatureVersion string `yaml:"signature_version" category:"advanced"`
ListObjectsVersion string `yaml:"list_objects_version" category:"advanced"`
BucketLookupType s3.BucketLookupType `yaml:"bucket_lookup_type" category:"advanced"`
DualstackEnabled bool `yaml:"dualstack_enabled" category:"experimental"`
StorageClass string `yaml:"storage_class" category:"experimental"`
NativeAWSAuthEnabled bool `yaml:"native_aws_auth_enabled" category:"experimental"`
PartSize uint64 `yaml:"part_size" category:"experimental"`
SendContentMd5 bool `yaml:"send_content_md5" category:"experimental"`
STSEndpoint string `yaml:"sts_endpoint"`

SSE SSEConfig `yaml:"sse"`
HTTP HTTPConfig `yaml:"http"`
SSE SSEConfig `yaml:"sse"`
HTTP HTTPConfig `yaml:"http"`
TraceConfig TraceConfig `yaml:"trace"`
}

// RegisterFlags registers the flags for s3 storage with the provided prefix
Expand All @@ -83,21 +143,36 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.StringVar(&cfg.Region, prefix+"s3.region", "", "S3 region. If unset, the client will issue a S3 GetBucketLocation API call to autodetect it.")
f.StringVar(&cfg.Endpoint, prefix+"s3.endpoint", "", "The S3 bucket endpoint. It could be an AWS S3 endpoint listed at https://docs.aws.amazon.com/general/latest/gr/s3.html or the address of an S3-compatible service in hostname:port format.")
f.BoolVar(&cfg.Insecure, prefix+"s3.insecure", false, "If enabled, use http:// for the S3 endpoint instead of https://. This could be useful in local dev/test environments while using an S3-compatible backend storage, like Minio.")
f.BoolVar(&cfg.DisableDualstack, prefix+"s3.disable-dualstack", false, "Disable forcing S3 dualstack endpoint usage.")
f.StringVar(&cfg.SignatureVersion, prefix+"s3.signature-version", SignatureVersionV4, fmt.Sprintf("The signature version to use for authenticating against S3. Supported values are: %s.", strings.Join(supportedSignatureVersions, ", ")))
f.StringVar(&cfg.StorageClass, prefix+"s3.storage-class", aws.StorageClassStandard, "The S3 storage class to use. Details can be found at https://aws.amazon.com/s3/storage-classes/.")
f.StringVar(&cfg.ListObjectsVersion, prefix+"s3.list-objects-version", "", "Use a specific version of the S3 list object API. Supported values are v1 or v2. Default is unset.")
f.StringVar(&cfg.StorageClass, prefix+"s3.storage-class", "", "The S3 storage class to use, not set by default. Details can be found at https://aws.amazon.com/s3/storage-classes/. Supported values are: "+strings.Join(supportedStorageClasses, ", "))
f.BoolVar(&cfg.NativeAWSAuthEnabled, prefix+"s3.native-aws-auth-enabled", false, "If enabled, it will use the default authentication methods of the AWS SDK for go based on known environment variables and known AWS config files.")
f.Uint64Var(&cfg.PartSize, prefix+"s3.part-size", 0, "The minimum file size in bytes used for multipart uploads. If 0, the value is optimally computed for each object.")
f.BoolVar(&cfg.SendContentMd5, prefix+"s3.send-content-md5", false, "If enabled, a Content-MD5 header is sent with S3 Put Object requests. Consumes more resources to compute the MD5, but may improve compatibility with object storage services that do not support checksums.")
f.Var(newBucketLookupTypeValue(s3.AutoLookup, &cfg.BucketLookupType), prefix+"s3.bucket-lookup-type", fmt.Sprintf("Bucket lookup style type, used to access bucket in S3-compatible service. Default is auto. Supported values are: %s.", strings.Join(supportedBucketLookupTypes, ", ")))
f.BoolVar(&cfg.DualstackEnabled, prefix+"s3.dualstack-enabled", true, "When enabled, direct all AWS S3 requests to the dual-stack IPv4/IPv6 endpoint for the configured region.")
f.StringVar(&cfg.STSEndpoint, prefix+"s3.sts-endpoint", "", "Accessing S3 resources using temporary, secure credentials provided by AWS Security Token Service.")
cfg.SSE.RegisterFlagsWithPrefix(prefix+"s3.sse.", f)
cfg.HTTP.RegisterFlagsWithPrefix(prefix, f)
cfg.TraceConfig.RegisterFlagsWithPrefix(prefix+"s3.trace.", f)
}

// Validate config and returns error on failure
func (cfg *Config) Validate() error {
if !util.StringsContain(supportedSignatureVersions, cfg.SignatureVersion) {
if !slices.Contains(supportedSignatureVersions, cfg.SignatureVersion) {
return errUnsupportedSignatureVersion
}

if err := aws.ValidateStorageClass(cfg.StorageClass); err != nil {
return err
if cfg.Endpoint != "" {
endpoint := strings.Split(cfg.Endpoint, ".")
if cfg.BucketName != "" && endpoint[0] != "" && endpoint[0] == cfg.BucketName {
return errInvalidEndpointPrefix
}
}
if cfg.STSEndpoint != "" && !util.IsValidURL(cfg.STSEndpoint) {
return errInvalidSTSEndpoint
}
if !slices.Contains(supportedStorageClasses, cfg.StorageClass) && cfg.StorageClass != "" {
return errUnsupportedStorageClass
}

return cfg.SSE.Validate()
Expand Down Expand Up @@ -191,3 +266,35 @@ func parseKMSEncryptionContext(data string) (map[string]string, error) {
err := errors.Wrap(json.Unmarshal([]byte(data), &decoded), "unable to parse KMS encryption context")
return decoded, err
}

type TraceConfig struct {
Enabled bool `yaml:"enabled" category:"advanced"`
}

func (cfg *TraceConfig) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
f.BoolVar(&cfg.Enabled, prefix+"enabled", false, "When enabled, low-level S3 HTTP operation information is logged at the debug level.")
}

// bucketLookupTypeValue is an adapter between s3.BucketLookupType and flag.Value.
type bucketLookupTypeValue s3.BucketLookupType

func newBucketLookupTypeValue(value s3.BucketLookupType, p *s3.BucketLookupType) *bucketLookupTypeValue {
*p = value
return (*bucketLookupTypeValue)(p)
}

func (v *bucketLookupTypeValue) String() string {
if v == nil {
return s3.AutoLookup.String()
}
return s3.BucketLookupType(*v).String()
}

func (v *bucketLookupTypeValue) Set(s string) error {
t, ok := thanosS3BucketLookupTypes[s]
if !ok {
return fmt.Errorf("unsupported bucket lookup type: %s", s)
}
*v = bucketLookupTypeValue(t)
return nil
}
Loading
Loading