Skip to content

Commit

Permalink
feat: otel tracing
Browse files Browse the repository at this point in the history
  • Loading branch information
fenos committed Jun 11, 2024
1 parent 4aeb415 commit 615a65f
Show file tree
Hide file tree
Showing 31 changed files with 10,359 additions and 7,111 deletions.
23 changes: 17 additions & 6 deletions .docker/docker-compose-infra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,23 @@ services:
- IMGPROXY_USE_ETAG=true
- IMGPROXY_ENABLE_WEBP_DETECTION=true

# Optional for rate-limiting
redis:
image: redis:6.2-alpine
restart: always
ports:
- '6379:6379'
# Optional for rate-limiting
# redis:
# image: redis:6.2-alpine
# restart: always
# ports:
# - '6379:6379'

# Optional for tracing
# otel:
# extends:
# service: otel-collector
# file: ./.docker/docker-compose-monitoring.yml
#
# jaeger:
# extends:
# service: jaeger
# file: ./.docker/docker-compose-monitoring.yml

configs:
init.sql:
Expand Down
25 changes: 24 additions & 1 deletion .docker/docker-compose-monitoring.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,4 +43,27 @@ services:
- GF_SECURITY_ADMIN_PASSWORD=grafana
volumes:
- ../monitoring/grafana/config:/etc/grafana/provisioning
- ../monitoring/grafana/dashboards:/var/lib/grafana/dashboards
- ../monitoring/grafana/dashboards:/var/lib/grafana/dashboards

# Jaeger all-in-one image (collector, query service and UI in one container).
# The otel-collector exporter targets jaeger:4317 over the compose network,
# so that port does not need to be published to the host here.
jaeger:
  image: jaegertracing/all-in-one:1.57.0
  ports:
    - "16686:16686" # Jaeger UI / query API (HTTP)
    - "14250:14250" # Collector gRPC (model.proto)
    - "14268:14268" # Collector HTTP (jaeger.thrift sent directly by clients)
    - "14269:14269" # Admin port: health check and metrics (HTTP)
    - "6831:6831/udp" # Agent UDP (jaeger.thrift, compact protocol)
    - "6832:6832/udp" # Agent UDP (jaeger.thrift, binary protocol)
    - "5778:5778" # Config server, e.g. sampling strategies (HTTP)

# OpenTelemetry Collector (contrib distribution). It loads
# otel-collector-config.yml from the config directory mounted at /etc/otel
# and is started after the jaeger service.
otel-collector:
  image: otel/opentelemetry-collector-contrib:0.100.0
  ports:
    - "4317:4317" # OTLP gRPC receiver
    - "4318:4318" # OTLP HTTP receiver
    # 55680 is the legacy OTLP gRPC port, not an HTTP endpoint.
    # NOTE(review): the otlp receiver in otel-collector-config.yml does not
    # override any endpoint, so nothing appears to listen on 55680 - confirm
    # whether this mapping is still needed.
    - "55680:55680" # Legacy OTLP gRPC port
  command: ["--config=/etc/otel/otel-collector-config.yml"]
  depends_on:
    - jaeger
  volumes:
    - ../monitoring/otel/config:/etc/otel
10 changes: 10 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -99,4 +99,14 @@ services:
# prometheus:
# extends:
# service: prometheus
# file: ./.docker/docker-compose-monitoring.yml
#
# otel:
# extends:
# service: otel-collector
# file: ./.docker/docker-compose-monitoring.yml
#
# jaeger:
# extends:
# service: jaeger
# file: ./.docker/docker-compose-monitoring.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- Trigger function: publishes the id of the deleted tenants row, wrapped in
-- double quotes, on the 'tenants_update' notification channel.
CREATE FUNCTION tenants_delete_notify_trigger()
    RETURNS trigger
    LANGUAGE plpgsql
AS $body$
BEGIN
    -- PERFORM discards the result of pg_notify.
    PERFORM pg_notify('tenants_update', '"' || OLD.id || '"');
    RETURN NULL;
END;
$body$;
-- Invoke tenants_delete_notify_trigger() once per row deleted from tenants.
CREATE TRIGGER tenants_delete_notify_trigger
    AFTER DELETE
    ON tenants
    FOR EACH ROW
EXECUTE PROCEDURE tenants_delete_notify_trigger();
2 changes: 2 additions & 0 deletions migrations/multitenant/0011-tracing-mode-column.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@

-- Adds the per-tenant tracing mode column; it is mandatory and defaults to 'basic'.
ALTER TABLE tenants
    ADD COLUMN tracing_mode text NOT NULL DEFAULT 'basic';
183 changes: 183 additions & 0 deletions monitoring/otel/config/otel-collector-config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
# Single OTLP receiver; the grpc and http protocol entries are listed without
# any overrides, so no endpoint or other setting is customised here.
receivers:
  otlp:
    protocols:
      grpc:
      http:

# Processors referenced by the traces pipeline: a memory guard, the
# tail-sampling policy set, and batching before export.
processors:
  memory_limiter:
    check_interval: 1s
    limit_percentage: 70
    spike_limit_percentage: 20

  batch:
    send_batch_size: 10000
    timeout: 10s

  tail_sampling/storage:
    decision_wait: 10s
    expected_new_traces_per_sec: 10000
    # NOTE(review): decision_wait (10s) x expected_new_traces_per_sec (10000)
    # is 100000, which is larger than num_traces. Confirm that traces cannot
    # be dropped from memory before a sampling decision is made at peak load.
    num_traces: 50000
    policies:
      # Exclude probe URLs (health checks, metrics, version, status, tenants).
      - name: exclude-urls
        type: string_attribute
        string_attribute:
          key: http.route
          values:
            - '\/health.*'
            - '\/metrics'
            - '\/tenants'
            - '\/version'
            - '\/status'
          enabled_regex_matching: true
          invert_match: true

      # All server errors (HTTP 5xx) are sampled.
      - name: error-status-codes
        type: numeric_attribute
        numeric_attribute:
          key: http.status_code
          min_value: 500
          max_value: 599

      # Always sample high-latency traces (over 5 seconds) that are not uploads.
      - name: high-latency-excluding-uploads
        type: and
        and:
          and_sub_policy:
            - name: latency-over-5s
              type: latency
              latency:
                threshold_ms: 5000
            # Exclude upload operations
            - name: exclude-upload-operations
              type: string_attribute
              string_attribute:
                key: http.operation
                values:
                  - '.*upload.*'
                enabled_regex_matching: true
                invert_match: true

      # Always sample high-latency uploads (over 300000 ms, i.e. 5 minutes).
      - name: high-latency-uploads
        type: and
        and:
          and_sub_policy:
            - name: latency-over-5m
              type: latency
              latency:
                threshold_ms: 300000
            # Only upload operations
            - name: only-upload-operations
              type: string_attribute
              string_attribute:
                key: http.operation
                values:
                  - '.*upload.*'
                enabled_regex_matching: true

      # Sample traces for tenants in the default mode.
      # Default mode is where the trace.mode attribute is set to "basic";
      # 5% of successful (HTTP 200-399) traces are sampled.
      - name: sampling-basic-tenants
        type: and
        and:
          and_sub_policy:
            # must have tenant.ref attribute
            - name: has-tenant-ref
              type: string_attribute
              string_attribute:
                key: tenant.ref
                values:
                  - '.*'
                enabled_regex_matching: true
            # trace.mode = basic
            - name: trace-mode-default
              type: string_attribute
              string_attribute:
                key: trace.mode
                values:
                  - basic
            - name: success-status-codes
              type: numeric_attribute
              numeric_attribute:
                key: http.status_code
                min_value: 200
                max_value: 399
            - name: basic-sampling
              type: probabilistic
              probabilistic:
                sampling_percentage: 5

      # Sample traces for tenants in premium mode.
      # Premium mode is where the trace.mode attribute is set to "full";
      # 100% of successful (HTTP 200-399) traces are sampled.
      - name: sampling-premium-tenants
        type: and
        and:
          and_sub_policy:
            # must have tenant.ref attribute
            - name: has-tenant-ref
              type: string_attribute
              string_attribute:
                key: tenant.ref
                values:
                  - '.*'
                enabled_regex_matching: true
            # trace.mode = full
            - name: trace-mode-full
              type: string_attribute
              string_attribute:
                key: trace.mode
                values:
                  - full
            - name: success-status-codes
              type: numeric_attribute
              numeric_attribute:
                key: http.status_code
                min_value: 200
                max_value: 399
            - name: full-sampling
              type: probabilistic
              probabilistic:
                sampling_percentage: 100

# OTLP exporter named "jaeger", pointed at host "jaeger" on port 4317.
# NOTE(review): tls.insecure is true, which presumably means the connection
# is made without TLS - confirm this is only used on a trusted network.
exporters:
  otlp/jaeger:
    endpoint: "jaeger:4317"
    tls:
      insecure: true

# One traces pipeline: spans come in through the otlp receiver, go through
# memory_limiter, tail_sampling/storage and batch (listed in that order),
# and leave through the otlp/jaeger exporter.
service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [memory_limiter, tail_sampling/storage, batch]
      exporters: [otlp/jaeger]
Loading

0 comments on commit 615a65f

Please sign in to comment.