Skip to content

Commit

Permalink
Tempo query improve search performance
Browse files Browse the repository at this point in the history
Signed-off-by: Pavol Loffay <[email protected]>
  • Loading branch information
pavolloffay committed Oct 9, 2024
1 parent 88d46a5 commit c9e77c0
Show file tree
Hide file tree
Showing 28 changed files with 288 additions and 40 deletions.
21 changes: 21 additions & 0 deletions .chloggen/tempo-query-find-traces-jobs.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. tempostack, tempomonolithic, github action)
component: tempostack, tempomonolithic

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add tempo-query CRD option to speed up trace search.

# One or more tracking issues related to the change
issues: [1048]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
The following CRD options were added to speed up trace search in the Jaeger UI/API. A trace search first
looks up the matching trace IDs and then fetches each full trace. With this configuration option the requests
that fetch the full traces can be run in parallel:
For `TempoStack` - `spec.template.queryFrontend.jaegerQuery.findTracesConcurrentRequests`
For `TempoMonolithic` - `spec.jaegerui.findTracesConcurrentRequests`
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Current Operator version
OPERATOR_VERSION ?= 0.13.0
TEMPO_VERSION ?= 2.5.0
TEMPO_QUERY_VERSION ?= main-2999520
JAEGER_QUERY_VERSION ?= 1.62.0
TEMPO_QUERY_VERSION ?= main-1de25ca
TEMPO_GATEWAY_VERSION ?= main-2024-08-05-11d0d94
TEMPO_GATEWAY_OPA_VERSION ?= main-2024-04-29-914c13f
OAUTH_PROXY_VERSION=4.14
Expand Down
12 changes: 8 additions & 4 deletions apis/tempo/v1alpha1/tempomonolithic_defaults.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@ import (
)

var (
twoGBQuantity = resource.MustParse("2Gi")
tenGBQuantity = resource.MustParse("10Gi")
defaultServicesDuration = metav1.Duration{Duration: time.Hour * 24 * 3}
defaultTimeout = metav1.Duration{Duration: time.Second * 30}
twoGBQuantity = resource.MustParse("2Gi")
tenGBQuantity = resource.MustParse("10Gi")
defaultServicesDuration = metav1.Duration{Duration: time.Hour * 24 * 3}
defaultTimeout = metav1.Duration{Duration: time.Second * 30}
defaultFindTracesConcurrentRequests = 1
)

// Default sets all default values in a central place, instead of setting it at every place where the value is accessed.
Expand Down Expand Up @@ -88,6 +89,9 @@ func (r *TempoMonolithic) Default(ctrlConfig configv1alpha1.ProjectConfig) {
if r.Spec.JaegerUI.ServicesQueryDuration == nil {
r.Spec.JaegerUI.ServicesQueryDuration = &defaultServicesDuration
}
if r.Spec.JaegerUI.FindTracesConcurrentRequests == 0 {
r.Spec.JaegerUI.FindTracesConcurrentRequests = defaultFindTracesConcurrentRequests
}
}

if r.Spec.Timeout.Duration == 0 {
Expand Down
20 changes: 13 additions & 7 deletions apis/tempo/v1alpha1/tempomonolithic_defaults_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,8 @@ func TestMonolithicDefault(t *testing.T) {
Enabled: true,
SAR: "{\"namespace\": \"testns\", \"resource\": \"pods\", \"verb\": \"get\"}",
},
ServicesQueryDuration: &defaultServicesDuration,
ServicesQueryDuration: &defaultServicesDuration,
FindTracesConcurrentRequests: 1,
},
Management: "Managed",
Timeout: metav1.Duration{Duration: time.Second * 30},
Expand Down Expand Up @@ -269,7 +270,8 @@ func TestMonolithicDefault(t *testing.T) {
Enabled: false,
SAR: "{\"namespace\": \"testns\", \"resource\": \"pods\", \"verb\": \"get\"}",
},
ServicesQueryDuration: &defaultServicesDuration,
ServicesQueryDuration: &defaultServicesDuration,
FindTracesConcurrentRequests: 1,
},
Management: "Managed",
Timeout: metav1.Duration{Duration: time.Second * 30},
Expand Down Expand Up @@ -334,7 +336,8 @@ func TestMonolithicDefault(t *testing.T) {
Enabled: true,
SAR: "{\"namespace\": \"testns\", \"resource\": \"pods\", \"verb\": \"get\"}",
},
ServicesQueryDuration: &defaultServicesDuration,
ServicesQueryDuration: &defaultServicesDuration,
FindTracesConcurrentRequests: 1,
},
Management: "Managed",
Timeout: metav1.Duration{Duration: time.Second * 30},
Expand Down Expand Up @@ -398,15 +401,16 @@ func TestMonolithicDefault(t *testing.T) {
Enabled: false,
SAR: "{\"namespace\": \"testns\", \"resource\": \"pods\", \"verb\": \"get\"}",
},
ServicesQueryDuration: &defaultServicesDuration,
ServicesQueryDuration: &defaultServicesDuration,
FindTracesConcurrentRequests: 1,
},
Management: "Managed",
Timeout: metav1.Duration{Duration: time.Second * 30},
},
},
},
{
name: "define custom duration for services list and timeout",
name: "define custom duration for services list, timeout and find traces",
input: &TempoMonolithic{
ObjectMeta: v1.ObjectMeta{
Name: "test",
Expand All @@ -424,7 +428,8 @@ func TestMonolithicDefault(t *testing.T) {
Route: &MonolithicJaegerUIRouteSpec{
Enabled: true,
},
ServicesQueryDuration: &v1.Duration{Duration: time.Duration(100 * 100)},
ServicesQueryDuration: &v1.Duration{Duration: time.Duration(100 * 100)},
FindTracesConcurrentRequests: 40,
},
Timeout: metav1.Duration{Duration: time.Hour},
},
Expand Down Expand Up @@ -461,7 +466,8 @@ func TestMonolithicDefault(t *testing.T) {
Enabled: false,
SAR: "{\"namespace\": \"testns\", \"resource\": \"pods\", \"verb\": \"get\"}",
},
ServicesQueryDuration: &v1.Duration{Duration: time.Duration(100 * 100)},
ServicesQueryDuration: &v1.Duration{Duration: time.Duration(100 * 100)},
FindTracesConcurrentRequests: 40,
},
Management: "Managed",
Timeout: metav1.Duration{Duration: time.Hour},
Expand Down
13 changes: 13 additions & 0 deletions apis/tempo/v1alpha1/tempomonolithic_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,19 @@ type MonolithicJaegerUISpec struct {
// +optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ServicesQueryDuration",xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced"
ServicesQueryDuration *metav1.Duration `json:"servicesQueryDuration,omitempty"`

// FindTracesConcurrentRequests defines how many concurrent requests a single trace search can submit.
// A trace search in Jaeger submits limit+1 requests: the first request finds the trace IDs and the remaining
// requests fetch the entire traces by ID. This property allows Jaeger to fetch those traces in parallel.
// Note that by default a single Tempo querier can process 20 concurrent search jobs.
// Increasing this property might require scaling up querier instances, especially on error "job queue full".
// See also Tempo's extraConfig:
// querier.max_concurrent_queries (20 default)
// query_frontend.max_outstanding_per_tenant (2000 default). Increase it if the query-frontend returns 429.
//
// +optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="FindTracesConcurrentRequests",xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced"
FindTracesConcurrentRequests int `json:"findTracesConcurrentRequests,omitempty"`
}

// MonolithicJaegerUIIngressSpec defines the settings for the Jaeger UI ingress.
Expand Down
13 changes: 13 additions & 0 deletions apis/tempo/v1alpha1/tempostack_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -618,6 +618,19 @@ type JaegerQuerySpec struct {
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="ServicesQueryDuration"
ServicesQueryDuration *metav1.Duration `json:"servicesQueryDuration,omitempty"`

// FindTracesConcurrentRequests defines how many concurrent requests a single trace search can submit.
// A trace search in Jaeger submits limit+1 requests: the first request finds the trace IDs and the remaining
// requests fetch the entire traces by ID. This property allows Jaeger to fetch those traces in parallel.
// Note that by default a single Tempo querier can process 20 concurrent search jobs.
// Increasing this property might require scaling up querier instances, especially on error "job queue full".
// See also Tempo's extraConfig:
// querier.max_concurrent_queries (20 default)
// query_frontend.max_outstanding_per_tenant (2000 default). Increase it if the query-frontend returns 429.
//
// +optional
// +operator-sdk:csv:customresourcedefinitions:type=spec,displayName="FindTracesConcurrentRequests",xDescriptors="urn:alm:descriptor:com.tectonic.ui:advanced"
FindTracesConcurrentRequests int `json:"findTracesConcurrentRequests,omitempty"`

// Authentication defines the options for the oauth proxy used to protect jaeger UI
//
// +optional
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ metadata:
capabilities: Deep Insights
categories: Logging & Tracing,Monitoring
containerImage: ghcr.io/grafana/tempo-operator/tempo-operator:v0.13.0
createdAt: "2024-10-07T07:11:28Z"
createdAt: "2024-10-09T16:46:12Z"
description: Create and manage deployments of Tempo, a high-scale distributed
tracing backend.
operatorframework.io/cluster-monitoring: "true"
Expand Down Expand Up @@ -307,6 +307,19 @@ spec:
"{"namespace": "<tempo_stack_namespace>", "resource": "pods", "verb": "get"}'
displayName: SAR
path: jaegerui.authentication.sar
- description: 'FindTracesConcurrentRequests defines how many concurrent requests
a single trace search can submit. A trace search in Jaeger submits limit+1
requests. The first request finds the trace IDs and the remaining requests
fetch the entire traces by ID. This property allows Jaeger to fetch those
traces in parallel. Note that by default a single Tempo querier can process
20 concurrent search jobs. Increasing this property might require scaling
up querier instances, especially on error "job queue full". See also Tempo''s
extraConfig: querier.max_concurrent_queries (20 default), query_frontend.max_outstanding_per_tenant
(2000 default). Increase the latter if the query-frontend returns 429'
displayName: FindTracesConcurrentRequests
path: jaegerui.findTracesConcurrentRequests
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:advanced
- description: Annotations defines the annotations of the Ingress object.
displayName: Annotations
path: jaegerui.ingress.annotations
Expand Down Expand Up @@ -1002,6 +1015,19 @@ spec:
path: template.queryFrontend.jaegerQuery.enabled
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:booleanSwitch
- description: 'FindTracesConcurrentRequests defines how many concurrent requests
a single trace search can submit. A trace search in Jaeger submits limit+1
requests. The first request finds the trace IDs and the remaining requests
fetch the entire traces by ID. This property allows Jaeger to fetch those
traces in parallel. Note that by default a single Tempo querier can process
20 concurrent search jobs. Increasing this property might require scaling
up querier instances, especially on error "job queue full". See also Tempo''s
extraConfig: querier.max_concurrent_queries (20 default), query_frontend.max_outstanding_per_tenant
(2000 default). Increase the latter if the query-frontend returns 429'
displayName: FindTracesConcurrentRequests
path: template.queryFrontend.jaegerQuery.findTracesConcurrentRequests
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:advanced
- description: Ingress defines the options for the Jaeger Query ingress.
displayName: Jaeger Query UI Ingress Settings
path: template.queryFrontend.jaegerQuery.ingress
Expand Down Expand Up @@ -1427,7 +1453,7 @@ spec:
- name: RELATED_IMAGE_JAEGER_QUERY
value: docker.io/jaegertracing/jaeger-query:1.62.0
- name: RELATED_IMAGE_TEMPO_QUERY
value: docker.io/grafana/tempo-query:main-2999520
value: docker.io/grafana/tempo-query:main-1de25ca
- name: RELATED_IMAGE_TEMPO_GATEWAY
value: quay.io/observatorium/api:main-2024-08-05-11d0d94
- name: RELATED_IMAGE_TEMPO_GATEWAY_OPA
Expand Down Expand Up @@ -1576,7 +1602,7 @@ spec:
name: tempo
- image: docker.io/jaegertracing/jaeger-query:1.62.0
name: jaeger-query
- image: docker.io/grafana/tempo-query:main-2999520
- image: docker.io/grafana/tempo-query:main-1de25ca
name: tempo-query
- image: quay.io/observatorium/api:main-2024-08-05-11d0d94
name: tempo-gateway
Expand Down
11 changes: 11 additions & 0 deletions bundle/community/manifests/tempo.grafana.com_tempomonolithics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,17 @@ spec:
description: Enabled defines if the Jaeger UI component should
be created.
type: boolean
findTracesConcurrentRequests:
description: |-
FindTracesConcurrentRequests defines how many concurrent requests a single trace search can submit.
A trace search in Jaeger submits limit+1 requests: the first request finds the trace IDs and the remaining
requests fetch the entire traces by ID. This property allows Jaeger to fetch those traces in parallel.
Note that by default a single Tempo querier can process 20 concurrent search jobs.
Increasing this property might require scaling up querier instances, especially on error "job queue full".
See also Tempo's extraConfig:
querier.max_concurrent_queries (20 default)
query_frontend.max_outstanding_per_tenant (2000 default). Increase it if the query-frontend returns 429.
type: integer
ingress:
description: Ingress defines the Ingress configuration for the
Jaeger UI.
Expand Down
11 changes: 11 additions & 0 deletions bundle/community/manifests/tempo.grafana.com_tempostacks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2391,6 +2391,17 @@ spec:
description: Enabled defines if the Jaeger Query component
should be created.
type: boolean
findTracesConcurrentRequests:
description: |-
FindTracesConcurrentRequests defines how many concurrent requests a single trace search can submit.
A trace search in Jaeger submits limit+1 requests: the first request finds the trace IDs and the remaining
requests fetch the entire traces by ID. This property allows Jaeger to fetch those traces in parallel.
Note that by default a single Tempo querier can process 20 concurrent search jobs.
Increasing this property might require scaling up querier instances, especially on error "job queue full".
See also Tempo's extraConfig:
querier.max_concurrent_queries (20 default)
query_frontend.max_outstanding_per_tenant (2000 default). Increase it if the query-frontend returns 429.
type: integer
ingress:
description: Ingress defines the options for the Jaeger
Query ingress.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ metadata:
capabilities: Deep Insights
categories: Logging & Tracing,Monitoring
containerImage: ghcr.io/grafana/tempo-operator/tempo-operator:v0.13.0
createdAt: "2024-10-07T07:11:27Z"
createdAt: "2024-10-09T16:46:11Z"
description: Create and manage deployments of Tempo, a high-scale distributed
tracing backend.
operatorframework.io/cluster-monitoring: "true"
Expand Down Expand Up @@ -307,6 +307,19 @@ spec:
"{"namespace": "<tempo_stack_namespace>", "resource": "pods", "verb": "get"}'
displayName: SAR
path: jaegerui.authentication.sar
- description: 'FindTracesConcurrentRequests defines how many concurrent requests
a single trace search can submit. A trace search in Jaeger submits limit+1
requests. The first request finds the trace IDs and the remaining requests
fetch the entire traces by ID. This property allows Jaeger to fetch those
traces in parallel. Note that by default a single Tempo querier can process
20 concurrent search jobs. Increasing this property might require scaling
up querier instances, especially on error "job queue full". See also Tempo''s
extraConfig: querier.max_concurrent_queries (20 default), query_frontend.max_outstanding_per_tenant
(2000 default). Increase the latter if the query-frontend returns 429'
displayName: FindTracesConcurrentRequests
path: jaegerui.findTracesConcurrentRequests
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:advanced
- description: Annotations defines the annotations of the Ingress object.
displayName: Annotations
path: jaegerui.ingress.annotations
Expand Down Expand Up @@ -1002,6 +1015,19 @@ spec:
path: template.queryFrontend.jaegerQuery.enabled
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:booleanSwitch
- description: 'FindTracesConcurrentRequests defines how many concurrent requests
a single trace search can submit. A trace search in Jaeger submits limit+1
requests. The first request finds the trace IDs and the remaining requests
fetch the entire traces by ID. This property allows Jaeger to fetch those
traces in parallel. Note that by default a single Tempo querier can process
20 concurrent search jobs. Increasing this property might require scaling
up querier instances, especially on error "job queue full". See also Tempo''s
extraConfig: querier.max_concurrent_queries (20 default), query_frontend.max_outstanding_per_tenant
(2000 default). Increase the latter if the query-frontend returns 429'
displayName: FindTracesConcurrentRequests
path: template.queryFrontend.jaegerQuery.findTracesConcurrentRequests
x-descriptors:
- urn:alm:descriptor:com.tectonic.ui:advanced
- description: Ingress defines the options for the Jaeger Query ingress.
displayName: Jaeger Query UI Ingress Settings
path: template.queryFrontend.jaegerQuery.ingress
Expand Down Expand Up @@ -1437,7 +1463,7 @@ spec:
- name: RELATED_IMAGE_JAEGER_QUERY
value: docker.io/jaegertracing/jaeger-query:1.62.0
- name: RELATED_IMAGE_TEMPO_QUERY
value: docker.io/grafana/tempo-query:main-2999520
value: docker.io/grafana/tempo-query:main-1de25ca
- name: RELATED_IMAGE_TEMPO_GATEWAY
value: quay.io/observatorium/api:main-2024-08-05-11d0d94
- name: RELATED_IMAGE_TEMPO_GATEWAY_OPA
Expand Down Expand Up @@ -1597,7 +1623,7 @@ spec:
name: tempo
- image: docker.io/jaegertracing/jaeger-query:1.62.0
name: jaeger-query
- image: docker.io/grafana/tempo-query:main-2999520
- image: docker.io/grafana/tempo-query:main-1de25ca
name: tempo-query
- image: quay.io/observatorium/api:main-2024-08-05-11d0d94
name: tempo-gateway
Expand Down
11 changes: 11 additions & 0 deletions bundle/openshift/manifests/tempo.grafana.com_tempomonolithics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,17 @@ spec:
description: Enabled defines if the Jaeger UI component should
be created.
type: boolean
findTracesConcurrentRequests:
description: |-
FindTracesConcurrentRequests defines how many concurrent requests a single trace search can submit.
A trace search in Jaeger submits limit+1 requests: the first request finds the trace IDs and the remaining
requests fetch the entire traces by ID. This property allows Jaeger to fetch those traces in parallel.
Note that by default a single Tempo querier can process 20 concurrent search jobs.
Increasing this property might require scaling up querier instances, especially on error "job queue full".
See also Tempo's extraConfig:
querier.max_concurrent_queries (20 default)
query_frontend.max_outstanding_per_tenant (2000 default). Increase it if the query-frontend returns 429.
type: integer
ingress:
description: Ingress defines the Ingress configuration for the
Jaeger UI.
Expand Down
Loading

0 comments on commit c9e77c0

Please sign in to comment.