Skip to content

Commit

Permalink
DEVPROD-12960 handle resource detector errors (#8496)
Browse files Browse the repository at this point in the history
  • Loading branch information
ybrill authored Nov 21, 2024
1 parent bb22540 commit 10a60c4
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 23 deletions.
9 changes: 4 additions & 5 deletions agent/agent.go
Original file line number Diff line number Diff line change
Expand Up @@ -167,11 +167,6 @@ func newWithCommunicator(ctx context.Context, opts Options, comm client.Communic
return nil
}})

if err := a.initOtel(ctx); err != nil {
grip.Error(errors.Wrap(err, "initializing otel"))
a.tracer = otel.GetTracerProvider().Tracer("noop_tracer")
}

return a, nil
}

Expand Down Expand Up @@ -207,6 +202,10 @@ func (a *Agent) Close(ctx context.Context) {
func (a *Agent) Start(ctx context.Context) error {
defer recovery.LogStackTraceAndExit("main agent thread")

if err := a.initOtel(ctx); err != nil {
a.tracer = otel.GetTracerProvider().Tracer("noop_tracer")
}

err := a.startStatusServer(ctx, a.opts.StatusPort)
if err != nil {
return errors.Wrap(err, "starting status server")
Expand Down
58 changes: 41 additions & 17 deletions agent/otel.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,7 @@ func (a *Agent) initOtel(ctx context.Context) error {
return nil
}

r, err := hostResource(ctx)
if err != nil {
return errors.Wrap(err, "making host resource")
}

var err error
a.otelGrpcConn, err = grpc.DialContext(ctx,
a.opts.TraceCollectorEndpoint,
grpc.WithTransportCredentials(credentials.NewTLS(nil)),
Expand All @@ -88,7 +84,7 @@ func (a *Agent) initOtel(ctx context.Context) error {
}
tp := sdktrace.NewTracerProvider(
sdktrace.WithBatcher(traceExporter),
sdktrace.WithResource(r),
sdktrace.WithResource(hostResource(ctx)),
)
tp.RegisterSpanProcessor(utility.NewAttributeSpanProcessor())
otel.SetTracerProvider(tp)
Expand Down Expand Up @@ -123,12 +119,7 @@ func (a *Agent) startMetrics(ctx context.Context, tc *internal.TaskConfig) (func
return nil, errors.Wrap(err, "making otel metrics exporter")
}

r, err := hostResource(ctx)
if err != nil {
return nil, errors.Wrap(err, "making resource")
}

r, err = resource.Merge(r, resource.NewSchemaless(tc.TaskAttributes()...))
r, err := resource.Merge(hostResource(ctx), resource.NewSchemaless(tc.TaskAttributes()...))
if err != nil {
return nil, errors.Wrap(err, "merging host resource with task attributes")
}
Expand Down Expand Up @@ -358,12 +349,41 @@ func addNetworkMetrics(meter metric.Meter) error {
return errors.Wrap(err, "registering network io callback")
}

func hostResource(ctx context.Context) (*resource.Resource, error) {
return resource.New(ctx,
resource.WithAttributes(semconv.ServiceName("evergreen-agent")),
resource.WithAttributes(semconv.ServiceVersion(evergreen.BuildRevision)),
resource.WithDetectors(ec2.NewResourceDetector(), ecs.NewResourceDetector()),
// hostResource builds the OTel resource describing this agent. It always
// carries the service name and build revision; environment attributes are
// layered on top when addEnvironmentAttributes succeeds. If that step fails,
// the error is logged and the base resource is returned instead, so this
// function never fails.
func hostResource(ctx context.Context) *resource.Resource {
	base := resource.NewSchemaless(
		semconv.ServiceName("evergreen-agent"),
		semconv.ServiceVersion(evergreen.BuildRevision),
	)

	withEnv, err := addEnvironmentAttributes(ctx, base)
	// Logged unconditionally, exactly as before; the wrapped error is nil
	// when err is nil.
	grip.Error(errors.Wrap(err, "adding environment attributes"))
	if err != nil {
		// Fall back to the service-only resource rather than propagating.
		return base
	}

	return withEnv
}

// addEnvironmentAttributes adds attributes to the resource about the environment itself. When running in EC2
// this includes information like the instance id and when running in ECS this includes information like the
// container name. It is intended to be a no-op when not running in EC2/ECS.
//
// Detectors run in a fixed order (EC2 first, then ECS) and each detected resource is merged on top of the
// result accumulated so far. The first detection or merge failure aborts the process: a nil resource is
// returned together with an error naming the detector that failed.
func addEnvironmentAttributes(ctx context.Context, r *resource.Resource) (*resource.Resource, error) {
	// Use an ordered slice rather than a map: Go randomizes map iteration order, which would make both the
	// merge order (and therefore which detector wins on overlapping keys) and the detector blamed for the
	// first error vary from run to run.
	detectors := []struct {
		name     string
		detector resource.Detector
	}{
		{name: "ec2", detector: ec2.NewResourceDetector()},
		{name: "ecs", detector: ecs.NewResourceDetector()},
	}

	for _, d := range detectors {
		detected, err := d.detector.Detect(ctx)
		if err != nil {
			return nil, errors.Wrapf(err, "detecting resource '%s'", d.name)
		}
		merged, err := resource.Merge(r, detected)
		if err != nil {
			return nil, errors.Wrapf(err, "merging resource for detector '%s'", d.name)
		}
		r = merged
	}

	return r, nil
}

// uploadTraces finds all the trace files in taskDir, uploads their contents
Expand All @@ -373,6 +393,10 @@ func hostResource(ctx context.Context) (*resource.Resource, error) {
// [OTel JSON protobuf encoding] https://opentelemetry.io/docs/specs/otel/protocol/otlp/#json-protobuf-encoding
// [file exporter] https://pkg.go.dev/github.com/open-telemetry/opentelemetry-collector-contrib/exporter/fileexporter
func (a *Agent) uploadTraces(ctx context.Context, taskDir string) error {
if a.otelGrpcConn == nil {
return errors.New("OTel gRPC connection has not been configured")
}

files, err := getTraceFiles(taskDir)
if err != nil {
return errors.Wrapf(err, "getting trace files for '%s'", taskDir)
Expand Down
2 changes: 1 addition & 1 deletion config.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ var (

// Agent version to control agent rollover. The format is the calendar date
// (YYYY-MM-DD).
AgentVersion = "2024-11-19"
AgentVersion = "2024-11-20"
)

const (
Expand Down

0 comments on commit 10a60c4

Please sign in to comment.