feat (spike): Package opentelemetry forwarding to xray #115
base: main
@@ -0,0 +1,63 @@
import { ENV_NAMES } from "@eventual/aws-runtime";
import lambda, {
  Architecture,
  Code,
  ILayerVersion,
  LayerVersion,
} from "aws-cdk-lib/aws-lambda";
import {
  ILogGroup,
  ILogStream,
  LogGroup,
  LogStream,
} from "aws-cdk-lib/aws-logs";
import { Construct } from "constructs";

interface TelemetryProps {
  serviceName: string;
}

export interface ITelemetry {
  logGroup: ILogGroup;
  logStreams: ILogStream[];
  collectorLayer: ILayerVersion;
}

export class Telemetry extends Construct {
  logGroup: LogGroup;
  logStreams: ILogStream[] = [];
  collectorLayer: ILayerVersion;

  constructor(scope: Construct, id: string, props: TelemetryProps) {
    super(scope, id);

    this.logGroup = new LogGroup(this, "LogGroup", {
      logGroupName: `${props.serviceName}-telemetry`,
    });

    this.collectorLayer = new LayerVersion(this, "telemetry-collector", {
      code: Code.fromAsset(
        require.resolve("@eventual/aws-runtime/mini-collector-cloudwatch")
      ),
      compatibleArchitectures: [Architecture.ARM_64],
    });
  }

  attachToFunction(fn: lambda.Function, componentName: string) {
    const logStream = new LogStream(this, `LogStream${componentName}`, {
      logGroup: this.logGroup,
      logStreamName: componentName,
    });
Review comment: Is this what we want? A log stream per function, for all time? Or is this just an experiment? There is a limit to the number of writes to a log stream. The orchestrator, for all workflow executions, would be limited to 5 TPS.

Reply: Hrm, ok, I didn't realise there was a throttle. I originally had it creating a new log stream every execution, but realised that without static streams it would be difficult to attach event listeners to the streams to forward logs to the real collector. With static streams we can just set that up in CDK.

Reply: An option I can think of instead, actually, is to just skip the logging-to-CloudWatch part. Instead the extension just sends the data to the otel collector running in a different lambda, over HTTP.
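One possible shape for the "with static streams we can just set that up in CDK" idea above is a CloudWatch Logs subscription filter on the shared telemetry log group, delivering everything to a collector Lambda. This is only a sketch, not part of the diff; the helper name and the `collectorFn` parameter are hypothetical:

```ts
import { Construct } from "constructs";
import {
  FilterPattern,
  ILogGroup,
  SubscriptionFilter,
} from "aws-cdk-lib/aws-logs";
import { LambdaDestination } from "aws-cdk-lib/aws-logs-destinations";
import type { IFunction } from "aws-cdk-lib/aws-lambda";

// Hypothetical helper: forward everything written to the shared telemetry
// log group (i.e. all of the static, per-component streams created by
// attachToFunction) to a collector Lambda via a subscription filter.
// Subscription filters are per log group, so one filter covers all streams.
export function forwardTelemetryToCollector(
  scope: Construct,
  logGroup: ILogGroup,
  collectorFn: IFunction // hypothetical collector/forwarder function
) {
  new SubscriptionFilter(scope, "ForwardTelemetry", {
    logGroup,
    destination: new LambdaDestination(collectorFn),
    filterPattern: FilterPattern.allEvents(),
  });
}
```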
    fn.addEnvironment(
      ENV_NAMES.TELEMETRY_LOG_GROUP_NAME,
      this.logGroup.logGroupName
    );
    fn.addEnvironment(
      ENV_NAMES.TELEMETRY_LOG_STREAM_NAME,
      logStream.logStreamName
    );
    fn.addEnvironment(ENV_NAMES.TELEMETRY_COMPONENT_NAME, componentName);
    fn.addLayers(this.collectorLayer);
    this.logStreams.push(logStream);
  }
}
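For orientation, a minimal sketch of how the Telemetry construct above might be consumed from a CDK stack. The stack name, runtime, handler, and asset path are illustrative assumptions, not part of this PR:

```ts
import { App, Stack } from "aws-cdk-lib";
import {
  Architecture,
  Code,
  Function as LambdaFunction,
  Runtime,
} from "aws-cdk-lib/aws-lambda";
import { Telemetry } from "./telemetry"; // hypothetical import path

const app = new App();
const stack = new Stack(app, "EventualServiceStack");

// One Telemetry per service: a shared log group plus the collector layer.
const telemetry = new Telemetry(stack, "Telemetry", {
  serviceName: "my-service",
});

// Illustrative function; in the real service this would be e.g. the orchestrator.
const orchestrator = new LambdaFunction(stack, "Orchestrator", {
  runtime: Runtime.NODEJS_16_X,
  architecture: Architecture.ARM_64, // the layer only ships an ARM_64 build
  handler: "index.handler",
  code: Code.fromAsset("dist/orchestrator"),
});

// Adds the collector layer, the TELEMETRY_* environment variables, and a
// per-component log stream in the shared group.
telemetry.attachToFunction(orchestrator, "orchestrator");
```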
@@ -21,6 +21,7 @@ import {
 } from "@eventual/core";
 import { ulid } from "ulidx";
 import { AWSActivityRuntimeClient } from "./activity-runtime-client.js";
+import { SpanKind, trace } from "@opentelemetry/api";
 
 export interface AWSWorkflowClientProps {
   readonly dynamo: DynamoDBClient;
@@ -50,68 +51,82 @@ export class AWSWorkflowClient extends WorkflowClient {
     timeoutSeconds,
   }: StartWorkflowRequest<W>) {
     const executionId = formatExecutionId(workflowName, executionName);
-    console.log("execution input:", input);
-
-    await this.props.dynamo.send(
-      new PutItemCommand({
-        TableName: this.props.tableName,
-        Item: {
-          pk: { S: ExecutionRecord.PRIMARY_KEY },
-          sk: { S: ExecutionRecord.sortKey(executionId) },
-          id: { S: executionId },
-          name: { S: executionName },
-          workflowName: { S: workflowName },
-          status: { S: ExecutionStatus.IN_PROGRESS },
-          startTime: { S: new Date().toISOString() },
-          ...(parentExecutionId
-            ? {
-                parentExecutionId: { S: parentExecutionId },
-                seq: { N: seq!.toString(10) },
-              }
-            : {}),
-        },
-      })
-    );
-
-    const workflowStartedEvent = createEvent<WorkflowStarted>({
-      type: WorkflowEventType.WorkflowStarted,
-      input,
-      workflowName,
-      // generate the time for the workflow to timeout based on when it was started.
-      // the timer will be started by the orchestrator so the client does not need to have access to the timer client.
-      timeoutTime: timeoutSeconds
-        ? new Date(new Date().getTime() + timeoutSeconds * 1000).toISOString()
-        : undefined,
-      context: {
-        name: executionName,
-        parentId: parentExecutionId,
+    const tracer = trace.getTracer(executionId, "0.0.0");
+    await tracer.startActiveSpan(
+      "startWorkflow",
+      {
+        attributes: { workflowName, input },
+        kind: SpanKind.PRODUCER,
Comment on lines +54 to +59:

Review comment: Does it make sense to trace in the client, or should we trace in the orchestrator (i.e. the callers of the client)? Not all of the callers of the client will have tracing on.

Reply: Yeah, the orchestrator probably makes more sense.
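A sketch of the alternative agreed in this thread: open the span at the call site (the orchestrator) and leave the client untraced. The wrapper function and the client/request shapes here are hypothetical, not code from this PR:

```ts
import { SpanKind, trace } from "@opentelemetry/api";

// Hypothetical call-site wrapper: the orchestrator owns the span, and any
// spans created further down become children via the active context.
const tracer = trace.getTracer("orchestrator");

export async function startWorkflowTraced(
  workflowClient: { startWorkflow(req: any): Promise<string> }, // shape assumed
  request: { workflowName: string; executionName?: string; input?: any }
): Promise<string> {
  return tracer.startActiveSpan(
    "startWorkflow",
    {
      kind: SpanKind.PRODUCER,
      attributes: { workflowName: request.workflowName },
    },
    async (span) => {
      try {
        return await workflowClient.startWorkflow(request);
      } finally {
        // startActiveSpan does not end the span automatically.
        span.end();
      }
    }
  );
}
```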
       },
-    });
-
-    await this.submitWorkflowTask(executionId, workflowStartedEvent);
-
+      async () => {
+        console.log("execution input:", input);
+
+        await this.props.dynamo.send(
+          new PutItemCommand({
+            TableName: this.props.tableName,
+            Item: {
+              pk: { S: ExecutionRecord.PRIMARY_KEY },
+              sk: { S: ExecutionRecord.sortKey(executionId) },
+              id: { S: executionId },
+              name: { S: executionName },
+              workflowName: { S: workflowName },
+              status: { S: ExecutionStatus.IN_PROGRESS },
+              startTime: { S: new Date().toISOString() },
+              ...(parentExecutionId
+                ? {
+                    parentExecutionId: { S: parentExecutionId },
+                    seq: { N: seq!.toString(10) },
+                  }
+                : {}),
+            },
+          })
+        );
+
+        const workflowStartedEvent = createEvent<WorkflowStarted>({
+          type: WorkflowEventType.WorkflowStarted,
+          input,
+          workflowName,
+          // generate the time for the workflow to timeout based on when it was started.
+          // the timer will be started by the orchestrator so the client does not need to have access to the timer client.
+          timeoutTime: timeoutSeconds
+            ? new Date(
+                new Date().getTime() + timeoutSeconds * 1000
+              ).toISOString()
+            : undefined,
+          context: {
+            name: executionName,
+            parentId: parentExecutionId,
+          },
+        });
+
+        await this.submitWorkflowTask(executionId, workflowStartedEvent);
+      }
+    );
     return executionId;
   }
 
   public async submitWorkflowTask(
     executionId: string,
     ...events: HistoryStateEvent[]
   ) {
-    // send workflow task to workflow queue
-    const workflowTask: SQSWorkflowTaskMessage = {
-      task: {
-        executionId,
-        events,
-      },
-    };
-
-    await this.props.sqs.send(
-      new SendMessageCommand({
-        MessageBody: JSON.stringify(workflowTask),
-        QueueUrl: this.props.workflowQueueUrl,
-        MessageGroupId: executionId,
-      })
-    );
+    const tracer = trace.getTracer(executionId, "0.0.0");
+    await tracer.startActiveSpan("submitWorkflowTask", async () => {
+      // send workflow task to workflow queue
+      const workflowTask: SQSWorkflowTaskMessage = {
+        task: {
+          executionId,
+          events,
+        },
+      };
+
+      await this.props.sqs.send(
+        new SendMessageCommand({
+          MessageBody: JSON.stringify(workflowTask),
+          QueueUrl: this.props.workflowQueueUrl,
+          MessageGroupId: executionId,
+        })
+      );
+    });
   }
 
   async getExecutions(): Promise<Execution[]> {
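Since submitWorkflowTask now wraps the SendMessage call in a span, a natural follow-up (not part of this diff) would be to propagate the trace context through the SQS message so the orchestrator can continue the same trace. A hedged sketch using the standard @opentelemetry/api propagation API; the helper name and the carrier-to-message-attribute mapping are assumptions, and it relies on a W3C propagator being registered by the SDK:

```ts
import { context, propagation } from "@opentelemetry/api";
import { SendMessageCommand, SQSClient } from "@aws-sdk/client-sqs";

// Hypothetical helper: inject the active trace context into SQS message
// attributes so the consumer (the orchestrator) can extract and continue it.
export async function sendWithTraceContext(
  sqs: SQSClient,
  queueUrl: string,
  executionId: string,
  body: string
) {
  const carrier: Record<string, string> = {};
  // With the default W3C propagator registered, this adds e.g. "traceparent".
  propagation.inject(context.active(), carrier);

  await sqs.send(
    new SendMessageCommand({
      QueueUrl: queueUrl,
      MessageGroupId: executionId,
      MessageBody: body,
      MessageAttributes: Object.fromEntries(
        Object.entries(carrier).map(([key, value]) => [
          key,
          { DataType: "String", StringValue: value },
        ])
      ),
    })
  );
}
```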
Review comment: Not sure the scheduler forwarder needs this? What is it logging? If it does need it, then scheduler.handler also needs it.

Reply: Sure, can take it off.