From 729e0152bcedeb60d10e5d49cc6f275e5a239d4f Mon Sep 17 00:00:00 2001 From: Anu Thomas Chandy Date: Tue, 17 Dec 2024 07:40:51 -0800 Subject: [PATCH] Enabling tracing for GenAI inference APIs (#43284) * Enabling tracing for GenAI inference APIs. --- .vscode/cspell.json | 1 + sdk/ai/azure-ai-inference/assets.json | 2 +- .../checkstyle-suppressions.xml | 5 + sdk/ai/azure-ai-inference/pom.xml | 47 ++ .../inference/ChatCompletionClientTracer.java | 624 ++++++++++++++++++ .../inference/ChatCompletionsAsyncClient.java | 23 +- .../ai/inference/ChatCompletionsClient.java | 23 +- .../ChatCompletionsClientBuilder.java | 24 +- .../TraceChatCompletionsToolCallSample.java | 314 +++++++++ ...treamingChatCompletionsToolCallSample.java | 315 +++++++++ .../ChatCompletionClientTracerTest.java | 416 ++++++++++++ .../ChatCompletionsSyncClientTest.java | 115 +++- 12 files changed, 1890 insertions(+), 19 deletions(-) create mode 100644 sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionClientTracer.java create mode 100644 sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceChatCompletionsToolCallSample.java create mode 100644 sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceStreamingChatCompletionsToolCallSample.java create mode 100644 sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionClientTracerTest.java diff --git a/.vscode/cspell.json b/.vscode/cspell.json index 3db28c78b3be4..85b3c51666611 100644 --- a/.vscode/cspell.json +++ b/.vscode/cspell.json @@ -839,6 +839,7 @@ { "filename": "/sdk/ai/**", "words": [ + "OTEL", "ubinary", "UBINARY" ] diff --git a/sdk/ai/azure-ai-inference/assets.json b/sdk/ai/azure-ai-inference/assets.json index 47460db31c1af..5aa919d38ad8b 100644 --- a/sdk/ai/azure-ai-inference/assets.json +++ b/sdk/ai/azure-ai-inference/assets.json @@ -2,5 +2,5 @@ "AssetsRepo" : "Azure/azure-sdk-assets", "AssetsRepoPrefixPath" : "java", "TagPrefix" : 
"java/ai/azure-ai-inference", - "Tag" : "java/ai/azure-ai-inference_5c740d7f95" + "Tag" : "java/ai/azure-ai-inference_47913ffcfa" } \ No newline at end of file diff --git a/sdk/ai/azure-ai-inference/checkstyle-suppressions.xml b/sdk/ai/azure-ai-inference/checkstyle-suppressions.xml index ace6d425bacc4..a2b6fac02675d 100644 --- a/sdk/ai/azure-ai-inference/checkstyle-suppressions.xml +++ b/sdk/ai/azure-ai-inference/checkstyle-suppressions.xml @@ -11,4 +11,9 @@ + + + + + diff --git a/sdk/ai/azure-ai-inference/pom.xml b/sdk/ai/azure-ai-inference/pom.xml index 0d1d948bafa04..0d0f9bcae14ea 100644 --- a/sdk/ai/azure-ai-inference/pom.xml +++ b/sdk/ai/azure-ai-inference/pom.xml @@ -49,6 +49,15 @@ checkstyle-suppressions.xml false spotbugs-exclude.xml + + --add-opens com.azure.ai.inference/com.azure.ai.inference=ALL-UNNAMED + + --add-exports com.azure.core/com.azure.core.implementation.util=ALL-UNNAMED + --add-opens com.azure.core/com.azure.core.implementation.util=ALL-UNNAMED + --add-reads com.azure.ai.inference=com.azure.http.netty + + --add-reads com.azure.ai.inference=com.azure.core.tracing.opentelemetry + @@ -85,5 +94,43 @@ 1.0.0 test + + com.azure + azure-core-tracing-opentelemetry + 1.0.0-beta.52 + test + + + io.opentelemetry + opentelemetry-sdk-extension-autoconfigure + 1.43.0 + test + + + io.opentelemetry + opentelemetry-exporter-otlp + 1.43.0 + test + + + + + + org.apache.maven.plugins + maven-enforcer-plugin + 3.5.0 + + + + + + io.opentelemetry:opentelemetry-exporter-otlp:[1.43.0] + + + + + + + diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionClientTracer.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionClientTracer.java new file mode 100644 index 0000000000000..09e7a7a28b08d --- /dev/null +++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionClientTracer.java @@ -0,0 +1,624 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. 
+// Licensed under the MIT License. + +package com.azure.ai.inference; + +import com.azure.ai.inference.models.ChatChoice; +import com.azure.ai.inference.models.ChatCompletions; +import com.azure.ai.inference.models.ChatCompletionsOptions; +import com.azure.ai.inference.models.ChatCompletionsToolCall; +import com.azure.ai.inference.models.ChatRequestMessage; +import com.azure.ai.inference.models.ChatRole; +import com.azure.ai.inference.models.CompletionsFinishReason; +import com.azure.ai.inference.models.CompletionsUsage; +import com.azure.ai.inference.models.StreamingChatChoiceUpdate; +import com.azure.ai.inference.models.StreamingChatCompletionsUpdate; +import com.azure.ai.inference.models.StreamingChatResponseMessageUpdate; +import com.azure.ai.inference.models.StreamingChatResponseToolCallUpdate; +import com.azure.core.http.rest.RequestOptions; +import com.azure.core.util.BinaryData; +import com.azure.core.util.Configuration; +import com.azure.core.util.ConfigurationProperty; +import com.azure.core.util.ConfigurationPropertyBuilder; +import com.azure.core.util.Context; +import com.azure.core.util.CoreUtils; +import com.azure.core.util.logging.ClientLogger; +import com.azure.core.util.tracing.SpanKind; +import com.azure.core.util.tracing.StartSpanOptions; +import com.azure.core.util.tracing.Tracer; +import com.azure.json.JsonProviders; +import com.azure.json.JsonWriter; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.ArrayDeque; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.StringJoiner; +import java.util.function.BiFunction; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Tracing for the convenience methods 
in {@link com.azure.ai.inference.ChatCompletionsClient} and + * {@link com.azure.ai.inference.ChatCompletionsAsyncClient}. + *

+ * For more about the OTel semantic conventions this type enables, see + * Azure AI Inference semantic conventions. + *

+ */ +final class ChatCompletionClientTracer { + public static final String OTEL_SCHEMA_URL = "https://opentelemetry.io/schemas/1.29.0"; + + private static final ClientLogger LOGGER = new ClientLogger(ChatCompletionClientTracer.class); + private static final String INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference"; + private static final String FINISH_REASON_ERROR = "{\"finish_reason\": \"error\"}"; + private static final String FINISH_REASON_CANCELLED = "{\"finish_reason\": \"cancelled\"}"; + private static final StartSpanOptions START_SPAN_OPTIONS = new StartSpanOptions(SpanKind.CLIENT); + + private static final ConfigurationProperty CAPTURE_MESSAGE_CONTENT + = ConfigurationPropertyBuilder.ofBoolean("azure.tracing.gen_ai.content_recording_enabled") + .environmentVariableName("AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED") + .systemPropertyName("azure.tracing.gen_ai.content_recording_enabled") + .shared(true) + .defaultValue(false) + .build(); + private static final Configuration GLOBAL_CONFIG = Configuration.getGlobalConfiguration(); + + private final String host; + private final int port; + private final boolean captureContent; + private final Tracer tracer; + + // + /** + * Reference to the operation performing the actual completion call. + */ + @FunctionalInterface + public interface SyncCompleteOperation { + /** + * invokes the operation. + * + * @param completeRequest The completeRequest parameter for the {@code operation}. + * @param requestOptions The requestOptions parameter for the {@code operation}. + * @return chat completions for the provided chat messages. + */ + ChatCompletions invoke(BinaryData completeRequest, RequestOptions requestOptions); + } + + /** + * Reference to the async operation performing the actual completion call. + */ + @FunctionalInterface + public interface CompleteOperation { + /** + * invokes the operation. + * + * @param completeRequest The completeRequest parameter for the {@code operation}. 
+ * @param requestOptions The requestOptions parameter for the {@code operation}. + * @return chat completions for the provided chat messages. + */ + Mono invoke(BinaryData completeRequest, RequestOptions requestOptions); + } + + /** + * Reference to the async operation performing the actual completion streaming call. + */ + @FunctionalInterface + public interface StreamingCompleteOperation { + /** + * invokes the operation. + * + * @param completeRequest The completeRequest parameter for the {@code operation}. + * @param requestOptions The requestOptions parameter for the {@code operation}. + * @return chat completions streaming for the provided chat messages. + */ + Flux invoke(BinaryData completeRequest, RequestOptions requestOptions); + } + // + + /** + * Creates ChatCompletionClientTracer. + * + * @param endpoint the service endpoint. + * @param configuration the {@link Configuration} instance to check if message content needs to be captured, + * if {@code null} is passed then {@link Configuration#getGlobalConfiguration()} will be used. + * @param tracer the Tracer instance. + */ + ChatCompletionClientTracer(String endpoint, Configuration configuration, Tracer tracer) { + final URL url = parse(endpoint); + if (url != null) { + this.host = url.getHost(); + this.port = url.getPort() == -1 ? url.getDefaultPort() : url.getPort(); + } else { + this.host = null; + this.port = -1; + } + this.captureContent = configuration == null + ? GLOBAL_CONFIG.get(CAPTURE_MESSAGE_CONTENT) + : configuration.get(CAPTURE_MESSAGE_CONTENT); + this.tracer = tracer; + } + + /** + * Traces the synchronous convenience API - {@link com.azure.ai.inference.ChatCompletionsClient#complete(ChatCompletionsOptions)}. + * + * @param request input options containing chat options for complete API. + * @param operation the operation performing the actual completion call. + * @param completeRequest The completeRequest parameter for the {@code operation}. 
+ * @param requestOptions The requestOptions parameter for the {@code operation}. + * @return chat completions for the provided chat messages. + */ + @SuppressWarnings("try") + ChatCompletions traceSyncComplete(ChatCompletionsOptions request, SyncCompleteOperation operation, + BinaryData completeRequest, RequestOptions requestOptions) { + if (!tracer.isEnabled()) { + return operation.invoke(completeRequest, requestOptions); + } + final Context span = tracer.start(spanName(request), START_SPAN_OPTIONS, parentSpan(requestOptions)); + if (tracer.isRecording(span)) { + traceCompletionRequestAttributes(request, span); + traceCompletionRequestEvents(request.getMessages(), span); + } + + try (AutoCloseable ignored = tracer.makeSpanCurrent(span)) { + final ChatCompletions response = operation.invoke(completeRequest, requestOptions.setContext(span)); + if (tracer.isRecording(span)) { + traceCompletionResponseAttributes(response, span); + traceCompletionResponseEvents(response, span); + } + tracer.end(null, null, span); + return response; + } catch (Exception e) { + tracer.end(null, e, span); + sneakyThrows(e); + } + return null; + } + + /** + * Traces the convenience API - {@link com.azure.ai.inference.ChatCompletionsAsyncClient#complete(ChatCompletionsOptions)}. + * + * @param request input options containing chat options for complete API. + * @param operation the operation performing the actual completion call. + * @param completeRequest The completeRequest parameter for the {@code operation}. + * @param requestOptions The requestOptions parameter for the {@code operation}. + * @return chat completions for the provided chat messages. 
+ */ + Mono traceComplete(ChatCompletionsOptions request, CompleteOperation operation, + BinaryData completeRequest, RequestOptions requestOptions) { + if (!tracer.isEnabled()) { + return operation.invoke(completeRequest, requestOptions); + } + + final Mono resourceSupplier = Mono.fromSupplier(() -> { + final Context span = tracer.start(spanName(request), START_SPAN_OPTIONS, parentSpan(requestOptions)); + if (tracer.isRecording(span)) { + traceCompletionRequestAttributes(request, span); + traceCompletionRequestEvents(request.getMessages(), span); + } + return span; + }); + + final Function> resourceClosure = span -> { + final RequestOptions rOptions = requestOptions.setContext(span); + + return operation.invoke(completeRequest, rOptions).map(response -> { + if (tracer.isRecording(span)) { + traceCompletionResponseAttributes(response, span); + traceCompletionResponseEvents(response, span); + } + return response; + }); + }; + + final Function> asyncComplete = (span) -> { + tracer.end(null, null, span); + return Mono.empty(); + }; + + final BiFunction> asyncError = (span, throwable) -> { + if (tracer.isRecording(span)) { + traceChoiceEvent(FINISH_REASON_ERROR, span); + } + tracer.end(null, throwable, span); + return Mono.empty(); + }; + + final Function> asyncCancel = span -> { + if (tracer.isRecording(span)) { + traceChoiceEvent(FINISH_REASON_CANCELLED, span); + } + tracer.end("cancelled", null, span); + return Mono.empty(); + }; + + return Mono.usingWhen(resourceSupplier, resourceClosure, asyncComplete, asyncError, asyncCancel); + } + + /** + * Traces the convenience APIs - {@link com.azure.ai.inference.ChatCompletionsClient#completeStream(ChatCompletionsOptions)} + * and {@link com.azure.ai.inference.ChatCompletionsAsyncClient#completeStream(ChatCompletionsOptions)}}. + * + * @param request input options containing chat options for complete streaming API. + * @param operation the operation performing the actual streaming completion call. 
+ * @param completeRequest The completeRequest parameter for the {@code operation}. + * @param requestOptions The requestOptions parameter for the {@code operation}. + * @return chat completions streaming for the provided chat messages. + */ + Flux traceStreamingCompletion(ChatCompletionsOptions request, + StreamingCompleteOperation operation, BinaryData completeRequest, RequestOptions requestOptions) { + if (!tracer.isEnabled()) { + return operation.invoke(completeRequest, requestOptions); + } + final StreamingChatCompletionsState state + = new StreamingChatCompletionsState(captureContent, request, operation, completeRequest, requestOptions); + + final Mono resourceSupplier = Mono.fromSupplier(() -> { + final StreamingChatCompletionsState resource = state; + + final Context span + = tracer.start(spanName(resource.request), START_SPAN_OPTIONS, parentSpan(resource.requestOptions)); + if (tracer.isRecording(span)) { + traceCompletionRequestAttributes(resource.request, span); + traceCompletionRequestEvents(resource.request.getMessages(), span); + } + return resource.setSpan(span); + }); + + final Function> resourceClosure + = resource -> { + final Context span = resource.span; + + final RequestOptions rOptions = resource.requestOptions.setContext(span); + final Flux completionChunks + = resource.operation.invoke(resource.completeRequest, rOptions); + if (tracer.isRecording(span)) { + return completionChunks.doOnNext(resource::onNextChunk); + } else { + return completionChunks; + } + }; + + final Function> asyncComplete = resource -> { + final Context span = resource.span; + if (tracer.isRecording(span)) { + final StreamingChatCompletionsUpdate lastChunk = resource.lastChunk; + final String finishReasons = resource.getFinishReasons(); + traceCompletionResponseAttributes(lastChunk, finishReasons, span); + traceChoiceEvent(resource.toJson(), span); + } + tracer.end(null, null, span); + return Mono.empty(); + }; + + final BiFunction> asyncError = (resource, throwable) -> 
{ + final Context span = resource.span; + + if (tracer.isRecording(span)) { + traceChoiceEvent(FINISH_REASON_ERROR, span); + } + tracer.end(null, throwable, span); + return Mono.empty(); + }; + + final Function> asyncCancel = resource -> { + final Context span = resource.span; + + if (tracer.isRecording(span)) { + traceChoiceEvent(FINISH_REASON_CANCELLED, span); + } + tracer.end("cancelled", null, span); + return Mono.empty(); + }; + + return Flux.usingWhen(resourceSupplier, resourceClosure, asyncComplete, asyncError, asyncCancel); + } + + // + private String spanName(ChatCompletionsOptions completeRequest) { + return CoreUtils.isNullOrEmpty(completeRequest.getModel()) ? "chat" : "chat " + completeRequest.getModel(); + } + + private void traceCompletionRequestAttributes(ChatCompletionsOptions request, Context span) { + final String modelId = request.getModel(); + tracer.setAttribute("gen_ai.operation.name", "chat", span); + tracer.setAttribute("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME, span); + tracer.setAttribute("gen_ai.request.model", CoreUtils.isNullOrEmpty(modelId) ? 
"chat" : modelId, span); + if (request.getFrequencyPenalty() != null) { + tracer.setAttribute("gen_ai.request.frequency_penalty", request.getFrequencyPenalty(), span); + } + if (request.getMaxTokens() != null) { + tracer.setAttribute("gen_ai.request.max_tokens", request.getMaxTokens(), span); + } + if (request.getPresencePenalty() != null) { + tracer.setAttribute("gen_ai.request.presence_penalty", request.getPresencePenalty(), span); + } + if (request.getStop() != null) { + final StringJoiner stopSequence = new StringJoiner(",", "[", "]"); + for (String stop : request.getStop()) { + stopSequence.add(stop); + } + tracer.setAttribute("gen_ai.request.stop_sequences", stopSequence.toString(), span); + } + if (request.getTemperature() != null) { + tracer.setAttribute("gen_ai.request.temperature", request.getTemperature(), span); + } + if (request.getTopP() != null) { + tracer.setAttribute("gen_ai.request.top_p", request.getTopP(), span); + } + if (host != null) { + tracer.setAttribute("server.address", host, span); + if (port != 443) { + tracer.setAttribute("server.port", port, span); + } + } + } + + private void traceCompletionRequestEvents(List messages, Context span) { + if (!captureContent || messages == null) { + return; + } + for (ChatRequestMessage message : messages) { + final ChatRole role = message.getRole(); + if (role != null) { + final String eventName = "gen_ai." 
+ role.getValue() + ".message"; + final String eventContent = toJsonString(message); + if (eventContent != null) { + final Map eventAttributes = new HashMap<>(2); + eventAttributes.put("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME); + eventAttributes.put("gen_ai.event.content", eventContent); + tracer.addEvent(eventName, eventAttributes, null, span); + } + } + } + } + + private void traceCompletionResponseAttributes(ChatCompletions response, Context span) { + tracer.setAttribute("gen_ai.response.id", response.getId(), span); + tracer.setAttribute("gen_ai.response.model", response.getModel(), span); + final CompletionsUsage usage = response.getUsage(); + if (usage != null) { + tracer.setAttribute("gen_ai.usage.input_tokens", usage.getPromptTokens(), span); + tracer.setAttribute("gen_ai.usage.output_tokens", usage.getCompletionTokens(), span); + } + final List choices = response.getChoices(); + if (choices != null) { + tracer.setAttribute("gen_ai.response.finish_reasons", getFinishReasons(choices), span); + } + } + + private void traceCompletionResponseAttributes(StreamingChatCompletionsUpdate response, String finishReasons, + Context span) { + tracer.setAttribute("gen_ai.response.id", response.getId(), span); + tracer.setAttribute("gen_ai.response.model", response.getModel(), span); + final CompletionsUsage usage = response.getUsage(); + if (usage != null) { + tracer.setAttribute("gen_ai.usage.input_tokens", usage.getPromptTokens(), span); + tracer.setAttribute("gen_ai.usage.output_tokens", usage.getCompletionTokens(), span); + } + tracer.setAttribute("gen_ai.response.finish_reasons", finishReasons, span); + } + + private void traceCompletionResponseEvents(ChatCompletions response, Context span) { + final List choices = response.getChoices(); + if (choices != null) { + for (ChatChoice choice : choices) { + traceChoiceEvent(toJsonString(choice), span); + } + } + } + + private void traceChoiceEvent(String choiceContent, Context span) { + final Map eventAttributes = 
new HashMap<>(2); + eventAttributes.put("gen_ai.system", INFERENCE_GEN_AI_SYSTEM_NAME); + eventAttributes.put("gen_ai.event.content", choiceContent); + tracer.addEvent("gen_ai.choice", eventAttributes, null, span); + } + + private String toJsonString(ChatRequestMessage message) { + try { + return message.toJsonString(); + } catch (IOException e) { + LOGGER.verbose("'ChatRequestMessage' serialization error", e); + } + return null; + } + + /** + * Serializes a {@link ChatChoice} into the JSON payload recorded on a {@code gen_ai.choice} event. + * The message content and the full tool-call bodies are written only when content capture is enabled; + * otherwise only the id and type of each tool call are written. + * + * @param choice the choice to serialize. + * @return the JSON string, or {@code null} if serialization fails with an {@link IOException}. + */ + private String toJsonString(ChatChoice choice) { + try (ByteArrayOutputStream stream = new ByteArrayOutputStream(); + JsonWriter writer = JsonProviders.createWriter(stream)) { + writer.writeStartObject(); + writer.writeStartObject("message"); + if (choice.getMessage() != null) { + // The content is read from the message, so it must sit behind the same null guard as the tool calls. + if (captureContent) { + writer.writeStringField("content", choice.getMessage().getContent()); + } + final List toolCalls = choice.getMessage().getToolCalls(); + if (toolCalls != null && !toolCalls.isEmpty()) { + writer.writeArrayField("tool_calls", toolCalls, (w, toolCall) -> { + if (captureContent) { + toolCall.toJson(w); + } else { + w.writeStartObject(); + w.writeStringField("id", toolCall.getId()); + w.writeStringField("type", toolCall.getType()); + w.writeEndObject(); + } + }); + } + } + writer.writeEndObject(); + final CompletionsFinishReason finishReason = choice.getFinishReason(); + if (finishReason != null) { + writer.writeStringField("finish_reason", finishReason.getValue()); + } + writer.writeIntField("index", choice.getIndex()); + writer.writeEndObject(); + writer.flush(); + return new String(stream.toByteArray(), StandardCharsets.UTF_8); + } catch (IOException e) { + LOGGER.verbose("'ChatChoice' serialization error", e); + } + return null; + } + + private static String getFinishReasons(List choices) { + final StringJoiner finishReasons = new StringJoiner(",", "[", "]"); + for (ChatChoice choice : choices) { + final CompletionsFinishReason finishReason = choice.getFinishReason(); + if (finishReason != null) { + 
finishReasons.add(finishReason.getValue()); + } + } + return finishReasons.toString(); + } + + /** + * Parses the service endpoint into a {@link URL}. + * + * @param endpoint the service endpoint; may be {@code null} or empty. + * @return the parsed URL, or {@code null} when the endpoint is null/empty or cannot be converted to a URL. + */ + private static URL parse(String endpoint) { + if (CoreUtils.isNullOrEmpty(endpoint)) { + return null; + } + try { + final URI uri = new URI(endpoint); + // URI#toURL reports a non-absolute URI with IllegalArgumentException, not MalformedURLException. + return uri.toURL(); + } catch (MalformedURLException | URISyntaxException | IllegalArgumentException e) { + LOGGER.atWarning().log("service endpoint uri parse error.", e); + } + return null; + } + + @SuppressWarnings("unchecked") + private static void sneakyThrows(Throwable e) throws E { + throw (E) e; + } + + private static Context parentSpan(RequestOptions requestOptions) { + return requestOptions.getContext() == null ? Context.NONE : requestOptions.getContext(); + } + + private static final class StreamingChatCompletionsState { + private final boolean captureContent; + private final ChatCompletionsOptions request; + private final StreamingCompleteOperation operation; + private final BinaryData completeRequest; + private final RequestOptions requestOptions; + // mutable part of the state to accumulate partial data from Completion chunks. + private final StringBuilder content; + private final ArrayDeque toolCalls; // uses Dequeue to release slots once consumed. 
+ private final ArrayDeque toolCallIds; + private final ArrayDeque finishReasons; + private Context span; + private StreamingChatCompletionsUpdate lastChunk; + private CompletionsFinishReason finishReason; + private int index; + + StreamingChatCompletionsState(boolean captureContent, ChatCompletionsOptions request, + StreamingCompleteOperation operation, BinaryData completeRequest, RequestOptions requestOptions) { + this.captureContent = captureContent; + this.request = request; + this.operation = operation; + this.completeRequest = completeRequest; + this.requestOptions = requestOptions; + this.content = new StringBuilder(); + this.toolCalls = new ArrayDeque<>(); + this.toolCallIds = new ArrayDeque<>(); + this.finishReasons = new ArrayDeque<>(); + } + + StreamingChatCompletionsState setSpan(Context context) { + this.span = context; + return this; + } + + void onNextChunk(StreamingChatCompletionsUpdate chunk) { + this.lastChunk = chunk; + final List choices = chunk.getChoices(); + if (choices == null || choices.isEmpty()) { + return; + } + for (StreamingChatChoiceUpdate choice : choices) { + this.finishReason = choice.getFinishReason(); + this.index = choice.getIndex(); + if (choice.getFinishReason() != null) { + this.finishReasons.add(choice.getFinishReason()); + } + final StreamingChatResponseMessageUpdate delta = choice.getDelta(); + if (delta == null) { + continue; + } + final List toolCalls = delta.getToolCalls(); + if (this.captureContent) { + if (delta.getContent() != null) { + this.content.append(delta.getContent()); + } + if (toolCalls != null) { + this.toolCalls.addAll(toolCalls); + } + } else { + if (toolCalls != null) { + final List ids = toolCalls.stream() + .map(StreamingChatResponseToolCallUpdate::getId) + .filter(s -> !CoreUtils.isNullOrEmpty(s)) + .collect(Collectors.toList()); + this.toolCallIds.addAll(ids); + } + } + } + } + + String toJson() { + try (ByteArrayOutputStream stream = new ByteArrayOutputStream(); + JsonWriter writer = 
JsonProviders.createWriter(stream)) { + writer.writeStartObject(); + writer.writeStartObject("message"); + if (this.captureContent) { + writer.writeStringField("content", this.content.toString()); + writer.writeStartArray("tool_calls"); + StreamingChatResponseToolCallUpdate toolCall; + while ((toolCall = this.toolCalls.poll()) != null) { + toolCall.toJson(writer); + } + writer.writeEndArray(); + } else { + writer.writeStartArray("tool_calls"); + String toolCallId; + while ((toolCallId = this.toolCallIds.poll()) != null) { + writer.writeStartObject(); + writer.writeStringField("id", toolCallId); + writer.writeEndObject(); + } + writer.writeEndArray(); + } + writer.writeEndObject(); + if (this.finishReason != null) { + writer.writeStringField("finish_reason", this.finishReason.getValue()); + } + writer.writeIntField("index", this.index); + writer.writeEndObject(); + writer.flush(); + return new String(stream.toByteArray(), StandardCharsets.UTF_8); + } catch (IOException e) { + LOGGER.verbose("'StreamingChatCompletionsState' serialization error", e); + } + return null; + } + + String getFinishReasons() { + final StringJoiner finishReasonsSj = new StringJoiner(",", "[", "]"); + CompletionsFinishReason reason; + while ((reason = finishReasons.poll()) != null) { + finishReasonsSj.add(reason.getValue()); + } + return finishReasonsSj.toString(); + } + } + // +} diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsAsyncClient.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsAsyncClient.java index 2471ed1cfcc4b..f7ae795ad0f4e 100644 --- a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsAsyncClient.java +++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsAsyncClient.java @@ -39,14 +39,16 @@ public final class ChatCompletionsAsyncClient { @Generated private final ChatCompletionsClientImpl serviceClient; + private final 
ChatCompletionClientTracer tracer; + /** * Initializes an instance of ChatCompletionsAsyncClient class. * * @param serviceClient the service client implementation. */ - @Generated - ChatCompletionsAsyncClient(ChatCompletionsClientImpl serviceClient) { + ChatCompletionsAsyncClient(ChatCompletionsClientImpl serviceClient, ChatCompletionClientTracer tracer) { this.serviceClient = serviceClient; + this.tracer = tracer; } /** @@ -162,7 +164,7 @@ private Mono> completeWithResponse(BinaryData completeReque * Returns information about the AI model. * The method makes a REST API call to the `/info` route on the given endpoint. *

Response Body Schema

- * + * *
      * {@code
      * {
@@ -206,7 +208,15 @@ Mono> getModelInfoWithResponse(RequestOptions requestOption
     public Flux completeStream(ChatCompletionsOptions options) {
         ChatCompletionsOptionsAccessHelper.setStream(options, true);
         RequestOptions requestOptions = new RequestOptions();
-        Flux responseStream = completeWithResponse(BinaryData.fromObject(options), requestOptions)
+        // Hand the streaming call to the tracer so a single span covers the whole stream; the tracer ends
+        // the span on completion, error or cancellation.
+        final ChatCompletionClientTracer.StreamingCompleteOperation operation = this::completionStreaming;
+        return tracer.traceStreamingCompletion(options, operation, BinaryData.fromObject(options), requestOptions);
+    }
+
+    private Flux completionStreaming(BinaryData completeRequest,
+        RequestOptions requestOptions) {
+        Flux responseStream = completeWithResponse(completeRequest, requestOptions)
             .flatMapMany(response -> response.getValue().toFluxByteBuffer());
         InferenceServerSentEvents chatCompletionsStream
             = new InferenceServerSentEvents<>(responseStream, StreamingChatCompletionsUpdate.class);
@@ -271,8 +280,10 @@ public Mono complete(ChatCompletionsOptions options) {
         if (extraParams != null) {
             requestOptions.setHeader(HttpHeaderName.fromString("extra-parameters"), extraParams.toString());
         }
-        return completeWithResponse(completeRequest, requestOptions).flatMap(FluxUtil::toMono)
-            .map(protocolMethodData -> protocolMethodData.toObject(ChatCompletions.class));
+        final ChatCompletionClientTracer.CompleteOperation operation
+            = (arg0, arg1) -> completeWithResponse(arg0, arg1).flatMap(FluxUtil::toMono)
+                .map(protocolMethodData -> protocolMethodData.toObject(ChatCompletions.class));
+        return tracer.traceComplete(options, operation, completeRequest, requestOptions);
     }
 
     /**
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClient.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClient.java
index 7a1a7a1b9f79a..77671227e57c9 100644
--- a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClient.java
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClient.java
@@ -38,14 +38,17 @@ public final class ChatCompletionsClient {
     @Generated
     private final ChatCompletionsClientImpl serviceClient;
 
+    private final ChatCompletionClientTracer tracer;
+
     /**
      * Initializes an instance of ChatCompletionsClient class.
      *
      * @param serviceClient the service client implementation.
+     * @param tracer the tracer that wraps the convenience {@code complete} and {@code completeStream} calls.
      */
-    @Generated
-    ChatCompletionsClient(ChatCompletionsClientImpl serviceClient) {
+    ChatCompletionsClient(ChatCompletionsClientImpl serviceClient, ChatCompletionClientTracer tracer) {
         this.serviceClient = serviceClient;
+        this.tracer = tracer;
     }
 
     /**
@@ -161,7 +163,7 @@ public Response completeWithResponse(BinaryData completeRequest, Req
      * Returns information about the AI model.
      * The method makes a REST API call to the `/info` route on the given endpoint.
      * 

Response Body Schema

- * + * *
      * {@code
      * {
@@ -224,7 +226,9 @@ public ChatCompletions complete(ChatCompletionsOptions options) {
         if (extraParams != null) {
             requestOptions.setHeader(HttpHeaderName.fromString("extra-parameters"), extraParams.toString());
         }
-        return completeWithResponse(completeRequest, requestOptions).getValue().toObject(ChatCompletions.class);
+        final ChatCompletionClientTracer.SyncCompleteOperation operation
+            = (arg0, arg1) -> completeWithResponse(arg0, arg1).getValue().toObject(ChatCompletions.class);
+        return tracer.traceSyncComplete(options, operation, completeRequest, requestOptions);
     }
 
     /**
@@ -284,11 +288,20 @@ public IterableStream completeStream(ChatComplet
         if (extraParams != null) {
             requestOptions.setHeader(HttpHeaderName.fromString("extra-parameters"), extraParams.toString());
         }
+        final ChatCompletionClientTracer.StreamingCompleteOperation operation
+            = (arg0, arg1) -> completionStreaming(arg0, arg1);
+        final Flux events
+            = tracer.traceStreamingCompletion(options, operation, completeRequest, requestOptions);
+        return new IterableStream<>(events);
+    }
+
+    private Flux<StreamingChatCompletionsUpdate> completionStreaming(BinaryData completeRequest,
+        RequestOptions requestOptions) {
         Flux<ByteBuffer> responseStream
             = completeWithResponse(completeRequest, requestOptions).getValue().toFluxByteBuffer();
         InferenceServerSentEvents<StreamingChatCompletionsUpdate> chatCompletionsStream
             = new InferenceServerSentEvents<>(responseStream, StreamingChatCompletionsUpdate.class);
-        return new IterableStream<>(chatCompletionsStream.getEvents());
+        return chatCompletionsStream.getEvents(); // completeStream wraps the traced Flux in an IterableStream
     }
 
     /**
diff --git a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClientBuilder.java b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClientBuilder.java
index 00dc0b24b9381..2c5ce1d746ded 100644
--- a/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClientBuilder.java
+++ b/sdk/ai/azure-ai-inference/src/main/java/com/azure/ai/inference/ChatCompletionsClientBuilder.java
@@ -340,6 +340,7 @@ private HttpPipeline createHttpPipeline() {
         HttpPipeline httpPipeline = new HttpPipelineBuilder().policies(policies.toArray(new HttpPipelinePolicy[0]))
             .httpClient(httpClient)
             .clientOptions(localClientOptions)
+            .tracer(createTracer())
             .build();
         return httpPipeline;
     }
@@ -349,9 +350,8 @@ private HttpPipeline createHttpPipeline() {
      *
      * @return an instance of ChatCompletionsAsyncClient.
      */
-    @Generated
     public ChatCompletionsAsyncClient buildAsyncClient() {
-        return new ChatCompletionsAsyncClient(buildInnerClient());
+        return new ChatCompletionsAsyncClient(buildInnerClient(), createInferenceTracer());
     }
 
     /**
@@ -359,9 +359,25 @@ public ChatCompletionsAsyncClient buildAsyncClient() {
      *
      * @return an instance of ChatCompletionsClient.
      */
-    @Generated
     public ChatCompletionsClient buildClient() {
-        return new ChatCompletionsClient(buildInnerClient());
+        return new ChatCompletionsClient(buildInnerClient(), createInferenceTracer());
+    }
+
+    private com.azure.core.util.tracing.Tracer createTracer() { // used for the pipeline and the inference tracer
+        final String sdkName = PROPERTIES.getOrDefault(SDK_NAME, "UnknownName");
+        final String sdkVersion = PROPERTIES.getOrDefault(SDK_VERSION, "UnknownVersion");
+        final com.azure.core.util.LibraryTelemetryOptions libraryOptions
+            = new com.azure.core.util.LibraryTelemetryOptions(sdkName).setLibraryVersion(sdkVersion)
+                .setResourceProviderNamespace("Microsoft.CognitiveServices")
+                .setSchemaUrl(com.azure.ai.inference.ChatCompletionClientTracer.OTEL_SCHEMA_URL);
+        final com.azure.core.util.TracingOptions userTracingOptions // null when no client options were set
+            = this.clientOptions != null ? this.clientOptions.getTracingOptions() : null;
+        return com.azure.core.util.tracing.TracerProvider.getDefaultProvider()
+            .createTracer(libraryOptions, userTracingOptions);
+    }
+
+    private ChatCompletionClientTracer createInferenceTracer() {
+        return new ChatCompletionClientTracer(endpoint, configuration, createTracer()); // fresh tracer per client
+    }
 
     private static final ClientLogger LOGGER = new ClientLogger(ChatCompletionsClientBuilder.class);
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceChatCompletionsToolCallSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceChatCompletionsToolCallSample.java
new file mode 100644
index 0000000000000..cace3c7c8a72b
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceChatCompletionsToolCallSample.java
@@ -0,0 +1,314 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatChoice;
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatCompletionsToolCall;
+import com.azure.ai.inference.models.ChatCompletionsToolDefinition;
+import com.azure.ai.inference.models.ChatRequestAssistantMessage;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestToolMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.CompletionsFinishReason;
+import com.azure.ai.inference.models.FunctionCall;
+import com.azure.ai.inference.models.FunctionDefinition;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.BinaryData;
+import com.azure.json.JsonProviders;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import io.opentelemetry.api.trace.Span;
+import io.opentelemetry.api.trace.StatusCode;
+import io.opentelemetry.api.trace.Tracer;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk;
+import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+public class TraceChatCompletionsToolCallSample {
+    /**
+     * Runs a chat completion with function tools inside a "main" span, re-asking while tool calls are requested.
+     * @param args Unused. Arguments to the program.
+     */
+    @SuppressWarnings("try")
+    public static void main(final String[] args) {
+        final OpenTelemetrySdk telemetry = configureOpenTelemetry();
+        final Tracer tracer = telemetry.getTracer(TraceChatCompletionsToolCallSample.class.getName());
+
+        final Span span = tracer.spanBuilder("main").startSpan();
+        try (AutoCloseable scope = span.makeCurrent()) {
+            final ChatCompletionsClient client = createChatCompletionClient();
+
+            final List<ChatRequestMessage> messages = new ArrayList<>();
+            messages.add(new ChatRequestSystemMessage("You are a helpful assistant."));
+            messages.add(new ChatRequestUserMessage("What is the weather and temperature in Seattle?"));
+            final GetWeatherTemperatureFunctions functions = new GetWeatherTemperatureFunctions();
+
+            ChatCompletions response = client.complete(new ChatCompletionsOptions(messages).setTools(functions.toolDefinitions()));
+            ChatChoice choice = response.getChoice();
+
+            while (isToolCalls(choice)) {
+                final List<ChatCompletionsToolCall> toolCalls = assertNonEmpty(choice.getMessage().getToolCalls());
+                messages.add(toAssistantMessage(toolCalls));
+                for (final ChatCompletionsToolCall toolCall : toolCalls) {
+                    final ChatRequestToolMessage toolMessage = functions.invoke(toolCall);
+                    messages.add(toolMessage);
+                }
+                response = client.complete(new ChatCompletionsOptions(messages).setTools(functions.toolDefinitions()));
+                choice = response.getChoice();
+            }
+
+            System.out.println("Model response: " + modelResponseContent(response));
+        } catch (Exception e) {
+            span.setStatus(StatusCode.ERROR, e.getMessage());
+            throw new RuntimeException(e);
+        } finally {
+            span.end();
+        }
+    }
+
+    private static OpenTelemetrySdk configureOpenTelemetry() {
+        // With the below configuration, the runtime sends OpenTelemetry data to the local OTLP/gRPC endpoint.
+        //
+        // For debugging purposes, Aspire Dashboard can be run locally; it listens for telemetry data and offers a UI
+        // for viewing the collected data. To run Aspire Dashboard, use the following docker command:
+        //
+        // docker run --rm -p 18888:18888 -p 4317:18889 -p 4318:18890 --name aspire-dashboard mcr.microsoft.com/dotnet/nightly/aspire-dashboard:latest
+        //
+        // The output of the docker command includes a link to the dashboard. For more information on Aspire Dashboard,
+        // see https://learn.microsoft.com/dotnet/aspire/fundamentals/dashboard/overview
+        //
+        // See https://learn.microsoft.com/azure/developer/java/sdk/tracing for more information on tracing with Azure SDK.
+        // The SDK built below is also registered as the global OpenTelemetry instance (setResultAsGlobal).
+        final AutoConfiguredOpenTelemetrySdkBuilder sdkBuilder = AutoConfiguredOpenTelemetrySdk.builder();
+        return sdkBuilder
+            .addPropertiesSupplier(() -> {
+                final Map<String, String> properties = new HashMap<>();
+                properties.put("otel.service.name", "get-weather-temperature-sample");
+                // change to your endpoint address, "http://localhost:4317" is used by default
+                // properties.put("otel.exporter.otlp.endpoint", "http://localhost:4317");
+                return properties;
+            })
+            .setResultAsGlobal()
+            .build()
+            .getOpenTelemetrySdk();
+    }
+
+    private static ChatCompletionsClient createChatCompletionClient() {
+        final String modelEndpoint = System.getenv("MODEL_ENDPOINT"); // endpoint and key come from the environment
+        return new ChatCompletionsClientBuilder().endpoint(modelEndpoint)
+            .credential(new AzureKeyCredential(System.getenv("AZURE_API_KEY")))
+            // uncomment to capture message content in the telemetry (or you can set AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED environment variable to `true`)
+            // .configuration(new ConfigurationBuilder().putProperty("azure.tracing.gen_ai.content_recording_enabled", "true").build())
+            .buildClient();
+    }
+
+    private static boolean isToolCalls(ChatChoice choice) {
+        return CompletionsFinishReason.TOOL_CALLS == choice.getFinishReason(); // true when the choice ended on tool calls
+    }
+
+    private static List<ChatCompletionsToolCall> assertNonEmpty(List<ChatCompletionsToolCall> toolCalls) {
+        if (toolCalls == null || toolCalls.isEmpty()) { // nothing to invoke: fail fast, otherwise pass the list through
+            throw new RuntimeException("Service requested tool-calls, but without information about function(s) to invoke.");
+        }
+        return toolCalls;
+    }
+
+    private static ChatRequestAssistantMessage toAssistantMessage(List<ChatCompletionsToolCall> toolCalls) {
+        return new ChatRequestAssistantMessage("").setToolCalls(toolCalls); // empty content, carries only the tool calls
+    }
+
+    private static String modelResponseContent(ChatCompletions response) {
+        return response.getChoices().get(0).getMessage().getContent(); // message content of the first choice (index 0)
+    }
+
+    /**
+     * Represents the function tool ('get_weather', 'get_temperature') definitions and reacts to the model's tool calls.
+     */
+    private static final class GetWeatherTemperatureFunctions {
+        private final WeatherFunc weatherFunc;
+        private final TemperatureFunc temperatureFunc;
+        private final List<ChatCompletionsToolDefinition> toolDefinitions = new ArrayList<>(2);
+
+        GetWeatherTemperatureFunctions() {
+            this.weatherFunc = new WeatherFunc();
+            this.temperatureFunc = new TemperatureFunc();
+            this.toolDefinitions.add(new ChatCompletionsToolDefinition(weatherFunc.getDefinition()));
+            this.toolDefinitions.add(new ChatCompletionsToolDefinition(temperatureFunc.getDefinition()));
+        }
+
+        List<ChatCompletionsToolDefinition> toolDefinitions() {
+            return this.toolDefinitions;
+        }
+
+        ChatRequestToolMessage invoke(ChatCompletionsToolCall toolCall) {
+            final Optional<ChatRequestToolMessage> wResponse = weatherFunc.tryInvoke(toolCall);
+            if (wResponse.isPresent()) {
+                return wResponse.get();
+            }
+            final Optional<ChatRequestToolMessage> rwResponse = temperatureFunc.tryInvoke(toolCall);
+            if (rwResponse.isPresent()) {
+                return rwResponse.get();
+            }
+            throw new RuntimeException("Service requested tool-call has no matching function information.");
+        }
+
+        private static final class WeatherFunc {
+            private FunctionDefinition getDefinition() {
+                return new FunctionDefinition("get_weather")
+                    .setDescription("Returns description of the weather in the specified city")
+                    .setParameters(BinaryData.fromBytes(parameters()));
+            }
+
+            // Answers the tool call only when it targets 'get_weather'; any other function yields an empty Optional.
+            private Optional<ChatRequestToolMessage> tryInvoke(ChatCompletionsToolCall toolCall) {
+                final FunctionCall function = toolCall.getFunction();
+                final String functionName = function.getName();
+                if ("get_weather".equalsIgnoreCase(functionName)) { // literal first: a null name simply does not match
+                    final FunctionArguments functionArguments = BinaryData.fromString(function.getArguments()).toObject(FunctionArguments.class);
+                    final String functionResponse;
+                    if ("Seattle".equalsIgnoreCase(functionArguments.getCity())) {
+                        functionResponse = "Nice weather";
+                    } else if ("New York City".equalsIgnoreCase(functionArguments.getCity())) {
+                        functionResponse = "Good weather";
+                    } else {
+                        functionResponse = "Unavailable";
+                    }
+                    return Optional.of(new ChatRequestToolMessage(functionResponse, toolCall.getId()));
+                }
+                return Optional.empty();
+            }
+
+            private static byte[] parameters() { // JSON schema of the arguments: an object with a required "city" string
+                try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+                     JsonWriter jsonWriter = JsonProviders.createWriter(byteArrayOutputStream)) {
+                    jsonWriter.writeStartObject();
+                    jsonWriter.writeStringField("type", "object");
+                    jsonWriter.writeStartObject("properties");
+                    jsonWriter.writeStartObject("city");
+                    jsonWriter.writeStringField("type", "string");
+                    jsonWriter.writeStringField("description", "The name of the city for which weather info is requested");
+                    jsonWriter.writeEndObject();
+                    jsonWriter.writeEndObject();
+                    jsonWriter.writeStartArray("required");
+                    jsonWriter.writeString("city");
+                    jsonWriter.writeEndArray();
+                    jsonWriter.writeEndObject();
+                    jsonWriter.flush();
+                    return byteArrayOutputStream.toByteArray();
+                } catch (IOException ioe) {
+                    throw new UncheckedIOException(ioe);
+                }
+            }
+
+            private static void sleep() {
+                try {
+                    TimeUnit.SECONDS.sleep(1);
+                } catch (InterruptedException e) {
+                    Thread.currentThread().interrupt(); // keep the interrupt visible to the caller instead of dropping it
+                }
+            }
+        }
+
+        private static final class TemperatureFunc {
+            private FunctionDefinition getDefinition() {
+                return new FunctionDefinition("get_temperature")
+                    .setDescription("Returns the current temperature for the specified city")
+                    .setParameters(BinaryData.fromBytes(parameters()));
+            }
+
+            // Answers the tool call only when it targets 'get_temperature'; any other function yields an empty Optional.
+            private Optional<ChatRequestToolMessage> tryInvoke(ChatCompletionsToolCall toolCall) {
+                final FunctionCall function = toolCall.getFunction();
+                final String functionName = function.getName();
+                if ("get_temperature".equalsIgnoreCase(functionName)) { // literal first: a null name simply does not match
+                    final FunctionArguments functionArguments = BinaryData.fromString(function.getArguments()).toObject(FunctionArguments.class);
+                    final String functionResponse;
+                    if ("Seattle".equalsIgnoreCase(functionArguments.getCity())) {
+                        functionResponse = "75";
+                    } else if ("New York City".equalsIgnoreCase(functionArguments.getCity())) {
+                        functionResponse = "80";
+                    } else {
+                        functionResponse = "Unavailable";
+                    }
+                    return Optional.of(new ChatRequestToolMessage(functionResponse, toolCall.getId()));
+                }
+                return Optional.empty();
+            }
+
+            private static byte[] parameters() { // JSON schema of the arguments: an object with a required "city" string
+                try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+                     JsonWriter jsonWriter = JsonProviders.createWriter(byteArrayOutputStream)) {
+                    jsonWriter.writeStartObject();
+                    jsonWriter.writeStringField("type", "object");
+                    jsonWriter.writeStartObject("properties");
+                    jsonWriter.writeStartObject("city");
+                    jsonWriter.writeStringField("type", "string");
+                    jsonWriter.writeStringField("description", "The name of the city for which temperature info is requested");
+                    jsonWriter.writeEndObject();
+                    jsonWriter.writeEndObject();
+                    jsonWriter.writeStartArray("required");
+                    jsonWriter.writeString("city");
+                    jsonWriter.writeEndArray();
+                    jsonWriter.writeEndObject();
+                    jsonWriter.flush();
+                    return byteArrayOutputStream.toByteArray();
+                } catch (IOException ioe) {
+                    throw new UncheckedIOException(ioe);
+                }
+            }
+        }
+
+        private static final class FunctionArguments implements JsonSerializable<FunctionArguments> {
+            private final String city;
+
+            private FunctionArguments(String city) {
+                this.city = city;
+            }
+
+            public String getCity() {
+                return this.city;
+            }
+
+            @Override
+            public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+                jsonWriter.writeStartObject();
+                jsonWriter.writeStringField("city", this.city);
+                return jsonWriter.writeEndObject();
+            }
+
+            public static FunctionArguments fromJson(JsonReader jsonReader) throws IOException {
+                return jsonReader.readObject(reader -> {
+                    String city = null;
+                    while (reader.nextToken() != JsonToken.END_OBJECT) {
+                        String fieldName = reader.getFieldName();
+                        reader.nextToken();
+                        if ("city".equals(fieldName)) {
+                            city = reader.getString();
+                        } else {
+                            reader.skipChildren(); // ignore any field other than "city"
+                        }
+                    }
+                    return new FunctionArguments(city);
+                });
+            }
+        }
+    }
+}
diff --git a/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceStreamingChatCompletionsToolCallSample.java b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceStreamingChatCompletionsToolCallSample.java
new file mode 100644
index 0000000000000..367e696a463e7
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/samples/java/com/azure/ai/inference/usage/TraceStreamingChatCompletionsToolCallSample.java
@@ -0,0 +1,315 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.inference.usage;
+
+import com.azure.ai.inference.ChatCompletionsClient;
+import com.azure.ai.inference.ChatCompletionsClientBuilder;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatCompletionsToolCall;
+import com.azure.ai.inference.models.ChatCompletionsToolDefinition;
+import com.azure.ai.inference.models.ChatRequestAssistantMessage;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestToolMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.ai.inference.models.FunctionCall;
+import com.azure.ai.inference.models.FunctionDefinition;
+import com.azure.ai.inference.models.StreamingChatChoiceUpdate;
+import com.azure.ai.inference.models.StreamingChatCompletionsUpdate;
+import com.azure.ai.inference.models.StreamingChatResponseToolCallUpdate;
+import com.azure.core.credential.AzureKeyCredential;
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.CoreUtils;
+import com.azure.core.util.IterableStream;
+import com.azure.json.JsonProviders;
+import com.azure.json.JsonReader;
+import com.azure.json.JsonSerializable;
+import com.azure.json.JsonToken;
+import com.azure.json.JsonWriter;
+import io.opentelemetry.api.trace.Span;
+import io.opentelemetry.api.trace.StatusCode;
+import io.opentelemetry.api.trace.Tracer;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdk;
+import io.opentelemetry.sdk.autoconfigure.AutoConfiguredOpenTelemetrySdkBuilder;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+
+public class TraceStreamingChatCompletionsToolCallSample {
+    /**
+     * Streams a tool-call request, invokes the tool, then streams the model's final answer, all inside a "main" span.
+     * @param args Unused. Arguments to the program.
+     */
+    @SuppressWarnings("try")
+    public static void main(final String[] args) {
+        final OpenTelemetrySdk telemetry = configureOpenTelemetry();
+        final Tracer tracer = telemetry.getTracer(TraceStreamingChatCompletionsToolCallSample.class.getName());
+
+        final Span span = tracer.spanBuilder("main").startSpan();
+        try (AutoCloseable scope = span.makeCurrent()) {
+            final ChatCompletionsClient client = createChatCompletionClient();
+
+            final List<ChatRequestMessage> messages = new ArrayList<>();
+            messages.add(new ChatRequestSystemMessage("You are an assistant that helps users find flight information."));
+            messages.add(new ChatRequestUserMessage("What is the next flight from Seattle to Miami?"));
+            final GetFlightInfoFunction function = new GetFlightInfoFunction();
+
+            final IterableStream<StreamingChatCompletionsUpdate> toolCallChunks = client.completeStream(new ChatCompletionsOptions(messages).setTools(function.toolDefinitions()));
+            final ChunksMerged toolCallChunksMerged = ChunksMerged.create(toolCallChunks);
+            final ChatCompletionsToolCall toolCall = toolCallChunksMerged.asTooCall();
+            messages.add(toAssistantMessage(toolCall));
+
+            final ChatRequestToolMessage toolMessage = function.invoke(toolCall);
+            messages.add(toolMessage);
+
+            final IterableStream<StreamingChatCompletionsUpdate> modelResponseChunks = client.completeStream(new ChatCompletionsOptions(messages).setTools(function.toolDefinitions()));
+            final ChunksMerged modelResponseChunksMerged = ChunksMerged.create(modelResponseChunks);
+            System.out.println("Model response: " + modelResponseChunksMerged.content);
+        } catch (Exception e) {
+            span.setStatus(StatusCode.ERROR, e.getMessage());
+            throw new RuntimeException(e);
+        } finally {
+            span.end();
+        }
+    }
+
+    private static OpenTelemetrySdk configureOpenTelemetry() {
+        // With the below configuration, the runtime sends OpenTelemetry data to the local OTLP/gRPC endpoint.
+        //
+        // For debugging purposes, Aspire Dashboard can be run locally; it listens for telemetry data and offers a UI
+        // for viewing the collected data. To run Aspire Dashboard, use the following docker command:
+        //
+        // docker run --rm -p 18888:18888 -p 4317:18889 -p 4318:18890 --name aspire-dashboard mcr.microsoft.com/dotnet/nightly/aspire-dashboard:latest
+        //
+        // The output of the docker command includes a link to the dashboard. For more information on Aspire Dashboard,
+        // see https://learn.microsoft.com/dotnet/aspire/fundamentals/dashboard/overview
+        //
+        // See https://learn.microsoft.com/azure/developer/java/sdk/tracing for more information on tracing with Azure SDK.
+        // The SDK built below is also registered as the global OpenTelemetry instance (setResultAsGlobal).
+        final AutoConfiguredOpenTelemetrySdkBuilder sdkBuilder = AutoConfiguredOpenTelemetrySdk.builder();
+        return sdkBuilder
+            .addPropertiesSupplier(() -> {
+                final Map<String, String> properties = new HashMap<>();
+                properties.put("otel.service.name", "get-flight-info-sample");
+                // change to your endpoint address, "http://localhost:4317" is used by default
+                // properties.put("otel.exporter.otlp.endpoint", "http://localhost:4317");
+                return properties;
+            })
+            .setResultAsGlobal()
+            .build()
+            .getOpenTelemetrySdk();
+    }
+
+    private static ChatCompletionsClient createChatCompletionClient() {
+        final String modelEndpoint = System.getenv("MODEL_ENDPOINT"); // endpoint and key come from the environment
+        return new ChatCompletionsClientBuilder().endpoint(modelEndpoint)
+            .credential(new AzureKeyCredential(System.getenv("AZURE_API_KEY")))
+            .buildClient();
+    }
+
+    private static ChatRequestAssistantMessage toAssistantMessage(ChatCompletionsToolCall toolCall) {
+        final List<ChatCompletionsToolCall> toolCalls = new ArrayList<>(1); // setToolCalls takes a list, so wrap the call
+        toolCalls.add(toolCall);
+        return new ChatRequestAssistantMessage("").setToolCalls(toolCalls);
+    }
+
+    private static final class ChunksMerged { // merged view of one streamed completion: tool call parts and content
+        private final String toolCallId;
+        private final String functionName;
+        private final String functionArguments;
+        private final String content;
+
+        static ChunksMerged create(IterableStream<StreamingChatCompletionsUpdate> chunks) {
+            String toolCallId = null;
+            String functionName = null;
+            StringBuilder functionArguments = new StringBuilder();
+            StringBuilder content = new StringBuilder();
+
+            for (StreamingChatCompletionsUpdate chunk : chunks) {
+                if (chunk.getChoices() == null || chunk.getChoices().isEmpty()) {
+                    continue;
+                }
+                final StreamingChatChoiceUpdate choice = chunk.getChoices().get(0); // only the first choice is merged
+                if (choice != null && choice.getDelta() != null) {
+                    final String contentChunk = choice.getDelta().getContent();
+                    if (contentChunk != null) {
+                        // response content may be streamed across multiple chunks.
+                        content.append(contentChunk);
+                    }
+                    if (choice.getDelta().getToolCalls() != null) {
+                        final List<StreamingChatResponseToolCallUpdate> toolCalls = choice.getDelta().getToolCalls();
+                        if (!toolCalls.isEmpty()) {
+                            final StreamingChatResponseToolCallUpdate toolCall = toolCalls.get(0); // first tool call only
+                            if (!CoreUtils.isNullOrEmpty(toolCall.getId())) {
+                                toolCallId = toolCall.getId();
+                            }
+                            final FunctionCall functionCall = toolCall.getFunction();
+                            if (functionCall != null) {
+                                if (!CoreUtils.isNullOrEmpty(functionCall.getName())) {
+                                    functionName = functionCall.getName();
+                                }
+                                if (functionCall.getArguments() != null) {
+                                    // function arguments may be streamed across multiple chunks.
+                                    functionArguments.append(functionCall.getArguments());
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            return new ChunksMerged(toolCallId, functionName, functionArguments.toString(), content.toString());
+        }
+
+        ChatCompletionsToolCall asTooCall() { // rebuilds the complete tool call from the merged id, name and arguments
+            return new ChatCompletionsToolCall(toolCallId, new FunctionCall(functionName, functionArguments));
+        }
+
+        private ChunksMerged(String toolCallId, String functionName, String functionArguments, String content) {
+            this.toolCallId = toolCallId;
+            this.functionName = functionName;
+            this.functionArguments = functionArguments;
+            this.content = content;
+        }
+    }
+
+    /**
+     * Represents the function tool ('get_flight_info') definition and reacts to the model's evaluation of the function tool.
+     */
+    private static class GetFlightInfoFunction {
+        private final FlightInfoFunc flightInfoFunc;
+        private final List<ChatCompletionsToolDefinition> toolDefinitions = new ArrayList<>(1); // single tool: get_flight_info
+
+        GetFlightInfoFunction() { // registers the definition of the single 'get_flight_info' tool
+            flightInfoFunc = new FlightInfoFunc();
+            toolDefinitions.add(new ChatCompletionsToolDefinition(flightInfoFunc.getDefinition()));
+        }
+
+        List<ChatCompletionsToolDefinition> toolDefinitions() {
+            return this.toolDefinitions; // the list populated by the constructor
+        }
+
+        ChatRequestToolMessage invoke(ChatCompletionsToolCall toolCall) {
+            final Optional<ChatRequestToolMessage> fResponse = flightInfoFunc.tryInvoke(toolCall);
+            if (fResponse.isPresent()) {
+                return fResponse.get();
+            }
+            throw new RuntimeException("Service requested tool-call has no matching function information."); // unknown function
+        }
+
+        private static final class FlightInfoFunc {
+            private FunctionDefinition getDefinition() {
+                return new FunctionDefinition("get_flight_info")
+                    .setDescription("Returns information about the next flight between two cities. This includes the name of the airline, flight number and the date and time of the next flight, in JSON format.")
+                    .setParameters(BinaryData.fromBytes(parameters()));
+            }
+
+            // Answers the tool call only when it targets 'get_flight_info'; any other function yields an empty Optional.
+            private Optional<ChatRequestToolMessage> tryInvoke(ChatCompletionsToolCall toolCall) {
+                final String toolCallId = toolCall.getId();
+                final String funcName = toolCall.getFunction().getName();
+                final String funcArguments = toolCall.getFunction().getArguments();
+
+                if ("get_flight_info".equalsIgnoreCase(funcName)) { // literal first: a null name simply does not match
+                    final FunctionArguments functionArguments = BinaryData.fromString(funcArguments).toObject(FunctionArguments.class);
+                    final String functionResponse;
+                    if ("Seattle".equalsIgnoreCase(functionArguments.getOriginCity()) && "Miami".equalsIgnoreCase(functionArguments.getDestinationCity())) {
+                        functionResponse = "{\"airline\": \"Delta\", \"flight_number\": \"DL123\", \"flight_date\": \"May 7th, 2024\", \"flight_time\": \"10:00AM\"}";
+                    } else {
+                        functionResponse = "{\"error\": \"No flights found between the cities\"}";
+                    }
+                    return Optional.of(new ChatRequestToolMessage(functionResponse, toolCallId));
+                }
+                return Optional.empty();
+            }
+
+            private static byte[] parameters() { // JSON schema: object with required "origin_city" and "destination_city"
+                try (ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+                     JsonWriter jsonWriter = JsonProviders.createWriter(byteArrayOutputStream)) {
+                    jsonWriter.writeStartObject();
+                    jsonWriter.writeStringField("type", "object");
+                    jsonWriter.writeStartObject("properties");
+                    jsonWriter.writeStartObject("origin_city");
+                    jsonWriter.writeStringField("type", "string");
+                    jsonWriter.writeStringField("description", "The name of the city where the flight originates");
+                    jsonWriter.writeEndObject();
+                    jsonWriter.writeStartObject("destination_city");
+                    jsonWriter.writeStringField("type", "string");
+                    jsonWriter.writeStringField("description", "The flight destination city");
+                    jsonWriter.writeEndObject();
+                    jsonWriter.writeEndObject();
+                    jsonWriter.writeStartArray("required");
+                    jsonWriter.writeString("origin_city");
+                    jsonWriter.writeString("destination_city");
+                    jsonWriter.writeEndArray();
+                    jsonWriter.writeEndObject();
+                    jsonWriter.flush();
+                    return byteArrayOutputStream.toByteArray();
+                } catch (IOException ioe) {
+                    throw new UncheckedIOException(ioe);
+                }
+            }
+
+            private static void sleep() {
+                try {
+                    TimeUnit.SECONDS.sleep(1);
+                } catch (InterruptedException e) {
+                    Thread.currentThread().interrupt(); // keep the interrupt visible to the caller instead of dropping it
+                }
+            }
+        }
+
+        // JSON-bound arguments (origin_city, destination_city) that the model supplies for get_flight_info.
+        private static final class FunctionArguments implements JsonSerializable<FunctionArguments> {
+            private final String originCity;
+            private final String destinationCity;
+
+            private FunctionArguments(String originCity, String destinationCity) {
+                this.originCity = originCity;
+                this.destinationCity = destinationCity;
+            }
+
+            public String getOriginCity() {
+                return this.originCity;
+            }
+            public String getDestinationCity() {
+                return this.destinationCity;
+            }
+            // Writes both cities under the same field names that fromJson reads back.
+            @Override
+            public JsonWriter toJson(JsonWriter jsonWriter) throws IOException {
+                jsonWriter.writeStartObject();
+                jsonWriter.writeStringField("origin_city", this.originCity);
+                jsonWriter.writeStringField("destination_city", this.destinationCity);
+                return jsonWriter.writeEndObject();
+            }
+            // Reads origin_city and destination_city, skipping any other field; an absent field stays null.
+            public static FunctionArguments fromJson(JsonReader jsonReader) throws IOException {
+                return jsonReader.readObject(reader -> {
+                    String originCity = null;
+                    String destinationCity = null;
+                    while (reader.nextToken() != JsonToken.END_OBJECT) {
+                        String fieldName = reader.getFieldName();
+                        reader.nextToken();
+                        if ("origin_city".equals(fieldName)) {
+                            originCity = reader.getString();
+                        } else if ("destination_city".equals(fieldName)) {
+                            destinationCity = reader.getString();
+                        } else {
+                            reader.skipChildren();
+                        }
+                    }
+                    return new FunctionArguments(originCity, destinationCity);
+                });
+            }
+        }
+    }
+}
diff --git a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionClientTracerTest.java b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionClientTracerTest.java
new file mode 100644
index 0000000000000..5fb8784b2a5e1
--- /dev/null
+++ b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionClientTracerTest.java
@@ -0,0 +1,416 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+package com.azure.ai.inference;
+
+import com.azure.ai.inference.implementation.models.CompleteRequest;
+import com.azure.ai.inference.models.ChatCompletions;
+import com.azure.ai.inference.models.ChatCompletionsOptions;
+import com.azure.ai.inference.models.ChatCompletionsToolCall;
+import com.azure.ai.inference.models.ChatRequestAssistantMessage;
+import com.azure.ai.inference.models.ChatRequestMessage;
+import com.azure.ai.inference.models.ChatRequestSystemMessage;
+import com.azure.ai.inference.models.ChatRequestToolMessage;
+import com.azure.ai.inference.models.ChatRequestUserMessage;
+import com.azure.core.http.rest.RequestOptions;
+import com.azure.core.tracing.opentelemetry.OpenTelemetryTracingOptions;
+import com.azure.core.util.BinaryData;
+import com.azure.core.util.Configuration;
+import com.azure.core.util.CoreUtils;
+import com.azure.core.util.logging.ClientLogger;
+import com.azure.core.util.tracing.Tracer;
+import com.azure.core.util.tracing.TracerProvider;
+import com.azure.json.JsonProviders;
+import com.azure.json.JsonWriter;
+import io.opentelemetry.api.common.AttributeKey;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.context.Context;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.trace.ReadWriteSpan;
+import io.opentelemetry.sdk.trace.ReadableSpan;
+import io.opentelemetry.sdk.trace.SdkTracerProvider;
+import io.opentelemetry.sdk.trace.SpanProcessor;
+import io.opentelemetry.sdk.trace.data.EventData;
+import org.junit.jupiter.api.AfterEach;
+import org.junit.jupiter.api.Assertions;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.ValueSource;
+import reactor.core.publisher.Mono;
+import reactor.test.StepVerifier;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.nio.charset.StandardCharsets;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentLinkedDeque;
+import java.util.stream.Collectors;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+public final class ChatCompletionClientTracerTest {
+    private static final String MODEL_ENDPOINT_HOST = "contoso.openai.azure.com";
+    private static final String MODEL_ENDPOINT = "https://" + MODEL_ENDPOINT_HOST;
+    private static final String INFERENCE_GEN_AI_SYSTEM_NAME = "az.ai.inference";
+    private static final String GEN_AI_REQUEST_CHAT_MODEL = "chat";
+    private static final String GEN_AI_CHAT_OPERATION_NAME = "chat";
+    private static final String AZ_NAMESPACE_NAME = "Microsoft.CognitiveServices";
+    // Typed keys of the span and event attributes the tests read back.
+    private static final AttributeKey<String> AZ_NAMESPACE = AttributeKey.stringKey("az.namespace");
+    private static final AttributeKey<String> GEN_AI_SYSTEM = AttributeKey.stringKey("gen_ai.system");
+    private static final AttributeKey<String> GEN_AI_OPERATION_NAME = AttributeKey.stringKey("gen_ai.operation.name");
+    private static final AttributeKey<Double> GEN_AI_REQUEST_TOP_P = AttributeKey.doubleKey("gen_ai.request.top_p");
+    private static final AttributeKey<String> GEN_AI_REQUEST_MODEL = AttributeKey.stringKey("gen_ai.request.model");
+    private static final AttributeKey<Long> GEN_AI_REQUEST_MAX_TOKENS
+        = AttributeKey.longKey("gen_ai.request.max_tokens");
+    private static final AttributeKey<Double> GEN_AI_REQUEST_TEMPERATURE
+        = AttributeKey.doubleKey("gen_ai.request.temperature");
+    private static final AttributeKey<String> GEN_AI_RESPONSE_ID = AttributeKey.stringKey("gen_ai.response.id");
+    private static final AttributeKey<String> GEN_AI_RESPONSE_MODEL = AttributeKey.stringKey("gen_ai.response.model");
+    private static final AttributeKey<String> GEN_AI_RESPONSE_FINISH_REASONS
+        = AttributeKey.stringKey("gen_ai.response.finish_reasons");
+    private static final AttributeKey<Long> GEN_AI_USAGE_OUTPUT_TOKENS
+        = AttributeKey.longKey("gen_ai.usage.output_tokens");
+    private static final AttributeKey<Long> GEN_AI_USAGE_INPUT_TOKENS
+        = AttributeKey.longKey("gen_ai.usage.input_tokens");
+    private static final AttributeKey<String> GEN_AI_EVENT_CONTENT = AttributeKey.stringKey("gen_ai.event.content");
+    // Names of the span events looked up by the assertions.
+    private static final String GEN_AI_CHOICE_EVENT_NAME = "gen_ai.choice";
+    private static final String GEN_AI_SYSTEM_MESSAGE_EVENT_NAME = "gen_ai.system.message";
+    private static final String GEN_AI_USER_MESSAGE_EVENT_NAME = "gen_ai.user.message";
+    private static final String GEN_AI_ASSISTANT_MESSAGE_EVENT_NAME = "gen_ai.assistant.message";
+    private static final String GEN_AI_TOOL_MESSAGE_EVENT_NAME = "gen_ai.tool.message";
+    // Fixed prompt text and the per-test tracing fixtures assigned in setup().
+    private static final String SYSTEM_MESSAGE = "You are a helpful assistant.";
+    private static final String USER_MESSAGE = "What is the weather in Seattle?";
+    private TestSpanProcessor spanProcessor;
+    private Tracer tracer;
+
+    @BeforeEach
+    public void setup() {
+        // Route every span through the recording processor so each test can inspect what the tracer emitted.
+        spanProcessor = new TestSpanProcessor(MODEL_ENDPOINT_HOST);
+        final SdkTracerProvider sdkProvider = SdkTracerProvider.builder().addSpanProcessor(spanProcessor).build();
+        final OpenTelemetrySdk otel = OpenTelemetrySdk.builder().setTracerProvider(sdkProvider).build();
+        final OpenTelemetryTracingOptions otelOptions = new OpenTelemetryTracingOptions().setOpenTelemetry(otel);
+        tracer = TracerProvider.getDefaultProvider()
+            .createTracer("test", null, "Microsoft.CognitiveServices", otelOptions);
+    }
+
+    @AfterEach
+    public void teardown() { // Intentionally empty: setup() assigns a fresh span processor and tracer.
+    }
+
+    // Sync path: the tracer wraps a stubbed operation and must emit a chat span with the expected data.
+    @ParameterizedTest
+    @ValueSource(booleans = { true, false })
+    public void shouldTraceSyncChatComplete(boolean captureContent) {
+        final ChatCompletionClientTracer inferenceTracer
+            = new ChatCompletionClientTracer(MODEL_ENDPOINT, configuration(captureContent), tracer);
+        // Request: a system and a user message; canned response: a single tool-calls choice.
+        final List<ChatRequestMessage> messages = new ArrayList<>();
+        messages.add(new ChatRequestSystemMessage(SYSTEM_MESSAGE));
+        messages.add(new ChatRequestUserMessage(USER_MESSAGE));
+        final ChatCompletionsOptions completionsOptions
+            = new ChatCompletionsOptions(messages).setTopP(5.0).setMaxTokens(100).setTemperature(75.4);
+        final ChatCompletions toolCallsResponse = getChatCompletionsModelResponse(true);
+        // The operation lambda stands in for the service call and simply returns the canned response.
+        inferenceTracer.traceSyncComplete(completionsOptions, (arg0, arg1) -> toolCallsResponse,
+            toCompleteRequest(completionsOptions), new RequestOptions());
+        // The ended chat span must carry request/response attributes and, only when enabled, message events.
+        final List<ReadableSpan> spans = spanProcessor.getEndedSpans();
+        final ReadableSpan chatSpan = getChatSpan(spans, completionsOptions);
+        final Attributes chatAttributes = chatSpan.getAttributes();
+        assertChatSpanRequestAttributes(chatAttributes, completionsOptions);
+        if (captureContent) {
+            assertCapturedChatEvents(chatSpan, messages);
+        } else {
+            assertNoChatEventsCaptured(chatSpan);
+        }
+        assertChatSpanResponseAttributes(chatAttributes, toolCallsResponse);
+        assertEquals("[tool_calls]", chatAttributes.get(GEN_AI_RESPONSE_FINISH_REASONS));
+        assertChatEventContent(chatSpan, GEN_AI_CHOICE_EVENT_NAME, getExpectedChoiceEventContent(true, captureContent));
+    }
+
+    // Async path: the traced Mono is subscribed to, then the ended chat span is checked like the sync case.
+    @ParameterizedTest
+    @ValueSource(booleans = { true, false })
+    public void shouldTraceChatComplete(boolean captureContent) {
+        final ChatCompletionClientTracer inferenceTracer
+            = new ChatCompletionClientTracer(MODEL_ENDPOINT, configuration(captureContent), tracer);
+        // Request: system, user and an assistant message carrying tool calls; canned response: a "stop" answer.
+        final List<ChatRequestMessage> messages = new ArrayList<>();
+        messages.add(new ChatRequestSystemMessage(SYSTEM_MESSAGE));
+        messages.add(new ChatRequestUserMessage(USER_MESSAGE));
+        messages.add(new ChatRequestAssistantMessage("").setToolCalls(getModelToolCalls()));
+        final ChatCompletionsOptions completionsOptions
+            = new ChatCompletionsOptions(messages).setTopP(5.0).setMaxTokens(100).setTemperature(75.4);
+        final ChatCompletions modelResponse = getChatCompletionsModelResponse(false);
+        // The operation lambda stands in for the service call and emits the canned response.
+        final Mono<ChatCompletions> r = inferenceTracer.traceComplete(completionsOptions,
+            (arg0, arg1) -> Mono.just(modelResponse), toCompleteRequest(completionsOptions), new RequestOptions());
+
+        StepVerifier.create(r).expectNextCount(1).verifyComplete();
+        // The ended chat span must carry request/response attributes and, only when enabled, message events.
+        final List<ReadableSpan> spans = spanProcessor.getEndedSpans();
+        final ReadableSpan chatSpan = getChatSpan(spans, completionsOptions);
+        final Attributes chatAttributes = chatSpan.getAttributes();
+        assertChatSpanRequestAttributes(chatAttributes, completionsOptions);
+        if (captureContent) {
+            assertCapturedChatEvents(chatSpan, messages);
+        } else {
+            assertNoChatEventsCaptured(chatSpan);
+        }
+        assertChatSpanResponseAttributes(chatAttributes, modelResponse);
+        assertEquals("[stop]", chatAttributes.get(GEN_AI_RESPONSE_FINISH_REASONS));
+        assertChatEventContent(chatSpan, GEN_AI_CHOICE_EVENT_NAME,
+            getExpectedChoiceEventContent(false, captureContent));
+    }
+
+    // Finds the span named "chat" (or "chat <model>" when the request names a model); fails the test if absent.
+    static ReadableSpan getChatSpan(List<ReadableSpan> spans, ChatCompletionsOptions completionRequest) {
+        Assertions.assertFalse(spans.isEmpty(), "Expects at least one span.");
+        final String name
+            = CoreUtils.isNullOrEmpty(completionRequest.getModel()) ? "chat" : "chat " + completionRequest.getModel();
+        return spans.stream().filter(s -> s.getName().equals(name)).findFirst()
+            .orElseGet(() -> Assertions.fail("Span describing chat completion operation not found."));
+    }
+
+    // Checks the request-side attributes of the chat span against the options the span was created for.
+    static void assertChatSpanRequestAttributes(Attributes chatAttributes, ChatCompletionsOptions completionRequest) {
+        assertEquals(AZ_NAMESPACE_NAME, chatAttributes.get(AZ_NAMESPACE));
+        assertEquals(INFERENCE_GEN_AI_SYSTEM_NAME, chatAttributes.get(GEN_AI_SYSTEM));
+        assertEquals(GEN_AI_CHAT_OPERATION_NAME, chatAttributes.get(GEN_AI_OPERATION_NAME));
+        final String model = completionRequest.getModel();
+        assertEquals(CoreUtils.isNullOrEmpty(model) ? "chat" : model, chatAttributes.get(GEN_AI_REQUEST_MODEL));
+        assertEquals(completionRequest.getTopP(), chatAttributes.get(GEN_AI_REQUEST_TOP_P));
+        assertEquals(completionRequest.getTemperature(), chatAttributes.get(GEN_AI_REQUEST_TEMPERATURE));
+        if (completionRequest.getMaxTokens() != null) {
+            assertEquals(completionRequest.getMaxTokens().longValue(), chatAttributes.get(GEN_AI_REQUEST_MAX_TOKENS));
+        }
+    }
+
+    static void assertNoChatEventsCaptured(ReadableSpan chatSpan) {
+        Assertions.assertFalse(getChatEvent(chatSpan, GEN_AI_SYSTEM_MESSAGE_EVENT_NAME).isPresent()
+            || getChatEvent(chatSpan, GEN_AI_USER_MESSAGE_EVENT_NAME).isPresent()
+            || getChatEvent(chatSpan, GEN_AI_ASSISTANT_MESSAGE_EVENT_NAME).isPresent()
+            || getChatEvent(chatSpan, GEN_AI_TOOL_MESSAGE_EVENT_NAME).isPresent()); // no request message recorded
+    }
+
+    // Asserts that every request message was recorded as the event matching its role, with its JSON as content.
+    static void assertCapturedChatEvents(ReadableSpan chatSpan, List<ChatRequestMessage> messages) {
+        for (ChatRequestMessage message : messages) {
+            try {
+                final String expectedContent = message.toJsonString();
+                if (message instanceof ChatRequestAssistantMessage) {
+                    assertChatEventContent(chatSpan, GEN_AI_ASSISTANT_MESSAGE_EVENT_NAME, expectedContent);
+                } else if (message instanceof ChatRequestSystemMessage) {
+                    assertChatEventContent(chatSpan, GEN_AI_SYSTEM_MESSAGE_EVENT_NAME, expectedContent);
+                } else if (message instanceof ChatRequestToolMessage) {
+                    assertChatEventContent(chatSpan, GEN_AI_TOOL_MESSAGE_EVENT_NAME, expectedContent);
+                } else if (message instanceof ChatRequestUserMessage) {
+                    assertChatEventContent(chatSpan, GEN_AI_USER_MESSAGE_EVENT_NAME, expectedContent);
+                }
+            } catch (IOException e) {
+                throw new UncheckedIOException(e);
+            }
+        }
+    }
+
+    static void assertChatSpanResponseAttributes(Attributes chatAttributes, ChatCompletions completionResponse) {
+        assertEquals(completionResponse.getId(), chatAttributes.get(GEN_AI_RESPONSE_ID));
+        assertEquals(completionResponse.getModel(), chatAttributes.get(GEN_AI_RESPONSE_MODEL));
+        if (completionResponse.getUsage() == null) {
+            return; // token counts are compared only when the response reports usage
+        }
+        assertEquals(completionResponse.getUsage().getCompletionTokens(),
+            chatAttributes.get(GEN_AI_USAGE_OUTPUT_TOKENS));
+        assertEquals(completionResponse.getUsage().getPromptTokens(), chatAttributes.get(GEN_AI_USAGE_INPUT_TOKENS));
+    }
+
+    // Asserts that the span carries the named event with the expected content and the inference system name.
+    private static void assertChatEventContent(ReadableSpan span, String eventName, String expectedContent) {
+        final Optional<EventData> event = getChatEvent(span, eventName);
+        Assertions.assertTrue(event.isPresent(), "Span has no event named " + eventName);
+        Assertions.assertEquals(expectedContent, event.get().getAttributes().get(GEN_AI_EVENT_CONTENT));
+        assertEquals(INFERENCE_GEN_AI_SYSTEM_NAME, event.get().getAttributes().get(GEN_AI_SYSTEM));
+    }
+
+    private static Optional<EventData> getChatEvent(ReadableSpan span, String eventName) {
+        Assertions.assertEquals("chat", span.getName()); // only the span named exactly "chat" is accepted
+        final List<EventData> events = span.toSpanData().getEvents();
+        Assertions.assertFalse(events.isEmpty()); // the span is required to carry at least one event
+        return events.stream().filter(s -> s.getName().equals(eventName)).findFirst();
+    }
+
+    // Mirrors the options onto a CompleteRequest and wraps it in the BinaryData passed to the tracer under test.
+    private static BinaryData toCompleteRequest(ChatCompletionsOptions options) {
+        final CompleteRequest request = new CompleteRequest(options.getMessages());
+        request.setFrequencyPenalty(options.getFrequencyPenalty());
+        request.setStream(options.isStream());
+        request.setPresencePenalty(options.getPresencePenalty());
+        request.setTemperature(options.getTemperature()).setTopP(options.getTopP());
+        request.setMaxTokens(options.getMaxTokens());
+        request.setResponseFormat(options.getResponseFormat());
+        request.setStop(options.getStop());
+        request.setTools(options.getTools());
+        request.setToolChoice(options.getToolChoice());
+        request.setSeed(options.getSeed());
+        request.setModel(options.getModel());
+        return BinaryData.fromObject(request);
+    }
+
+    // Deserializes a canned ChatCompletions from hand-written JSON: one choice that is either a get_weather
+    // tool call (isToolCalls) or a plain text answer.
+    private static ChatCompletions getChatCompletionsModelResponse(boolean isToolCalls) {
+        try (ByteArrayOutputStream stream = new ByteArrayOutputStream();
+            JsonWriter writer = JsonProviders.createWriter(stream)) {
+            writer.writeStartObject();
+            writer.writeStringField("id", "model_uuid_0");
+            writer.writeStringField("model", "gpt-4-turbo-2024-04-09");
+            // Token usage that the response-attribute assertions compare with the span.
+            writer.writeStartObject("usage");
+            writer.writeLongField("completion_tokens", 14);
+            writer.writeLongField("prompt_tokens", 115);
+            writer.writeLongField("total_tokens", 129);
+            writer.writeEndObject();
+            // A single choice at index 0.
+            writer.writeStartArray("choices");
+            writer.writeStartObject();
+            // Both flavors start the choice the same way; only finish_reason and the message payload differ.
+            writer.writeStringField("finish_reason", isToolCalls ? "tool_calls" : "stop");
+            writer.writeLongField("index", 0);
+            writer.writeStartObject("message");
+            writer.writeStringField("role", "assistant");
+            if (isToolCalls) {
+                writer.writeStartArray("tool_calls");
+                writer.writeStartObject();
+                writer.writeStringField("id", "tool_call_uuid0");
+                writer.writeStringField("type", "function");
+                writer.writeStartObject("function");
+                writer.writeStringField("name", "get_weather");
+                writer.writeStringField("arguments", "{\"city\":\"Seattle\"}");
+                writer.writeEndObject(); // function
+                writer.writeEndObject(); // tool call
+                writer.writeEndArray(); // tool_calls
+            } else {
+                writer.writeStringField("content", "The weather in Seattle is nice.");
+            }
+            writer.writeEndObject(); // message
+            writer.writeEndObject(); // choice
+            writer.writeEndArray(); // choices
+            writer.writeEndObject(); // root
+            writer.flush();
+            final BinaryData binaryData
+                = BinaryData.fromString(new String(stream.toByteArray(), StandardCharsets.UTF_8));
+            return binaryData.toObject(ChatCompletions.class);
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    // Builds the JSON expected as the content of the gen_ai.choice event for the canned response.
+    // Function details and message text are only expected when captureContent is true.
+    private static String getExpectedChoiceEventContent(boolean toolCalls, boolean captureContent) {
+        try (ByteArrayOutputStream stream = new ByteArrayOutputStream();
+            JsonWriter writer = JsonProviders.createWriter(stream)) {
+            // Root object layout: message, then finish_reason, then index.
+            writer.writeStartObject();
+            writer.writeStartObject("message");
+            if (toolCalls) {
+                // The tool call id and type are always expected; the function object only with content capture.
+                writer.writeStartArray("tool_calls");
+                writer.writeStartObject();
+                writer.writeStringField("id", "tool_call_uuid0");
+                writer.writeStringField("type", "function");
+                if (captureContent) {
+                    writer.writeStartObject("function");
+                    writer.writeStringField("name", "get_weather");
+                    writer.writeStringField("arguments", "{\"city\":\"Seattle\"}");
+                    writer.writeEndObject();
+                }
+                writer.writeEndObject();
+                writer.writeEndArray();
+            } else if (captureContent) {
+                // Plain answer: the text is only expected with content capture, otherwise message stays empty.
+                writer.writeStringField("content", "The weather in Seattle is nice.");
+            }
+            writer.writeEndObject(); // message
+            // finish_reason and index follow the message object in both flavors.
+            writer.writeStringField("finish_reason", toolCalls ? "tool_calls" : "stop");
+            writer.writeLongField("index", 0);
+            writer.writeEndObject();
+            writer.flush();
+            return new String(stream.toByteArray(), StandardCharsets.UTF_8);
+        } catch (IOException e) {
+            throw new UncheckedIOException(e);
+        }
+    }
+
+    private static List<ChatCompletionsToolCall> getModelToolCalls() {
+        // Reuse the canned tool-calls response so the request fixture carries the same tool call.
+        return getChatCompletionsModelResponse(true).getChoice().getMessage().getToolCalls();
+    }
+
+    // Builds a configuration that sets the content-recording property only when captureContent is true.
+    private static Configuration configuration(boolean captureContent) {
+        // ConfigurationBuilder is referenced by its fully qualified name, as in the rest of this test.
+        final com.azure.core.util.ConfigurationBuilder builder = new com.azure.core.util.ConfigurationBuilder();
+        if (captureContent) {
+            builder.putProperty("azure.tracing.gen_ai.content_recording_enabled", "true");
+        }
+        return builder.build();
+    }
+
+    // Span processor that checks each ended span's endpoint attribute and keeps the span for later assertions.
+    private static final class TestSpanProcessor implements SpanProcessor {
+        private static final AttributeKey<String> NET_PEER_NAME = AttributeKey.stringKey("net.peer.name");
+        private static final AttributeKey<String> SERVER_ADDRESS = AttributeKey.stringKey("server.address");
+        private final ClientLogger logger;
+        private final String modelEndpointHost;
+        private final ConcurrentLinkedDeque<ReadableSpan> spans = new ConcurrentLinkedDeque<>();
+
+        TestSpanProcessor(String modelEndpointHost) {
+            this.logger = new ClientLogger(TestSpanProcessor.class);
+            this.modelEndpointHost = modelEndpointHost;
+        }
+        // Returns a snapshot copy of the spans ended so far, in the order they ended.
+        public List<ReadableSpan> getEndedSpans() {
+            return new ArrayList<>(spans);
+        }
+
+        @Override
+        public void onStart(Context context, ReadWriteSpan readWriteSpan) {
+        }
+
+        @Override
+        public boolean isStartRequired() {
+            return false;
+        }
+        // Logs the ended span, asserts it was attributed to the expected endpoint host, then records it.
+        @Override
+        public void onEnd(ReadableSpan readableSpan) {
+            logger.info(readableSpan.toString());
+            assertEquals(modelEndpointHost, getEndpoint(readableSpan));
+            spans.add(readableSpan);
+        }
+
+        @Override
+        public boolean isEndRequired() {
+            return true;
+        }
+
+        private String getEndpoint(ReadableSpan readableSpan) {
+            // Depending on the OpenTelemetry version being used, the attribute name for the peer name may be different.
+            // The attribute name was changed from "net.peer.name" to "server.address".
+            final String endpoint = readableSpan.getAttribute(NET_PEER_NAME);
+            if (endpoint != null) {
+                return endpoint;
+            }
+            return readableSpan.getAttribute(SERVER_ADDRESS);
+        }
+    }
+}
diff --git a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsSyncClientTest.java b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsSyncClientTest.java
index aec770eb293cc..3ce620fbe67d9 100644
--- a/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsSyncClientTest.java
+++ b/sdk/ai/azure-ai-inference/src/test/java/com/azure/ai/inference/ChatCompletionsSyncClientTest.java
@@ -25,12 +25,24 @@
 import com.azure.core.http.HttpClient;
 import com.azure.core.http.rest.RequestOptions;
 import com.azure.core.http.rest.Response;
+import com.azure.core.tracing.opentelemetry.OpenTelemetryTracingOptions;
 import com.azure.core.util.BinaryData;
+import com.azure.core.util.ClientOptions;
+import com.azure.core.util.Configuration;
+import com.azure.core.util.ConfigurationBuilder;
 import com.azure.core.util.IterableStream;
+import com.azure.core.util.logging.ClientLogger;
 import com.azure.json.JsonReader;
 import com.azure.json.JsonSerializable;
 import com.azure.json.JsonToken;
 import com.azure.json.JsonWriter;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.context.Context;
+import io.opentelemetry.sdk.OpenTelemetrySdk;
+import io.opentelemetry.sdk.trace.ReadWriteSpan;
+import io.opentelemetry.sdk.trace.ReadableSpan;
+import io.opentelemetry.sdk.trace.SdkTracerProvider;
+import io.opentelemetry.sdk.trace.SpanProcessor;
 import org.junit.jupiter.params.ParameterizedTest;
 import org.junit.jupiter.params.provider.MethodSource;
 
@@ -40,7 +52,14 @@
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
-
+import java.util.concurrent.ConcurrentLinkedDeque;
+import java.util.stream.Collectors;
+
+import static com.azure.ai.inference.ChatCompletionClientTracerTest.assertCapturedChatEvents;
+import static com.azure.ai.inference.ChatCompletionClientTracerTest.assertChatSpanRequestAttributes;
+import static com.azure.ai.inference.ChatCompletionClientTracerTest.assertChatSpanResponseAttributes;
+import static com.azure.ai.inference.ChatCompletionClientTracerTest.assertNoChatEventsCaptured;
+import static com.azure.ai.inference.ChatCompletionClientTracerTest.getChatSpan;
 import static com.azure.ai.inference.TestUtils.DISPLAY_NAME_WITH_ARGUMENTS;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assertions.assertNotNull;
@@ -56,9 +75,31 @@ public class ChatCompletionsSyncClientTest extends ChatCompletionsClientTestBase
     private static final String TEST_IMAGE_PATH = "./src/samples/resources/sample-images/sample.png";
     private static final String TEST_IMAGE_FORMAT = "png";
 
-    private ChatCompletionsClient getChatCompletionsClient(HttpClient httpClient) {
+    private ChatCompletionsClientBuilder getBuilder(HttpClient httpClient) {
         return getChatCompletionsClientBuilder(
-            interceptorManager.isPlaybackMode() ? interceptorManager.getPlaybackClient() : httpClient).buildClient();
+            interceptorManager.isPlaybackMode() ? interceptorManager.getPlaybackClient() : httpClient);
+    }
+    // Builds the synchronous client directly from the builder returned by getBuilder.
+    private ChatCompletionsClient getChatCompletionsClient(HttpClient httpClient) {
+        return getBuilder(httpClient).buildClient();
+    }
+
+    private ChatCompletionsClient getChatCompletionsClientWithTracing(HttpClient httpClient,
+        SpanProcessor spanProcessor, boolean captureContent) {
+        // Wire the supplied span processor into an OpenTelemetry SDK and hand it to the client as tracing options.
+        final SdkTracerProvider sdkProvider = SdkTracerProvider.builder().addSpanProcessor(spanProcessor).build();
+        final OpenTelemetrySdk otel = OpenTelemetrySdk.builder().setTracerProvider(sdkProvider).build();
+        final OpenTelemetryTracingOptions tracingOptions = new OpenTelemetryTracingOptions().setOpenTelemetry(otel);
+        final ChatCompletionsClientBuilder builder
+            = getBuilder(httpClient).clientOptions(new ClientOptions().setTracingOptions(tracingOptions));
+        if (!captureContent) {
+            return builder.buildClient();
+        }
+        // Content recording is requested through an explicit configuration property.
+        final Configuration configuration
+            = new ConfigurationBuilder().putProperty("azure.tracing.gen_ai.content_recording_enabled", "true")
+                .build();
+        return builder.configuration(configuration).buildClient();
     }
 
     @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
@@ -274,6 +315,40 @@ public void testGetCompletionsStreamWithFunctionCalls(HttpClient httpClient) {
         }
     }
 
+    @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+    @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+    public void testGetCompletionsWithTracing(HttpClient httpClient) {
+        final TestSpanProcessor spanProcessor = new TestSpanProcessor();
+        client = getChatCompletionsClientWithTracing(httpClient, spanProcessor, false);
+        getChatCompletionsFromOptionsRunner((options) -> {
+            final ChatCompletions completions = client.complete(options);
+            assertCompletions(1, completions);
+            // Content recording is off: span attributes are expected, request-message events are not.
+            final ReadableSpan chatSpan = getChatSpan(spanProcessor.getEndedSpans(), options);
+            final Attributes chatAttributes = chatSpan.getAttributes();
+            assertChatSpanRequestAttributes(chatAttributes, options);
+            assertNoChatEventsCaptured(chatSpan);
+            assertChatSpanResponseAttributes(chatAttributes, completions);
+        });
+    }
+
+    @ParameterizedTest(name = DISPLAY_NAME_WITH_ARGUMENTS)
+    @MethodSource("com.azure.ai.inference.TestUtils#getTestParameters")
+    public void testGetCompletionsWithTracingCapturingContent(HttpClient httpClient) {
+        final TestSpanProcessor spanProcessor = new TestSpanProcessor();
+        client = getChatCompletionsClientWithTracing(httpClient, spanProcessor, true);
+        getChatCompletionsFromOptionsRunner((options) -> {
+            final ChatCompletions completions = client.complete(options);
+            assertCompletions(1, completions);
+            // Content recording is on: each request message must also appear as a span event.
+            final ReadableSpan chatSpan = getChatSpan(spanProcessor.getEndedSpans(), options);
+            final Attributes chatAttributes = chatSpan.getAttributes();
+            assertChatSpanRequestAttributes(chatAttributes, options);
+            assertCapturedChatEvents(chatSpan, options.getMessages());
+            assertChatSpanResponseAttributes(chatAttributes, completions);
+        });
+    }
+
     private static String futureTemperature(String locationName, String data) {
         return String.format("%s C", FUNCTION_RETURN);
     }
@@ -446,4 +521,38 @@ public static StringField fromJson(JsonReader jsonReader) throws IOException {
         }
 
     }
+
+    // Span processor that logs every ended span and keeps it so the tests can inspect it afterwards.
+    private static final class TestSpanProcessor implements SpanProcessor {
+        private final ClientLogger logger;
+        private final ConcurrentLinkedDeque<ReadableSpan> spans = new ConcurrentLinkedDeque<>();
+
+        TestSpanProcessor() {
+            this.logger = new ClientLogger(TestSpanProcessor.class);
+        }
+        // Returns a snapshot copy of the spans ended so far, in the order they ended.
+        public List<ReadableSpan> getEndedSpans() {
+            return new ArrayList<>(spans);
+        }
+
+        @Override
+        public void onStart(Context context, ReadWriteSpan readWriteSpan) {
+        }
+
+        @Override
+        public boolean isStartRequired() {
+            return false;
+        }
+
+        @Override
+        public void onEnd(ReadableSpan readableSpan) {
+            logger.info(readableSpan.toString());
+            spans.add(readableSpan);
+        }
+
+        @Override
+        public boolean isEndRequired() {
+            return true;
+        }
+    }
 }