diff --git a/examples/experimental/collector_example.ipynb b/examples/experimental/collector_example.ipynb new file mode 100644 index 000000000..a8c12bcd8 --- /dev/null +++ b/examples/experimental/collector_example.ipynb @@ -0,0 +1,106 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip install opentelemetry-exporter-otlp\n", + "! pip install \"fastapi[standard]\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# TruLens OTLP Collector Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "\n", + "from opentelemetry import trace\n", + "from opentelemetry.exporter.otlp.proto.http.trace_exporter import (\n", + " OTLPSpanExporter,\n", + ")\n", + "from opentelemetry.propagate import inject\n", + "from opentelemetry.sdk.trace import TracerProvider\n", + "from opentelemetry.sdk.trace.export import BatchSpanProcessor\n", + "import requests\n", + "from trulens.experimental.otel_tracing.core.otel import Collector\n", + "\n", + "tracer = trace.get_tracer(__name__)\n", + "\n", + "tracer_provider = TracerProvider()\n", + "trace.set_tracer_provider(tracer_provider)\n", + "span_processor = BatchSpanProcessor(\n", + " OTLPSpanExporter(endpoint=\"http://localhost:5000\")\n", + ")\n", + "tracer_provider.add_span_processor(span_processor)\n", + "\n", + "with tracer.start_as_current_span(\"request\") as span:\n", + " headers = {}\n", + " inject(headers)\n", + "\n", + "print(headers)\n", + "\n", + "c = Collector()\n", + "c.start()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "res = requests.post(\"http://localhost:5000/\", json=json.dumps({\"key\": \"value\"}))\n", + "res.content" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from opentelemetry.exporter 
import otlp" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dir(otlp.proto.grpc)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "trulens-9bG3yHQd-py3.11", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/experimental/dummy_example.ipynb b/examples/experimental/dummy_example.ipynb index 2c6dce5af..9b71c0608 100644 --- a/examples/experimental/dummy_example.ipynb +++ b/examples/experimental/dummy_example.ipynb @@ -418,7 +418,7 @@ ], "metadata": { "kernelspec": { - "display_name": "trulens-9bG3yHQd-py3.9", + "display_name": "trulens-9bG3yHQd-py3.11", "language": "python", "name": "python3" }, diff --git a/examples/experimental/export_dummy_example.ipynb b/examples/experimental/export_dummy_example.ipynb index 9da425bdd..3c6eb024b 100644 --- a/examples/experimental/export_dummy_example.ipynb +++ b/examples/experimental/export_dummy_example.ipynb @@ -50,9 +50,7 @@ "source": [ "# ruff: noqa: F401\n", "\n", - "import json\n", "from pathlib import Path\n", - "import re\n", "import sys\n", "\n", "import dotenv\n", @@ -68,8 +66,12 @@ "from trulens.core import Feedback\n", "from trulens.core import Select\n", "from trulens.core.session import TruSession\n", - "from trulens.experimental.otel_tracing.core.trace import TracerProvider\n", + "from trulens.experimental.otel_tracing.core.trace import sem\n", + "from trulens.experimental.otel_tracing.core.trace.trace import (\n", + " trulens_tracer_provider,\n", + ")\n", "from trulens.feedback.dummy.provider import DummyProvider\n", + "from trulens.otel.semconv.trace import SpanAttributes\n", "\n", "# Add base dir to path to be able to access test 
folder.\n", "base_dir = Path().cwd().parent.parent.resolve()\n", @@ -88,7 +90,7 @@ "outputs": [], "source": [ "# Sets the global default tracer provider to be the trulens one.\n", - "trace.set_tracer_provider(TracerProvider())\n", + "trace.set_tracer_provider(trulens_tracer_provider())\n", "\n", "# Creates a tracer for custom spans below.\n", "tracer = trace.get_tracer(__name__)" @@ -115,6 +117,8 @@ "# Create a TruLens session.\n", "session = TruSession()\n", "\n", + "session.reset_database() # reset the database first as enabling otel_tracing creates another table which we don't want to be deleted by reset_database.\n", + "\n", "# To export spans to an external OTEL SpanExporter tool, set it here:\n", "session.experimental_otel_exporter = exporter\n", "\n", @@ -123,8 +127,7 @@ "# this step is required.\n", "session.experimental_enable_feature(\"otel_tracing\")\n", "\n", - "session.reset_database()\n", - "session.start_dashboard()" + "# session.start_dashboard()" ] }, { @@ -138,9 +141,7 @@ "# Create dummy endpoint for a dummy feedback function:\n", "dummy_provider = DummyProvider()\n", "dummy_feedback = Feedback(dummy_provider.sentiment).on(\n", - " text=Select.RecordSpans.trulens.call.generate.attributes[\n", - " \"trulens.bindings\"\n", - " ].prompt\n", + " text=Select.RecordSpans.trulens.call.generate.bound_arguments.prompt\n", ")\n", "# Parts of the selector are:\n", "#\n", @@ -151,8 +152,8 @@ "#\n", "# - attributes - Span attributes\n", "#\n", - "# - [\"trulens.bindings\"] - Attributes specific to TruLens spans. Call spans\n", - "# include method call arguments in \"trulens.bindings\". Other attributes are\n", + "# - [\"trulens.bound_arguments\"] - Attributes specific to TruLens spans. Call spans\n", + "# include method call arguments in \"trulens.bound_arguments\". 
Other attributes are\n", "# \"trulens.ret\" for the call span's return value and \"trulens.error\" for the\n", "# call span's error if it raised an exception instead of returning.\n", "#\n", @@ -162,10 +163,16 @@ "ca = DummyApp()\n", "\n", "# Create trulens wrapper:\n", - "ta = TruCustomApp(\n", + "ta1 = TruCustomApp(\n", " ca,\n", - " app_id=\"customapp\",\n", + " app_name=\"customapp1\",\n", " feedbacks=[dummy_feedback],\n", + ")\n", + "\n", + "ta2 = TruCustomApp(\n", + " ca,\n", + " app_name=\"customapp2\",\n", + " # feedbacks=[dummy_feedback],\n", ")" ] }, @@ -177,16 +184,34 @@ "source": [ "# Normal trulens recording context manager:\n", "\n", - "with ta as recorder:\n", - " # (optional) Another custom span.\n", + "# print(trace.get_current_span())\n", + "\n", + "with ta1 as recorder:\n", + " # (optional) Custom span.\n", " with tracer.start_as_current_span(\"custom inner span\") as inner_span:\n", + " print(\"inner_span=\", inner_span)\n", + "\n", " # (optional) Set custom span attributes.\n", - " inner_span.set_attribute(\"custom\", \"value\")\n", + " inner_span.set_attribute(\"custom\", \"value1\")\n", "\n", " # Normal instrumented call:\n", - " print(ca.respond_to_query(\"hello\"))\n", + " print(ca.respond_to_query(query=\"hello\"))\n", "\n", - "record = recorder.get()" + " with ta2 as recorder2:\n", + " # Second custom span.\n", + " with tracer.start_as_current_span(\n", + " \"custom inner span 2\"\n", + " ) as inner_span2:\n", + " print(\"inner_span2=\", inner_span)\n", + " inner_span2.set_attribute(\"custom\", \"value2\")\n", + "\n", + " # Second instrumented call.\n", + " print(ca.respond_to_query(query=\"hi\"))\n", + "\n", + " # A third instrumented call.\n", + " print(ca.respond_to_query(query=\"goodbye\"))\n", + "\n", + "record, *rest = recorder.records" ] }, { @@ -198,7 +223,34 @@ "# Check the feedback results. 
Note that this feedback function is from a dummy\n", "# provider which does not have true sentiment analysis.\n", "\n", - "record.feedback_results[0].result()" + "print(record.feedback_results[0].result())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "record.experimental_otel_spans[1]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "record.experimental_otel_spans" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "record.experimental_otel_spans[0].attributes" ] }, { @@ -209,7 +261,83 @@ "source": [ "# Check trulens instrumented calls as spans:\n", "\n", - "record.get(Select.RecordSpans)" + "span = record.get(Select.RecordSpans.trulens.call.respond_to_query[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Raw representation of attributes in the span:\n", + "\n", + "span._attributes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Serialized versions of attributes:\n", + "\n", + "span.attributes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Some of the same data is also accessible directly from the span instance:\n", + "\n", + "# These are the explicitly defined pydantic model fields.\n", + "for key, value in span.model_fields.items():\n", + " print(f\"(explicit) {key}: {getattr(span, key)}\")\n", + "\n", + "# These are computed properties defined to mirror their value in the attribute dict:\n", + "for key, value in span.model_computed_fields.items():\n", + " print(f\"(computed) {key}: {getattr(span, key)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Span type can be accessed by either 
the span_types attributes of enums or\n", + "# python class hierarchy:\n", + "\n", + "print(span.span_types)\n", + "print(type(span).__bases__)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use span_types for checking span category:\n", + "\n", + "print(SpanAttributes.SpanType.CALL in span.span_types)\n", + "print(SpanAttributes.SpanType.UNKNOWN in span.span_types)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Use isinstance instead:\n", + "\n", + "print(isinstance(span, sem.Call))\n", + "print(isinstance(span, sem.Unknown))" ] }, { @@ -223,6 +351,9 @@ "# The InMemorySpanExporter stores the spans in memory. Lets read them back here\n", "# to inspect them:\n", "\n", + "from trulens.experimental.otel_tracing.core.trace import context as core_context\n", + "from trulens.experimental.otel_tracing.core.trace import otel as core_otel\n", + "\n", "if \"exporter\" in locals():\n", " print(f\"Spans exported to {exporter}:\")\n", "\n", @@ -230,31 +361,75 @@ " spans = exporter.get_finished_spans()\n", "\n", " for span in spans:\n", - " print(span.name)\n", - "\n", - " # The ConsoleSpanExporter writes json dumps of each span. 
Lets read those back\n", - " # here to inspect them:\n", - "\n", - " if isinstance(exporter, ConsoleSpanExporter):\n", - " match_root_json = re.compile(r\"(?:(^|\\n))\\{.+?\\n\\}\", re.DOTALL)\n", - "\n", - " if \"stream\" in locals():\n", - " dumps = match_root_json.finditer(stream.getvalue()) # noqa: F821\n", - "\n", - " for dump in dumps:\n", - " span = json.loads(dump.group())\n", - " print(span[\"name\"])\n", + " # Using of_contextlike here to print ids more readably.\n", + " print(\n", + " core_context.SpanContext.of_contextlike(span.context),\n", + " \"->\",\n", + " core_context.SpanContext.of_contextlike(span.parent),\n", + " span.name,\n", + " )\n", "\n", " elif isinstance(exporter, ZipkinExporter):\n", " print(\n", " \"The spans should be visible in the zipkin dashboard at http://localhost:9411/zipkin/\"\n", " )" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the spans back from the new spans table.\n", + "\n", + "db = session.connector.db\n", + "\n", + "for span in db.get_spans():\n", + " print(span)\n", + " if isinstance(span, sem.RecordRoot):\n", + " print(\"\\tThis is the root of record:\", span.record_id)\n", + " print(\"\\tRecord is for app:\", span.app_id)\n", + " if isinstance(span, sem.Record):\n", + " print(\"\\tBelongs to records:\", list(span.record_ids.values()))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Get the trace roots only:\n", + "\n", + "from trulens.otel.semconv.trace import SpanAttributes\n", + "\n", + "db = session.connector.db\n", + "\n", + "for span in db.get_spans(\n", + " where=db.Span.span_types.contains(SpanAttributes.SpanType.RECORD_ROOT)\n", + "):\n", + " # Root spans identify the record_id they are recording and the app to which\n", + " # that record belongs:\n", + " print(span, span.record_id, span.app_id, span.app_name, span.app_version)\n", + "\n", + " # Root spans have these 
fields that were previously in Record:\n", + " print(\"\\tmain_input:\", repr(span.main_input))\n", + " print(\"\\tmain_output:\", repr(span.main_output))\n", + " print(\"\\tmain_error:\", repr(span.main_error))\n", + " print(\"\\ttotal_cost:\", span.total_cost)\n", + "\n", + " # Get the spans for a specific record:\n", + " for child_span in db.get_spans(\n", + " where=db.Span.record_ids.contains(span.record_id)\n", + " ):\n", + " print(\"\\t\", child_span)" + ] } ], "metadata": { "kernelspec": { - "display_name": "trulens-9bG3yHQd-py3.11", + "display_name": "trulens-9bG3yHQd-py3.8", "language": "python", "name": "python3" }, diff --git a/poetry.lock b/poetry.lock index ebaf7640a..1e17b42af 100644 --- a/poetry.lock +++ b/poetry.lock @@ -3278,13 +3278,13 @@ reference = "pypi-public" [[package]] name = "litellm" -version = "1.53.5" +version = "1.53.7" description = "Library to easily interface with LLM API providers" optional = false python-versions = ">=3.8, !=2.7.*, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*, !=3.6.*, !=3.7.*" files = [ - {file = "litellm-1.53.5-py3-none-any.whl", hash = "sha256:6e4bb2eb3b3a61f29320bb45fb23293daccfb540f21b08049a30e96fa43ef710"}, - {file = "litellm-1.53.5.tar.gz", hash = "sha256:1985b25ee270ff100fae309f0805b678fb3759e96a7c7bdc148dc3a6073b136d"}, + {file = "litellm-1.53.7-py3-none-any.whl", hash = "sha256:f6d58a6bebe8cb530d6e3d45ae6f2f648546687d5fd3eb2e064ac8292b50b9c1"}, + {file = "litellm-1.53.7.tar.gz", hash = "sha256:1b00bb3b7f8f35b0843abc1ced98e7bb0580430ca027f6710128dc1346fb1073"}, ] [package.dependencies] @@ -4899,13 +4899,13 @@ reference = "pypi-public" [[package]] name = "notebook" -version = "7.2.2" +version = "7.2.3" description = "Jupyter Notebook - A web-based notebook environment for interactive computing" optional = false python-versions = ">=3.8" files = [ - {file = "notebook-7.2.2-py3-none-any.whl", hash = "sha256:c89264081f671bc02eec0ed470a627ed791b9156cad9285226b31611d3e9fe1c"}, - {file = 
"notebook-7.2.2.tar.gz", hash = "sha256:2ef07d4220421623ad3fe88118d687bc0450055570cdd160814a59cf3a1c516e"}, + {file = "notebook-7.2.3-py3-none-any.whl", hash = "sha256:6e560b360fc805c88037d5d988ed000347b2200f594c6d0929467c25cd11a79b"}, + {file = "notebook-7.2.3.tar.gz", hash = "sha256:3bf03e92f97f0f28bfd3faabe19bdb7fde0c53a58adac78f0b61b1334a53f7a1"}, ] [package.dependencies] @@ -5535,13 +5535,13 @@ reference = "pypi-public" [[package]] name = "openai" -version = "1.56.2" +version = "1.57.0" description = "The official Python library for the openai API" optional = false python-versions = ">=3.8" files = [ - {file = "openai-1.56.2-py3-none-any.whl", hash = "sha256:82d0c48f9504e04c7797e9b799dcf7f49a246d99b6cbfd90f3193ea80815b69e"}, - {file = "openai-1.56.2.tar.gz", hash = "sha256:17312af69bc7670d4048f98ab5849f8784d98c39ac64fcde19406e3774a0c1e5"}, + {file = "openai-1.57.0-py3-none-any.whl", hash = "sha256:972e36960b821797952da3dc4532f486c28e28a2a332d7d0c5407f242e9d9c39"}, + {file = "openai-1.57.0.tar.gz", hash = "sha256:76f91971c4bdbd78380c9970581075e0337b5d497c2fbf7b5255078f4b31abf9"}, ] [package.dependencies] @@ -6674,13 +6674,13 @@ reference = "pypi-public" [[package]] name = "pyjwt" -version = "2.9.0" +version = "2.10.1" description = "JSON Web Token implementation in Python" optional = false -python-versions = ">=3.8" +python-versions = ">=3.9" files = [ - {file = "PyJWT-2.9.0-py3-none-any.whl", hash = "sha256:3b02fb0f44517787776cf48f2ae25d8e14f300e6d7545a4315cee571a415e850"}, - {file = "pyjwt-2.9.0.tar.gz", hash = "sha256:7e1e5b56cc735432a7369cbfa0efe50fa113ebecdc04ae6922deba8b84582d0c"}, + {file = "PyJWT-2.10.1-py3-none-any.whl", hash = "sha256:dcdd193e30abefd5debf142f9adfcdd2b58004e644f25406ffaebd50bd98dacb"}, + {file = "pyjwt-2.10.1.tar.gz", hash = "sha256:3cc5772eb20009233caf06e9d8a0577824723b44e6648ee0a2aedb6cf9381953"}, ] [package.extras] @@ -7616,29 +7616,29 @@ reference = "pypi-public" [[package]] name = "ruff" -version = "0.8.1" +version = "0.8.2" 
description = "An extremely fast Python linter and code formatter, written in Rust." optional = false python-versions = ">=3.7" files = [ - {file = "ruff-0.8.1-py3-none-linux_armv6l.whl", hash = "sha256:fae0805bd514066f20309f6742f6ee7904a773eb9e6c17c45d6b1600ca65c9b5"}, - {file = "ruff-0.8.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b8a4f7385c2285c30f34b200ca5511fcc865f17578383db154e098150ce0a087"}, - {file = "ruff-0.8.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:cd054486da0c53e41e0086e1730eb77d1f698154f910e0cd9e0d64274979a209"}, - {file = "ruff-0.8.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2029b8c22da147c50ae577e621a5bfbc5d1fed75d86af53643d7a7aee1d23871"}, - {file = "ruff-0.8.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:2666520828dee7dfc7e47ee4ea0d928f40de72056d929a7c5292d95071d881d1"}, - {file = "ruff-0.8.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:333c57013ef8c97a53892aa56042831c372e0bb1785ab7026187b7abd0135ad5"}, - {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:288326162804f34088ac007139488dcb43de590a5ccfec3166396530b58fb89d"}, - {file = "ruff-0.8.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b12c39b9448632284561cbf4191aa1b005882acbc81900ffa9f9f471c8ff7e26"}, - {file = "ruff-0.8.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:364e6674450cbac8e998f7b30639040c99d81dfb5bbc6dfad69bc7a8f916b3d1"}, - {file = "ruff-0.8.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b22346f845fec132aa39cd29acb94451d030c10874408dbf776af3aaeb53284c"}, - {file = "ruff-0.8.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:b2f2f7a7e7648a2bfe6ead4e0a16745db956da0e3a231ad443d2a66a105c04fa"}, - {file = "ruff-0.8.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:adf314fc458374c25c5c4a4a9270c3e8a6a807b1bec018cfa2813d6546215540"}, - {file = 
"ruff-0.8.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:a885d68342a231b5ba4d30b8c6e1b1ee3a65cf37e3d29b3c74069cdf1ee1e3c9"}, - {file = "ruff-0.8.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:d2c16e3508c8cc73e96aa5127d0df8913d2290098f776416a4b157657bee44c5"}, - {file = "ruff-0.8.1-py3-none-win32.whl", hash = "sha256:93335cd7c0eaedb44882d75a7acb7df4b77cd7cd0d2255c93b28791716e81790"}, - {file = "ruff-0.8.1-py3-none-win_amd64.whl", hash = "sha256:2954cdbe8dfd8ab359d4a30cd971b589d335a44d444b6ca2cb3d1da21b75e4b6"}, - {file = "ruff-0.8.1-py3-none-win_arm64.whl", hash = "sha256:55873cc1a473e5ac129d15eccb3c008c096b94809d693fc7053f588b67822737"}, - {file = "ruff-0.8.1.tar.gz", hash = "sha256:3583db9a6450364ed5ca3f3b4225958b24f78178908d5c4bc0f46251ccca898f"}, + {file = "ruff-0.8.2-py3-none-linux_armv6l.whl", hash = "sha256:c49ab4da37e7c457105aadfd2725e24305ff9bc908487a9bf8d548c6dad8bb3d"}, + {file = "ruff-0.8.2-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:ec016beb69ac16be416c435828be702ee694c0d722505f9c1f35e1b9c0cc1bf5"}, + {file = "ruff-0.8.2-py3-none-macosx_11_0_arm64.whl", hash = "sha256:f05cdf8d050b30e2ba55c9b09330b51f9f97d36d4673213679b965d25a785f3c"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:60f578c11feb1d3d257b2fb043ddb47501ab4816e7e221fbb0077f0d5d4e7b6f"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:cbd5cf9b0ae8f30eebc7b360171bd50f59ab29d39f06a670b3e4501a36ba5897"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b402ddee3d777683de60ff76da801fa7e5e8a71038f57ee53e903afbcefdaa58"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:705832cd7d85605cb7858d8a13d75993c8f3ef1397b0831289109e953d833d29"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:32096b41aaf7a5cc095fa45b4167b890e4c8d3fd217603f3634c92a541de7248"}, + 
{file = "ruff-0.8.2-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e769083da9439508833cfc7c23e351e1809e67f47c50248250ce1ac52c21fb93"}, + {file = "ruff-0.8.2-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5fe716592ae8a376c2673fdfc1f5c0c193a6d0411f90a496863c99cd9e2ae25d"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:81c148825277e737493242b44c5388a300584d73d5774defa9245aaef55448b0"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:d261d7850c8367704874847d95febc698a950bf061c9475d4a8b7689adc4f7fa"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1ca4e3a87496dc07d2427b7dd7ffa88a1e597c28dad65ae6433ecb9f2e4f022f"}, + {file = "ruff-0.8.2-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:729850feed82ef2440aa27946ab39c18cb4a8889c1128a6d589ffa028ddcfc22"}, + {file = "ruff-0.8.2-py3-none-win32.whl", hash = "sha256:ac42caaa0411d6a7d9594363294416e0e48fc1279e1b0e948391695db2b3d5b1"}, + {file = "ruff-0.8.2-py3-none-win_amd64.whl", hash = "sha256:2aae99ec70abf43372612a838d97bfe77d45146254568d94926e8ed5bbb409ea"}, + {file = "ruff-0.8.2-py3-none-win_arm64.whl", hash = "sha256:fb88e2a506b70cfbc2de6fae6681c4f944f7dd5f2fe87233a7233d888bad73e8"}, + {file = "ruff-0.8.2.tar.gz", hash = "sha256:b84f4f414dda8ac7f75075c1fa0b905ac0ff25361f42e6d5da681a465e0f78e5"}, ] [package.source] @@ -10380,4 +10380,4 @@ reference = "pypi-public" [metadata] lock-version = "2.0" python-versions = "^3.8.1,!=3.9.7" -content-hash = "cbd54e9477f8aebed0e62b9690c074d06d6824d77d6933a617876f62c44183aa" +content-hash = "ef4eb2a688f0ca7e2634c2122df5faa4794c4902a5070b57ecdb458c5c67cbcd" diff --git a/pyproject.toml b/pyproject.toml index a677533c0..b263fbe8e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,7 +29,6 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.8.1,!=3.9.7" trulens-core = { version = "^1.0.0", extras = [ - "otel", "tqdm", "openai", ] } @@ -85,7 +84,7 @@ 
trulens-apps-nemo = { path = "src/apps/nemo", develop = true, python = "<3.13" } [tool.poetry.group.providers.dependencies] trulens-providers-bedrock = { path = "src/providers/bedrock", develop = true } trulens-providers-langchain = { path = "src/providers/langchain", develop = true } -trulens-providers-cortex = { path = "src/providers/cortex", develop = true, python = "<3.12" } +trulens-providers-cortex = { path = "src/providers/cortex", develop = true, python = ">=3.9,<3.12" } trulens-providers-huggingface = { path = "src/providers/huggingface", develop = true } trulens-providers-openai = { path = "src/providers/openai", develop = true } trulens-providers-litellm = { path = "src/providers/litellm", develop = true } diff --git a/src/core/trulens/_mods.py b/src/core/trulens/_mods.py index 0e1f354cd..789052f4d 100644 --- a/src/core/trulens/_mods.py +++ b/src/core/trulens/_mods.py @@ -6,6 +6,12 @@ """ +from core.trulens.experimental.otel_tracing.core.trace import ( + trace as core_trace, +) +from core.trulens.experimental.otel_tracing.core.trace.trace import ( + otel as core_otel, +) from trulens.apps import basic as basic_app from trulens.apps import custom as custom_app from trulens.apps import virtual as virtual_app @@ -77,8 +83,6 @@ from trulens.dashboard.utils import records_utils from trulens.dashboard.ux import components as dashboard_components from trulens.dashboard.ux import styles as dashboard_styles -from trulens.experimental.otel_tracing.core import otel as core_otel -from trulens.experimental.otel_tracing.core import trace as core_trace from trulens.experimental.otel_tracing.core._utils import wrap as wrap_utils from trulens.feedback import embeddings as feedback_embeddings from trulens.feedback import feedback as mod_feedback diff --git a/src/core/trulens/core/app.py b/src/core/trulens/core/app.py index f27ac2cdd..d864fbd8a 100644 --- a/src/core/trulens/core/app.py +++ b/src/core/trulens/core/app.py @@ -19,6 +19,7 @@ Hashable, Iterable, List, + 
Literal, Optional, Sequence, Set, @@ -1057,12 +1058,10 @@ def __enter__(self): token = self.recording_contexts.set(ctx) ctx.token = token - # self._set_context_vars() - return ctx # For use as a context manager. - def __exit__(self, exc_type, exc_value, exc_tb): + def __exit__(self, exc_type, exc_value, exc_tb) -> Literal[False]: if self.session.experimental_feature( core_experimental.Feature.OTEL_TRACING ): @@ -1073,12 +1072,8 @@ def __exit__(self, exc_type, exc_value, exc_tb): ctx = self.recording_contexts.get() self.recording_contexts.reset(ctx.token) - # self._reset_context_vars() - - if exc_type is not None: - raise exc_value - - return + # False means dont suppress exceptions. + return False # For use as a context manager. async def __aenter__(self): @@ -1094,12 +1089,10 @@ async def __aenter__(self): token = self.recording_contexts.set(ctx) ctx.token = token - # self._set_context_vars() - return ctx # For use as a context manager. - async def __aexit__(self, exc_type, exc_value, exc_tb): + async def __aexit__(self, exc_type, exc_value, exc_tb) -> Literal[False]: if self.session.experimental_feature( core_experimental.Feature.OTEL_TRACING ): @@ -1110,12 +1103,8 @@ async def __aexit__(self, exc_type, exc_value, exc_tb): ctx = self.recording_contexts.get() self.recording_contexts.reset(ctx.token) - # self._reset_context_vars() - - if exc_type is not None: - raise exc_value - - return + # False means dont suppress exceptions. 
+ return False def _set_context_vars(self): # HACK: For debugging purposes, try setting/resetting all context vars diff --git a/src/core/trulens/core/database/base.py b/src/core/trulens/core/database/base.py index 6999baf1f..3e82b05e5 100644 --- a/src/core/trulens/core/database/base.py +++ b/src/core/trulens/core/database/base.py @@ -1,9 +1,22 @@ import abc +from dataclasses import dataclass from datetime import datetime import logging -from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Union +from typing import ( + Any, + ClassVar, + Dict, + Iterable, + List, + Optional, + Sequence, + Tuple, + Type, + Union, +) import pandas as pd +import pydantic from trulens.core.schema import app as app_schema from trulens.core.schema import dataset as dataset_schema from trulens.core.schema import feedback as feedback_schema @@ -36,6 +49,53 @@ """Default value for option to redact secrets before writing out data to database.""" +@dataclass +class PageSelect: + """EXPERIMENTAL(otel_tracing): Pagination information for a database query. + + This is expected to be obtuse as it is DB implementation independent. + """ + + offset: Optional[int] = None + """The offset of the first row to return. + + None is identical to 0. + """ + + limit: Optional[int] = None + """The maximum number of rows to return. + + None means no limit. + """ + + shuffle: bool = False + """Shuffle the rows before returning them.""" + + before_index: Optional[int] = None + """The index of the row to start before. + + Note that the index column is auto incrementing. + """ + + after_index: Optional[int] = None + """The index of the row to start after. + + Note that the index column is auto incrementing. 
+ """ + + before_created_timestamp: Optional[datetime] = None + """The created timestamp of the row to start before.""" + + after_created_timestamp: Optional[datetime] = None + """The created timestamp of the row to start after.""" + + before_updated_timestamp: Optional[datetime] = None + """The updated timestamp of the row to start before.""" + + after_updated_timestamp: Optional[datetime] = None + """The updated timestamp of the row to start after.""" + + class DB(serial_utils.SerialModel, abc.ABC, text_utils.WithIdentString): """Abstract definition of databases used by trulens. @@ -43,6 +103,22 @@ class DB(serial_utils.SerialModel, abc.ABC, text_utils.WithIdentString): and default implementation of this interface. """ + T: ClassVar[Type] = Any + """EXPERIMENTAL(otel_tracing): Database "table" expression type.""" + + Q: ClassVar[Type] = Any + """EXPERIMENTAL(otel_tracing): Database "select" expression type.""" + + W: ClassVar[Type] = Any + """EXPERIMENTAL(otel_tracing): Database "where" expression type. + + Must support ==, !-, `contains`, `and`, `or`, `not`. + """ + + model_config: ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + arbitrary_types_allowed=True + ) + redact_keys: bool = DEFAULT_DATABASE_REDACT_KEYS """Redact secrets before writing out data.""" @@ -138,8 +214,8 @@ def insert_app(self, app: app_schema.AppDefinition) -> types_schema.AppID: Args: app: The app to insert or update. Note that only the - [AppDefinition][trulens.core.schema.app.AppDefinition] parts are serialized - hence the type hint. + [AppDefinition][trulens.core.schema.app.AppDefinition] parts are + serialized hence the type hint. Returns: The id of the given app. @@ -366,8 +442,9 @@ def get_records_and_feedback( def insert_ground_truth( self, ground_truth: groundtruth_schema.GroundTruth ) -> types_schema.GroundTruthID: - """Insert a ground truth entry into the database. The ground truth id is generated - based on the ground truth content, so re-inserting is idempotent. 
+ """Insert a ground truth entry into the database. The ground truth id is + generated based on the ground truth content, so re-inserting is + idempotent. Args: ground_truth: The ground truth entry to insert. @@ -413,8 +490,8 @@ def get_ground_truths_by_dataset(self, dataset_name: str) -> pd.DataFrame: def insert_dataset( self, dataset: dataset_schema.Dataset ) -> types_schema.DatasetID: - """Insert a dataset into the database. The dataset id is generated based on the - dataset content, so re-inserting is idempotent. + """Insert a dataset into the database. The dataset id is generated based + on the dataset content, so re-inserting is idempotent. Args: dataset: The dataset to insert. diff --git a/src/core/trulens/core/database/orm.py b/src/core/trulens/core/database/orm.py index 67a2dec75..b8bc5c157 100644 --- a/src/core/trulens/core/database/orm.py +++ b/src/core/trulens/core/database/orm.py @@ -27,8 +27,9 @@ from trulens.core.schema import record as record_schema from trulens.core.utils import json as json_utils +# TODO: move these type aliases to core/schema/types.py TYPE_JSON = Text -"""Database type for JSON fields.""" +"""Database type for serialized JSON fields.""" TYPE_TIMESTAMP = Float """Database type for timestamps.""" @@ -110,6 +111,8 @@ class ORM(abc.ABC, Generic[T]): registry: Dict[str, Type[T]] metadata: MetaData + base: Type[T] + AppDefinition: Type[T] FeedbackDefinition: Type[T] Record: Type[T] @@ -118,7 +121,7 @@ class ORM(abc.ABC, Generic[T]): Dataset: Type[T] -def new_orm(base: Type[T], prefix: str = "trulens_") -> Type[ORM[T]]: +def new_orm(base_: Type[T], prefix: str = "trulens_") -> Type[ORM[T]]: """Create a new orm container from the given base table class.""" class NewORM(ORM): @@ -133,6 +136,9 @@ class NewORM(ORM): relationships in another. """ + base = base_ + """Declerative table base for the other table classes.""" + registry: Dict[str, base] = base.registry._class_registry """Table name to ORM class mapping for tables used by trulens. 
diff --git a/src/core/trulens/core/database/sqlalchemy.py b/src/core/trulens/core/database/sqlalchemy.py index 48ced3587..c8c0e520a 100644 --- a/src/core/trulens/core/database/sqlalchemy.py +++ b/src/core/trulens/core/database/sqlalchemy.py @@ -66,6 +66,15 @@ class SQLAlchemyDB(core_db.DB): arbitrary_types_allowed=True ) + T: ClassVar[Type] = sa.sql.expression.TableClause + """Table type for this database.""" + + Q: ClassVar[Type] = sa.sql.expression.Select + """Query type for this database.""" + + W: ClassVar[Type] = sa.sql.expression.Select + """Where clause type for this database.""" + table_prefix: str = core_db.DEFAULT_DATABASE_PREFIX """The prefix to use for all table names. @@ -148,8 +157,8 @@ def from_tru_args( database_prefix: Optional[str] = core_db.DEFAULT_DATABASE_PREFIX, **kwargs: Dict[str, Any], ) -> SQLAlchemyDB: - """Process database-related configuration provided to the [Tru][trulens.core.session.TruSession] class to - create a database. + """Process database-related configuration provided to the + [Tru][trulens.core.session.TruSession] class to create a database. Emits warnings if appropriate. """ @@ -232,9 +241,12 @@ def from_db_engine( ) -> SQLAlchemyDB: """ Create a database for the given engine. + Args: engine: The database engine. + kwargs: Additional arguments to pass to the database constructor. + Returns: A database instance. 
""" @@ -320,12 +332,14 @@ def migrate_database(self, prior_prefix: Optional[str] = None): ) # logger.warning("Please ignore these warnings: \"SAWarning: This declarative base already contains...\"") + all_tables = [ + c._table_base_name + for c in self.orm.registry.values() + if hasattr(c, "_table_base_name") + ] + with self.engine.connect() as c: - for table_name in ["alembic_version"] + [ - c._table_base_name - for c in self.orm.registry.values() - if hasattr(c, "_table_base_name") - ]: + for table_name in ["alembic_version"] + all_tables: old_version_table = f"{prior_prefix}{table_name}" new_version_table = f"{self.table_prefix}{table_name}" @@ -359,6 +373,17 @@ def reset_database(self): ] meta.drop_all(bind=self.engine, tables=tables) + # EXPERIMENTAL(otel_tracing): Spans table is hacked in and needs special + # handling here. We remove the meta data that was reflected back from DB + # schema so that the Span schema can be created when otel_tracing is + # being enabled. If don't do this, `meta.reflect` above will learn about + # the spans table and if its metadata is subsequently created in the + # otel_tracing version of this file, sqlalchemy will throw an error + # because it thinks it already exists. 
+ spans_table = self.table_prefix + "spans" + if spans_table in meta.tables: + meta.remove(meta.tables[spans_table]) + self.migrate_database() def insert_record( @@ -411,6 +436,8 @@ def get_app( ): return json.loads(_app.app_json) + return None + def update_app_metadata( self, app_id: types_schema.AppID, metadata: Dict[str, Any] ) -> Optional[app_schema.AppDefinition]: @@ -435,6 +462,8 @@ def nested_update(metadata: dict, update: dict): nested_update(app_json["metadata"], metadata) _app.app_json = json.dumps(app_json) + return None + def get_apps( self, app_name: Optional[types_schema.AppName] = None ) -> Iterable[serial_utils.JSON]: @@ -494,7 +523,8 @@ def delete_app(self, app_id: types_schema.AppID) -> None: def insert_feedback_definition( self, feedback_definition: feedback_schema.FeedbackDefinition ) -> types_schema.FeedbackDefinitionID: - """See [DB.insert_feedback_definition][trulens.core.database.base.DB.insert_feedback_definition].""" + """See + [DB.insert_feedback_definition][trulens.core.database.base.DB.insert_feedback_definition].""" # TODO: thread safety @@ -609,7 +639,8 @@ def insert_feedback( def batch_insert_feedback( self, feedback_results: List[feedback_schema.FeedbackResult] ) -> List[types_schema.FeedbackResultID]: - """See [DB.batch_insert_feedback][trulens.core.database.base.DB.batch_insert_feedback].""" + """See + [DB.batch_insert_feedback][trulens.core.database.base.DB.batch_insert_feedback].""" # The Snowflake stored procedure connector isn't currently capable of # handling None qmark-bound to an `INSERT INTO` or `UPDATE` statement # for nullable numeric columns. 
Thus, as a hack, we get around this by @@ -715,7 +746,7 @@ def get_feedback_count_by_status( feedback_result_id: Optional[types_schema.FeedbackResultID] = None, feedback_definition_id: Optional[ types_schema.FeedbackDefinitionID - ] = None, + ] = None, # pylint: disable=W0613 status: Optional[ Union[ feedback_schema.FeedbackResultStatus, @@ -728,7 +759,8 @@ def get_feedback_count_by_status( shuffle: bool = False, run_location: Optional[feedback_schema.FeedbackRunLocation] = None, ) -> Dict[feedback_schema.FeedbackResultStatus, int]: - """See [DB.get_feedback_count_by_status][trulens.core.database.base.DB.get_feedback_count_by_status].""" + """See + [DB.get_feedback_count_by_status][trulens.core.database.base.DB.get_feedback_count_by_status].""" with self.session.begin() as session: q = self._feedback_query( @@ -779,7 +811,8 @@ def get_records_and_feedback( offset: Optional[int] = None, limit: Optional[int] = None, ) -> Tuple[pd.DataFrame, Sequence[str]]: - """See [DB.get_records_and_feedback][trulens.core.database.base.DB.get_records_and_feedback].""" + """See + [DB.get_records_and_feedback][trulens.core.database.base.DB.get_records_and_feedback].""" # TODO: Add pagination to this method. 
Currently the joinedload in # select below disables lazy loading of records which will be a problem @@ -835,7 +868,8 @@ def get_records_and_feedback( def insert_ground_truth( self, ground_truth: groundtruth_schema.GroundTruth ) -> types_schema.GroundTruthID: - """See [DB.insert_ground_truth][trulens.core.database.base.DB.insert_ground_truth].""" + """See + [DB.insert_ground_truth][trulens.core.database.base.DB.insert_ground_truth].""" # TODO: thread safety with self.session.begin() as session: @@ -862,7 +896,9 @@ def insert_ground_truth( def batch_insert_ground_truth( self, ground_truths: List[groundtruth_schema.GroundTruth] ) -> List[types_schema.GroundTruthID]: - """See [DB.batch_insert_ground_truth][trulens.core.database.base.DB.batch_insert_ground_truth].""" + """See + [DB.batch_insert_ground_truth][trulens.core.database.base.DB.batch_insert_ground_truth].""" + with self.session.begin() as session: ground_truth_ids = [gt.ground_truth_id for gt in ground_truths] @@ -911,10 +947,14 @@ def get_ground_truth( ): return json.loads(_ground_truth) + return None + def get_ground_truths_by_dataset( self, dataset_name: str ) -> pd.DataFrame | None: - """See [DB.get_ground_truths_by_dataset][trulens.core.database.base.DB.get_ground_truths_by_dataset].""" + """See + [DB.get_ground_truths_by_dataset][trulens.core.database.base.DB.get_ground_truths_by_dataset].""" + with self.session.begin() as session: q = sa.select(self.orm.Dataset) all_datasets = (row[0] for row in session.execute(q)) diff --git a/src/core/trulens/core/experimental/__init__.py b/src/core/trulens/core/experimental/__init__.py index 9c1990c21..29c33fe61 100644 --- a/src/core/trulens/core/experimental/__init__.py +++ b/src/core/trulens/core/experimental/__init__.py @@ -24,6 +24,8 @@ from trulens.core.utils import python as python_utils from trulens.core.utils import text as text_utils +# from trulens.core import session as core_session # circular import + T = TypeVar("T") @@ -103,6 +105,13 @@ def 
assert_optionals_installed() -> None: def are_optionals_installed() -> bool: """Check if the optional requirements for the feature are installed.""" + @staticmethod + @abstractmethod + def enable( + session: _WithExperimentalSettings, + ) -> None: # actually TruSession + """Callback to call for the feature when enabled.""" + @staticmethod def assert_can_enable(feature: Feature) -> None: """Asserts that the given feature can be enabled. @@ -130,10 +139,13 @@ def can_enable(feature: Feature) -> bool: return _FeatureSetup.load_setup(modname).are_optionals_installed() @staticmethod - def load_setup(modname: str) -> Type[_FeatureSetup]: + def load_setup(modname_or_flag: Union[str, Feature]) -> Type[_FeatureSetup]: """Load the setup class for the given module.""" - mod = importlib.import_module(modname) + if isinstance(modname_or_flag, Feature): + modname_or_flag = _FEATURE_SETUPS[modname_or_flag] + + mod = importlib.import_module(modname_or_flag) if not hasattr(mod, "_FeatureSetup"): raise ImportError( @@ -142,6 +154,19 @@ def load_setup(modname: str) -> Type[_FeatureSetup]: return getattr(mod, "_FeatureSetup") + @staticmethod + def call_enable( + flag: Feature, session: _WithExperimentalSettings + ) -> None: # actually TruSession + """Called when the feature is enabled for the session.""" + + setup = _FeatureSetup.load_setup(flag) + setup.enable(session=session) + + +# Alias for the static method to access it from this module. +can_enable = _FeatureSetup.can_enable + class _Setting(Generic[T]): """A setting that attains some value and can be prevented from further @@ -363,6 +388,11 @@ def _experimental_feature( was_frozen = self._experimental_feature_flags.is_frozen(flag) + if value: + # If the feature has optional requirements, this checks that they + # are installed and raises an ImportError if not. + _FeatureSetup.assert_can_enable(flag) + if value: # If the feature has optional requirements, this checks that they # are installed and raises an ImportError if not. 
@@ -376,6 +406,9 @@ def _experimental_feature( if value is not None and changed: if val: + # call_enable and print message only if was not previously enabled. + _FeatureSetup.call_enable(flag=flag, session=self) + print( f"{text_utils.UNICODE_CHECK} experimental {flag} enabled for {self._ident_str()}." ) diff --git a/src/core/trulens/core/schema/record.py b/src/core/trulens/core/schema/record.py index 783fa8cfc..1f93a4a88 100644 --- a/src/core/trulens/core/schema/record.py +++ b/src/core/trulens/core/schema/record.py @@ -44,9 +44,25 @@ def __str__(self): path: serial_utils.Lens """Path to the method in the app's structure.""" - method: pyschema_utils.Method + method: Optional[pyschema_utils.Method] = None """The method that was called.""" + function: Optional[pyschema_utils.Function] = None + """If representing a function instead of a method, set this field instead. + + Not using FunctionOrMethod on `method` for backwards compatibility. + """ + + @property + def function_or_method(self) -> pyschema_utils.FunctionOrMethod: + """The callable object for this method or function.""" + + if self.method is not None: + return self.method + else: + assert self.function is not None + return self.function + class RecordAppCall(serial_utils.SerialModel): """Info regarding each instrumented method call.""" @@ -107,11 +123,7 @@ def method(self) -> pyschema_utils.Method: class Record(serial_utils.SerialModel, Hashable): - """The record of a single main method call. - - Note: - This class will be renamed to `Trace` in the future. 
- """ + """The record of a single main method call.""" def __str__(self): ret = f"Record({self.record_id}) with {len(self.calls)} calls:\n" @@ -120,10 +132,10 @@ def __str__(self): return ret - model_config: ClassVar[dict] = { + model_config: ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( # for `Future[FeedbackResult]` - "arbitrary_types_allowed": True - } + arbitrary_types_allowed=True + ) record_id: types_schema.RecordID """Unique identifier for this record.""" @@ -182,21 +194,26 @@ def _validate_experimental_otel_spans(cls, spans: List[Any]) -> List[Any]: """Deserialize spans if otel_tracing is enabled. We need to do this manually as the experimental_otel_spans field is - declared as containing `Any` but we want to have `Span`s there instead. - We cannot declare the field having `Span`s because we are not sure - otel_tracing is available. + declared as containing `Any` but we want to have `sem.TypedSpan`s there + instead. We cannot declare the field having `TypedSpan`s because we are + not sure otel_tracing is available. """ ret = [] if len(spans) > 0: if otel_tracing_feature._FeatureSetup.are_optionals_installed(): - from trulens.experimental.otel_tracing.core.trace import Span + from trulens.experimental.otel_tracing.core.trace import ( + sem as core_sem, + ) for span in spans: if isinstance(span, dict): - ret.append(Span.model_validate(span)) + ret.append(core_sem.TypedSpan.mixin_new(d=span)) else: + assert isinstance( + span, core_sem.TypedSpan + ), "TypedSpan expected." ret.append(span) else: logger.warning( @@ -204,8 +221,8 @@ def _validate_experimental_otel_spans(cls, spans: List[Any]) -> List[Any]: "be used in the current environment due to missing modules.\n%s", otel_tracing_feature._FeatureSetup.REQUIREMENT.module_not_found, ) - # Set to empty None to prevent errors downstream where Span - # instances are expected. + # Set to empty to prevent errors downstream where Span instances + # are expected. 
ret = [] return ret @@ -361,7 +378,7 @@ def layout_calls_as_app(self) -> Bunch: # Adds another attribute to path, from method name: path = frame_info.path + serial_utils.GetItemOrAttribute( - item_or_attribute=frame_info.method.name + item_or_attribute=frame_info.function_or_method.name ) if path.exists(obj=ret): diff --git a/src/core/trulens/core/schema/types.py b/src/core/trulens/core/schema/types.py index 18428c715..736bd7483 100644 --- a/src/core/trulens/core/schema/types.py +++ b/src/core/trulens/core/schema/types.py @@ -1,9 +1,54 @@ -"""Type aliases.""" +""" +Type aliases and basic types that share interpretation across different systems: +python, OTEL, and SQL. + +This file is here to consolidate the many places where these types appear or +where aliases to them ended up. + +Do not make this module depend on any in TruLens except things which themselves +have little to no internal dependencies. +""" -from typing import Dict +# TODO: move other type aliases for basic types to here: +# - trulens.core.database.orm:py has some +# - + +from __future__ import annotations + +import datetime +from enum import Enum +import random +import sys +import time +from typing import ( + Dict, + Generic, + Iterable, + List, + Mapping, + Optional, + Sequence, + Tuple, + TypeVar, + Union, +) import uuid +from opentelemetry import trace as trace_api +from opentelemetry.trace import span as span_api +from opentelemetry.trace import status as status_api +from opentelemetry.util import types as types_api +from sqlalchemy import BINARY +from sqlalchemy import JSON +from sqlalchemy import SMALLINT +from sqlalchemy import TIMESTAMP +from sqlalchemy import VARCHAR +from trulens.core._utils.pycompat import NoneType # import style exception +from trulens.core._utils.pycompat import Type # import style exception from trulens.core._utils.pycompat import TypeAlias # import style exception +from trulens.core._utils.pycompat import TypeAliasType # import style exception +from 
trulens.core.utils import serial as serial_utils +from trulens.otel.semconv import trace as truconv RecordID: TypeAlias = str """Unique identifier for a record. @@ -83,3 +128,766 @@ def new_call_id() -> CallID: By default these are hashes of dataset content as json. """ + +# The rest is a type organization introduced with EXPERIMENTAL(otel_tracing). + +T = TypeVar("T") + +O = TypeVar("O") # noqa: E741 +"""Types for values in OTEL representations of spans or otherwise.""" + +P = TypeVar("P") +"""Types for values in python.""" + +S = TypeVar("S") +"""Types for values provided to or retrieved from sqlalchemy.""" + + +class TypeInfo(Generic[O, P, S]): + """Container for types and conversions between types used to represent + values that need to be stored/represented in different places. + + The places are at least python and sql and optionally in OTEL. + + The capabilities this class exposes are: + + - Type aliases in the form of `{PY,SQL,OTEL}_TYPE`. + + - Type union for the above. + + - + """ + + OTEL_TYPE: Optional[Type[O]] = None + """Type for OTEL values. + + None if type info is not relevant to OTEL. + """ + + NUM_BITS: Optional[int] = None + """Number of bits in values of this type. 
+ + Note that when represented in python, OTEL, or SQL, the number of bits may + vary but may be specified by a spec, like OTEL, to be a certain number of + bits regardless.""" + + PY_TYPE: Type[P] + """Type for python values.""" + + SQL_TYPE: Type[S] + """Type for values understood by sqlalchemy to be representing in the + database as the column type `SQL_SCHEMA_TYPE`.""" + + SQL_SCHEMA_TYPE: Type + """SQL column type for the column type.""" + + UNION_TYPE: Union[Type[O], Type[P], Type[S]] + """Union of all types that can be used to represent values of this type + except the schema type.""" + + TYPES: Tuple[Type[O], Type[P], Type[S]] + """Tuple of the above so that isinstance can be used.""" + + @classmethod + def py(cls, val: TypeInfo.UNION_TYPE) -> P: + """Convert a value to a python value.""" + if isinstance(val, cls.PY_TYPE): + return val + if isinstance(val, cls.SQL_TYPE): + return cls.py_of_sql(val) + if cls.OTEL_TYPE is not None and isinstance(val, cls.OTEL_TYPE): + return cls.py_of_otel(val) + + raise TypeError(f"Cannot convert value of type {type(val)} to python.") + + @classmethod + def otel(cls, val: TypeInfo.UNION_TYPE) -> O: + """Convert a value to the otel representation.""" + + cls._assert_has_otel() + + if isinstance(val, cls.OTEL_TYPE): + return val + if isinstance(val, cls.PY_TYPE): + return cls.otel_of_py(val) + if isinstance(val, cls.SQL_TYPE): + return cls.otel_of_sql(val) + + raise TypeError(f"Cannot convert value of type {type(val)} to otel.") + + @classmethod + def sql(cls, val: TypeInfo.UNION_TYPE) -> S: + """Convert a value to the sql representation.""" + + if isinstance(val, cls.SQL_TYPE): + return val + if isinstance(val, cls.PY_TYPE): + return cls.sql_of_py(val) + if cls.OTEL_TYPE is not None and isinstance(val, cls.OTEL_TYPE): + return cls.sql_of_otel(val) + + raise TypeError(f"Cannot convert value of type {type(val)} to sql.") + + @classmethod + def default_py(cls) -> P: + """Default python value for this type.""" + return 
cls.rand_py() + + @classmethod + def default_sql(cls) -> S: + """Default sql value for this type.""" + return cls.rand_sql() + + @classmethod + def rand_py(cls) -> P: + """Generate a new random python value of this type.""" + if cls.rand_otel is not TypeInfo.rand_otel: + return cls.py_of_otel(cls.rand_otel()) + if cls.rand_sql is not TypeInfo.rand_sql: + return cls.py_of_sql(cls.rand_sql()) + raise NotImplementedError("Python type generation not implemented.") + + @classmethod + def rand_sql(cls) -> S: + """Generate a new random sql value of this type.""" + if cls.rand_otel is not TypeInfo.rand_otel: + return cls.sql_of_otel(cls.rand_otel()) + if cls.rand_py is not TypeInfo.rand_py: + return cls.sql_of_py(cls.rand_py()) + + raise NotImplementedError("SQL type generation not implemented.") + + @classmethod + def sql_of_py(cls, py_value: P) -> S: + """Convert a python value to a sql value.""" + + if cls.PY_TYPE is cls.SQL_TYPE: + return py_value + + if ( + cls.sql_of_otel is not TypeInfo.sql_of_otel + and cls.otel_of_py is not TypeInfo.otel_of_py + ): + return cls.sql_of_otel(cls.otel_of_py(py_value)) + + raise NotImplementedError + + @classmethod + def py_of_sql(cls, sql_value: S) -> P: + """Convert a sql value to a python value.""" + + if cls.PY_TYPE is cls.SQL_TYPE: + return sql_value + + if ( + cls.py_of_otel is not TypeInfo.py_of_otel + and cls.otel_of_sql is not TypeInfo.otel_of_sql + ): + return cls.py_of_otel(cls.otel_of_sql(sql_value)) + + raise NotImplementedError + + @classmethod + def _assert_has_otel(cls) -> None: + if cls.OTEL_TYPE is None: + raise NotImplementedError( + f"{cls.__name__} does not support OTEL values." 
+ ) + + @classmethod + def rand_otel(cls) -> O: + """Generate a new random otel value of this type.""" + + cls._assert_has_otel() + + if cls.rand_py is not TypeInfo.rand_py: + return cls.otel_of_py(cls.rand_py()) + if cls.rand_sql is not TypeInfo.rand_sql: + return cls.otel_of_sql(cls.rand_sql()) + + raise NotImplementedError("OTEL type generation not implemented.") + + @classmethod + def default_otel(cls) -> O: + """Default otel value for this type.""" + + cls._assert_has_otel() + + return cls.rand_otel() + + @classmethod + def otel_of_py(cls, py_value: P) -> O: + """Convert a python value to an otel value.""" + + cls._assert_has_otel() + + if cls.OTEL_TYPE is cls.PY_TYPE: + return py_value + + if ( + cls.otel_of_sql is not TypeInfo.otel_of_sql + and cls.sql_of_py is not TypeInfo.sql_of_py + ): + return cls.otel_of_sql(cls.sql_of_py(py_value)) + + raise NotImplementedError + + @classmethod + def py_of_otel(cls, otel_value: O) -> P: + """Convert an otel value to a python value.""" + + cls._assert_has_otel() + + if cls.PY_TYPE is cls.OTEL_TYPE: + return otel_value + + if ( + cls.py_of_sql is not TypeInfo.py_of_sql + and cls.sql_of_otel is not TypeInfo.sql_of_otel + ): + return cls.py_of_sql(cls.sql_of_otel(otel_value)) + + raise NotImplementedError + + @classmethod + def otel_of_sql(cls, sql_value: S) -> O: + """Convert a sql value to an otel value.""" + + cls._assert_has_otel() + + if cls.OTEL_TYPE is cls.SQL_TYPE: + return sql_value + + if ( + cls.otel_of_py is not TypeInfo.otel_of_py + and cls.py_of_sql is not TypeInfo.py_of_sql + ): + return cls.otel_of_py(cls.py_of_sql(sql_value)) + + raise NotImplementedError + + @classmethod + def sql_of_otel(cls, otel_value: O) -> S: + """Convert an otel value to a sql value.""" + + cls._assert_has_otel() + + if cls.SQL_TYPE is cls.OTEL_TYPE: + return otel_value + + if ( + cls.sql_of_py is not TypeInfo.sql_of_py + and cls.py_of_otel is not TypeInfo.py_of_otel + ): + return cls.sql_of_py(cls.py_of_otel(otel_value)) + + raise 
NotImplementedError + + +class SpanID(TypeInfo[int, int, bytes]): + """Span ID type. + + This type is for supporting OTEL hence its requirements come from there. In + OTEL and python it is a 64-bit integer. In the database, it is a binary + column with 64 bits or 8 bytes and read as bytes. + """ + + NUM_BITS: int = 64 + """Number of bits in a span identifier.""" + + OTEL_TYPE: Type = int + PY_TYPE: Type = int + SQL_TYPE: Type = bytes + SQL_SCHEMA_TYPE: Type = BINARY(NUM_BITS // 8) + + UNION_TYPE: Type = Union[int, bytes] + TYPES = (int, bytes) + + INVALID_OTEL = span_api.INVALID_SPAN_ID + """Span ID for non-recording or invalid spans.""" + + @classmethod + def rand_otel(cls) -> int: + return int( + random.getrandbits(cls.NUM_BITS) & span_api._SPAN_ID_MAX_VALUE + ) + + @classmethod + def rand_py(cls) -> int: + return cls.rand_otel() + + @classmethod + def rand_sql(cls) -> bytes: + return cls.sql_of_py(cls.rand_py()) + + @classmethod + def otel_of_py(cls, py_value: int) -> int: + return py_value + + @classmethod + def py_of_otel(cls, otel_value: int) -> int: + return otel_value + + @classmethod + def sql_of_py(cls, py_value: int) -> bytes: + return py_value.to_bytes(cls.NUM_BITS // 8, byteorder="big") + + @classmethod + def py_of_sql(cls, sql_value: bytes) -> int: + return int.from_bytes(sql_value, byteorder="big") + + +class TraceID(TypeInfo[int, int, bytes]): + """Trace ID type. + + This type is for supporting OTEL hence its requirements come from there. In + OTEL and python it is a 128-bit integer. In the database, it is a binary + column with 128 bits or 16 bytes. 
+ """ + + NUM_BITS: int = 64 + """Number of bits in a span identifier.""" + + OTEL_TYPE: Type = int + PY_TYPE: Type = int + SQL_TYPE: Type = bytes + SQL_SCHEMA_TYPE: Type = BINARY(NUM_BITS // 8) + + UNION_TYPE: Type = Union[int, bytes] + TYPES: Tuple = (int, bytes) + + INVALID_OTEL = span_api.INVALID_TRACE_ID + """Trace ID for non-recording or invalid spans.""" + + @classmethod + def rand_otel(cls) -> int: + return int( + random.getrandbits(cls.NUM_BITS) + & trace_api.span._TRACE_ID_MAX_VALUE + ) + + @classmethod + def rand_py(cls) -> int: + return cls.rand_otel() + + @classmethod + def rand_sql(cls) -> bytes: + return cls.sql_of_py(cls.rand_py()) + + @classmethod + def otel_of_py(cls, py_value: int) -> int: + return py_value + + @classmethod + def py_of_otel(cls, otel_value: int) -> int: + return otel_value + + @classmethod + def sql_of_py(cls, py_value: int) -> bytes: + return py_value.to_bytes(cls.NUM_BITS // 8, byteorder="big") + + @classmethod + def py_of_sql(cls, sql_value: bytes) -> int: + return int.from_bytes(sql_value, byteorder="big") + + +class StrAsVarChar(TypeInfo[str, str, str]): + """Types that are strings in python,otel,sql interface and VARCHAR in SQL column.""" + + NUM_BYTES: int = 256 + + OTEL_TYPE: Type = str + PY_TYPE: Type = str + SQL_TYPE: Type = str + SQL_SCHEMA_TYPE: Type = VARCHAR(NUM_BYTES) + + UNION_TYPE: Type = str + TYPES: Tuple = (str,) + + @classmethod + def rand_otel(cls) -> str: + return str(uuid.uuid4()) + + @classmethod + def rand_py(cls) -> str: + return cls.rand_otel() + + @classmethod + def rand_sql(cls) -> str: + return cls.rand_otel() + + @classmethod + def otel_of_py(cls, py_value: str) -> str: + return py_value + + @classmethod + def py_of_otel(cls, otel_value: str) -> str: + return otel_value + + @classmethod + def sql_of_py(cls, py_value: str) -> str: + return py_value + + @classmethod + def py_of_sql(cls, sql_value: str) -> str: + return sql_value + + +class TraceRecordID(StrAsVarChar): + """Types for representing 
record ids in traces/spans.""" + + NUM_BYTES: int = 32 + + +class ListAsJSON(TypeInfo[List[O], List[P], List[S]], Generic[O, P, S]): + """Lists stored as JSON in the database.""" + + ETI: TypeInfo[O, P, S] + """TypeInfo for elements.""" + + OTEL_TYPE: Type = List[O] + PY_TYPE: Type = List[P] + SQL_TYPE: Type = List[S] + SQL_SCHEMA_TYPE: Type = JSON + + @classmethod + def otel_of_py(cls, py_value: List[P]) -> List[O]: + return [cls.ETI.otel_of_py(val) for val in py_value] + + @classmethod + def py_of_otel(cls, otel_value: List[O]) -> List[P]: + return [cls.ETI.py_of_otel(val) for val in otel_value] + + @classmethod + def sql_of_py(cls, py_value: List[P]) -> List[S]: + return [cls.ETI.sql_of_py(val) for val in py_value] + + @classmethod + def py_of_sql(cls, sql_value: List[S]) -> List[P]: + return [cls.ETI.py_of_sql(val) for val in sql_value] + + +class DictAsJSON( + TypeInfo[Dict[str, O], Dict[str, P], Dict[str, S]], Generic[O, P, S] +): + """Dicts of str keys stored as JSON in the database.""" + + ETI: TypeInfo[O, P, S] + """TypeInfo for elements.""" + + OTEL_TYPE: Type = Dict[str, O] + PY_TYPE: Type = Dict[str, P] + SQL_TYPE: Type = Dict[str, S] + SQL_SCHEMA_TYPE: Type = JSON + + @classmethod + def otel_of_py(cls, py_value: Dict[str, P]) -> Dict[str, O]: + return {k: cls.ETI.otel_of_py(val) for k, val in py_value.items()} + + @classmethod + def py_of_otel(cls, otel_value: Dict[str, O]) -> Dict[str, P]: + return {k: cls.ETI.py_of_otel(val) for k, val in otel_value.items()} + + @classmethod + def sql_of_py(cls, py_value: Dict[str, P]) -> Dict[str, S]: + return {k: cls.ETI.sql_of_py(val) for k, val in py_value.items()} + + @classmethod + def py_of_sql(cls, sql_value: Dict[str, S]) -> Dict[str, P]: + return {k: cls.ETI.py_of_sql(val) for k, val in sql_value.items()} + + +class TraceRecordIDs( + DictAsJSON[ + TraceRecordID.OTEL_TYPE, TraceRecordID.PY_TYPE, TraceRecordID.SQL_TYPE + ] +): + """Type for representing multiple trace record ids. 
+
+    This is a mapping from string keys to `TraceRecordID` values.
+    """
+
+    ETI = TraceRecordID
+
+
+class SpanName(StrAsVarChar):
+    """Span names."""
+
+    NUM_BYTES = 32
+    # TODO: get from otel spec
+
+
+E = TypeVar("E", bound=Enum)
+"""Enum types."""
+
+
+class IntEnumAsSmallInt(TypeInfo[E, E, int], Generic[E]):
+    """Enum types that are stored as integers in the database."""
+
+    OTEL_TYPE: Type[E]  # to fill in by subclass
+    PY_TYPE: Type[E]  # to fill in by subclass
+    SQL_TYPE: Type = int
+    SQL_SCHEMA_TYPE: Type = (
+        SMALLINT  # override in subclass if bigger int needed
+    )
+
+    UNION_TYPE: Type  # to fill in by subclass
+    TYPES: Tuple  # to fill in by subclass
+
+    @classmethod
+    def sql_of_py(cls, py_value: E) -> int:
+        return py_value.value
+
+    @classmethod
+    def py_of_sql(cls, sql_value: int) -> E:
+        return cls.PY_TYPE(sql_value)
+
+
+class StrEnumAsVarChar(TypeInfo[E, E, str], Generic[E]):
+    """Enum types that are stored as varchar in the database."""
+
+    OTEL_TYPE: Type[E]  # to fill in by subclass
+    PY_TYPE: Type[E]  # to fill in by subclass
+    SQL_TYPE: Type = str
+    SQL_SCHEMA_TYPE: Type = VARCHAR(
+        16
+    )  # override in subclass if longer strings needed
+
+    UNION_TYPE: Type  # to fill in by subclass
+    TYPES: Tuple  # to fill in by subclass
+
+    @classmethod
+    def sql_of_py(cls, py_value: E) -> str:
+        return py_value.value
+
+    @classmethod
+    def py_of_sql(cls, sql_value: str) -> E:
+        return cls.PY_TYPE(sql_value)
+
+
+class SpanType(StrEnumAsVarChar):
+    """Span type enum."""
+
+    OTEL_TYPE = truconv.SpanAttributes.SpanType
+    PY_TYPE = truconv.SpanAttributes.SpanType
+    UNION_TYPE = Union[truconv.SpanAttributes.SpanType, str]
+    TYPES = (truconv.SpanAttributes.SpanType, str)
+
+
+class SpanTypes(
+    ListAsJSON[SpanType.OTEL_TYPE, SpanType.PY_TYPE, SpanType.SQL_TYPE]
+):
+    """Type for representing multiple span types.
+
+    This is a list of span types. It is a list of `SpanType`.
+ """ + + ETI = SpanType + + +class SpanStatusCode(IntEnumAsSmallInt): + """Span status enum.""" + + OTEL_TYPE = status_api.StatusCode + PY_TYPE = status_api.StatusCode + UNION_TYPE = Union[status_api.StatusCode, int] + TYPES = (status_api.StatusCode, int) + + +class StatusDescription(StrAsVarChar): + NUM_BYTES = 1024 + # TODO: get from otel spec + + +class SpanKind(IntEnumAsSmallInt): + """Span kind enum.""" + + OTEL_TYPE = trace_api.SpanKind + PY_TYPE = trace_api.SpanKind + UNION_TYPE = Union[trace_api.SpanKind, int] + TYPES = (trace_api.SpanKind, int) + + +class Timestamp(TypeInfo[int, datetime.datetime, datetime.datetime]): + """Timestamp type. + + This type is for supporting OTEL hence its requirements come from there. In + OTEL it is a 64-bit integer representing the number of nano seconds since + the epoch. In python, it is a [datetime][datetime.datetime]. In the + database, it is the TIMESTAMP sql column type. + + Default values are "now" and random is not supported. + """ + + NUM_BITS = 64 + """Number of bits in a span identifier.""" + + OTEL_TYPE: Type = int + PY_TYPE: Type = datetime.datetime + SQL_TYPE: Type = datetime.datetime + SQL_SCHEMA_TYPE: Type = TIMESTAMP + + UNION_TYPE: Type = Union[int, datetime.datetime] + + TYPES: Tuple = (int, datetime.datetime) + + @classmethod + def default_py(cls) -> datetime.datetime: + return datetime.datetime.now() + + @classmethod + def default_sql(cls) -> datetime.datetime: + return cls.default_py() + + @classmethod + def default_otel(cls) -> int: + return time.time_ns() + + @classmethod + def rand_otel(cls) -> int: + raise NotImplementedError("Timestamps are not meant to be random.") + + @classmethod + def rand_py(cls) -> datetime.datetime: + raise NotImplementedError("Timestamps are not meant to be random.") + + @classmethod + def rand_sql(cls) -> datetime.datetime: + raise NotImplementedError("Timestamps are not meant to be random.") + + @classmethod + def otel_of_py(cls, py_value: datetime.datetime) -> int: + 
return int(py_value.timestamp() * 1e9) + + @classmethod + def py_of_otel(cls, otel_value: int) -> datetime.datetime: + return datetime.datetime.fromtimestamp(otel_value / 1e9) + + @classmethod + def sql_of_py(cls, py_value: datetime.datetime) -> datetime.datetime: + return py_value + + @classmethod + def py_of_sql(cls, sql_value: datetime.datetime) -> datetime.datetime: + return sql_value + + +# OTEL Attributes-related + + +def lens_of_flat_key(key: str) -> serial_utils.Lens: + """Convert a flat dict key to a lens.""" + lens = serial_utils.Lens() + for step in key.split("."): + lens = lens[step] + + return lens + + +if sys.version_info >= (3, 9): + TLensedBaseType: TypeAlias = Union[str, int, float, bool] +else: + # The above will produce errors on isinstance if used in python 3.8. This + # will work ok instead: + TLensedBaseType = (str, int, float, bool) +"""Type of base types in span attributes. + +!!! Warning + OpenTelemetry does not allow None as an attribute value. Handling None is to + be decided. +""" + +TLensedAttributeValue = TypeAliasType( + "TLensedAttributeValue", + Union[ + str, + int, + float, + bool, + NoneType, # TODO(SNOW-1711929): None is not technically allowed as an attribute value. + Sequence["TLensedAttributeValue"], # type: ignore + "TLensedAttributes", + ], +) +"""Type of values in span attributes.""" + +# NOTE(piotrm): pydantic will fail if you specify a recursive type alias without +# the TypeAliasType schema as above. + +TLensedAttributes: TypeAlias = Dict[str, TLensedAttributeValue] +"""Attribute dictionaries. + +Note that this deviates from what OTEL allows as attribute values. Because OTEL +does not allow general recursive values to be stored as attributes, we employ a +system of flattening values before exporting to OTEL. In this process we encode +a single generic value as multiple attributes where the attribute name include +paths/lenses to the parts of the generic value they are representing. 
For
+example, an attribute/value like `{"a": {"b": 1, "c": 2}}` would be encoded as
+`{"a.b": 1, "a.c": 2}`. This process is implemented in the
+`flatten_lensed_attributes` method.
+"""
+
+
+def flatten_value(
+    v: TLensedAttributeValue, lens: Optional[serial_utils.Lens] = None
+) -> Iterable[Tuple[serial_utils.Lens, types_api.AttributeValue]]:
+    """Flatten recursive value into OTEL-compatible attribute values.
+
+    See `TLensedAttributes` for more details.
+    """
+
+    if lens is None:
+        lens = serial_utils.Lens()
+
+    # TODO(SNOW-1711929): OpenTelemetry does not allow None as an attribute
+    # value. Unsure what is best to do here.
+
+    # if v is None:
+    #     yield (path, "None")
+
+    if v is None:
+        pass
+
+    elif isinstance(v, TLensedBaseType):
+        yield (lens, v)
+
+    elif isinstance(v, Sequence) and all(
+        isinstance(e, TLensedBaseType) for e in v
+    ):
+        yield (lens, v)
+
+    elif isinstance(v, Sequence):
+        for i, e in enumerate(v):
+            yield from flatten_value(v=e, lens=lens[i])
+
+    elif isinstance(v, Mapping):
+        for k, e in v.items():
+            yield from flatten_value(v=e, lens=lens[k])
+
+    else:
+        raise ValueError(
+            f"Do not know how to flatten value of type {type(v)} to OTEL attributes."
+        )
+
+
+def flatten_lensed_attributes(
+    m: TLensedAttributes,
+    path: Optional[serial_utils.Lens] = None,
+    prefix: str = "",
+) -> types_api.Attributes:
+    """Flatten lensed attributes into OpenTelemetry attributes."""
+
+    if path is None:
+        path = serial_utils.Lens()
+
+    ret = {}
+    for k, v in m.items():
+        if k.startswith(prefix):
+            # Only flatten those attributes that begin with `prefix`, as
+            # those are the ones coming from trulens_eval.
+ for p, a in flatten_value(v, path[k]): + ret[str(p)] = a + else: + ret[k] = v + + return ret diff --git a/src/core/trulens/core/utils/containers.py b/src/core/trulens/core/utils/containers.py index 1fe6762b7..8c7067693 100644 --- a/src/core/trulens/core/utils/containers.py +++ b/src/core/trulens/core/utils/containers.py @@ -1,6 +1,4 @@ -""" -Container class utilities. -""" +"""Container class utilities.""" from __future__ import annotations diff --git a/src/core/trulens/core/utils/pyschema.py b/src/core/trulens/core/utils/pyschema.py index d1411a568..4040d385a 100644 --- a/src/core/trulens/core/utils/pyschema.py +++ b/src/core/trulens/core/utils/pyschema.py @@ -435,7 +435,73 @@ def _self_arg(bindings: inspect.BoundArguments) -> Optional[str]: return None +class Signature(serial_utils.SerialModel): + """Function or method signature. + + Just a str for now. + """ + + sig_str: str + """inspect.Signature.__str__""" + + @staticmethod + def of_signature(sig: inspect.Signature) -> Signature: + return Signature(sig_str=str(sig)) + + +class BoundArguments(serial_utils.SerialModel): + """Bound arguments to a function or method. + + Unlike bindings, this stores all args by their name. This can only be filled + in if a call is successfully bound to the function signature. + + Also provides get/set/del item. 
+ """ + + arguments: Dict[str, Any] + + @staticmethod + def of_bound_arguments( + b: inspect.BoundArguments, skip_self: bool = True + ) -> BoundArguments: + """Convert [inspect.BoundArguments][inspect.BoundArguments] to + [BoundArguments][trulens.core.utils.pyschema.BoundArguments], optionally + skipping self.""" + + firstarg: Optional[str] = _self_arg(b) + + return BoundArguments( + arguments={ + k: v + for k, v in b.arguments.items() + if (not skip_self or k != firstarg) + } + ) + + def __getitem__(self, k: str) -> Any: + if k in self.arguments: + return self.arguments[k] + + raise KeyError(f"Key {k} not found in Bindings kwargs.") + + def __setitem__(self, k: str, v: Any): + self.arguments[k] = v + + def __delitem__(self, k: str): + del self.arguments[k] + + def __contains__(self, k: str) -> bool: + return k in self.arguments + + class Bindings(serial_utils.SerialModel): + """Function positional and keyword arguments. + + This can be filled in with arguments and kwargs to a function or method even + if they are wrong and would result in a binding error. See `BoundArguments` + for storing successfully bound arguments only. + """ + args: Tuple kwargs: Dict[str, Any] diff --git a/src/core/trulens/core/utils/requirements.txt b/src/core/trulens/core/utils/requirements.txt index 5fd0388bb..06d7f0f94 100644 --- a/src/core/trulens/core/utils/requirements.txt +++ b/src/core/trulens/core/utils/requirements.txt @@ -9,3 +9,4 @@ trulens-dashboard >= 1.0.0 # talk about these packages as required. opentelemetry-api >= 1.0.0 opentelemetry-sdk >= 1.0.0 +trulens-otel-semconv >= 1.0.0 diff --git a/src/core/trulens/core/utils/serial.py b/src/core/trulens/core/utils/serial.py index b4a062f3e..0fe4b258a 100644 --- a/src/core/trulens/core/utils/serial.py +++ b/src/core/trulens/core/utils/serial.py @@ -363,7 +363,7 @@ def get(self, obj: Dict[str, T]) -> Iterable[T]: pass # Otherwise handle a dict or object with the named attribute. 
- elif isinstance(obj, Dict): + elif hasattr(obj, "__getitem__"): if self.item_or_attribute in obj: yield obj[self.item_or_attribute] else: diff --git a/src/core/trulens/experimental/otel_tracing/_feature.py b/src/core/trulens/experimental/otel_tracing/_feature.py index 20d6c04f3..2b1ad746c 100644 --- a/src/core/trulens/experimental/otel_tracing/_feature.py +++ b/src/core/trulens/experimental/otel_tracing/_feature.py @@ -1,14 +1,18 @@ """Utilities for managing optional requirements of the experimental otel_tracing feature.""" +from sqlalchemy.orm import configure_mappers from trulens.core import experimental from trulens.core.utils import imports as import_utils +# from trulens.core import session as core_session # circular import + + FEATURE = experimental.Feature.OTEL_TRACING """Feature controlling the use of this module.""" REQUIREMENT = import_utils.format_import_errors( - ["opentelemetry-api", "opentelemetry-sdk"], + ["opentelemetry-api", "opentelemetry-sdk", "trulens-otel-semconv"], purpose="otel_tracing experimental feature", ) """Optional modules required for the otel_tracing experimental feature.""" @@ -16,6 +20,7 @@ with import_utils.OptionalImports(REQUIREMENT) as oi: from opentelemetry import sdk from opentelemetry import trace + from trulens.otel.semconv import trace as trulens_otel_semconv_trace class _FeatureSetup(experimental._FeatureSetup): @@ -28,10 +33,49 @@ class _FeatureSetup(experimental._FeatureSetup): def assert_optionals_installed(): """Asserts that the optional requirements for the otel_tracing feature are installed.""" - oi.assert_installed([sdk, trace]) + oi.assert_installed([sdk, trace, trulens_otel_semconv_trace]) @staticmethod def are_optionals_installed(): """Checks if the optional requirements for the otel_tracing feature are installed.""" - return not any(import_utils.is_dummy(m) for m in [sdk, trace]) + return not any( + import_utils.is_dummy(m) + for m in [sdk, trace, trulens_otel_semconv_trace] + ) + + @staticmethod + def 
enable( + session: experimental._WithExperimentalSettings, + ): # actually TruSession + """Called when otel_tracing is enabled for session.""" + + # Patch in Span ORM class into the session's database ORM. + from trulens.core.database import sqlalchemy as sqlalchemy_db + from trulens.experimental.otel_tracing.core.database import ( + orm as otel_orm, + ) + from trulens.experimental.otel_tracing.core.database import ( + sqlalchemy as otel_sqlalchemy, + ) + + db = session.connector.db + + if not isinstance(db, sqlalchemy_db.SQLAlchemyDB): + raise ValueError( + "otel_tracing feature requires SQLAlchemyDB for database access." + ) + + print(f"Patching {db} with otel_tracing additions.") + + orm = db.orm + tracing_orm = otel_orm.new_orm(orm.base) + orm.Span = tracing_orm.Span + + # retrofit base SQLAlchemyDB with otel_tracing additions + db.__class__ = otel_sqlalchemy._SQLAlchemyDB + + configure_mappers() + + # creates the new tables + orm.metadata.create_all(db.engine) diff --git a/src/core/trulens/experimental/otel_tracing/core/_utils/wrap.py b/src/core/trulens/experimental/otel_tracing/core/_utils/wrap.py index 680d6ecff..bdb063ce6 100644 --- a/src/core/trulens/experimental/otel_tracing/core/_utils/wrap.py +++ b/src/core/trulens/experimental/otel_tracing/core/_utils/wrap.py @@ -24,9 +24,10 @@ logger = logging.getLogger(__name__) -R = TypeVar("R") # callable's return type -A = TypeVar("A") # awaitable's result type -E = TypeVar("E") # iterator/generator element type +T = TypeVar("T") +E = TypeVar("E") # iterator/generator element +A = TypeVar("A") # awaitable result +R = TypeVar("R") # callable return value class AwaitableCallbacks(Generic[A]): @@ -68,11 +69,13 @@ def on_awaitable_result(self, result: A) -> A: !!! Important This should return the result or some wrapper of the result. 
""" + self.result = result return result def on_awaitable_exception(self, error: Exception) -> Exception: """Called if awaiting for the wrapped awaitable raised an exception.""" + self.error = error return error @@ -237,7 +240,7 @@ def on_iterable_stop(self): def wrap_iterable( itb: Iterable[E], - callback_class: Type[IterableCallbacks] = IterableCallbacks, + callback_class: Type[IterableCallbacks[E]] = IterableCallbacks, **kwargs: Dict[str, Any], ) -> Iterable[E]: """Wrap an iterable to invoke various callbacks. @@ -335,6 +338,7 @@ def __init__( wrapper: Callable[..., R], call_args: Tuple[Any, ...], call_kwargs: Dict[str, Any], + **kwargs, ): """Called/constructed when the wrapper function is called but before arguments are bound to the wrapped function's signature.""" @@ -348,7 +352,7 @@ def __init__( self.call_args: Optional[Tuple[Any, ...]] = call_args self.call_kwargs: Optional[Dict[str, Any]] = call_kwargs - self.bindings: Optional[inspect.BoundArguments] = None + self.bound_arguments: Optional[inspect.BoundArguments] = None self.bind_error: Optional[Exception] = None self.error: Optional[Exception] = None @@ -361,7 +365,7 @@ def on_callable_end(self): def on_callable_call( self, *, - bindings: inspect.BoundArguments, + bound_arguments: inspect.BoundArguments, ) -> inspect.BoundArguments: """Called before the execution of the wrapped method assuming its arguments can be bound. @@ -370,9 +374,9 @@ def on_callable_call( This callback must return the bound arguments or wrappers of bound arguments. """ - self.bindings = bindings + self.bound_arguments = bound_arguments - return bindings + return bound_arguments def on_callable_bind_error( self, @@ -444,16 +448,18 @@ def wrap_callable( assert isinstance(func, Callable), f"Callable expected but got {func}." + sig = inspect.signature(func) # safe sig? 
+ our_callback_init_kwargs: Dict[str, Any] = {"func": func, "sig": sig} + if python_utils.safe_hasattr(func, CALLBACKS): - # If CALLBACKS is set, it was already a wrapper. + # If CALLBACKS is set, it was already a wrapper. In this case we only + # call the static method on_callable_wrapped. - # logger.warning("Function %s is already wrapped.", func) + our_callback_init_kwargs["wrapper"] = func + callback_class.on_callable_wrapped(**our_callback_init_kwargs, **kwargs) return func - sig = inspect.signature(func) # safe sig? - our_callback_init_kwargs: Dict[str, Any] = {"func": func, "sig": sig} - # If CALLBACKS is not set, create a wrapper and return it. @functools.wraps(func) def wrapper(*args, **kwargs) -> R: @@ -475,10 +481,10 @@ def wrapper(*args, **kwargs) -> R: ) try: - bindings = sig.bind(*args, **kwargs) # save and reuse sig + bound_arguments = sig.bind(*args, **kwargs) # save and reuse sig # callback step 1: call on_callable_call - callback.on_callable_call(bindings=bindings) + callback.on_callable_call(bound_arguments=bound_arguments) except TypeError as e: # callback step 2a: call on_callable_bind_error diff --git a/src/core/trulens/experimental/otel_tracing/core/app.py b/src/core/trulens/experimental/otel_tracing/core/app.py index ca378808f..d4250519c 100644 --- a/src/core/trulens/experimental/otel_tracing/core/app.py +++ b/src/core/trulens/experimental/otel_tracing/core/app.py @@ -1,53 +1,89 @@ +""" +Adds otel_tracing specific features to the main [App][trulens.core.app.App] +class. This file is to be integrated into the main [app][trulens.core.app] once +otel_tracing graduates. + +- Callback + ([_on_new_root_span][trulens.experimental.otel_tracing.core.app._on_new_root_span]) + for when a new root span has finished tracing. 
This callback adds the records' + worth of spans to the database and possibly executes feedback + +- Contextmanager methods (__enter__, __exit__) for starting and stopping a + recording context are overwritten for otel tracing to create a special + [RecordingContextSpan][trulens.experimental.otel_tracing.core.span.RecordingContextSpan] + span. + +- Callback + ([_on_new_recording_span][trulens.experimental.otel_tracing.core.app._on_new_recording_span]) + when the recording span (the above) is finished. This span also controls the + exporting of spans if TruLens has been configured so. +""" + from __future__ import annotations -import contextvars -import time from typing import ( - Iterable, + List, + Literal, + Optional, ) from trulens.core import app as core_app from trulens.core import instruments as core_instruments +from trulens.core._utils.pycompat import ReferenceType from trulens.core.schema import feedback as feedback_schema from trulens.core.schema import record as record_schema from trulens.core.utils import python as python_utils from trulens.core.utils import text as text_utils -from trulens.experimental.otel_tracing.core import trace as core_otel -from trulens.experimental.otel_tracing.core import trace as core_trace +from trulens.experimental.otel_tracing.core.trace import ( + callbacks as core_callbacks, +) +from trulens.experimental.otel_tracing.core.trace import export as core_export +from trulens.experimental.otel_tracing.core.trace import span as core_span +from trulens.experimental.otel_tracing.core.trace import trace as core_trace +from trulens.otel.semconv import trace as truconv class _App(core_app.App): # TODO(otel_tracing): Roll into core_app.App once no longer experimental. 
- # WithInstrumentCallbacks requirement - def get_active_contexts( - self, - ) -> Iterable[core_instruments._RecordingContext]: - """Get all active recording contexts.""" - - recording = self.recording_contexts.get(contextvars.Token.MISSING) - - while recording is not contextvars.Token.MISSING: - yield recording - recording = recording.token.old_value - # WithInstrumentCallbacks requirement def _on_new_recording_span( self, - recording_span: core_trace.Span, + recording_span: core_span.Span, ): + """Callback for when a recording span + ([RecordingContextSpan][trulens.experimental.otel_tracing.core.span.RecordingContextSpan]) + is finished. + + Handles exporting to OTEL exporters. + """ + + exporter_ident = str(str(self.session._experimental_otel_exporter)) if self.session._experimental_otel_exporter is not None: - # Export to otel exporter if exporter was set in workspace. - to_export = [] - for span in recording_span.iter_family(include_phantom=True): - if isinstance(span, core_otel.Span): - e_span = span.otel_freeze() - to_export.append(e_span) + to_export: Optional[List] = [] + num_exportable = 0 + else: + to_export = None + + for span in recording_span.iter_family(): + if to_export is not None: + if isinstance(span, core_span.Span): + num_exportable += 1 + if not core_trace.was_exported_to( + context=span.context, + to=exporter_ident, + mark_exported=True, + ): + e_span = span.otel_freeze() + to_export.append(e_span) else: print(f"Warning, span {span.name} is not exportable.") + if to_export is not None: + # Export to otel exporter if exporter was set in workspace. + print( - f"{text_utils.UNICODE_CHECK} Exporting {len(to_export)} spans to {python_utils.class_name(self.session._experimental_otel_exporter)}." + f"{text_utils.UNICODE_CHECK} Exporting {len(to_export)}/{num_exportable} spans to {python_utils.class_name(self.session._experimental_otel_exporter)}." 
)
            self.session._experimental_otel_exporter.export(to_export)
@@ -55,16 +91,35 @@ def _on_new_recording_span(
     def _on_new_root_span(
         self,
         recording: core_instruments._RecordingContext,
-        root_span: core_trace.Span,
+        root_span: core_span.Span,
     ) -> record_schema.Record:
-        tracer = root_span.context.tracer
+        """Callback for when a new trace root span
+        ([LiveRecordRoot][trulens.experimental.otel_tracing.core.span.LiveRecordRoot])
+        is finished.
+
+        Controls saving spans to the database as well as feedback execution or scheduling.
+        """
 
-        record = tracer.record_of_root_span(
+        record = core_export.record_of_root_span(
             root_span=root_span, recording=recording
         )
         recording.records.append(record)
         # need to jsonify?
 
+        typed_spans = record.experimental_otel_spans
+
+        db_ident = str(self.connector.db)
+
+        unwritten_spans = [
+            span
+            for span in typed_spans
+            if not core_trace.was_exported_to(
+                context=span.context, to=db_ident, mark_exported=True
+            )
+        ]
+
+        self.connector.db.insert_spans(spans=unwritten_spans)
+
         error = root_span.error
 
         if error is not None:
@@ -102,20 +157,24 @@
 
         tracer: core_trace.Tracer = core_trace.trulens_tracer()
 
-        recording_span_ctx = tracer.recording()
-        recording_span: core_trace.PhantomSpanRecordingContext = (
+        recording_span_ctx = tracer.start_as_current_span(
+            cls=core_span.RecordingContextSpan,
+            name=truconv.SpanAttributes.RECORDING.SPAN_NAME_PREFIX
+            + self.app_name,
+            live_app=ReferenceType(self),
+        )
+
+        recording_span: core_span.RecordingContextSpan = (
             recording_span_ctx.__enter__()
         )
-        recording = core_trace._RecordingContext(
+
+        recording = core_callbacks._RecordingContext(
             app=self,
             tracer=tracer,
             span=recording_span,
             span_ctx=recording_span_ctx,
         )
-        recording_span.recording = recording
-        recording_span._start_timestamp = time.time_ns()  # move to trace
-
-        # recording.ctx = ctx
+        recording_span.live_recording = recording
 
         token = self.recording_contexts.set(recording)
         recording.token = token
@@ -123,19 +182,22 @@
def __enter__(self):
 
         return recording
 
     # For use as a context manager.
-    def __exit__(self, exc_type, exc_value, exc_tb):
+    def __exit__(self, exc_type, exc_value, exc_tb) -> Literal[False]:
         # EXPERIMENTAL(otel_tracing): replacement to recording context manager.
 
-        recording: core_trace._RecordingContext = self.recording_contexts.get()
+        recording: core_callbacks._RecordingContext = (
+            self.recording_contexts.get()
+        )
 
         assert recording is not None, "Not in a tracing context."
         assert recording.tracer is not None, "Not in a tracing context."
         assert recording.span is not None, "Not in a tracing context."
 
-        recording.span._end_timestamp = time.time_ns()  # move to trace
-
         self.recording_contexts.reset(recording.token)
-        return recording.span_ctx.__exit__(exc_type, exc_value, exc_tb)
+
+        recording.span_ctx.__exit__(exc_type, exc_value, exc_tb)
+
+        return False
 
     # For use as an async context manager.
     async def __aenter__(self):
@@ -143,8 +205,11 @@ async def __aenter__(self):
 
         tracer: core_trace.Tracer = core_trace.trulens_tracer()
 
-        recording_span_ctx = await tracer.arecording()
-        recording_span: core_trace.PhantomSpanRecordingContext = (
+        recording_span_ctx = tracer.astart_as_current_span(
+            cls=core_span.RecordingContextSpan,
+            name=truconv.SpanAttributes.RECORDING.SPAN_NAME_PREFIX + self.app_name,
+        )
+        recording_span: core_span.RecordingContextSpan = (
             await recording_span_ctx.__aenter__()
         )
         recording = core_trace._RecordingContext(
@@ -153,10 +218,7 @@ async def __aenter__(self):
             span=recording_span,
             span_ctx=recording_span_ctx,
         )
-        recording_span.recording = recording
-        recording_span.start_timestamp = time.time_ns()
-
-        # recording.ctx = ctx
+        recording_span.live_recording = recording
 
         token = self.recording_contexts.set(recording)
         recording.token = token
@@ -164,7 +226,7 @@ async def __aenter__(self):
 
         return recording
 
     # For use as a context manager.
- async def __aexit__(self, exc_type, exc_value, exc_tb): + async def __aexit__(self, exc_type, exc_value, exc_tb) -> Literal[False]: # EXPERIMENTAL(otel_tracing) recording: core_trace._RecordingContext = self.recording_contexts.get() @@ -172,7 +234,8 @@ async def __aexit__(self, exc_type, exc_value, exc_tb): assert recording is not None, "Not in a tracing context." assert recording.tracer is not None, "Not in a tracing context." - recording.span.end_timestamp = time.time_ns() - self.recording_contexts.reset(recording.token) - return await recording.span_ctx.__aexit__(exc_type, exc_value, exc_tb) + + await recording.span_ctx.__aexit__(exc_type, exc_value, exc_tb) + + return False diff --git a/src/core/trulens/experimental/otel_tracing/core/database/base.py b/src/core/trulens/experimental/otel_tracing/core/database/base.py new file mode 100644 index 000000000..3072e1a9c --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/database/base.py @@ -0,0 +1,71 @@ +from dataclasses import dataclass +from typing import Iterable, Optional + +from trulens.core.database import base as core_db +from trulens.core.schema import types as types_schema +from trulens.experimental.otel_tracing.core.trace import sem as core_sem + + +@dataclass +class SpanIndex: + """Index of a span in the database. + + A span can be indexed either by index alone or a combination of span_id and + trace_id. + """ + + index: Optional[int] = None + span_id: Optional[types_schema.SpanID.SQL_TYPE] = None + trace_id: Optional[types_schema.TraceID.SQL_TYPE] = None + + +class _DB(core_db.DB): + # EXPERIMENTAL(otel_tracing): Adds span API to the core DB API. 
+ + T = core_db.DB.T + Q = core_db.DB.Q + W = core_db.DB.W + + def insert_span(self, span: core_sem.TypedSpan) -> None: + """Insert a span into the database.""" + raise NotImplementedError + + def batch_insert_span(self, spans: Iterable[core_sem.TypedSpan]) -> None: + """Insert a batch of spans into the database.""" + raise NotImplementedError + + def delete_span(self, index: SpanIndex) -> None: + """Delete a span from the database.""" + raise NotImplementedError + + def delete_spans( + self, + where: W, + page: Optional[core_db.PageSelect] = None, + ) -> None: + """Delete spans from the database.""" + raise NotImplementedError + + def get_span(self, index: SpanIndex) -> Optional[core_sem.TypedSpan]: + """Get a span from the database.""" + raise NotImplementedError + + def get_spans( + self, + where: W, + page: Optional[core_db.PageSelect] = None, + ) -> Iterable[core_sem.TypedSpan]: + """Select spans from the database.""" + raise NotImplementedError + + def get_trace_record_ids( + self, where: W, page: Optional[core_db.PageSelect] = None + ) -> Iterable[str]: + """Get the trace record ids matching the given query/page.""" + + raise NotImplementedError + + def get_trace_record(self, record_id: str) -> Iterable[core_sem.TypedSpan]: + """Select spans from the database that belong to the given record.""" + + raise NotImplementedError diff --git a/src/core/trulens/experimental/otel_tracing/core/database/orm.py b/src/core/trulens/experimental/otel_tracing/core/database/orm.py new file mode 100644 index 000000000..70e3399e7 --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/database/orm.py @@ -0,0 +1,250 @@ +from __future__ import annotations + +import abc +from typing import ( + ClassVar, + Dict, + Generic, + Type, + TypeVar, +) + +from sqlalchemy import INTEGER +from sqlalchemy import JSON +from sqlalchemy import Column +from sqlalchemy import UniqueConstraint +from sqlalchemy.orm import configure_mappers +from sqlalchemy.schema import MetaData +from 
sqlalchemy.sql import func +from trulens.core.schema import types as types_schema +from trulens.experimental.otel_tracing.core.trace import context as core_context +from trulens.experimental.otel_tracing.core.trace import sem as core_sem + +T = TypeVar("T") + + +class SpanORM(abc.ABC, Generic[T]): + """Abstract definition of a container for ORM classes.""" + + registry: Dict[str, Type[T]] + metadata: MetaData + + Span: Type[T] + + +def new_orm(base: Type[T], prefix: str = "trulens_") -> Type[SpanORM[T]]: + """Create a new orm container from the given base table class.""" + + class NewSpanORM(SpanORM): + """Container for ORM classes. + + Needs to be extended with classes that set table prefix. + + Warning: + The relationships between tables established in the classes in this + container refer to class names i.e. "AppDefinition" hence these are + important and need to stay consistent between definition of one and + relationships in another. + """ + + registry: Dict[str, base] = base.registry._class_registry + """Table name to ORM class mapping for tables used by trulens. + + This can be used to iterate through all classes/tables. 
+ """ + + metadata: MetaData = base.metadata + """SqlAlchemy metadata object for tables used by trulens.""" + + class Span(base): + """Span DB schema.""" + + _table_base_name: ClassVar[str] = "spans" + + # pagination utility columns + created_timestamp: Column = Column( + types_schema.Timestamp.SQL_SCHEMA_TYPE, + server_default=func.now(), + ) + updated_timestamp: Column = Column( + types_schema.Timestamp.SQL_SCHEMA_TYPE, + server_default=func.now(), + onupdate=func.current_timestamp(), + ) + + index: Column = Column( + INTEGER, primary_key=True, autoincrement=True + ) + + # OTEL requirements that we use: + span_id: Column = Column( + types_schema.SpanID.SQL_SCHEMA_TYPE, nullable=False + ) + trace_id: Column = Column( + types_schema.TraceID.SQL_SCHEMA_TYPE, nullable=False + ) + + __table_args__ = (UniqueConstraint("span_id", "trace_id"),) + + parent_span_id: Column = Column(types_schema.SpanID.SQL_SCHEMA_TYPE) + + parent_trace_id: Column = Column( + types_schema.TraceID.SQL_SCHEMA_TYPE + ) + + name: Column = Column( + types_schema.SpanName.SQL_SCHEMA_TYPE, nullable=False + ) + + start_timestamp: Column = Column( + types_schema.Timestamp.SQL_SCHEMA_TYPE, nullable=False + ) + end_timestamp: Column = Column( + types_schema.Timestamp.SQL_SCHEMA_TYPE, nullable=False + ) + attributes: Column = Column(JSON) + + kind: Column = Column( + types_schema.SpanKind.SQL_SCHEMA_TYPE, nullable=False + ) + + status: Column = Column( + types_schema.SpanStatusCode.SQL_SCHEMA_TYPE, nullable=False + ) + + status_description: Column = Column( + types_schema.StatusDescription.SQL_SCHEMA_TYPE + ) + + # Note that there are other OTEL requirements that we do not use and + # hence do not model here. + + # Other columns: + record_ids: Column = Column( + types_schema.TraceRecordIDs.SQL_SCHEMA_TYPE + ) + """Each main app method call gets a different record_id. + + We cannot use trace_id for this purpose without interfering with + expected OTEL behaviour. 
Can be null if we cannot figure out what + record/call this span is associated with.""" + + span_types: Column = Column(JSON, nullable=False) + """Types (of TypedSpan) the span belongs to.""" + + @classmethod + def parse(cls, obj: core_sem.TypedSpan) -> NewSpanORM.Span: + """Parse a typed span object into an ORM object.""" + + record_ids = {} + if isinstance(obj, core_sem.Record): + if obj.record_ids is not None: + record_ids = obj.record_ids + else: + raise NotImplementedError( + f"Record spans must have record_ids. This span does not: {obj}" + ) + else: + # TODO: figure out how to handle this case, or just not include these? + raise NotImplementedError("Cannot handle non-App spans.") + + assert isinstance( + obj, core_sem.TypedSpan + ), "TypedSpan expected." + + return cls( + span_id=types_schema.SpanID.sql_of_py(obj.context.span_id), + trace_id=types_schema.TraceID.sql_of_py( + obj.context.trace_id + ), + record_ids=types_schema.TraceRecordIDs.sql_of_py( + record_ids + ), + parent_span_id=types_schema.SpanID.sql_of_py( + obj.parent.span_id + ) + if obj.parent + else None, + parent_trace_id=types_schema.TraceID.sql_of_py( + obj.parent.trace_id + ) + if obj.parent + else None, + name=obj.name, + start_timestamp=types_schema.Timestamp.sql_of_py( + obj.start_timestamp + ), + end_timestamp=types_schema.Timestamp.sql_of_py( + obj.end_timestamp + ) + if obj.end_timestamp + else None, + attributes=obj.attributes, + kind=types_schema.SpanKind.sql_of_py(obj.kind), + status=types_schema.SpanStatusCode.sql_of_py(obj.status), + status_description=types_schema.StatusDescription.sql_of_py( + obj.status_description + ) + if obj.status_description + else None, + span_types=list(t.value for t in obj.span_types), + ) + + def write(self) -> core_sem.TypedSpan: + """Convert ORM class to typed span.""" + + context = core_context.SpanContext( + trace_id=types_schema.TraceID.py_of_sql(self.trace_id), + span_id=types_schema.SpanID.py_of_sql(self.span_id), + ) + + parent = ( + 
core_context.SpanContext( + trace_id=types_schema.TraceID.py_of_sql( + self.parent_trace_id + ), + span_id=types_schema.SpanID.py_of_sql( + self.parent_span_id + ), + ) + if self.parent_span_id + else None + ) + + span_types = set(self.span_types) + # other_args = {} + # if truconv.SpanAttributes.SpanType.RECORD_ROOT in span_types: + # TODO: need to recover AttributeProperty fields from attributes here. + # other_args["record_id"] = self.attributes[ + # truconv.SpanAttributes.RECORD_ROOT.RECORD_ID + # ] + + return core_sem.TypedSpan.mixin_new( + name=self.name, + context=context, + parent=parent, + kind=types_schema.SpanKind.py_of_sql(self.kind), + attributes=self.attributes, + start_timestamp=types_schema.Timestamp.py_of_sql( + self.start_timestamp + ), + end_timestamp=types_schema.Timestamp.py_of_sql( + self.end_timestamp + ), + status=types_schema.SpanStatusCode.py_of_sql(self.status), + status_description=types_schema.StatusDescription.py_of_sql( + self.status_description + ), + links=[], # we dont keep links + span_types=types_schema.SpanTypes.py_of_sql(span_types), + record_ids=types_schema.TraceRecordIDs.py_of_sql( + self.record_ids + ), + # **other_args, + ) + + configure_mappers() # IMPORTANT + # Without the above, orm class attributes which are defined using backref + # will not be visible. 
+ + return NewSpanORM diff --git a/src/core/trulens/experimental/otel_tracing/core/database/sqlalchemy.py b/src/core/trulens/experimental/otel_tracing/core/database/sqlalchemy.py new file mode 100644 index 000000000..5d4e1c1a2 --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/database/sqlalchemy.py @@ -0,0 +1,223 @@ +from typing import Iterable, List, Optional + +import sqlalchemy as sa +from trulens.core.database import base as core_db +from trulens.core.database import sqlalchemy as core_sqlalchemy +from trulens.core.schema import types as types_schema +from trulens.experimental.otel_tracing.core.database import base as otel_core_db +from trulens.experimental.otel_tracing.core.trace import sem as core_sem + + +class _SQLAlchemyDB(core_sqlalchemy.SQLAlchemyDB): + """EXPERIMENTAL(otel_tracing): Adds span API to the SQLAlchemy DB API.""" + + T = core_sqlalchemy.SQLAlchemyDB.T + """Table type.""" + + Q = core_sqlalchemy.SQLAlchemyDB.Q + """Query type.""" + + W = core_sqlalchemy.SQLAlchemyDB.W + """Where clause type.""" + + @property + def Span(self) -> T: + """Span table.""" + + if self.orm is None: + raise RuntimeError("ORM not set. Cannot refer to Span table.") + + if not hasattr(self.orm, "Span"): + raise RuntimeError( + "ORM does not have Span table. It might be an unpatched ORM class without otel_tracing support." + ) + + return self.orm.Span + + def _where_span_index( + self, + query: Q, + index: otel_core_db.SpanIndex, + ) -> Q: + """Adds where clauses to the given Span query based on the given index.""" + + if index.index is None and ( + index.span_id is None or index.trace_id is None + ): + raise ValueError( + "Span index must have either index or (span_id and trace_id)." 
+ ) + + if index.index is not None: + query = query.where(self.Span.index == index.index) + + if index.span_id is not None: + query = query.where(self.Span.span_id == index.span_id) + + if index.trace_id is not None: + query = query.where(self.Span.trace_id == index.trace_id) + + return query + + def _where_page(self, query: Q, page: core_db.PageSelect) -> Q: + """Adds paging clauses to the given query based on the given page select.""" + + if page.limit is not None: + query = query.limit(page.limit) + + if page.offset is not None: + query = query.offset(page.offset) + + if page.shuffle: + query = query.order_by(sa.func.random()) + + if page.after_index is not None: + query = query.where(self.Span.index > page.after_index) + + if page.before_index is not None: + query = query.where(self.Span.index < page.before_index) + + if page.after_created_timestamp: + query = query.where( + self.orm.Span.created_timestamp > page.after_created_timestamp + ) + + if page.before_created_timestamp: + query = query.where( + self.Span.created_timestamp < page.before_created_timestamp + ) + + if page.after_updated_timestamp: + query = query.where( + self.Span.updated_timestamp > page.after_updated_timestamp + ) + + if page.before_updated_timestamp: + query = query.where( + self.Span.updated_timestamp < page.before_updated_timestamp + ) + + return query + + def insert_span(self, span: core_sem.TypedSpan) -> otel_core_db.SpanIndex: + """Insert a span into the database.""" + + # Check the context is valid. OTEL spec states that these ids are + # "not recording" so they shouldn't even have been tracked. 
+        if (
+            span.context.span_id == types_schema.SpanID.INVALID_OTEL
+            or span.context.trace_id == types_schema.TraceID.INVALID_OTEL
+        ):
+            raise ValueError(f"Invalid span context: {span.context}")
+
+        with self.session.begin() as session:
+            orm_object = self.Span.parse(span)
+            session.merge(orm_object)
+
+            return otel_core_db.SpanIndex(
+                index=orm_object.index,
+                span_id=orm_object.span_id,
+                trace_id=orm_object.trace_id,
+            )
+
+    def insert_spans(
+        self, spans: Iterable[core_sem.TypedSpan]
+    ) -> List[otel_core_db.SpanIndex]:
+        """Insert a batch of spans into the database."""
+
+        for span in spans:
+            # Check the contexts are valid. OTEL spec states that these ids are
+            # "not recording" so they shouldn't even have been tracked.
+            if (
+                span.context.span_id == types_schema.SpanID.INVALID_OTEL
+                or span.context.trace_id == types_schema.TraceID.INVALID_OTEL
+            ):
+                raise ValueError(f"Invalid span context: {span.context}")
+
+        with self.session.begin() as session:
+            orm_objects = [self.Span.parse(span) for span in spans]
+            session.add_all(orm_objects)
+
+            return [
+                otel_core_db.SpanIndex(
+                    index=orm_object.index,
+                    span_id=orm_object.span_id,
+                    trace_id=orm_object.trace_id,
+                )
+                for orm_object in orm_objects
+            ]
+
+    def delete_span(self, index: otel_core_db.SpanIndex) -> None:
+        """Delete a span from the database."""
+
+        with self.session.begin() as session:
+            query = self._where_span_index(
+                query=session.query(self.Span), index=index
+            ).first()
+            session.delete(query)
+
+    def delete_spans(
+        self,
+        where: Optional[W] = None,
+        page: Optional[core_db.PageSelect] = None,
+    ) -> None:
+        """Delete spans from the database."""
+
+        with self.session.begin() as session:
+            query = session.query(self.Span)
+            if where is not None:
+                query = query.where(where)
+
+            if page is not None:
+                query = self._where_page(query=query, page=page)
+
+            query.delete()
+
+    def get_span(
+        self, index: otel_core_db.SpanIndex
+    ) -> Optional[core_sem.TypedSpan]:
+        """Get a span from the database."""
+
+        with self.session.begin() as session:
+            query = self._where_span_index(
+                query=session.query(self.Span), index=index
+            )
+            orm_object = query.one_or_none()
+
+            if orm_object is None:
+                return None
+
+            return orm_object.write()
+
+    def get_spans(
+        self,
+        where: Optional[W] = None,
+        page: Optional[core_db.PageSelect] = None,
+    ) -> Iterable[core_sem.TypedSpan]:
+        """Select spans from the database."""
+
+        with self.session.begin() as session:
+            query = session.query(self.Span)
+
+            if where is not None:
+                query = query.where(where)
+
+            if page is not None:
+                query = self._where_page(query=query, page=page)
+
+            for orm_object in query.all():
+                yield orm_object.write()
+
+    def get_trace_record_ids(
+        self,
+        where: Optional[W] = None,
+        page: Optional[core_db.PageSelect] = None,
+    ) -> Iterable[str]:
+        """Get the trace record ids matching the given query/page."""
+
+        raise NotImplementedError
+
+    def get_trace_record(self, record_id: str) -> Iterable[core_sem.TypedSpan]:
+        """Select spans from the database that belong to the given record."""
+
+        return self.get_spans(where=self.Span.record_ids.contains(record_id))
diff --git a/src/core/trulens/experimental/otel_tracing/core/feedback/endpoint.py b/src/core/trulens/experimental/otel_tracing/core/feedback/endpoint.py
index 8aaffe774..552f4203c 100644
--- a/src/core/trulens/experimental/otel_tracing/core/feedback/endpoint.py
+++ b/src/core/trulens/experimental/otel_tracing/core/feedback/endpoint.py
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import inspect
 import logging
 from pprint import PrettyPrinter
 from typing import Generic, Tuple, TypeVar
@@ -8,8 +9,12 @@
 from trulens.core.schema import base as base_schema
 from trulens.core.utils import asynchro as asynchro_utils
 from trulens.core.utils import python as python_utils
-from trulens.experimental.otel_tracing.core import trace as mod_trace
 from trulens.experimental.otel_tracing.core._utils import wrap as wrap_utils
+from 
trulens.experimental.otel_tracing.core.trace import ( + callbacks as core_callbacks, +) +from trulens.experimental.otel_tracing.core.trace import span as core_span +from trulens.experimental.otel_tracing.core.trace import trace as core_trace logger = logging.getLogger(__name__) @@ -21,7 +26,7 @@ class _WrapperEndpointCallback( - mod_trace.TracingCallbacks[Ret, mod_trace.LiveSpanCallWithCost], + core_callbacks.TracingCallbacks[Ret, core_span.LiveSpanCallWithCost], Generic[Ret, Res], ): """EXPERIMENTAL(otel_tracing): Extension to TracingCallbacks that tracks @@ -36,10 +41,13 @@ class _WrapperEndpointCallback( # overriding CallableCallbacks def __init__(self, endpoint: core_endpoint.Endpoint, **kwargs): - super().__init__(**kwargs, span_type=mod_trace.LiveSpanCallWithCost) + super().__init__(**kwargs, span_type=core_span.LiveSpanCallWithCost) self.endpoint: core_endpoint.Endpoint = endpoint - self.span.endpoint = endpoint + + assert self.span is not None + + self.span.live_endpoint = endpoint self.cost: base_schema.Cost = self.span.cost self.cost.n_requests += 1 @@ -116,9 +124,13 @@ def track_all_costs_tally( *args, **kwargs, ) -> Tuple[Ret, python_utils.Thunk[base_schema.Cost]]: - with mod_trace.trulens_tracer().cost( - method_name=__func.__name__ + with core_trace.trulens_tracer().start_as_current_span( + cls=core_span.LiveSpanCall, + live_func=__func, + live_sig=inspect.signature(__func), + live_args=args, + live_kwargs=kwargs, ) as span: ret = __func(*args, **kwargs) - return ret, span.total_cost + return ret, span.cost_tally diff --git a/src/core/trulens/experimental/otel_tracing/core/instruments.py b/src/core/trulens/experimental/otel_tracing/core/instruments.py index 180147655..747d04f5f 100644 --- a/src/core/trulens/experimental/otel_tracing/core/instruments.py +++ b/src/core/trulens/experimental/otel_tracing/core/instruments.py @@ -1,3 +1,17 @@ +""" +OTEL tracing specific additions and changes for instrumentation. 
+ +This file is to be integrated into the main +[instruments][trulens.core.instruments] once otel_tracing graduates. + +- Overwrites + [tracked_method_wrapper][trulens.core.instruments.Instrument.tracked_method_wrapper] + to use + [wrap_callable][trulens.experimental.otel_tracing.core._utils.wrap_callable] + with span creating callbacks + ([AppTracingCallbacks][trulens.experimental.otel_tracing.core.callbacks.AppTracingCallbacks]). +""" + from __future__ import annotations import logging @@ -7,8 +21,10 @@ from trulens.core import instruments as core_instruments from trulens.core.utils import python as python_utils from trulens.core.utils import serial as serial_utils -from trulens.experimental.otel_tracing.core import trace as mod_trace from trulens.experimental.otel_tracing.core._utils import wrap as wrap_utils +from trulens.experimental.otel_tracing.core.trace import ( + callbacks as core_callbacks, +) logger = logging.getLogger(__name__) @@ -38,7 +54,7 @@ def tracked_method_wrapper( if python_utils.safe_hasattr(func, "__func__"): raise ValueError("Function expected but method received.") - if python_utils.safe_hasattr(func, mod_trace.INSTRUMENT): + if python_utils.safe_hasattr(func, core_callbacks.INSTRUMENT): logger.debug("\t\t\t%s: %s is already instrumented", query, func) # Notify the app instrumenting this method where it is located: @@ -48,7 +64,7 @@ def tracked_method_wrapper( return wrap_utils.wrap_callable( func=func, - callback_class=mod_trace.AppTracingCallbacks, + callback_class=core_callbacks.AppTracingCallbacks, func_name=method_name, - app=deproxy(self.app), + app=deproxy(self.app), # gets rewrapped in weakref later ) diff --git a/src/core/trulens/experimental/otel_tracing/core/otel.py b/src/core/trulens/experimental/otel_tracing/core/otel.py deleted file mode 100644 index e65c0b8e7..000000000 --- a/src/core/trulens/experimental/otel_tracing/core/otel.py +++ /dev/null @@ -1,749 +0,0 @@ -# ruff: noqa: E402 - -"""OTEL Compatibility Classes - -This 
module contains classes to support interacting with the OTEL ecosystem. -Additions on top of these meant for TruLens uses outside of OTEL compatibility -are found in `traces.py`. -""" - -from __future__ import annotations - -import contextlib -import contextvars -import logging -import random -import time -from types import TracebackType -from typing import ( - Any, - Dict, - Iterable, - List, - Mapping, - Optional, - Sequence, - Tuple, - Type, - TypeVar, - Union, -) - -import pydantic -from trulens.core._utils.pycompat import NoneType # import style exception -from trulens.core._utils.pycompat import TypeAlias # import style exception -from trulens.core._utils.pycompat import TypeAliasType # import style exception -from trulens.core.utils import pyschema as pyschema_utils -from trulens.core.utils import python as python_utils -from trulens.core.utils import serial as serial_utils -from trulens.experimental.otel_tracing import _feature - -_feature._FeatureSetup.assert_optionals_installed() # checks to make sure otel is installed - -from opentelemetry import context as context_api -from opentelemetry import trace as trace_api -from opentelemetry.sdk import resources as resources_sdk -from opentelemetry.sdk import trace as trace_sdk -from opentelemetry.util import types as types_api - -logger = logging.getLogger(__name__) - -# Type alises - -A = TypeVar("A") -B = TypeVar("B") - -TSpanID: TypeAlias = int -"""Type of span identifiers. -64 bit int as per OpenTelemetry. -""" -NUM_SPANID_BITS: int = 64 -"""Number of bits in a span identifier.""" - -TTraceID: TypeAlias = int -"""Type of trace identifiers. -128 bit int as per OpenTelemetry. -""" -NUM_TRACEID_BITS: int = 128 -"""Number of bits in a trace identifier.""" - -TTimestamp: TypeAlias = int -"""Type of timestamps in spans. - -64 bit int representing nanoseconds since epoch as per OpenTelemetry. 
-""" -NUM_TIMESTAMP_BITS = 64 - -TLensedBaseType: TypeAlias = Union[str, int, float, bool] -"""Type of base types in span attributes. - -!!! Warning - OpenTelemetry does not allow None as an attribute value. Handling None is to - be decided. -""" - -TLensedAttributeValue = TypeAliasType( - "TLensedAttributeValue", - Union[ - str, - int, - float, - bool, - NoneType, # TODO(SNOW-1711929): None is not technically allowed as an attribute value. - Sequence["TLensedAttributeValue"], # type: ignore - "TLensedAttributes", - ], -) -"""Type of values in span attributes.""" - -# NOTE(piotrm): pydantic will fail if you specify a recursive type alias without -# the TypeAliasType schema as above. - -TLensedAttributes: TypeAlias = Dict[str, TLensedAttributeValue] -"""Attribute dictionaries. - -Note that this deviates from what OTEL allows as attribute values. Because OTEL -does not allow general recursive values to be stored as attributes, we employ a -system of flattening values before exporting to OTEL. In this process we encode -a single generic value as multiple attributes where the attribute name include -paths/lenses to the parts of the generic value they are representing. For -example, an attribute/value like `{"a": {"b": 1, "c": 2}}` would be encoded as -`{"a.b": 1, "a.c": 2}`. This process is implemented in the -`flatten_lensed_attributes` method. -""" - - -def flatten_value( - v: TLensedAttributeValue, lens: Optional[serial_utils.Lens] = None -) -> Iterable[Tuple[serial_utils.Lens, types_api.AttributeValue]]: - """Flatten recursive value into OTEL-compatible attribute values. - - See `TLensedAttributes` for more details. - """ - - if lens is None: - lens = serial_utils.Lens() - - # TODO(SNOW-1711929): OpenTelemetry does not allow None as an attribute - # value. Unsure what is best to do here. 
- - # if v is None: - # yield (path, "None") - - elif v is None: - pass - - elif isinstance(v, TLensedBaseType): - yield (lens, v) - - elif isinstance(v, Sequence) and all( - isinstance(e, TLensedBaseType) for e in v - ): - yield (lens, v) - - elif isinstance(v, Sequence): - for i, e in enumerate(v): - yield from flatten_value(v=e, lens=lens[i]) - - elif isinstance(v, Mapping): - for k, e in v.items(): - yield from flatten_value(v=e, lens=lens[k]) - - else: - raise ValueError( - f"Do not know how to flatten value of type {type(v)} to OTEL attributes." - ) - - -def flatten_lensed_attributes( - m: TLensedAttributes, - path: Optional[serial_utils.Lens] = None, - prefix: str = "", -) -> types_api.Attributes: - """Flatten lensed attributes into OpenTelemetry attributes.""" - - if path is None: - path = serial_utils.Lens() - - ret = {} - for k, v in m.items(): - if k.startswith(prefix): - # Only flattening those attributes that begin with `prefix` are - # those are the ones coming from trulens_eval. - for p, a in flatten_value(v, path[k]): - ret[str(p)] = a - else: - ret[k] = v - - return ret - - -def new_trace_id(): - return int( - random.getrandbits(NUM_TRACEID_BITS) - & trace_api.span._TRACE_ID_MAX_VALUE - ) - - -def new_span_id(): - return int( - random.getrandbits(NUM_SPANID_BITS) & trace_api.span._SPAN_ID_MAX_VALUE - ) - - -class TraceState(serial_utils.SerialModel, trace_api.span.TraceState): - """[OTEL TraceState][opentelemetry.trace.TraceState] requirements.""" - - # Hackish: trace_api.span.TraceState uses _dict internally. - _dict: Dict[str, str] = pydantic.PrivateAttr(default_factory=dict) - - -class SpanContext(serial_utils.SerialModel): - """[OTEL SpanContext][opentelemetry.trace.SpanContext] requirements.""" - - model_config = pydantic.ConfigDict( - arbitrary_types_allowed=True, - use_enum_values=True, # needed for enums that do not inherit from str - ) - - trace_id: int = pydantic.Field(default_factory=new_trace_id) - """Unique identifier for the trace. 
- - Each root span has a unique trace id.""" - - span_id: int = pydantic.Field(default_factory=new_span_id) - """Identifier for the span. - - Meant to be at least unique within the same trace_id. - """ - - trace_flags: trace_api.TraceFlags = pydantic.Field( - trace_api.DEFAULT_TRACE_OPTIONS - ) - - @pydantic.field_validator("trace_flags", mode="before") - @classmethod - def _validate_trace_flags(cls, v): - """Validate trace flags. - - Pydantic does not seem to like classes that inherit from int without this. - """ - return trace_api.TraceFlags(v) - - trace_state: TraceState = pydantic.Field(default_factory=TraceState) - - is_remote: bool = False - - _tracer: Tracer = pydantic.PrivateAttr(None) - - @property - def tracer(self) -> Tracer: - return self._tracer - - def __init__(self, **kwargs): - super().__init__(**kwargs) - for k, v in kwargs.items(): - if v is None: - continue - # pydantic does not set private attributes in init - if k.startswith("_") and hasattr(self, k): - setattr(self, k, v) - - -def lens_of_flat_key(key: str) -> serial_utils.Lens: - """Convert a flat dict key to a lens.""" - lens = serial_utils.Lens() - for step in key.split("."): - lens = lens[step] - - return lens - - -class Span( - pyschema_utils.WithClassInfo, serial_utils.SerialModel, trace_api.Span -): - """[OTEL Span][opentelemetry.trace.Span] requirements. - - See also [OpenTelemetry - Span](https://opentelemetry.io/docs/specs/otel/trace/api/#span) and - [OpenTelemetry Span - specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md). 
- """ - - model_config = pydantic.ConfigDict( - arbitrary_types_allowed=True, - use_enum_values=True, # model_validate will fail without this - ) - - name: Optional[str] = None - - kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL - - context: SpanContext = pydantic.Field(default_factory=SpanContext) - parent: Optional[SpanContext] = None - - status: trace_api.status.StatusCode = trace_api.status.StatusCode.UNSET - status_description: Optional[str] = None - - events: List[Tuple[str, trace_api.types.Attributes, TTimestamp]] = ( - pydantic.Field(default_factory=list) - ) - links: trace_api._Links = pydantic.Field(default_factory=dict) - - # attributes: trace_api.types.Attributes = pydantic.Field(default_factory=dict) - attributes: Dict = pydantic.Field(default_factory=dict) - - start_timestamp: int = pydantic.Field(default_factory=time.time_ns) - - end_timestamp: Optional[int] = None - - _record_exception: bool = pydantic.PrivateAttr(True) - _set_status_on_exception: bool = pydantic.PrivateAttr(True) - - _tracer: Tracer = pydantic.PrivateAttr(None) - """NON-STANDARD: The Tracer that produced this span.""" - - @property - def tracer(self) -> Tracer: - return self._tracer - - def __init__(self, **kwargs): - if kwargs.get("start_timestamp") is None: - kwargs["start_timestamp"] = time.time_ns() - - super().__init__(**kwargs) - - for k, v in kwargs.items(): - if v is None: - continue - # pydantic does not set private attributes in init - if k.startswith("_") and hasattr(self, k): - setattr(self, k, v) - - def update_name(self, name: str) -> None: - """See [OTEL update_name][opentelemetry.trace.span.Span.update_name].""" - - self.name = name - - def get_span_context(self) -> trace_api.span.SpanContext: - """See [OTEL get_span_context][opentelemetry.trace.span.Span.get_span_context].""" - - return self.context - - def set_status( - self, - status: Union[trace_api.span.Status, trace_api.span.StatusCode], - description: Optional[str] = None, - ) -> None: - """See [OTEL 
set_status][opentelemetry.trace.span.Span.set_status].""" - - if isinstance(status, trace_api.span.Status): - if description is not None: - raise ValueError( - "Ambiguous status description provided both in `status.description` and in `description`." - ) - - self.status = status.status_code - self.status_description = status.description - else: - self.status = status - self.status_description = description - - def add_event( - self, - name: str, - attributes: types_api.Attributes = None, - timestamp: Optional[TTimestamp] = None, - ) -> None: - """See [OTEL add_event][opentelemetry.trace.span.Span.add_event].""" - - self.events.append((name, attributes, timestamp or time.time_ns())) - - def add_link( - self, - context: trace_api.span.SpanContext, - attributes: types_api.Attributes = None, - ) -> None: - """See [OTEL add_link][opentelemetry.trace.span.Span.add_link].""" - - if attributes is None: - attributes = {} - - self.links[context] = attributes - - def is_recording(self) -> bool: - """See [OTEL is_recording][opentelemetry.trace.span.Span.is_recording].""" - - return self.status == trace_api.status.StatusCode.UNSET - - def set_attributes( - self, attributes: Dict[str, types_api.AttributeValue] - ) -> None: - """See [OTEL set_attributes][opentelemetry.trace.span.Span.set_attributes].""" - - for key, value in attributes.items(): - self.set_attribute(key, value) - - def set_attribute(self, key: str, value: types_api.AttributeValue) -> None: - """See [OTEL set_attribute][opentelemetry.trace.span.Span.set_attribute].""" - - self.attributes[key] = value - - def record_exception( - self, - exception: BaseException, - attributes: types_api.Attributes = None, - timestamp: Optional[TTimestamp] = None, - escaped: bool = False, # purpose unknown - ) -> None: - """See [OTEL record_exception][opentelemetry.trace.span.Span.record_exception].""" - - if self._set_status_on_exception: - self.set_status( - trace_api.status.Status(trace_api.status.StatusCode.ERROR) - ) - - if 
self._record_exception: - if attributes is None: - attributes = {} - - attributes["exc_type"] = python_utils.class_name(type(exception)) - attributes["exc_val"] = str(exception) - if exception.__traceback__ is not None: - attributes["code_line"] = python_utils.code_line( - exception.__traceback__.tb_frame, show_source=True - ) - - self.add_event("trulens.exception", attributes, timestamp) - - def end(self, end_time: Optional[TTimestamp] = None): - """See [OTEL end][opentelemetry.trace.span.Span.end]""" - - if end_time is None: - end_time = time.time_ns() - - self.end_timestamp = end_time - - if self.is_recording(): - self.set_status( - trace_api.status.Status(trace_api.status.StatusCode.OK) - ) - - def __enter__(self) -> Span: - """See [OTEL __enter__][opentelemetry.trace.span.Span.__enter__].""" - - return self - - def __exit__( - self, - exc_type: Optional[BaseException], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> None: - """See [OTEL __exit__][opentelemetry.trace.span.Span.__exit__].""" - - try: - if exc_val is not None: - self.record_exception(exception=exc_val) - raise exc_val - finally: - self.end() - - async def __aenter__(self) -> Span: - return self.__enter__() - - async def __aexit__( - self, - exc_type: Optional[BaseException], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> None: - return self.__exit__(exc_type, exc_val, exc_tb) - - # Rest of these methods are for exporting spans to ReadableSpan. All are not standard OTEL. 
- - @staticmethod - def otel_context_of_context(context: SpanContext) -> trace_api.SpanContext: - return trace_api.SpanContext( - trace_id=context.trace_id, - span_id=context.span_id, - is_remote=False, - ) - - def otel_name(self) -> str: - return self.name - - def otel_context(self) -> types_api.SpanContext: - return self.otel_context_of_context(self.context) - - def otel_parent_context(self) -> Optional[types_api.SpanContext]: - if self.parent is None: - return None - - return self.otel_context_of_context(self.parent) - - def otel_attributes(self) -> types_api.Attributes: - return flatten_lensed_attributes(self.attributes) - - def otel_kind(self) -> types_api.SpanKind: - return trace_api.SpanKind.INTERNAL - - def otel_status(self) -> trace_api.status.Status: - return trace_api.status.Status(self.status, self.status_description) - - def otel_resource_attributes(self) -> Dict[str, Any]: - # TODO(SNOW-1711959) - return { - "service.namespace": "trulens", - } - - def otel_resource(self) -> resources_sdk.Resource: - return resources_sdk.Resource( - attributes=self.otel_resource_attributes() - ) - - def otel_events(self) -> List[types_api.Event]: - return self.events - - def otel_links(self) -> List[types_api.Link]: - return self.links - - def otel_freeze(self) -> trace_sdk.ReadableSpan: - """Convert span to an OTEL compatible span for exporting to OTEL collectors.""" - - return trace_sdk.ReadableSpan( - name=self.otel_name(), - context=self.otel_context(), - parent=self.otel_parent_context(), - resource=self.otel_resource(), - attributes=self.otel_attributes(), - events=self.otel_events(), - links=self.otel_links(), - kind=self.otel_kind(), - instrumentation_info=None, # TODO(SNOW-1711959) - status=self.otel_status(), - start_time=self.start_timestamp, - end_time=self.end_timestamp, - instrumentation_scope=None, # TODO(SNOW-1711959) - ) - - -class Tracer(serial_utils.SerialModel, trace_api.Tracer): - """[OTEL Tracer][opentelemetry.trace.Tracer] requirements.""" - - 
model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - - _instrumenting_module_name: Optional[str] = pydantic.PrivateAttr(None) - """Name of the library/module that is instrumenting the code.""" - - _instrumenting_library_version: Optional[str] = pydantic.PrivateAttr(None) - """Version of the library that is instrumenting the code.""" - - _attributes: Optional[trace_api.types.Attributes] = pydantic.PrivateAttr( - None - ) - """Common attributes to add to all spans.""" - - _schema_url: Optional[str] = pydantic.PrivateAttr(None) - """Use unknown.""" - - _tracer_provider: TracerProvider = pydantic.PrivateAttr(None) - """NON-STANDARD: The TracerProvider that made this tracer.""" - - _span_class: Type[Span] = pydantic.PrivateAttr(Span) - """NON-STANDARD: The default span class to use when creating spans.""" - - _span_context_class: Type[SpanContext] = pydantic.PrivateAttr(SpanContext) - """NON-STANDARD: The default span context class to use when creating spans.""" - - def __init__(self, _context: context_api.context.Context, **kwargs): - super().__init__(**kwargs) - - for k, v in kwargs.items(): - if v is None: - continue - # pydantic does not set private attributes in init - if k.startswith("_") and hasattr(self, k): - setattr(self, k, v) - - self._context_cvar.set(_context) - - _context_cvar: contextvars.ContextVar[context_api.context.Context] = ( - pydantic.PrivateAttr( - default_factory=lambda: contextvars.ContextVar( - f"context_Tracer_{python_utils.context_id()}", default=None - ) - ) - ) - - @property - def context_cvar( - self, - ) -> contextvars.ContextVar[context_api.context.Context]: - """NON-STANDARD: The context variable to store the current span context.""" - - return self._context_cvar - - @property - def trace_id(self) -> int: - return self._tracer_provider.trace_id - - def start_span( - self, - name: Optional[str] = None, - *args, # non-standard - context: Optional[context_api.context.Context] = None, - kind: trace_api.SpanKind = 
trace_api.SpanKind.INTERNAL, - attributes: trace_api.types.Attributes = None, - links: trace_api._Links = None, - start_time: Optional[int] = None, - record_exception: bool = True, - set_status_on_exception: bool = True, - cls: Optional[Type[Span]] = None, # non-standard - **kwargs, # non-standard - ) -> Span: - """See [OTEL - Tracer.start_span][opentelemetry.trace.Tracer.start_span].""" - - if context is None: - parent_context = self.context_cvar.get() - - else: - if len(context) != 1: - raise ValueError("Only one context span is allowed.") - parent_span_encoding = next(iter(context.values())) - - parent_context = self._span_context_class( - trace_id=parent_span_encoding.trace_id, - span_id=parent_span_encoding.span_id, - _tracer=self, - ) - - new_context = self._span_context_class( - *args, trace_id=self.trace_id, _tracer=self, **kwargs - ) - - if name is None: - name = python_utils.class_name(self._span_class) - - if attributes is None: - attributes = {} - - if self._attributes is not None: - attributes.update(self._attributes) - - if cls is None: - cls = self._span_class - - new_span = cls( - name=name, - context=new_context, - parent=parent_context, - kind=kind, - attributes=attributes, - links=links, - start_timestamp=start_time, - _record_exception=record_exception, - _status_on_exception=set_status_on_exception, - _tracer=self, - ) - - return new_span - - @contextlib.contextmanager - def start_as_current_span( - self, - name: Optional[str] = None, - context: Optional[trace_api.context.Context] = None, - kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL, - attributes: trace_api.types.Attributes = None, - links: trace_api._Links = None, - start_time: Optional[int] = None, - record_exception: bool = True, - set_status_on_exception: bool = True, - end_on_exit: bool = True, - ): - """See [OTEL - Tracer.start_as_current_span][opentelemetry.trace.Tracer.start_as_current_span].""" - - span = self.start_span( - name=name, - context=context, - kind=kind, - 
attributes=attributes, - links=links, - start_time=start_time, - record_exception=record_exception, - set_status_on_exception=set_status_on_exception, - ) - - token = self.context_cvar.set(span.context) - - try: - yield span - - except BaseException as e: - if record_exception: - span.record_exception(e) - - finally: - self.context_cvar.reset(token) - - if end_on_exit: - span.end() - - -class TracerProvider(serial_utils.SerialModel, trace_api.TracerProvider): - """[OTEL TracerProvider][opentelemetry.trace.TracerProvider] - requirements.""" - - model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - - _tracer_class: Type[Tracer] = pydantic.PrivateAttr(Tracer) - """NON-STANDARD: The default tracer class to use when creating tracers.""" - - _context_cvar: contextvars.ContextVar[context_api.context.Context] = ( - pydantic.PrivateAttr( - default_factory=lambda: contextvars.ContextVar( - f"context_TracerProvider_{python_utils.context_id()}", - default=None, - ) - ) - ) - - @property - def context_cvar( - self, - ) -> contextvars.ContextVar[context_api.context.Context]: - """NON-STANDARD: The context variable to store the current span context.""" - - return self._context_cvar - - _trace_id: int = pydantic.PrivateAttr(default_factory=new_trace_id) - - @property - def trace_id(self) -> int: - """NON-STANDARD: The current trace id.""" - - return self._trace_id - - def get_tracer( - self, - instrumenting_module_name: str, - instrumenting_library_version: Optional[str] = None, - schema_url: Optional[str] = None, - attributes: Optional[types_api.Attributes] = None, - ): - """See [OTEL - TracerProvider.get_tracer][opentelemetry.trace.TracerProvider.get_tracer].""" - - tracer = self._tracer_class( - _instrumenting_module_name=instrumenting_module_name, - _instrumenting_library_version=instrumenting_library_version, - _attributes=attributes, - _schema_url=schema_url, - _tracer_provider=self, - _context=self.context_cvar.get(), - ) - - return tracer diff --git 
a/src/core/trulens/experimental/otel_tracing/core/session.py b/src/core/trulens/experimental/otel_tracing/core/session.py index a87116742..fc218f889 100644 --- a/src/core/trulens/experimental/otel_tracing/core/session.py +++ b/src/core/trulens/experimental/otel_tracing/core/session.py @@ -1,3 +1,8 @@ +"""OTEL tracing additions on top of TruSession. + +Adds the code for setting up an otel exporter. +""" + from typing import Any, Optional from trulens.core import experimental as core_experimental diff --git a/src/core/trulens/experimental/otel_tracing/core/trace.py b/src/core/trulens/experimental/otel_tracing/core/trace.py deleted file mode 100644 index 3962074dc..000000000 --- a/src/core/trulens/experimental/otel_tracing/core/trace.py +++ /dev/null @@ -1,1098 +0,0 @@ -# ruff: noqa: E402 - -"""Implementation of recording that resembles the tracing process in OpenTelemetry. - -!!! Note - Most of the module is (EXPERIMENTAL(otel_tracing)) though it includes some existing - non-experimental classes moved here to resolve some circular import issues. 
-""" - -from __future__ import annotations - -import contextlib -import contextvars -import inspect -import logging -import os -import sys -import threading as th -from threading import Lock -from typing import ( - Any, - Callable, - ContextManager, - Dict, - Generic, - Hashable, - Iterable, - List, - Optional, - Tuple, - Type, - TypeVar, - Union, -) -import uuid -import weakref - -import pydantic -from trulens.core.schema import base as base_schema -from trulens.core.schema import record as record_schema -from trulens.core.schema import types as types_schema -from trulens.core.utils import json as json_utils -from trulens.core.utils import pyschema as pyschema_utils -from trulens.core.utils import python as python_utils -from trulens.core.utils import serial as serial_utils -from trulens.experimental.otel_tracing import _feature -from trulens.experimental.otel_tracing.core import otel as core_otel -from trulens.experimental.otel_tracing.core._utils import wrap as wrap_utils - -_feature._FeatureSetup.assert_optionals_installed() # checks to make sure otel is installed - -if sys.version_info < (3, 9): - from functools import lru_cache as fn_cache -else: - from functools import cache as fn_cache - -from opentelemetry.semconv.resource import ResourceAttributes -from opentelemetry.trace import span as span_api -from opentelemetry.util import types as types_api - -T = TypeVar("T") -R = TypeVar("R") # callable return type -E = TypeVar("E") # iterator/generator element type - -logger = logging.getLogger(__name__) - -INSTRUMENT: str = "__tru_instrumented" -"""Attribute name to be used to flag instrumented objects/methods/others.""" - -APPS: str = "__tru_apps" -"""Attribute name for storing apps that expect to be notified of calls.""" - - -class SpanContext(core_otel.SpanContext, Hashable): - """TruLens additions on top of OTEL SpanContext to add Hashable and - reference to tracer that made the span.""" - - model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - 
- def __str__(self): - return f"{self.trace_id % 0xFF:02x}/{self.span_id % 0xFF:02x}" - - def __repr__(self): - return str(self) - - def __hash__(self): - return self.trace_id + self.span_id - - def __eq__(self, other: SpanContextLike): - if other is None: - return False - - return self.trace_id == other.trace_id and self.span_id == other.span_id - - @staticmethod - def of_spancontextlike(span_context: SpanContextLike) -> SpanContext: - if isinstance(span_context, SpanContext): - return span_context - - elif isinstance(span_context, core_otel.SpanContext): - return SpanContext( - trace_id=span_context.trace_id, - span_id=span_context.span_id, - is_remote=span_context.is_remote, - ) - elif isinstance(span_context, span_api.SpanContext): - return SpanContext( - trace_id=span_context.trace_id, - span_id=span_context.span_id, - is_remote=span_context.is_remote, - ) - elif isinstance(span_context, Dict): - return SpanContext.model_validate(span_context) - else: - raise ValueError(f"Unrecognized span context type: {span_context}") - - -SpanContextLike = Union[ - SpanContext, core_otel.SpanContext, span_api.SpanContext, serial_utils.JSON -] - - -class Span(core_otel.Span): - """TruLens additions on top of OTEL spans.""" - - model_config = pydantic.ConfigDict( - arbitrary_types_allowed=True, - use_enum_values=True, # model_validate will fail without this - ) - - def __str__(self): - return ( - f"{type(self).__name__}({self.name}, {self.context}->{self.parent})" - ) - - def __repr__(self): - return str(self) - - _lensed_attributes: serial_utils.LensedDict[Any] = pydantic.PrivateAttr( - default_factory=serial_utils.LensedDict - ) - - @property - def lensed_attributes(self) -> serial_utils.LensedDict[Any]: - return self._lensed_attributes - - @property - def parent_span(self) -> Optional[Span]: - if self.parent is None: - return None - - if self._tracer is None: - return None - - if (span := self._tracer.spans.get(self.parent)) is None: - return None - - return span - - 
_children_spans: List[Span] = pydantic.PrivateAttr(default_factory=list) - - @property - def children_spans(self) -> List[Span]: - return self._children_spans - - error: Optional[Exception] = pydantic.Field(None) - """Optional error if the observed computation raised an exception.""" - - def __init__(self, **kwargs): - # Convert any contexts to our hashable context class: - if (context := kwargs.get("context")) is not None: - kwargs["context"] = SpanContext.of_spancontextlike(context) - if (parent := kwargs.get("parent", None)) is not None: - kwargs["parent"] = SpanContext.of_spancontextlike(parent) - - super().__init__(**kwargs) - - if (parent_span := self.parent_span) is not None: - parent_span.children_spans.append(self) - - def iter_children( - self, transitive: bool = True, include_phantom: bool = False - ) -> Iterable[Span]: - """Iterate over all spans that are children of this span. - - Args: - transitive: Iterate recursively over children. - - include_phantom: Include phantom spans. If not set, phantom spans - will not be included but will be iterated over even if - transitive is false. - """ - - for child_span in self.children_spans: - if isinstance(child_span, PhantomSpan) and not include_phantom: - # Note that transitive being false is ignored if phantom is skipped. 
- yield from child_span.iter_children( - transitive=transitive, include_phantom=include_phantom - ) - else: - yield child_span - if transitive: - yield from child_span.iter_children( - transitive=transitive, - include_phantom=include_phantom, - ) - - def iter_family(self, include_phantom: bool = False) -> Iterable[Span]: - """Iterate itself and all children transitively.""" - - if (not isinstance(self, PhantomSpan)) or include_phantom: - yield self - - yield from self.iter_children( - include_phantom=include_phantom, transitive=True - ) - - def total_cost(self) -> base_schema.Cost: - """Total costs of this span and all its transitive children.""" - - total = base_schema.Cost() - - for span in self.iter_family(include_phantom=True): - if isinstance(span, WithCost) and span.cost is not None: - total += span.cost - - return total - - -class PhantomSpan(Span): - """A span type that indicates that it does not correspond to a - computation to be recorded but instead is an element of the tracing system. - - It is to be removed from the spans presented to the users. - """ - - -class LiveSpan(Span): - """A a span type that indicates that it contains live python objects. - - It is to be converted to a non-live span before being output to the user or - otherwise. 
- """ - - -class PhantomSpanRecordingContext(PhantomSpan): - """Tracks the context of an app used as a context manager.""" - - recording: Optional[Any] = pydantic.Field(None, exclude=True) - # TODO: app.RecordingContext # circular import issues - - def otel_resource_attributes(self) -> Dict[str, Any]: - ret = super().otel_resource_attributes() - - ret[ResourceAttributes.SERVICE_NAME] = ( - self.recording.app.app_name if self.recording is not None else None - ) - - return ret - - # override - def end(self, *args, **kwargs): - super().end(*args, **kwargs) - - self._finalize_recording() - - # override - def record_exception( - self, - exception: BaseException, - attributes: types_api.Attributes = None, - timestamp: int | None = None, - escaped: bool = False, - ) -> None: - super().record_exception(exception, attributes, timestamp, escaped) - - self._finalize_recording() - - def _finalize_recording(self): - assert self.recording is not None - - app = self.recording.app - - for span in Tracer.find_each_child( - span=self, span_filter=lambda s: isinstance(s, LiveSpanCall) - ): - app._on_new_root_span(recording=self.recording, root_span=span) - - app._on_new_recording_span(recording_span=self) - - def otel_name(self) -> str: - return "trulens.recording" - - -class SpanCall(Span): - """Non-live fields of a function call span.""" - - model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - - call_id: Optional[uuid.UUID] = pydantic.Field(None) - """Unique identifier for the call.""" - - stack: Optional[List[record_schema.RecordAppCallMethod]] = pydantic.Field( - None - ) - """Call stack of instrumented methods only.""" - - sig: Optional[inspect.Signature] = pydantic.Field(None) - """Signature of the function.""" - - func_name: Optional[str] = None - """Function name.""" - - pid: Optional[int] = None - """Process id.""" - - tid: Optional[int] = None - """Thread id.""" - - def end(self): - super().end() - - self.set_attribute(ResourceAttributes.PROCESS_PID, self.pid) 
- self.set_attribute("thread.id", self.tid) # TODO: semconv - - self.set_attribute("trulens.call_id", str(self.call_id)) - self.set_attribute("trulens.stack", json_utils.jsonify(self.stack)) - self.set_attribute("trulens.sig", str(self.sig)) - - def otel_name(self) -> str: - return f"trulens.call.{self.func_name}" - - -class LiveSpanCall(LiveSpan, SpanCall): - """Track a function call.""" - - model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - - live_obj: Optional[Any] = pydantic.Field(None, exclude=True) - """Self object if method call.""" - - live_cls: Optional[Type] = pydantic.Field(None, exclude=True) - """Class if method/static/class method call.""" - - live_func: Optional[Callable] = pydantic.Field(None, exclude=True) - """Function object.""" - - live_args: Optional[Tuple[Any, ...]] = pydantic.Field(None, exclude=True) - """Positional arguments to the function call.""" - - live_kwargs: Optional[Dict[str, Any]] = pydantic.Field(None, exclude=True) - """Keyword arguments to the function call.""" - - live_bindings: Optional[inspect.BoundArguments] = pydantic.Field( - None, exclude=True - ) - """Bound arguments to the function call if can be bound.""" - - live_ret: Optional[Any] = pydantic.Field(None, exclude=True) - """Return value of the function call. - - Exclusive with `error`. - """ - - live_error: Optional[Any] = pydantic.Field(None, exclude=True) - """Error raised by the function call. - - Exclusive with `ret`. 
- """ - - def end(self): - super().end() - - if self.live_cls is not None: - self.set_attribute( - "trulens.cls", - pyschema_utils.Class.of_class(self.live_cls).model_dump(), - ) - - if self.live_func is not None: - self.set_attribute( - "trulens.func", - pyschema_utils.FunctionOrMethod.of_callable( - self.live_func - ).model_dump(), - ) - - if self.live_ret is not None: - self.set_attribute("trulens.ret", json_utils.jsonify(self.live_ret)) - - if self.live_bindings is not None: - self.set_attribute( - "trulens.bindings", - pyschema_utils.Bindings.of_bound_arguments( - self.live_bindings, arguments_only=True, skip_self=True - ).model_dump()["kwargs"], - ) - - if self.live_error is not None: - self.set_attribute( - "trulens.error", json_utils.jsonify(self.live_error) - ) - - -S = TypeVar("S", bound=LiveSpanCall) - - -class WithCost(LiveSpan): - """Mixin to indicate the span has costs tracked.""" - - cost: base_schema.Cost = pydantic.Field(default_factory=base_schema.Cost) - """Cost of the computation spanned.""" - - endpoint: Optional[Any] = pydantic.Field( - None, exclude=True - ) # Any actually core_endpoint.Endpoint - """Endpoint handling cost extraction for this span/call.""" - - def end(self): - super().end() - - self.set_attribute("trulens.cost", self.cost.model_dump()) - - def __init__(self, cost: Optional[base_schema.Cost] = None, **kwargs): - if cost is None: - cost = base_schema.Cost() - - super().__init__(cost=cost, **kwargs) - - -class LiveSpanCallWithCost(LiveSpanCall, WithCost): - pass - - -class Tracer(core_otel.Tracer): - """TruLens additions on top of [OTEL Tracer][opentelemetry.trace.Tracer].""" - - # TODO: Tracer that does not record anything. Can either be a setting to - # this tracer or a separate "NullTracer". We need non-recording users to not - # incur much overhead hence need to be able to disable most of the tracing - # logic when appropriate. 
- - model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - - # Overrides core_otel.Tracer._span_class - _span_class: Type[Span] = pydantic.PrivateAttr(Span) - - # Overrides core_otel.Tracer._span_context_class - _span_context_class: Type[SpanContext] = pydantic.PrivateAttr(SpanContext) - - @property - def spans(self) -> Dict[SpanContext, Span]: - return self._tracer_provider.spans - - def __init__(self, **kwargs): - super().__init__(**kwargs) - - def __str__(self): - return f"{type(self).__name__} {self.instrumenting_module_name} {self.instrumenting_library_version}" - - def __repr__(self): - return str(self) - - def start_span(self, *args, **kwargs): - new_span = super().start_span(*args, **kwargs) - - self.spans[new_span.context] = new_span - - return new_span - - @staticmethod - def _fill_stacks( - span: Span, - get_method_path: Callable, - stack: List[record_schema.RecordAppCallMethod] = [], - ): - if isinstance(span, LiveSpanCall): - path = get_method_path(obj=span.live_obj, func=span.live_func) - - frame_ident = record_schema.RecordAppCallMethod( - path=path - if path is not None - else serial_utils.Lens().static, # placeholder path for functions - method=pyschema_utils.Method.of_method( - span.live_func, obj=span.live_obj, cls=span.live_cls - ), - ) - - stack = stack + [frame_ident] - span.stack = stack - - for subspan in span.iter_children(transitive=False): - Tracer._fill_stacks( - subspan, stack=stack, get_method_path=get_method_path - ) - - def _call_of_spancall( - self, span: LiveSpanCall - ) -> record_schema.RecordAppCall: - """Convert a SpanCall to a RecordAppCall.""" - - args = ( - dict(span.live_bindings.arguments) - if span.live_bindings is not None - else None - ) - if args is not None: - if "self" in args: - del args["self"] # remove self - - assert span.start_timestamp is not None - if span.end_timestamp is None: - logger.warning( - "Span %s has no end timestamp. 
It might not have yet finished recording.", - span, - ) - - return record_schema.RecordAppCall( - call_id=str(span.call_id), - stack=span.stack, - args=args, - rets=json_utils.jsonify(span.live_ret), - error=str(span.live_error), - perf=base_schema.Perf.of_ns_timestamps( - start_ns_timestamp=span.start_timestamp, - end_ns_timestamp=span.end_timestamp, - ), - pid=span.pid, - tid=span.tid, - ) - - def record_of_root_span( - self, recording: Any, root_span: LiveSpanCall - ) -> record_schema.Record: - """Convert a root span to a record. - - This span has to be a call span so we can extract things like main input and output. - """ - - assert isinstance(root_span, LiveSpanCall), type(root_span) - - app = recording.app - - self._fill_stacks(root_span, get_method_path=app.get_method_path) - - root_perf = ( - base_schema.Perf.of_ns_timestamps( - start_ns_timestamp=root_span.start_timestamp, - end_ns_timestamp=root_span.end_timestamp, - ) - if root_span.end_timestamp is not None - else None - ) - - total_cost = root_span.total_cost() - - calls = [] - if isinstance(root_span, LiveSpanCall): - calls.append(self._call_of_spancall(root_span)) - - spans = [root_span] - - for span in root_span.iter_children(include_phantom=True): - if isinstance(span, LiveSpanCall): - calls.append(self._call_of_spancall(span)) - - spans.append(span) - - bindings = root_span.live_bindings - main_error = root_span.live_error - - if bindings is not None: - main_input = app.main_input( - func=root_span.live_func, - sig=root_span.sig, - bindings=root_span.live_bindings, - ) - if main_error is None: - main_output = app.main_output( - func=root_span.live_func, - sig=root_span.sig, - bindings=root_span.live_bindings, - ret=root_span.live_ret, - ) - else: - main_output = None - else: - main_input = None - main_output = None - - record = record_schema.Record( - record_id="placeholder", - app_id=app.app_id, - main_input=json_utils.jsonify(main_input), - main_output=json_utils.jsonify(main_output), - 
main_error=json_utils.jsonify(main_error), - calls=calls, - perf=root_perf, - cost=total_cost, - experimental_otel_spans=spans, - ) - - # record_id determinism - record.record_id = json_utils.obj_id_of_obj( - record.model_dump(), prefix="record" - ) - - return record - - @staticmethod - def find_each_child(span: Span, span_filter: Callable) -> Iterable[Span]: - """For each family rooted at each child of this span, find the top-most - span that satisfies the filter.""" - - for child_span in span.children_spans: - if span_filter(child_span): - yield child_span - else: - yield from Tracer.find_each_child(child_span, span_filter) - - def records_of_recording( - self, recording: PhantomSpanRecordingContext - ) -> Iterable[record_schema.Record]: - """Convert a recording based on spans to a list of records.""" - - for root_span in Tracer.find_each_child( - span=recording, span_filter=lambda s: isinstance(s, LiveSpanCall) - ): - assert isinstance(root_span, LiveSpanCall) - yield self.record_of_root_span( - recording=recording, root_span=root_span - ) - - @contextlib.contextmanager - def _span(self, cls: Type[S], **kwargs) -> ContextManager[S]: - with self.start_span(cls=cls, **kwargs) as span: - with python_utils.with_context({self.context_cvar: span.context}): - yield span - - @contextlib.asynccontextmanager - async def _aspan(self, cls: Type[S], **kwargs) -> ContextManager[S]: - async with self.start_span(cls=cls, **kwargs) as span: - async with python_utils.awith_context({ - self.context_cvar: span.context - }): - yield span - - # context manager - def recording(self) -> ContextManager[PhantomSpanRecordingContext]: - return self._span( - name="trulens.recording", cls=PhantomSpanRecordingContext - ) - - # context manager - def method(self, method_name: str) -> ContextManager[LiveSpanCall]: - return self._span(name="trulens.call." 
+ method_name, cls=LiveSpanCall) - - # context manager - def cost( - self, method_name: str, cost: Optional[base_schema.Cost] = None - ) -> ContextManager[LiveSpanCallWithCost]: - return self._span( - name="trulens.call." + method_name, - cls=LiveSpanCallWithCost, - cost=cost, - ) - - # context manager - def phantom(self) -> ContextManager[PhantomSpan]: - return self._span(name="trulens.phantom", cls=PhantomSpan) - - # context manager - async def arecording(self) -> ContextManager[PhantomSpanRecordingContext]: - return self._aspan( - name="trulens.recording", cls=PhantomSpanRecordingContext - ) - - # context manager - async def amethod(self, method_name: str) -> ContextManager[LiveSpanCall]: - return self._aspan(name="trulens.call." + method_name, cls=LiveSpanCall) - - # context manager - async def acost( - self, method_name: str, cost: Optional[base_schema.Cost] = None - ) -> ContextManager[LiveSpanCallWithCost]: - return self._aspan( - name="trulens.call." + method_name, - cls=LiveSpanCallWithCost, - cost=cost, - ) - - # context manager - async def aphantom(self) -> ContextManager[PhantomSpan]: - return self._aspan(name="trulens.phantom", cls=PhantomSpan) - - -class TracerProvider( - core_otel.TracerProvider, metaclass=python_utils.PydanticSingletonMeta -): - """TruLens additions on top of [OTEL TracerProvider][opentelemetry.trace.TracerProvider].""" - - model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) - - _trace_id: int = pydantic.PrivateAttr( - default_factory=core_otel.new_trace_id - ) - - def __str__(self): - # Pydantic will not print anything useful otherwise. 
- return f"{self.__module__}.{type(self).__name__}()" - - @property - def trace_id(self): - return self._trace_id - - # Overrides core_otel.TracerProvider._tracer_class - _tracer_class: Type[Tracer] = pydantic.PrivateAttr(default=Tracer) - - _tracers: Dict[str, Tracer] = pydantic.PrivateAttr(default_factory=dict) - - _spans: Dict[SpanContext, Span] = pydantic.PrivateAttr(default_factory=dict) - - @property - def spans(self) -> Dict[SpanContext, Span]: - return self._spans - - def get_tracer( - self, - instrumenting_module_name: str, - instrumenting_library_version: Optional[str] = None, - schema_url: Optional[str] = None, - attributes: Optional[types_api.Attributes] = None, - ): - if instrumenting_module_name in self._tracers: - return self._tracers[instrumenting_module_name] - - tracer = super().get_tracer( - instrumenting_module_name=instrumenting_module_name, - instrumenting_library_version=instrumenting_library_version, - attributes=attributes, - schema_url=schema_url, - ) - - self._tracers[instrumenting_module_name] = tracer - - return tracer - - -tracer_provider = TracerProvider() -"""Global tracer provider. -All trulens tracers are made by this provider even if a different one is -configured for OTEL. 
-""" - - -@fn_cache -def trulens_tracer(): - from trulens.core import __version__ - - return tracer_provider.get_tracer( - instrumenting_module_name="trulens.experimental.otel_tracing.core.trace", - instrumenting_library_version=__version__, - ) - - -class TracingCallbacks(wrap_utils.CallableCallbacks[R], Generic[R, S]): - """Extension of CallableCallbacks that adds tracing to the wrapped callable - as implemented using tracer and spans.""" - - def __init__( - self, - func_name: str, - span_type: Type[S] = LiveSpanCall, - **kwargs: Dict[str, Any], - ): - super().__init__(**kwargs) - - self.func_name: str = func_name - - self.obj: Optional[object] = None - self.obj_cls: Optional[Type] = None - self.obj_id: Optional[int] = None - - if not issubclass(span_type, LiveSpanCall): - raise ValueError("span_type must be a subclass of LiveSpanCall.") - - self.span_context: ContextManager = trulens_tracer()._span( - span_type, name="trulens.call." + func_name - ) - self.span: S = self.span_context.__enter__() - - def on_callable_call( - self, bindings: inspect.BoundArguments, **kwargs: Dict[str, Any] - ) -> inspect.BoundArguments: - temp = super().on_callable_call(bindings=bindings, **kwargs) - - if "self" in bindings.arguments: - # TODO: need some generalization - self.obj = bindings.arguments["self"] - self.obj_cls = type(self.obj) - self.obj_id = id(self.obj) - else: - logger.warning("No self in bindings for %s.", self) - - span = self.span - span.pid = os.getpid() - span.tid = th.get_native_id() - - return temp - - def on_callable_end(self): - super().on_callable_end() - - span = self.span - - # SpanCall attributes - span.call_id = self.call_id - span.func_name = self.func_name - span.sig = self.sig - - # LiveSpanCall attributes - span.live_obj = self.obj - span.live_cls = self.obj_cls - span.live_func = self.func - span.live_args = self.call_args - span.live_kwargs = self.call_kwargs - span.live_bindings = self.bindings - span.live_ret = self.ret - span.live_error = 
self.error - - if self.error is not None: - self.span_context.__exit__( - type(self.error), self.error, self.error.__traceback__ - ) - else: - self.span_context.__exit__(None, None, None) - - -class _RecordingContext: - """Manager of the creation of records from record calls. - - An instance of this class is produced when using an - [App][trulens_eval.app.App] as a context mananger, i.e.: - Example: - ```python - app = ... # your app - truapp: TruChain = TruChain(app, ...) # recorder for LangChain apps - with truapp as recorder: - app.invoke(...) # use your app - recorder: RecordingContext - ``` - - Each instance of this class produces a record for every "root" instrumented - method called. Root method here means the first instrumented method in a - call stack. Note that there may be more than one of these contexts in play - at the same time due to: - - More than one wrapper of the same app. - - More than one context manager ("with" statement) surrounding calls to the - same app. - - Calls to "with_record" on methods that themselves contain recording. - - Calls to apps that use trulens internally to track records in any of the - supported ways. - - Combinations of the above. - """ - - def __init__( - self, - app: _WithInstrumentCallbacks, - record_metadata: serial_utils.JSON = None, - tracer: Optional[Tracer] = None, - span: Optional[PhantomSpanRecordingContext] = None, - span_ctx: Optional[SpanContext] = None, - ): - self.calls: Dict[types_schema.CallID, record_schema.RecordAppCall] = {} - """A record (in terms of its RecordAppCall) in process of being created. - - Storing as a map as we want to override calls with the same id which may - happen due to methods producing awaitables or generators. These result - in calls before the awaitables are awaited and then get updated after - the result is ready. - """ - # TODEP: To deprecated after migration to span-based tracing. 
- - self.records: List[record_schema.Record] = [] - """Completed records.""" - - self.lock: Lock = Lock() - """Lock blocking access to `records` when adding calls or - finishing a record.""" - - self.token: Optional[contextvars.Token] = None - """Token for context management.""" - - self.app: _WithInstrumentCallbacks = app - """App for which we are recording.""" - - self.record_metadata = record_metadata - """Metadata to attach to all records produced in this context.""" - - self.tracer: Optional[Tracer] = tracer - """EXPERIMENTAL(otel_tracing): OTEL-like tracer for recording. - """ - - self.span: Optional[PhantomSpanRecordingContext] = span - """EXPERIMENTAL(otel_tracing): Span that represents a recording context - (the with block).""" - - self.span_ctx = span_ctx - """EXPERIMENTAL(otel_tracing): The context manager for the above span. - """ - - @property - def spans(self) -> Dict[SpanContext, Span]: - """EXPERIMENTAL(otel_tracing): Get the spans of the tracer in this context.""" - - if self.tracer is None: - return {} - - return self.tracer.spans - - def __iter__(self): - return iter(self.records) - - def get(self) -> record_schema.Record: - """Get the single record only if there was exactly one or throw - an error otherwise.""" - - if len(self.records) == 0: - raise RuntimeError("Recording context did not record any records.") - - if len(self.records) > 1: - raise RuntimeError( - "Recording context recorded more than 1 record. " - "You can get them with ctx.records, ctx[i], or `for r in ctx: ...`." - ) - - return self.records[0] - - def __getitem__(self, idx: int) -> record_schema.Record: - return self.records[idx] - - def __len__(self): - return len(self.records) - - def __hash__(self) -> int: - # The same app can have multiple recording contexts. 
- return hash(id(self.app)) + hash(id(self.records)) - - def __eq__(self, other): - return hash(self) == hash(other) - - def add_call(self, call: record_schema.RecordAppCall): - """Add the given call to the currently tracked call list.""" - # TODEP: To deprecated after migration to span-based tracing. - - with self.lock: - # NOTE: This might override existing call record which happens when - # processing calls with awaitable or generator results. - self.calls[call.call_id] = call - - def finish_record( - self, - calls_to_record: Callable[ - [ - List[record_schema.RecordAppCall], - types_schema.Metadata, - Optional[record_schema.Record], - ], - record_schema.Record, - ], - existing_record: Optional[record_schema.Record] = None, - ): - """Run the given function to build a record from the tracked calls and any - pre-specified metadata.""" - # TODEP: To deprecated after migration to span-based tracing. - - with self.lock: - record = calls_to_record( - list(self.calls.values()), self.record_metadata, existing_record - ) - self.calls = {} - - if existing_record is None: - # If existing record was given, we assume it was already - # inserted into this list. - self.records.append(record) - - return record - - -class _WithInstrumentCallbacks: - """Abstract definition of callbacks invoked by Instrument during - instrumentation or when instrumented methods are called. - - Needs to be mixed into [App][trulens_eval.app.App]. - """ - - # Called during instrumentation. - def on_method_instrumented( - self, obj: object, func: Callable, path: serial_utils.Lens - ): - """Callback to be called by instrumentation system for every function - requested to be instrumented. - - Given are the object of the class in which `func` belongs - (i.e. the "self" for that function), the `func` itsels, and the `path` - of the owner object in the app hierarchy. - - Args: - obj: The object of the class in which `func` belongs (i.e. the - "self" for that method). 
- - func: The function that was instrumented. Expects the unbound - version (self not yet bound). - - path: The path of the owner object in the app hierarchy. - """ - - raise NotImplementedError - - # Called during invocation. - def get_method_path(self, obj: object, func: Callable) -> serial_utils.Lens: - """Get the path of the instrumented function `func`, a member of the class - of `obj` relative to this app. - - Args: - obj: The object of the class in which `func` belongs (i.e. the - "self" for that method). - - func: The function that was instrumented. Expects the unbound - version (self not yet bound). - """ - - raise NotImplementedError - - # WithInstrumentCallbacks requirement - def get_methods_for_func( - self, func: Callable - ) -> Iterable[Tuple[int, Callable, serial_utils.Lens]]: - """EXPERIMENTAL(otel_tracing): Get the methods (rather the inner - functions) matching the given `func` and the path of each. - - Args: - func: The function to match. - """ - - raise NotImplementedError - - # Called after recording of an invocation. - def _on_new_root_span( - self, - ctx: _RecordingContext, - root_span: LiveSpanCall, - ) -> record_schema.Record: - """EXPERIMENTAL(otel_tracing): Called by instrumented methods if they - are root calls (first instrumented methods in a call stack). - - Args: - ctx: The context of the recording. - - root_span: The root span that was recorded. 
- """ - # EXPERIMENTAL(otel_tracing) - - raise NotImplementedError - - -class AppTracingCallbacks(TracingCallbacks[R, S]): - """Extension to TracingCallbacks that keep track of apps that are - instrumenting their constituent calls.""" - - @classmethod - def on_callable_wrapped( - cls, - wrapper: Callable[..., R], - app: _WithInstrumentCallbacks, - **kwargs: Dict[str, Any], - ): - if not python_utils.safe_hasattr(wrapper, APPS): - apps: weakref.WeakSet[_WithInstrumentCallbacks] = weakref.WeakSet() - setattr(wrapper, APPS, apps) - else: - apps = python_utils.safe_getattr(wrapper, APPS) - - apps.add(app) - - return super().on_callable_wrapped(wrapper=wrapper, **kwargs) - - def __init__( - self, - app: _WithInstrumentCallbacks, - span_type: Type[Span] = LiveSpanCall, - **kwargs: Dict[str, Any], - ): - super().__init__(span_type=span_type, **kwargs) - - self.app = app - self.apps = python_utils.safe_getattr(self.wrapper, APPS) diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/__init__.py b/src/core/trulens/experimental/otel_tracing/core/trace/__init__.py new file mode 100644 index 000000000..98d143589 --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/__init__.py @@ -0,0 +1,33 @@ +# ruff: noqa: F401 +""" +Modules in this folder depend on each other which makes it impossible to import +them without circular import errors. Because of this, some imports need to be +put into `if TYPE_CHECKING` blocks and classes that depend on those imports need +to be "rebuilt" with `model_rebuild`. This only applies to `pydantic.BaseModel` +classes. Type hints on non-pydantic classes are never interpreted hence no need +to "rebuild" those. +""" + +from trulens.core import ( + app as core_app, # needed for some of the rebuilds below +) + +from . import context as core_context +from . import otel as core_otel +from . import sem as core_sem +from . import span as core_span +from . 
import trace as core_trace + +core_context.SpanContext.model_rebuild() +core_context.TraceState.model_rebuild() +core_otel.Span.model_rebuild() +core_otel.Tracer.model_rebuild() +core_otel.TracerProvider.model_rebuild() +core_span.Span.model_rebuild() +core_span.RecordingContextSpan.model_rebuild() +core_span.LiveRecordRoot.model_rebuild() +core_span.LiveSpanCall.model_rebuild() +core_span.LiveSpanCallWithCost.model_rebuild() +core_sem.TypedSpan.model_rebuild() +core_trace.Tracer.model_rebuild() +core_trace.TracerProvider.model_rebuild() diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/callbacks.py b/src/core/trulens/experimental/otel_tracing/core/trace/callbacks.py new file mode 100644 index 000000000..b8f25b627 --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/callbacks.py @@ -0,0 +1,553 @@ +# ruff: noqa: E402 + +""" """ + +from __future__ import annotations + +import contextvars +import inspect +import logging +import os +import threading +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ContextManager, + Dict, + Generic, + Iterable, + List, + Optional, + Tuple, + Type, + TypeVar, +) + +from trulens.core._utils.pycompat import ReferenceType +from trulens.core._utils.pycompat import WeakSet +from trulens.core.schema import record as record_schema +from trulens.core.schema import types as types_schema +from trulens.core.utils import python as python_utils +from trulens.core.utils import serial as serial_utils +from trulens.core.utils import text as text_utils +from trulens.experimental.otel_tracing.core._utils import wrap as wrap_utils +from trulens.experimental.otel_tracing.core.trace import context as core_context +from trulens.experimental.otel_tracing.core.trace import otel as core_otel +from trulens.experimental.otel_tracing.core.trace import span as core_span +from trulens.experimental.otel_tracing.core.trace import trace as core_trace +from trulens.otel.semconv import trace as truconv + +if TYPE_CHECKING: 
+    # Need to model_rebuild classes that use these:
+    from trulens.experimental.otel_tracing.core.trace import span as core_span
+
+logger = logging.getLogger(__name__)
+
+T = TypeVar("T")
+R = TypeVar("R")  # callable return type
+E = TypeVar("E")  # iterator/generator element type
+S = TypeVar("S")  # span type
+
+INSTRUMENT: str = "__tru_instrumented"
+"""Attribute name to be used to flag instrumented objects/methods/others."""
+
+APPS: str = "__tru_apps"
+"""Attribute name for storing apps that expect to be notified of calls."""
+
+
+class TracingCallbacks(wrap_utils.CallableCallbacks[R], Generic[R, S]):
+    """Extension of CallableCallbacks that adds tracing to the wrapped callable
+    as implemented using tracer and spans."""
+
+    def __init__(
+        self,
+        func_name: str,
+        span_type: Type[S] = core_span.LiveSpanCall,
+        enter_contexts: bool = True,
+        **kwargs: Dict[str, Any],
+    ):
+        """
+        Args:
+            enter_contexts: Whether to enter the context managers in this class
+                init. If a subclass needs to add more context managers before
+                entering, set this flag to false in `super().__init__` and then
+                call `self._enter_contexts()` in own subclass `__init__`.
+        """
+
+        super().__init__(**kwargs)
+
+        self.func_name: str = func_name
+
+        self.obj: Optional[object] = None
+        self.obj_cls: Optional[Type] = None
+        self.obj_id: Optional[int] = None
+
+        if not issubclass(span_type, core_span.LiveSpanCall):
+            raise ValueError("span_type must be a subclass of LiveSpanCall.")
+
+        self.span_context: ContextManager[core_span.LiveSpanCall] = (
+            core_trace.trulens_tracer().start_as_current_span(
+                cls=span_type,
+                name=truconv.SpanAttributes.CALL.SPAN_NAME_PREFIX
+                + self.func_name,
+            )
+        )
+        # Will be filled in by _enter_contexts.
+        self.span: Optional[core_span.LiveSpanCall] = None
+
+        # Keeping track of possibly multiple contexts for subclasses to add
+        # more.
+ self.context_managers: List[ContextManager[core_span.LiveSpanCall]] = [ + self.span_context + ] + self.spans: List[ + core_otel.Span + ] = [] # keep track of the spans we enter + + if enter_contexts: + self._enter_contexts() + + def _enter_contexts(self): + """Enter all of the context managers registered in this class. + + This includes the span for this callback but might include others if + subclassed. + """ + + for context_manager in self.context_managers: + span = context_manager.__enter__() + self.spans.append(span) + if context_manager == self.span_context: + # Make a special note of the main span for this callback. + self.span = span + + if self.span is None: + raise RuntimeError("Main span was not created in this context.") + + # Propagate some fields from parent. Note that these may be updated by + # the subclass of this callback class when new record roots get added. + parent_span = self.span.parent_span + if parent_span is not None: + if isinstance(parent_span, core_span.Span): + self.span.record_ids = parent_span.record_ids + self.span.app_ids = parent_span.app_ids + if isinstance(parent_span, core_span.LiveSpan): + self.span.live_apps = parent_span.live_apps + + def _exit_contexts(self, error: Optional[Exception]) -> Optional[Exception]: + """Exit all of the context managers registered in this class given the + innermost context's exception optionally. + + Returns the unhandled error if the managers did not absorb it. + """ + + # Exit the contexts starting from the innermost one. + for context_manager in self.context_managers[::-1]: + if error is not None: + try: + if context_manager.__exit__( + type(error), error, error.__traceback__ + ): + # If the context absorbed the error, we don't propagate the + # error to outer contexts. + error = None + + except Exception as next_error: + # Manager might have absorbed the error but raised another + # so this error may not be the same as the original. 
While + # python docs say not to do this, it may happen due to bad + # exit implementation or just people not following the spec. + error = next_error + + else: + context_manager.__exit__(None, None, None) + + return error + + def on_callable_call( + self, bound_arguments: inspect.BoundArguments, **kwargs: Dict[str, Any] + ) -> inspect.BoundArguments: + temp = super().on_callable_call( + bound_arguments=bound_arguments, **kwargs + ) + + if "self" in bound_arguments.arguments: + # TODO: need some generalization + self.obj = bound_arguments.arguments["self"] + self.obj_cls = type(self.obj) + self.obj_id = id(self.obj) + else: + logger.warning("No self in bindings for %s.", self) + + span = self.span + + assert span is not None, "Contexts not yet entered." + span.process_id = os.getpid() + span.thread_id = threading.get_native_id() + + return temp + + def on_callable_end(self): + super().on_callable_end() + + error = None + try: + error = self._exit_contexts(self.error) + + except Exception as e: + # Just in case exit contexts raises another error + error = e + + finally: + span = self.span + if span is None: + raise RuntimeError("Contexts not yet entered.") + + # LiveSpanCall attributes + span.call_id = self.call_id + span.live_obj = self.obj + span.live_cls = self.obj_cls + span.live_func = self.func + span.live_args = self.call_args + span.live_kwargs = self.call_kwargs + span.live_bound_arguments = self.bound_arguments + span.live_sig = self.sig + span.live_ret = self.ret + span.live_error = error + + +class _RecordingContext: + """Manager of the creation of records from record calls. + + An instance of this class is produced when using an + [App][trulens_eval.app.App] as a context mananger, i.e.: + Example: + ```python + app = ... # your app + truapp: TruChain = TruChain(app, ...) # recorder for LangChain apps + with truapp as recorder: + app.invoke(...) 
# use your app + recorder: RecordingContext + ``` + + Each instance of this class produces a record for every "root" instrumented + method called. Root method here means the first instrumented method in a + call stack. Note that there may be more than one of these contexts in play + at the same time due to: + - More than one wrapper of the same app. + - More than one context manager ("with" statement) surrounding calls to the + same app. + - Calls to "with_record" on methods that themselves contain recording. + - Calls to apps that use trulens internally to track records in any of the + supported ways. + - Combinations of the above. + """ + + def __init__( + self, + app: _WithInstrumentCallbacks, + record_metadata: serial_utils.JSON = None, + tracer: Optional[core_trace.Tracer] = None, + span: Optional[core_span.RecordingContextSpan] = None, + span_ctx: Optional[core_context.SpanContext] = None, + ): + self.calls: Dict[types_schema.CallID, record_schema.RecordAppCall] = {} + """A record (in terms of its RecordAppCall) in process of being created. + + Storing as a map as we want to override calls with the same id which may + happen due to methods producing awaitables or generators. These result + in calls before the awaitables are awaited and then get updated after + the result is ready. + """ + # TODEP: To deprecated after migration to span-based tracing. + + self.records: List[record_schema.Record] = [] + """Completed records.""" + + self.lock: threading.Lock = threading.Lock() + """Lock blocking access to `records` when adding calls or + finishing a record.""" + + self.token: Optional[contextvars.Token] = None + """Token for context management.""" + + self.app: _WithInstrumentCallbacks = app + """App for which we are recording.""" + + self.record_metadata = record_metadata + """Metadata to attach to all records produced in this context.""" + + self.tracer: Optional[core_trace.Tracer] = tracer + """EXPERIMENTAL(otel_tracing): OTEL-like tracer for recording. 
+ """ + + self.span: Optional[core_span.RecordingContextSpan] = span + """EXPERIMENTAL(otel_tracing): Span that represents a recording context + (the with block).""" + + self.span_ctx = span_ctx + """EXPERIMENTAL(otel_tracing): The context manager for the above span. + """ + + @property + def spans(self) -> Dict[core_context.SpanContext, core_otel.Span]: + """EXPERIMENTAL(otel_tracing): Get the spans of the tracer in this context.""" + + if self.tracer is None: + return {} + + return self.tracer.spans + + def __iter__(self): + return iter(self.records) + + def get(self) -> record_schema.Record: + """Get the single record only if there was exactly one or throw + an error otherwise.""" + + if len(self.records) == 0: + raise RuntimeError("Recording context did not record any records.") + + if len(self.records) > 1: + raise RuntimeError( + "Recording context recorded more than 1 record. " + "You can get them with ctx.records, ctx[i], or `for r in ctx: ...`." + ) + + return self.records[0] + + def __getitem__(self, idx: int) -> record_schema.Record: + return self.records[idx] + + def __len__(self): + return len(self.records) + + def __hash__(self) -> int: + # The same app can have multiple recording contexts. + return hash(id(self.app)) + hash(id(self.records)) + + def __eq__(self, other): + return hash(self) == hash(other) + + def add_call(self, call: record_schema.RecordAppCall): + """Add the given call to the currently tracked call list.""" + # TODEP: To deprecated after migration to span-based tracing. + + with self.lock: + # NOTE: This might override existing call record which happens when + # processing calls with awaitable or generator results. 
+ self.calls[call.call_id] = call + + def finish_record( + self, + calls_to_record: Callable[ + [ + List[record_schema.RecordAppCall], + types_schema.Metadata, + Optional[record_schema.Record], + ], + record_schema.Record, + ], + existing_record: Optional[record_schema.Record] = None, + ): + """Run the given function to build a record from the tracked calls and any + pre-specified metadata.""" + # TODEP: To deprecated after migration to span-based tracing. + + with self.lock: + record = calls_to_record( + list(self.calls.values()), self.record_metadata, existing_record + ) + self.calls = {} + + if existing_record is None: + # If existing record was given, we assume it was already + # inserted into this list. + self.records.append(record) + + return record + + +class _WithInstrumentCallbacks: + """Abstract definition of callbacks invoked by Instrument during + instrumentation or when instrumented methods are called. + + Needs to be mixed into [App][trulens_eval.app.App]. + """ + + # Called during instrumentation. + def on_method_instrumented( + self, obj: object, func: Callable, path: serial_utils.Lens + ): + """Callback to be called by instrumentation system for every function + requested to be instrumented. + + Given are the object of the class in which `func` belongs + (i.e. the "self" for that function), the `func` itsels, and the `path` + of the owner object in the app hierarchy. + + Args: + obj: The object of the class in which `func` belongs (i.e. the + "self" for that method). + + func: The function that was instrumented. Expects the unbound + version (self not yet bound). + + path: The path of the owner object in the app hierarchy. + """ + + raise NotImplementedError + + # Called during invocation. + def get_method_path(self, obj: object, func: Callable) -> serial_utils.Lens: + """Get the path of the instrumented function `func`, a member of the class + of `obj` relative to this app. + + Args: + obj: The object of the class in which `func` belongs (i.e. 
the + "self" for that method). + + func: The function that was instrumented. Expects the unbound + version (self not yet bound). + """ + + raise NotImplementedError + + # WithInstrumentCallbacks requirement + def get_methods_for_func( + self, func: Callable + ) -> Iterable[Tuple[int, Callable, serial_utils.Lens]]: + """EXPERIMENTAL(otel_tracing): Get the methods (rather the inner + functions) matching the given `func` and the path of each. + + Args: + func: The function to match. + """ + + raise NotImplementedError + + # Called after recording of an invocation. + def _on_new_root_span( + self, + ctx: _RecordingContext, + root_span: core_span.LiveSpanCall, + ) -> record_schema.Record: + """EXPERIMENTAL(otel_tracing): Called by instrumented methods if they + are root calls (first instrumented methods in a call stack). + + Args: + ctx: The context of the recording. + + root_span: The root span that was recorded. + """ + # EXPERIMENTAL(otel_tracing) + + raise NotImplementedError + + +class AppTracingCallbacks(TracingCallbacks[R, S]): + """Extension to TracingCallbacks that keep track of apps that are + instrumenting their constituent calls. + + Also inserts LiveRecordRoot spans + """ + + @classmethod + def on_callable_wrapped( + cls, + wrapper: Callable[..., R], + app: _WithInstrumentCallbacks, + **kwargs: Dict[str, Any], + ): + # Adds the requesting app to the list of apps the wrapper is + # instrumented for. + + if not python_utils.safe_hasattr(wrapper, APPS): + apps: WeakSet[_WithInstrumentCallbacks] = WeakSet() + setattr(wrapper, APPS, apps) + else: + apps = python_utils.safe_getattr(wrapper, APPS) + + apps.add(app) + + return super().on_callable_wrapped(wrapper=wrapper, **kwargs) + + def __init__( + self, + span_type: Type[core_otel.Span] = core_span.LiveSpanCall, + **kwargs: Dict[str, Any], + ): + # Do not enter the context managers in the superclass init as we need to + # add another outer one possibly depending on the below logic. 
+ super().__init__(span_type=span_type, enter_contexts=False, **kwargs) + + # Get all of the apps that have instrumented this call. + apps = python_utils.safe_getattr(self.wrapper, APPS) + # Determine which of this apps are actually recording: + apps = {app for app in apps if app.recording_contexts.get() is not None} + + trace_root_span_context_managers: List[ContextManager] = [] + + current_span = core_trace.trulens_tracer().current_span + record_map = {} + started_apps: WeakSet[Any] = WeakSet() # Any = App + + # Logic here needs to determine whether to add new RecordRoot spans. Get + # already tracking apps/records from current (soon to be parent) span. + if current_span is None: + pass + else: + if isinstance(current_span, core_span.Span): + record_map.update(current_span.record_ids) + + if isinstance(current_span, core_span.LiveSpan): + started_apps = started_apps.union(current_span.live_apps) + + # Now for each app that instrumented the method that is not yet in + # record_ids, create a span context manager for it and add it to + # record_ids of the new created span. + + for app in set(apps).difference(started_apps): + new_record_id = types_schema.TraceRecordID.default_py() + record_map[app.app_id] = new_record_id + print( + f"{text_utils.UNICODE_CHECK} New record {new_record_id} on call to {python_utils.callable_name(self.func)} by app {app.app_name}." 
+ ) + started_apps.add(app) + trace_root_span_context_managers.append( + core_trace.trulens_tracer().start_as_current_span( + cls=core_span.LiveRecordRoot, + name=truconv.SpanAttributes.RECORD_ROOT.SPAN_NAME_PREFIX + + app.app_name, # otel Span field + record_ids=dict(record_map), # trulens Span field + app_ids={ + app.app_id for app in started_apps + }, # trulens Span field + live_apps=WeakSet(started_apps), # LiveSpan field + live_app=ReferenceType(app), # LiveRecordRoot field + record_id=new_record_id, # LiveRecordRoot field + ) + ) + + # Importantly, add the managers for the trace root `before` the span + # managed by TracingCallbacks. This makes sure the root spans are the + # parents of the call span. The order of root spans does not matter as + # we stored them in a set in wrapper.APPS. + self.context_managers = ( + trace_root_span_context_managers + self.context_managers + ) + + # Finally enter the contexts, possibly including the ones we just added. + self._enter_contexts() + + assert self.span is not None, "Contexts not yet entered." + + # Make note of all the apps the main span is recording for and the app + # to record map. + if issubclass(span_type, core_span.Span): + self.span.record_ids = record_map + self.span.app_ids = {app.app_id for app in started_apps} + + if issubclass(span_type, core_span.LiveSpan): + self.span.live_apps = started_apps diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/collector.py b/src/core/trulens/experimental/otel_tracing/core/trace/collector.py new file mode 100644 index 000000000..4c1559a3c --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/collector.py @@ -0,0 +1,132 @@ +# ruff: noqa: E402 + +"""OTEL Compatible OTLP Collector. + +!!! Warning: + WORK ONGOING; please do not use. + +See [specification](https://opentelemetry.io/docs/specs/otlp/). See also the +other side of this connection in +[OTLPSpanExporter][opentelemetry.exporter.otlp.proto.http.trace_exporter.OTLPSpanExporter]. 
+ +Not all of the specification is currently supported. Please update this +docstring as more of the spec is handled. + +- Ignores most of http spec including path. + +- Only proto payloads are supported. + +- No compression is supported. + +- Only spans are supported. + + +""" + +from __future__ import annotations + +import json +import logging +from pprint import pprint +import threading + +import pydantic +from trulens.experimental.otel_tracing import _feature + +_feature._FeatureSetup.assert_optionals_installed() # checks to make sure otel is installed + +import uvicorn + +logger = logging.getLogger(__name__) + + +class CollectorRequest(pydantic.BaseModel): + payload: str = "notset" + + +class CollectorResponse(pydantic.BaseModel): + status: int = 404 + + +class Collector: + """OTLP Traces Collector.""" + + @staticmethod + async def _uvicorn_handle(scope, receive, send): + """Main uvicorn handler.""" + + print("scope:") + pprint(scope) + if scope.get("type") != "http": + return + + request = await receive() + print("request:") + pprint(request) + + if request.get("type") != "http.request": + return + + headers = dict(scope.get("headers", {})) + method = scope.get("method", None) + if method != "POST": + return + + body = request.get("body", None) + if body is None: + return + content_type = headers.get(b"content-type", None) + + if content_type == b"application/json": + body = json.loads(body.decode("utf-8")) + elif content_type == b"application/x-protobuf": + from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import ( + ExportTraceServiceRequest as PB2ExportTraceServiceRequest, + ) + + body = PB2ExportTraceServiceRequest().FromString(body) + + for resource_and_span in body.resource_spans: + resource = resource_and_span.resource + print("resource:") + pprint(resource) + spans = resource_and_span.scope_spans + for span in spans: + print("span:") + pprint(span) + + else: + return + + await send({ + "type": "http.response.start", + "status": 200, + 
"headers": [ + [b"content-type", b"application/json"], + ], + }) + + await send({ + "type": "http.response.body", + "body": CollectorResponse(status=200) + .model_dump_json() + .encode("utf-8"), + }) + + def __init__(self): + self.app = self._uvicorn_handle + self.server_thread = threading.Thread(target=self._run) + + def _run(self): + self.config = uvicorn.Config(app=self.app, port=5000) + self.server = uvicorn.Server(self.config) + import asyncio + + self.loop = asyncio.new_event_loop() + self.loop.run_until_complete(self.server.serve()) + + def start(self): + self.server_thread.start() + + def stop(self): + self.loop.close() diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/context.py b/src/core/trulens/experimental/otel_tracing/core/trace/context.py new file mode 100644 index 000000000..aa9e3e98f --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/context.py @@ -0,0 +1,173 @@ +# ruff: noqa: E402 + +""" """ + +from __future__ import annotations + +import logging +from typing import ( + TYPE_CHECKING, + ClassVar, + Dict, + Hashable, + Optional, + Union, +) + +from opentelemetry import context as context_api +from opentelemetry import trace as trace_api +from opentelemetry.trace import span as span_api +import pydantic +from trulens.core.schema import types as types_schema +from trulens.core.utils import serial as serial_utils + +if TYPE_CHECKING: + from trulens.experimental.otel_tracing.core import trace as core_trace + + +logger = logging.getLogger(__name__) + + +class TraceState(serial_utils.SerialModel, span_api.TraceState): + """[OTEL TraceState][opentelemetry.trace.TraceState] requirements. + + Adds [SerialModel][trulens.core.utils.serial.SerialModel] and therefore + [pydantic.BaseModel][pydantic.BaseModel] onto the OTEL TraceState. + """ + + # Hackish: span_api.TraceState uses _dict internally. 
+ _dict: Dict[str, str] = pydantic.PrivateAttr(default_factory=dict) + + +class SpanContext(serial_utils.SerialModel, Hashable): + """[OTEL SpanContext][opentelemetry.trace.SpanContext] requirements. + + Adds [SerialModel][trulens.core.utils.serial.SerialModel] and therefore + [pydantic.BaseModel][pydantic.BaseModel] onto the OTEL SpanContext. + + Also adds hashing, equality, conversion, and representation methods. + """ + + model_config: ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + arbitrary_types_allowed=True, + use_enum_values=True, # needed for enums that do not inherit from str + ) + + def __str__(self): + return f"{self.trace_id % 0xFF:02x}/{self.span_id % 0xFF:02x}" + + def __repr__(self): + return str(self) + + def __hash__(self): + return self.trace_id + self.span_id + + def __eq__(self, other: ContextLike): + if other is None: + return False + + return self.trace_id == other.trace_id and self.span_id == other.span_id + + trace_id: types_schema.TraceID.PY_TYPE + """Unique identifier for the trace. + + Each root span has a unique trace id.""" + + span_id: types_schema.SpanID.PY_TYPE + """Identifier for the span. + + Meant to be at least unique within the same trace_id. + """ + + trace_flags: trace_api.TraceFlags = pydantic.Field( + trace_api.DEFAULT_TRACE_OPTIONS + ) + + @pydantic.field_validator("trace_flags", mode="before") + @classmethod + def _validate_trace_flags(cls, v): + """Validate trace flags. + + Pydantic does not seem to like classes that inherit from int without this. 
+ """ + return trace_api.TraceFlags(v) + + trace_state: TraceState = pydantic.Field(default_factory=TraceState) + + is_remote: bool = False + + _tracer: core_trace.Tracer = pydantic.PrivateAttr(None) + """Reference to the tracer that produces this SpanContext.""" + + @property + def tracer(self) -> core_trace.Tracer: + """Tracer that produced this SpanContext.""" + return self._tracer + + @staticmethod + def of_contextlike( + context: ContextLike, tracer: Optional[core_trace.Tracer] = None + ) -> SpanContext: + """Convert several types that convey span/contxt identifiers into the + common SpanContext type.""" + + if isinstance(context, SpanContext): + if tracer is not None: + context._tracer = tracer + + return context + + if isinstance(context, span_api.SpanContext): + # otel api SpanContext; doesn't have hashing and other things we need. + return SpanContext( + trace_id=context.trace_id, + span_id=context.span_id, + is_remote=context.is_remote, + _tracer=tracer, + ) + if isinstance(context, context_api.Context): + # Context dict from OTEL. + + if len(context) == 1: + span_encoding = next(iter(context.values())) + + return SpanContext( + trace_id=types_schema.TraceID.py_of_otel( + span_encoding.trace_id + ), + span_id=types_schema.SpanID.py_of_otel( + span_encoding.span_id + ), + _tracer=tracer, + ) + else: + raise ValueError( + f"Unrecognized context dict from OTEL: {context}" + ) + if isinstance(context, dict): + # Json encoding of SpanContext, i.e. output of + # SpanContext.model_dump . 
+ + context["_tracer"] = tracer + return SpanContext.model_validate(context) + + raise ValueError(f"Unrecognized span context type: {context}") + + def __init__(self, **kwargs): + super().__init__(**kwargs) + + for k, v in kwargs.items(): + if v is None: + continue + # pydantic does not set private attributes in init + if k.startswith("_") and hasattr(self, k): + setattr(self, k, v) + + +ContextLike = Union[ + SpanContext, span_api.SpanContext, context_api.Context, serial_utils.JSON +] +"""SpanContext types we need to deal with. + +These may be the non-hashable ones coming from OTEL, the hashable ones we +defined above, or their JSON representations.""" diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/export.py b/src/core/trulens/experimental/otel_tracing/core/trace/export.py new file mode 100644 index 000000000..c221be41a --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/export.py @@ -0,0 +1,214 @@ +# ruff: noqa: E402 + +""" """ + +from __future__ import annotations + +import inspect +import logging +from typing import ( + Any, + Callable, + Dict, + List, + Optional, +) + +from trulens.core.schema import base as base_schema +from trulens.core.schema import record as record_schema +from trulens.core.utils import json as json_utils +from trulens.core.utils import pyschema as pyschema_utils +from trulens.core.utils import serial as serial_utils +from trulens.experimental.otel_tracing.core.trace import otel as core_otel +from trulens.experimental.otel_tracing.core.trace import sem as core_sem +from trulens.experimental.otel_tracing.core.trace import span as core_span + +logger = logging.getLogger(__name__) + + +def _fill_stacks( + span: core_span.Span, + get_method_path: Callable, + span_stacks: Dict[core_otel.Span, List[record_schema.RecordAppCallMethod]], + stack: Optional[List[record_schema.RecordAppCallMethod]] = None, +): + """Populate span_stacks with a mapping of span to call stack for + backwards compatibility with 
records. + + Args: + span: Span to start from. + + get_method_path: Function that looks up lens of a given + obj/function. This is an WithAppCallbacks method. + + span_stacks: Mapping of span to call stack. This will be modified by + this method. + + stack: Current call stack. Recursive calls will build this up. + """ + if stack is None: + stack = [] + + if isinstance(span, core_span.LiveSpanCall): + if span.live_func is None: + print(span.attributes) + raise ValueError(f"Span {span} has no function.") + + path = get_method_path(obj=span.live_obj, func=span.live_func) + + if path is None: + logger.warning( + "No path found for %s in %s.", span.live_func, span.live_obj + ) + path = serial_utils.Lens().static + + if inspect.ismethod(span.live_func): + # This is a method. + frame_ident = record_schema.RecordAppCallMethod( + path=path, + method=pyschema_utils.Method.of_method( + span.live_func, obj=span.live_obj, cls=span.live_cls + ), + ) + elif inspect.isfunction(span.live_func): + # This is a function, not a method. + frame_ident = record_schema.RecordAppCallMethod( + path=path, + method=None, + function=pyschema_utils.Function.of_function(span.live_func), + ) + else: + raise ValueError(f"Unexpected function type: {span.live_func}") + + stack = stack + [frame_ident] + span_stacks[span] = stack + + for subspan in span.iter_children(transitive=False): + _fill_stacks( + subspan, + stack=stack, + get_method_path=get_method_path, + span_stacks=span_stacks, + ) + + +def _call_of_spancall( + span: core_span.LiveSpanCall, + stack: List[record_schema.RecordAppCallMethod], +) -> record_schema.RecordAppCall: + """Convert a LiveSpanCall to a RecordAppCall.""" + + args = ( + dict(span.live_bound_arguments.arguments) + if span.live_bound_arguments is not None + else {} + ) + if "self" in args: + del args["self"] # remove self + + assert span.start_timestamp is not None + if span.end_timestamp is None: + logger.warning( + "Span %s has no end timestamp. 
It might not have yet finished recording.", + span, + ) + + return record_schema.RecordAppCall( + call_id=str(span.call_id), + stack=stack, + args={k: json_utils.jsonify(v) for k, v in args.items()}, + rets=json_utils.jsonify(span.live_ret), + error=str(span.live_error), + perf=base_schema.Perf( + start_time=span.start_timestamp, + end_time=span.end_timestamp, + ), + pid=span.process_id, + tid=span.thread_id, + ) + + +def record_of_root_span( + recording: Any, root_span: core_span.LiveRecordRoot +) -> record_schema.Record: + """Convert a root span to a record. + + This span has to be a call span so we can extract things like main input and output. + """ + + assert isinstance(root_span, core_span.LiveRecordRoot), type(root_span) + + app = recording.app + + # Use the record_id created during tracing. + record_id = root_span.record_id + + span_stacks: Dict[ + core_otel.Span, List[record_schema.RecordAppCallMethod] + ] = {} + + _fill_stacks( + root_span, + span_stacks=span_stacks, + get_method_path=app.get_method_path, + ) + + if root_span.end_timestamp is None: + raise RuntimeError(f"Root span has not finished recording: {root_span}") + + root_perf = base_schema.Perf( + start_time=root_span.start_timestamp, + end_time=root_span.end_timestamp, + ) + + total_cost = root_span.cost_tally() + + calls = [] + spans = [core_sem.TypedSpan.semanticize(root_span)] + + root_call_span = None + for span in root_span.iter_children(): + if isinstance(span, core_span.LiveSpanCall): + calls.append(_call_of_spancall(span, stack=span_stacks[span])) + root_call_span = root_call_span or span + + spans.append(core_sem.TypedSpan.semanticize(span)) + + if root_call_span is None: + raise ValueError("No call span found under trace root span.") + + bound_arguments = root_call_span.live_bound_arguments + main_error = root_call_span.live_error + + if bound_arguments is not None: + main_input = app.main_input( + func=root_call_span.live_func, + sig=root_call_span.live_sig, + 
bindings=root_call_span.live_bound_arguments, + ) + if main_error is None: + main_output = app.main_output( + func=root_call_span.live_func, + sig=root_call_span.live_sig, + bindings=root_call_span.live_bound_arguments, + ret=root_call_span.live_ret, + ) + else: + main_output = None + else: + main_input = None + main_output = None + + record = record_schema.Record( + record_id=record_id, + app_id=app.app_id, + main_input=json_utils.jsonify(main_input), + main_output=json_utils.jsonify(main_output), + main_error=json_utils.jsonify(main_error), + calls=calls, + perf=root_perf, + cost=total_cost, + experimental_otel_spans=spans, + ) + + return record diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/otel.py b/src/core/trulens/experimental/otel_tracing/core/trace/otel.py new file mode 100644 index 000000000..24ea0bca5 --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/otel.py @@ -0,0 +1,751 @@ +# ruff: noqa: E402 + +"""OTEL Compatibility Classes + +This module contains classes to support interacting with the OTEL ecosystem. +Additions on top of these meant for TruLens uses outside of OTEL compatibility +are found in `span.py` and `trace.py`. 
+""" + +from __future__ import annotations + +import contextlib +import contextvars +import logging +from types import TracebackType +from typing import ( + Any, + Callable, + Dict, + Hashable, + List, + Literal, + Optional, + Tuple, + Type, + TypeVar, + Union, +) + +from opentelemetry import trace as trace_api +from opentelemetry.sdk import resources as resources_sdk +from opentelemetry.sdk import trace as trace_sdk +from opentelemetry.trace import span as span_api +from opentelemetry.util import types as types_api +import pydantic +from trulens.core.schema import types as types_schema +from trulens.core.utils import pyschema as pyschema_utils +from trulens.core.utils import python as python_utils +from trulens.core.utils import serial as serial_utils +from trulens.core.utils import text as text_utils +from trulens.experimental.otel_tracing.core.trace import context as core_context + +logger = logging.getLogger(__name__) + +# Type alises + +A = TypeVar("A") +B = TypeVar("B") + + +class Span( + pyschema_utils.WithClassInfo, + serial_utils.SerialModel, + trace_api.Span, + Hashable, +): + """[OTEL Span][opentelemetry.trace.Span] requirements. + + See also [OpenTelemetry + Span](https://opentelemetry.io/docs/specs/otel/trace/api/#span) and + [OpenTelemetry Span + specification](https://github.com/open-telemetry/opentelemetry-specification/blob/main/specification/trace/api.md). + + Adds more features on top of basic OTEL API span requirements: + + - Hashable. + + - pydantic.BaseModel for validation and (de)serialization. + + - Async context manager requirements (__aenter__, __aexit__). + + - Conversions to OTEL ReadableSpan (methods starting with "otel_"). 
+ """ + + model_config = pydantic.ConfigDict( + arbitrary_types_allowed=True, + use_enum_values=True, # model_validate will fail without this + ) + + name: Optional[str] = None + + kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL + + @pydantic.field_validator("kind") + @classmethod + def _validate_kind(cls, v): + return trace_api.SpanKind(v) + + context: core_context.SpanContext + + parent: Optional[core_context.SpanContext] = None + + status: trace_api.status.StatusCode = trace_api.status.StatusCode.UNSET + + @pydantic.field_validator("status") + @classmethod + def _validate_status(cls, v): + return trace_api.status.StatusCode(v) + + status_description: Optional[str] = None + + events: List[ + Tuple[str, trace_api.types.Attributes, types_schema.Timestamp.PY_TYPE] + ] = pydantic.Field(default_factory=list) + links: trace_api._Links = pydantic.Field(default_factory=lambda: []) + + # attributes: trace_api.types.Attributes = pydantic.Field(default_factory=dict) + attributes: Dict = pydantic.Field(default_factory=dict) + + start_timestamp: types_schema.Timestamp.PY_TYPE = pydantic.Field( + default_factory=types_schema.Timestamp.default_py + ) + + end_timestamp: Optional[types_schema.Timestamp.PY_TYPE] = None + + _record_exception: bool = pydantic.PrivateAttr(True) + _set_status_on_exception: bool = pydantic.PrivateAttr(True) + _end_on_exit: bool = pydantic.PrivateAttr(True) + + _tracer: Tracer = pydantic.PrivateAttr(None) + """NON-STANDARD: The Tracer that produced this span.""" + + @property + def tracer(self) -> Tracer: + """The tracer that produced this span.""" + return self._tracer + + def __hash__(self): + return hash(self.context) + + def __init__(self, **kwargs): + if kwargs.get("start_timestamp") is None: + kwargs["start_timestamp"] = types_schema.Timestamp.default_py() + + super().__init__(**kwargs) + + for k, v in kwargs.items(): + if v is None: + continue + # pydantic does not set private attributes in init + if k.startswith("_") and hasattr(self, k): 
+ setattr(self, k, v) + + def update_name(self, name: str) -> None: + """See [OTEL update_name][opentelemetry.trace.span.Span.update_name].""" + + self.name = name + + def get_span_context(self) -> span_api.SpanContext: + """See [OTEL get_span_context][opentelemetry.trace.span.Span.get_span_context].""" + + return self.context + + def set_status( + self, + status: Union[span_api.Status, span_api.StatusCode], + description: Optional[str] = None, + ) -> None: + """See [OTEL set_status][opentelemetry.trace.span.Span.set_status].""" + + if isinstance(status, span_api.Status): + if description is not None: + raise ValueError( + "Ambiguous status description provided both in `status.description`" + " and in `description`." + ) + + assert isinstance(status.status_code, span_api.StatusCode), ( + f"Invalid status code {status.status_code} of type " + f"{type(status.status_code)}." + ) + + self.status = span_api.StatusCode(status.status_code) + self.status_description = status.description + + elif isinstance(status, span_api.StatusCode): + self.status = span_api.StatusCode(status) + self.status_description = description + + else: + raise ValueError(f"Invalid status {status} or type {type(status)}.") + + def add_event( + self, + name: str, + attributes: types_api.Attributes = None, + timestamp: Optional[types_schema.Timestamp.OTEL_TYPE] = None, + ) -> None: + """See [OTEL add_event][opentelemetry.trace.span.Span.add_event]. + + !!! Warning: + As this is an OTEL requirement, we accept expected OTEL types + instead of the ones we actually use in our classes. 
+ """ + + self.events.append(( + name, + attributes, + types_schema.Timestamp.py_of_otel(timestamp) + if timestamp is not None + else types_schema.Timestamp.default_py(), + )) + + def add_link( + self, + context: span_api.SpanContext, + attributes: types_api.Attributes = None, + ) -> None: + """See [OTEL add_link][opentelemetry.trace.span.Span.add_link].""" + + if attributes is None: + attributes = {} + + self.links[context] = attributes + + def is_recording(self) -> bool: + """See [OTEL + is_recording][opentelemetry.trace.span.Span.is_recording].""" + + return self.status == trace_api.status.StatusCode.UNSET + + def set_attributes( + self, attributes: Dict[str, types_api.AttributeValue] + ) -> None: + """See [OTEL + set_attributes][opentelemetry.trace.span.Span.set_attributes].""" + + for key, value in attributes.items(): + self.set_attribute(key, value) + + def set_attribute(self, key: str, value: types_api.AttributeValue) -> None: + """See [OTEL + set_attribute][opentelemetry.trace.span.Span.set_attribute].""" + + self.attributes[key] = value + + def record_exception( + self, + exception: BaseException, + attributes: types_api.Attributes = None, + timestamp: Optional[types_schema.Timestamp.UNION_TYPE] = None, + escaped: bool = False, # purpose unknown + ) -> None: + """See [OTEL + record_exception][opentelemetry.trace.span.Span.record_exception]. + + !!! Warning: + As this is an OTEL requirement, we accept expected OTEL types in + args. + """ + + if exception is None: + raise RuntimeError("Exception must be provided.") + + # TODO: what to do here other than record the exception? 
+ # print(f"Encountered exception {type(exception)} in span {self}:") + # print(exception) + # traceback.print_exception(type(exception), exception, exception.__traceback__) + + if self._set_status_on_exception: + self.set_status( + trace_api.status.Status(trace_api.status.StatusCode.ERROR) + ) + + if self._record_exception: + if attributes is None: + attributes = {} + + attributes["exc_type"] = python_utils.class_name(type(exception)) + attributes["exc_val"] = str(exception) + if exception.__traceback__ is not None: + attributes["code_line"] = python_utils.code_line( + exception.__traceback__.tb_frame, show_source=True + ) + + self.add_event("trulens.exception", attributes, timestamp) + + def end(self, end_time: Optional[types_schema.Timestamp.UNION_TYPE] = None): + """See [OTEL end][opentelemetry.trace.span.Span.end]. + + !!! Warning: + As this is an OTEL requirement, we accept expected OTEL types in + args. + """ + + if end_time is None: + self.end_timestamp = types_schema.Timestamp.default_py() + else: + self.end_timestamp = types_schema.Timestamp.py(end_time) + + if self.is_recording(): + self.set_status( + trace_api.status.Status(trace_api.status.StatusCode.OK) + ) + + # context manager requirement + def __enter__(self) -> Span: + """See [OTEL __enter__][opentelemetry.trace.span.Span.__enter__].""" + + # Span can be used as a context manager to automatically handle ending + # and exception recording. 
+ + return self + + # context manager requirement + def __exit__( + self, + exc_type: Optional[BaseException], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> Literal[False]: + """See [OTEL __exit__][opentelemetry.trace.span.Span.__exit__].""" + + if exc_val is not None: + self.record_exception(exception=exc_val) + + if self._end_on_exit: + self.end() + + return False # don't suppress exceptions + + # async context manager requirement + async def __aenter__(self) -> Span: + return self.__enter__() + + # async context manager requirement + async def __aexit__( + self, + exc_type: Optional[BaseException], + exc_val: Optional[BaseException], + exc_tb: Optional[TracebackType], + ) -> Literal[False]: + return self.__exit__(exc_type, exc_val, exc_tb) + + # Rest of these methods are for exporting spans to ReadableSpan. All are not + # standard OTEL but values for OTEL ReadableSpan. + + @staticmethod + def otel_context_of_context( + context: core_context.SpanContext, + ) -> trace_api.SpanContext: + return trace_api.SpanContext( + trace_id=types_schema.TraceID.otel_of_py(context.trace_id), + span_id=types_schema.SpanID.otel_of_py(context.span_id), + is_remote=False, + ) + + def otel_context(self) -> types_api.SpanContext: + return self.otel_context_of_context(self.context) + + def otel_parent_context(self) -> Optional[types_api.SpanContext]: + if self.parent is None: + return None + + return self.otel_context_of_context(self.parent) + + def otel_attributes(self) -> types_api.Attributes: + return types_schema.flatten_lensed_attributes(self.attributes) + + def otel_kind(self) -> types_api.SpanKind: + return trace_api.SpanKind.INTERNAL + + def otel_status(self) -> trace_api.status.Status: + return trace_api.status.Status(self.status, self.status_description) + + def otel_resource_attributes(self) -> Dict[str, Any]: + # TODO(SNOW-1711959) + return { + "service.namespace": "trulens", + } + + def otel_resource(self) -> resources_sdk.Resource: + 
return resources_sdk.Resource( + attributes=self.otel_resource_attributes() + ) + + def otel_events(self) -> List[types_api.Event]: + return [ + (a, b, types_schema.Timestamp.otel_of_py(c)) + for (a, b, c) in self.events + ] + + def otel_links(self) -> List[types_api.Link]: + return self.links + + def otel_start_timestamp(self) -> types_schema.Timestamp.OTEL_TYPE: + return types_schema.Timestamp.otel_of_py(self.start_timestamp) + + def otel_end_timestamp(self) -> Optional[types_schema.Timestamp.OTEL_TYPE]: + if self.end_timestamp is None: + return None + return types_schema.Timestamp.otel_of_py(self.end_timestamp) + + def was_exported_to( + self, to: Hashable, mark_exported: bool = False + ) -> bool: + ret = to in self.exported_to + if mark_exported: + self.exported_to.add(to) + return ret + + def otel_freeze( + self, + ) -> trace_sdk.ReadableSpan: + """Convert span to an OTEL compatible span for exporting to OTEL collectors.""" + + return trace_sdk.ReadableSpan( + name=self.name, + context=self.otel_context(), + parent=self.otel_parent_context(), + resource=self.otel_resource(), + attributes=self.otel_attributes(), + events=self.otel_events(), + links=self.otel_links(), + kind=self.otel_kind(), + instrumentation_info=None, # TODO(SNOW-1711959) + status=self.otel_status(), + start_time=self.otel_start_timestamp(), + end_time=self.otel_end_timestamp(), + instrumentation_scope=None, # TODO(SNOW-1711959) + ) + + +def _default_context_factory( + name: str, +) -> Callable[[], contextvars.ContextVar[core_context.SpanContext]]: + """Create a default span context contextvar factory. + + Includes the given name in the contextvar name. The default context is a + non-recording context. 
+ """ + + def create(): + return contextvars.ContextVar( + f"context_{name}_{python_utils.context_id()}", + default=core_context.SpanContext( + trace_id=types_schema.TraceID.INVALID_OTEL, + span_id=types_schema.SpanID.INVALID_OTEL, + ), + ) + + return create + + +class Tracer(serial_utils.SerialModel, trace_api.Tracer): + """[OTEL Tracer][opentelemetry.trace.Tracer] requirements.""" + + model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + + _instrumenting_module_name: Optional[str] = pydantic.PrivateAttr(None) + """Name of the library/module that is instrumenting the code.""" + + _instrumenting_library_version: Optional[str] = pydantic.PrivateAttr(None) + """Version of the library that is instrumenting the code.""" + + _attributes: Optional[trace_api.types.Attributes] = pydantic.PrivateAttr( + None + ) + """Common attributes to add to all spans.""" + + _schema_url: Optional[str] = pydantic.PrivateAttr(None) + """Use unknown.""" + + _tracer_provider: TracerProvider = pydantic.PrivateAttr(None) + """NON-STANDARD: The TracerProvider that made this tracer.""" + + _span_class: Type[Span] = pydantic.PrivateAttr(Span) + """NON-STANDARD: The default span class to use when creating spans.""" + + def __str__(self): + return ( + type(self).__name__ + + " " + + (self._instrumenting_module_name or "") + + " " + + (self._instrumenting_library_version or "") + ) + + def __repr__(self): + return str(self) + + def __init__( + self, + _span_context_cvar: contextvars.ContextVar[core_context.SpanContext], + **kwargs, + ): + super().__init__(**kwargs) + + for k, v in kwargs.items(): + if v is None: + continue + # pydantic does not set private attributes in init + if k.startswith("_") and hasattr(self, k): + setattr(self, k, v) + + self._span_context_cvar = _span_context_cvar + + _span_context_cvar: contextvars.ContextVar[core_context.SpanContext] = ( + pydantic.PrivateAttr(default_factory=_default_context_factory("Tracer")) + ) + + @property + def 
current_span_context(self) -> core_context.SpanContext: + return self._span_context_cvar.get() + + def current_span_id(self) -> types_schema.SpanID.PY_TYPE: + return self.current_span_context.span_id + + def current_trace_id(self) -> types_schema.TraceID.PY_TYPE: + return self.current_span_context.trace_id + + def start_span( + self, + name: Optional[str] = None, + context: Optional[core_context.ContextLike] = None, + kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL, + attributes: trace_api.types.Attributes = None, + links: trace_api._Links = None, + start_time: Optional[types_schema.Timestamp.UNION_TYPE] = None, + record_exception: bool = True, + set_status_on_exception: bool = True, + cls: Optional[Type[Span]] = None, # non-standard + end_on_exit: bool = True, # non-standard + **kwargs, # non-standard + ) -> Span: + """See [OTEL + Tracer.start_span][opentelemetry.trace.Tracer.start_span]. + + !!! Warning: + As this is an OTEL requirement, we accept expected OTEL types in + args. + + Args: + cls: Class of span to create. Defaults to the class set in the + tracer. + + trace_id: Trace id to use. Defaults to the current trace id. + + *args: Additional arguments to pass to the span. + + **kwargs: Additional keyword arguments to pass to the span. 
+ """ + + if ( + context is None + or ( + parent_context := core_context.SpanContext.of_spancontextlike( + context + ) + ) + is None + ): + parent_context = self.current_span_context + + new_context = core_context.SpanContext( + trace_id=parent_context.trace_id, + span_id=types_schema.SpanID.rand_otel(), + _tracer=self, + ) + + if name is None: + name = python_utils.class_name(self._span_class) + + if attributes is None: + attributes = {} + + if self._attributes is not None: + attributes.update(self._attributes) + + if cls is None: + cls = self._span_class + + new_span = cls( + name=name, + context=new_context, + parent=parent_context, + kind=kind, + attributes=attributes, + links=links, + start_timestamp=types_schema.Timestamp.py(start_time) + if start_time + else None, + _record_exception=record_exception, + _set_status_on_exception=set_status_on_exception, + _end_on_exit=end_on_exit, + _tracer=self, + **kwargs, + ) + + return new_span + + @contextlib.contextmanager + def start_as_current_span( + self, + name: Optional[str] = None, + context: Optional[core_context.ContextLike] = None, + kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL, + attributes: trace_api.attributes = None, + links: trace_api._Links = None, + start_time: Optional[types_schema.Timestamp.UNION_TYPE] = None, + record_exception: bool = True, + set_status_on_exception: bool = True, + end_on_exit: bool = True, + cls: Optional[Type[Span]] = None, # non-standard + **kwargs, # non-standard + ): + """See [OTEL + Tracer.start_as_current_span][opentelemetry.trace.Tracer.start_as_current_span]. + + !!! Warning: + As this is an OTEL requirement, we accept expected OTEL types in + args. + """ + + # TODO: Make this agnostic context manager to match OTEL. + # TODO: Make this usable as a decorator to match OTEL. + + # Create a new span. Context controls its ending and recording of exception. 
+ with self.start_span( + name=name, + context=context, + kind=kind, + attributes=attributes, + links=links, + start_time=start_time, + record_exception=record_exception, + set_status_on_exception=set_status_on_exception, + cls=cls, + end_on_exit=end_on_exit, + **kwargs, + ) as span: + # Set current span context to that of the new span. + with python_utils.with_context({ + self._span_context_cvar: span.context + }): + try: + yield span + finally: + pass + + @contextlib.asynccontextmanager + async def astart_as_current_span( + self, + name: Optional[str] = None, + context: Optional[trace_api.context.Context] = None, + kind: trace_api.SpanKind = trace_api.SpanKind.INTERNAL, + attributes: trace_api.attributes = None, + links: trace_api._Links = None, + start_time: Optional[types_schema.Timestamp.UNION_TYPE] = None, + record_exception: bool = True, + set_status_on_exception: bool = True, + end_on_exit: bool = True, + cls: Optional[Type[Span]] = None, # non-standard + **kwargs, # non-standard + ): + """Not otel standard but mimics the sync version. + + In OTEL, `start_as_current_span` works both for sync and async. + """ + + # Create a new span. + async with self.start_span( + name=name, + context=context, + kind=kind, + attributes=attributes, + links=links, + start_time=start_time, + record_exception=record_exception, + set_status_on_exception=set_status_on_exception, + cls=cls, + end_on_exit=end_on_exit, + **kwargs, + ) as span: + # Set current span context to that of the new span. 
+ async with python_utils.awith_context({ + self._span_context_cvar: span.context + }): + try: + yield span + finally: + pass + + +class TracerProvider(serial_utils.SerialModel, trace_api.TracerProvider): + """[OTEL TracerProvider][opentelemetry.trace.TracerProvider] + requirements.""" + + model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + + _tracer_class: Type[Tracer] = pydantic.PrivateAttr(Tracer) + """NON-STANDARD: The default tracer class to use when creating tracers.""" + + _span_context_cvar: contextvars.ContextVar[core_context.SpanContext] = ( + pydantic.PrivateAttr( + default_factory=_default_context_factory("TracerProvider") + ) + ) + + @property + def current_span_context(self) -> core_context.SpanContext: + """NON-STANDARD: The current span context.""" + + return self._span_context_cvar.get() + + @property + def current_trace_id(self) -> types_schema.TraceID.PY_TYPE: + """NON-STANDARD: The current trace id.""" + + return self.current_span_context.trace_id + + @property + def current_span_id(self) -> types_schema.SpanID.PY_TYPE: + """NON-STANDARD: The current span id.""" + + return self.current_span_context.span_id + + def __init__(self): + super().__init__() + + self._span_context_cvar.set( + core_context.SpanContext( + span_id=types_schema.SpanID.rand_otel(), + trace_id=types_schema.TraceID.rand_otel(), + ) + ) + + print( + f"{text_utils.UNICODE_SQUID} TruLens root context={self._span_context_cvar.get()}" + ) + + def get_tracer( + self, + instrumenting_module_name: str, + instrumenting_library_version: Optional[str] = None, + schema_url: Optional[str] = None, + attributes: Optional[types_api.Attributes] = None, + ): + """See [OTEL + TracerProvider.get_tracer][opentelemetry.trace.TracerProvider.get_tracer].""" + + tracer = self._tracer_class( + _instrumenting_module_name=instrumenting_module_name, + _instrumenting_library_version=instrumenting_library_version, + _attributes=attributes, + _schema_url=schema_url, + _tracer_provider=self, + 
_span_context_cvar=self._span_context_cvar, + ) + + return tracer diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/sem.py b/src/core/trulens/experimental/otel_tracing/core/trace/sem.py new file mode 100644 index 000000000..180bc2643 --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/sem.py @@ -0,0 +1,657 @@ +"""TypedSpan organization and semantization. + +The Span subclass and subsubclasses defined here are the only ones we put into the database. +""" + +from __future__ import annotations + +import functools +import inspect +from logging import getLogger +from typing import ( + ClassVar, + Dict, + List, + Optional, + Set, + Tuple, + Type, + TypeVar, +) +import uuid + +import pydantic +from trulens.core.schema import base as base_schema +from trulens.core.schema import types as types_schema +from trulens.core.utils import json as json_utils +from trulens.core.utils import pyschema as pyschema_utils +from trulens.core.utils import serial as serial_utils +from trulens.experimental.otel_tracing.core.trace import span as core_span +from trulens.otel.semconv import trace as truconv + +logger = getLogger(__name__) + +T = TypeVar("T") + + +@functools.lru_cache +def _get_combo_class(classes: Tuple[Type[TypedSpan]]) -> Type[TypedSpan]: + """Get the class that corresponds to the combination of classes in the + input set. + + Args: + classes: The set of classes to combine. + + Returns: + The class that corresponds to the combination of the input classes. 
+ """ + + class _Combo(*classes): + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.span_types = set(s.span_type for s in classes) + + # for cls in classes: + # cls._init_attribute_properties(self, kwargs) + + _Combo.__name__ = "_".join(cls.__name__ for cls in classes) + _Combo.__qualname__ = "_".join(cls.__qualname__ for cls in classes) + + return _Combo + + +def get_combo_class(classes: Set[Type[TypedSpan]]) -> Type[TypedSpan]: + """Get the class that corresponds to the combination of classes in the + input set. + + Also populates the span_types field. + + Args: + classes: The set of classes to combine. + + Returns: + The class that corresponds to the combination of the input classes. + """ + + classes = tuple(classes) + + if len(classes) == 1: + return classes[0] + + classes = tuple(sorted(classes, key=lambda cls: cls.__qualname__)) + + return _get_combo_class(classes) + + +class TypedSpan(core_span.Span): + """A span with a type.""" + + span_type: ClassVar[Optional[truconv.SpanAttributes.SpanType]] = None + """Mixin type for each subclass.""" + + @classmethod + def mixin_new(cls, d: Optional[Dict] = None, **kwargs) -> TypedSpan: + """Given a jsonized version of a typed span that may be of multiple + types, initialize the appropriate classes with the provided data.""" + + if d is None: + d = {} + + d.update(kwargs) + + # if (record_ids := d.get("record_ids", None)) is not None: + # Some span types have record_id field which gets stored in + # record_ids in the db. 
+ # if len(record_ids) == 1: + # d["record_id"] = next(iter(record_ids.values())) + + types = d.pop("span_types", []) + + classes = {TYPE_TO_CLASS_MAP[t] for t in types} + + combo_class = get_combo_class(classes) + + return combo_class(**d) + + @staticmethod + def semanticize(span: core_span.Span) -> TypedSpan: + class_args = { + "context": span.context, + "parent": span.parent, + "name": span.name, + "start_timestamp": span.start_timestamp, + "end_timestamp": span.end_timestamp, + "attributes": span.attributes, + "status": span.status, + "status_description": span.status_description, + "links": span.links, + "events": span.events, + } + classes = set() + + if isinstance(span, core_span.Span): + classes.add( + Record + ) # everything that comes from trulens tracer is a span under an app right now + class_args["record_ids"] = span.record_ids + class_args["app_ids"] = span.app_ids + + if span.record_ids is None: + raise RuntimeError("Span has no record_ids.") + + if isinstance(span, core_span.RecordingContextSpan): + classes.add(Recording) + + app = span.live_app() + if app is None: + logger.warning( + "App in %s got garbage collected before serialization.", + span, + ) + else: + class_args.update(dict(app_id=app.app_id)) + + if isinstance(span, core_span.LiveSpan): + # already covered everything in the Span case. The other live objects are not relevant. 
+ pass + + if isinstance(span, core_span.LiveSpanCall): + classes.add(Call) + + class_args.update( + dict( + call_id=span.call_id, + signature=pyschema_utils.Signature.of_signature( + span.live_sig + ), + function=pyschema_utils.FunctionOrMethod.of_callable( + span.live_func + ), + process_id=span.process_id, + thread_id=span.thread_id, + bound_arguments=pyschema_utils.BoundArguments.of_bound_arguments( + span.live_bound_arguments + ) + if span.live_bound_arguments is not None + else None, + ret=json_utils.jsonify(span.live_ret), + call_error=json_utils.jsonify(span.live_error), + ) + ) + + if isinstance(span, core_span.WithCost): + classes.add(Cost) + + class_args["cost"] = span.cost + + if isinstance(span, core_span.LiveRecordRoot): + classes.add(RecordRoot) + + class_args.update(dict(record_id=span.record_id)) + + app = span.live_app() + + if app is None: + logger.warning( + "App in %s got garbage collected before serialization.", + span, + ) + else: + # Get the main method call so we can get the main input/output/error. 
+ + main_span = span.first_child( + matching=lambda s: isinstance(s, core_span.LiveSpanCall) + ) + + total_cost = span.cost_tally() + + if main_span is None: + logger.warning( + "No main span found for record %s in %s.", + span.record_id, + span, + ) + + else: + main_input = app.main_input( + func=main_span.live_func, + sig=main_span.live_sig, + bindings=main_span.live_bound_arguments, + ) + main_output = app.main_output( + func=main_span.live_func, + sig=main_span.live_sig, + bindings=main_span.live_bound_arguments, + ret=main_span.live_ret, + ) + main_error = json_utils.jsonify(main_span.live_error) + + class_args.update( + dict( + main_input=main_input, + main_output=main_output, + main_error=main_error, + ) + ) + + class_args.update( + dict( + app_id=app.app_id, + app_name=app.app_name, + app_version=app.app_version, + total_cost=total_cost, + ) + ) + + # TODO: + # classes.add(Semantic) + + if len(classes) == 0: + logger.warning("No types found for span %s.", span) + classes.add(Unknown) + + cls = get_combo_class(classes) + + instance = cls(**class_args) + + return instance + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + # This is meant to be overwritten: + self.span_types = set([self.__class__.span_type]) + + span_types = core_span.Span.attribute_property( + truconv.SpanAttributes.SPAN_TYPES, + Set[truconv.SpanAttributes.SpanType], + default_factory=set, + ) + """A span can be of multiple categories.""" + + +class Recording(TypedSpan): + """A TruLens recording context span type.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.RECORDING + ) + + app_id = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORDING.APP_ID, types_schema.AppID + ) + + +class Unknown(TypedSpan): + """An unknown span type.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.UNKNOWN + ) + + +class EvalRoot(TypedSpan): + """Root of feedback function 
evaluation.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.EVAL_ROOT + ) + + # feedback result fields + + +class RecordRoot(TypedSpan): + """Root of a record.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.RECORD_ROOT + ) + + record_id = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.RECORD_ID, types_schema.RecordID + ) + + # TODEP: + # perf = core_span.Span.attribute_property("record.perf", base_schema.Perf) + # TODEP: + # ts = core_span.Span.attribute_property("record.ts", datetime.datetime) + # tags = core_span.Span.attribute_property("record.tags", str) + # meta = core_span.Span.attribute_property("record.meta", serial_utils.JSON) + + app_id = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.APP_ID, types_schema.AppID + ) + app_name = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.APP_NAME, str + ) + app_version = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.APP_VERSION, str + ) + + total_cost = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.TOTAL_COST, base_schema.Cost + ) + + main_input = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.MAIN_INPUT, serial_utils.JSON + ) + + main_output = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.MAIN_OUTPUT, serial_utils.JSON + ) + + main_error = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.MAIN_ERROR, serial_utils.JSON + ) + + +class Semantic(TypedSpan): + """A normal span that is not unknown.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.SEMANTIC + ) + + +class Cost(TypedSpan): + """A span that corresponds to a cost.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.COST + ) + + cost = core_span.Span.attribute_property( + 
truconv.SpanAttributes.COST.COST, base_schema.Cost + ) + + +class Call(TypedSpan): + """A typed span that corresponds to a method call.""" + + model_config: ClassVar[pydantic.ConfigDict] = pydantic.ConfigDict( + arbitrary_types_allowed=True + ) + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.CALL + ) + + model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + + call_id = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.CALL_ID, uuid.UUID + ) + """Unique identifier for the call.""" + + signature = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.SIGNATURE, inspect.Signature + ) + """Signature of the function.""" + + function = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.FUNCTION, pyschema_utils.FunctionOrMethod + ) + """Function info.""" + + # TODO: move this to resource attributes: + process_id = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.PROCESS_ID, int + ) + """Process id.""" + + thread_id = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.THREAD_ID, int + ) + """Thread id.""" + + bound_arguments = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.BOUND_ARGUMENTS, + Optional[pyschema_utils.BoundArguments], + default=None, + ) + """Bindings of the function, if can be bound.""" + + ret = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.RETURN, serial_utils.JSON + ) + + call_error = core_span.Span.attribute_property( + truconv.SpanAttributes.CALL.ERROR, serial_utils.JSON + ) + + +class Record(TypedSpan): + """Span that contains recording/app ids.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.RECORD + ) + + app_ids = core_span.Span.attribute_property( + truconv.SpanAttributes.RECORD.APP_IDS, Set[types_schema.AppID] + ) + """The app ids of the apps that were called.""" + + record_ids = core_span.Span.attribute_property( + 
truconv.SpanAttributes.RECORD.RECORD_IDS, + Dict[types_schema.AppID, types_schema.RecordID], + ) + """The map of app_id to record_id indicating the id of the span as viewed by + each app that was tracing it.""" + + +class Retrieval(TypedSpan): + """A retrieval.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.RETRIEVAL + ) + + query_text = core_span.Span.attribute_property( + truconv.SpanAttributes.RETRIEVAL.QUERY_TEXT, str + ) + """Input text whose related contexts are being retrieved.""" + + query_embedding = core_span.Span.attribute_property( + truconv.SpanAttributes.RETRIEVAL.QUERY_EMBEDDING, List[float] + ) + """Embedding of the input text.""" + + distance_type = core_span.Span.attribute_property( + truconv.SpanAttributes.RETRIEVAL.DISTANCE_TYPE, str + ) + """Distance function used for ranking contexts.""" + + num_contexts = core_span.Span.attribute_property( + truconv.SpanAttributes.RETRIEVAL.NUM_CONTEXTS, int + ) + """The number of contexts requested, not necessarily retrieved.""" + + retrieved_contexts = core_span.Span.attribute_property( + truconv.SpanAttributes.RETRIEVAL.RETRIEVED_CONTEXTS, List[str] + ) + """The retrieved contexts.""" + + retrieved_scores = core_span.Span.attribute_property( + truconv.SpanAttributes.RETRIEVAL.RETRIEVED_SCORES, List[float] + ) + """The scores of the retrieved contexts.""" + + retrieved_embeddings = core_span.Span.attribute_property( + truconv.SpanAttributes.RETRIEVAL.RETRIEVED_EMBEDDINGS, List[List[float]] + ) + """The embeddings of the retrieved contexts.""" + + +class Reranking(TypedSpan): + """A reranker call.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.RERANKING + ) + + query_text = core_span.Span.attribute_property( + truconv.SpanAttributes.RERANKING.QUERY_TEXT, str + ) + """The query text.""" + + model_name = core_span.Span.attribute_property( + truconv.SpanAttributes.RERANKING.MODEL_NAME, str + ) # consider 
generic ML model name attr + """The model name of the reranker.""" + + top_n = core_span.Span.attribute_property( + truconv.SpanAttributes.RERANKING.TOP_N, int + ) + """The number of contexts to rerank.""" + + input_context_texts = core_span.Span.attribute_property( + truconv.SpanAttributes.RERANKING.INPUT_CONTEXT_TEXTS, List[str] + ) + """The contexts being reranked.""" + + input_context_scores = core_span.Span.attribute_property( + truconv.SpanAttributes.RERANKING.INPUT_CONTEXT_SCORES, + Optional[List[float]], + ) + """The scores of the input contexts.""" + + output_ranks = core_span.Span.attribute_property( + truconv.SpanAttributes.RERANKING.OUTPUT_RANKS, List[int] + ) + """Reranked indexes into `input_context_texts`.""" + + +class Generation(TypedSpan): + """A generation call to an LLM.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.GENERATION + ) + + model_name = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.MODEL_NAME, str + ) # to replace with otel's LLM_REQUEST_MODEL + """The model name of the LLM.""" + + model_type = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.MODEL_TYPE, str + ) + """The type of model used.""" + + input_token_count = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.INPUT_TOKEN_COUNT, int + ) # to replace with otel's LLM_RESPONSE_USAGE_PROMPT_TOKENS + """The number of tokens in the input.""" + + input_messages = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.INPUT_MESSAGES, List[dict] + ) + """The prompt given to the LLM.""" + + output_token_count = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.OUTPUT_MESSAGES, int + ) # to replace with otel's LLM_RESPONSE_COMPLETION_TOKENS + """The number of tokens in the output.""" + + output_messages = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.OUTPUT_MESSAGES, List[dict] + ) + """The returned text.""" + 
+ temperature = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.TEMPERATURE, float + ) # to replace with otel's LLM_REQUEST_TEMPERATURE + """The temperature used for generation.""" + + cost = core_span.Span.attribute_property( + truconv.SpanAttributes.GENERATION.COST, float + ) + """The cost of the generation.""" + + +class Memorization(TypedSpan): + """A memory call.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.MEMORIZATION + ) + + memory_type = core_span.Span.attribute_property( + truconv.SpanAttributes.MEMORIZATION.MEMORY_TYPE, str + ) + """The type of memory.""" + + remembered = core_span.Span.attribute_property( + truconv.SpanAttributes.MEMORIZATION.REMEMBERED, str + ) + """The text being integrated into the memory in this span.""" + + +class Embedding(TypedSpan): + """An embedding call.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.EMBEDDING + ) + + input_text = core_span.Span.attribute_property( + truconv.SpanAttributes.EMBEDDING.INPUT_TEXT, str + ) + """The text being embedded.""" + + model_name = core_span.Span.attribute_property( + truconv.SpanAttributes.EMBEDDING.MODEL_NAME, str + ) + """The model name of the embedding model.""" + + embedding = core_span.Span.attribute_property( + truconv.SpanAttributes.EMBEDDING.EMBEDDING, List[float] + ) + """The embedding of the input text.""" + + +class ToolInvocation(TypedSpan): + """A tool invocation.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.TOOL_INVOCATION + ) + + description = core_span.Span.attribute_property( + truconv.SpanAttributes.TOOL_INVOCATION.DESCRIPTION, str + ) + """The description of the tool.""" + + +class AgentInvocation(TypedSpan): + """An agent invocation.""" + + span_type: ClassVar[truconv.SpanAttributes.SpanType] = ( + truconv.SpanAttributes.SpanType.AGENT_INVOCATION + ) + + description = 
core_span.Span.attribute_property( + truconv.SpanAttributes.AGENT_INVOCATION.DESCRIPTION, str + ) + """The description of the agent.""" + + +TYPE_TO_CLASS_MAP: Dict[truconv.SpanAttributes.SpanType, Type[TypedSpan]] = { + truconv.SpanAttributes.SpanType.UNKNOWN: Unknown, + truconv.SpanAttributes.SpanType.SEMANTIC: Semantic, + truconv.SpanAttributes.SpanType.RECORDING: Recording, + truconv.SpanAttributes.SpanType.EVAL_ROOT: EvalRoot, + truconv.SpanAttributes.SpanType.RECORD_ROOT: RecordRoot, + truconv.SpanAttributes.SpanType.RECORD: Record, + truconv.SpanAttributes.SpanType.CALL: Call, + truconv.SpanAttributes.SpanType.COST: Cost, + truconv.SpanAttributes.SpanType.RETRIEVAL: Retrieval, + truconv.SpanAttributes.SpanType.RERANKING: Reranking, + truconv.SpanAttributes.SpanType.GENERATION: Generation, + truconv.SpanAttributes.SpanType.MEMORIZATION: Memorization, + truconv.SpanAttributes.SpanType.EMBEDDING: Embedding, + truconv.SpanAttributes.SpanType.TOOL_INVOCATION: ToolInvocation, + truconv.SpanAttributes.SpanType.AGENT_INVOCATION: AgentInvocation, +} +"""Map of classes from their type enum.""" diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/span.py b/src/core/trulens/experimental/otel_tracing/core/trace/span.py new file mode 100644 index 000000000..c11ff29b0 --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/span.py @@ -0,0 +1,590 @@ +# ruff: noqa: E402 + +"""Spans extending OTEL functionality for TruLens.""" + +from __future__ import annotations + +import inspect +import os +import threading +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Generic, + Iterable, + List, + Optional, + Set, + Tuple, + Type, + TypeVar, + Union, +) +import uuid + +from opentelemetry.semconv.resource import ResourceAttributes +import pydantic +from trulens.core._utils.pycompat import ReferenceType +from trulens.core._utils.pycompat import TypeAlias +from trulens.core._utils.pycompat import WeakSet +from trulens.core.schema import 
base as base_schema +from trulens.core.schema import types as types_schema +from trulens.core.utils import json as json_utils +from trulens.experimental.otel_tracing.core.trace import context as core_context +from trulens.experimental.otel_tracing.core.trace import otel as core_otel +from trulens.experimental.otel_tracing.core.trace import trace as core_trace +from trulens.otel.semconv import trace as truconv + +if TYPE_CHECKING: + from trulens.core import app as core_app + +T = TypeVar("T") +R = TypeVar("R") # callable return type +E = TypeVar("E") # iterator/generator element type + + +class AttributeProperty(property, Generic[T]): + """Property that stores a serialized version its value in the attributes + dictionary. + + Validates default and on assignment. This is meant to be used only in + trulens Span instances (or subclasses). + + Args: + name: The name of the property. The key used for storage will be + this with the vendor prefix. + + typ: The type of the property. + + typ_factory: A factory function that returns the type of the + property. This can be used for forward referenced types. + + default: The default value of the property. + + default_factory: A factory function that returns the default value + of the property. This can be used for defaults that make use of + forward referenced types. 
+ """ + + def __init__( + self, + name: str, + typ: Optional[Type[T]] = None, + typ_factory: Optional[Callable[[], Type[T]]] = None, + default: Optional[T] = None, + default_factory: Optional[Callable[[], T]] = None, + ): + self.name = name + self.typ = typ + self.typ_factory = typ_factory + self.default = default + self.default_factory = default_factory + + self.field_name: Optional[str] = None + + self.forward_initialized = False + + def init_forward(self): + if self.forward_initialized: + return + + self.forward_initialized = True + + if self.typ is None and self.typ_factory is not None: + self.typ = self.typ_factory() + + if self.default is None and self.default_factory is not None: + self.default = self.default_factory() + + if self.typ is None and self.default is not None: + self.typ = type(self.default) + + if self.typ is None: + self.tadapter = None + else: + try: + self.tadapter = pydantic.TypeAdapter(self.typ) + + if self.default is not None: + self.tadapter.validate_python(self.default) + + except pydantic.PydanticSchemaGenerationError: + self.tadapter = None + + def fget(self, obj: Any) -> Optional[T]: + return self.__get__(obj, obj.__class__) + + def __get__(self, obj: Any, objtype: Optional[Type[T]]) -> Optional[T]: # type: ignore # noqa: F821 + if obj is None: + return self + + self.init_forward() + return obj._attributes.get(self.name, self.default) + + def __set__(self, obj, value: T) -> None: + self.init_forward() + + if self.tadapter is not None: + try: + self.tadapter.validate_python(value) + except pydantic.ValidationError as e: + raise ValueError( + f"Invalid value for attribute {self.field_name}: {e}" + ) + + obj._attributes[self.name] = value + obj.attributes[self.name] = json_utils.jsonify(value) + + def __delete__(self, obj): + del obj._attributes[self.name] + del obj.attributes[self.name] + + def __set_name__(self, cls, name): + self.field_name = name + + if name in cls.__annotations__: + # If type is specified in annotation, take it from 
there. + self.typ = cls.__annotations__[name] + self.tadapter = pydantic.TypeAdapter(self.typ) + + # Update the recorded return type as well. + # TODO: cannot do this at this point as the below dict is not yet populated + # if name in cls.model_computed_fields: + # cls.model_computed_fields[name].return_type = self.typ + + # Have to remove it as pydantic will complain about overriding fields with computed fields. + del cls.__annotations__[name] + + +class WithAttributeProperties(pydantic.BaseModel): + _attributes: Dict[str, Any] = pydantic.PrivateAttr(default_factory=dict) + """Non-serialized values of named fields defined by `attribute_property`. + + These are mirrored with serialized versions in `attributes`. + """ + + @staticmethod + def attribute_property_factory(base: str) -> Callable: + def prop_factory( + name: str, + typ: Optional[Type[T]] = None, + typ_factory: Optional[Callable[[], Type[T]]] = None, + default: Optional[T] = None, + default_factory: Optional[Callable[[], T]] = None, + ) -> property: + return Span.attribute_property( + name=base + "." + name, + typ=typ, + typ_factory=typ_factory, + default=default, + default_factory=default_factory, + ) + + return prop_factory + + @staticmethod + def attribute_property( + name: str, + typ: Optional[Type[T]] = None, + typ_factory: Optional[Callable[[], Type[T]]] = None, + default: Optional[T] = None, + default_factory: Optional[Callable[[], T]] = None, + ) -> property: + """See AttributeProperty.""" + + return pydantic.computed_field( + AttributeProperty(name, typ, typ_factory, default, default_factory), + return_type=typ, + ) + + +class Span(core_otel.Span, WithAttributeProperties): + """TruLens additions on top of OTEL spans. + + Note that in this representation, we keep track of the tracer that produced + the instance and have properties to access other spans from that tracer, + like the parent. This make traversing lives produced in this process a bit + easier. 
+ """ + + model_config = pydantic.ConfigDict( + arbitrary_types_allowed=True, + use_enum_values=True, # model_validate will fail without this + ) + + def __str__(self): + return ( + f"{type(self).__name__}({self.name}, {self.context}->{self.parent})" + ) + + def __repr__(self): + return str(self) + + @property + def parent_span(self) -> Optional[Span]: + if self.parent is None: + return None + + if self._tracer is None: + return None + + if (span := self._tracer.spans.get(self.parent)) is None: + return None + + return span + + _children_spans: List[Span] = pydantic.PrivateAttr(default_factory=list) + + @property + def children_spans(self) -> List[Span]: + return self._children_spans + + error: Optional[Exception] = pydantic.Field(None) + """Optional error if the observed computation raised an exception.""" + + record_ids = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.RECORD.RECORD_IDS, + typ=Dict[types_schema.AppID, types_schema.RecordID], + default_factory=dict, + ) + """App id to record id map. + + This is because the same span might represent part of the trace of different + records because more than one app is tracing. + + This will not be filled in if the span was produced outside of a recording + context. 
+ """ + + app_ids = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.RECORD.APP_IDS, + typ=Set[types_schema.AppID], + default_factory=set, + ) + """Apps recording this span.""" + + def __init__(self, **kwargs): + # Convert any contexts to our hashable context class: + if (context := kwargs.get("context")) is not None: + kwargs["context"] = core_context.SpanContext.of_contextlike(context) + if (parent := kwargs.get("parent", None)) is not None: + kwargs["parent"] = core_context.SpanContext.of_contextlike(parent) + + super().__init__(**kwargs) + + if (parent_span := self.parent_span) is not None: + if isinstance(parent_span, Span): + parent_span.children_spans.append(self) + + self._init_attribute_properties(kwargs) + + def _init_attribute_properties(self, kwargs): + # Attribute_property fields are not automatically set from kwargs or + # from attributes. We set them here. + + for name in kwargs.keys(): + if not hasattr(self, name): + raise RuntimeWarning("Unknown field in kwargs: " + name) + + all_computed_fields = dict(self.model_computed_fields.items()) + + for name, field in all_computed_fields.items(): + if not isinstance(field.wrapped_property, AttributeProperty): + # Only cover our AttributeProperty. + continue + + prop = field.wrapped_property + attribute_name = prop.name + + if (val := kwargs.get(name)) is not None: + # Got from kwargs. + pass + else: + if ( + val := self.attributes.get(attribute_name, None) + ) is not None: + # Got from OTEL attributes. + pass + else: + # Get from defaults specified on AttributeProperty. 
+ val = prop.default + if prop.default_factory is not None: + val = prop.default_factory() + + setattr(self, name, val) + + def iter_ancestors(self) -> Iterable[Span]: + """Iterate over all ancestors of this span.""" + + yield self + + if self.parent_span is not None: + yield from self.parent_span.iter_ancestors() + + def has_ancestor_of_type(self, span_type: Type[Span]) -> bool: + """Check if this span has an ancestor of the given type.""" + + for ancestor in self.iter_ancestors(): + if isinstance(ancestor, span_type): + return True + + return False + + def iter_children( + self, + transitive: bool = True, + matching: Optional[SpanFilterLike] = None, + ) -> Iterable[Span]: + """Iterate over all spans that are children of this span. + + Args: + transitive: Iterate recursively over children. + + matching: Optional filter function to apply to each child span. + """ + + matching = _filter_of_spanfilterlike(matching) + + for child_span in self.children_spans: + if matching(child_span): + yield child_span + if transitive: + yield from child_span.iter_children( + transitive=transitive, + matching=matching, + ) + + def first_child( + self, + transitive: bool = True, + matching: Optional[SpanFilterLike] = None, + ) -> Optional[Span]: + """Get the first child span that passes the filter.""" + + matching = _filter_of_spanfilterlike(matching) + + try: + return next(iter(self.iter_children(transitive, matching))) + except StopIteration: + return None + + def iter_family( + self, matching: Optional[SpanFilterLike] = None + ) -> Iterable[Span]: + """Iterate itself and all children transitively.""" + + matching = _filter_of_spanfilterlike(matching) + + yield from self.iter_children(transitive=True, matching=matching) + + def cost_tally(self) -> base_schema.Cost: + """Total costs of this span and all its transitive children.""" + + total = base_schema.Cost() + + for span in self.iter_family(): + if isinstance(span, WithCost) and span.cost is not None: + total += span.cost + + 
return total + + +SpanFilterLike: TypeAlias = Union[Type[Span], Callable[[Span], bool]] +"""Filter for spans. + +Either a span type (interpreted as an `isinstance check`) or a callable from +span to bool. Produces a callable from span to bool. +""" + + +def _filter_of_spanfilterlike( + filter: Optional[SpanFilterLike], +) -> Callable[[Span], bool]: + """Create a filter function from a SpanFilterLike. + + Defaults to filter that accepts all spans. + """ + + if filter is None: + return lambda s: True + + if isinstance(filter, type): + return lambda s: isinstance(s, filter) + + return filter + + +class LiveSpan(Span): + """A a span type that indicates that it contains live python objects. + + It is to be converted to a non-live span before being output to the user or + otherwise. + """ + + live_apps: WeakSet[core_app.App] = pydantic.Field( + default_factory=WeakSet, exclude=True + ) # Any = App + """Apps for which this span is recording trace info for. + + WeakSet to prevent memory leaks. + + Note that this will not be filled in if this span was produced outside of an + app recording context. + """ + + +class RecordingContextSpan(LiveSpan): + """Tracks the context of an app used as a context manager.""" + + live_recording: Optional[Any] = pydantic.Field(None, exclude=True) + # TODO: app.RecordingContext # circular import issues + + live_app: Optional[ReferenceType[core_app.App]] = pydantic.Field( + None, exclude=True + ) + + def otel_resource_attributes(self) -> Dict[str, Any]: + ret = super().otel_resource_attributes() + + ret[ResourceAttributes.SERVICE_NAME] = ( + self.live_recording.app.app_name + if self.live_recording is not None + else None + ) + + return ret + + # override to also call _finalize_recording . 
+ def end(self, *args, **kwargs): + super().end(*args, **kwargs) + + self._finalize_recording() + + def _finalize_recording(self): + assert self.live_recording is not None + + app = self.live_recording.app + + for span in core_trace.Tracer.find_each_child( + span=self, span_filter=lambda s: isinstance(s, LiveRecordRoot) + ): + app._on_new_root_span(recording=self.live_recording, root_span=span) + + app._on_new_recording_span(recording_span=self) + + +class LiveSpanCall(LiveSpan): + """Track a function call.""" + + model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + + call_id = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.CALL.CALL_ID, + Optional[uuid.UUID], + default_factory=uuid.uuid4, + ) + """Unique call identifiers.""" + + process_id = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.CALL.PROCESS_ID, int, default_factory=os.getpid + ) + """Process ID of the call.""" + + thread_id = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.CALL.THREAD_ID, + int, + default_factory=threading.get_native_id, + ) + """Thread ID of the call.""" + + call_error = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.CALL.ERROR, Optional[Exception], default=None + ) + """Optional error if the called function raised an exception.""" + + live_sig: Optional[inspect.Signature] = pydantic.Field(None, exclude=True) + """Called function's signature.""" + + live_obj: Optional[Any] = pydantic.Field(None, exclude=True) + """Self object if method call.""" + + live_cls: Optional[Type] = pydantic.Field(None, exclude=True) + """Class if method/static/class method call.""" + + live_func: Optional[Callable] = pydantic.Field(None, exclude=True) + """Function object.""" + + live_args: Optional[Tuple[Any, ...]] = pydantic.Field(None, exclude=True) + """Positional arguments to the function call.""" + + live_kwargs: Optional[Dict[str, Any]] = pydantic.Field(None, exclude=True) + """Keyword arguments to the 
function call.""" + + live_bound_arguments: Optional[inspect.BoundArguments] = pydantic.Field( + None, exclude=True + ) + """Bound arguments to the function call if can be bound.""" + + live_ret: Optional[Any] = pydantic.Field(None, exclude=True) + """Return value of the function call. + + Exclusive with `error`. + """ + + live_error: Optional[Any] = pydantic.Field(None, exclude=True) + """Error raised by the function call. + + Exclusive with `ret`. + """ + + +class LiveRecordRoot(LiveSpan): + """Wrapper for first app calls, or "records". + + Children spans of type `LiveSpan` are expected to contain the app named here + in their `live_apps` field. + """ + + live_app: Optional[ReferenceType[core_app.App]] = pydantic.Field( + None, exclude=True + ) + """The app for which this is the root call. + + Value must be included in children's `live_apps` field. + """ + + record_id = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.RECORD_ROOT.RECORD_ID, + types_schema.TraceRecordID.PY_TYPE, + ) + """Unique identifier for this root call or what is called a "record". + + Note that this is different from `record_ids` though this + `record_id` will be included in `record_ids` and will be included in + children's `record_ids` fields. + + Note that a record root cannot be a distributed call hence there is no + non-live record root. 
+ """ + + +class WithCost(LiveSpan): + """Mixin to indicate the span has costs tracked.""" + + cost = WithAttributeProperties.attribute_property( + truconv.SpanAttributes.COST.COST, + base_schema.Cost, + default_factory=base_schema.Cost, + ) + """Cost of the computation spanned.""" + + live_endpoint: Optional[Any] = pydantic.Field( + None, exclude=True + ) # Any actually core_endpoint.Endpoint + """Endpoint handling cost extraction for this span/call.""" + + +class LiveSpanCallWithCost(LiveSpanCall, WithCost): + pass diff --git a/src/core/trulens/experimental/otel_tracing/core/trace/trace.py b/src/core/trulens/experimental/otel_tracing/core/trace/trace.py new file mode 100644 index 000000000..d28dc104a --- /dev/null +++ b/src/core/trulens/experimental/otel_tracing/core/trace/trace.py @@ -0,0 +1,216 @@ +# ruff: noqa: E402 + +"""Tracer for OTEL tracing. + +Adds TruLens specific features on top of the minimal OTEL Tracer. + +!!! Note + Most of the module is EXPERIMENTAL(otel_tracing) though it includes some existing + non-experimental classes moved here to resolve some circular import issues. 
+""" + +from __future__ import annotations + +from collections import defaultdict +import logging +import sys +from typing import ( + TYPE_CHECKING, + Callable, + Dict, + Hashable, + Iterable, + Optional, + Set, + Type, + TypeVar, +) + +from opentelemetry.util import types as types_api +import pydantic +from trulens.core.schema import types as types_schema +from trulens.core.utils import python as python_utils +from trulens.experimental.otel_tracing.core.trace import otel as core_otel +from trulens.experimental.otel_tracing.core.trace import span as core_span + +if TYPE_CHECKING: + # Need to model_rebuild classes thast use any of these: + from trulens.experimental.otel_tracing.core.trace import ( + context as core_context, + ) + +if sys.version_info < (3, 9): + from functools import lru_cache as fn_cache +else: + from functools import cache as fn_cache + +T = TypeVar("T") +R = TypeVar("R") # callable return type +E = TypeVar("E") # iterator/generator element type +S = TypeVar("S") # span type + +logger = logging.getLogger(__name__) + + +class Tracer(core_otel.Tracer): + """TruLens additions on top of [OTEL Tracer][opentelemetry.trace.Tracer].""" + + # TODO: Create a Tracer that does not record anything. Can either be a + # setting to this tracer or a separate "NullTracer". We need non-recording + # users to not incur much overhead hence need to be able to disable most of + # the tracing logic when appropriate. 
+ + model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + + # Overrides core_otel.Tracer._span_class + _span_class: Type[core_otel.Span] = pydantic.PrivateAttr( + default_factory=lambda: core_span.Span + ) + + @property + def spans(self) -> Dict[core_context.SpanContext, core_otel.Span]: + return self._tracer_provider.spans + + @property + def current_span(self) -> Optional[core_otel.Span]: + if (context := self.current_span_context) is None: + return None + + return self.spans.get(context) + + def start_span(self, *args, **kwargs): + """Like OTEL start_span except also keeps track of the span just created.""" + + new_span = super().start_span(*args, **kwargs) + + self.spans[new_span.context] = new_span + + return new_span + + @staticmethod + def find_each_child( + span: core_span.Span, span_filter: Callable + ) -> Iterable[core_span.Span]: + """For each family rooted at each child of this span, find the top-most + span that satisfies the filter.""" + + for child_span in span.children_spans: + if span_filter(child_span): + yield child_span + else: + yield from Tracer.find_each_child(child_span, span_filter) + + +class TracerProvider( + core_otel.TracerProvider, metaclass=python_utils.PydanticSingletonMeta +): + """TruLens additions on top of [OTEL TracerProvider][opentelemetry.trace.TracerProvider].""" + + model_config = pydantic.ConfigDict(arbitrary_types_allowed=True) + + _trace_id: types_schema.TraceID.PY_TYPE = pydantic.PrivateAttr( + default_factory=types_schema.TraceID.default_py + ) + + def __str__(self): + # Pydantic will not print anything useful otherwise. 
+ return f"{self.__module__}.{type(self).__name__}()" + + @property + def trace_id(self) -> types_schema.TraceID.PY_TYPE: + return self._trace_id + + # Overrides core_otel.TracerProvider._tracer_class + _tracer_class: Type[Tracer] = pydantic.PrivateAttr(default=Tracer) + + _tracers: Dict[str, Tracer] = pydantic.PrivateAttr(default_factory=dict) + + _spans: Dict[core_context.SpanContext, core_otel.Span] = ( + pydantic.PrivateAttr(default_factory=dict) + ) + + @property + def spans(self) -> Dict[core_context.SpanContext, core_otel.Span]: + return self._spans + + _exported_map: Dict[Hashable, Set[core_context.SpanContext]] = ( + pydantic.PrivateAttr(default_factory=lambda: defaultdict(set)) + ) + """NON-STANDARD: Each sink (hashable) is mapped to the set of span contexts + it has received. + + This is to prevent saving the same span twice or exporting it twice. Due to + the recording context nature of TruLens, the same spans can be processed for + multiple apps/contexts but we don't want to write them more than once. + """ + + def was_exported_to( + self, + context: core_context.SpanContext, + to: Hashable, + mark_exported: bool = False, + ) -> bool: + """Determine whether the given span context has been exported to the + given sink. + + Optionally marks the span context as exported. 
+ """ + + ret = context in self._exported_map[to] + + if mark_exported: + self._exported_map[to].add(context) + + return ret + + def get_tracer( + self, + instrumenting_module_name: str, + instrumenting_library_version: Optional[str] = None, + schema_url: Optional[str] = None, + attributes: Optional[types_api.Attributes] = None, + ): + if instrumenting_module_name in self._tracers: + return self._tracers[instrumenting_module_name] + + tracer = super().get_tracer( + instrumenting_module_name=instrumenting_module_name, + instrumenting_library_version=instrumenting_library_version, + attributes=attributes, + schema_url=schema_url, + ) + + self._tracers[instrumenting_module_name] = tracer + + return tracer + + +@fn_cache +def trulens_tracer_provider(): + """Global tracer provider. + All trulens tracers are made by this provider even if a different one is + configured for OTEL. + """ + + return TracerProvider() + + +def was_exported_to( + context: core_context.SpanContext, to: Hashable, mark_exported: bool = False +): + """Determine whether the given span context has been exported to the given sink. + + Optionally marks the span context as exported. 
+ """ + + return trulens_tracer_provider().was_exported_to(context, to, mark_exported) + + +@fn_cache +def trulens_tracer(): + from trulens.core import __version__ + + return trulens_tracer_provider().get_tracer( + instrumenting_module_name="trulens.experimental.otel_tracing.core.trace", + instrumenting_library_version=__version__, + ) diff --git a/src/feedback/trulens/feedback/dummy/endpoint.py b/src/feedback/trulens/feedback/dummy/endpoint.py index cd82f362b..afd12ff98 100644 --- a/src/feedback/trulens/feedback/dummy/endpoint.py +++ b/src/feedback/trulens/feedback/dummy/endpoint.py @@ -531,7 +531,7 @@ class DynamicDummyAPI(DummyAPI): ) -if otel_tracing_feature._FeatureSetup.are_optionals_installed(): +if otel_tracing_feature._FeatureSetup.are_optionals_installed(): # pylint: disable=protected-access from trulens.experimental.otel_tracing.core.feedback import ( endpoint as experimental_core_endpoint, ) diff --git a/src/providers/huggingface/trulens/providers/huggingface/endpoint.py b/src/providers/huggingface/trulens/providers/huggingface/endpoint.py index 49559bd51..4c8b0752c 100644 --- a/src/providers/huggingface/trulens/providers/huggingface/endpoint.py +++ b/src/providers/huggingface/trulens/providers/huggingface/endpoint.py @@ -74,12 +74,12 @@ def on_callable_return( super().on_callable_return(ret=ret, **kwargs) - bindings = self.bindings + bound_arguments = self.bound_arguments - if "url" not in bindings.arguments: + if "url" not in bound_arguments.arguments: return ret - url = bindings.arguments["url"] + url = bound_arguments.arguments["url"] if not url.startswith("https://api-inference.huggingface.co"): return ret diff --git a/tests/unit/static/golden/api.trulens.3.11.yaml b/tests/unit/static/golden/api.trulens.3.11.yaml index 6c155076a..3b1b999dd 100644 --- a/tests/unit/static/golden/api.trulens.3.11.yaml +++ b/tests/unit/static/golden/api.trulens.3.11.yaml @@ -216,6 +216,7 @@ trulens.core.database.base: DEFAULT_DATABASE_PREFIX: builtins.str 
DEFAULT_DATABASE_REDACT_KEYS: builtins.bool MULTI_CALL_NAME_DELIMITER: builtins.str + PageSelect: builtins.type trulens.core.database.base.DB: __bases__: - trulens.core.utils.serial.SerialModel @@ -223,6 +224,9 @@ trulens.core.database.base.DB: - trulens.core.utils.text.WithIdentString __class__: pydantic._internal._model_construction.ModelMetaclass attributes: + Q: typing._AnyMeta + T: typing._AnyMeta + W: typing._AnyMeta batch_insert_feedback: builtins.function batch_insert_ground_truth: builtins.function batch_insert_record: builtins.function @@ -249,6 +253,20 @@ trulens.core.database.base.DB: reset_database: builtins.function table_prefix: builtins.str update_app_metadata: builtins.function +trulens.core.database.base.PageSelect: + __bases__: + - builtins.object + __class__: builtins.type + attributes: + after_created_timestamp: builtins.NoneType + after_index: builtins.NoneType + after_updated_timestamp: builtins.NoneType + before_created_timestamp: builtins.NoneType + before_index: builtins.NoneType + before_updated_timestamp: builtins.NoneType + limit: builtins.NoneType + offset: builtins.NoneType + shuffle: builtins.bool trulens.core.database.connector: __class__: builtins.module highs: @@ -442,6 +460,7 @@ trulens.core.experimental: highs: {} lows: Feature: enum.EnumType + can_enable: builtins.function trulens.core.experimental.Feature: __bases__: - builtins.str @@ -991,7 +1010,9 @@ trulens.core.schema.record.RecordAppCallMethod: - trulens.core.utils.serial.SerialModel __class__: pydantic._internal._model_construction.ModelMetaclass attributes: - method: trulens.core.utils.pyschema.Method + function: typing.Optional[trulens.core.utils.pyschema.Function, builtins.NoneType] + function_or_method: builtins.property + method: typing.Optional[trulens.core.utils.pyschema.Method, builtins.NoneType] path: trulens.core.utils.serial.Lens trulens.core.schema.select: __class__: builtins.module @@ -1026,7 +1047,178 @@ trulens.core.schema.types: __class__: builtins.module 
highs: {} lows: + DictAsJSON: builtins.type + IntEnumAsSmallInt: builtins.type + ListAsJSON: builtins.type + SpanID: builtins.type + SpanKind: builtins.type + SpanName: builtins.type + SpanStatusCode: builtins.type + SpanType: builtins.type + SpanTypes: builtins.type + StatusDescription: builtins.type + StrAsVarChar: builtins.type + StrEnumAsVarChar: builtins.type + TLensedAttributeValue: typing_extensions.TypeAliasType + Timestamp: builtins.type + TraceID: builtins.type + TraceRecordID: builtins.type + TraceRecordIDs: builtins.type + TypeInfo: builtins.type + flatten_lensed_attributes: builtins.function + flatten_value: builtins.function + lens_of_flat_key: builtins.function new_call_id: builtins.function +trulens.core.schema.types.DictAsJSON: + __bases__: + - trulens.core.schema.types.TypeInfo + - typing.Generic + __class__: builtins.type + attributes: + PY_TYPE: typing._GenericAlias + SQL_SCHEMA_TYPE: builtins.type + SQL_TYPE: typing._GenericAlias +trulens.core.schema.types.IntEnumAsSmallInt: + __bases__: + - trulens.core.schema.types.TypeInfo + - typing.Generic + __class__: builtins.type + attributes: + SQL_SCHEMA_TYPE: builtins.type + SQL_TYPE: builtins.type +trulens.core.schema.types.ListAsJSON: + __bases__: + - trulens.core.schema.types.TypeInfo + - typing.Generic + __class__: builtins.type + attributes: + PY_TYPE: typing._GenericAlias + SQL_SCHEMA_TYPE: builtins.type + SQL_TYPE: typing._GenericAlias +trulens.core.schema.types.SpanID: + __bases__: + - trulens.core.schema.types.TypeInfo + __class__: builtins.type + attributes: + INVALID_OTEL: builtins.int + PY_TYPE: builtins.type + SQL_SCHEMA_TYPE: sqlalchemy.sql.sqltypes.BINARY + SQL_TYPE: builtins.type + TYPES: builtins.tuple + UNION_TYPE: typing._UnionGenericAlias +trulens.core.schema.types.SpanKind: + __bases__: + - trulens.core.schema.types.IntEnumAsSmallInt + __class__: builtins.type + attributes: + PY_TYPE: enum.EnumType + TYPES: builtins.tuple + UNION_TYPE: typing._UnionGenericAlias 
+trulens.core.schema.types.SpanName: + __bases__: + - trulens.core.schema.types.StrAsVarChar + __class__: builtins.type + attributes: {} +trulens.core.schema.types.SpanStatusCode: + __bases__: + - trulens.core.schema.types.IntEnumAsSmallInt + __class__: builtins.type + attributes: + PY_TYPE: enum.EnumType + TYPES: builtins.tuple + UNION_TYPE: typing._UnionGenericAlias +trulens.core.schema.types.SpanType: + __bases__: + - trulens.core.schema.types.StrEnumAsVarChar + __class__: builtins.type + attributes: + PY_TYPE: enum.EnumType + TYPES: builtins.tuple + UNION_TYPE: typing._UnionGenericAlias +trulens.core.schema.types.SpanTypes: + __bases__: + - trulens.core.schema.types.ListAsJSON + __class__: builtins.type + attributes: + ETI: builtins.type +trulens.core.schema.types.StatusDescription: + __bases__: + - trulens.core.schema.types.StrAsVarChar + __class__: builtins.type + attributes: {} +trulens.core.schema.types.StrAsVarChar: + __bases__: + - trulens.core.schema.types.TypeInfo + __class__: builtins.type + attributes: + NUM_BYTES: builtins.int + PY_TYPE: builtins.type + SQL_SCHEMA_TYPE: sqlalchemy.sql.sqltypes.VARCHAR + SQL_TYPE: builtins.type + TYPES: builtins.tuple + UNION_TYPE: builtins.type +trulens.core.schema.types.StrEnumAsVarChar: + __bases__: + - trulens.core.schema.types.TypeInfo + - typing.Generic + __class__: builtins.type + attributes: + SQL_SCHEMA_TYPE: sqlalchemy.sql.sqltypes.VARCHAR + SQL_TYPE: builtins.type +trulens.core.schema.types.Timestamp: + __bases__: + - trulens.core.schema.types.TypeInfo + __class__: builtins.type + attributes: + PY_TYPE: builtins.type + SQL_SCHEMA_TYPE: builtins.type + SQL_TYPE: builtins.type + TYPES: builtins.tuple + UNION_TYPE: typing._UnionGenericAlias +trulens.core.schema.types.TraceID: + __bases__: + - trulens.core.schema.types.TypeInfo + __class__: builtins.type + attributes: + INVALID_OTEL: builtins.int + PY_TYPE: builtins.type + SQL_SCHEMA_TYPE: sqlalchemy.sql.sqltypes.BINARY + SQL_TYPE: builtins.type + TYPES: 
builtins.tuple + UNION_TYPE: typing._UnionGenericAlias +trulens.core.schema.types.TraceRecordID: + __bases__: + - trulens.core.schema.types.StrAsVarChar + __class__: builtins.type + attributes: {} +trulens.core.schema.types.TraceRecordIDs: + __bases__: + - trulens.core.schema.types.DictAsJSON + __class__: builtins.type + attributes: + ETI: builtins.type +trulens.core.schema.types.TypeInfo: + __bases__: + - typing.Generic + __class__: builtins.type + attributes: + NUM_BITS: builtins.NoneType + OTEL_TYPE: builtins.NoneType + default_otel: builtins.classmethod + default_py: builtins.classmethod + default_sql: builtins.classmethod + otel: builtins.classmethod + otel_of_py: builtins.classmethod + otel_of_sql: builtins.classmethod + py: builtins.classmethod + py_of_otel: builtins.classmethod + py_of_sql: builtins.classmethod + rand_otel: builtins.classmethod + rand_py: builtins.classmethod + rand_sql: builtins.classmethod + sql: builtins.classmethod + sql_of_otel: builtins.classmethod + sql_of_py: builtins.classmethod trulens.core.session: __class__: builtins.module highs: {} @@ -1251,12 +1443,14 @@ trulens.core.utils.pyschema: highs: {} lows: Bindings: pydantic._internal._model_construction.ModelMetaclass + BoundArguments: pydantic._internal._model_construction.ModelMetaclass Class: pydantic._internal._model_construction.ModelMetaclass Function: pydantic._internal._model_construction.ModelMetaclass FunctionOrMethod: pydantic._internal._model_construction.ModelMetaclass Method: pydantic._internal._model_construction.ModelMetaclass Module: pydantic._internal._model_construction.ModelMetaclass Obj: pydantic._internal._model_construction.ModelMetaclass + Signature: pydantic._internal._model_construction.ModelMetaclass WithClassInfo: pydantic._internal._model_construction.ModelMetaclass builtin_init_dummy: builtins.function clean_attributes: builtins.function @@ -1273,6 +1467,13 @@ trulens.core.utils.pyschema.Bindings: kwargs: typing.Dict[builtins.str, typing.Any] load: 
builtins.function of_bound_arguments: builtins.staticmethod +trulens.core.utils.pyschema.BoundArguments: + __bases__: + - trulens.core.utils.serial.SerialModel + __class__: pydantic._internal._model_construction.ModelMetaclass + attributes: + arguments: typing.Dict[builtins.str, typing.Any] + of_bound_arguments: builtins.staticmethod trulens.core.utils.pyschema.Class: __bases__: - trulens.core.utils.serial.SerialModel @@ -1331,6 +1532,13 @@ trulens.core.utils.pyschema.Obj: init_bindings: typing.Optional[trulens.core.utils.pyschema.Bindings, builtins.NoneType] load: builtins.function of_object: builtins.staticmethod +trulens.core.utils.pyschema.Signature: + __bases__: + - trulens.core.utils.serial.SerialModel + __class__: pydantic._internal._model_construction.ModelMetaclass + attributes: + of_signature: builtins.staticmethod + sig_str: builtins.str trulens.core.utils.pyschema.WithClassInfo: __bases__: - pydantic.main.BaseModel