Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extend agent payload definitions #53

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion semantic-core/generation/gen_semantic_defs.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import os
import re
from typing import NamedTuple
from pydantic.json_schema import GenerateJsonSchema

from semantic_model.payloads import AgentPayload
from semantic_model.payloads import IntakeResolvedDbSpan
Expand All @@ -19,8 +20,21 @@
logger = logging.getLogger(__name__)


class JSONSchemaGenerator(GenerateJsonSchema):
    """JSON schema generator that stamps the dialect and runs a per-model hook.

    After the regular generation it sets ``$schema`` to this generator's
    ``schema_dialect`` and, when the class referenced by the core schema
    exposes a ``customize_json_schema`` callable, passes the generated schema
    through it.
    """

    def generate(self, schema, mode='validation'):
        """Generate the JSON schema for ``schema`` and post-process it.

        Args:
            schema: The core schema to render. Its ``'cls'`` entry, when
                present, is the class inspected for the customization hook.
            mode: Forwarded unchanged to the parent implementation.

        Returns:
            The generated JSON schema with ``$schema`` set, replaced by the
            return value of ``customize_json_schema`` when that hook exists.
        """
        json_schema = super().generate(schema, mode=mode)
        json_schema['$schema'] = self.schema_dialect

        # Look the hook up explicitly instead of wrapping the call in
        # ``except AttributeError``: that form also swallowed AttributeErrors
        # raised *inside* the hook, and raised KeyError when the core schema
        # carried no 'cls' entry at all.
        customize = getattr(schema.get('cls'), 'customize_json_schema', None)
        if customize is not None:
            json_schema = customize(json_schema)

        return json_schema


def generate_schema(*args, payload_type, version_info):
json_schema_str = json.dumps(payload_type.model_json_schema(), indent=2)
json_schema_str = json.dumps(payload_type.model_json_schema(schema_generator=JSONSchemaGenerator), indent=2)
subdir = "releases" if version_info.is_release else "drafts"

# Create the directory if it doesn't exist
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@
from typing import List, Dict

from semantic_model.registry.types import Hostname

from semantic_model.registry.types.span import Span
from semantic_model.registry.types.span_type import SpanType
from semantic_model.registry.types.tags_base import TagsBase
from semantic_model.registry.types.tags_http import TagsHTTP
from semantic_model.registry.types.tags_sql import TagsSQL
from semantic_model.registry.types.tracer_payload import TracerPayload

NonEmptyString = Annotated[str, Field(min_length=1)]
PositiveFloat = Annotated[float, Field(gt=0)]
Expand All @@ -15,6 +20,54 @@ class AgentPayload(BaseModel):
Represents the generic semantic_model for the agent payload, structurally defined here: https://github.com/DataDog/datadog-agent/blob/main/pkg/proto/datadog/trace/agent_payload.proto
"""

@staticmethod
def customize_json_schema(schema):
    """Add conditional validation of ``Span.meta`` to the generated schema.

    The JSON schemas of ``TagsBase``, ``TagsHTTP`` and ``TagsSQL`` are added
    to the root ``$defs``, and the ``Span`` definition receives an ``allOf``
    list of ``if``/``then`` rules that pick the ``meta`` schema from the
    span ``type`` attribute.

    Args:
        schema: The generated JSON schema (a dict). It is modified in place
            and must already contain a ``$defs`` entry for ``Span``.

    Returns:
        The same ``schema`` dict, after modification.
    """
    defs = schema.setdefault('$defs', {})

    extra_models = [TagsBase, TagsHTTP, TagsSQL]

    # Generate the JSON schema for the extra models and add it to the root definitions.
    for cls in extra_models:
        defs[cls.__name__] = cls.model_json_schema()

    span_type = 'type'
    http_types = [SpanType.web, SpanType.http]
    known_types = [*http_types, SpanType.sql]

    # Validate the span.meta property using different schemas conditionally, based on
    # the span.type attribute.
    # https://json-schema.org/understanding-json-schema/reference/conditionals#ifthenelse
    # This feature is not supported by Pydantic: https://github.com/pydantic/pydantic/issues/529
    defs[Span.__name__]['allOf'] = [
        # Default case when type is not defined or is not a known value.
        # 'required' must sit inside the negated schema: 'properties' alone
        # passes vacuously when the attribute is absent, which would make
        # the 'not' fail and skip this rule for spans without a type.
        {
            'if': {
                'not': {
                    'properties': {span_type: {'enum': known_types}},
                    'required': [span_type],
                }
            },
            'then': {'properties': {'meta': {'$ref': f"#/$defs/{TagsBase.__name__}"}}},
        },
        {
            'if': {
                'properties': {span_type: {'enum': http_types}},
                'required': [span_type],
            },
            'then': {'properties': {'meta': {'$ref': f"#/$defs/{TagsHTTP.__name__}"}}},
        },
        {
            'if': {
                'properties': {span_type: {'const': SpanType.sql}},
                'required': [span_type],
            },
            'then': {'properties': {'meta': {'$ref': f"#/$defs/{TagsSQL.__name__}"}}},
        },
    ]

    # Move the nested $defs to the root model, so the generated references still work.
    # A model without nested definitions has no '$defs' key, hence the default.
    for cls in extra_models:
        nested_defs = defs[cls.__name__].pop('$defs', {})
        defs.update(nested_defs)

    return schema

hostName: Annotated[
Hostname,
Field(
Expand Down Expand Up @@ -75,5 +128,11 @@ class AgentPayload(BaseModel):
description="""Holds `RareSamplerEnabled` value in AgentConfig""",
),
] = None

# TODO: tracerPayloads
# List of TracerPayload entries. No default value is assigned to this field
# in the declaration below.
tracerPayloads: Annotated[
List[TracerPayload],
Field(
alias="tracerPayloads",
title="Tracer Payloads",
description="""Specifies the list of the payloads received from tracers""",
)
]
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,3 @@
from .trace_flags import TraceFlags
from .trace_state import TraceState
from .tags import Tags

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from enum import Enum


# String-valued enumeration of language names; every member's value equals
# its name. Documented with comments rather than a docstring so the class
# ``__doc__`` stays untouched.
class LanguageName(str, Enum):
    go = "go"
    python = "python"
    php = "php"
    ruby = "ruby"
    jvm = "jvm"
    dotnet = "dotnet"
    js = "js"
111 changes: 111 additions & 0 deletions semantic-core/generation/semantic_model/registry/types/span.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
from pydantic import BaseModel, Field
from typing_extensions import Annotated

from semantic_model.registry.types import SpanId
from semantic_model.registry.types.span_type import SpanType


# Pydantic model of a single span. Fields assigned ``= None`` below carry a
# default; the remaining fields are declared without one.
# NOTE: documented with comments rather than a class docstring so the class
# ``__doc__`` is left exactly as it was.
class Span(BaseModel):
    # --- identification -------------------------------------------------
    service: Annotated[
        str,
        Field(
            alias="service",
            title="Service",
            description="The name of the service with which this span is associated",
        ),
    ]
    name: Annotated[
        str,
        Field(
            alias="name",
            title="Name",
            description="The operation name of this span",
        ),
    ]
    resource: Annotated[
        str,
        Field(
            alias="resource",
            title="Resource",
            description="The resource name of this span, also sometimes called the endpoint (for web spans)",
        ),
    ]
    traceID: Annotated[
        str,
        Field(
            alias="traceID",
            title="Trace ID",
            description="The ID of the trace to which this span belongs",
        ),
    ]
    spanID: Annotated[
        SpanId,
        Field(
            alias="spanID",
            title="Span ID",
        ),
    ] = None
    parentID: Annotated[
        SpanId,
        Field(
            alias="parentID",
            title="Parent ID",
            description="The ID of this span's parent, or zero if this span has no parent",
        ),
    ] = None

    # --- timing -----------------------------------------------------------
    start: Annotated[
        int,
        Field(
            alias="start",
            title="Start",
            description="The number of nanoseconds between the Unix epoch and the beginning of this span",
        ),
    ]
    duration: Annotated[
        int,
        Field(
            alias="duration",
            title="Duration",
            description="The time length of this span in nanoseconds",
        ),
    ]

    # --- status, tags and classification ------------------------------------
    error: Annotated[
        int,
        Field(
            alias="error",
            title="Error",
            description="Error is 1 if there is an error associated with this span, or 0 if there is not",
        ),
    ] = None
    meta: Annotated[
        dict[str, str],
        Field(
            alias="meta",
            title="Meta",
            description="Meta is a mapping from tag name to tag value for string-valued tags",
        ),
    ] = None
    metrics: Annotated[
        dict[str, float],
        Field(
            alias="metrics",
            title="Metrics",
            description="Metrics is a mapping from tag name to tag value for numeric-valued tags",
        ),
    ] = None
    type: Annotated[
        SpanType,
        Field(
            alias="type",
            title="Type",
            description="Represents the type of the service with which this span is associated. Example values: `web`, `db`, `lambda`",
        ),
    ] = None
    meta_struct: Annotated[
        dict[str, int],
        Field(
            alias="meta_struct",
            title="Meta Struct",
            description="Represents a registry of structured \"other\" data used by, e.g., AppSec",
        ),
    ] = None
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from enum import Enum


class SpanKind(str, Enum):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be defined as an Annotated Field type? Something along these lines: https://github.com/DataDog/schema/blob/main/semantic-core/generation/semantic_model/registry/properties/data_policies.py#L12-L16

This way we can leverage the `Field` type to define here all of the common properties that should apply wherever this type is used.

internal = 'internal'
client = 'client'
server = 'server'
producer = 'producer'
consumer = 'consumer'
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from enum import Enum


class SpanType(str, Enum):
    """Span types have similar behaviour to "app types" and help categorize
    traces in the Datadog application. They can also help fine grain agent
    level behaviours such as obfuscation and quantization, when these are
    enabled in the agent's configuration."""

    # Web / HTTP
    web = "web"
    http = "http"

    # Datastores and caches
    sql = "sql"
    cassandra = "cassandra"
    redis = "redis"
    memcached = "memcached"
    mongodb = "mongodb"
    elasticsearch = "elasticsearch"
    leveldb = "leveldb"

    # Everything else
    dns = "dns"
    queue = "queue"
    consul = "consul"
    graphql = "graphql"
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from pydantic import BaseModel, Field

from typing_extensions import Annotated

from semantic_model.registry.types.span_kind import SpanKind


# Pydantic model with a single field, ``span_kind``, exposed under the alias
# "span.kind" and declared without a default value.
# NOTE: documented with comments rather than a class docstring so the class
# ``__doc__`` is left exactly as it was.
class TagsBase(BaseModel):
    span_kind: Annotated[
        SpanKind,
        Field(alias="span.kind", title="span.kind", description=""),
    ]
Loading