Skip to content

Commit

Permalink
Add logs for dbt cloud crawler (#982)
Browse files Browse the repository at this point in the history
* Dump GQL requests and responses

* Bump version

* Fix type

* Add tests
  • Loading branch information
elic-eon authored Sep 10, 2024
1 parent 9ebb5a7 commit f92e2d1
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 2 deletions.
7 changes: 6 additions & 1 deletion metaphor/dbt/cloud/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from metaphor.dbt.cloud.client import DbtAdminAPIClient
from metaphor.dbt.cloud.config import DbtCloudConfig
from metaphor.dbt.cloud.discovery_api import DiscoveryAPIClient
from metaphor.dbt.cloud.http import LogTransport
from metaphor.dbt.cloud.parser.parser import Parser
from metaphor.dbt.cloud.utils import parse_environment
from metaphor.models.crawler_run_metadata import Platform
Expand Down Expand Up @@ -51,7 +52,11 @@ def __init__(self, config: DbtCloudConfig):
self._discovery_api_client = DiscoveryAPIClient(
url=self._discovery_api_url,
headers=headers,
http_client=httpx.Client(timeout=None, headers=headers),
http_client=httpx.Client(
timeout=None,
headers=headers,
transport=LogTransport(httpx.HTTPTransport()),
),
)

async def extract(self) -> Collection[ENTITY_TYPES]:
Expand Down
57 changes: 57 additions & 0 deletions metaphor/dbt/cloud/http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json
import secrets
from collections.abc import Iterable

import httpx

from metaphor.common.logger import get_logger, json_dump_to_debug_file

logger = get_logger()


class LogResponse(httpx.Response):
def _log_payload(self, filename: str, payload: bytes):
try:
request_json = json.loads(payload.decode())
json_dump_to_debug_file(request_json, filename)
except json.JSONDecodeError:
logger.exception("Not able to log request")

def _log(self, response: bytes):
request = self.request
if not isinstance(request.stream, Iterable):
return

r_body = b"".join(request.stream)
method = request.method

request_signature = f"{method}_{request.url.path[1:].replace('/', u'__')}"
random_slug = secrets.token_hex(4)
req_suffix = f"_{random_slug}_req.json"
res_suffix = f"_{random_slug}_res.json"
req_file_name = f"{request_signature[:250 - len(req_suffix)]}{req_suffix}"
res_file_name = f"{request_signature[:250 - len(res_suffix)]}{res_suffix}"

self._log_payload(req_file_name, r_body)
self._log_payload(res_file_name, response)

def read(self) -> bytes:
resp = super().read()
self._log(resp)
return resp


class LogTransport(httpx.BaseTransport):
def __init__(self, transport: httpx.BaseTransport):
self.transport = transport

def handle_request(self, request: httpx.Request) -> httpx.Response:

response = self.transport.handle_request(request)

return LogResponse(
status_code=response.status_code,
headers=response.headers,
stream=response.stream,
extensions=response.extensions,
)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.103"
version = "0.14.104"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
26 changes: 26 additions & 0 deletions tests/dbt/cloud/test_http.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import json

import httpx
from testcontainers.general import DockerContainer

from metaphor.common.logger import debug_files
from metaphor.dbt.cloud.http import LogTransport


def test_http_client():
http_client = httpx.Client(
transport=LogTransport(httpx.HTTPTransport()),
)

with DockerContainer("hashicorp/http-echo").with_exposed_ports(5678).with_env(
"ECHO_TEXT", json.dumps({})
) as container:
port = container.get_exposed_port(5678)
host = container.get_container_host_ip()

debug_files.clear()
url = f"http://{host}:{port}"
http_client.post(url, content=json.dumps({"foo": "bar"}))

# Should log two json file
assert len(debug_files) == 2

0 comments on commit f92e2d1

Please sign in to comment.