diff --git a/metaphor/dbt/cloud/extractor.py b/metaphor/dbt/cloud/extractor.py index 5084572e..64b999c9 100644 --- a/metaphor/dbt/cloud/extractor.py +++ b/metaphor/dbt/cloud/extractor.py @@ -8,6 +8,7 @@ from metaphor.dbt.cloud.client import DbtAdminAPIClient from metaphor.dbt.cloud.config import DbtCloudConfig from metaphor.dbt.cloud.discovery_api import DiscoveryAPIClient +from metaphor.dbt.cloud.http import LogTransport from metaphor.dbt.cloud.parser.parser import Parser from metaphor.dbt.cloud.utils import parse_environment from metaphor.models.crawler_run_metadata import Platform @@ -51,7 +52,11 @@ def __init__(self, config: DbtCloudConfig): self._discovery_api_client = DiscoveryAPIClient( url=self._discovery_api_url, headers=headers, - http_client=httpx.Client(timeout=None, headers=headers), + http_client=httpx.Client( + timeout=None, + headers=headers, + transport=LogTransport(httpx.HTTPTransport()), + ), ) async def extract(self) -> Collection[ENTITY_TYPES]: diff --git a/metaphor/dbt/cloud/http.py b/metaphor/dbt/cloud/http.py new file mode 100644 index 00000000..7d05de0d --- /dev/null +++ b/metaphor/dbt/cloud/http.py @@ -0,0 +1,57 @@ +import json +import secrets +from collections.abc import Iterable + +import httpx + +from metaphor.common.logger import get_logger, json_dump_to_debug_file + +logger = get_logger() + + +class LogResponse(httpx.Response): + def _log_payload(self, filename: str, payload: bytes): + try: + request_json = json.loads(payload.decode()) + json_dump_to_debug_file(request_json, filename) + except json.JSONDecodeError: + logger.exception("Not able to log request") + + def _log(self, response: bytes): + request = self.request + if not isinstance(request.stream, Iterable): + return + + r_body = b"".join(request.stream) + method = request.method + + request_signature = f"{method}_{request.url.path[1:].replace('/', u'__')}" + random_slug = secrets.token_hex(4) + req_suffix = f"_{random_slug}_req.json" + res_suffix = f"_{random_slug}_res.json" + req_file_name = f"{request_signature[:250 - len(req_suffix)]}{req_suffix}" + res_file_name = f"{request_signature[:250 - len(res_suffix)]}{res_suffix}" + + self._log_payload(req_file_name, r_body) + self._log_payload(res_file_name, response) + + def read(self) -> bytes: + resp = super().read() + self._log(resp) + return resp + + +class LogTransport(httpx.BaseTransport): + def __init__(self, transport: httpx.BaseTransport): + self.transport = transport + + def handle_request(self, request: httpx.Request) -> httpx.Response: + + response = self.transport.handle_request(request) + + return LogResponse( + status_code=response.status_code, + headers=response.headers, + stream=response.stream, + extensions=response.extensions, + ) diff --git a/pyproject.toml b/pyproject.toml index 61c0763b..b3a5c4b6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "metaphor-connectors" -version = "0.14.103" +version = "0.14.104" license = "Apache-2.0" description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app." authors = ["Metaphor "] diff --git a/tests/dbt/cloud/test_http.py b/tests/dbt/cloud/test_http.py new file mode 100644 index 00000000..d38ef694 --- /dev/null +++ b/tests/dbt/cloud/test_http.py @@ -0,0 +1,26 @@ +import json + +import httpx +from testcontainers.general import DockerContainer + +from metaphor.common.logger import debug_files +from metaphor.dbt.cloud.http import LogTransport + + +def test_http_client(): + http_client = httpx.Client( + transport=LogTransport(httpx.HTTPTransport()), + ) + + with DockerContainer("hashicorp/http-echo").with_exposed_ports(5678).with_env( + "ECHO_TEXT", json.dumps({}) + ) as container: + port = container.get_exposed_port(5678) + host = container.get_container_host_ip() + + debug_files.clear() + url = f"http://{host}:{port}" + http_client.post(url, content=json.dumps({"foo": "bar"})) + + # Should log two json file + assert len(debug_files) == 2