Skip to content

Commit

Permalink
Informatica crawler (#928)
Browse files Browse the repository at this point in the history
* Informatica crawler

* Add readme

* Bump version

* Update lock file

* tests

* Add missing __init__.py

* Add more tests

* Add more tests

* Address comments
  • Loading branch information
elic-eon authored Jul 26, 2024
1 parent 6b3e716 commit a17afd9
Show file tree
Hide file tree
Showing 30 changed files with 1,192 additions and 34 deletions.
42 changes: 25 additions & 17 deletions metaphor/common/api_request.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json
import secrets
import tempfile
from typing import Any, Callable, Dict, Type, TypeVar
from urllib.parse import urlparse
from typing import Any, Callable, Dict, Literal, Type, TypeVar
from urllib.parse import urljoin, urlparse

import requests
from pydantic import TypeAdapter, ValidationError
Expand All @@ -14,37 +14,41 @@


class ApiError(Exception):
    """Error raised when an API call made through this module fails.

    Attributes:
        url: The URL that was requested.
        status_code: HTTP status code of the response.
        body: Response body text, or a short description of the failure.
        error_msg: Same value as ``body``; the attribute's former name, kept
            so callers written against the earlier signature keep working.
    """

    def __init__(self, url: str, status_code: int, body: str) -> None:
        self.url = url
        self.status_code = status_code
        self.body = body
        # Alias for the pre-rename attribute name.
        self.error_msg = body
        super().__init__(f"call {url} api failed: {status_code}\n{body}")


def get_request(
def make_request(
url: str,
headers: Dict[str, str],
type_: Type[T],
transform_response: Callable[[requests.Response], Any] = lambda r: r.json(),
timeout: int = 600, # default request timeout 600s
timeout: int = 10,
method: Literal["get", "post"] = "get",
**kwargs,
) -> T:
"""Generic get api request to make third part api call and return with customized data class"""
result = requests.get(url, headers=headers, timeout=timeout, **kwargs)
result = getattr(requests, method)(url, headers=headers, timeout=timeout, **kwargs)
if result.status_code == 200:
# Add JSON response to log.zip
file_name = (
f"{urlparse(url).path[1:].replace('/', u'__')}_{secrets.token_hex(4)}"
)

# request signature, example: get_v1__resource_abcd
request_signature = f"{method}_{urlparse(url).path[1:].replace('/', u'__')}"

# suffix with length 8 chars random string
suffix = f"_{secrets.token_hex(4)}.json"

# Avoid file name too long error and truncate prefix to avoid duplicate file name
# 250 is the lowest default maximum charactors file name length limit acrocess major file systems
file_name = (
file_name[len(file_name) - 245 :] if len(file_name) > 245 else file_name
)
file_name = f"{file_name}.json"
# 250 is the lowest default maximum characters file name length limit across major file systems
file_name = f"{request_signature[:250 - len(suffix)]}{suffix}"

# Add JSON response to log.zip
out_file = f"{tempfile.mkdtemp()}/{file_name}"
with open(out_file, "w") as fp:
json.dump(result.json(), fp, indent=2)
debug_files.append(out_file)

try:
return TypeAdapter(type_).validate_python(transform_response(result))
except ValidationError as error:
Expand All @@ -54,3 +58,7 @@ def get_request(
raise ApiError(url, result.status_code, "cannot parse result")
else:
raise ApiError(url, result.status_code, result.content.decode())


def make_url(base: str, path: str):
    """Combine ``base`` and ``path`` into one URL via ``urllib.parse.urljoin``.

    ``urljoin`` resolves ``path`` relative to ``base``: a ``base`` without a
    trailing slash loses its last path segment, and a ``path`` starting with
    ``/`` replaces the whole path of ``base``.
    """
    joined = urljoin(base, path)
    return joined
4 changes: 2 additions & 2 deletions metaphor/fivetran/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from requests.auth import HTTPBasicAuth

from metaphor.common.api_request import ApiError, get_request
from metaphor.common.api_request import ApiError, make_request
from metaphor.common.base_extractor import BaseExtractor
from metaphor.common.entity_id import (
dataset_normalized_name,
Expand Down Expand Up @@ -550,4 +550,4 @@ def _get_all(self, url: str, type_: Type[DataT]) -> List[DataT]:

def _call_get(self, url: str, **kwargs):
    """Call ``make_request`` for ``url`` with this extractor's credentials.

    Sends the ``application/json;version=2`` Accept header and passes
    ``self._auth`` as ``auth``. Any extra keyword arguments are forwarded
    unchanged to ``make_request``.
    """
    headers = {"Accept": "application/json;version=2"}
    return make_request(url=url, headers=headers, auth=self._auth, **kwargs)
33 changes: 33 additions & 0 deletions metaphor/informatica/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Informatica Connector

This connector extracts technical metadata from Informatica using [Informatica Intelligent Cloud Services REST API](https://docs.informatica.com/integration-cloud/b2b-gateway/current-version/rest-api-reference/preface.html).

## Config File

Create a YAML config file based on the following template.

### Required Configurations

```yaml
base_url: <base_url>
user: <username>
password: <password>
```

### Optional Configurations

#### Output Destination

See [Output Config](../common/docs/output.md) for more information.

## Testing

Follow the [Installation](../../README.md) instructions to install `metaphor-connectors` in your environment (or virtualenv).

Run the following command to test the connector locally:

```shell
metaphor informatica <config_file>
```

Manually verify the output after the command finishes.
6 changes: 6 additions & 0 deletions metaphor/informatica/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from metaphor.common.cli import cli_main
from metaphor.informatica.extractor import InformaticaExtractor


def main(config_file: str):
    """Entry point: pass ``InformaticaExtractor`` and the config file path to ``cli_main``."""
    cli_main(InformaticaExtractor, config_file)
13 changes: 13 additions & 0 deletions metaphor/informatica/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pydantic.dataclasses import dataclass

from metaphor.common.base_config import BaseConfig
from metaphor.common.dataclass import ConnectorConfig


@dataclass(config=ConnectorConfig)
class InformaticaRunConfig(BaseConfig):
    """Run configuration for the Informatica connector.

    All three fields are required and have no defaults; they correspond to
    the ``user``, ``password`` and ``base_url`` keys of the YAML config file.
    """

    # User name (``user`` in the config file).
    user: str

    # Password for ``user``.
    password: str

    # Base URL for the connector's API requests.
    # NOTE(review): the extractor is not visible here; confirm the expected form.
    base_url: str
Loading

0 comments on commit a17afd9

Please sign in to comment.