Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Informatica crawler #928

Merged
merged 10 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 25 additions & 17 deletions metaphor/common/api_request.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import json
import secrets
import tempfile
from typing import Any, Callable, Dict, Type, TypeVar
from urllib.parse import urlparse
from typing import Any, Callable, Dict, Literal, Type, TypeVar
from urllib.parse import urljoin, urlparse

import requests
from pydantic import TypeAdapter, ValidationError
Expand All @@ -14,37 +14,41 @@


class ApiError(Exception):
    """Raised when a third-party API call fails or its result cannot be parsed.

    Attributes:
        status_code: HTTP status code of the response that triggered the error.
        body: Response body, or a short description of what went wrong.
        error_msg: Same value as ``body``; the attribute's previous name, kept
            so existing handlers that read it continue to work.
    """

    def __init__(self, url: str, status_code: int, body: str) -> None:
        self.status_code = status_code
        self.body = body
        # Backward-compatible alias for the pre-rename attribute.
        self.error_msg = body
        super().__init__(f"call {url} api failed: {status_code}\n{body}")


def get_request(
def make_request(
url: str,
headers: Dict[str, str],
type_: Type[T],
transform_response: Callable[[requests.Response], Any] = lambda r: r.json(),
timeout: int = 600, # default request timeout 600s
timeout: int = 10,
method: Literal["get", "post"] = "get",
**kwargs,
) -> T:
"""Generic API request (GET or POST) that calls a third-party API and returns the response validated as the given type"""
result = requests.get(url, headers=headers, timeout=timeout, **kwargs)
result = getattr(requests, method)(url, headers=headers, timeout=timeout, **kwargs)
usefulalgorithm marked this conversation as resolved.
Show resolved Hide resolved
if result.status_code == 200:
# Add JSON response to log.zip
file_name = (
f"{urlparse(url).path[1:].replace('/', u'__')}_{secrets.token_hex(4)}"
)

# request signature, example: get_v1__resource_abcd
request_signature = f"{method}_{urlparse(url).path[1:].replace('/', u'__')}"

# suffix with length 8 chars random string
suffix = f"_{secrets.token_hex(4)}.json"

# Avoid file name too long error and truncate prefix to avoid duplicate file name
# 250 is the lowest default maximum charactors file name length limit acrocess major file systems
file_name = (
file_name[len(file_name) - 245 :] if len(file_name) > 245 else file_name
)
file_name = f"{file_name}.json"
# 250 is the lowest default maximum characters file name length limit across major file systems
file_name = f"{request_signature[:250 - len(suffix)]}{suffix}"

# Add JSON response to log.zip
out_file = f"{tempfile.mkdtemp()}/{file_name}"
with open(out_file, "w") as fp:
json.dump(result.json(), fp, indent=2)
debug_files.append(out_file)

try:
return TypeAdapter(type_).validate_python(transform_response(result))
except ValidationError as error:
Expand All @@ -54,3 +58,7 @@
raise ApiError(url, result.status_code, "cannot parse result")
else:
raise ApiError(url, result.status_code, result.content.decode())


def make_url(base: str, path: str) -> str:
    """Combine ``base`` and ``path`` into one URL via ``urllib.parse.urljoin``.

    The result follows ``urljoin`` rules, which are easy to trip over:

    * if ``base`` does not end with ``/``, its last path segment is replaced
      by ``path`` rather than extended;
    * if ``path`` starts with ``/``, it replaces the whole path of ``base``.

    Args:
        base: The base URL.
        path: A relative or absolute path (or full URL) to resolve against it.

    Returns:
        The resolved URL.
    """
    return urljoin(base, path)

Check warning on line 64 in metaphor/common/api_request.py

View check run for this annotation

Codecov / codecov/patch

metaphor/common/api_request.py#L64

Added line #L64 was not covered by tests
4 changes: 2 additions & 2 deletions metaphor/fivetran/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

from requests.auth import HTTPBasicAuth

from metaphor.common.api_request import ApiError, get_request
from metaphor.common.api_request import ApiError, make_request
from metaphor.common.base_extractor import BaseExtractor
from metaphor.common.entity_id import (
dataset_normalized_name,
Expand Down Expand Up @@ -550,4 +550,4 @@ def _get_all(self, url: str, type_: Type[DataT]) -> List[DataT]:

def _call_get(self, url: str, **kwargs):
    """Call ``url`` through ``make_request`` using this extractor's credentials.

    Sends a fixed ``Accept: application/json;version=2`` header and passes
    ``self._auth`` as the ``auth`` argument. Any extra keyword arguments
    (for example ``type_``) are forwarded to ``make_request`` unchanged, and
    its return value is returned as-is.
    """
    headers = {"Accept": "application/json;version=2"}
    return make_request(url=url, headers=headers, auth=self._auth, **kwargs)
33 changes: 33 additions & 0 deletions metaphor/informatica/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Informatica Connector

This connector extracts technical metadata from Informatica using the [Informatica Intelligent Cloud Services REST API](https://docs.informatica.com/integration-cloud/b2b-gateway/current-version/rest-api-reference/preface.html).

## Config File

Create a YAML config file based on the following template.

### Required Configurations

```yaml
base_url: <base_url>
user: <username>
password: <password>
```

### Optional Configurations

#### Output Destination

See [Output Config](../common/docs/output.md) for more information.

## Testing

Follow the [Installation](../../README.md) instructions to install `metaphor-connectors` in your environment (or virtualenv).

Run the following command to test the connector locally:

```shell
metaphor informatica <config_file>
```

Manually verify the output after the command finishes.
6 changes: 6 additions & 0 deletions metaphor/informatica/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from metaphor.common.cli import cli_main
from metaphor.informatica.extractor import InformaticaExtractor


def main(config_file: str) -> None:
    """Entry point for the Informatica connector.

    Hands ``InformaticaExtractor`` and the given config file to ``cli_main``.

    Args:
        config_file: The connector's config file, as passed on the command line.
    """
    cli_main(InformaticaExtractor, config_file)
13 changes: 13 additions & 0 deletions metaphor/informatica/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
from pydantic.dataclasses import dataclass

from metaphor.common.base_config import BaseConfig
from metaphor.common.dataclass import ConnectorConfig


@dataclass(config=ConnectorConfig)
class InformaticaRunConfig(BaseConfig):
    """Run configuration for the Informatica connector.

    All three fields are required and have no defaults.
    """

    # Informatica user name (presumably used to authenticate against the
    # REST API; confirm in the extractor).
    user: str

    # Password for `user`.
    password: str

    # Base URL of the Informatica instance the connector talks to.
    base_url: str
Loading
Loading