Skip to content

Commit

Permalink
[sc-27549] Switch to using DiscoveryAPI for dbt.cloud (#926)
Browse files Browse the repository at this point in the history
* [sc-27549] Switch to using DiscoveryAPI for dbt.cloud

* use codegen

* rewrite pt 1

* pt2

* refactor

* refactor

* parse test

* finish implementation

* ignore generated files in bandit

* paginate macro arguments query

* finish implementation

* dont include codegen as dependency

* rename files

* use pascalcase query names

* remove unused stuff

* add test

* bump version

* rename graphql_client to generated

* add codegen.sh

* address comments

* refactor tests

* add test input

* add test input

* do not ignore node columns

* ignore generated files in coverage

* parse run result completion time

* update docs

* address comments

* use pyproject.toml for precommit bandit

* add toml dep in precommit bandit

* fix

* fix time format

* bump version
  • Loading branch information
usefulalgorithm authored Jul 29, 2024
1 parent 8c2b533 commit 127ce75
Show file tree
Hide file tree
Showing 86 changed files with 15,984 additions and 518 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ jobs:
- name: Test
run: |
echo "setuptools<72" > constraints.txt
poetry run pip install --constraint constraints.txt pyhive==0.7.0
poetry install -E all
poetry run coverage run --source=metaphor -m pytest
poetry run coverage xml
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,18 @@ jobs:
- name: Lint & Type Check
run: |
echo "setuptools<72" > constraints.txt
poetry run pip install --constraint constraints.txt pyhive==0.7.0
poetry install -E all
poetry run flake8
poetry run black --check .
poetry run isort --check .
poetry run mypy . --explicit-package-bases
poetry run bandit -r . -c pyproject.toml
# TODO(SC-14236): Include __init__.py back to coverage after fixing async testing issues
- name: Test
run: |
poetry run coverage run --source=metaphor --omit='**/__init__.py' -m pytest
poetry run coverage run -m pytest
poetry run coverage xml
- name: Codecov
Expand Down
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ repos:
rev: 1.7.8
hooks:
- id: bandit
args: ['--skip=B101,B106,B404,B603,B607,B608']
args: [-c, pyproject.toml]
additional_dependencies: ['bandit[toml]']

- repo: https://github.com/pycqa/flake8
rev: 7.0.0
Expand Down
17 changes: 17 additions & 0 deletions metaphor/common/entity_id.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,3 +121,20 @@ def dataset_normalized_name(
return normalize_full_dataset_name(
".".join([part for part in [db, schema, table] if part is not None])
)


def parts_to_dataset_entity_id(
    platform: DataPlatform,
    account: Optional[str],
    database: Optional[str] = None,
    schema: Optional[str] = None,
    table: Optional[str] = None,
) -> EntityId:
    """
    Build a dataset entity ID from the dataset's platform, its account and
    the individual name parts (database, schema, table).

    The name parts are first combined into a normalized dataset name via
    `dataset_normalized_name`; the result is then handed to
    `to_dataset_entity_id` together with the platform and account.
    """
    normalized_name = dataset_normalized_name(database, schema, table)
    return to_dataset_entity_id(normalized_name, platform, account)
2 changes: 2 additions & 0 deletions metaphor/dbt/cloud/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class DbtRun(NamedTuple):
project_id: int
job_id: int
run_id: int
environment_id: int

def __str__(self) -> str:
return f"ID = {self.run_id}, project ID = {self.project_id}, job ID = {self.job_id}"
Expand Down Expand Up @@ -115,6 +116,7 @@ def get_last_successful_run(
project_id=run.get("project_id"),
job_id=run.get("job_definition_id"),
run_id=run.get("id"),
environment_id=run.get("environment_id"),
)

offset += page_size
Expand Down
100 changes: 0 additions & 100 deletions metaphor/dbt/cloud/discovery_api.py

This file was deleted.

5 changes: 5 additions & 0 deletions metaphor/dbt/cloud/discovery_api/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .generated.client import Client as DiscoveryAPIClient

__all__ = [
"DiscoveryAPIClient",
]
83 changes: 83 additions & 0 deletions metaphor/dbt/cloud/discovery_api/apollo-codegen-config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
{
"schemaNamespace": "MySchema",
"schemaDownload": {
"downloadMethod": {
"introspection": {
"endpointURL": "https://metadata.cloud.getdbt.com/graphql",
"httpMethod": {
"POST": {}
},
"includeDeprecatedInputValues": false,
"outputFormat": "SDL"
}
},
"downloadTimeout": 60,
"headers": [],
"outputPath": "./schema.graphql"
},
"experimentalFeatures": {
"clientControlledNullability": true,
"legacySafelistingCompatibleOperations": true
},
"operationManifest": {
"generateManifestOnCodeGeneration": false,
"path": "/operation/identifiers/path",
"version": "persistedQueries"
},
"input": {
"operationSearchPaths": [
"/search/path/**/*.graphql"
],
"schemaSearchPaths": [
"/path/to/schema.graphqls"
]
},
"output": {
"operations": {
"absolute": {
"accessModifier": "internal",
"path": "/absolute/path"
}
},
"schemaTypes": {
"moduleType": {
"embeddedInTarget": {
"accessModifier": "public",
"name": "SomeTarget"
}
},
"path": "/output/path"
},
"testMocks": {
"swiftPackage": {
"targetName": "SchemaTestMocks"
}
}
},
"options": {
"additionalInflectionRules": [
{
"pluralization": {
"replacementRegex": "animals",
"singularRegex": "animal"
}
}
],
"cocoapodsCompatibleImportStatements": true,
"conversionStrategies": {
"enumCases": "none",
"fieldAccessors": "camelCase",
"inputObjects": "camelCase"
},
"deprecatedEnumCases": "exclude",
"operationDocumentFormat": [
"definition"
],
"pruneGeneratedFiles": false,
"schemaDocumentation": "exclude",
"selectionSetInitializers": {
"localCacheMutations": true
},
"warningsOnDeprecatedUsage": "exclude"
}
}
5 changes: 5 additions & 0 deletions metaphor/dbt/cloud/discovery_api/ariadne-codegen.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[tool.ariadne-codegen]
schema_path = "schema.graphql"
queries_path = "queries.graphql"
async_client = false
target_package_name = "generated"
38 changes: 38 additions & 0 deletions metaphor/dbt/cloud/discovery_api/codegen.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Generate GraphQL client code

## Requirements

- Python >= 3.9
- `ariadne-codegen`
- `poetry`, `wget` and `tar` (invoked by `codegen.sh`)

## Usage

```bash
cd metaphor/dbt/cloud/discovery_api
./codegen.sh
```

## Existing files

### `codegen.sh`

Run this script to download the schema from dbt's Apollo server and generate the corresponding GraphQL client code.

### `queries.graphql`

The queries we will execute from the extractor class.

### `apollo-codegen-config.json`

Copied from [Full Codegen Configuration Example](https://www.apollographql.com/docs/ios/code-generation/codegen-configuration/#full-codegen-configuration-example) on Apollo's site. The only modifications are:

- `endpointURL`
- `outputPath`

### `ariadne-codegen.toml`

Controls the behavior of `ariadne-codegen`.

### `schema.graphql`

The upstream dbt GraphQL schema. This file is downloaded again from upstream every time `codegen.sh` is run.
17 changes: 17 additions & 0 deletions metaphor/dbt/cloud/discovery_api/codegen.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
#!/usr/bin/env bash
#
# Regenerates the GraphQL client code in this directory:
#   1. Downloads the Apollo codegen CLI and uses it to fetch the upstream
#      schema described by ./apollo-codegen-config.json.
#   2. Runs ariadne-codegen against ./ariadne-codegen.toml.
#   3. Formats the result with black and isort.

# Abort on the first failing command, on unset variables, and on a failure
# anywhere in a pipeline (e.g. a failed download piped into tar), so that
# codegen never runs against a stale or missing schema.
set -euo pipefail

# All paths below are relative to this script's directory; switch to it so
# the script also works when invoked from elsewhere.
cd "$(dirname "${BASH_SOURCE[0]}")"

# The tool is called `apollo-ios-cli`: https://www.apollographql.com/docs/ios/code-generation/codegen-cli/
# It does not mean it's iOS only.
APOLLO_IOS_CLI_VERSION=1.14.0

# Safety net: remove the downloaded CLI binary even if a later step fails.
trap 'rm -f ./apollo-ios-cli' EXIT

wget -c \
  "https://github.com/apollographql/apollo-ios/releases/download/${APOLLO_IOS_CLI_VERSION}/apollo-ios-cli.tar.gz" -O - | \
  tar -xz

./apollo-ios-cli fetch-schema --path ./apollo-codegen-config.json

# The binary is only needed for fetching the schema; drop it before
# generating and formatting code.
rm -f ./apollo-ios-cli

poetry run ariadne-codegen --config ariadne-codegen.toml
poetry run black .
poetry run isort .
Loading

0 comments on commit 127ce75

Please sign in to comment.