Skip to content

Commit

Permalink
Looker crawler fails to load model [sc-30007] (#1049)
Browse files Browse the repository at this point in the history
* Looker crawler is failing without notifications [sc-30007]

* address comments

* update doc

* use relative path
  • Loading branch information
alyiwang authored Dec 16, 2024
1 parent e7c3ad9 commit 9f01e66
Show file tree
Hide file tree
Showing 6 changed files with 57 additions and 2 deletions.
10 changes: 10 additions & 0 deletions metaphor/looker/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,16 @@ If the looker users use a different URL to view content on Looker than the serve
alternative_base_url: <looker_base_url> // e.g. https://looker.my_company.com
```

#### Ignored Model Files

You can also specify a list of model files to ignore using the following config. Each pattern is a shell-style wildcard pattern (e.g. `*`) that is matched against the model file's path relative to the project base directory.

```yaml
ignored_model_files:
- "model1.model.lkml"
- "tmp/*"
```

#### SSL Verification

You can also disable SSL verification and change the request timeout if needed, e.g.
Expand Down
5 changes: 4 additions & 1 deletion metaphor/looker/config.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, Optional
from typing import Dict, List, Optional

from pydantic import model_validator
from pydantic.dataclasses import dataclass
Expand Down Expand Up @@ -41,6 +41,9 @@ class LookerRunConfig(BaseConfig):
# LookML git repository configuration
lookml_git_repo: Optional[GitRepoConfig] = None

# Patterns of LookML model files to ignore
ignored_model_files: Optional[List[str]] = None

# Source code URL for the project directory
project_source_url: Optional[str] = None

Expand Down
2 changes: 2 additions & 0 deletions metaphor/looker/extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ def __init__(self, config: LookerRunConfig) -> None:
self._connections = config.connections
self._lookml_dir = config.lookml_dir
self._lookml_git_repo = config.lookml_git_repo
self._ignored_model_files = config.ignored_model_files or []
self._project_source_url = config.project_source_url
self._include_personal_folders = config.include_personal_folders
self._explore_view_folder_name = config.explore_view_folder_name
Expand Down Expand Up @@ -109,6 +110,7 @@ async def extract(self) -> Collection[ENTITY_TYPES]:
connections,
self._explore_view_folder_name,
self._project_source_url,
self._ignored_model_files,
)

folder_map = self._fetch_folders()
Expand Down
18 changes: 18 additions & 0 deletions metaphor/looker/lookml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import operator
import os
from dataclasses import dataclass
from fnmatch import fnmatch
from typing import Dict, List, Optional, Set, Tuple

try:
Expand Down Expand Up @@ -611,11 +612,23 @@ def _load_model(
return raw_model, entity_urls, connection


def _is_ignored_model_file(
model_path: str, base_dir: str, ignored_model_files: List[str]
) -> bool:
"""Check if the model file is ignored by the config"""
relative_path = os.path.relpath(model_path, base_dir)
for ignored_model_file in ignored_model_files:
if fnmatch(relative_path, ignored_model_file):
return True
return False


def parse_project(
base_dir: str,
connections: Dict[str, LookerConnectionConfig],
explore_view_folder_name,
projectSourceUrl: Optional[str] = None,
ignored_model_files: List[str] = [],
) -> Tuple[ModelMap, List[VirtualView]]:
"""
parse the project under base_dir, returning a Model map and a list of virtual views including
Expand All @@ -626,7 +639,12 @@ def parse_project(
virtual_views = []

for model_path in glob.glob(f"{base_dir}/**/*.model.lkml", recursive=True):
if _is_ignored_model_file(model_path, base_dir, ignored_model_files):
logger.info(f"Ignoring model file {model_path} by config")
continue

model_name = os.path.basename(model_path)[0 : -len(".model.lkml")]

raw_model, entity_urls, connection = _load_model(
model_path, base_dir, connections, projectSourceUrl
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "metaphor-connectors"
version = "0.14.160"
version = "0.14.161"
license = "Apache-2.0"
description = "A collection of Python-based 'connectors' that extract metadata from various sources to ingest into the Metaphor app."
authors = ["Metaphor <[email protected]>"]
Expand Down
22 changes: 22 additions & 0 deletions tests/looker/test_lookml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,28 @@ def test_empty_model(test_root_dir):
assert virtual_views == []


def test_ignored_model(test_root_dir):
    # Parse the same project twice: once ignoring an exact file name and once
    # ignoring a wildcard pattern. Both runs must yield no models and no views.
    project_dir = f"{test_root_dir}/looker/empty_model"

    for patterns in (["model1.model.lkml"], ["model*"]):
        models_map, virtual_views = parse_project(
            project_dir,
            connection_map,
            VIEW_EXPLORE_FOLDER,
            ignored_model_files=patterns,
        )

        assert models_map == {}
        assert virtual_views == []


def test_basic(test_root_dir):
models_map, virtual_views = parse_project(
f"{test_root_dir}/looker/basic",
Expand Down

0 comments on commit 9f01e66

Please sign in to comment.