Skip to content

Commit

Permalink
add core sources to the dlt init -l list
Browse files Browse the repository at this point in the history
  • Loading branch information
sh-rp committed Sep 3, 2024
1 parent b7caaa2 commit f30fff5
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 73 deletions.
14 changes: 7 additions & 7 deletions dlt/cli/_dlt.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

from dlt.cli.init_command import (
init_command,
list_verified_sources_command,
list_sources_command,
DLT_INIT_DOCS_URL,
DEFAULT_VERIFIED_SOURCES_REPO,
)
Expand Down Expand Up @@ -75,9 +75,9 @@ def init_command_wrapper(


@utils.track_command("list_sources", False)
def list_verified_sources_command_wrapper(repo_location: str, branch: str) -> int:
def list_sources_command_wrapper(repo_location: str, branch: str) -> int:
try:
list_verified_sources_command(repo_location, branch)
list_sources_command(repo_location, branch)
except Exception as ex:
on_exception(ex, DLT_INIT_DOCS_URL)
return -1
Expand Down Expand Up @@ -314,11 +314,11 @@ def main() -> int:
),
)
init_cmd.add_argument(
"--list-verified-sources",
"--list-sources",
"-l",
default=False,
action="store_true",
help="List available verified sources",
help="List available sources",
)
init_cmd.add_argument(
"source",
Expand Down Expand Up @@ -606,8 +606,8 @@ def main() -> int:
del command_kwargs["list_pipelines"]
return pipeline_command_wrapper(**command_kwargs)
elif args.command == "init":
if args.list_verified_sources:
return list_verified_sources_command_wrapper(args.location, args.branch)
if args.list_sources:
return list_sources_command_wrapper(args.location, args.branch)
else:
if not args.source or not args.destination:
init_cmd.print_usage()
Expand Down
90 changes: 57 additions & 33 deletions dlt/cli/init_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,6 @@
DEFAULT_VERIFIED_SOURCES_REPO = "https://github.com/dlt-hub/verified-sources.git"
INIT_MODULE_NAME = "init"
SOURCES_MODULE_NAME = "sources"
SKIP_CORE_SOURCES_FOLDERS = [
"helpers",
"init",
"rest_api",
] # TODO: remove rest api here once pipeline file is here


def _get_template_files(
Expand Down Expand Up @@ -136,16 +131,30 @@ def _get_dependency_system(dest_storage: FileStorage) -> str:
return None


def _list_core_sources() -> Dict[str, SourceConfiguration]:
local_path = Path(os.path.dirname(os.path.realpath(__file__))).parent / SOURCES_MODULE_NAME
core_sources_storage = FileStorage(str(local_path))

sources: Dict[str, SourceConfiguration] = {}
for source_name in files_ops.get_sources_names(core_sources_storage, source_type="core"):
sources[source_name] = files_ops.get_core_source_configuration(
core_sources_storage, source_name
)
return sources


def _list_verified_sources(
repo_location: str, branch: str = None
) -> Dict[str, SourceConfiguration]:
clone_storage = git.get_fresh_repo_files(repo_location, get_dlt_repos_dir(), branch=branch)
sources_storage = FileStorage(clone_storage.make_full_path(SOURCES_MODULE_NAME))

sources: Dict[str, SourceConfiguration] = {}
for source_name in files_ops.get_verified_source_names(sources_storage):
for source_name in files_ops.get_sources_names(sources_storage, source_type="verified"):
try:
sources[source_name] = files_ops.get_verified_source_files(sources_storage, source_name)
sources[source_name] = files_ops.get_verified_source_configuration(
sources_storage, source_name
)
except Exception as ex:
fmt.warning(f"Verified source {source_name} not available: {ex}")

Expand Down Expand Up @@ -228,14 +237,29 @@ def _welcome_message(
)


def list_verified_sources_command(repo_location: str, branch: str = None) -> None:
fmt.echo("Looking up for verified sources in %s..." % fmt.bold(repo_location))
def list_sources_command(repo_location: str, branch: str = None) -> None:
fmt.echo("---")
fmt.echo("Available dlt core sources:")
fmt.echo("---")
core_sources = _list_core_sources()
for source_name, source_configuration in core_sources.items():
msg = "%s: %s" % (fmt.bold(source_name), source_configuration.doc)
fmt.echo(msg)

fmt.echo("---")
fmt.echo("Looking up verified sources at %s..." % fmt.bold(repo_location))
fmt.echo("Available verified sources:")
fmt.echo("---")
for source_name, source_configuration in _list_verified_sources(repo_location, branch).items():
reqs = source_configuration.requirements
dlt_req_string = str(reqs.dlt_requirement_base)
msg = "%s: %s" % (fmt.bold(source_name), source_configuration.doc)
msg = "%s:" % (fmt.bold(source_name))
if source_name in core_sources.keys():
msg += " (Deprecated since dlt 1.0.0 in favor of core source of the same name) "
msg += source_configuration.doc
if not reqs.is_installed_dlt_compatible():
msg += fmt.warning_style(" [needs update: %s]" % (dlt_req_string))

fmt.echo(msg)


Expand All @@ -253,24 +277,24 @@ def init_command(

# lookup core sources
local_path = Path(os.path.dirname(os.path.realpath(__file__))).parent / SOURCES_MODULE_NAME
local_sources_storage = FileStorage(str(local_path))
core_sources_storage = FileStorage(str(local_path))

# discover type of source
source_type: files_ops.SOURCE_TYPE = "generic"
source_type: files_ops.TSourceType = "generic"
if (
local_sources_storage.has_folder(source_name)
and source_name not in SKIP_CORE_SOURCES_FOLDERS
and not omit_core_sources
):
source_name in files_ops.get_sources_names(core_sources_storage, source_type="core")
) and not omit_core_sources:
source_type = "core"
else:
if omit_core_sources:
fmt.echo("Omitting dlt core sources.")
fmt.echo("Looking up verified sources at %s..." % fmt.bold(repo_location))
clone_storage = git.get_fresh_repo_files(repo_location, get_dlt_repos_dir(), branch=branch)
# copy dlt source files from here
sources_storage = FileStorage(clone_storage.make_full_path(SOURCES_MODULE_NAME))
if sources_storage.has_folder(source_name):
verified_sources_storage = FileStorage(clone_storage.make_full_path(SOURCES_MODULE_NAME))
if source_name in files_ops.get_sources_names(
verified_sources_storage, source_type="verified"
):
source_type = "verified"

# look up init storage in core
Expand Down Expand Up @@ -302,7 +326,9 @@ def init_command(

if source_type == "verified":
# get pipeline files
source_configuration = files_ops.get_verified_source_files(sources_storage, source_name)
source_configuration = files_ops.get_verified_source_configuration(
verified_sources_storage, source_name
)
# get file index from remote verified source files being copied
remote_index = files_ops.get_remote_source_index(
source_configuration.storage.storage_path,
Expand Down Expand Up @@ -337,18 +363,9 @@ def init_command(
source_configuration.files.extend(template_files)

else:
pipeline_dest_script = source_name + "_pipeline.py"

if source_type == "core":
source_configuration = SourceConfiguration(
source_type,
"dlt.sources." + source_name,
local_sources_storage,
source_name + "_pipeline.py",
pipeline_dest_script,
[".gitignore"],
SourceRequirements([]),
"",
source_configuration = files_ops.get_core_source_configuration(
core_sources_storage, source_name
)
else:
if not is_valid_schema_name(source_name):
Expand All @@ -358,14 +375,17 @@ def init_command(
"pipeline",
init_storage,
pipeline_script,
pipeline_dest_script,
source_name + "_pipeline.py",
template_files,
SourceRequirements([]),
"",
)

if dest_storage.has_file(pipeline_dest_script):
fmt.warning("Pipeline script %s already exists, exiting" % pipeline_dest_script)
if dest_storage.has_file(source_configuration.dest_pipeline_script):
fmt.warning(
"Pipeline script %s already exists, exiting"
% source_configuration.dest_pipeline_script
)
return

# add .dlt/*.toml files to be copied
Expand Down Expand Up @@ -517,6 +537,10 @@ def init_command(
"A source with the name %s was not found. Using a template to create a new source"
" and pipeline with name %s." % (fmt.bold(source_name), fmt.bold(source_name))
)
fmt.echo(
"In case you did not want to use a template, run 'dlt init -l' to see a list of"
" available sources."
)

if use_generic_template and source_configuration.source_type != "generic":
fmt.warning("The --generic parameter is discarded if a source is found.")
Expand Down
57 changes: 43 additions & 14 deletions dlt/cli/pipeline_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,16 +16,23 @@
from dlt.cli import utils
from dlt.cli.requirements import SourceRequirements

TSourceType = Literal["core", "verified", "generic"]

SOURCES_INIT_INFO_ENGINE_VERSION = 1
SOURCES_INIT_INFO_FILE = ".sources"
IGNORE_FILES = ["*.py[cod]", "*$py.class", "__pycache__", "py.typed", "requirements.txt"]
IGNORE_SOURCES = [".*", "_*"]
SOURCE_TYPE = Literal["core", "verified", "generic"]
IGNORE_VERIFIED_SOURCES = [".*", "_*"]
IGNORE_CORE_SOURCES = [
".*",
"_*",
"helpers",
"init",
"rest_api",
] # TODO: remove rest api here once pipeline file is here


class SourceConfiguration(NamedTuple):
source_type: SOURCE_TYPE
source_type: TSourceType
source_module_prefix: str
storage: FileStorage
pipeline_script: str
Expand Down Expand Up @@ -149,20 +156,49 @@ def get_remote_source_index(
}


def get_verified_source_names(sources_storage: FileStorage) -> List[str]:
def get_sources_names(sources_storage: FileStorage, source_type: TSourceType) -> List[str]:
candidates: List[str] = []
ignore_cases = IGNORE_VERIFIED_SOURCES if source_type == "verified" else IGNORE_CORE_SOURCES
for name in [
n
for n in sources_storage.list_folder_dirs(".", to_root=False)
if not any(fnmatch.fnmatch(n, ignore) for ignore in IGNORE_SOURCES)
if not any(fnmatch.fnmatch(n, ignore) for ignore in ignore_cases)
]:
# must contain at least one valid python script
if any(f.endswith(".py") for f in sources_storage.list_folder_files(name, to_root=False)):
candidates.append(name)
return candidates


def get_verified_source_files(
def _get_docstring_for_module(sources_storage: FileStorage, source_name: str) -> str:
# read the docs
init_py = os.path.join(source_name, utils.MODULE_INIT)
docstring: str = ""
if sources_storage.has_file(init_py):
docstring = get_module_docstring(sources_storage.load(init_py))
if docstring:
docstring = docstring.splitlines()[0]
return docstring


def get_core_source_configuration(
sources_storage: FileStorage, source_name: str
) -> SourceConfiguration:
pipeline_file = source_name + "_pipeline.py"

return SourceConfiguration(
"core",
"dlt.sources." + source_name,
sources_storage,
pipeline_file,
pipeline_file,
[".gitignore"],
SourceRequirements([]),
_get_docstring_for_module(sources_storage, source_name),
)


def get_verified_source_configuration(
sources_storage: FileStorage, source_name: str
) -> SourceConfiguration:
if not sources_storage.has_folder(source_name):
Expand Down Expand Up @@ -191,13 +227,6 @@ def get_verified_source_files(
if all(not fnmatch.fnmatch(file, ignore) for ignore in IGNORE_FILES)
]
)
# read the docs
init_py = os.path.join(source_name, utils.MODULE_INIT)
docstring: str = ""
if sources_storage.has_file(init_py):
docstring = get_module_docstring(sources_storage.load(init_py))
if docstring:
docstring = docstring.splitlines()[0]
# read requirements
requirements_path = os.path.join(source_name, utils.REQUIREMENTS_TXT)
if sources_storage.has_file(requirements_path):
Expand All @@ -213,7 +242,7 @@ def get_verified_source_files(
example_script,
files,
requirements,
docstring,
_get_docstring_for_module(sources_storage, source_name),
)


Expand Down
4 changes: 2 additions & 2 deletions docs/website/docs/reference/command-line-interface.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ version if run again with existing `source` name. You are warned if files will b
### Specify your own "verified sources" repository.
You can use `--location <repo_url or local folder>` option to specify your own repository with sources. Typically you would [fork ours](https://github.com/dlt-hub/verified-sources) and start customizing and adding sources ie. to use them for your team or organization. You can also specify a branch with `--branch <name>` ie. to test a version being developed.

### List all verified sources
### List all sources
```sh
dlt init --list-verified-sources
dlt init --list-sources
```
Shows all available verified sources and their short descriptions. For each source, checks if your local `dlt` version requires update
and prints the relevant warning.
Expand Down
4 changes: 2 additions & 2 deletions docs/website/docs/walkthroughs/add-a-verified-source.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,10 @@ mkdir various_pipelines
cd various_pipelines
```

List available verified sources to see their names and descriptions:
List available sources to see their names and descriptions:

```sh
dlt init --list-verified-sources
dlt init --list-sources
```

Now pick one of the source names, for example `pipedrive` and a destination i.e. `bigquery`:
Expand Down
4 changes: 2 additions & 2 deletions tests/cli/common/test_cli_invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,9 +106,9 @@ def test_invoke_init_chess_and_template(script_runner: ScriptRunner) -> None:
assert result.returncode == 0


def test_invoke_list_verified_sources(script_runner: ScriptRunner) -> None:
def test_invoke_list_sources(script_runner: ScriptRunner) -> None:
known_sources = ["chess", "sql_database", "google_sheets", "pipedrive"]
result = script_runner.run(["dlt", "init", "--list-verified-sources"])
result = script_runner.run(["dlt", "init", "--list-sources"])
assert result.returncode == 0
for known_source in known_sources:
assert known_source in result.stdout
Expand Down
4 changes: 2 additions & 2 deletions tests/cli/common/test_telemetry_command.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ def instrument_raises_2(in_raises_2: bool) -> int:
def test_instrumentation_wrappers() -> None:
from dlt.cli._dlt import (
init_command_wrapper,
list_verified_sources_command_wrapper,
list_sources_command_wrapper,
DEFAULT_VERIFIED_SOURCES_REPO,
pipeline_command_wrapper,
deploy_command_wrapper,
Expand All @@ -155,7 +155,7 @@ def test_instrumentation_wrappers() -> None:
assert msg["properties"]["success"] is False

SENT_ITEMS.clear()
list_verified_sources_command_wrapper(DEFAULT_VERIFIED_SOURCES_REPO, None)
list_sources_command_wrapper(DEFAULT_VERIFIED_SOURCES_REPO, None)
msg = SENT_ITEMS[0]
assert msg["event"] == "command_list_sources"

Expand Down
Loading

0 comments on commit f30fff5

Please sign in to comment.