diff --git a/dlt/cli/_dlt.py b/dlt/cli/_dlt.py index a19b4ed4a9..72db8fa250 100644 --- a/dlt/cli/_dlt.py +++ b/dlt/cli/_dlt.py @@ -16,7 +16,7 @@ from dlt.cli.init_command import ( init_command, - list_verified_sources_command, + list_sources_command, DLT_INIT_DOCS_URL, DEFAULT_VERIFIED_SOURCES_REPO, ) @@ -75,9 +75,9 @@ def init_command_wrapper( @utils.track_command("list_sources", False) -def list_verified_sources_command_wrapper(repo_location: str, branch: str) -> int: +def list_sources_command_wrapper(repo_location: str, branch: str) -> int: try: - list_verified_sources_command(repo_location, branch) + list_sources_command(repo_location, branch) except Exception as ex: on_exception(ex, DLT_INIT_DOCS_URL) return -1 @@ -314,11 +314,11 @@ def main() -> int: ), ) init_cmd.add_argument( - "--list-verified-sources", + "--list-sources", "-l", default=False, action="store_true", - help="List available verified sources", + help="List available sources", ) init_cmd.add_argument( "source", @@ -606,8 +606,8 @@ def main() -> int: del command_kwargs["list_pipelines"] return pipeline_command_wrapper(**command_kwargs) elif args.command == "init": - if args.list_verified_sources: - return list_verified_sources_command_wrapper(args.location, args.branch) + if args.list_sources: + return list_sources_command_wrapper(args.location, args.branch) else: if not args.source or not args.destination: init_cmd.print_usage() diff --git a/dlt/cli/init_command.py b/dlt/cli/init_command.py index 7f4e223186..777f66bcfc 100644 --- a/dlt/cli/init_command.py +++ b/dlt/cli/init_command.py @@ -46,11 +46,6 @@ DEFAULT_VERIFIED_SOURCES_REPO = "https://github.com/dlt-hub/verified-sources.git" INIT_MODULE_NAME = "init" SOURCES_MODULE_NAME = "sources" -SKIP_CORE_SOURCES_FOLDERS = [ - "helpers", - "init", - "rest_api", -] # TODO: remove rest api here once pipeline file is here def _get_template_files( @@ -136,6 +131,18 @@ def _get_dependency_system(dest_storage: FileStorage) -> str: return None +def 
_list_core_sources() -> Dict[str, SourceConfiguration]: + local_path = Path(os.path.dirname(os.path.realpath(__file__))).parent / SOURCES_MODULE_NAME + core_sources_storage = FileStorage(str(local_path)) + + sources: Dict[str, SourceConfiguration] = {} + for source_name in files_ops.get_sources_names(core_sources_storage, source_type="core"): + sources[source_name] = files_ops.get_core_source_configuration( + core_sources_storage, source_name + ) + return sources + + def _list_verified_sources( repo_location: str, branch: str = None ) -> Dict[str, SourceConfiguration]: @@ -143,9 +150,11 @@ def _list_verified_sources( sources_storage = FileStorage(clone_storage.make_full_path(SOURCES_MODULE_NAME)) sources: Dict[str, SourceConfiguration] = {} - for source_name in files_ops.get_verified_source_names(sources_storage): + for source_name in files_ops.get_sources_names(sources_storage, source_type="verified"): try: - sources[source_name] = files_ops.get_verified_source_files(sources_storage, source_name) + sources[source_name] = files_ops.get_verified_source_configuration( + sources_storage, source_name + ) except Exception as ex: fmt.warning(f"Verified source {source_name} not available: {ex}") @@ -228,14 +237,29 @@ def _welcome_message( ) -def list_verified_sources_command(repo_location: str, branch: str = None) -> None: - fmt.echo("Looking up for verified sources in %s..." % fmt.bold(repo_location)) +def list_sources_command(repo_location: str, branch: str = None) -> None: + fmt.echo("---") + fmt.echo("Available dlt core sources:") + fmt.echo("---") + core_sources = _list_core_sources() + for source_name, source_configuration in core_sources.items(): + msg = "%s: %s" % (fmt.bold(source_name), source_configuration.doc) + fmt.echo(msg) + + fmt.echo("---") + fmt.echo("Looking up verified sources at %s..." 
% fmt.bold(repo_location)) + fmt.echo("Available verified sources:") + fmt.echo("---") for source_name, source_configuration in _list_verified_sources(repo_location, branch).items(): reqs = source_configuration.requirements dlt_req_string = str(reqs.dlt_requirement_base) - msg = "%s: %s" % (fmt.bold(source_name), source_configuration.doc) + msg = "%s:" % (fmt.bold(source_name)) + if source_name in core_sources.keys(): + msg += " (Deprecated since dlt 1.0.0 in favor of core source of the same name) " + msg += source_configuration.doc if not reqs.is_installed_dlt_compatible(): msg += fmt.warning_style(" [needs update: %s]" % (dlt_req_string)) + fmt.echo(msg) @@ -253,15 +277,13 @@ def init_command( # lookup core sources local_path = Path(os.path.dirname(os.path.realpath(__file__))).parent / SOURCES_MODULE_NAME - local_sources_storage = FileStorage(str(local_path)) + core_sources_storage = FileStorage(str(local_path)) # discover type of source - source_type: files_ops.SOURCE_TYPE = "generic" + source_type: files_ops.TSourceType = "generic" if ( - local_sources_storage.has_folder(source_name) - and source_name not in SKIP_CORE_SOURCES_FOLDERS - and not omit_core_sources - ): + source_name in files_ops.get_sources_names(core_sources_storage, source_type="core") + ) and not omit_core_sources: source_type = "core" else: if omit_core_sources: @@ -269,8 +291,10 @@ def init_command( fmt.echo("Looking up verified sources at %s..." 
% fmt.bold(repo_location)) clone_storage = git.get_fresh_repo_files(repo_location, get_dlt_repos_dir(), branch=branch) # copy dlt source files from here - sources_storage = FileStorage(clone_storage.make_full_path(SOURCES_MODULE_NAME)) - if sources_storage.has_folder(source_name): + verified_sources_storage = FileStorage(clone_storage.make_full_path(SOURCES_MODULE_NAME)) + if source_name in files_ops.get_sources_names( + verified_sources_storage, source_type="verified" + ): source_type = "verified" # look up init storage in core @@ -302,7 +326,9 @@ def init_command( if source_type == "verified": # get pipeline files - source_configuration = files_ops.get_verified_source_files(sources_storage, source_name) + source_configuration = files_ops.get_verified_source_configuration( + verified_sources_storage, source_name + ) # get file index from remote verified source files being copied remote_index = files_ops.get_remote_source_index( source_configuration.storage.storage_path, @@ -337,18 +363,9 @@ def init_command( source_configuration.files.extend(template_files) else: - pipeline_dest_script = source_name + "_pipeline.py" - if source_type == "core": - source_configuration = SourceConfiguration( - source_type, - "dlt.sources." 
+ source_name, - local_sources_storage, - source_name + "_pipeline.py", - pipeline_dest_script, - [".gitignore"], - SourceRequirements([]), - "", + source_configuration = files_ops.get_core_source_configuration( + core_sources_storage, source_name ) else: if not is_valid_schema_name(source_name): @@ -358,14 +375,17 @@ def init_command( "pipeline", init_storage, pipeline_script, - pipeline_dest_script, + source_name + "_pipeline.py", template_files, SourceRequirements([]), "", ) - if dest_storage.has_file(pipeline_dest_script): - fmt.warning("Pipeline script %s already exists, exiting" % pipeline_dest_script) + if dest_storage.has_file(source_configuration.dest_pipeline_script): + fmt.warning( + "Pipeline script %s already exists, exiting" + % source_configuration.dest_pipeline_script + ) return # add .dlt/*.toml files to be copied @@ -517,6 +537,10 @@ def init_command( "A source with the name %s was not found. Using a template to create a new source" " and pipeline with name %s." % (fmt.bold(source_name), fmt.bold(source_name)) ) + fmt.echo( + "In case you did not want to use a template, run 'dlt init -l' to see a list of" + " available sources." 
+ ) if use_generic_template and source_configuration.source_type != "generic": fmt.warning("The --generic parameter is discarded if a source is found.") diff --git a/dlt/cli/pipeline_files.py b/dlt/cli/pipeline_files.py index 2b4448527e..0bb23ed7aa 100644 --- a/dlt/cli/pipeline_files.py +++ b/dlt/cli/pipeline_files.py @@ -16,16 +16,23 @@ from dlt.cli import utils from dlt.cli.requirements import SourceRequirements +TSourceType = Literal["core", "verified", "generic"] SOURCES_INIT_INFO_ENGINE_VERSION = 1 SOURCES_INIT_INFO_FILE = ".sources" IGNORE_FILES = ["*.py[cod]", "*$py.class", "__pycache__", "py.typed", "requirements.txt"] -IGNORE_SOURCES = [".*", "_*"] -SOURCE_TYPE = Literal["core", "verified", "generic"] +IGNORE_VERIFIED_SOURCES = [".*", "_*"] +IGNORE_CORE_SOURCES = [ + ".*", + "_*", + "helpers", + "init", + "rest_api", +] # TODO: remove rest api here once pipeline file is here class SourceConfiguration(NamedTuple): - source_type: SOURCE_TYPE + source_type: TSourceType source_module_prefix: str storage: FileStorage pipeline_script: str @@ -149,12 +156,13 @@ def get_remote_source_index( } -def get_verified_source_names(sources_storage: FileStorage) -> List[str]: +def get_sources_names(sources_storage: FileStorage, source_type: TSourceType) -> List[str]: candidates: List[str] = [] + ignore_cases = IGNORE_VERIFIED_SOURCES if source_type == "verified" else IGNORE_CORE_SOURCES for name in [ n for n in sources_storage.list_folder_dirs(".", to_root=False) - if not any(fnmatch.fnmatch(n, ignore) for ignore in IGNORE_SOURCES) + if not any(fnmatch.fnmatch(n, ignore) for ignore in ignore_cases) ]: # must contain at least one valid python script if any(f.endswith(".py") for f in sources_storage.list_folder_files(name, to_root=False)): @@ -162,7 +170,35 @@ def get_verified_source_names(sources_storage: FileStorage) -> List[str]: return candidates -def get_verified_source_files( +def _get_docstring_for_module(sources_storage: FileStorage, source_name: str) -> str: + # read 
the docs + init_py = os.path.join(source_name, utils.MODULE_INIT) + docstring: str = "" + if sources_storage.has_file(init_py): + docstring = get_module_docstring(sources_storage.load(init_py)) + if docstring: + docstring = docstring.splitlines()[0] + return docstring + + +def get_core_source_configuration( + sources_storage: FileStorage, source_name: str +) -> SourceConfiguration: + pipeline_file = source_name + "_pipeline.py" + + return SourceConfiguration( + "core", + "dlt.sources." + source_name, + sources_storage, + pipeline_file, + pipeline_file, + [".gitignore"], + SourceRequirements([]), + _get_docstring_for_module(sources_storage, source_name), + ) + + +def get_verified_source_configuration( sources_storage: FileStorage, source_name: str ) -> SourceConfiguration: if not sources_storage.has_folder(source_name): @@ -191,13 +227,6 @@ def get_verified_source_files( if all(not fnmatch.fnmatch(file, ignore) for ignore in IGNORE_FILES) ] ) - # read the docs - init_py = os.path.join(source_name, utils.MODULE_INIT) - docstring: str = "" - if sources_storage.has_file(init_py): - docstring = get_module_docstring(sources_storage.load(init_py)) - if docstring: - docstring = docstring.splitlines()[0] # read requirements requirements_path = os.path.join(source_name, utils.REQUIREMENTS_TXT) if sources_storage.has_file(requirements_path): @@ -213,7 +242,7 @@ def get_verified_source_files( example_script, files, requirements, - docstring, + _get_docstring_for_module(sources_storage, source_name), ) diff --git a/docs/website/docs/reference/command-line-interface.md b/docs/website/docs/reference/command-line-interface.md index 8e816fb622..693c068a4f 100644 --- a/docs/website/docs/reference/command-line-interface.md +++ b/docs/website/docs/reference/command-line-interface.md @@ -23,9 +23,9 @@ version if run again with existing `source` name. You are warned if files will b ### Specify your own "verified sources" repository. 
You can use `--location ` option to specify your own repository with sources. Typically you would [fork ours](https://github.com/dlt-hub/verified-sources) and start customizing and adding sources ie. to use them for your team or organization. You can also specify a branch with `--branch ` ie. to test a version being developed. -### List all verified sources +### List all sources ```sh -dlt init --list-verified-sources +dlt init --list-sources ``` Shows all available verified sources and their short descriptions. For each source, checks if your local `dlt` version requires update and prints the relevant warning. diff --git a/docs/website/docs/walkthroughs/add-a-verified-source.md b/docs/website/docs/walkthroughs/add-a-verified-source.md index d7cd24b544..144b805974 100644 --- a/docs/website/docs/walkthroughs/add-a-verified-source.md +++ b/docs/website/docs/walkthroughs/add-a-verified-source.md @@ -21,10 +21,10 @@ mkdir various_pipelines cd various_pipelines ``` -List available verified sources to see their names and descriptions: +List available sources to see their names and descriptions: ```sh -dlt init --list-verified-sources +dlt init --list-sources ``` Now pick one of the source names, for example `pipedrive` and a destination i.e. 
`bigquery`: diff --git a/tests/cli/common/test_cli_invoke.py b/tests/cli/common/test_cli_invoke.py index f856162479..0c6be1ea24 100644 --- a/tests/cli/common/test_cli_invoke.py +++ b/tests/cli/common/test_cli_invoke.py @@ -106,9 +106,9 @@ def test_invoke_init_chess_and_template(script_runner: ScriptRunner) -> None: assert result.returncode == 0 -def test_invoke_list_verified_sources(script_runner: ScriptRunner) -> None: +def test_invoke_list_sources(script_runner: ScriptRunner) -> None: known_sources = ["chess", "sql_database", "google_sheets", "pipedrive"] - result = script_runner.run(["dlt", "init", "--list-verified-sources"]) + result = script_runner.run(["dlt", "init", "--list-sources"]) assert result.returncode == 0 for known_source in known_sources: assert known_source in result.stdout diff --git a/tests/cli/common/test_telemetry_command.py b/tests/cli/common/test_telemetry_command.py index d2ccc81ebe..d2c1f958f2 100644 --- a/tests/cli/common/test_telemetry_command.py +++ b/tests/cli/common/test_telemetry_command.py @@ -132,7 +132,7 @@ def instrument_raises_2(in_raises_2: bool) -> int: def test_instrumentation_wrappers() -> None: from dlt.cli._dlt import ( init_command_wrapper, - list_verified_sources_command_wrapper, + list_sources_command_wrapper, DEFAULT_VERIFIED_SOURCES_REPO, pipeline_command_wrapper, deploy_command_wrapper, @@ -155,7 +155,7 @@ def test_instrumentation_wrappers() -> None: assert msg["properties"]["success"] is False SENT_ITEMS.clear() - list_verified_sources_command_wrapper(DEFAULT_VERIFIED_SOURCES_REPO, None) + list_sources_command_wrapper(DEFAULT_VERIFIED_SOURCES_REPO, None) msg = SENT_ITEMS[0] assert msg["event"] == "command_list_sources" diff --git a/tests/cli/test_init_command.py b/tests/cli/test_init_command.py index a69c9885c8..61ff08312d 100644 --- a/tests/cli/test_init_command.py +++ b/tests/cli/test_init_command.py @@ -57,7 +57,7 @@ def get_verified_source_candidates(repo_dir: str) -> List[str]: sources_storage = 
FileStorage(os.path.join(repo_dir, SOURCES_MODULE_NAME)) # enumerate all candidate verified sources - return files_ops.get_verified_source_names(sources_storage) + return files_ops.get_sources_names(sources_storage, source_type="verified") def test_init_command_pipeline_template(repo_dir: str, project_files: FileStorage) -> None: @@ -114,7 +114,18 @@ def test_init_command_chess_verified_source(repo_dir: str, project_files: FileSt raise -def test_init_list_verified_pipelines(repo_dir: str, project_files: FileStorage) -> None: +def test_list_helper_functions(repo_dir: str, project_files: FileStorage) -> None: + # see whether all core sources are found + sources = init_command._list_core_sources() + assert set(sources.keys()) == set(CORE_SOURCES) + + sources = init_command._list_verified_sources(repo_dir) + assert len(sources.keys()) > 10 + known_sources = ["chess", "sql_database", "google_sheets", "pipedrive"] + assert set(known_sources).issubset(set(sources.keys())) + + +def test_init_list_sources(repo_dir: str, project_files: FileStorage) -> None: sources = init_command._list_verified_sources(repo_dir) # a few known sources must be there known_sources = ["chess", "sql_database", "google_sheets", "pipedrive"] @@ -123,16 +134,14 @@ def test_init_list_verified_pipelines(repo_dir: str, project_files: FileStorage) for k_p in known_sources: assert sources[k_p].doc # run the command - init_command.list_verified_sources_command(repo_dir) + init_command.list_sources_command(repo_dir) -def test_init_list_verified_pipelines_update_warning( - repo_dir: str, project_files: FileStorage -) -> None: +def test_init_list_sources_update_warning(repo_dir: str, project_files: FileStorage) -> None: """Sources listed include a warning if a different dlt version is required""" with mock.patch.object(SourceRequirements, "current_dlt_version", return_value="0.0.1"): with io.StringIO() as buf, contextlib.redirect_stdout(buf): - 
init_command.list_sources_command(repo_dir) _out = buf.getvalue() # Check one listed source @@ -241,7 +250,7 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) sources_storage.save(new_file_path, new_content) sources_storage.delete(del_file_path) - source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") + source_files = files_ops.get_verified_source_configuration(sources_storage, "pipedrive") remote_index = files_ops.get_remote_source_index( sources_storage.storage_path, source_files.files, ">=0.3.5" ) @@ -287,7 +296,7 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) mod_file_path_2 = os.path.join("pipedrive", "new_munger_X.py") sources_storage.save(mod_file_path_2, local_content) local_index = files_ops.load_verified_sources_local_index("pipedrive") - source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") + source_files = files_ops.get_verified_source_configuration(sources_storage, "pipedrive") remote_index = files_ops.get_remote_source_index( sources_storage.storage_path, source_files.files, ">=0.3.5" ) @@ -330,7 +339,7 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) sources_storage.save(new_file_path, local_content) sources_storage.save(mod_file_path, local_content) project_files.delete(del_file_path) - source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") + source_files = files_ops.get_verified_source_configuration(sources_storage, "pipedrive") remote_index = files_ops.get_remote_source_index( sources_storage.storage_path, source_files.files, ">=0.3.5" ) @@ -343,7 +352,7 @@ def test_init_code_update_index_diff(repo_dir: str, project_files: FileStorage) # generate a conflict by deleting file locally that is modified on remote project_files.delete(mod_file_path) - source_files = files_ops.get_verified_source_files(sources_storage, "pipedrive") + source_files = 
files_ops.get_verified_source_configuration(sources_storage, "pipedrive") remote_index = files_ops.get_remote_source_index( sources_storage.storage_path, source_files.files, ">=0.3.5" )