Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make CLI lazy load #2145

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
721 changes: 28 additions & 693 deletions metaflow/cli.py

Large diffs are not rendered by default.

Empty file.
96 changes: 96 additions & 0 deletions metaflow/cli_components/dump_cmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import pickle

from metaflow._vendor import click

from ..cli import echo_always, echo_dev_null
from ..datastore import TaskDataStoreSet
from ..exception import CommandException


@click.command(
    help="Get data artifacts of a task or all tasks in a step. "
    "The format for input-path is either <run_id>/<step_name> or "
    "<run_id>/<step_name>/<task_id>."
)
@click.argument("input-path")
@click.option(
    "--private/--no-private",
    default=False,
    show_default=True,
    help="Show also private attributes.",
)
@click.option(
    "--max-value-size",
    default=1000,
    show_default=True,
    type=int,
    help="Show only values that are smaller than this number. "
    "Set to 0 to see only keys.",
)
@click.option(
    "--include",
    type=str,
    default="",
    help="Include only artifacts in the given comma-separated list.",
)
@click.option(
    "--file", type=str, default=None, help="Serialize artifacts in the given file."
)
@click.pass_obj
def dump(obj, input_path, private=None, max_value_size=None, include=None, file=None):
    """Print, or pickle to a file, the artifacts of one task or of every task in a step.

    Parameters
    ----------
    obj
        Click context object; this command reads `obj.is_quiet` and
        `obj.flow_datastore` from it.
    input_path
        Either `run_id/step_name` or `run_id/step_name/task_id`.
    private, max_value_size
        Forwarded to the datastore's `format` / `to_dict` calls as
        `show_private` and `max_value_size`.
    include
        Comma-separated artifact names; empty entries are dropped.
    file
        When not None, artifacts are collected per pathspec and pickled into
        this path instead of being printed.

    Raises
    ------
    CommandException
        If `input_path` does not consist of exactly two or three
        "/"-separated parts.
    """
    # Progress messages are silenced in quiet mode; the artifact listing
    # itself always goes through echo_always further down.
    echo = echo_dev_null if obj.is_quiet else echo_always

    collected = {}
    wanted = set(filter(None, include.split(",")))
    render_opts = dict(
        show_private=private,
        max_value_size=max_value_size,
        include=wanted,
    )

    # Pathspec can either be run_id/step_name or run_id/step_name/task_id.
    segments = input_path.split("/")
    if len(segments) not in (2, 3):
        raise CommandException(
            "input_path should either be run_id/step_name or run_id/step_name/task_id"
        )
    run_id, step_name = segments[:2]
    task_id = segments[2] if len(segments) == 3 else None

    datastore_set = TaskDataStoreSet(
        obj.flow_datastore,
        run_id,
        steps=[step_name],
        prefetch_data_artifacts=wanted,
    )
    # A task id narrows the dump to a single datastore; otherwise take
    # every task datastore found for the step.
    ds_list = (
        [datastore_set.get_with_pathspec(input_path)]
        if task_id
        else list(datastore_set)
    )

    to_file = file is not None
    for task_ds in ds_list:
        banner = (
            "Dumping output of run_id=*{run_id}* "
            "step=*{step}* task_id=*{task_id}*".format(
                run_id=task_ds.run_id,
                step=task_ds.step_name,
                task_id=task_ds.task_id,
            )
        )
        echo(banner, fg="magenta")

        if to_file:
            collected[task_ds.pathspec] = task_ds.to_dict(**render_opts)
        else:
            echo_always(
                task_ds.format(**render_opts),
                highlight="green",
                highlight_bold=False,
                err=False,
            )

    if to_file:
        with open(file, "wb") as out:
            pickle.dump(collected, out, protocol=pickle.HIGHEST_PROTOCOL)
        echo("Artifacts written to *%s*" % file)
51 changes: 51 additions & 0 deletions metaflow/cli_components/init_cmd.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
from metaflow._vendor import click

from .. import parameters
from ..runtime import NativeRuntime


@parameters.add_custom_parameters(deploy_mode=False)
@click.command(help="Internal command to initialize a run.", hidden=True)
@click.option(
    "--run-id",
    default=None,
    required=True,
    help="ID for one execution of all steps in the flow.",
)
@click.option(
    "--task-id", default=None, required=True, help="ID for this instance of the step."
)
@click.option(
    "--tag",
    "tags",
    multiple=True,
    default=None,
    help="Tags for this instance of the step.",
)
@click.pass_obj
def init(obj, run_id=None, task_id=None, tags=None, **kwargs):
    """Register sticky tags, set the flow's constants and persist them for a run.

    Parameters
    ----------
    obj
        Click context object supplying the flow, graph, datastore, metadata,
        environment, package, logger, entrypoint, event logger and monitor.
    run_id
        Passed to `NativeRuntime` as the run identifier.
    task_id
        Passed to `persist_constants` as the task identifier.
    tags
        Values collected from repeated `--tag` options; added as sticky tags.
    **kwargs
        Remaining options (the user-specified parameters added through
        `add_custom_parameters`), handed to `_set_constants`.
    """
    # Why this is its own command rather than a flag on 'step': the
    # user-specified parameters must be captured via @add_custom_parameters,
    # and attaching them to 'step' could produce name clashes between those
    # parameters and our internal options. Note that user-specified
    # parameters are often defined as environment variables.

    obj.metadata.add_sticky_tags(tags=tags)

    # Positional order below must match the NativeRuntime signature.
    runtime_args = (
        obj.flow,
        obj.graph,
        obj.flow_datastore,
        obj.metadata,
        obj.environment,
        obj.package,
        obj.logger,
        obj.entrypoint,
        obj.event_logger,
        obj.monitor,
    )
    native_runtime = NativeRuntime(*runtime_args, run_id=run_id)

    obj.flow._set_constants(obj.graph, kwargs)
    native_runtime.persist_constants(task_id=task_id)
Loading
Loading