Skip to content

Commit

Permalink
Merge pull request #153 from ufal/download-dataset-cli
Browse files Browse the repository at this point in the history
Download dataset cli
  • Loading branch information
kasnerz authored Nov 13, 2024
2 parents 3232bde + 69cf3a3 commit e21997f
Showing 1 changed file with 59 additions and 22 deletions.
81 changes: 59 additions & 22 deletions factgenie/bin/run.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
#!/usr/bin/env python3

# The cli is CLI entry point.
# The run.py module is the CLI entry point.
# Local imports inside individual functions make the CLI much faster to start.
# Use them as much as possible and minimize imports at the top of the file.
import click
import argparse
import yaml
from flask.cli import FlaskGroup
from factgenie.campaigns import CampaignMode
from factgenie.app import app
from factgenie import ROOT_DIR, MAIN_CONFIG_PATH, CAMPAIGN_DIR, INPUT_DIR, OUTPUT_DIR
from factgenie.campaigns import CampaignMode # required because of the click args choices


def list_datasets(app):
"""List all available datasets."""
"""List locally available datasets."""
from factgenie.workflows import get_local_dataset_overview

dataset_overview = get_local_dataset_overview(app)
Expand All @@ -22,6 +19,22 @@ def list_datasets(app):
print(dataset_id)


def list_downloadable(app):
    """Print each dataset id from the resources config and whether it is available locally."""
    from factgenie import workflows, utils

    local_datasets = workflows.get_local_dataset_overview(app)
    resources = utils.load_resources_config()

    # Flag every resource entry in place with its local availability
    # before anything is printed.
    for dataset_id, dataset_info in resources.items():
        dataset_info["downloaded"] = dataset_id in local_datasets

    for dataset_id, dataset_info in resources.items():
        print(f"{dataset_id} - downloaded: {dataset_info['downloaded']}")



def list_outputs(app):
"""List all available outputs."""
from factgenie.workflows import get_model_outputs_overview
Expand Down Expand Up @@ -60,6 +73,22 @@ def list_campaigns(app):
print(campaign_id)


@app.cli.command("list")
@click.argument("output", type=click.Choice(["datasets", "outputs", "campaigns", "downloadable"]))
def list_data(output: str):
    """List available data."""
    # Map each accepted choice to the function that prints it.
    printers = {
        "datasets": list_datasets,
        "outputs": list_outputs,
        "campaigns": list_campaigns,
        "downloadable": list_downloadable,
    }

    printer = printers.get(output)
    if printer is None:
        # Value outside the known choices: fall back to the command's help text.
        click.echo(list_data.get_help(click.Context(list_data)))
        return

    printer(app)


def show_dataset_info(app, dataset_id: str):
"""Show information about a dataset."""

Expand Down Expand Up @@ -90,18 +119,6 @@ def show_campaign_info(app, campaign_id: str):
pp({"metadata": campaign.metadata, "stats": campaign.get_stats()})


@app.cli.command("list")
@click.argument("output", type=click.Choice(["datasets", "outputs", "campaigns"]))
def list_data(output: str):
    """List available data."""
    # Map each accepted choice to the function that prints it.
    printers = {
        "datasets": list_datasets,
        "outputs": list_outputs,
        "campaigns": list_campaigns,
    }

    printer = printers.get(output)
    if printer is not None:
        printer(app)


@app.cli.command("info")
@click.option("-d", "--dataset", type=str, help="Show information about a dataset.")
@click.option("-c", "--campaign", type=str, help="Show information about a campaign.")
Expand All @@ -111,6 +128,28 @@ def info(dataset: str, campaign: str):
show_dataset_info(app, dataset)
elif campaign:
show_campaign_info(app, campaign)
else:
click.echo(info.get_help(click.Context(info)))


@app.cli.command("download")
@click.option(
    "-d",
    "--dataset_id",
    type=str,
    help=(
        "Download dataset input data. "
        "Factgenie does not use references so the inputs define the datasets. "
        "If the dataset class defines model outputs and annotations we download them too."
    ),
)
def download_data(dataset_id: str):
    """Download the dataset given by --dataset_id."""
    # Imported locally, following the file's convention of keeping
    # top-level imports minimal so the CLI starts quickly.
    import factgenie.workflows as workflows

    if dataset_id:
        workflows.download_dataset(app, dataset_id)
    else:
        # No dataset requested: print THIS command's help. The previous code
        # printed the help of the unrelated `info` command (copy-paste slip).
        click.echo(download_data.get_help(click.Context(download_data)))


@app.cli.command("create_llm_campaign")
Expand Down Expand Up @@ -205,10 +244,7 @@ def create_llm_campaign(


@app.cli.command("run_llm_campaign")
@click.argument(
"campaign_id",
type=str,
)
@click.argument("campaign_id", type=str)
def run_llm_campaign(campaign_id: str):
from factgenie.models import ModelFactory
from factgenie import llm_campaign
Expand Down Expand Up @@ -250,6 +286,7 @@ def create_app(**kwargs):
import factgenie.workflows as workflows
from apscheduler.schedulers.background import BackgroundScheduler
from factgenie.utils import check_login
from factgenie import ROOT_DIR, MAIN_CONFIG_PATH, CAMPAIGN_DIR, INPUT_DIR, OUTPUT_DIR

logger = logging.getLogger(__name__)

Expand Down

0 comments on commit e21997f

Please sign in to comment.