Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Converted manifest to single list of files #317

Merged
merged 2 commits into from
Nov 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
158 changes: 66 additions & 92 deletions cumulus_library/actions/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,60 +135,14 @@ def run_protected_table_builder(
)


def run_table_builder(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
*,
db_parser: databases.DatabaseParser = None,
def _run_workflow(
config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str
) -> None:
"""Loads modules from a manifest and executes code via BaseTableBuilder
"""Loads workflow config from toml definitions and executes workflow

:param config: a StudyConfig object
:param manifest: a StudyManifest object
:keyword db_parser: an object implementing DatabaseParser for the target database
"""
for file in manifest.get_table_builder_file_list():
_load_and_execute_builder(
config=config,
manifest=manifest,
filename=file,
db_parser=db_parser,
)


def run_counts_builders(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
) -> None:
"""Loads counts modules from a manifest and executes code via BaseTableBuilder

While a count is a form of statistics, it is treated separately from other
statistics because it is, by design, always going to be static against a
given dataset, where other statistical methods may use sampling techniques
or adjustable input parameters that may need to be preserved for later review.

:param config: a StudyConfig object
:param manifest: a StudyManifest object
"""
for file in manifest.get_counts_builder_file_list():
_load_and_execute_builder(
config=config,
manifest=manifest,
filename=file,
)


def run_statistics_builders(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
) -> None:
"""Loads statistics modules from toml definitions and executes

:param config: a StudyConfig object
:param manifest: a StudyManifest object
"""
if len(manifest.get_statistics_file_list()) == 0:
return
existing_stats = []
if not config.stats_build:
existing_stats = (
Expand All @@ -199,40 +153,41 @@ def run_statistics_builders(
)
.fetchall()
)
for file in manifest.get_statistics_file_list():
# This open is a bit redundant with the open inside of the PSM builder,
# but we're letting it slide so that builders function similarly
# across the board
safe_timestamp = base_utils.get_tablename_safe_iso_timestamp()
toml_path = pathlib.Path(f"{manifest._study_path}/{file}")
with open(toml_path, "rb") as file:
stats_config = tomllib.load(file)
config_type = stats_config["config_type"]
target_table = stats_config.get("target_table", stats_config.get("table_prefix", ""))

if (target_table,) in existing_stats and not config.stats_build:
continue
if config_type == "psm":
# This open is a bit redundant with the open inside of the PSM builder,
# but we're letting it slide so that builders function similarly
# across the board
safe_timestamp = base_utils.get_tablename_safe_iso_timestamp()
toml_path = pathlib.Path(f"{manifest._study_path}/{filename}")
with open(toml_path, "rb") as file:
workflow_config = tomllib.load(file)
config_type = workflow_config["config_type"]
target_table = workflow_config.get("target_table", workflow_config.get("table_prefix", ""))

if (target_table,) in existing_stats and not config.stats_build:
return
match config_type:
case "psm":
builder = psm_builder.PsmBuilder(
toml_config_path=toml_path,
config=stats_config,
config=workflow_config,
data_path=manifest.data_path / f"{manifest.get_study_prefix()}/psm",
)
elif config_type == "valueset":
case "valueset":
builder = valueset_builder.ValuesetBuilder(
toml_config_path=toml_path,
config=stats_config,
config=workflow_config,
data_path=manifest.data_path / f"{manifest.get_study_prefix()}/valueset",
)
else:
raise errors.StudyManifestParsingError( # pragma: no cover
f"{toml_path} references an invalid statistics type {config_type}."
case _: # pragma: no cover
raise errors.StudyManifestParsingError(
f"{toml_path} references an invalid workflow type {config_type}."
)
builder.execute_queries(
config=config,
manifest=manifest,
table_suffix=safe_timestamp,
)
builder.execute_queries(
config=config,
manifest=manifest,
table_suffix=safe_timestamp,
)
if config_type in set(item.value for item in enums.StatisticsTypes):
log_utils.log_statistics(
config=config,
manifest=manifest,
Expand All @@ -242,11 +197,11 @@ def run_statistics_builders(
)


def run_matching_table_builder(
def build_matching_files(
config: base_utils.StudyConfig,
manifest: study_manifest.StudyManifest,
*,
builder: str,
builder: str | None,
db_parser: databases.DatabaseParser = None,
):
"""targets all table builders matching a target string for running
Expand All @@ -256,36 +211,55 @@ def run_matching_table_builder(
:keyword builder: filename of a module implementing a TableBuilder
:keyword db_parser: an object implementing DatabaseParser for the target database"""
all_generators = manifest.get_all_generators()
for file in all_generators:
if builder and file.find(builder) == -1:
continue
_load_and_execute_builder(
config=config,
manifest=manifest,
filename=file,
db_parser=db_parser,
)
matches = []
if not builder: # pragma: no cover
matches = all_generators
else:
for file in all_generators:
if file.find(builder) != -1:
matches.append(file)
build_study(config, manifest, db_parser=db_parser, file_list=matches)


def build_study(
    config: base_utils.StudyConfig,
    manifest: study_manifest.StudyManifest,
    *,
    db_parser: databases.DatabaseParser = None,
    continue_from: str | None = None,
    file_list: list | None = None,
) -> None:
    """Builds a study by dispatching each file to the handler for its file type

    Files are processed in list order. Python files are run as table builders,
    toml files are run as workflows, and sql files are run as raw queries.

    :param config: a StudyConfig object
    :param manifest: a StudyManifest object
    :keyword db_parser: an object implementing DatabaseParser for the target
        database (only passed to python table builders)
    :keyword continue_from: Name of a file to resume table creation from. Only
        used when file_list is not provided.
    :keyword file_list: an explicit list of filenames to build. If None, the
        list is requested from the manifest via get_file_list(continue_from).
    :raises StudyManifestParsingError: if a file does not end in .py, .toml,
        or .sql
    """
    if file_list is None:
        file_list = manifest.get_file_list(continue_from)
    for file in file_list:
        if file.endswith(".py"):
            _load_and_execute_builder(
                config=config,
                manifest=manifest,
                filename=file,
                db_parser=db_parser,
            )
        elif file.endswith(".toml"):
            _run_workflow(config=config, manifest=manifest, filename=file)
        elif file.endswith(".sql"):
            _run_raw_queries(config=config, manifest=manifest, filename=file)
        else:
            # Name the offending file so a bad manifest entry can be found
            # without digging through a bare traceback.
            raise errors.StudyManifestParsingError(
                f"{file} is not a supported file type. "
                "Expected a file ending in .py, .toml, or .sql."
            )


def _run_raw_queries(
config: base_utils.StudyConfig, manifest: study_manifest.StudyManifest, filename: str
):
queries = []
for file in manifest.get_sql_file_list(continue_from):
for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{file}")):
queries.append([query, file])
if len(queries) == 0:
return []
for query in base_utils.parse_sql(base_utils.load_text(f"{manifest._study_path}/{filename}")):
queries.append([query, filename])
for query in queries:
query[0] = base_utils.update_query_if_schema_specified(query[0], manifest)
query[0] = query[0].replace(
Expand All @@ -298,7 +272,7 @@ def build_study(
# We want to only show a progress bar if we are :not: printing SQL lines
with base_utils.get_progress_bar(disable=config.verbose) as progress:
task = progress.add_task(
f"Creating {manifest.get_study_prefix()} study in db...",
f"Building tables from {filename}...",
total=len(queries),
visible=not config.verbose,
)
Expand Down
34 changes: 25 additions & 9 deletions cumulus_library/builders/protected_table_builder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
"""Builder for creating tables for tracking state/logging changes"""

import pathlib
import tomllib

from cumulus_library import (
BaseTableBuilder,
base_utils,
Expand Down Expand Up @@ -64,12 +67,25 @@ def prepare_queries(
TRANSACTION_COLS_TYPES,
)
)
if manifest._study_config.get("statistics_config"):
self.queries.append(
base_templates.get_ctas_empty_query(
db_schema,
statistics,
STATISTICS_COLS,
STATISTICS_COLS_TYPES,
)
)
files = manifest.get_all_workflows()
if len(files) == 0:
return
dogversioning marked this conversation as resolved.
Show resolved Hide resolved
stats_types = set(item.value for item in enums.StatisticsTypes)
# In this loop, we are just checking to see if :any: workflow is a stats
# type workflow - if so, we'll create a table to hold data of stats runs
# (if it doesn't already exist) outside of the study lifecycle for
# persistence reasons
for file in files:
toml_path = pathlib.Path(f"{manifest._study_path}/{file}")
with open(toml_path, "rb") as file:
workflow_config = tomllib.load(file)
if workflow_config["config_type"] in stats_types:
self.queries.append(
base_templates.get_ctas_empty_query(
db_schema,
statistics,
STATISTICS_COLS,
STATISTICS_COLS_TYPES,
)
)
return
12 changes: 3 additions & 9 deletions cumulus_library/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,6 @@ def clean_and_build_study(
config=self.get_config(manifest),
manifest=manifest,
)
builder.run_table_builder(config=self.get_config(manifest), manifest=manifest)

else:
log_utils.log_transaction(
Expand All @@ -127,11 +126,6 @@ def clean_and_build_study(
manifest=manifest,
continue_from=continue_from,
)
builder.run_counts_builders(config=self.get_config(manifest), manifest=manifest)
builder.run_statistics_builders(
config=self.get_config(manifest),
manifest=manifest,
)
log_utils.log_transaction(
config=self.get_config(manifest),
manifest=manifest,
Expand All @@ -150,7 +144,7 @@ def clean_and_build_study(
)
raise e

def run_matching_table_builder(
def build_matching_files(
self,
target: pathlib.Path,
table_builder_name: str,
Expand All @@ -164,7 +158,7 @@ def run_matching_table_builder(
:param options: The dictionary of study-specific options
"""
manifest = study_manifest.StudyManifest(target, options=options)
builder.run_matching_table_builder(
builder.build_matching_files(
config=self.get_config(manifest),
manifest=manifest,
builder=table_builder_name,
Expand Down Expand Up @@ -330,7 +324,7 @@ def run_cli(args: dict):
elif args["action"] == "build":
for target in args["target"]:
if args["builder"]:
runner.run_matching_table_builder(
runner.build_matching_files(
study_dict[target], args["builder"], options=args["options"]
)
else:
Expand Down
6 changes: 6 additions & 0 deletions cumulus_library/enums.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ class ProtectedTables(enum.Enum):
TRANSACTIONS = "lib_transactions"


class StatisticsTypes(enum.Enum):
    """Workflow config types whose runs produce statistics sampling artifacts

    Each member's value is the `config_type` string a workflow uses to
    identify itself as this kind of statistics workflow.
    """

    # Workflows declaring config_type = "psm"
    PSM = "psm"


class LogStatuses(enum.Enum):
DEBUG = "debug"
ERROR = "error"
Expand Down
12 changes: 2 additions & 10 deletions cumulus_library/studies/core/manifest.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
study_prefix = "core"

[table_builder_config]
[file_config]
file_names = [
"builder_prereq_tables.py",
"builder_allergyintolerance.py",
Expand All @@ -9,17 +9,9 @@ file_names = [
"builder_encounter.py",
"builder_documentreference.py",
"builder_medicationrequest.py",
"builder_observation.py"
]

[sql_config]
file_names = [
"builder_observation.py",
"observation_type.sql",
"meta_date.sql",
]

[counts_builder_config]
file_names = [
"count_core.py"
]

Expand Down
2 changes: 1 addition & 1 deletion cumulus_library/studies/discovery/manifest.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
study_prefix = "discovery"

[table_builder_config]
[file_config]
file_names = [
"code_detection.py",
]
Expand Down
Loading
Loading