Skip to content

Commit

Permalink
Add option to split command that removes prefixes in filenames generated from field values
Browse files Browse the repository at this point in the history
  • Loading branch information
kevinschaper committed Oct 3, 2024
1 parent 87c34b9 commit 8760f51
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 1 deletion.
4 changes: 4 additions & 0 deletions src/koza/cli_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def _check_row_count(type: Literal["node", "edge"]):
def split_file(file: str,
fields: str,
format: OutputFormat = OutputFormat.tsv,
remove_prefixes: bool = False,
output_dir: str = "./output"):
db = duckdb.connect(":memory:")

Expand All @@ -146,6 +147,9 @@ def split_file(file: str,
list_of_value_dicts = [dict(zip(keys, v)) for v in values]

def clean_value_for_filename(value):
    """Turn a field value into a fragment that is safe to embed in a file name.

    ``remove_prefixes`` is read from the enclosing scope. When it is true and
    the value contains a colon, only the text after the last colon is kept
    (``NCBIGene:9606`` -> ``9606``). Otherwise a ``biolink:`` prefix is
    dropped, and any remaining spaces and colons become underscores
    (``NCBIGene:9606`` -> ``NCBIGene_9606``).
    """
    if remove_prefixes and ':' in value:
        # Keep only the part that follows the final colon.
        _, _, value = value.rpartition(":")

    without_biolink = value.replace("biolink:", "")
    without_spaces = without_biolink.replace(" ", "_")
    return without_spaces.replace(":", "_")

def generate_filename_from_row(row):
Expand Down
3 changes: 2 additions & 1 deletion src/koza/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,11 @@ def validate(
def split(
    file: str = typer.Argument(..., help="Path to the source kgx file to be split"),
    fields: str = typer.Argument(..., help="Comma separated list of fields to split on"),
    remove_prefixes: bool = typer.Option(
        False,
        help="Remove prefixes from the file names for values from the specified fields. (e.g., NCBIGene:9606 becomes 9606)",
    ),
    output_dir: str = typer.Option(default="output", help="Path to output directory"),
):
    """Split a file by fields"""
    # NOTE: the docstring above is kept short and unchanged on purpose; Typer
    # surfaces it as the command's help text.
    #
    # Parameters (all forwarded to split_file):
    #   file            - path of the KGX file to split.
    #   fields          - comma separated names of the fields to split on.
    #   remove_prefixes - when true, prefixes are dropped from the field values
    #                     used in the generated file names.
    #   output_dir      - directory that receives the generated files.
    #
    # split_file must be invoked exactly once, and with remove_prefixes passed
    # through; an additional call without it would run the split a second
    # time while ignoring the option.
    split_file(file, fields, remove_prefixes=remove_prefixes, output_dir=output_dir)

# Script entry point: invoke typer_app only when this module is executed
# directly, not when it is imported. typer_app is defined outside this excerpt
# (presumably the Typer application object - confirm against the full file).
if __name__ == "__main__":
    typer_app()

0 comments on commit 8760f51

Please sign in to comment.