From e312d4632dc488b2227055ff6e4b62f8dd2a9a30 Mon Sep 17 00:00:00 2001 From: nlouwen Date: Tue, 3 Dec 2024 11:49:39 +0100 Subject: [PATCH 1/5] clear central on new longest lcs --- big_scape/comparison/lcs.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/big_scape/comparison/lcs.py b/big_scape/comparison/lcs.py index 492e52b6..adb4a199 100644 --- a/big_scape/comparison/lcs.py +++ b/big_scape/comparison/lcs.py @@ -287,9 +287,10 @@ def find_domain_lcs_region( # Length - # clear the list if it's not empty and the current match is longer + # clear the lists if it's not empty and the current match is longer if len(use_longest_list) > 0 and length > use_longest_list[0][1]: use_longest_list.clear() + use_central_list.clear() # add the match to the list if it's empty or the current match is equal length # or longer than the existing match @@ -523,9 +524,10 @@ def find_domain_lcs_protocluster( # Length - # clear the list if it's not empty and the current match is longer + # clear the lists if it's not empty and the current match is longer if len(use_longest_list) > 0 and length > use_longest_list[0][1]: use_longest_list.clear() + use_central_list.clear() # add the match to the list if it's empty or the current match is equal length # or longer than the existing match From efba643ad0886e57185341c2cd1df85bb3f3059d Mon Sep 17 00:00:00 2001 From: Catarina Loureiro Date: Thu, 5 Dec 2024 11:04:19 +0100 Subject: [PATCH 2/5] make datetime consistent across output --- big_scape/cli/benchmark_cli.py | 6 +++--- big_scape/cli/cli_validations.py | 4 ++-- big_scape/cli/cluster_cli.py | 6 +++--- big_scape/cli/query_cli.py | 6 +++--- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/big_scape/cli/benchmark_cli.py b/big_scape/cli/benchmark_cli.py index 14403cc5..7eac876d 100644 --- a/big_scape/cli/benchmark_cli.py +++ b/big_scape/cli/benchmark_cli.py @@ -52,12 +52,12 @@ def benchmark(ctx, *args, **kwargs): ctx.obj.update(ctx.params) ctx.obj["mode"] = 
"Benchmark" - # workflow validations - validate_output_paths(ctx) - # set start time and label set_start(ctx.obj) + # workflow validations + validate_output_paths(ctx) + # initialize logger init_logger(ctx.obj) init_logger_file(ctx.obj) diff --git a/big_scape/cli/cli_validations.py b/big_scape/cli/cli_validations.py index fd0bd365..958fde92 100644 --- a/big_scape/cli/cli_validations.py +++ b/big_scape/cli/cli_validations.py @@ -22,7 +22,7 @@ def set_start(param_dict) -> None: """get start time and set label in a run parameter dict""" start_time: datetime = datetime.now() - timestamp = start_time.strftime("%d-%m-%Y_%H-%M-%S") + timestamp = start_time.strftime("%Y-%m-%d_%H-%M-%S") if param_dict["label"]: param_dict["label"] = f"{param_dict['label']}_{timestamp}" else: @@ -116,7 +116,7 @@ def validate_output_dir(ctx, param, output_dir) -> Path: def validate_output_paths(ctx) -> None: """Sets the output paths to default output_dir if not provided""" - timestamp = time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime()) + timestamp = ctx.obj["label"] if "db_path" in ctx.obj and ctx.obj["db_path"] is None: db_path = ctx.obj["output_dir"] / Path(f"{ctx.obj['output_dir'].name}.db") diff --git a/big_scape/cli/cluster_cli.py b/big_scape/cli/cluster_cli.py index 96153d69..4418186d 100644 --- a/big_scape/cli/cluster_cli.py +++ b/big_scape/cli/cluster_cli.py @@ -138,6 +138,9 @@ def cluster(ctx, *args, **kwargs): ctx.obj["propagate"] = True # compatibility with query wrt cc generation ctx.obj["mode"] = "Cluster" + # set start time and run label + set_start(ctx.obj) + # workflow validations validate_binning_cluster_workflow(ctx) validate_pfam_path(ctx) @@ -145,9 +148,6 @@ def cluster(ctx, *args, **kwargs): validate_output_paths(ctx) validate_disk_only(ctx) - # set start time and run label - set_start(ctx.obj) - # initialize logger init_logger(ctx.obj) init_logger_file(ctx.obj) diff --git a/big_scape/cli/query_cli.py b/big_scape/cli/query_cli.py index ed571d3f..5609f187 100644 --- 
a/big_scape/cli/query_cli.py +++ b/big_scape/cli/query_cli.py @@ -99,6 +99,9 @@ def query(ctx, *args, **kwarg): ctx.obj["exclude_classes"] = None ctx.obj["include_classes"] = None + # set start time and label + set_start(ctx.obj) + # workflow validations validate_pfam_path(ctx) validate_output_paths(ctx) @@ -106,9 +109,6 @@ def query(ctx, *args, **kwarg): validate_query_record(ctx) validate_disk_only(ctx) - # set start time and label - set_start(ctx.obj) - # initialize logger init_logger(ctx.obj) init_logger_file(ctx.obj) From 56885515d9310e4793b1e70373cba8db457c9c45 Mon Sep 17 00:00:00 2001 From: Catarina Loureiro Date: Thu, 5 Dec 2024 11:08:18 +0100 Subject: [PATCH 3/5] only add edges with distance < 1 to the full network table --- big_scape/output/legacy_output.py | 1 + 1 file changed, 1 insertion(+) diff --git a/big_scape/output/legacy_output.py b/big_scape/output/legacy_output.py index e5a7c050..d34fee8f 100644 --- a/big_scape/output/legacy_output.py +++ b/big_scape/output/legacy_output.py @@ -638,6 +638,7 @@ def write_network_file( .where(edge_params_table.c.weights.in_(incl_weights)) .where(edge_params_table.c.alignment_mode == aln_mode) .where(edge_params_table.c.extend_strategy == ext_strat) + .where(distance_table.c.distance < 1) ) if cutoff is not None: From 16b51be0208c81621992d3841cf3f83a0aa49a6d Mon Sep 17 00:00:00 2001 From: Catarina Loureiro Date: Thu, 5 Dec 2024 11:19:06 +0100 Subject: [PATCH 4/5] removed unused import --- big_scape/cli/cli_validations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/big_scape/cli/cli_validations.py b/big_scape/cli/cli_validations.py index 958fde92..1e6c1915 100644 --- a/big_scape/cli/cli_validations.py +++ b/big_scape/cli/cli_validations.py @@ -7,7 +7,6 @@ from pathlib import Path from typing import Optional import os -import time import platform # from other modules From 29ea5a2d912df332723e231c53ec51b93239abd5 Mon Sep 17 00:00:00 2001 From: nlouwen Date: Thu, 5 Dec 2024 12:04:33 +0100 Subject: [PATCH 5/5] 
expand docs --- big_scape/comparison/lcs.py | 8 ++++++-- big_scape/output/legacy_output.py | 4 +++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/big_scape/comparison/lcs.py b/big_scape/comparison/lcs.py index adb4a199..82204720 100644 --- a/big_scape/comparison/lcs.py +++ b/big_scape/comparison/lcs.py @@ -287,7 +287,9 @@ def find_domain_lcs_region( # Length - # clear the lists if it's not empty and the current match is longer + # clear the longest list if it's not empty and the current match is longer. + # also clear the central list as we do not care about a shorter LCS even if it + # is more central if len(use_longest_list) > 0 and length > use_longest_list[0][1]: use_longest_list.clear() use_central_list.clear() @@ -524,7 +526,9 @@ def find_domain_lcs_protocluster( # Length - # clear the lists if it's not empty and the current match is longer + # clear the longest list if it's not empty and the current match is longer. + # also clear the central list as we do not care about a shorter LCS even if it + # is more central if len(use_longest_list) > 0 and length > use_longest_list[0][1]: use_longest_list.clear() use_central_list.clear() diff --git a/big_scape/output/legacy_output.py b/big_scape/output/legacy_output.py index d34fee8f..47287b64 100644 --- a/big_scape/output/legacy_output.py +++ b/big_scape/output/legacy_output.py @@ -638,11 +638,13 @@ def write_network_file( .where(edge_params_table.c.weights.in_(incl_weights)) .where(edge_params_table.c.alignment_mode == aln_mode) .where(edge_params_table.c.extend_strategy == ext_strat) - .where(distance_table.c.distance < 1) ) if cutoff is not None: select_statement = select_statement.where(distance_table.c.distance < cutoff) + else: + # still do not include edges with a distance of 1 + select_statement = select_statement.where(distance_table.c.distance < 1) edgelist = set(DB.execute(select_statement).fetchall())