diff --git a/big_scape/comparison/binning.py b/big_scape/comparison/binning.py index 04698288..8f27eda2 100644 --- a/big_scape/comparison/binning.py +++ b/big_scape/comparison/binning.py @@ -69,6 +69,8 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No Generator[RegionPair]: Generator for Region pairs in this bin """ for record_a, record_b in combinations(self.source_records, 2): + if record_a.parent_gbk == record_b.parent_gbk: + continue if legacy_sorting: sorted_a, sorted_b = sorted((record_a, record_b), key=sort_name_key) pair = RecordPair(sorted_a, sorted_b) @@ -289,14 +291,16 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No continue self.done_record_ids.add(region._db_id) - for bgc_a in connected_reference_regions: - for bgc_b in singleton_reference_regions: + for region_a in connected_reference_regions: + for region_b in singleton_reference_regions: + if region_a.parent_gbk == region_b.parent_gbk: + continue if legacy_sorting: - sorted_a, sorted_b = sorted((bgc_a, bgc_b), key=sort_name_key) + sorted_a, sorted_b = sorted((region_a, region_b), key=sort_name_key) pair = RecordPair(sorted_a, sorted_b) else: - pair = RecordPair(bgc_a, bgc_b) + pair = RecordPair(region_a, region_b) yield pair @@ -571,6 +575,9 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No record_a = self.record_id_to_obj[record_a_id] record_b = self.record_id_to_obj[record_b_id] + if record_a.parent_gbk == record_b.parent_gbk: + continue + if legacy_sorting: sorted_a, sorted_b = sorted((record_a, record_b), key=sort_name_key) pair = RecordPair(sorted_a, sorted_b) @@ -630,6 +637,10 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No existing_distances = set(DB.execute(select_statement).fetchall()) for pair in self.bin.generate_pairs(legacy_sorting): + # this should never happen, since no edges should be generated for + # records of the same parent gbk + if 
pair.record_a.parent_gbk == pair.record_b.parent_gbk: + continue # if the pair is not in the set of existing distances, yield it if ( pair.record_a._db_id, diff --git a/big_scape/comparison/legacy_workflow_alt.py b/big_scape/comparison/legacy_workflow_alt.py index 93d0d5c2..f3d484d3 100644 --- a/big_scape/comparison/legacy_workflow_alt.py +++ b/big_scape/comparison/legacy_workflow_alt.py @@ -313,30 +313,6 @@ def calculate_scores_pair( for pair in pairs: logging.debug(pair) pair.comparable_region.log_comparable_region() - logging.debug("") - if pair.record_a.parent_gbk == pair.record_b.parent_gbk: - results.append( - ( - pair.record_a._db_id, - pair.record_b._db_id, - 0.0, - 1.0, - 1.0, - 1.0, - edge_param_id, - pair.comparable_region.lcs_a_start, - pair.comparable_region.lcs_a_stop, - pair.comparable_region.lcs_b_start, - pair.comparable_region.lcs_b_stop, - pair.comparable_region.a_start, - pair.comparable_region.a_stop, - pair.comparable_region.b_start, - pair.comparable_region.b_stop, - pair.comparable_region.reverse, - ) - ) - continue - jaccard = calc_jaccard_pair(pair) if jaccard == 0.0: diff --git a/big_scape/output/html_template/output/html_content/js/bigscape.js b/big_scape/output/html_template/output/html_content/js/bigscape.js index 9947fe2e..a3c7470d 100644 --- a/big_scape/output/html_template/output/html_content/js/bigscape.js +++ b/big_scape/output/html_template/output/html_content/js/bigscape.js @@ -327,12 +327,18 @@ function Bigscape(run_data, bs_data, bs_families, bs_alignment, bs_similarity, n } for (var a = 0; a < bs_data.length; a++) { for (var b = 0; b < bs_data.length; b++) { - if ((a > b) && (bs_similarity[a][b] > intra_cutoff)) { - if ((bs_to_cl[a] !== bs_to_cl[b]) && (bs_similarity[a][b] < inter_cutoff)) { - continue; + // adding a topo link + if (bs_data[a]["hash"] == bs_data[b]["hash"]) { + graph.addLink(a, b, { weight: 0.01 }); + } + else{ + if ((a > b) && (bs_similarity[a][b] > intra_cutoff)) { + if ((bs_to_cl[a] !== bs_to_cl[b]) && 
(bs_similarity[a][b] < inter_cutoff)) { + continue; + } + var weight = bs_similarity[a][b]; + graph.addLink(a, b, { weight: weight }); } - var weight = bs_similarity[a][b]; - graph.addLink(a, b, { weight: weight }); } } } @@ -450,7 +456,7 @@ function Bigscape(run_data, bs_data, bs_families, bs_alignment, bs_similarity, n .attr("stroke-width", link["data"]["weight"] * 10); if (graph.getNode(link.fromId).data.hash === graph.getNode(link.toId).data.hash) { - line = line.attr("stroke-dasharray", "10,10") + line = line.attr("stroke-dasharray", "10,10").attr("stroke-width", link["data"]["weight"] * 500); } return line }); diff --git a/test/comparison/test_binning.py b/test/comparison/test_binning.py index 5e75f91c..611e167a 100644 --- a/test/comparison/test_binning.py +++ b/test/comparison/test_binning.py @@ -169,11 +169,11 @@ def test_num_pairs_correct_with_query_ref(self): def test_legacy_sorting(self): """Tests whether the legacy sorting option in bin.pairs() correctly orders the pairs""" - gbk_a = GBK(Path("test1.gbk"), "test", "test") + gbk_a = GBK(Path("test1.gbk"), "test1", "test") bgc_a = BGCRecord(gbk_a, 0, 0, 10, False, "") - gbk_b = GBK(Path("test2.gbk"), "test", "test") + gbk_b = GBK(Path("test2.gbk"), "test2", "test") bgc_b = BGCRecord(gbk_b, 0, 0, 10, False, "") - gbk_c = GBK(Path("test3.gbk"), "test", "test") + gbk_c = GBK(Path("test3.gbk"), "test3", "test") bgc_c = BGCRecord(gbk_c, 0, 0, 10, False, "") # due to the order, this should generate a list of pairs as follows without legacy sort: @@ -734,16 +734,18 @@ def test_cull_singletons_cutoff(self): class TestMixComparison(TestCase): def test_mix_iter(self): """Tests whether a new mix bin can be created for comparison""" - gbk = GBK(Path("test"), "test", source_type=bs_enums.SOURCE_TYPE.QUERY) + gbk1 = GBK(Path("test"), "test1", source_type=bs_enums.SOURCE_TYPE.QUERY) + gbk2 = GBK(Path("test"), "test2", source_type=bs_enums.SOURCE_TYPE.QUERY) + gbk3 = GBK(Path("test"), "test3", 
source_type=bs_enums.SOURCE_TYPE.QUERY) - bgc_a = BGCRecord(gbk, 0, 0, 10, False, "") - bgc_a.parent_gbk = gbk + bgc_a = BGCRecord(gbk1, 0, 0, 10, False, "") + bgc_a.parent_gbk = gbk1 - bgc_b = BGCRecord(gbk, 0, 0, 10, False, "") - bgc_b.parent_gbk = gbk + bgc_b = BGCRecord(gbk2, 0, 0, 10, False, "") + bgc_b.parent_gbk = gbk2 - bgc_c = BGCRecord(gbk, 0, 0, 10, False, "") - bgc_c.parent_gbk = gbk + bgc_c = BGCRecord(gbk3, 0, 0, 10, False, "") + bgc_c.parent_gbk = gbk3 bgc_list = [bgc_a, bgc_b, bgc_c]