Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

topo link generation #87

Merged
merged 1 commit into from
Nov 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 15 additions & 4 deletions big_scape/comparison/binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No
Generator[RegionPair]: Generator for Region pairs in this bin
"""
for record_a, record_b in combinations(self.source_records, 2):
if record_a.parent_gbk == record_b.parent_gbk:
continue
if legacy_sorting:
sorted_a, sorted_b = sorted((record_a, record_b), key=sort_name_key)
pair = RecordPair(sorted_a, sorted_b)
Expand Down Expand Up @@ -289,14 +291,16 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No
continue
self.done_record_ids.add(region._db_id)

for bgc_a in connected_reference_regions:
for bgc_b in singleton_reference_regions:
for region_a in connected_reference_regions:
for region_b in singleton_reference_regions:
if region_a.parent_gbk == region_b.parent_gbk:
continue
if legacy_sorting:
sorted_a, sorted_b = sorted((bgc_a, bgc_b), key=sort_name_key)
sorted_a, sorted_b = sorted((region_a, region_b), key=sort_name_key)
pair = RecordPair(sorted_a, sorted_b)

else:
pair = RecordPair(bgc_a, bgc_b)
pair = RecordPair(region_a, region_b)

yield pair

Expand Down Expand Up @@ -571,6 +575,9 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No
record_a = self.record_id_to_obj[record_a_id]
record_b = self.record_id_to_obj[record_b_id]

if record_a.parent_gbk == record_b.parent_gbk:
continue

if legacy_sorting:
sorted_a, sorted_b = sorted((record_a, record_b), key=sort_name_key)
pair = RecordPair(sorted_a, sorted_b)
Expand Down Expand Up @@ -630,6 +637,10 @@ def generate_pairs(self, legacy_sorting=False) -> Generator[RecordPair, None, No
existing_distances = set(DB.execute(select_statement).fetchall())

for pair in self.bin.generate_pairs(legacy_sorting):
# this should never happen, since no edges should be generated for
# records of the same parent gbk
if pair.record_a.parent_gbk == pair.record_b.parent_gbk:
continue
# if the pair is not in the set of existing distances, yield it
if (
pair.record_a._db_id,
Expand Down
24 changes: 0 additions & 24 deletions big_scape/comparison/legacy_workflow_alt.py
Original file line number Diff line number Diff line change
Expand Up @@ -313,30 +313,6 @@ def calculate_scores_pair(
for pair in pairs:
logging.debug(pair)
pair.comparable_region.log_comparable_region()
logging.debug("")
if pair.record_a.parent_gbk == pair.record_b.parent_gbk:
results.append(
(
pair.record_a._db_id,
pair.record_b._db_id,
0.0,
1.0,
1.0,
1.0,
edge_param_id,
pair.comparable_region.lcs_a_start,
pair.comparable_region.lcs_a_stop,
pair.comparable_region.lcs_b_start,
pair.comparable_region.lcs_b_stop,
pair.comparable_region.a_start,
pair.comparable_region.a_stop,
pair.comparable_region.b_start,
pair.comparable_region.b_stop,
pair.comparable_region.reverse,
)
)
continue

jaccard = calc_jaccard_pair(pair)

if jaccard == 0.0:
Expand Down
18 changes: 12 additions & 6 deletions big_scape/output/html_template/output/html_content/js/bigscape.js
Original file line number Diff line number Diff line change
Expand Up @@ -327,12 +327,18 @@ function Bigscape(run_data, bs_data, bs_families, bs_alignment, bs_similarity, n
}
for (var a = 0; a < bs_data.length; a++) {
for (var b = 0; b < bs_data.length; b++) {
if ((a > b) && (bs_similarity[a][b] > intra_cutoff)) {
if ((bs_to_cl[a] !== bs_to_cl[b]) && (bs_similarity[a][b] < inter_cutoff)) {
continue;
// adding a topo link
if (bs_data[a]["hash"] == bs_data[b]["hash"]) {
graph.addLink(a, b, { weight: 0.01 });
}
else{
if ((a > b) && (bs_similarity[a][b] > intra_cutoff)) {
if ((bs_to_cl[a] !== bs_to_cl[b]) && (bs_similarity[a][b] < inter_cutoff)) {
continue;
}
var weight = bs_similarity[a][b];
graph.addLink(a, b, { weight: weight });
}
var weight = bs_similarity[a][b];
graph.addLink(a, b, { weight: weight });
}
}
}
Expand Down Expand Up @@ -450,7 +456,7 @@ function Bigscape(run_data, bs_data, bs_families, bs_alignment, bs_similarity, n
.attr("stroke-width", link["data"]["weight"] * 10);

if (graph.getNode(link.fromId).data.hash === graph.getNode(link.toId).data.hash) {
line = line.attr("stroke-dasharray", "10,10")
line = line.attr("stroke-dasharray", "10,10").attr("stroke-width", link["data"]["weight"] * 500);
}
return line
});
Expand Down
22 changes: 12 additions & 10 deletions test/comparison/test_binning.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,11 +169,11 @@ def test_num_pairs_correct_with_query_ref(self):
def test_legacy_sorting(self):
"""Tests whether the legacy sorting option in bin.pairs() correctly orders the pairs"""

gbk_a = GBK(Path("test1.gbk"), "test", "test")
gbk_a = GBK(Path("test1.gbk"), "test1", "test")
bgc_a = BGCRecord(gbk_a, 0, 0, 10, False, "")
gbk_b = GBK(Path("test2.gbk"), "test", "test")
gbk_b = GBK(Path("test2.gbk"), "test2", "test")
bgc_b = BGCRecord(gbk_b, 0, 0, 10, False, "")
gbk_c = GBK(Path("test3.gbk"), "test", "test")
gbk_c = GBK(Path("test3.gbk"), "test3", "test")
bgc_c = BGCRecord(gbk_c, 0, 0, 10, False, "")

# due to the order, this should generate a list of pairs as follows without legacy sort:
Expand Down Expand Up @@ -734,16 +734,18 @@ def test_cull_singletons_cutoff(self):
class TestMixComparison(TestCase):
def test_mix_iter(self):
"""Tests whether a new mix bin can be created for comparison"""
gbk = GBK(Path("test"), "test", source_type=bs_enums.SOURCE_TYPE.QUERY)
gbk1 = GBK(Path("test"), "test1", source_type=bs_enums.SOURCE_TYPE.QUERY)
gbk2 = GBK(Path("test"), "test2", source_type=bs_enums.SOURCE_TYPE.QUERY)
gbk3 = GBK(Path("test"), "test3", source_type=bs_enums.SOURCE_TYPE.QUERY)

bgc_a = BGCRecord(gbk, 0, 0, 10, False, "")
bgc_a.parent_gbk = gbk
bgc_a = BGCRecord(gbk1, 0, 0, 10, False, "")
bgc_a.parent_gbk = gbk1

bgc_b = BGCRecord(gbk, 0, 0, 10, False, "")
bgc_b.parent_gbk = gbk
bgc_b = BGCRecord(gbk2, 0, 0, 10, False, "")
bgc_b.parent_gbk = gbk2

bgc_c = BGCRecord(gbk, 0, 0, 10, False, "")
bgc_c.parent_gbk = gbk
bgc_c = BGCRecord(gbk3, 0, 0, 10, False, "")
bgc_c.parent_gbk = gbk3

bgc_list = [bgc_a, bgc_b, bgc_c]

Expand Down
Loading