Skip to content

Commit

Permalink
some more layoutContigs comments
Browse files Browse the repository at this point in the history
  • Loading branch information
Dmitry-Antipov committed Oct 10, 2023
1 parent 1434eb3 commit a1a44ec
Showing 1 changed file with 15 additions and 2 deletions.
17 changes: 15 additions & 2 deletions src/scripts/get_layout_from_mbg.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,13 @@

mapping_file = sys.argv[1]
edge_overlap_file = sys.argv[2]
#Read alignments to the graph. Only hifi alignments (+gaps) and ONT gaps are used.

#Read alignments to the graph. Only hifi alignments to mbg and gaps are currently used
read_alignment_file = sys.argv[3]

#Either paths from rukki or just single nodes
paths_file = sys.argv[4]

nodelens_file = sys.argv[5]
layout_output = sys.argv[6]
layscf_output = sys.argv[7]
Expand All @@ -25,6 +28,7 @@ def canon(left, right):
return (revnode(right), revnode(left))
return (left, right)

#Transform paths into their base elements: mbg nodes and gaps.
def get_leafs(path, mapping, edge_overlaps, raw_node_lens):
path_len = 0
for i in range(0, len(path)):
Expand Down Expand Up @@ -83,6 +87,9 @@ def get_leafs(path, mapping, edge_overlaps, raw_node_lens):
assert current_len == path_len
return (result, overlaps)

#This gives us the matches of an individual read to contigs (= rukki paths or isolated utig4 nodes).
#path is the alignment of a read (in most cases, a hifi read aligned to the utig1 graph).
#node_poses - map from each node to the list of contigs containing it and its positions in those contigs.
def get_matches(path, node_poses, contig_nodeseqs, raw_node_lens, edge_overlaps, pathleftclip, pathrightclip, readleftclip, readrightclip, readlen, readstart, readend, gap):
longest = None
result = []
Expand Down Expand Up @@ -228,6 +235,9 @@ def get_exact_match_length(clusters):
assert left_len == right_len - left_clip - right_clip

pieceid = 0

#All of these contain info about contigs - here, contigs are nodes or rukki paths split by N.
#Paths are transformed into mbg nodes and gaps by the get_leafs procedure.
contig_lens = {}
contig_nodeseqs = {}
contig_nodeoverlaps = {}
Expand Down Expand Up @@ -284,7 +294,7 @@ def get_exact_match_length(clusters):
sys.stderr.write(pathname + "\t" + pathstr + "\n")

contig_pieces[fullname].append("end")

#map from each node to the list of contigs where it occurs
node_poses = {}
for contigname in contig_nodeseqs:
for i in range(0, len(contig_nodeseqs[contigname])):
Expand Down Expand Up @@ -363,6 +373,8 @@ def get_exact_match_length(clusters):
len_readend = readlen
contig_contains_reads[contig][readname].append((contigpos + readlen, contigpos, len_readstart, len_readend, readlen, readstart, readend))

#Here we group the separate matches to nodes in the path into clusters (by match position in the contig).

read_clusters = {}
for contig in contig_contains_reads:
for readname in contig_contains_reads[contig]:
Expand All @@ -385,6 +397,7 @@ def get_exact_match_length(clusters):
if fw:
if fwcluster is None:
fwcluster = (contigstart, contigend, readstart, readend, [(real_readstart, real_readend)])
#this is the place where alignments to different nodes are actually merged
elif contigstart < fwcluster[0] + 50 and contigend < fwcluster[1] + 50:
fwcluster = (contigstart, contigend, min(fwcluster[2], readstart), max(fwcluster[3], readend), fwcluster[4] + [(real_readstart, real_readend)])
else:
Expand Down

0 comments on commit a1a44ec

Please sign in to comment.