From ce63ceeb3675c0fb81a419d09816b9e3ea9d31c2 Mon Sep 17 00:00:00 2001 From: Zhentian Kai <38963539+ZKai0801@users.noreply.github.com> Date: Sun, 28 Jun 2020 09:40:24 +0800 Subject: [PATCH 1/2] fix the bug in gene orientation --- scripts/make_fusion_genes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/make_fusion_genes.py b/scripts/make_fusion_genes.py index 0193daa..a1398b4 100644 --- a/scripts/make_fusion_genes.py +++ b/scripts/make_fusion_genes.py @@ -30,7 +30,7 @@ def make_fusion_gene(gene, fw, refflat): for line in fh: if gene[0] not in line: continue - _, transcript, chrom, _, start, end, _, _, _, exonstart, exonend = line.rstrip("\n").split("\t") + _, transcript, chrom, strand, start, end, _, _, _, exonstart, exonend = line.rstrip("\n").split("\t") transcripts[transcript] = (chrom, start, end, exonstart, exonend) transcript = get_longest_transcript(transcripts.keys(), refflat) chrom, start, end, exonstart, exonend = transcripts[transcript] @@ -41,13 +41,15 @@ def make_fusion_gene(gene, fw, refflat): for line in fh: if gene[1] not in line: continue - _, transcript, chrom, _, start, end, _, _, _, exonstart, exonend = line.rstrip("\n").split("\t") + _, transcript, chrom, strand, start, end, _, _, _, exonstart, exonend = line.rstrip("\n").split("\t") break # write to a file header = f">{gene[0]}_{transcript},{chrom}:{start}-{end}\n" fw.write(header) exons = list(zip(exonstart.split(","), exonend.split(",")))[:-1] + if strand == "-": + exons = exons[::-1] for index, each_exon in enumerate(exons, start=1): fw.write(f'{index},{each_exon[0]},{each_exon[1]}\n') fw.write("\n") From ac29e356097c8d1290352316a4ae2f121c8ee76f Mon Sep 17 00:00:00 2001 From: Zhentian Kai <38963539+ZKai0801@users.noreply.github.com> Date: Sun, 28 Jun 2020 09:46:53 +0800 Subject: [PATCH 2/2] fix the bug of wrong gene/transcript picking --- scripts/make_fusion_genes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/make_fusion_genes.py b/scripts/make_fusion_genes.py index a1398b4..d80eed1 100644 --- a/scripts/make_fusion_genes.py +++ b/scripts/make_fusion_genes.py @@ -28,9 +28,9 @@ def make_fusion_gene(gene, fw, refflat): transcripts = {} with open(refflat, "r") as fh: for line in fh: - if gene[0] not in line: + cur_gene, transcript, chrom, strand, start, end, _, _, _, exonstart, exonend = line.rstrip("\n").split("\t") + if gene[0] != cur_gene: continue - _, transcript, chrom, strand, start, end, _, _, _, exonstart, exonend = line.rstrip("\n").split("\t") transcripts[transcript] = (chrom, start, end, exonstart, exonend) transcript = get_longest_transcript(transcripts.keys(), refflat) chrom, start, end, exonstart, exonend = transcripts[transcript] @@ -39,9 +39,9 @@ def make_fusion_gene(gene, fw, refflat): elif len(gene) == 2: with open(refflat, "r") as fh: for line in fh: - if gene[1] not in line: - continue _, transcript, chrom, strand, start, end, _, _, _, exonstart, exonend = line.rstrip("\n").split("\t") + if gene[1] != transcript: + continue break # write to a file