From ac5f0662e6a81c082bbc6b47f631eb80af5e2150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Tue, 25 May 2021 21:21:42 +0200 Subject: [PATCH 1/8] Include HiFiasm bloom filter option --- README.md | 3 ++- mitohifi_v2.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e5c7d3e..f1583a4 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ This command will output you NC_016067.1.fasta and NC_016067.1.gb that you will ``` Usage: 'python mitohifi_v2.py -r "f1.fasta f2.fasta f3.fasta" -f reference.fasta -g reference.gb -t -o ' -usage: mitohifi_v2.py (-r R | -c C) [-h] -f F -g G -t T [-p P] +usage: mitohifi_v2.py (-r R | -c C) [-h] -f F -g G -t T [-p P] [-m M] [--circular-size CIRCULAR_SIZE] [--circular-offset CIRCULAR_OFFSET] [-o O] @@ -123,6 +123,7 @@ Arguments: hifiams, minimap2, samtools and blast -p P -p: Percentage of query in the blast match with close- related mito + -m M -m: Number of bits for HiFiasm bloom filter (default = 0) --circular-size CIRCULAR_SIZE Size to consider when checking for circularization --circular-offset CIRCULAR_OFFSET diff --git a/mitohifi_v2.py b/mitohifi_v2.py index 2a03116..db9f006 100644 --- a/mitohifi_v2.py +++ b/mitohifi_v2.py @@ -106,6 +106,7 @@ def main(): parser.add_argument("-g", help= "-k: Close-related species Mitogenome in genebank format", required = "True") parser.add_argument("-t", help= "-t: Number of threads for (i) hifiasm and (ii) the blast search", required = "True", type=int) parser.add_argument("-p", help="-p: Percentage of query in the blast match with close-related mito", type=int, default=50) + parser.add_argument("-m", help="-m: Number of bits for HiFiasm bloom filter (default = 0)", type=int, default=0) parser.add_argument('--circular-size', help='Size to consider when checking for circularization', type=int, default=220) parser.add_argument('--circular-offset', help='Offset from start and finish to consider when looking for circularization', type=int, default=40) parser.add_argument("-o", help="""-o: Organism genetic code following NCBI table (for mitogenome annotation): @@ -151,7 +152,7 @@ def main(): print("\nNow let's run hifiasm to assemble the mapped and filtered reads!\n") with open("hifiasm.log", "w") as hifiasm_log_f: - subprocess.run(["hifiasm", "-t", str(args.t), "-o", "gbk.HiFiMapped.bam.filtered.assembled", "gbk.HiFiMapped.bam.filtered.fasta", ], stderr=subprocess.STDOUT, stdout=hifiasm_log_f) + subprocess.run(["hifiasm", "-t", str(args.t), "-m", str(args.m), "-o", "gbk.HiFiMapped.bam.filtered.assembled", "gbk.HiFiMapped.bam.filtered.fasta", ], stderr=subprocess.STDOUT, stdout=hifiasm_log_f) gfa2fa_script = os.path.join(os.path.dirname(__file__),"gfa2fa") # gets path to gfa2fa script From edde6597892d067a2f80c97976ec53738e20c757 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Tue, 25 May 2021 21:58:32 +0200 Subject: [PATCH 2/8] Fix mistake HiFiasm option --- mitohifi_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mitohifi_v2.py b/mitohifi_v2.py index db9f006..a0ad8f2 100644 --- a/mitohifi_v2.py +++ b/mitohifi_v2.py @@ -152,7 +152,7 @@ def main(): print("\nNow let's run hifiasm to assemble the mapped and filtered reads!\n") with open("hifiasm.log", "w") as hifiasm_log_f: - subprocess.run(["hifiasm", "-t", str(args.t), "-m", str(args.m), "-o", "gbk.HiFiMapped.bam.filtered.assembled", "gbk.HiFiMapped.bam.filtered.fasta", ], stderr=subprocess.STDOUT, stdout=hifiasm_log_f) + subprocess.run(["hifiasm", "-t", str(args.t), "-f", str(args.m), "-o", "gbk.HiFiMapped.bam.filtered.assembled", "gbk.HiFiMapped.bam.filtered.fasta", ], stderr=subprocess.STDOUT, stdout=hifiasm_log_f) gfa2fa_script = os.path.join(os.path.dirname(__file__),"gfa2fa") # gets path to gfa2fa script From 51be5c890a8542d6c468f9835b2147e0264c0223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Tue, 25 May 2021 22:05:41 +0200 Subject: [PATCH 3/8] Add additional information --- README.md | 2 +- mitohifi_v2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f1583a4..11cb008 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ Arguments: hifiams, minimap2, samtools and blast -p P -p: Percentage of query in the blast match with close- related mito - -m M -m: Number of bits for HiFiasm bloom filter (default = 0) + -m M -m: Number of bits for HiFiasm bloom filter [it maps to maps to -f in HiFiasm] (default = 0) --circular-size CIRCULAR_SIZE Size to consider when checking for circularization --circular-offset CIRCULAR_OFFSET diff --git a/mitohifi_v2.py b/mitohifi_v2.py index a0ad8f2..657fba6 100644 --- a/mitohifi_v2.py +++ b/mitohifi_v2.py @@ -106,7 +106,7 @@ def main(): parser.add_argument("-g", help= "-k: Close-related species Mitogenome in genebank format", required = "True") parser.add_argument("-t", help= "-t: Number of threads for (i) hifiasm and (ii) the blast search", required = "True", type=int) parser.add_argument("-p", help="-p: Percentage of query in the blast match with close-related mito", type=int, default=50) - parser.add_argument("-m", help="-m: Number of bits for HiFiasm bloom filter (default = 0)", type=int, default=0) + parser.add_argument("-m", help="-m: Number of bits for HiFiasm bloom filter [it maps to maps to -f in HiFiasm] (default = 0)", type=int, default=0) parser.add_argument('--circular-size', help='Size to consider when checking for circularization', type=int, default=220) parser.add_argument('--circular-offset', help='Offset from start and finish to consider when looking for circularization', type=int, default=40) parser.add_argument("-o", help="""-o: Organism genetic code following NCBI table (for mitogenome annotation): From a601e61f5209e8bf1d82b3d01f346c911703cb98 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Tue, 25 May 2021 22:06:29 +0200 Subject: [PATCH 4/8] Fix typo --- mitohifi_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mitohifi_v2.py b/mitohifi_v2.py index 657fba6..8ae84e0 100644 --- a/mitohifi_v2.py +++ b/mitohifi_v2.py @@ -106,7 +106,7 @@ def main(): parser.add_argument("-g", help= "-k: Close-related species Mitogenome in genebank format", required = "True") parser.add_argument("-t", help= "-t: Number of threads for (i) hifiasm and (ii) the blast search", required = "True", type=int) parser.add_argument("-p", help="-p: Percentage of query in the blast match with close-related mito", type=int, default=50) - parser.add_argument("-m", help="-m: Number of bits for HiFiasm bloom filter [it maps to maps to -f in HiFiasm] (default = 0)", type=int, default=0) + parser.add_argument("-m", help="-m: Number of bits for HiFiasm bloom filter [it maps to -f in HiFiasm] (default = 0)", type=int, default=0) parser.add_argument('--circular-size', help='Size to consider when checking for circularization', type=int, default=220) parser.add_argument('--circular-offset', help='Offset from start and finish to consider when looking for circularization', type=int, default=40) parser.add_argument("-o", help="""-o: Organism genetic code following NCBI table (for mitogenome annotation): From bc777ebbd74fabdb7ceb96830a8dab7f588d4ee0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Tue, 25 May 2021 22:07:02 +0200 Subject: [PATCH 5/8] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 11cb008..d1237a1 100644 --- a/README.md +++ b/README.md @@ -123,7 +123,7 @@ Arguments: hifiams, minimap2, samtools and blast -p P -p: Percentage of query in the blast match with close- related mito - -m M -m: Number of bits for HiFiasm bloom filter [it maps to maps to -f in HiFiasm] (default = 0) + -m M -m: Number of bits for HiFiasm bloom filter [it maps to -f in HiFiasm] (default = 0) --circular-size CIRCULAR_SIZE Size to consider when checking for circularization --circular-offset CIRCULAR_OFFSET From 628971721c15ab398f559ef8ec9ab6837e3036ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B6rn=20Gr=C3=BCning?= Date: Wed, 26 May 2021 10:06:39 +0200 Subject: [PATCH 6/8] Update mitohifi_v2.py --- mitohifi_v2.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mitohifi_v2.py b/mitohifi_v2.py index 8ae84e0..aa227f5 100644 --- a/mitohifi_v2.py +++ b/mitohifi_v2.py @@ -174,7 +174,7 @@ def main(): fixContigHeaders.fix_headers(original_contigs, "fixed_header_contigs.fasta") os.remove(original_contigs) # remove original contig file - os.rename("fixed_header_contigs.fasta", original_contigs) # replace original contigs file by the version that has the headers fixed + shutil.move("fixed_header_contigs.fasta", original_contigs) # replace original contigs file by the version that has the headers fixed contigs = original_contigs From 5e66efc1636ff1ddc87727f60bed4d0cf705e271 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Sat, 29 May 2021 00:28:02 +0200 Subject: [PATCH 7/8] Fix variable name --- getReprContig.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/getReprContig.py b/getReprContig.py index d610e8e..357b56b 100644 --- a/getReprContig.py +++ b/getReprContig.py @@ -42,12 +42,12 @@ def get_largest_cluster(cdhit_clstr_file): #clusters[cluster_id] = 0 else: curr_sequences.append(line.strip()) - # catch the last cluster + # catch the last cluster if len(curr_sequences) > largest_cluster_len: largest_cluster = curr_cluster largest_cluster_len = len(curr_sequences) largest_cluster_seqs = curr_sequences - + print("[x] Current sequences: {}".format(curr_sequences)) for sequence in largest_cluster_seqs: if sequence[-1] == "*": representative_seq = sequence From 9d2dd1abe39d337903c0406b39e17829e38ab62b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Crist=C3=B3bal=20Gallardo?= Date: Sat, 29 May 2021 00:31:22 +0200 Subject: [PATCH 8/8] Remove print line --- getReprContig.py | 1 - 1 file changed, 1 deletion(-) diff --git a/getReprContig.py b/getReprContig.py index 357b56b..78df848 100644 --- a/getReprContig.py +++ b/getReprContig.py @@ -47,7 +47,6 @@ def get_largest_cluster(cdhit_clstr_file): largest_cluster = curr_cluster largest_cluster_len = len(curr_sequences) largest_cluster_seqs = curr_sequences - print("[x] Current sequences: {}".format(curr_sequences)) for sequence in largest_cluster_seqs: if sequence[-1] == "*": representative_seq = sequence