forked from bioinform/rnacocktail
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
1805 lines (1666 loc) · 83.7 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge">
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="description" content="RNACocktail">
<meta name="author" content="Mohammad Sahraeian">
<title>RNACocktail</title>
<!-- Bootstrap core CSS -->
<link href="https://maxcdn.bootstrapcdn.com/bootstrap/3.2.0/css/bootstrap.min.css" rel="stylesheet">
<style>
/* Centre the page container horizontally and cap its width at 760px. */
.container {
margin-right: auto;
margin-left: auto;
max-width: 760px;
}
/* Zebra striping: light grey background on every even table row on the page. */
tr:nth-child(even) {
background-color: #eeeeee;
}
</style>
<script>
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
})(window,document,'script','https://www.google-analytics.com/analytics.js','ga');
ga('create', 'UA-58409498-7', 'auto');
ga('send', 'pageview');
/**
* Function that tracks a click on an outbound link in Google Analytics.
* This function takes a valid URL string as an argument, and uses that URL string
* as the event label.
*
* The browser is sent to `url` as soon as the hit has been reported. Because the
* inline onclick handlers that call this function return false (cancelling the
* default navigation), a timeout fallback also navigates after one second, so
* the link still works when analytics.js is blocked or fails to load and the
* hitCallback is therefore never invoked.
*
* @param {string} url Destination URL; also used as the event label.
*/
var trackOutboundLink = function(url) {
  var navigated = false;
  /* Navigate at most once, whichever of hitCallback / timeout fires first. */
  var navigate = function () {
    if (navigated) {
      return;
    }
    navigated = true;
    document.location = url;
  };
  /* Schedule the fallback before calling ga so it is in place even if ga throws. */
  setTimeout(navigate, 1000);
  ga('send', 'event', 'outbound', 'click', url, {'hitCallback': navigate});
};
/**
 * Click handler that reports clicks on external links to Google Analytics as
 * an "Outgoing Links" event (action = link URL, label = current path + query).
 *
 * A link is treated as external when its href does not contain location.host.
 * For external links that open in the current window, the default navigation
 * is cancelled and performed from the hit's hitCallback instead, with a
 * one-second timeout fallback so the link still works when analytics.js is
 * blocked or fails to load. External links that target a new window are left
 * to the browser: the current page stays alive, so the hit can complete, and
 * the window is opened natively instead of via an asynchronous window.open
 * call that popup blockers may block.
 *
 * @param {Event} event The click event (or the legacy IE event object).
 */
function _gaLt(event){
  var el = event.srcElement || event.target;
  /* Loop up the DOM tree through parent elements if clicked element is not a link (eg: an image inside a link) */
  while(el && (typeof el.tagName == 'undefined' || el.tagName.toLowerCase() != 'a' || !el.href)){
    el = el.parentNode;
  }
  if(!el || !el.href){
    return; /* click was not inside a link */
  }
  if(el.href.indexOf(location.host) != -1){
    return; /* internal link: nothing to track */
  }
  var link = el.href;
  /* Is target set and not _(self|parent|top)? */
  var target = (el.target && !el.target.match(/^_(self|parent|top)$/i)) ? el.target : false;
  var label = document.location.pathname + document.location.search;
  if(target){
    /* New-window link: record the hit and let the browser open the window itself. */
    ga("send", "event", "Outgoing Links", link, label);
    return;
  }
  /* Same-window link: navigate at most once, from hitCallback or the fallback timer. */
  var navigated = false;
  var hitBack = function(){
    if(navigated){
      return;
    }
    navigated = true;
    window.location.href = link;
  };
  setTimeout(hitBack, 1000);
  /* Pass the function itself; calling it here would navigate before the hit is sent. */
  ga("send", "event", "Outgoing Links", link, label, {"hitCallback": hitBack});
  /* Prevent standard click */
  if(event.preventDefault){
    event.preventDefault();
  } else {
    event.returnValue = !1;
  }
}
/* Once the page has loaded, route every click on the document body through
   _gaLt. Uses the standard DOM API when present, else the legacy attachEvent API. */
var w = window;
if (w.addEventListener) {
  w.addEventListener("load", function () {
    document.body.addEventListener("click", _gaLt, false);
  }, false);
} else if (w.attachEvent) {
  w.attachEvent("onload", function () {
    document.body.attachEvent("onclick", _gaLt);
  });
}
</script>
</head>
<body>
<div class="container" style="padding-bottom: 400px;">
<div class="page-header">
<h1>RNACocktail</h1>
<h2>A comprehensive framework for accurate and efficient RNA-Seq analysis</h2>
</div>
<p>The RNACocktail pipeline is composed of high-accuracy tools for different steps of RNA-Seq analysis. It performs a broad-spectrum RNA-Seq analysis on both short- and long-read technologies to enable meaningful insights from transcriptomic data. It was developed after analyzing a variety of RNA-Seq samples (ranging from germline, cancer to stem cell datasets) and technologies using a multitude of tool combinations to determine a pipeline which is comprehensive, fast and accurate.</p>
<p> RNACocktail supports:
<table style="width:70%">
<tr>
<th style="width:50%">short-read</th>
<th style="width:50%">long-read</th>
</tr>
<tr>
<td>alignment</td>
<td>error correction</td>
</tr>
<tr>
<td>transcriptome reconstruction</td>
<td>alignment</td>
</tr>
<tr>
<td>de novo transcriptome assembly</td>
<td>transcriptome reconstruction</td>
</tr>
<tr>
<td>alignment-free quantification</td>
<td>fusion prediction</td>
</tr>
<tr>
<td>differential expression analysis</td>
<td></td>
</tr>
<tr>
<td>fusion prediction</td>
<td></td>
</tr>
<tr>
<td>variant calling</td>
<td></td>
</tr>
<tr>
<td>RNA editing prediction</td>
<td></td>
</tr>
</table>
</p>
<div class="jumbotron">
<p><i>For more information contact us at <a href="mailto:[email protected]">[email protected]</a></i>
</p>
</div>
<h2>Publication</h2>
<div class="panel panel-default" style="font-family:monospace;">
<div class="panel-body">
<i>If you use RNACocktail in your work, please cite the following:</i><br>
Sayed Mohammad Ebrahim Sahraeian, Marghoob Mohiyuddin, Robert Sebra, Hagen Tilgner,
Pegah T. Afshar, Kin Fai Au, Narges Bani Asadi, Mark B. Gerstein, Wing Hung Wong,
Michael P. Snyder, Eric Schadt, and Hugo Y. K. Lam<br>
<b>Gaining comprehensive biological insight into the transcriptome by performing a broad-spectrum RNA-seq analysis</b><br>
Nature Communications 8, Article number: 59 (2017). <a
href="http://dx.doi.org/10.1038/s41467-017-00050-4"
onclick="trackOutboundLink('http://dx.doi.org/10.1038/s41467-017-00050-4'); return false;">doi:10.1038/s41467-017-00050-4
</a>
</div>
</div>
<h2>Download RNACocktail</h2>
<p>Latest version: <a href="https://github.com/bioinform/RNACocktail/archive/v0.2.2.tar.gz"
onclick="trackOutboundLink('https://github.com/bioinform/RNACocktail/archive/v0.2.2.tar.gz'); return false;">https://github.com/bioinform/RNACocktail/archive/v0.2.2.tar.gz</a></p>
<p>For other versions, see "releases". <a href="https://github.com/bioinform/RNACocktail/releases"
onclick="trackOutboundLink('https://github.com/bioinform/RNACocktail/releases'); return false;">https://github.com/bioinform/RNACocktail/releases</a></p>
<h2>RNACocktail Docker Image</h2>
<p>The docker image with all the packages installed can be found at <a href="https://hub.docker.com/r/marghoob/rnacocktail/"
onclick="trackOutboundLink('https://hub.docker.com/r/marghoob/rnacocktail'); return false;">https://hub.docker.com/r/marghoob/rnacocktail/</a></p>
<p>The dockerfile is also available at <code>docker/Dockerfile</code> for local build.</p>
<h2>System Requirements</h2>
<p>
Python 2.7 and the following Python packages must be installed:
<table style="width:100%">
<tr>
<th style="width:20%">Tool</th>
<th style="width:20%">Version tested</th>
<th style="width:60%">Pipeline modes used in</th>
</tr>
<tr>
<td><a href="http://pythonhosted.org/pybedtools">pybedtools</a></td>
<td>0.7.7</td>
<td><b>editing</b></td>
</tr>
<tr>
<td><a href="https://github.com/pysam-developers/pysam">pysam</a></td>
<td>0.9.0</td>
<td><b>editing</b></td>
</tr>
<tr>
<td><a href="http://www.numpy.org/">numpy</a></td>
<td>1.12.0</td>
<td><b>editing</b></td>
</tr>
<tr>
<td><a href="http://scipy.org/">scipy</a></td>
<td>0.18.1</td>
<td><b>long_fusion</b></td>
</tr>
<tr>
<td><a href="http://biopython.org/">biopython</a></td>
<td>1.66</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="https://openpyxl.readthedocs.io/en/default/">openpyxl</a></td>
<td>1.5.6</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://pandas.pydata.org/">pandas</a></td>
<td>0.19.2</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="https://pypi.python.org/pypi/xlrd">xlrd</a></td>
<td>0.6.1</td>
<td><b>fusion</b></td>
</tr>
</table>
<p>Note that <a href="https://github.com/arq5x/bedtools2">bedtools</a> has to be installed separately in order for pybedtools to work.</p>
<p>In addition, paths to the following tools must be provided as RNACocktail arguments. Alternatively, the executables can be on the PATH environment variable or defined in defaults.py:</p>
<table style="width:100%">
<tr>
<th style="width:20%">Tool</th>
<th style="width:20%">Version tested</th>
<th style="width:60%">Pipeline modes used in</th>
</tr>
<tr>
<td><a href="https://github.com/samtools/samtools">SAMtools</a></td>
<td>1.2</td>
<td><b>align</b>, <b>reconstruct</b>, <b>long_align</b>, <b>long_reconstruct</b>, and <b>editing</b></td>
</tr>
<tr>
<td><a href="http://ccb.jhu.edu/software/hisat2/index.shtml">HISAT2</a></td>
<td>2.0.5</td>
<td><b>align</b></td>
</tr>
<tr>
<td><a href="https://ccb.jhu.edu/software/stringtie/index.shtml">StringTie</a></td>
<td>1.3.3</td>
<td><b>reconstruct</b> and <b>diff</b></td>
</tr>
<tr>
<td><a href="https://github.com/COMBINE-lab/salmon">Salmon</a></td>
<td>0.8.0</td>
<td><b>quantify</b></td>
</tr>
<tr>
<td><a href="https://github.com/dzerbino/oases">Oases</a></td>
<td>0.2.09</td>
<td><b>assembly</b></td>
</tr>
<tr>
<td><a href="https://github.com/dzerbino/velvet">Velvet</a></td>
<td>1.2.10</td>
<td><b>assembly</b></td>
</tr>
<tr>
<td><a href="https://www.r-project.org/">R</a> with <a href="https://www.bioconductor.org/packages/devel/bioc/html/DESeq2.html">DESeq2</a>, <a href="https://github.com/hadley/readr">readr</a>, and <a href="https://github.com/mikelove/tximport">tximport</a> libraries</td>
<td>3.3.2</td>
<td><b>diff</b>, <b>editing</b></td>
</tr>
<tr>
<td><a href="http://bioinf.wehi.edu.au/featureCounts/">featureCounts</a></td>
<td>1.5.0-p1</td>
<td><b>diff</b></td>
</tr>
<tr>
<td><a href="http://www.atgc-montpellier.fr/lordec/">LoRDEC</a></td>
<td>0.6</td>
<td><b>long_correct</b></td>
</tr>
<tr>
<td><a href="https://github.com/alexdobin/STAR">STAR</a></td>
<td>2.5.2b</td>
<td><b>long_align</b></td>
</tr>
<tr>
<td><a href="https://github.com/bioinform/IDP/">IDP</a></td>
<td>0.1.9</td>
<td><b>long_reconstruct</b></td>
</tr>
<tr>
<td><a href="https://www.healthcare.uiowa.edu/labs/au/IDP-fusion/default.asp">IDP-fusion</a></td>
<td>1.1.1</td>
<td><b>long_fusion</b></td>
</tr>
<tr>
<td><a href="https://software.broadinstitute.org/gatk/">GATK</a></td>
<td>3.5-0</td>
<td><b>variant</b> and <b>editing</b></td>
</tr>
<tr>
<td><a href="https://broadinstitute.github.io/picard/">Picard</a></td>
<td>2.2.2</td>
<td><b>variant</b></td>
</tr>
<tr>
<td><a href="https://github.com/zhqingit/giremi">GIREMI</a></td>
<td>0.2.1</td>
<td><b>editing</b></td>
</tr>
<tr>
<td><a href="https://github.com/GATB/gatb-core/archive">gatb-core</a></td>
<td>1.1.0</td>
<td><b>editing</b></td>
</tr>
<tr>
<td><a href="https://github.com/samtools/htslib">HTSlib</a></td>
<td>1.3</td>
<td><b>editing</b></td>
</tr>
<tr>
<td><a href="https://github.com/ndaniel/fusioncatcher">FusionCatcher</a></td>
<td>0.99.5a beta</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://bowtie-bio.sourceforge.net/index.shtml">bowtie</a></td>
<td>1.2.0</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://bowtie-bio.sourceforge.net/bowtie2/index.shtml">bowtie2</a></td>
<td>2.2.9</td>
<td><b>fusion</b>, <b>long_fusion</b></td>
</tr>
<tr>
<td><a href="http://bio-bwa.sourceforge.net/">bwa</a></td>
<td>0.7.15</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="https://github.com/ncbi/sra-tools">sra toolkit</a></td>
<td>2.8.1</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://ftp.gnu.org/gnu/coreutils/">coreutils</a></td>
<td>8.25</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://zlib.net/pigz/">pigz</a></td>
<td>2.3.1</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/blat">blat</a></td>
<td>0.35</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64/faToTwoBit">faToTwoBit</a></td>
<td></td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://hgdownload.cse.ucsc.edu/admin/exe/linux.x86_64.v287/liftOver">liftOver</a></td>
<td></td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="https://github.com/ndaniel/seqtk">SeqTK</a></td>
<td>1.0-r82b</td>
<td><b>fusion</b></td>
</tr>
<tr>
<td><a href="http://research-pub.gene.com/gmap/">gmap</a></td>
<td>2017-02-15</td>
<td><b>long_fusion</b></td>
</tr>
</table>
</p>
<h2>Installing RNACocktail</h2>
<p>RNACocktail is a python package and can be installed using <code>pip</code>. To install type <code>pip install https://github.com/bioinform/RNACocktail/archive/v0.2.2.tar.gz</code>. The current version
of RNACocktail is v0.2.2. In general, the install source would be https://github.com/bioinform/RNACocktail/archive/version.tar.gz</p>
<h2>Running RNACocktail</h2>
<p>Type <code>run_rnacocktail.py -h</code> for help.</p>
<p>Type <code>run_rnacocktail.py align -h</code> for short-read alignment help.</p>
<p>Type <code>run_rnacocktail.py reconstruct -h</code> for short-read transcriptome reconstruction help.</p>
<p>Type <code>run_rnacocktail.py quantify -h</code> for short-read quantification help.</p>
<p>Type <code>run_rnacocktail.py diff -h</code> for short-read differential expression help.</p>
<p>Type <code>run_rnacocktail.py denovo -h</code> for short-read de novo assembly help.</p>
<p>Type <code>run_rnacocktail.py long_correct -h</code> for long-read error correction help.</p>
<p>Type <code>run_rnacocktail.py long_align -h</code> for long-read alignment help.</p>
<p>Type <code>run_rnacocktail.py long_reconstruct -h</code> for long-read transcriptome reconstruction help.</p>
<p>Type <code>run_rnacocktail.py long_fusion -h</code> for long-read fusion detection help.</p>
<p>Type <code>run_rnacocktail.py variant -h</code> for variant calling help.</p>
<p>Type <code>run_rnacocktail.py editing -h</code> for RNA editing detection help.</p>
<p>Type <code>run_rnacocktail.py fusion -h</code> for RNA fusion detection help.</p>
<p>Type <code>run_rnacocktail.py all -h</code> for running all RNACocktail pipeline steps help.</p>
<p>The <code>all</code> mode for RNACocktail will automatically perform the most comprehensive analysis possible given the input data, which includes steps from alignment to differential expression analysis.</p>
<h2>Testing RNACocktail</h2>
<h3>Small test</h3>
<p><code>cd test</code></p>
<p><code>./test_run.sh</code></p>
<h3>Extensive test of all modes on Docker image</h3>
<p><code>cd test</code></p>
<p><code>./docker_test.sh</code></p>
<h2>Analysis scripts</h2>
<p>Several IPython Notebook and .py scripts to analyze the predictions in different tasks can be found in the <code>analysis_scripts</code> folder.</p>
<h2>Output files</h2>
<p>The table below summarizes the output files generated by each mode of RNACocktail.</p>
<table style="width:100%;">
<tr>
<th style="width:20%">Task</th>
<th style="width:20%">Command</th>
<th style="width:20%">Default Tool</th>
<th style="width:40%">Output Files</th>
</tr>
<tr>
<td>Short-read alignment</td>
<td><code>align</code></td>
<td>HISAT2</td>
<td><p><b>alignments:</b> alignments.sorted.bam</p>
<p><b>junctions:</b> splicesites.tab</p></td>
</tr>
<tr>
<td>Short-read transcriptome reconstruction</td>
<td><code>reconstruct</code></td>
<td>StringTie</td>
<td><p><b>transcripts:</b> transcripts.gtf</p>
<p><b>expressions:</b> gene_abund.tab</p></td>
</tr>
<tr>
<td>Short-read quantification</td>
<td><code>quantify</code></td>
<td>Salmon-SMEM</td>
<td><p><b>expressions:</b> quant.sf</p></td>
</tr>
<tr>
<td>Short-read differential expression</td>
<td><code>diff</code></td>
<td>DESeq2</td>
<td><p><b>differential expressions:</b> deseq2_res.tab</p></td>
</tr>
<tr>
<td>Short-read de novo assembly</td>
<td><code>denovo</code></td>
<td>Oases</td>
<td><p><b>transcripts:</b> transcripts.fa</p></td>
</tr>
<tr>
<td>Long-read error correction</td>
<td><code>long_correct</code></td>
<td>LoRDEC</td>
<td><p><b>corrected reads:</b> long_corrected.fa</p></td>
</tr>
<tr>
<td>Long-read alignment</td>
<td><code>long_align</code></td>
<td>STARlong</td>
<td><p><b>alignments:</b> Aligned.out.psl</p></td>
</tr>
<tr>
<td>Long-read transcriptome reconstruction</td>
<td><code>long_reconstruct</code></td>
<td>IDP</td>
<td><p><b>transcripts:</b> isoform.gtf</p>
<p><b>expressions:</b> isoform.exp</p></td>
</tr>
<tr>
<td>Long-read fusion detection</td>
<td><code>long_fusion</code></td>
<td>IDP-fusion</td>
<td><p><b>fusions:</b> fusion_report.tsv</p></td>
</tr>
<tr>
<td>Variant calling</td>
<td><code>variant</code></td>
<td>GATK</td>
<td><p><b>variants:</b> variants_filtered.vcf</p></td>
</tr>
<tr>
<td>RNA editing detection</td>
<td><code>editing</code></td>
<td>GIREMI</td>
<td><p><b>edits:</b> giremi_out.txt.res</p></td>
</tr>
<tr>
<td>RNA Fusion detection</td>
<td><code>fusion</code></td>
<td>FusionCatcher</td>
<td><p><b>fusions:</b> final-list_candidate-fusion-genes.txt</p></td>
</tr>
<tr>
<td>Running all steps</td>
<td><code>all</code></td>
<td>whole pipeline</td>
<td><p>all outputs of the successful steps.</p></td>
</tr>
</table>
<h2>Examples</h2>
<p>Some example command-lines for running RNACocktail with various modes and data type (short- and long-reads) are shown below. In particular, examples 17 and 18 show how to use the <code>all</code> mode for the most comprehensive analysis. Note that RNACocktail requires pre-built indexes for the genomic and transcriptomic references.</p>
<h4>Example 1 (align):</h4> Run of RNACocktail for alignment of paired-end short-read sequences (HISAT2).
<p><code>run_rnacocktail.py align --align_idx hisat2-idx --outdir out --workdir work --ref_gtf genes.GRCh37.gtf --1 seq_1.fq.gz --2 seq_2.fq.gz --hisat2 /path/to/hisat2 --hisat2_sps /path/to/hisat2_extract_splice_sites.py --samtools /path/to/samtools --threads 10 --sample A </code></p>
<h4>Example 2 (align):</h4> Run of RNACocktail for alignment of single-end short-read sequences (HISAT2).
<p><code>run_rnacocktail.py align --align_idx hisat2-idx --outdir out --workdir work --ref_gtf genes.GRCh37.gtf --U seq.fq.gz --hisat2 /path/to/hisat2 --hisat2_sps /path/to/hisat2_extract_splice_sites.py --samtools /path/to/samtools --threads 10 --sample A </code></p>
<h4>Example 3 (reconstruct):</h4> Run of RNACocktail for short-read transcriptome reconstruction (StringTie).
<p><code>run_rnacocktail.py reconstruct --alignment_bam work/hisat2/A/alignments.sorted.bam --outdir out --workdir work --ref_gtf genes.GRCh37.gtf --stringtie /path/to/stringtie --threads 10 --sample A
</code></p>
<h4>Example 4 (quantify):</h4> Run of RNACocktail for (alignment-free) quantification of paired-end short-read sequences (Salmon-SMEM).
<p><code>run_rnacocktail.py quantify --quantifier_idx salmon_fmd_idx --1 seq_1.fq.gz --2 seq_2.fq.gz --libtype IU --salmon_k 19 --outdir out --workdir work --salmon /path/to/salmon --threads 10 --sample A --unzip
</code></p>
<h4>Example 5 (quantify):</h4> Run of RNACocktail for (alignment-free) quantification of single-end short-read sequences (Salmon-SMEM).
<p><code>run_rnacocktail.py quantify --quantifier_idx salmon_fmd_idx --U seq.fq.gz --libtype U --salmon_k 19 --outdir out --workdir work --salmon /path/to/salmon --threads 10 --sample A --unzip
</code></p>
<h4>Example 6 (diff):</h4> Run of RNACocktail for differential expression analysis of quantifications computed using Salmon-SMEM (DESeq2).
<p><code>run_rnacocktail.py diff --quant_files work/salmon_smem/A1/quant.sf,work/salmon_smem/A2/quant.sf work/salmon_smem/B1/quant.sf,work/salmon_smem/B2/quant.sf --sample A1,A2 B1,B2 --ref_gtf genes.GRCh37.gtf --outdir out --workdir work
</code></p>
<h4>Example 7 (diff):</h4> Run of RNACocktail for differential expression analysis of reads aligned using HISAT2 on reference transcriptome (DESeq2).
<p><code>run_rnacocktail.py diff --alignments work/hisat2/A1/alignments.sorted.bam,work/hisat2/A2/alignments.sorted.bam work/hisat2/B1/alignments.sorted.bam,work/hisat2/B2/alignments.sorted.bam --sample A1,A2 B1,B2 --ref_gtf genes.GRCh37.gtf --outdir out --workdir work --featureCounts /path/to/featureCounts
</code></p>
<h4>Example 8 (diff):</h4> Run of RNACocktail for differential expression analysis of reads aligned using HISAT2 on StringTie computed transcriptome (DESeq2).
<p><code>run_rnacocktail.py diff --alignments work/hisat2/A1/alignments.sorted.bam,work/hisat2/A2/alignments.sorted.bam work/hisat2/B1/alignments.sorted.bam,work/hisat2/B2/alignments.sorted.bam --transcripts_gtfs work/stringtie/A1/transcripts.gtf,work/stringtie/A2/transcripts.gtf work/stringtie/B1/transcripts.gtf,work/stringtie/B2/transcripts.gtf --sample A1,A2 B1,B2 --ref_gtf genes.GRCh37.gtf --outdir out --workdir work --featureCounts /path/to/featureCounts
</code></p>
<h4>Example 9 (denovo):</h4> Run of RNACocktail for de novo assembly (Oases).
<p><code>run_rnacocktail.py denovo --1 seq_1.fq.gz --2 seq_2.fq.gz --outdir out --workdir work --oases /path/to/oases --velveth /path/to/velveth --velvetg /path/to/velvetg --threads 4 --sample A --file_format fastq.gz
</code></p>
<h4>Example 10 (long_correct):</h4> Run of RNACocktail for long-read error correction (LoRDEC).
<p><code>run_rnacocktail.py long_correct --kmer 23 --solid 3 --short seq.fq.gz --long seq_long.fa --outdir out --workdir work --lordec /path/to/lordec-correct --threads 4 --sample A
</code></p>
<h4>Example 11 (long_align):</h4> Run of RNACocktail for long-read alignment (STARlong).
<p><code>run_rnacocktail.py long_align --long work/lordec/A/long_corrected.fa --outdir out --workdir work --starlong /path/to/STARlong --threads 4 --sample A --sam2psl /path/to/sam2psl.py --samtools /path/to/samtools --genome_dir /path/to/STAR/genome_idx
</code></p>
<h4>Example 12 (long_reconstruct):</h4> Run of RNACocktail for long-read transcriptome reconstruction (IDP).
<p><code>run_rnacocktail.py long_reconstruct --alignment work/hisat2/A/alignments.sorted.bam --short_junction work/hisat2/A/splicesites.bed --long_alignment work/starlong/A/Aligned.out.psl --outdir out --workdir work --idp /path/to/runIDP.py --threads 4 --sample A --read_length 100 --ref_genome genome.GRCh37.fa --ref_all_gpd hg19.all.refSeq_gencode_ensemble_EST_known.gpd --ref_gpd genes.GRCh37.refFlat.txt --samtools /path/to/samtools --idp_cfg idp.cfg
</code></p>
<h4>Example 13 (long_fusion):</h4> Run of RNACocktail for long-read fusion detection (IDP-fusion).
<p><code>run_rnacocktail.py long_fusion --alignment work/hisat2/A/alignments.sorted.bam --short_junction work/hisat2/A/splicesites.bed --short_fasta seq.fa --long_fasta work/lordec/A/long_corrected.fa --outdir out --workdir work --threads 4 --sample A --ref_genome genome.GRCh37.fa --ref_all_gpd hg19.all.refSeq_gencode_ensemble_EST_known.gpd --ref_gpd genes.GRCh37.refFlat.txt --read_length 100 --genome_bowtie2_idx genome.bt2_idx --transcriptome_bowtie2_idx genes.bt2_idx --uniqueness_bedgraph uniqueness.bedGraph --gmap_idx gmap_idx --idpfusion /path/to/runIDP.py --samtools /path/to/samtools --idpfusion_cfg idpfusion.cfg
</code></p>
<h4>Example 14 (variant):</h4> Run of RNACocktail for RNA-Seq variant calling (GATK) with enabled IndelRealignment.
<p><code>run_rnacocktail.py variant --alignment work/hisat2/A/alignments.sorted.bam --outdir out --workdir work --picard /path/to/picard.jar --gatk /path/to/GenomeAnalysisTK.jar --threads 10 --sample A --ref_genome genome.GRCh37.fa --IndelRealignment --knownsites dbsnp_138.b37.vcf
</code></p>
<h4>Example 15 (editing):</h4> Run of RNACocktail for RNA editing detection (GIREMI)
<p><code>run_rnacocktail.py editing --alignment work/gatk/A/bsqr.bam --variant work/gatk/A/variants_filtered.vcf --strand_pos test/GRCh37_strand_pos.bed --genes_pos test/GRCh37_genes_pos.bed --outdir out --workdir work --giremi_dir /path/to/giremi/directory/ --gatk /path/to/GenomeAnalysisTK.jar --samtools /path/to/samtools --htslib_dir /path/to/htslib/directory/ --threads 10 --sample A --ref_genome genome.GRCh37.fa --knownsites dbsnp_138.b37.vcf
</code></p>
<h4>Example 16 (fusion):</h4> Run of RNACocktail for RNA fusion detection (FusionCatcher)
<p><code>run_rnacocktail.py fusion --data_dir /path/to/fusioncatcher/ensembl/data/directory/ --input seq_1.fq.gz,seq_2.fq.gz --outdir out --workdir work --fusioncatcher /path/to/fusioncatcher --threads 4 --sample A
</code></p>
<h4>Example 17 (all):</h4> Run all pipeline steps (Short-read example)
<p><code>run_rnacocktail.py all --outdir out --workdir work --threads 10 --1 A1_1.fq.gz,A2_1.fq.gz B1_1.fq.gz,B2_1.fq.gz --2 A1_2.fq.gz,A2_2.fq.gz B1_2.fq.gz,B2_2.fq.gz --sample all_A1,all_A2 all_B1,all_B2 --ref_gtf genes.GRCh37.gtf --ref_genome genome.GRCh37.fa --align_idx hisat2-idx --quantifier_idx salmon_fmd_idx --unzip --file_format fastq.gz --IndelRealignment --CleanSam --knownsites dbsnp_138.b37.vcf --strand_pos test/GRCh37_strand_pos.bed --genes_pos test/GRCh37_genes_pos.bed --data_dir /path/to/fusioncatcher/ensembl/data/directory/ --giremi_dir /path/to/giremi/directory/ --gatk /path/to/GenomeAnalysisTK.jar --htslib_dir /path/to/htslib/directory/ --picard /path/to/picard.jar --samtools /path/to/samtools --hisat2 /path/to/hisat2 --hisat2_sps /path/to/hisat2_extract_splice_sites.py --stringtie /path/to/stringtie --salmon /path/to/salmon --featureCounts /path/to/featureCounts --oases /path/to/oases --velveth /path/to/velveth --velvetg /path/to/velvetg --lordec /path/to/lordec-correct --sam2psl /path/to/sam2psl.py --fusioncatcher /path/to/fusioncatcher
</code></p>
<h4>Example 18 (all):</h4> Run all pipeline steps (long-read example)
<p><code>run_rnacocktail.py all --outdir out --workdir work --threads 10 --U seq_short.fa --long seq_long.fa --sample all_C --ref_gtf genes.GRCh37.gtf --ref_genome genome.GRCh37.fa --align_idx hisat2-idx --quantifier_idx salmon_fmd_idx --unzip --file_format fasta --IndelRealignment --CleanSam --knownsites dbsnp_138.b37.vcf --strand_pos test/GRCh37_strand_pos.bed --genes_pos test/GRCh37_genes_pos.bed --data_dir /path/to/fusioncatcher/ensembl/data/directory/ --giremi_dir /path/to/giremi/directory/ --gatk /path/to/GenomeAnalysisTK.jar --htslib_dir /path/to/htslib/directory/ --star_genome_dir /path/to/STAR/genome_idx/ --genome_bowtie2_idx genome.bt2_idx --transcriptome_bowtie2_idx genes.bt2_idx --uniqueness_bedgraph uniqueness.bedGraph --gmap_idx gmap_idx --ref_all_gpd hg19.all.refSeq_gencode_ensemble_EST_known.gpd --ref_gpd genes.GRCh37.refFlat.txt --read_length 100 --picard /path/to/picard.jar --hisat2_opts \"-f\" --idp /path/to/idp/runIDP.py --idpfusion /path/to/idpfusion/runIDP.py --samtools /path/to/samtools --hisat2 /path/to/hisat2 --hisat2_sps /path/to/hisat2_extract_splice_sites.py --stringtie /path/to/stringtie --salmon /path/to/salmon --featureCounts /path/to/featureCounts --oases /path/to/oases --velveth /path/to/velveth --velvetg /path/to/velvetg --lordec /path/to/lordec-correct --sam2psl /path/to/sam2psl.py --fusioncatcher /path/to/fusioncatcher
</code></p>
<h2>Command line options</h2>
<h3>General options</h3>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--sample STRING</code></td>
<td>Sample name</td>
</tr>
<tr>
<td><code>--threads INT</code></td>
<td>Number of threads to use (default: 1)</td>
</tr>
<tr>
<td><code>--start INT</code></td>
<td>Restarts the workflow/pipeline from the given step number. This can be used when the pipeline has crashed/stopped and one wants to re-run it from the step where it stopped without re-running the entire pipeline from the beginning. 0 is for restarting automatically and 1 is the first step. (default is '0').</td>
</tr>
<tr>
<td><code>--timeout INT</code></td>
<td>Maximum run time for commands (in seconds) (default 10000000)</td>
</tr>
</table>
<h3>Short-read alignment options</h3>
<p><b>run_rnacocktail.py align </b></p>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--sr_aligner STRING</code></td>
<td>Short-read alignment tool (default: HISAT2)</td>
</tr>
<tr>
<td><code>--align_idx STRING</code></td>
<td>The basename of the index generated by the alignment tool for the reference genome</td>
</tr>
<tr>
<td><code>--1 STRING</code></td>
<td>Comma-separated list of files containing mate 1s (filename usually includes _1), e.g. --1 A_1.fq,B_1.fq.</td>
</tr>
<tr>
<td><code>--2 STRING</code></td>
<td>Comma-separated list of files containing mate 2s (filename usually includes _2), e.g. --2 A_2.fq,B_2.fq.</td>
</tr>
<tr>
<td><code>--U STRING</code></td>
<td>Comma-separated list of files containing unpaired reads to be aligned, e.g. --U A.fq,B.fq.</td>
</tr>
<tr>
<td><code>--sra STRING</code></td>
<td>Comma-separated list of SRA accession numbers, e.g. --sra SRR353653,SRR353654. Information about read types is available <a href="http://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?sp=runinfo&amp;acc=sra-acc&amp;retmode=xml">here</a>, where sra-acc is replaced by the SRA accession number.</td>
</tr>
<tr>
<td><code>--ref_gtf STRING</code></td>
<td>The reference transcriptome annotation file (in GTF or GFF3 format) to guide the analysis. (The --known-splicesite-infile option for HISAT2 will be created based on this file.)</td>
</tr>
<tr>
<td><code>--hisat2 STRING</code></td>
<td>Path to HISAT2 executable (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--hisat2_sps STRING</code></td>
<td>Path to hisat2_extract_splice_sites.py script. Can be found in HISAT2 package. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--samtools STRING</code></td>
<td>Path to samtools executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--hisat2_opts STRING</code></td>
<td>Other options used for HISAT2 aligner. (should be put between " ") (For HISAT2 check <a href="http://ccb.jhu.edu/software/hisat2/manual.shtml">here</a>).</td>
</tr>
</table>
<h3>Short-read transcriptome reconstruction options</h3>
<p><b>run_rnacocktail.py reconstruct </b></p>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--reconstructor STRING</code></td>
<td>The transcriptome reconstruction tool to use (default: StringTie)</td>
</tr>
<tr>
<td><code>--alignment_bam STRING</code></td>
<td>A BAM file with RNA-Seq read mappings which must be sorted by their genomic location (e.g. The output BAM file generated in align mode).</td>
</tr>
<tr>
<td><code>--ref_gtf STRING</code></td>
<td>The reference transcriptome annotation file (in GTF or GFF3 format) to guide the analysis.</td>
</tr>
<tr>
<td><code>--stringtie STRING</code></td>
<td>Path to StringTie executable (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--samtools STRING</code></td>
<td>Path to samtools executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--stringtie_opts STRING</code></td>
<td>Other options used for StringTie transcriptome reconstruction. (should be put between " ") (For StringTie check <a href="https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual">here</a>).</td>
</tr>
</table>
<h3>Alignment-free transcript quantification options</h3>
<p><b>run_rnacocktail.py quantify </b></p>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--quantifier STRING</code></td>
<td>The quantification tool to use (default: Salmon-SMEM)</td>
</tr>
<tr>
<td><code>--quantifier_idx STRING</code></td>
<td>The index generated for the reference transcriptome. (FMD-based index for Salmon-SMEM)</td>
</tr>
<tr>
<td><code>--1 STRING</code></td>
<td>Comma-separated list of files containing mate 1s (filename usually includes _1), e.g. --1 A_1.fq,B_1.fq.</td>
</tr>
<tr>
<td><code>--2 STRING</code></td>
<td>Comma-separated list of files containing mate 2s (filename usually includes _2), e.g. --2 A_2.fq,B_2.fq.</td>
</tr>
<tr>
<td><code>--U STRING</code></td>
<td>Comma-separated list of files containing unpaired reads to be aligned, e.g. --U A.fq,B.fq.</td>
</tr>
<tr>
<td><code>--salmon_k INT</code></td>
<td>SMEMs smaller than this size will not be considered by Salmon. (default 19).</td>
</tr>
<tr>
<td><code>--libtype STRING</code></td>
<td>Format string describing the library type. (For Salmon check <a href="http://salmon.readthedocs.io/en/latest/library_type.html#fraglibtype">here</a>).</td>
</tr>
<tr>
<td><code>--unzip</code></td>
<td>The sequence files are zipped. So unzip them first</td>
</tr>
<tr>
<td><code>--salmon STRING</code></td>
<td>Path to Salmon executable (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--salmon_smem_opts STRING</code></td>
<td>Other options used for Salmon-SMEM quantifications. (should be put between " ") (For Salmon check <a href="http://salmon.readthedocs.io/en/latest/salmon.html#using-salmon">here</a>).</td>
</tr>
</table>
<h3>Differential Analysis options</h3>
<p><b>run_rnacocktail.py diff </b></p>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--difftool STRING</code></td>
<td>The differential analysis tool to use. (default: DESeq2)</td>
</tr>
<tr>
<td><code>--quant_files STRING</code></td>
<td>Quantification files for each sample (e.g. Salmon's quant.sf outputs). Replicates of the same sample should be comma-separated, e.g. --quant_files A1/quant.sf,A2/quant.sf B1/quant.sf,B2/quant.sf</td>
</tr>
<tr>
<td><code>--transcripts_gtfs STRING</code></td>
<td>Reconstructed transcript GTF files (for instance StringTie's transcripts.gtf output). Replicates of the same sample should be comma-separated, e.g. --transcripts_gtfs A1/transcripts.gtf,A2/transcripts.gtf B1/transcripts.gtf,B2/transcripts.gtf</td>
</tr>
<tr>
<td><code>--alignments STRING</code></td>
<td>Alignment BAM files for each sample (for instance HISAT2's output). Replicates of the same sample should be comma-separated, e.g. --alignments A1/alignments.bam,A2/alignments.bam B1/alignments.bam,B2/alignments.bam</td>
</tr>
<tr>
<td><code>--ref_gtf STRING</code></td>
<td>The reference transcriptome annotation file (in GTF or GFF3 format) to guide the analysis.</td>
</tr>
<tr>
<td><code>--sample STRING</code></td>
<td>Sample names. The number of samples and replicates should match the input quantification (--quant_files) or alignment (--alignments) files. Replicates of the same sample should be comma-separated, e.g. --sample A1,A2 B1,B2</td>
</tr>
<tr>
<td><code>--mincount INT</code></td>
<td>Minimum read count per transcript. The differential analysis pre-filtering step removes transcripts that have fewer than this number of reads. (default 2)</td>
</tr>
<tr>
<td><code>--alpha FLOAT</code></td>
<td>Adjusted p-value significance level for differential analysis. (default 0.05)</td>
</tr>
<tr>
<td><code>--R STRING</code></td>
<td>Path to R executable (DESeq2, readr, tximport should have been installed in R) (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--featureCounts STRING</code></td>
<td>Path to featureCounts executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--stringtie STRING</code></td>
<td>Path to StringTie executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--stringtie_merge_opts STRING</code></td>
<td>Other options used for StringTie merge. Can be set when the reconstructed transcript GTFs are used. (should be put between " ") (For StringTie check <a href="https://ccb.jhu.edu/software/stringtie/index.shtml?t=manual">here</a>).</td>
</tr>
<tr>
<td><code>--featureCounts_opts STRING</code></td>
<td>Other options used for featureCounts. (should be put between " ") (For options check <a href="http://bioinf.wehi.edu.au/subread-package/SubreadUsersGuide.pdf">here</a>).</td>
</tr>
</table>
<h3>De novo assembly options</h3>
<p><b>run_rnacocktail.py denovo </b></p>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--assembler STRING</code></td>
<td>The de novo assembler to use. (default Oases)</td>
</tr>
<tr>
<td><code>--assmebly_hash INT</code></td>
<td>Odd integer, or a comma-separated list of odd integers, that specifies the assembly hash length (for Oases/Velvet).</td>
</tr>
<tr>
<td><code>--file_format STRING</code></td>
<td>Input file format for de novo assembly. Options: fasta, fastq, raw, fasta.gz, fastq.gz, raw.gz, sam, bam, fmtAuto. (default fasta)</td>
</tr>
<tr>
<td><code>--read_type STRING</code></td>
<td>Input sequence read type for de novo assembly. Options: short, shortPaired, short2, shortPaired2, long, longPaired, reference. (Check <a href="https://www.ebi.ac.uk/~zerbino/velvet/Manual.pdf">here</a> for description) (default short)</td>
</tr>
<tr>
<td><code>--1 STRING</code></td>
<td>Comma-separated list of files containing mate 1s (filename usually includes _1), e.g. --1 A_1.fq,B_1.fq.</td>
</tr>
<tr>
<td><code>--2 STRING</code></td>
<td>Comma-separated list of files containing mate 2s (filename usually includes _2), e.g. --2 A_2.fq,B_2.fq.</td>
</tr>
<tr>
<td><code>--U STRING</code></td>
<td>Comma-separated list of files containing unpaired reads to be aligned, e.g. --U A.fq,B.fq.</td>
</tr>
<tr>
<td><code>--I STRING</code></td>
<td>Comma-separated list of files containing interleaved paired-end reads to be assembled, e.g. --I A.fq,B.fq.</td>
</tr>
<tr>
<td><code>--oases STRING</code></td>
<td>Path to oases executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--velvetg STRING</code></td>
<td>Path to velvetg executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--velveth STRING</code></td>
<td>Path to velveth executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--velveth_opts STRING</code></td>
<td>Other options used for assembly by velveth. (For velvet options check <a href="https://github.com/dzerbino/velvet/blob/master/Manual.pdf">here</a>).</td>
</tr>
<tr>
<td><code>--velvetg_opts STRING</code></td>
<td>Other options used for assembly by velvetg. (should be put between " ") (For velvet options check <a href="https://github.com/dzerbino/velvet/blob/master/Manual.pdf">here</a>).</td>
</tr>
<tr>
<td><code>--velveth_opts STRING</code></td>
<td>Other options used for assembly by velveth. (For velvet options check <a href="https://github.com/dzerbino/velvet/blob/master/Manual.pdf">here</a>).</td>
</tr>
<tr>
<td><code>--oases_opts STRING</code></td>
<td>Other options used for assembly by Oases. (should be put between " ") (For Oases options check <a href="https://github.com/dzerbino/oases">here</a>).</td>
</tr>
</table>
<h3>Long read error correction options</h3>
<p><b>run_rnacocktail.py long_correct </b></p>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--long_corrector STRING</code></td>
<td>The long-read error correction tool to use. (default LoRDEC).</td>
</tr>
<tr>
<td><code>--kmer INT</code></td>
<td>LoRDEC k-mer length</td>
</tr>
<tr>
<td><code>--solid INT</code></td>
<td>LoRDEC solidity abundance threshold for k-mers</td>
</tr>
<tr>
<td><code>--long STRING</code></td>
<td>The FASTA file containing long reads</td>
</tr>
<tr>
<td><code>--short STRING</code></td>
<td>The FASTA or FASTQ file containing short reads. (can be compressed .gz file)</td>
</tr>
<tr>
<td><code>--lordec STRING</code></td>
<td>Path to LoRDEC executable (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--lordec_opts STRING</code></td>
<td>Other options used for LoRDEC. (should be put between " ") (For LoRDEC check <a href="http://www.atgc-montpellier.fr/lordec/README.html">here</a>).</td>
</tr>
</table>
<h3>Long read alignment options</h3>
<p><b>run_rnacocktail.py long_align </b></p>
<table style="width:100%">
<tr>
<th style="width:30%">Option</th>
<th style="width:70%">Definition</th>
</tr>
<tr>
<td><code>--long_aligner STRING</code></td>
<td>The long-read alignment tool to use. (default STARlong).</td>
</tr>
<tr>
<td><code>--long STRING</code></td>
<td>The FASTA file containing long reads</td>
</tr>
<tr>
<td><code>--genome_dir STRING</code></td>
<td>Specifies the path to the genome directory where the STAR genome indices were generated</td>
</tr>
<tr>
<td><code>--ref_gtf STRING</code></td>
<td>The reference transcriptome annotation file (in GTF or GFF3 format) to guide the analysis.</td>
</tr>
<tr>
<td><code>--starlong STRING</code></td>
<td>Path to STARlong executable (version 2.5.0a or later) (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--sam2psl STRING</code></td>
<td>Path to the sam2psl.py script. Can be found in FusionCatcher package. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--samtools STRING</code></td>
<td>Path to samtools executable. (Optional. Can be on PATH or defined on defaults.py)</td>
</tr>
<tr>
<td><code>--starlong_opts STRING</code></td>
<td>Other options used for STARlong. (should be put between " ") (For STAR options check <a href="https://github.com/alexdobin/STAR/blob/master/doc/STARmanual.pdf">here</a>). By default we use the following options, as advised <a href="https://github.com/PacificBiosciences/cDNA_primer/wiki/Bioinfx-study:-Optimizing-STAR-aligner-for-Iso-Seq-data">here</a>: <code>
--outSAMattributes NH HI NM MD --readNameSeparator space --outFilterMultimapScoreRange 1 --outFilterMismatchNmax 2000 --scoreGapNoncan -20 --scoreGapGCAG -4 --scoreGapATAC -8 --scoreDelOpen -1 --scoreDelBase -1 --scoreInsOpen -1 --scoreInsBase -1 --alignEndsType Local --seedSearchStartLmax 50 --seedPerReadNmax 100000 --seedPerWindowNmax 1000 --alignTranscriptsPerReadNmax 100000 --alignTranscriptsPerWindowNmax 10000
</code>.</td>