Skip to content
Quanhu Sheng edited this page Sep 29, 2017 · 8 revisions

First of all, we need to import libraries

#!/usr/bin/perl
use strict;
use warnings;

use CQS::ClassFactory;
use CQS::FileUtils;
use CQS::SystemUtils;
use CQS::ConfigUtils;
use Pipeline::RNASeq;

Then we start with our project definition

my $def = {
  #define task name, this name will be used as prefix of a few result, such as read count table file name.
  task_name      => "B3436",
  
  #email which will be used for notification if you run through cluster
  email          => "quanhu.sheng\@vanderbilt.edu",
  
  #target dir which will be automatically created and used to save code and result
  target_dir     => "/scratch/cqs/shengq2/test/pipeline_rnaseq_simple",

  #cqs in house software which will be used to generate count table. https://github.com/shengqh/CQS.Tools/
  cqstools       => "/home/shengq2/cqstools/cqstools.exe",

Genome sequence and annotation file

  #gene annotation file
  transcript_gtf => "/scratch/cqs/shengq2/references/gatk/b37/Homo_sapiens.GRCh37.82.MT.gtf",
  
  #gene name to transcription name map file which is generated by "mono cqstools.exe gtf_buildmap".
  name_map_file  => "/scratch/cqs/shengq2/references/gatk/b37/Homo_sapiens.GRCh37.82.MT.map",
  
  #genome sequence file
  fasta_file     => "/scratch/cqs/shengq2/references/gatk/b37/human_g1k_v37.fasta",

Alignment software and index

  #using STAR. 
  aligner => "star",
  star_index => "/scratch/cqs/shengq2/references/gatk/b37/STAR_index_2.5.2b_ensembl_v82_sjdb99",

if you want to use hisat2, then you have to set hisat2_index which will be similar to:

  #using hisat2. 
  aligner => "hisat2",
  hisat2_index => "/scratch/cqs/shengq2/references/hisat2_index/grch37_snp_tran/genome_snp_tran",

Quality control at BAM level:

  #perform RNASeQC or not
  perform_rnaseqc => 0,
  rnaseqc_jar     => "/home/shengq2/local/bin/RNA-SeQC_v1.1.8.jar",

Perform variant calling based on GATK best practice. If you set it to 1, then you will need to fill in the following required entries

  perform_call_variants => 0,
  dbsnp => "/scratch/cqs/shengq2/references/gatk/b37/dbsnp_150.b37.vcf",
  gatk_jar => "/scratch/cqs/shengq2/local/bin/gatk/GenomeAnalysisTK.jar",
  picard_jar => "/scratch/cqs/shengq2/local/bin/picard/picard.jar",
  annovar_db => "/scratch/cqs/shengq2/references/annovar/humandb",
  annovar_buildver => "hg19",
  annovar_param => "-protocol refGene,snp138,cosmic70 -operation g,f,f --remove",

Now we need to define the experimental. First of all, the read files. The hash map can be generated by command "mono cqstools.exe file_def"

  #source files
  files          => {
    "JDB-01" => [ "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-1_1_sequence.txt.gz", "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-1_2_sequence.txt.gz" ],
    "JDB-02" => [ "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-2_1_sequence.txt.gz", "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-2_2_sequence.txt.gz" ],
    "JDB-03" => [ "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-3_1_sequence.txt.gz", "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-3_2_sequence.txt.gz" ],
    "JDB-04" => [ "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-4_1_sequence.txt.gz", "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-4_2_sequence.txt.gz" ],
    "JDB-05" => [ "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-5_1_sequence.txt.gz", "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-5_2_sequence.txt.gz" ],
    "JDB-06" => [ "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-6_1_sequence.txt.gz", "/scratch/cqs/shengq2/rnaseq/20160509_brown_3436/data/3436-JDB-6_2_sequence.txt.gz" ],
  },

The samples can be grouped together for correlation analysis or differential comparison. One sample can belonged to multiple groups for combination of comparison.

  #group information for visualization and comparison 
  groups => {
    "FED"  => [ "JDB-01", "JDB-02", "JDB-03" ],
    "DMSO" => [ "JDB-04", "JDB-05", "JDB-06" ],
  },

Comparison information, in each comparison, the first one is control. For example, in comparison "DMSO_vs_FED", "FED" is control.

  pairs => {
    "DMSO_vs_FED" => [ "FED", "DMSO" ],
  },

Perform enrichment analysis using WebGestalt, organism should be one of: "athaliana" "btaurus" "celegans" "cfamiliaris" "drerio" "sscrofa" "dmelanogaster" "ggallus" "hsapiens" "mmusculus" "rnorvegicus" "scerevisiae"

  perform_webgestalt => 1,
  webgestalt_organism => "hsapiens", 
  #organism should be: "athaliana" "btaurus"       "celegans"      "cfamiliaris"   "drerio"  "sscrofa"       "dmelanogaster" "ggallus"       "hsapiens"      "mmusculus"  "rnorvegicus"   "scerevisiae" 

Now close the definition

};

Finally, call perfromRNASeq to generate all scripts.

performRNASeq($def);
1;

You should find the scripts generated inside of your target folder. The whole scripts can be downloaded from simple

Clone this wiki locally