-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
88 lines (76 loc) · 2.85 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import subprocess
import pandas as pd
if not os.path.exists(config['log_dir']): subprocess.run(f'mkdir -p {config["log_dir"]}', shell=True)
if not os.path.exists(config['tmp_dir']): subprocess.run(f'mkdir -p {config["tmp_dir"]}', shell=True)
SAMPLES = [s.strip() for s in open(config['samplesfile']).readlines()]
rule all:
input:
#expand('results/star/{sample}.bam.bai', sample=SAMPLES),
#expand('results/star/{sample}.bam.bai', sample=SAMPLES),
expand('results/featurecounts/{sample}.counts.txt', sample=SAMPLES),
#expand('results/star/{sample}/{sample}.Aligned.sortedByCoord.out.bam', sample=SAMPLES),
def _get_r1s(wildcards):
df = pd.read_table(config['fastq_pathfile'])
df = df[df['sample']==wildcards.sample]
assert df.shape[0] == 1, df
paths_str = df['R1_paths'].iloc[0]
paths = paths_str.split(',')
return paths
def _get_r2s(wildcards):
df = pd.read_table(config['fastq_pathfile'])
df = df[df['sample']==wildcards.sample]
assert df.shape[0] == 1, df
paths_str = df['R2_paths'].iloc[0]
paths = paths_str.split(',')
return paths
rule star_align:
input:
r1s = _get_r1s,
r2s = _get_r2s,
output:
bam = 'results/star/{sample}/{sample}.Aligned.sortedByCoord.out.bam'
params:
r1s = lambda w, input: ','.join(input.r1s),
r2s = lambda w, input: ','.join(input.r2s),
out_dir = 'results/star/{sample}',
genome_dir = config['star_genome_dir'],
threads: 16,
resources:
mem_mb = 61440,
shell:
'module load star/2.5.3a ; '
'STAR --runMode alignReads '
'--outFilterMultimapNmax 3 '
'--runThreadN {threads} '
'--genomeDir {params.genome_dir} '
'--readFilesIn {params.r1s} {params.r2s} '
'--readFilesCommand zcat '
'--outFileNamePrefix {params.out_dir}/{wildcards.sample}. '
'--outSAMtype BAM SortedByCoordinate'
rule featurecounts:
input:
bam = 'results/star/{sample}/{sample}.Aligned.sortedByCoord.out.bam',
output:
counts = 'results/featurecounts/{sample}.counts.txt',
params:
gtf = config['reference_gtf'],
threads: 4,
#singularity: 'docker://alexgilgal/featurecount:latest',
singularity: '~/chois7/singularity/sif/featurecount.sif',
shell:
'featureCounts -p -O -T {threads} '
'-a {params.gtf} -g gene_name '
'-o {output.counts} {input.bam}'
rule index_and_softlink:
input:
bam = 'results/star/{sample}/{sample}.Aligned.sortedByCoord.out.bam',
output:
bam = 'results/star/{sample}.bam',
bai = 'results/star/{sample}.bam.bai',
params:
bai = 'results/star/{sample}/{sample}.Aligned.sortedByCoord.out.bam.bai',
shell:
'samtools index {input.bam}; '
'cp {input.bam} {output.bam}; '
'cp {params.bai} {output.bai}'