-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtrialmapping.sh
86 lines (66 loc) · 5.78 KB
/
trialmapping.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
## define some parameters
sample=${1}
sampleB=($(echo ${sample} | cut -d"_" -f1-8))
sampID=($(echo ${sample} | cut -d"_" -f1-7))
pond=($(echo ${sample} | cut -d"_" -f9-12))
threads=10
echo $sample
echo $sampleB
inputDir="/scratch/kbb7sh/Daphnia/SingleMoms2018/fastqs"
interDir="/scratch/kbb7sh/Daphnia/SingleMoms2018/March2018SMIntB"
outputDir="/scratch/kbb7sh/Daphnia/SingleMoms2018/March2018SMMapB"
flowcell=($( ls ${inputDir}/*.gz | awk '{split($0,a,"/"); print a[7]}' | awk '{split($0,a,"_"); print a[1]}' | sort | uniq ))
echo ${!flowcell[@]}
## map reads
for cell in "${flowcell[@]}"; do
echo "${cell}"
lanes=($( ls ${inputDir}/${cell}*${sampleB}* | awk 'FS="_" {print $2}' | sort | uniq ))
for lane in "${lanes[@]}"; do
echo "${lane}"
### trim out nextera and index seq
java -jar $EBROOTTRIMMOMATIC/trimmomatic-0.36.jar PE \
${inputDir}/${cell}_${lane}_1_${sampleB}.fastq.gz \
${inputDir}/${cell}_${lane}_2_${sampleB}.fastq.gz \
${interDir}/${cell}_${lane}_1_${sampID}.P_trimm.fastq \
${interDir}/${cell}_${lane}_1_${sampID}.U_trimm.fastq \
${interDir}/${cell}_${lane}_2_${sampID}.P_trimm.fastq \
${interDir}/${cell}_${lane}_2_${sampID}.U_trimm.fastq \
ILLUMINACLIP:/project/berglandlab/Karen/TrimmomaticAdaptors/NexteraPE-PE.fa:2:30:10:8:true
### first, merge overlapping reads
~/pear-0.9.11-linux-x86_64/bin/pear \
-f ${interDir}/${cell}_${lane}_1_${sampID}.P_trimm.fastq \
-r ${interDir}/${cell}_${lane}_2_${sampID}.P_trimm.fastq \
-o ${interDir}/${cell}_${lane}_${sampID} \
-j ${threads}
### next, map to reference genome
bwa mem -t ${threads} -K 100000000 -Y \
-R "@RG\tID:${sampID};${cell};${lane}\tSM:${pond}\tPL:illumina\tPU:${sampID};${cell};${lane}" \
/scratch/kbb7sh/genomefiles/totalHiCwithallbestgapclosed.fa \
${interDir}/${cell}_${lane}_${sampID}.assembled.fastq | \
samtools view -L /scratch/kbb7sh/genomefiles/D84Agoodscaffstouse.bed -Suh -q 20 -F 0x100 | \
samtools sort -@ ${threads} -o ${interDir}/${cell}_${lane}_${pond}.sort.bam
samtools index ${interDir}/${cell}_${lane}_${pond}.sort.bam
## unassembled reads
bwa mem -t ${threads} -K 100000000 -Y \
-R "@RG\tID:${sampID};${cell};${lane}\tSM:${pond}\tPL:illumina\tPU:${sampID};${cell};${lane}" \
/scratch/kbb7sh/genomefiles/totalHiCwithallbestgapclosed.fa \
${interDir}/${cell}_${lane}_${sampID}.unassembled.forward.fastq \
${interDir}/${cell}_${lane}_${sampID}.unassembled.reverse.fastq | \
samtools view -L /scratch/kbb7sh/genomefiles/D84Agoodscaffstouse.bed -Suh -q 20 -F 0x100 | \
samtools sort -@ ${threads} -o ${interDir}/${cell}_${lane}_${pond}.filt.unassembled.sort.bam
samtools index ${interDir}/${cell}_${lane}_${pond}.filt.unassembled.sort.bam
## Next, merge assembled and unassembled bam files and mark duplicates
samtools merge ${interDir}/${cell}_${lane}_${pond}.filt.merged.bam \
${interDir}/${cell}_${lane}_${pond}.sort.bam \
${interDir}/${cell}_${lane}_${pond}.filt.unassembled.sort.bam
samtools index ${interDir}/${cell}_${lane}_${pond}.filt.merged.bam
done
done
### next, merge bam files to single bam file
samtools merge ${interDir}/${pond}_finalmap.bam ${interDir}/*${pond}.filt.merged.bam
samtools index ${interDir}/${pond}_finalmap.bam
### Mark duplicates
java -jar $EBROOTPICARD/picard.jar MarkDuplicates MAX_FILE_HANDLES_FOR_READ_ENDS_MAP=1000 \
REMOVE_DUPLICATES=true \
INPUT=${interDir}/${pond}_finalmap.bam \
OUTPUT=${outputDir}/${pond}_finalmap_mdup.bam METRICS_FILE=${interDir}/${pond}_finalmap_mdups.metrics CREATE_INDEX=true