forked from dgpinheiro/bioaat
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsim.sh
executable file
·77 lines (64 loc) · 2.48 KB
/
sim.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/bin/bash
#
# INGLÊS/ENGLISH
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# http://www.gnu.org/copyleft/gpl.html
#
#
# PORTUGUÊS/PORTUGUESE
# Este programa é distribuído na expectativa de ser útil aos seus
# usuários, porém NÃO TEM NENHUMA GARANTIA, EXPLÍCITAS OU IMPLÍCITAS,
# COMERCIAIS OU DE ATENDIMENTO A UMA DETERMINADA FINALIDADE. Consulte
# a Licença Pública Geral GNU para maiores detalhes.
# http://www.gnu.org/copyleft/gpl.html
#
# Copyright (C) 2012 Universidade de São Paulo
#
# Universidade de São Paulo
# Laboratório de Biologia do Desenvolvimento de Abelhas
# Núcleo de Bioinformática (LBDA-BioInfo)
#
# Daniel Guariz Pinheiro
# http://zulu.fmrp.usp.br/bioinfo
#
# Build the reference transcriptome: fetch one FASTA record per accession
# listed in ./ACCS.txt (one accession per line) and append each record to
# transcriptoma.fa, which is recreated from scratch on every run.
rm -f transcriptoma.fa
# Kept from the original script: from here on, unquoted expansions are
# split on newlines only.
IFS=$'\n'
if [[ ! -r ./ACCS.txt ]]; then
  echo "ERRO: arquivo ./ACCS.txt não encontrado ou sem permissão de leitura" >&2
  exit 1
fi
# The list is read on file descriptor 3 so that the commands in the loop
# body, which inherit the script's stdin, cannot consume it. The
# `|| [[ -n ... ]]` clause keeps a last line that lacks a trailing newline.
while IFS= read -r -u 3 acc || [[ -n "${acc}" ]]; do
  # Tolerate CRLF line endings in the accession list.
  acc="${acc%$'\r'}"
  # Blank lines carry no accession; skip them.
  [[ -z "${acc}" ]] && continue
  echo "Pegando FASTA para ${acc} ..."
  esearch -db nucleotide -query "${acc}" | efetch \
    -format fasta >> transcriptoma.fa
  # Copy PIPESTATUS immediately: the next command would overwrite it.
  fetch_status=("${PIPESTATUS[@]}")
  if ((fetch_status[0] != 0 || fetch_status[1] != 0)); then
    # A single failed accession is reported but does not stop the run.
    echo "AVISO: falha ao obter FASTA para ${acc}" >&2
  fi
done 3< ./ACCS.txt
# Without any sequence there is nothing to simulate reads from.
if [[ ! -s transcriptoma.fa ]]; then
  echo "ERRO: transcriptoma.fa está vazio; nenhuma sequência foi obtida" >&2
  exit 1
fi
# Simulate paired-end reads for every sample (biological groups A and B,
# replicates 1 and 2). For each sample:
#   1. generate_fragments.py draws fragments from transcriptoma.fa using
#      the group's abundance table (./abundance_<group>.txt);
#   2. renameSeqs.pl + sed rename the fragments with the SAMPLE<group><rep>
#      prefix and reduce each FASTA header to its first token;
#   3. simNGS turns the fragments into an interleaved FASTQ;
#   4. deinterleave_pairs splits it into ./raw/<sample>_R1.fastq and
#      ./raw/<sample>_R2.fastq;
#   5. intermediates are removed and the read count of each mate file is
#      printed.
# If a step leaves no output the script stops with a non-zero status and
# keeps the intermediate files (including the simNGS stderr log) so the
# failure can be diagnosed.

# The output directory is the same for every sample; create it once.
mkdir -p ./raw
for biogroup in A B; do
  for rep in 1 2; do
    sample="SAMPLE${biogroup}${rep}"
    frag_prefix="./tmp.frags_${biogroup}_${rep}"
    frags_fa="./frags_${biogroup}${rep}.fa"
    echo "Gerando reads para amostra ${biogroup} réplica ${rep} ..."
    # NOTE(review): -t/-i/-s presumably set fragment count, mean insert
    # size and its standard deviation - confirm against the tool's usage.
    generate_fragments.py -r transcriptoma.fa \
      -a "./abundance_${biogroup}.txt" \
      -o "${frag_prefix}" \
      -t 25000 \
      -i 300 \
      -s 30
    if [[ ! -s "${frag_prefix}.1.fasta" ]]; then
      echo "ERRO: generate_fragments.py não gerou ${frag_prefix}.1.fasta" >&2
      exit 1
    fi
    # The sed expression drops everything after the first
    # whitespace-delimited token of each FASTA header line.
    renameSeqs.pl \
      -if FASTA \
      -of FASTA \
      -p "${sample}" \
      -w 1000 < "${frag_prefix}.1.fasta" \
      | sed 's/^>\(\S\+\).*/>\1/' \
        > "${frags_fa}"
    # simNGS reads the fragments on stdin; -a takes the two adapter
    # sequences separated by ':'. Its stderr goes to a per-sample log.
    # NOTE(review): -n 151 is presumably the number of cycles - confirm.
    simNGS -a \
      AGATCGGAAGAGCACACGTCTGAACTCCAGTCACATCACGATCTCGTATGCCGTCTTCTGCTTG:AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGTAGATCTCGGTGGTCGCCGTATCATT \
      -p paired \
      /usr/local/bioinfo/simNGS/data/s_4_0099.runfile \
      -n 151 < "${frags_fa}" > "./${sample}.fastq" 2> "${sample}.err.txt"
    if [[ ! -s "./${sample}.fastq" ]]; then
      echo "ERRO: simNGS não gerou reads para ${sample}; veja ${sample}.err.txt" >&2
      exit 1
    fi
    deinterleave_pairs "${sample}.fastq" \
      -o "./raw/${sample}_R1.fastq" \
      "./raw/${sample}_R2.fastq"
    # Verify both mate files before the intermediates are deleted.
    for mate in R1 R2; do
      if [[ ! -e "./raw/${sample}_${mate}.fastq" ]]; then
        echo "ERRO: deinterleave_pairs não gerou ./raw/${sample}_${mate}.fastq" >&2
        exit 1
      fi
    done
    rm -f "${frag_prefix}.1.fasta" "${frags_fa}" "./${sample}.fastq" "./${sample}.err.txt"
    # Read count = line count divided by 4 (integer division).
    for mate in R1 R2; do
      echo "Número de reads ${biogroup}${rep} ${mate}:" "$(($(wc -l < "raw/${sample}_${mate}.fastq") / 4))"
    done
  done
done