-
Notifications
You must be signed in to change notification settings - Fork 2
/
example_data.sh
113 lines (83 loc) · 2.97 KB
/
example_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
###### SETUP ######
# set paths to the analysis programs,
# will need to be replaced your local installation
ANGSD="$HOME/programs/angsd/angsd"
realSFS="$HOME/programs/angsd/misc/realSFS"
IBS="$HOME/programs/angsd/misc/ibs"
SAMTOOLS="samtools"
# download the example data
wget http://popgen.dk/software/download/angsd/bams.tar.gz
# unzip/untar and index the bam files
tar xf bams.tar.gz
for i in bams/*.bam;do samtools index $i;done
###### realSFS METHOD ######
# make a directory for the results
mkdir results_realsfs
# get the R script to parse the realSFS output
wget https://raw.githubusercontent.com/rwaples/freqfree_suppl/master/read_realSFS.R
# make a separate bam filelist for each indiviudal
# also create a SAMPLES array for use below
BAMS=./bams/*.bam
SAMPLES=()
for b in $BAMS; do
# parse out the sample name
base="$(basename -- $b)"
sample="${base%%.mapped.*}"
SAMPLES+=("$sample")
echo $sample
echo $b > ${sample}.filelist.ind
done
# For the realSFS method, one of the alleles at each site must be specified
# here we will use an ancestral state file
# download and index the ancestral state fasta file
wget http://popgen.dk/software/download/angsd/hg19ancNoChr.fa.gz
$SAMTOOLS faidx hg19ancNoChr.fa.gz
# run doSAF on each individual
for s in "${SAMPLES[@]}"; do
$ANGSD -b ${s}.filelist.ind \
-anc hg19ancNoChr.fa.gz \
-minMapQ 30 -minQ 20 -GL 2 \
-doSaf 1 -doDepth 1 -doCounts 1 \
-out ${s}
done
# run realSFS on each pair of indiviudals
for i in {0..9}; do
for j in {0..9}; do
if (( i < j)); then
sample1=${SAMPLES[i]}
sample2=${SAMPLES[j]}
$realSFS ${sample1}.saf.idx ${sample2}.saf.idx > ./results_realsfs/${sample1}_${sample2}.2dsfs
fi
done
done
# lets take a look at the results for a single pair of individuals
Rscript \
-e "source('./read_realSFS.R')" \
-e "res = read_realSFS('results_realsfs/smallNA06985_smallNA11830.2dsfs')" \
-e "res['sample1'] = 'smallNA06985'; res['sample2'] = 'smallNA11830'" \
-e "print(res[,c('sample1', 'sample2', 'nSites', 'Kin', 'R0', 'R1') ])"
###### IBS METHOD ######
# make a directory for the results
mkdir results_IBS
# get the R script to parse the IBS output
wget https://raw.githubusercontent.com/rwaples/freqfree_suppl/master/read_IBS.R
## make a bam filelists containing all individuals
ls bams/*.bam > all.filelist
# make a genotype likelihood file (glf) containing all individuals
$ANGSD -b all.filelist \
-minMapQ 30 -minQ 20 -GL 2 \
-doGlf 1 \
-out example
# run IBS, this will analyse each pair of indiviudals
$IBS -glf example.glf.gz \
-model 0 \
-nInd 10 -allpairs 1 \
-outFileName results_IBS/ibs.model0.results
# look at the results from IBS
Rscript \
-e "source('./read_IBS.R')" \
-e "res = do_derived_stats(read_ibspair_model0('results_IBS/ibs.model0.results.ibspair'))" \
-e "print(res[6,c('ind1', 'ind2', 'nSites', 'Kin', 'R0', 'R1') ])"
# the IBS method in ANGSD indexes individuals as they appear in the filelist
# (zero-indexed)
cat all.filelist