-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpipe1.sh
executable file
·81 lines (66 loc) · 1.69 KB
/
pipe1.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/bin/bash
# Create a result table (written to stdout) that contains, for one specific
# annotation, the number of shared gene pairs per RCG size and
# distance/correlation measure, e.g.
#   distance   k=1  k=2  ...  k=5  k=10  k=20  ...  k=100
#   euclidean  3    5    ...                        700
#   pearson    4    7    ...                        800
# Each input file contains the number of gene pairs with shared annotation per
# gene and RCG size between 1 and 100 for one specific measure.
# The input folder (first argument) contains such input files for different
# measures but the same annotation.
#######################################
# Print the headline of the result table:
#   distance k=1 ... k=5 k=10 k=20 ... k=100
# Outputs: one tab-separated line on stdout.
#######################################
print_header() {
  local -a sizes=(1 2 3 4 5 10 20 30 40 50 60 70 80 90 100)
  local k
  printf '%s' "distance"
  for k in "${sizes[@]}"; do
    printf '\tk=%s' "$k"
  done
  printf '\n'
}

#######################################
# Derive the distance/correlation measure from a file name.
# Arguments: $1 - file name (without directory)
# Outputs:   the measure label on stdout
# Returns:   0 if a known measure was found, 1 otherwise
#######################################
measure_from_filename() {
  case "$1" in
    *euclidean*) printf '%s\n' "euclidean" ;;
    *spearman*) printf '%s\n' "spearman" ;;
    # Must be tested before *pearson*, which would match it as well.
    *quadraticpearson*) printf '%s\n' "quadraticpearson" ;;
    *pearson*) printf '%s\n' "pearson" ;;
    *relintoverlap*) printf '%s\n' "rio" ;;
    *mutual_information*) printf '%s\n' "mutual_information" ;;
    *cosine*) printf '%s\n' "cosine" ;;
    *) return 1 ;;
  esac
}

#######################################
# Print one result row: the label followed by the sum of every column from
# the second one onwards (the first column is skipped).
# Arguments: $1 - row label (measure name)
#            $2 - path of the input file
# Outputs:   one tab-separated, newline-terminated line on stdout
# Returns:   non-zero if the file cannot be read
#######################################
print_measure_row() {
  local label=$1 file=$2
  # The widest record decides the column count; NF inside END would only
  # reflect the last record (0 for a trailing blank line or an empty file).
  # The file is passed via stdin so its name is never parsed as an awk operand.
  awk -v label="$label" '
    {
      for (i = 2; i <= NF; i++) sum[i] += $i
      if (NF > width) width = NF
    }
    END {
      printf "%s", label
      for (i = 2; i <= width; i++) printf "\t%s", sum[i]
      printf "\n"
    }
  ' < "$file"
}

#######################################
# Build the result table for all *.txt files of one input folder.
# Arguments: $1 - input folder
# Outputs:   result table on stdout, diagnostics on stderr
# Returns:   0 on success, 1 on an unusable folder or unreadable file,
#            2 on wrong usage
#######################################
main() {
  if (( $# < 1 )); then
    printf 'Usage: %s <input_folder>\n' "${0##*/}" >&2
    return 2
  fi

  local input_folder=$1
  if [[ ! -d "$input_folder" ]]; then
    printf 'Not a directory: %s\n' "$input_folder" >&2
    return 1
  fi

  print_header

  local shared_anno_file fname dist
  local status=0
  for shared_anno_file in "$input_folder"/*.txt; do
    # An unmatched glob stays literal; skip it instead of reporting a bogus file.
    [[ -e "$shared_anno_file" ]] || continue
    fname=${shared_anno_file##*/}
    if dist=$(measure_from_filename "$fname"); then
      print_measure_row "$dist" "$shared_anno_file" || status=1
    else
      # Diagnostics go to stderr so they cannot end up inside the result table.
      printf '%s\n' "Unknown distance/correlation measure in $shared_anno_file" >&2
    fi
  done
  return "$status"
}

main "$@"