-
Notifications
You must be signed in to change notification settings - Fork 0
/
10.database
executable file
·93 lines (77 loc) · 3.37 KB
/
10.database
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#!/bin/bash
source globals
SQL=$0.sql
XLS=$OUTPUT_NAME.xls
IN=data/combined.final.otus.xls
FA=data/combined.final.otus.fasta
CLASSIN=data/combined.final.otus.classified.xls
CLASSINFLAT=data/combined.final.otus.classified.flat.xls
seq_table=final_otus_sequences
otu_table=final_otus
class_table=final_otus_classifications
class_flat=final_otus_classifications_flat
echo loading sequences from $FA into database table $seq_table
awk -f fa2sql.awk -v create_table=1 -v table="$seq_table" < $FA | mysql -h $HOST $DB
echo flattening $CLASSIN for loading to the database
awk -F'\t' '{ gsub(/"/, "", $0); otu = $1; rank = 0; for (i = 3; i <= NF; i += 3) { rank++; printf("%s\t%d\t%s\t%s\t%s\n", otu, rank, $i, $(i + 1), $(i + 2)); } }' $CLASSIN > $CLASSINFLAT
echo loading otu abundance data from $IN into database table $otu_table
echo " and otu classification data from $CLASSIN into database table $class_table"
cat << EOF > $SQL
DROP TABLE IF EXISTS $otu_table;
CREATE TABLE $otu_table (
EOF
head -1 $IN | awk '{ printf("\tOTUId VARCHAR(32) PRIMARY KEY NOT NULL,\n"); for (i = 2; i <= NF; ++i) { printf("\t%s INTEGER%s\n", $i, (i == NF ? "" : ", ")); } }' >> $SQL
cat << EOF >> $SQL
);
LOAD DATA LOCAL INFILE "$IN" INTO TABLE $otu_table FIELDS TERMINATED BY '\t' IGNORE 1 LINES;
DROP TABLE IF EXISTS $class_table;
CREATE TABLE $class_table (
OTUId VARCHAR(32) NOT NULL,
rank_no INTEGER NOT NULL,
name VARCHAR(32) NOT NULL,
rank VARCHAR(32) NOT NULL,
confidence FLOAT NOT NULL,
INDEX (OTUId),
INDEX (OTUId, name),
INDEX (OTUId, rank),
INDEX (OTUId, rank_no),
INDEX (OTUId, name, rank, rank_no)
);
LOAD DATA LOCAL INFILE "$CLASSINFLAT" INTO TABLE $class_table FIELDS TERMINATED BY '\t';
DROP TABLE IF EXISTS $class_flat;
CREATE TABLE $class_flat
SELECT otus.OTUId,
d.name AS domain, d.confidence AS domain_confidence,
p.name AS phylum, p.confidence AS phylum_confidence,
c.name AS class, c.confidence AS class_confidence,
o.name AS order_, o.confidence AS order_confidence,
f.name AS family, f.confidence AS family_confidence,
g.name AS genus, g.confidence AS genus_confidence,
s.name AS species, s.confidence AS species_confidence
FROM (SELECT DISTINCT OTUId FROM $class_table) AS otus
LEFT JOIN $class_table AS d ON otus.OTUId = d.OTUId AND d.rank = "domain"
LEFT JOIN $class_table AS p ON otus.OTUId = p.OTUId AND p.rank = "phylum"
LEFT JOIN $class_table AS c ON otus.OTUId = c.OTUId AND c.rank = "class"
LEFT JOIN $class_table AS o ON otus.OTUId = o.OTUId AND o.rank = "order"
LEFT JOIN $class_table AS f ON otus.OTUId = f.OTUId AND f.rank = "family"
LEFT JOIN $class_table AS g ON otus.OTUId = g.OTUId AND g.rank = "genus"
LEFT JOIN $class_table AS s ON otus.OTUId = s.OTUId AND s.rank = "species"
;
SELECT ot.OTUId,
EOF
awk -F'\t' '{ if (line >= 1) { printf("\t\t%s AS \"%s\",\n", $1, $1); } ++line; }' $SAMPLE_INFO_SUBSET >> $SQL
cat << EOF >> $SQL
domain, domain_confidence, phylum, phylum_confidence, class, class_confidence,
order_, order_confidence, family, family_confidence, genus, genus_confidence, species, species_confidence,
sequence
FROM $otu_table AS ot
INNER JOIN $class_flat AS cf ON ot.OTUId = cf.OTUId
INNER JOIN $seq_table AS st ON ot.OTUId = st.otu
ORDER BY ( 0
EOF
awk -F'\t' '{ if (line >= 1) { printf("\t\t\t+ %s\n", $1); } ++line; }' $SAMPLE_INFO_SUBSET >> $SQL
cat << EOF >> $SQL
) DESC
;
EOF
mysql -h $HOST $DB < $SQL | sed "s/NULL//g" > $XLS