-
Notifications
You must be signed in to change notification settings - Fork 2
/
notes.txt
94 lines (69 loc) · 1.82 KB
/
notes.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
accession_taxid names nodes
sqlite> select * from names LIMIT 10;
taxid|name
1|root
2|Bacteria <bacteria>
6|Azorhizobium
7|Azorhizobium caulinodans
9|Buchnera aphidicola
10|Cellvibrio
11|Cellulomonas gilvus
13|Dictyoglomus
14|Dictyoglomus thermophilum
16|Methylophilus
sqlite> select * from nodes LIMIT 10;
taxid|parent_taxid|rank
1|1|-
2|131567|superkingdom
6|335928|genus
7|6|species
9|32199|species
10|1706371|genus
11|1707|species
13|203488|genus
14|13|species
16|32011|genus
sqlite> select * from accession_taxid LIMIT 10;
accession|taxid
A00001.1|10641
A00002.1|9913
A00003.1|9913
A00004.1|32630
A00005.1|32630
A00006.1|32630
A00008.1|32630
A00009.1|32630
A00010.1|32630
A00011.1|32630
SELECT A.taxId, B.nameTxt
FROM NCBI_node AS A
INNER JOIN NCBI_name AS B
    ON A.taxId = B.taxId
WHERE A.parentTaxId = :taxid
6965 ../output_data/child_test.csv
wc -l blacklist_2
7346421
10:41 hadley_king@hive1-node1 ~ $ nohup python3 git_filtered_nt/ac2taxid_check.py -d ncbi-taxonomy-database/taxonomy.db -n raw_data/nt -l logs/logfile.accession2taxid.txt &
ps ax | grep 'ac2taxid_check.py'
accession|taxid|name|parent_taxid|rank
https://ncbi-hackathons.github.io/EDirectCookbook/
cat logs/logfile.accession2taxid_test.txt | epost -db nuccore -format acc | elink -target taxonomy | efetch -format xml | xtract -pattern Taxon -element TaxId ScientificName Rank Lineage
#______________________________________________________________________________#
Monday 06-26-23
1,512,606
1,576,990
656650780
656981344
make nucleotide
330564 /tmp/tmp.UqzDSlVl2g
656981344 /tmp/tmp.UqzDSlVl2g
972514815 /tmp/tmp.UqzDSlVl2g
330564 /tmp/tmp.YJcTZpo3kb
656981344 /tmp/tmp.YJcTZpo3kb
972,514,815 /tmp/tmp.YJcTZpo3kb
972,514,757 /tmp/tmp.kb7bmdsyZl
PID: 2784320
14,384,694,720 * 0.06 = 863,081,683.2
36,331,904
93,245,213 nt
11,283,515