-
Notifications
You must be signed in to change notification settings - Fork 0
/
pattern.go
128 lines (114 loc) · 3.35 KB
/
pattern.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
package main
import (
"fmt"
"io"
"github.com/bjeight/fastats/fasta"
)
// pattern is the entry point for fastats at, gc, gaps etc. in the cli. It first writes a
// tab-separated header line to w, then hands patternRecords, the cli arguments and the writer
// to collectCommandLine, returning whatever error that call produces.
//
// The header's first column is "file" when file is true and "record" otherwise; its second
// column is the pattern followed by "_count" when counts is true and "_prop" otherwise.
func pattern(w io.Writer, filepaths []string, pattern string, file bool, counts bool, description bool, lenFormat string) error {
	// what each output row describes: a whole file, or a single record
	unit := "record"
	if file {
		unit = "file"
	}

	// whether the statistic is a raw count or a proportion
	suffix := "_prop"
	if counts {
		suffix = "_count"
	}

	header := unit + "\t" + pattern + suffix + "\n"
	if _, err := w.Write([]byte(header)); err != nil {
		return err
	}

	// the header is out; the per-file work is delegated to collectCommandLine
	if err := collectCommandLine(w, patternRecords, filepaths, pattern, file, counts, description, lenFormat); err != nil {
		return err
	}
	return nil
}
// patternRecords does the work of fastats at, gc, etc. for one fasta file at a time.
//
// It reads records from r until io.EOF. For each record it counts how many bytes of the
// sequence match any byte of args.pattern. When args.file is false it writes one line per
// record (record name, then the count or the proportion of the sequence length); when
// args.file is true it accumulates across records and writes a single line for the file
// after the last record. Any read or write error is returned immediately.
//
// Note: each byte of args.pattern contributes its own tally, so a byte listed twice in the
// pattern is counted twice. A proportion with a zero-length denominator is the result of a
// floating-point division by zero.
func patternRecords(r *fasta.Reader, args arguments, w io.Writer) error {
	// the file name is only printed in per-file mode, but is resolved up front
	filename := filenameFromFullPath(args.filepath)

	// the bytes to be counted, as a slice so each one can index the tally table
	targets := []byte(args.pattern)

	// running totals over the whole file: matching bytes and sequence length
	var hits, bases int

	for {
		record, err := r.Read()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}

		// tally every byte value that occurs in this record's sequence
		var tally [256]int
		for _, nuc := range record.Seq {
			tally[nuc]++
		}

		// sum the tallies of the bytes we were asked about
		matched := 0
		for _, b := range targets {
			matched += tally[b]
		}

		// per-file mode: just accumulate, the line is written after the loop
		if args.file {
			hits += matched
			bases += len(record.Seq)
			continue
		}

		// per-record mode: emit this record's count or proportion straight away
		name := return_record_name(record, args.description)
		var line string
		if args.counts {
			line = fmt.Sprintf("%s\t%d\n", name, matched)
		} else {
			line = fmt.Sprintf("%s\t%f\n", name, float64(matched)/float64(len(record.Seq)))
		}
		if _, err := w.Write([]byte(line)); err != nil {
			return err
		}
	}

	// nothing left to do unless the statistic is reported per file
	if !args.file {
		return nil
	}

	var summary string
	if args.counts {
		summary = fmt.Sprintf("%s\t%d\n", filename, hits)
	} else {
		summary = fmt.Sprintf("%s\t%f\n", filename, float64(hits)/float64(bases))
	}
	if _, err := w.Write([]byte(summary)); err != nil {
		return err
	}
	return nil
}