forked from mcgml/PrimerDesigner
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBlast.java
86 lines (62 loc) · 2.69 KB
/
Blast.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
package nhs.genetics.cardiff;
import java.io.*;
import java.util.ArrayList;
import java.util.Scanner;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
* A wrapper for running BLAST executable https://blast.ncbi.nlm.nih.gov
*
* @author Matt Lyon
* @version 1.0
* @since 2015-04-27
*/
public class Blast {
private static final Logger log = Logger.getLogger(Blast.class.getName());
public static ArrayList<GenomicLocation> callShortQueryBlast(String query, File blastnFilePath, File blastnRefPath, int maxExactMatches, double minSimilarity) throws MaxAlignmentExceededException {
log.log(Level.FINE, "Calling short query blast ...");
log.log(Level.FINE, "Sequence: " + query);
int numberExactAlignments = 0;
ArrayList<String> output = new ArrayList<>();
ArrayList<GenomicLocation> alignments = new ArrayList<>();
try{
ProcessBuilder builder = new ProcessBuilder(
blastnFilePath.toString(),
"-db", blastnRefPath.toString(),
"-task" , "blastn-short",
"-outfmt", "6"
);
Process process = builder.start();
OutputStream stdin = process.getOutputStream();
InputStream stdout = process.getInputStream();
BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(stdin));
writer.write(query);
writer.flush();
writer.close();
Scanner scanner = new Scanner(stdout);
while (scanner.hasNextLine()) {
output.add(scanner.nextLine());
}
if (process.waitFor() != 0){
throw new RuntimeException("Problem invoking blastn-short, exit code: " + process.exitValue());
}
} catch (IOException e){
log.log(Level.SEVERE, e.toString());
} catch (InterruptedException e){
log.log(Level.SEVERE, e.toString());
}
//extract genome coordinates from blast output
for (String s : output){
String[] fields = s.split("\t");
alignments.add(new GenomicLocation(fields[1], Integer.parseInt(fields[8]), Integer.parseInt(fields[9])));
if (fields[2].equals("100.00") && (double) Integer.parseInt(fields[3]) / query.length() >= minSimilarity){
numberExactAlignments++;
}
}
//throw error if too many alignemtns are identified
if (numberExactAlignments > maxExactMatches){
throw new MaxAlignmentExceededException("Sequence " + query + " has too many alignments (" + numberExactAlignments + ")");
}
return alignments;
}
}