forked from deweylab/RSEM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
getUnique.cpp
74 lines (57 loc) · 1.37 KB
/
getUnique.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include<cstdio>
#include<cstring>
#include<cstdlib>
#include<cassert>
#include<string>
#include<vector>
#include <stdint.h>
#include "sam/bam.h"
#include "sam/sam.h"
#include "utils.h"
using namespace std;
string cqname;
samfile_t *in, *out;
bam1_t *b;
vector<bam1_t*> arr;
bool unaligned;
// Writes the buffered read group in `arr` to `out`, but only when the group
// is uniquely aligned:
//   - no record of the group had flag bit 0x0004 set (`unaligned` is false);
//   - the group holds exactly 2 records if the first record has flag bit
//     0x0001 set, and exactly 1 record otherwise.
// Any other group is silently dropped. The buffer itself is not modified;
// releasing the records is left to the caller.
void output() {
	if (arr.empty() || unaligned) return;

	// Flag bit 0x0001 on the first record decides how many records a
	// uniquely aligned group must contain.
	const bool pairedEnd = (arr.front()->core.flag & 0x0001) != 0;
	const size_t expected = pairedEnd ? 2 : 1;
	if (arr.size() != expected) return;

	const size_t total = arr.size();
	for (size_t idx = 0; idx != total; ++idx) {
		samwrite(out, arr[idx]);
	}
}
// Entry point of rsem-get-unique.
//
// Usage: rsem-get-unique unsorted_transcript_bam_input bam_output
//
// Streams records from the input BAM, groups consecutive records that share
// the same query name, and hands each finished group to output(), which
// decides whether the group is written to the output BAM. Grouping relies on
// records of one read being adjacent in the input.
//
// Exits with status -1 on a wrong argument count or when either file cannot
// be opened; returns 0 otherwise.
int main(int argc, char* argv[]) {
	if (argc != 3) {
		printf("Usage: rsem-get-unique unsorted_transcript_bam_input bam_output\n");
		exit(-1);
	}

	// Check the open results explicitly instead of with assert(): assert is
	// compiled out under NDEBUG, which would let `in->header` below
	// dereference a null pointer.
	in = samopen(argv[1], "rb", NULL);
	if (in == NULL) {
		fprintf(stderr, "Cannot open %s for reading!\n", argv[1]);
		exit(-1);
	}
	out = samopen(argv[2], "wb", in->header);
	if (out == NULL) {
		fprintf(stderr, "Cannot open %s for writing!\n", argv[2]);
		samclose(in);
		exit(-1);
	}

	HIT_INT_TYPE cnt = 0;

	cqname = "";
	arr.clear();
	b = bam_init1();
	unaligned = false;

	while (samread(in, b) >= 0) {
		// A change of query name closes the current group: emit it, release
		// its duplicated records and start collecting the next one.
		if (cqname != bam1_qname(b)) {
			output();
			cqname = bam1_qname(b);
			for (size_t i = 0; i < arr.size(); i++) bam_destroy1(arr[i]);
			arr.clear();
			unaligned = false;
		}

		// One record with flag bit 0x0004 marks the whole group.
		unaligned = unaligned || (b->core.flag & 0x0004);
		// `b` is reused by the next samread(), so keep a private copy.
		arr.push_back(bam_dup1(b));

		// Progress indicator: one dot per million records read.
		++cnt;
		if (cnt % 1000000 == 0) { printf("."); fflush(stdout); }
	}

	// Terminate the line of progress dots if at least one was printed.
	if (cnt >= 1000000) printf("\n");

	// Flush the last group, then free its records; the loop above only frees
	// a group when the next one starts, so the final group would otherwise
	// never be released.
	output();
	for (size_t i = 0; i < arr.size(); i++) bam_destroy1(arr[i]);
	arr.clear();

	bam_destroy1(b);
	samclose(in);
	samclose(out);

	printf("done!\n");

	return 0;
}