-
Notifications
You must be signed in to change notification settings - Fork 0
/
sex_ploidy_map.cpp
117 lines (103 loc) · 3.36 KB
/
sex_ploidy_map.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
#include "sex_ploidy_map.h"
#include "qgenlib/qgen_error.h"
int8_t* sex_ploidy_map::get_ploidies(bcf1_t* v) {
int32_t cur_ploidy_type = get_ploidy_type(v);
if ( ploidies == NULL )
ploidies = new int8_t[vSex.size()];
if ( prev_ploidy_type != cur_ploidy_type ) {
for(int32_t i=0; i < (int32_t)vSex.size(); ++i) {
if ( cur_ploidy_type == PLOIDY_TYPE_AUTOSOME )
ploidies[i] = 2;
else if ( cur_ploidy_type == PLOIDY_TYPE_X )
ploidies[i] = ( vSex[i] == 1 ? 1 : 2 );
else if ( cur_ploidy_type == PLOIDY_TYPE_Y )
ploidies[i] = ( vSex[i] == 1 ? 1 : 0 );
else if ( cur_ploidy_type == PLOIDY_TYPE_MT )
ploidies[i] = 1;
else
error("Unknown ploidy type %d", cur_ploidy_type);
}
prev_ploidy_type = cur_ploidy_type;
}
return ploidies;
}
int32_t sex_ploidy_map::get_ploidy_type(bcf1_t* v) {
if ( v ) {
if ( v->rid == x_rid ) {
if ( ( v->pos >= x_start ) && ( v->pos <= x_stop ) ) {
return PLOIDY_TYPE_X;
}
else
return PLOIDY_TYPE_AUTOSOME;
}
else if ( v->rid == y_rid )
return PLOIDY_TYPE_Y;
else if ( v->rid == mt_rid )
return PLOIDY_TYPE_MT;
else
return PLOIDY_TYPE_AUTOSOME;
}
else
return PLOIDY_TYPE_AUTOSOME;
}
int32_t sex_ploidy_map::set_rids_from_hdr(bcf_hdr_t* hdr) {
x_rid = bcf_hdr_name2id(hdr, x_label.c_str());
y_rid = bcf_hdr_name2id(hdr, y_label.c_str());
mt_rid = bcf_hdr_name2id(hdr, mt_label.c_str());
return ( ( x_rid < 0 ? 0 : 1 ) + ( y_rid < 0 ? 0 : 1 ) + ( mt_rid < 0 ? 0 : 1 ) );
}
int32_t sex_ploidy_map::load_sex_map_file(const char* filename, bcf_hdr_t* hdr) {
set_rids_from_hdr(hdr);
int32_t nsamples = bcf_hdr_nsamples(hdr);
//fprintf(stderr,"load_sex_map_files() : nsamples=%d\n",nsamples);
if ( ( filename == NULL ) || ( strlen(filename) == 0 ) ) {
for(int32_t i=0; i < nsamples; ++i) {
vSex.push_back(2);
}
}
else {
htsFile *file = hts_open(filename,"r");
if ( file == NULL ) {
fprintf(stderr,"ERROR: Cannot open %s\n",filename);
exit(1);
}
kstring_t *s = &file->line;
while( hts_getline(file,'\n',s) >= 0 ) {
std::string ss = std::string(s->s);
size_t idx = ss.find_first_of("\t ");
if ( idx == std::string::npos ) {
fprintf(stderr,"ERROR: Cannot parse line %s in %s\n",ss.c_str(), filename);
exit(1);
}
std::string id = ss.substr(0, idx);
int32_t sex = atoi(ss.substr(idx+1).c_str());
if ( mSex.find(id) != mSex.end() ) {
fprintf(stderr,"ERROR: Duplicate ID %s in %s\n",id.c_str(), filename);
exit(1);
}
if ( sex == 0 ) {
fprintf(stderr,"WARNING: Unknown sex for individual %s, assuming females\n",id.c_str());
sex = 2;
}
else if ( sex > 2 ) {
fprintf(stderr,"ERROR: Invalid sex %d for individual %s\n",sex,id.c_str());
exit(1);
}
mSex[id] = sex;
}
for(int32_t i=0; i < nsamples; ++i) {
const char* sid = bcf_hdr_int2id(hdr, BCF_DT_SAMPLE, i);
std::map<std::string,int>::iterator it = mSex.find(sid);
if ( it == mSex.end() ) { // not found
fprintf(stderr,"WARNING: No sex information is available for %s, treating as female\n",sid);
vSex.push_back(2);
}
else {
vSex.push_back(it->second);
if ( it->second == 1 )
++n_males;
}
}
}
return nsamples;
}