Skip to content

Commit

Permalink
new seqset feats
Browse files Browse the repository at this point in the history
  • Loading branch information
sodiumnitrate committed Jan 31, 2024
1 parent dfd0f57 commit 4b9cb49
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 1 deletion.
6 changes: 6 additions & 0 deletions src/include/seq_set.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,4 +49,10 @@ class SeqSet {

std::vector<std::vector<int>> pairwise_distance();
SeqSet find_subset_with_names(std::vector<std::string> names);

// Remove the columns at the given indices from every sequence in the set.
// Indices are 0-based positions within a sequence; all sequences are
// expected to have the same length (i.e. to be aligned).
void remove_columns(std::vector<int> indices);

// Check if all seqs have the same length.
// Returns true when every sequence's length matches that of the first one,
// false as soon as one differs.
bool are_lengths_identical();
};
40 changes: 40 additions & 0 deletions src/seq_set.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,6 +390,45 @@ SeqSet SeqSet::find_subset_with_names(std::vector<std::string> names){
return new_sset;
}

// Check whether every sequence in the set has the same length.
//
// Returns true when all records report the same length() as the first
// record, false as soon as one differs. A set with no records is treated
// as trivially consistent and returns true.
bool SeqSet::are_lengths_identical(){
    // Guard: reading records[0] on an empty container is undefined behaviour.
    if (records.empty()){
        return true;
    }

    // Keep length()'s own return type so the comparison below does not mix
    // signed and unsigned values or silently narrow.
    const auto length = records[0].length();
    for(auto& t : records){
        if (t.length() != length){
            return false;
        }
    }
    return true;
}

void SeqSet::remove_columns(std::vector<int> indices){

// assumes aligned sequences of the same length
if (!this->are_lengths_identical()){
std::cout << "Sequences have different lengths." << std::endl;
throw;
}

std::unordered_set<int> idx_to_remove;
for(auto i : indices){
idx_to_remove.insert(i);
}


int length = records[0].length();

std::string new_seq_str;
for(int j = 0; j < records.size(); j++){
new_seq_str = "";
for(int i = 0; i < length; i++){
if(!idx_to_remove.contains(i)){
new_seq_str.push_back(records[j].get_seq()[i]);
}
}
records[j].set_seq(new_seq_str);
}
}

void init_seq_set(py::module_ &m){
py::class_<SeqSet>(m, "SeqSet", py::dynamic_attr())
.def(py::init<>())
Expand All @@ -415,6 +454,7 @@ void init_seq_set(py::module_ &m){
.def("trim_gaps_with_threshold", py::overload_cast<float>(&SeqSet::trim_gaps))
.def("pairwise_distance", &SeqSet::pairwise_distance)
.def("find_subset_with_names", &SeqSet::find_subset_with_names)
.def("remove_columns", &SeqSet::remove_columns)
.def("__repr__",
[](SeqSet &a){
return "<sequence_analysis.SeqSet of size " + std::to_string(a.size()) + " >";
Expand Down
3 changes: 2 additions & 1 deletion src/sequence.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ const std::unordered_map<std::string, char> codon_to_aa_map = {
{"TGG", 'W'},
{"CGT", 'R'},
{"AGT", 'S'},
{"GGT", 'G'}
{"GGT", 'G'},
{"---", '-'}
};

/*
Expand Down

0 comments on commit 4b9cb49

Please sign in to comment.