Skip to content

Commit

Permalink
Reduce memory usage for kmer mapper normalization
Browse files Browse the repository at this point in the history
  • Loading branch information
asl committed Dec 5, 2024
1 parent 09f76ca commit 1c3b41d
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 18 deletions.
20 changes: 19 additions & 1 deletion src/common/alignment/kmer_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,16 +85,34 @@ class KMerMap {
mapping_.erase(res);
}

// Stores a copy of `value` under `key`.
//
// Looks `key` up in mapping_ by its raw bytes (key_size_ bytes). When the key is
// absent, a new RawSeqData[rawcnt_] buffer is allocated and registered in
// mapping_ under that key; otherwise the buffer already stored for the key is
// reused. In both cases key_size_ bytes of `value` are copied into the buffer.
//
// Returns true when a new entry was inserted and false when an existing entry
// was overwritten.
//
// NOTE(review): this span is a rendered diff. The `void` signature below is the
// single removed line; the `bool` signature that follows replaces it.
// NOTE(review): the buffer is allocated with new[] and handed to mapping_;
// presumably the map owner releases it elsewhere - confirm.
void set(const Kmer &key, const Seq &value) {
bool set(const Kmer &key, const Seq &value) {
RawSeqData *rawvalue = nullptr;
bool inserted = false;
auto res = mapping_.find_ks((const char*)key.data(), key_size_);
if (res == mapping_.end()) {
// Key not present yet: allocate storage and register it under the key.
rawvalue = new RawSeqData[rawcnt_];
mapping_.insert_ks((const char*)key.data(), key_size_, rawvalue);
inserted = true;
} else {
// Key already present: overwrite the stored buffer in place.
rawvalue = res.value();
}
memcpy(rawvalue, value.data(), key_size_);
return inserted;
}

bool set(const RawSeqData *key, const RawSeqData *value) {
RawSeqData *rawvalue = nullptr;
bool inserted = false;
auto res = mapping_.find_ks((const char*)key, key_size_);
if (res == mapping_.end()) {
rawvalue = new RawSeqData[rawcnt_];
mapping_.insert_ks((const char*)key, key_size_, rawvalue);
inserted = true;
} else {
rawvalue = res.value();
}
memcpy(rawvalue, value, key_size_);
return inserted;
}

bool count(const Kmer &key) const {
Expand Down
46 changes: 29 additions & 17 deletions src/common/alignment/kmer_mapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,25 @@ class KmerMapper : public omnigraph::GraphActionHandler<Graph> {
KMerMap mapping_;
bool normalized_;

// Follows the chain of mappings that starts at `kmer`: the value found for a
// key is looked up again as a key, until a lookup returns nullptr.
// `answer` keeps the last non-null value seen (the end of the chain) and
// `step` counts how many lookups succeeded.
//
// With the `step > 1` return, the result is the end of the chain only when the
// chain has at least two links; for an unmapped kmer or a single-link chain
// the result is nullptr.
//
// NOTE(review): this span is a rendered diff and contains both versions of
// several lines (two signatures, two lookups inside the loop, two returns).
// Presumably the GetNonTrivialRoot signature, `mapping_.find(rawval)`, the
// `step` counter and the `step > 1` return are the added lines, and the
// GetRoot signature, `Seq val`, `mapping_.find(val)` and `return answer` are
// the removed ones - confirm against the repository.
const RawSeqData* GetRoot(const Kmer &kmer) const {
const RawSeqData* GetNonTrivialRoot(const RawSeqData *kmer) const {
const RawSeqData *answer = nullptr;
const RawSeqData *rawval = mapping_.find(kmer);

size_t step = 0;
while (rawval != nullptr) {
Seq val(k_, rawval);

answer = rawval;
rawval = mapping_.find(val);
// Raw-pointer lookup: continues the chain without constructing a Seq.
rawval = mapping_.find(rawval);
step += 1;
}
return answer;
// A chain of zero or one link is reported as "no non-trivial root".
return step > 1 ? answer : nullptr;
}

bool HasNonTrivialRoot(const RawSeqData *kmer) const {
const RawSeqData *rawval = mapping_.find(kmer);
if (rawval == nullptr)
return false;

return mapping_.find(rawval) != nullptr;
}

public:
Expand All @@ -66,23 +74,27 @@ class KmerMapper : public omnigraph::GraphActionHandler<Graph> {
if (normalized_)
return;

adt::KMerVector<Kmer> all(k_, size());
for (auto it = begin(); it != end(); ++it)
all.push_back(it->first);

std::vector<const RawSeqData*> roots(all.size(), nullptr);
// Preallocate 5% of size
adt::KMerVector<Kmer> all(k_, size() / 20);
size_t sz = 0;
for (auto it = begin(); it != end(); ++it) {
if (!HasNonTrivialRoot(it->first.data()))
continue;

# pragma omp parallel for
for (size_t i = 0; i < all.size(); ++i) {
Seq val(k_, all[i]);
roots[i] = GetRoot(val);
all.push_back(it->first);
sz += 1;
}
INFO("Total " << sz << " kmers with non-trivial roots");

# pragma omp parallel for
for (size_t i = 0; i < all.size(); ++i) {
if (roots[i] != nullptr) {
Seq kmer(k_, all[i]);
mapping_.set(kmer, Seq(k_, roots[i]));
const RawSeqData *kmer = all[i];
if (const RawSeqData *root = GetNonTrivialRoot(kmer)) {
// This is potentially racy, however we do not insert new values, we
// only change values of the existing ones in a pre-determined way
// (root), the final result is ok.
bool inserted = mapping_.set(kmer, root);
VERIFY_MSG(!inserted, "should never insert new kmers here");
}
}

Expand Down

0 comments on commit 1c3b41d

Please sign in to comment.