diff --git a/CMakeLists.txt b/CMakeLists.txt
index c54ca24..820c2ec 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -42,3 +42,6 @@ target_link_libraries(check z)
 
 add_executable(query src/query.cpp)
 target_link_libraries(query z)
+
+add_executable(permute src/permute.cpp)
+target_link_libraries(permute z)
\ No newline at end of file
diff --git a/README.md b/README.md
index f756267..7f34707 100644
--- a/README.md
+++ b/README.md
@@ -218,6 +218,28 @@ We see that the canonical dictionary is twice as fast as the regular dictionary
 for low-hit workloads,
 even on this tiny example, for only +0.4 bits/k-mer.
 
+### Example 4
+
+	./permute ../data/unitigs_stitched/with_abundances/ecoli_sakai.BA000007.3.k31_ust.abundances.fa.gz 31 -o ecoli_sakai.permuted.fa
+
+This command re-orders (and possibly reverse-complement) the strings in the collection as to *minimize* the number of runs in the abundances and, hence, optimize the encoding of the abundances.
+The result is saved to the file `ecoli_sakai.permuted.fa`.
+
+In this example for the E.Coli collection (Sakai strain) we reduce the number of runs in the abundances from 5820 to 3723.
+
+Then use the `build` command as usual to build the permuted collection:
+
+	./build ecoli_sakai.permuted.fa 31 13 --abundances --verbose
+	
+The index built on the permuted collection
+optimizes the storage space for the abundances which results in a 15.1X better space than the empirical entropy of the abundances.
+
+For reference, the index built on the original collection:
+
+	./build ../data/unitigs_stitched/with_abundances/ecoli_sakai.BA000007.3.k31_ust.abundances.fa.gz 31 13 --abundances --verbose
+
+already achieves a 12.4X better space than the empirical entropy.
+
 Input Files
 -----------
 
diff --git a/data/unitigs_stitched/with_abundances/ecoli_sakai.BA000007.3.k31_ust.abundances.fa.gz b/data/unitigs_stitched/with_abundances/ecoli_sakai.BA000007.3.k31_ust.abundances.fa.gz
index d5c697e..7607fd9 100644
Binary files a/data/unitigs_stitched/with_abundances/ecoli_sakai.BA000007.3.k31_ust.abundances.fa.gz and b/data/unitigs_stitched/with_abundances/ecoli_sakai.BA000007.3.k31_ust.abundances.fa.gz differ
diff --git a/external/pthash b/external/pthash
index f0a0fb9..3f1deb0 160000
--- a/external/pthash
+++ b/external/pthash
@@ -1 +1 @@
-Subproject commit f0a0fb929ecfe1d925d88bc9bf41f7b4eaf0516b
+Subproject commit 3f1deb0e1ec132274af8ef55478915209f608b2c
diff --git a/include/abundances.hpp b/include/abundances.hpp
index f0d3684..4fd4536 100644
--- a/include/abundances.hpp
+++ b/include/abundances.hpp
@@ -1,20 +1,17 @@
 #pragma once
 
 #include <vector>
-#include <unordered_map>  // count the distinct abundances
+#include <unordered_map>  // count the distinct abundances with freq information
+
 #include "ef_sequence.hpp"
 
 namespace sshash {
 
 struct abundances {
     struct builder {
-        builder() : m_most_frequent_abundance(0) {}
+        builder() {}
 
-        void init(uint64_t most_frequent_abundance) {
-            m_most_frequent_abundance = most_frequent_abundance;
-            m_kmer_id_interval_lengths.push_back(0);
-            m_abundance_interval_lengths.push_back(0);
-        }
+        void init() { m_abundance_interval_lengths.push_back(0); }
 
         void eat(uint64_t abundance) {
             assert(abundance > 0);
@@ -26,28 +23,17 @@ struct abundances {
             }
         }
 
-        void push_kmer_id_interval(uint64_t value, uint64_t length) {
-            m_kmer_id_interval_values.push_back(value);
-            m_kmer_id_interval_lengths.push_back(m_kmer_id_interval_lengths.back() + length);
-        }
-
         void push_abundance_interval(uint64_t value, uint64_t length) {
             m_abundance_interval_values.push_back(value);
             m_abundance_interval_lengths.push_back(m_abundance_interval_lengths.back() + length);
         }
 
-        uint64_t num_kmer_id_intervals() const { return m_kmer_id_interval_values.size(); }
         uint64_t num_abundance_intervals() const { return m_abundance_interval_values.size(); }
 
         void finalize(uint64_t num_kmers) {
-            assert(
-                std::is_sorted(m_kmer_id_interval_values.begin(), m_kmer_id_interval_values.end()));
-            assert(std::is_sorted(m_kmer_id_interval_lengths.begin(),
-                                  m_kmer_id_interval_lengths.end()));
             assert(std::is_sorted(m_abundance_interval_lengths.begin(),
                                   m_abundance_interval_lengths.end()));
 
-            std::cout << "num_kmer_id_intervals " << num_kmer_id_intervals() << std::endl;
             std::cout << "num_abundance_intervals " << num_abundance_intervals() << std::endl;
 
             uint64_t num_distinct_abundances = m_abundances_map.size();
@@ -80,23 +66,9 @@ struct abundances {
                 return x.first < y.first;
             });
 
-            /* If this test fails, then we need to change the value of
-                constants::most_frequent_abundance. */
-            if (m_most_frequent_abundance != m_abundances.front().first) {
-                throw std::runtime_error("the most frequent abundance is not " +
-                                         std::to_string(constants::most_frequent_abundance) +
-                                         " but " + std::to_string(m_abundances.front().first));
-            }
-
             uint64_t rest = num_kmers - m_abundances.front().second;
             std::cout << "kmers that do not have the most frequent ab: " << rest << " ("
                       << (rest * 100.0) / num_kmers << "%)" << std::endl;
-            if (m_kmer_id_interval_values.size() != 0) {  // optimize_mfa
-                std::cout << "cumulative_kmer_id_interval_lengths "
-                          << m_kmer_id_interval_lengths.back() << '/' << rest << std::endl;
-                std::cout << "cumulative_abundance_interval_lengths "
-                          << m_abundance_interval_lengths.back() << '/' << rest << std::endl;
-            }
 
             m_abundance_dictionary_builder.resize(num_distinct_abundances,
                                                   std::ceil(std::log2(largest_ab + 1)));
@@ -108,13 +80,6 @@ struct abundances {
         }
 
         void build(abundances& index) {
-            std::swap(index.m_most_frequent_abundance, m_most_frequent_abundance);
-
-            index.m_kmer_id_interval_values.encode(m_kmer_id_interval_values.begin(),
-                                                   m_kmer_id_interval_values.size());
-            index.m_kmer_id_interval_lengths.encode(m_kmer_id_interval_lengths.begin(),
-                                                    m_kmer_id_interval_lengths.size());
-
             uint64_t num_distinct_abundances = m_abundance_dictionary_builder.size();
             pthash::compact_vector::builder abundance_interval_values;
             abundance_interval_values.resize(
@@ -144,7 +109,7 @@ struct abundances {
             m_abundance_dictionary_builder.build(index.m_abundance_dictionary);
         }
 
-        // return the average empirical entropy per abundance
+        /* return the average empirical entropy per abundance */
         double print_info(uint64_t num_kmers) {
             assert(!m_abundances.empty());
             double expected_ab_value = 0.0;
@@ -166,15 +131,10 @@ struct abundances {
         }
 
     private:
-        uint64_t m_most_frequent_abundance;
-
         /* (abundance,frequency) pairs during construction, then (abundance,id) after sorting */
         std::unordered_map<uint64_t, uint64_t> m_abundances_map;
         std::vector<std::pair<uint64_t, uint64_t>> m_abundances;  // (abundance,frequency)
 
-        std::vector<uint64_t> m_kmer_id_interval_values;
-        std::vector<uint64_t> m_kmer_id_interval_lengths;
-
         std::vector<uint64_t> m_abundance_interval_values;
         std::vector<uint64_t> m_abundance_interval_lengths;
 
@@ -184,48 +144,18 @@ struct abundances {
     bool empty() const { return m_abundance_dictionary.size() == 0; }
 
     uint64_t abundance(uint64_t kmer_id) const {
-        uint64_t rank = kmer_id;
-
-        if (m_kmer_id_interval_values.size() != 0) {  // optimize_mfa
-            bool is_present = false;
-            auto [pos, val, prev] = m_kmer_id_interval_values.next_geq_neighbourhood(kmer_id);
-            if (val == kmer_id) {
-                is_present = true;
-                rank = m_kmer_id_interval_lengths.access(pos);
-            } else {
-                if (pos > 0) {
-                    rank = m_kmer_id_interval_lengths.access(pos - 1);
-                    uint64_t length = m_kmer_id_interval_lengths.access(pos) - rank;
-                    if (kmer_id < prev + length) {
-                        is_present = true;
-                        assert(kmer_id >= prev);
-                        rank += kmer_id - prev;
-                    }
-                }
-            }
-            if (!is_present) return m_most_frequent_abundance;
-        }
-
-        uint64_t i = m_abundance_interval_lengths.prev_leq(rank);
+        uint64_t i = m_abundance_interval_lengths.prev_leq(kmer_id);
         uint64_t id = m_abundance_interval_values.access(i);
         uint64_t abundance = m_abundance_dictionary.access(id);
-
         return abundance;
     }
 
     uint64_t num_bits() const {
-        return sizeof(m_most_frequent_abundance) * 8 + m_kmer_id_interval_values.num_bits() +
-               m_kmer_id_interval_lengths.num_bits() + m_abundance_interval_values.bytes() * 8 +
-               m_abundance_interval_lengths.num_bits() + m_abundance_dictionary.bytes() * 8;
+        return m_abundance_interval_values.bytes() * 8 + m_abundance_interval_lengths.num_bits() +
+               m_abundance_dictionary.bytes() * 8;
     }
 
     void print_space_breakdown(uint64_t num_kmers) const {
-        std::cout << "    kmer_id_interval_values: "
-                  << static_cast<double>(m_kmer_id_interval_values.num_bits()) / num_kmers
-                  << " [bits/kmer]\n";
-        std::cout << "    kmer_id_interval_lengths: "
-                  << static_cast<double>(m_kmer_id_interval_lengths.num_bits()) / num_kmers
-                  << " [bits/kmer]\n";
         std::cout << "    abundance_interval_values: "
                   << static_cast<double>(m_abundance_interval_values.bytes() * 8) / num_kmers
                   << " [bits/kmer]\n";
@@ -239,34 +169,14 @@ struct abundances {
 
     template <typename Visitor>
     void visit(Visitor& visitor) {
-        visitor.visit(m_most_frequent_abundance);
-        visitor.visit(m_kmer_id_interval_values);
-        visitor.visit(m_kmer_id_interval_lengths);
         visitor.visit(m_abundance_interval_values);
         visitor.visit(m_abundance_interval_lengths);
         visitor.visit(m_abundance_dictionary);
     }
 
 private:
-    uint64_t m_most_frequent_abundance;
-
-    /*****
-       We model abundances as two lists of intervals.
-       Each interval is a pair (value,length).
-       - First list: kmer_ids list. In this case, a pair (value,length)
-       represents all kmer_ids = value, value+1, value+2, ..., value+length-1.
-       - Second list: abundance list. In this case, a pair (value,length)
-       represents that the abundance [value] repeats for [length] times.
-    */
-
-    ef_sequence<true> m_kmer_id_interval_values;
-    ef_sequence<false> m_kmer_id_interval_lengths;
-
     pthash::compact_vector m_abundance_interval_values;
     ef_sequence<true> m_abundance_interval_lengths;
-    /***/
-
-    /* Abundance dictionary, listing all distinct abundances sorted by decreasing frequency. */
     pthash::compact_vector m_abundance_dictionary;
 };
 
diff --git a/include/builder/build.cpp b/include/builder/build.cpp
index 9dab699..4931b06 100644
--- a/include/builder/build.cpp
+++ b/include/builder/build.cpp
@@ -6,21 +6,13 @@
 #include "../info.cpp"
 #include "../../external/pthash/include/pthash.hpp"
 #include "../../external/pthash/external/essentials/include/essentials.hpp"
-
-#include "build_util_types.hpp"
+#include "util.hpp"
 
 #include <numeric>  // for std::partial_sum
 #include <vector>
 
 namespace sshash {
 
-void expect(char got, char expected) {
-    if (got != expected) {
-        std::cout << "got '" << got << "' but expected '" << expected << "'" << std::endl;
-        throw parse_runtime_error();
-    }
-}
-
 struct parse_data {
     parse_data() : num_kmers(0) {}
     uint64_t num_kmers;
@@ -88,37 +80,18 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
 
     uint64_t seq_len = 0;
     uint64_t sum_of_abundances = 0;
-    data.abundances_builder.init(constants::most_frequent_abundance);
-
-    /* intervals of kmer_ids */
-    uint64_t kmer_id_value = constants::invalid;
-    uint64_t kmer_id_length = 1;
+    data.abundances_builder.init();
 
     /* intervals of abundances */
     uint64_t ab_value = constants::invalid;
-    uint64_t ab_length = 1;
-
-    uint64_t num_sequences_diff_abs = 0;  // num sequences whose kmers have different abundances
-    uint64_t num_sequences_all_mfa = 0;   // num sequences whose kmers have same abundance == mfa
-
-    auto process_abundance = [&](uint64_t ab) {
-        if (ab == ab_value) {
-            ab_length += 1;
-        } else {
-            if (ab_value != constants::invalid) {
-                data.abundances_builder.push_abundance_interval(ab_value, ab_length);
-            }
-            ab_value = ab;
-            ab_length = 1;
-        }
-    };
+    uint64_t ab_length = 0;
 
     auto parse_header = [&]() {
         if (sequence.empty()) return;
 
         /*
             Heder format:
-            >[seq_id] LN:i:[seq_len] ab:Z:[ab_seq]
+            >[id] LN:i:[seq_len] ab:Z:[ab_seq]
             where [ab_seq] is a space-separated sequence of integer counters (the abundances),
             whose length is equal to [seq_len]-k+1
         */
@@ -140,8 +113,7 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
         uint64_t j = sequence.find_first_of(' ', i);
         if (j == std::string::npos) throw parse_runtime_error();
 
-        char* end;
-        seq_len = std::strtoull(sequence.data() + i, &end, 10);
+        seq_len = std::strtoull(sequence.data() + i, nullptr, 10);
         i = j + 1;
         expect(sequence[i + 0], 'a');
         expect(sequence[i + 1], 'b');
@@ -150,53 +122,23 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
         expect(sequence[i + 4], ':');
         i += 5;
 
-        kmer_id_value = constants::invalid;
-        kmer_id_length = 1;
-
-        bool kmers_have_all_mfa = true;
-        bool kmers_have_different_abundances = false;
-
-        for (uint64_t j = 0, num_kmers = data.num_kmers, prev_ab = constants::invalid;
-             j != seq_len - k + 1; ++j, ++num_kmers) {
-            uint64_t ab = std::strtoull(sequence.data() + i, &end, 10);
+        for (uint64_t j = 0; j != seq_len - k + 1; ++j) {
+            uint64_t ab = std::strtoull(sequence.data() + i, nullptr, 10);
             i = sequence.find_first_of(' ', i) + 1;
 
-            if (ab != constants::most_frequent_abundance) kmers_have_all_mfa = false;
-            if (j > 0) {
-                if (ab != prev_ab) { kmers_have_different_abundances = true; }
-            }
-            prev_ab = ab;
-
             data.abundances_builder.eat(ab);
             sum_of_abundances += ab;
 
-            if (build_config.optimize_mfa) {
-                if (ab != constants::most_frequent_abundance) {
-                    if (kmer_id_value == constants::invalid) {
-                        kmer_id_value = num_kmers;
-                        kmer_id_length = 1;
-                    } else {
-                        if (num_kmers == kmer_id_value + kmer_id_length) kmer_id_length += 1;
-                    }
-                    process_abundance(ab);
-                } else {
-                    if (kmer_id_value != constants::invalid) {
-                        data.abundances_builder.push_kmer_id_interval(kmer_id_value,
-                                                                      kmer_id_length);
-                    }
-                    kmer_id_value = constants::invalid;
-                }
+            if (ab == ab_value) {
+                ab_length += 1;
             } else {
-                process_abundance(ab);
+                if (ab_value != constants::invalid) {
+                    data.abundances_builder.push_abundance_interval(ab_value, ab_length);
+                }
+                ab_value = ab;
+                ab_length = 1;
             }
         }
-
-        if (kmer_id_value != constants::invalid) {
-            data.abundances_builder.push_kmer_id_interval(kmer_id_value, kmer_id_length);
-        }
-
-        num_sequences_diff_abs += kmers_have_different_abundances;
-        num_sequences_all_mfa += kmers_have_all_mfa;
     };
 
     while (!is.eof()) {
@@ -264,18 +206,6 @@ void parse_file(std::istream& is, parse_data& data, build_configuration const& b
 
     if (build_config.store_abundances) {
         std::cout << "sum_of_abundances " << sum_of_abundances << std::endl;
-        std::cout << "num_sequences whose kmers have different abundances: "
-                  << num_sequences_diff_abs << "/" << num_sequences << " ("
-                  << (num_sequences_diff_abs * 100.0) / num_sequences << "%)" << std::endl;
-        std::cout << "num_sequences whose kmers all have the same abundance != mfa: "
-                  << (num_sequences - num_sequences_diff_abs) << "/" << num_sequences << " ("
-                  << ((num_sequences - num_sequences_diff_abs) * 100.0) / num_sequences << "%)"
-                  << std::endl;
-        std::cout << "num_sequences whose kmers all have the same abundance == mfa: "
-                  << num_sequences_all_mfa << "/" << (num_sequences - num_sequences_diff_abs)
-                  << " ("
-                  << (num_sequences_all_mfa * 100.0) / (num_sequences - num_sequences_diff_abs)
-                  << "%)" << std::endl;
         data.abundances_builder.push_abundance_interval(ab_value, ab_length);
         data.abundances_builder.finalize(data.num_kmers);
     }
diff --git a/include/builder/cover.hpp b/include/builder/cover.hpp
new file mode 100644
index 0000000..9296e0f
--- /dev/null
+++ b/include/builder/cover.hpp
@@ -0,0 +1,430 @@
+#pragma once
+
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+#include "../util.hpp"
+
+namespace sshash {
+
+namespace constants {
+constexpr uint64_t most_frequent_abundance = 1;  // usual value
+}
+
+struct node {
+    // We assume there are less than 2^32 sequences and that
+    // the largest abundance fits into a 32-bit uint.
+    node(uint32_t i, uint32_t f, uint32_t b, bool s) : id(i), front(f), back(b), sign(s) {}
+    uint32_t id, front, back;
+    bool sign;  // '+' --> forward
+                // '-' --> backward
+};
+
+struct cover {
+    typedef std::vector<node> walk_t;
+    typedef std::vector<walk_t> walks_t;
+
+    cover(uint64_t num_sequences, uint64_t num_runs_abundances)
+        : m_num_sequences(num_sequences), m_num_runs_abundances(num_runs_abundances) {}
+
+    void compute(std::vector<node>& nodes) {
+        assert(nodes.size() == m_num_sequences);
+
+        essentials::timer_type timer;
+        timer.start();
+
+        /* (abundance, position of a candidate abundance with front = abundance) */
+        std::unordered_map<uint32_t, uint32_t> abundance_map;
+
+        /* visited[node.id] = true if node has been visited; false otherwise */
+        std::vector<bool> visited;
+
+        /* set of unvisited nodes */
+        std::unordered_set<uint32_t> unvisited_nodes;
+
+        /* map from node id to offset+ into nodes */
+        std::vector<uint32_t> id_to_offset;
+
+        /* nodes created for next round */
+        std::vector<node> tmp_nodes;
+
+        walk_t walk;
+        walks_t walks_in_round;
+
+        unvisited_nodes.reserve(nodes.size());     // at most
+        walk.reserve(nodes.size());                // at most
+        walks_in_round.reserve(2 * nodes.size());  // at most
+
+        std::cout << "initial number of runs = " << m_num_runs_abundances << std::endl;
+
+        {
+            /*
+                optimize for the most frequent case:
+                push nodes of the form v:[mfa,mfa] to the bottom, and remove them
+                forming a single (usually, long) walk.
+            */
+
+            essentials::timer_type timer;
+            timer.start();
+
+            std::sort(nodes.begin(), nodes.end(), [](auto const& x, auto const& y) {
+                if (x.front == constants::most_frequent_abundance and
+                    x.back == constants::most_frequent_abundance) {
+                    return false;
+                }
+                if (y.front == constants::most_frequent_abundance and
+                    y.back == constants::most_frequent_abundance) {
+                    return true;
+                }
+                if (x.front != y.front) return x.front > y.front;
+                if (x.back != y.back) return x.back > y.back;
+                return x.id > y.id;
+            });
+
+            uint64_t num_special_nodes = 0;
+            uint64_t num_nodes = nodes.size();
+            while (!nodes.empty()) {
+                auto v = nodes.back();
+                if (v.front == constants::most_frequent_abundance and
+                    v.back == constants::most_frequent_abundance) {
+                    walk.push_back(v);
+                    nodes.pop_back();
+                    num_special_nodes += 1;
+                } else {
+                    break;
+                }
+            }
+            std::cout << "num nodes of the form v:[" << constants::most_frequent_abundance << ","
+                      << constants::most_frequent_abundance << "] = " << num_special_nodes << "/"
+                      << num_nodes << "(" << (num_special_nodes * 100.0) / num_nodes << "%)"
+                      << std::endl;
+
+            if (num_special_nodes != 0) {
+                /* create new nodes for next round */
+                assert(!walk.empty());
+                uint32_t id = walks_in_round.size();
+                uint32_t front = walk.front().front;
+                uint32_t back = walk.back().back;
+                tmp_nodes.emplace_back(id, front, back, true);
+                tmp_nodes.emplace_back(id, back, front, false);
+                walks_in_round.push_back(walk);
+                walk.clear();
+            }
+
+            /* add all the nodes with backward orientation */
+            num_nodes = nodes.size();
+            for (uint64_t i = 0; i != num_nodes; ++i) {
+                auto node = nodes[i];
+                assert(node.sign == true);
+                nodes.emplace_back(node.id, node.back, node.front, false);
+            }
+
+            timer.stop();
+            std::cout << "  time: " << timer.elapsed() / 1000000 << " [sec]" << std::endl;
+        }
+
+        while (true) {
+            std::cout << "round " << rounds.size() << std::endl;
+
+            uint64_t num_nodes = nodes.size();
+            std::cout << "  num_nodes " << num_nodes << std::endl;
+            assert(num_nodes % 2 == 0);
+
+            essentials::timer_type round_timer;  // total time of round
+            round_timer.start();
+
+            if (rounds.size() == 0) {
+                /* remember: we removed some nodes but the id-space still spans
+                   [0..m_num_sequences-1] */
+                id_to_offset.resize(m_num_sequences);
+                visited.resize(m_num_sequences);
+            } else {
+                id_to_offset.resize(num_nodes / 2);
+                visited.resize(num_nodes / 2);
+            }
+
+            /* all nodes unvisited */
+            std::fill(visited.begin(), visited.end(), false);
+
+            for (auto const& node : nodes) unvisited_nodes.insert(node.id);
+            assert(unvisited_nodes.size() == num_nodes / 2);
+
+            std::sort(nodes.begin(), nodes.end(), [](auto const& x, auto const& y) {
+                if (x.front != y.front) return x.front < y.front;
+                if (x.back != y.back) return x.back < y.back;
+                return x.id < y.id;
+            });
+
+            /* fill abundance_map */
+            {
+                uint64_t prev_front = constants::invalid;
+                uint64_t offset = 0;
+                for (auto const& node : nodes) {
+                    if (node.front != prev_front) abundance_map[node.front] = offset;
+                    offset += 1;
+                    prev_front = node.front;
+                }
+                assert(offset == nodes.size());
+            }
+
+            /* fill id_to_offset map */
+            {
+                uint64_t offset = 0;
+                for (auto const& node : nodes) {
+                    if (node.sign) id_to_offset[node.id] = offset;
+                    offset += 1;
+                }
+            }
+
+            uint64_t i = 0;                        // position of an unvisited node in nodes
+            bool no_more_matches_possible = true;  // to stop the algorithm
+
+            /* some statistics */
+            uint64_t total_nodes_visited_to_find_matches = 0;
+            uint64_t total_nodes_visited_to_failure = 0;
+            uint64_t total_nodes_visited_to_success = 0;
+            uint64_t total_nodes = 0;
+
+            while (!unvisited_nodes.empty()) {
+                /* 1. take an unvisited node */
+                {
+                    uint32_t unvisited_node_id = *(unvisited_nodes.begin());
+                    i = id_to_offset[unvisited_node_id];
+                }
+
+                /* 2. create a new walk */
+                walk.clear();
+                while (true) {
+                    total_nodes += 1;
+
+                    auto node = nodes[i];
+                    uint64_t id = node.id;
+                    assert(visited[id] == false);
+                    visited[id] = true;
+                    walk.push_back(node);
+
+                    /* node has been visited, so erase it from unvisited_nodes */
+                    if (unvisited_nodes.find(id) != unvisited_nodes.cend()) {
+                        unvisited_nodes.erase(id);
+                    }
+
+                    auto search_match = [&](uint64_t back, uint64_t candidate_i) {
+                        bool no_match_found = false;
+                        uint64_t tmp_total_nodes_visited_to_failure = 0;
+                        uint64_t tmp_total_nodes_visited_to_success = 0;
+
+                        while (true) {
+                            total_nodes_visited_to_find_matches += 1;
+                            tmp_total_nodes_visited_to_failure += 1;
+                            tmp_total_nodes_visited_to_success += 1;
+
+                            if (candidate_i == num_nodes) {
+                                total_nodes_visited_to_failure +=
+                                    tmp_total_nodes_visited_to_failure;
+                                break;
+                            }
+                            auto candidate = nodes[candidate_i];
+
+                            /* skip */
+                            if (candidate.id == id) {
+                                candidate_i += 1;
+                                continue;
+                            }
+
+                            /* checked all candidate matches */
+                            if (candidate.front != back) {
+                                no_match_found = true;
+                                total_nodes_visited_to_failure +=
+                                    tmp_total_nodes_visited_to_failure;
+                                break;
+                            }
+
+                            /* match found */
+                            if (visited[candidate.id] == false) {
+                                total_nodes_visited_to_success +=
+                                    tmp_total_nodes_visited_to_success;
+                                break;
+                            }
+
+                            candidate_i += 1;
+                        }
+                        assert(candidate_i <= num_nodes);
+
+                        /* update candidate position in abundance_map */
+                        abundance_map[back] = candidate_i;
+
+                        if (no_match_found or candidate_i == num_nodes) return false;
+
+                        /* valid match was found, then visit it next */
+                        i = candidate_i;
+                        return true;
+                    };
+
+                    /* 3. search for a match */
+
+                    /* first, try to match back abundance */
+                    uint64_t back = node.back;
+                    uint64_t candidate_i = abundance_map[back];
+                    bool found = search_match(back, candidate_i);
+
+                    /* if a match is not found and the walk is singleton,
+                       then the only node in the walk could be matched on
+                       front abundance */
+                    if (!found and walk.size() == 1) {
+                        back = node.front;
+                        candidate_i = abundance_map[back];
+                        found = search_match(back, candidate_i);
+                        if (found) {
+                            /* change orientation of the node */
+                            walk[0] = {node.id, node.back, node.front, !node.sign};
+                        }
+                    }
+
+                    if (!found) break;
+                    no_more_matches_possible = false;
+                }
+                assert(!walk.empty());
+
+                /* create new nodes for next round */
+                uint32_t id = walks_in_round.size();
+                uint32_t front = walk.front().front;
+                uint32_t back = walk.back().back;
+                tmp_nodes.emplace_back(id, front, back, true);
+                tmp_nodes.emplace_back(id, back, front, false);
+                walks_in_round.push_back(walk);
+            }
+            assert(unvisited_nodes.empty());
+
+            if (total_nodes_visited_to_find_matches > total_nodes) {
+                std::cout << "  avg. num. nodes visited to find a match = "
+                          << static_cast<double>(total_nodes_visited_to_find_matches) / total_nodes
+                          << std::endl;
+                std::cout << "  avg. num. nodes visited to success = "
+                          << static_cast<double>(total_nodes_visited_to_success) / total_nodes
+                          << std::endl;
+                std::cout << "  avg. num. nodes visited to failure = "
+                          << static_cast<double>(total_nodes_visited_to_failure) / total_nodes
+                          << std::endl;
+            }
+
+            std::cout << "  num_walks = " << walks_in_round.size() << std::endl;
+
+            {
+#ifndef NDEBUG
+                std::fill(visited.begin(), visited.end(), false);
+#endif
+                uint64_t num_matches_in_round = 0;
+                for (auto const& walk : walks_in_round) {
+                    num_matches_in_round += walk.size() - 1;
+#ifndef NDEBUG
+                    uint64_t prev_back = walk.front().front;
+                    // std::cout << "=>";
+                    for (auto const& w : walk) {
+                        if (visited[w.id]) std::cout << "ERROR: duplicate node." << std::endl;
+                        if (w.front != prev_back) {
+                            std::cout << "ERROR: path is broken." << std::endl;
+                        }
+                        prev_back = w.back;
+                        visited[w.id] = true;
+                        // std::cout << w.id << ":[" << w.front << "," << w.back << ","
+                        //           << (w.sign ? '+' : '-') << "] ";
+                    }
+                    // std::cout << std::endl;
+#endif
+                }
+                assert(m_num_runs_abundances > num_matches_in_round);
+                m_num_runs_abundances -= num_matches_in_round;
+                std::cout << "  num_matches = " << num_matches_in_round << std::endl;
+                std::cout << "  num_runs " << m_num_runs_abundances << std::endl;
+
+                round_timer.stop();
+                std::cout << "  time: " << round_timer.elapsed() / 1000000 << " [sec]" << std::endl;
+            }
+
+            rounds.push_back(walks_in_round);
+
+            nodes.swap(tmp_nodes);
+            tmp_nodes.clear();
+            walks_in_round.clear();
+            abundance_map.clear();
+
+            if (no_more_matches_possible) {
+                std::cout << "STOP: no more matches possible." << std::endl;
+                break;
+            }
+        }
+
+        timer.stop();
+        std::cout << "cover computed in: " << timer.elapsed() / 1000000 << " [sec]" << std::endl;
+        std::cout << "final number of runs R = " << m_num_runs_abundances << std::endl;
+
+        assert(m_num_runs_abundances >= 1);
+    }
+
+    void save(std::string const& filename) {
+        std::ofstream out(filename.c_str());
+        assert(rounds.size() > 0);
+        int r = rounds.size() - 1;
+        const auto& walks = rounds[r];
+        uint64_t num_sequences = 0;
+        for (uint64_t w = 0; w != walks.size(); ++w) num_sequences += visit(w, r, true, out);
+        if (num_sequences != m_num_sequences) {
+            std::cerr << "Error: expected to write " << m_num_sequences << " but written "
+                      << num_sequences << std::endl;
+            throw std::runtime_error("wrong number of sequences written");
+        }
+        out.close();
+    }
+
+private:
+    uint64_t m_num_sequences;
+    uint64_t m_num_runs_abundances;
+    std::vector<walks_t> rounds;
+
+    /* visit walk of index w in round of index r */
+    uint64_t visit(int w, int r, bool sign, std::ofstream& out) const {
+        if (r > 0) {
+            assert(size_t(w) < rounds[r].size());
+            auto const& walk = rounds[r][w];
+            uint64_t num_sequences = 0;
+            for (auto const& node : walk) {
+                /*
+                    + & + = +
+                    - & - = +
+                    + & - = -
+                    - & + = -
+                */
+                bool new_sign = sign and node.sign;
+                if (sign == false and node.sign == false) new_sign = true;
+                num_sequences += visit(node.id, r - 1, new_sign, out);
+            }
+            return num_sequences;
+        }
+        /* print */
+        assert(size_t(w) < rounds[0].size());
+        auto const& walk = rounds[0][w];
+
+        if (sign) {
+            /* print forward */
+            for (uint64_t i = 0; i != walk.size(); ++i) {
+                auto const& node = walk[i];
+                // out << node.id << ":[" << node.front << "," << node.back << "] ";
+                out << node.id;
+                out << (node.sign ? " 1\n" : " 0\n");
+            }
+        } else {
+            /* print backward and change orientation */
+            for (int64_t i = walk.size() - 1; i >= 0; --i) {
+                auto const& node = walk[i];
+                // out << node.id << ":[" << node.back << "," << node.front << "] ";
+                out << node.id;
+                out << (node.sign ? " 0\n" : " 1\n");
+            }
+        }
+
+        return walk.size();
+    }
+};
+
+}  // namespace sshash
\ No newline at end of file
diff --git a/include/builder/build_util_types.hpp b/include/builder/util.hpp
similarity index 96%
rename from include/builder/build_util_types.hpp
rename to include/builder/util.hpp
index 41151c9..725c56e 100644
--- a/include/builder/build_util_types.hpp
+++ b/include/builder/util.hpp
@@ -12,6 +12,13 @@ struct parse_runtime_error : public std::runtime_error {
         : std::runtime_error("did you provide an input file with abundance counts?") {}
 };
 
+void expect(char got, char expected) {
+    if (got != expected) {
+        std::cout << "got '" << got << "' but expected '" << expected << "'" << std::endl;
+        throw parse_runtime_error();
+    }
+}
+
 struct compact_string_pool {
     compact_string_pool() {}
 
diff --git a/include/ef_sequence.hpp b/include/ef_sequence.hpp
index 3d1ee93..6fd17ae 100644
--- a/include/ef_sequence.hpp
+++ b/include/ef_sequence.hpp
@@ -8,13 +8,13 @@ namespace sshash {
 
 template <bool index_zeros = false>
 struct ef_sequence {
-    ef_sequence() {}
+    ef_sequence() : m_universe(0) {}
 
     template <typename Iterator>
     void encode(Iterator begin, uint64_t n) {
         if (n == 0) return;
         uint64_t u = *(begin + n - 1);
-        m_universe = u;  // universe is last element, "back"
+        m_universe = u;
 
         uint64_t l = uint64_t((n && u / n) ? pthash::util::msb(u / n) : 0);
         pthash::bit_vector_builder bvb_high_bits(n + (u >> l) + 1);
@@ -37,8 +37,8 @@ struct ef_sequence {
 
         pthash::bit_vector(&bvb_high_bits).swap(m_high_bits);
         cv_builder_low_bits.build(m_low_bits);
-        pthash::darray1(m_high_bits).swap(m_high_bits_d1);
-        if constexpr (index_zeros) { pthash::darray0(m_high_bits).swap(m_high_bits_d0); }
+        m_high_bits_d1.build(m_high_bits);
+        if constexpr (index_zeros) m_high_bits_d0.build(m_high_bits);
     }
 
     struct iterator {
@@ -55,7 +55,6 @@ struct ef_sequence {
         }
 
         bool good() const { return m_ef != nullptr; }
-
         bool has_next() const { return m_pos < m_ef->size(); }
 
         uint64_t next() {
diff --git a/include/util.hpp b/include/util.hpp
index ff01b2f..6403374 100644
--- a/include/util.hpp
+++ b/include/util.hpp
@@ -17,7 +17,7 @@ constexpr uint64_t hashcode_bits = 64;
 constexpr double c = 3.0;  // for PThash
 constexpr uint64_t min_l = 6;
 constexpr uint64_t max_l = 12;
-constexpr uint64_t most_frequent_abundance = 1;  // usual value
+static const std::string default_tmp_dirname(".");
 }  // namespace constants
 
 typedef pthash::murmurhash2_64 base_hasher_type;
@@ -67,7 +67,6 @@ struct build_configuration {
 
         , canonical_parsing(false)
         , store_abundances(false)
-        , optimize_mfa(false)
         , verbose(true) {}
 
     uint64_t k;  // kmer size
@@ -79,15 +78,13 @@ struct build_configuration {
 
     bool canonical_parsing;
     bool store_abundances;
-    bool optimize_mfa;  // fma: "most frequent abundance"
     bool verbose;
 
     void print() const {
         std::cout << "k = " << k << ", m = " << m << ", seed = " << seed << ", l = " << l
                   << ", c = " << c
                   << ", canonical_parsing = " << (canonical_parsing ? "true" : "false")
-                  << ", store_abundances = " << (store_abundances ? "true" : "false")
-                  << ", optimize_mfa = " << (optimize_mfa ? "true" : "false") << std::endl;
+                  << ", store_abundances = " << (store_abundances ? "true" : "false") << std::endl;
     }
 };
 
diff --git a/src/build.cpp b/src/build.cpp
index 40adc6e..1477c9a 100644
--- a/src/build.cpp
+++ b/src/build.cpp
@@ -43,8 +43,6 @@ int main(int argc, char** argv) {
                "--canonical-parsing", true);
     parser.add("store_abundances", "Also store the abundances in compressed format.",
                "--abundances", true);
-    parser.add("optimize_mfa", "Turn on space optimization for most frequent abundance (mfa).",
-               "--optimize-mfa", true);
     parser.add("output_filename", "Output file name where the data structure will be serialized.",
                "-o", false);
     parser.add("check", "Check correctness after construction.", "--check", true);
@@ -68,7 +66,6 @@ int main(int argc, char** argv) {
     if (parser.parsed("c")) build_config.c = parser.get<double>("c");
     build_config.canonical_parsing = parser.get<bool>("canonical_parsing");
     build_config.store_abundances = parser.get<bool>("store_abundances");
-    build_config.optimize_mfa = parser.get<bool>("optimize_mfa");
     build_config.verbose = parser.get<bool>("verbose");
     build_config.print();
 
diff --git a/src/permute.cpp b/src/permute.cpp
new file mode 100644
index 0000000..6844e5c
--- /dev/null
+++ b/src/permute.cpp
@@ -0,0 +1,536 @@
+#include <iostream>
+
+#include "../include/gz/zip_stream.cpp"
+#include "../external/pthash/external/essentials/include/essentials.hpp"
+#include "../external/pthash/external/cmd_line_parser/include/parser.hpp"
+#include "../include/builder/cover.hpp"
+#include "../include/builder/util.hpp"
+
+using namespace sshash;
+
+struct permute_data {
+    permute_data() : num_runs_abundances(0), num_sequences(0) {}
+    uint64_t num_runs_abundances;
+    uint64_t num_sequences;
+    std::vector<node> nodes;
+};
+
+void parse_file(std::istream& is, permute_data& data, build_configuration const& build_config) {
+    std::string sequence;
+    uint64_t k = build_config.k;
+    uint64_t num_bases = 0;
+    uint64_t num_kmers = 0;
+    uint64_t seq_len = 0;
+
+    uint64_t sum_of_abundances = 0;
+    uint64_t num_sequences_diff_abs = 0;  // num sequences whose kmers have different abundances
+    uint64_t num_sequences_all_mfa = 0;   // num sequences whose kmers have same abundance == mfa
+
+    data.num_runs_abundances = 0;
+    data.num_sequences = 0;
+
+    /* count number of distinct abundances */
+    std::unordered_set<uint32_t> distinct_abundances;
+
+    auto parse_header = [&]() {
+        if (sequence.empty()) return;
+
+        /*
+            Heder format:
+            >[id] LN:i:[seq_len] ab:Z:[ab_seq]
+            where [ab_seq] is a space-separated sequence of integer counters (the abundances),
+            whose length is equal to [seq_len]-k+1
+        */
+
+        // example header: '>12 LN:i:41 ab:Z:2 2 2 2 2 2 2 2 2 2 2'
+
+        expect(sequence[0], '>');
+        uint64_t i = 0;
+        i = sequence.find_first_of(' ', i);
+        if (i == std::string::npos) throw parse_runtime_error();
+
+        i += 1;
+        expect(sequence[i + 0], 'L');
+        expect(sequence[i + 1], 'N');
+        expect(sequence[i + 2], ':');
+        expect(sequence[i + 3], 'i');
+        expect(sequence[i + 4], ':');
+        i += 5;
+        uint64_t j = sequence.find_first_of(' ', i);
+        if (j == std::string::npos) throw parse_runtime_error();
+
+        seq_len = std::strtoull(sequence.data() + i, nullptr, 10);
+        i = j + 1;
+        expect(sequence[i + 0], 'a');
+        expect(sequence[i + 1], 'b');
+        expect(sequence[i + 2], ':');
+        expect(sequence[i + 3], 'Z');
+        expect(sequence[i + 4], ':');
+        i += 5;
+
+        bool kmers_have_all_mfa = true;
+        bool kmers_have_different_abundances = false;
+        uint64_t front = constants::invalid;
+        uint64_t back = constants::invalid;
+
+        for (uint64_t j = 0, prev_abundance = constants::invalid; j != seq_len - k + 1; ++j) {
+            uint64_t abundance = std::strtoull(sequence.data() + i, nullptr, 10);
+            sum_of_abundances += abundance;
+            i = sequence.find_first_of(' ', i) + 1;
+
+            distinct_abundances.insert(abundance);
+
+            /* set front and back */
+            if (j == 0) front = abundance;
+            if (j == seq_len - k) back = abundance;
+
+            /* accumulate statistics */
+            if (abundance != constants::most_frequent_abundance) kmers_have_all_mfa = false;
+            if (j > 0 and abundance != prev_abundance) kmers_have_different_abundances = true;
+
+            /* count the number of runs */
+            if (abundance != prev_abundance) data.num_runs_abundances += 1;
+
+            prev_abundance = abundance;
+        }
+
+        num_sequences_diff_abs += kmers_have_different_abundances;
+        num_sequences_all_mfa += kmers_have_all_mfa;
+
+        constexpr bool sign = true;
+        data.nodes.emplace_back(data.num_sequences, front, back, sign);
+    };
+
+    while (!is.eof()) {
+        std::getline(is, sequence);  // header sequence
+        parse_header();
+
+        std::getline(is, sequence);  // DNA sequence
+        if (sequence.size() < k) continue;
+
+        if (++data.num_sequences % 100000 == 0) {
+            std::cout << "read " << data.num_sequences << " sequences, " << num_bases << " bases, "
+                      << num_kmers << " kmers" << std::endl;
+        }
+
+        num_bases += sequence.size();
+        num_kmers += sequence.size() - k + 1;
+
+        if (seq_len != sequence.size()) {
+            std::cout << "ERROR: expected a sequence of length " << seq_len
+                      << " but got one of length " << sequence.size() << std::endl;
+            throw std::runtime_error("file is malformed");
+        }
+    }
+
+    assert(data.nodes.size() == data.num_sequences);
+    assert(data.num_runs_abundances >= data.num_sequences);
+
+    std::cout << "read " << data.num_sequences << " sequences, " << num_bases << " bases, "
+              << num_kmers << " kmers" << std::endl;
+    std::cout << "found " << distinct_abundances.size() << " distinct abundances" << std::endl;
+    std::cout << "sum_of_abundances " << sum_of_abundances << std::endl;
+    std::cout << "num_sequences whose kmers have different abundances: " << num_sequences_diff_abs
+              << "/" << data.num_sequences << " ("
+              << (num_sequences_diff_abs * 100.0) / data.num_sequences << "%)" << std::endl;
+    std::cout << "num_sequences whose kmers all have the same abundance != mfa: "
+              << (data.num_sequences - num_sequences_diff_abs) << "/" << data.num_sequences << " ("
+              << ((data.num_sequences - num_sequences_diff_abs) * 100.0) / data.num_sequences
+              << "%)" << std::endl;
+    std::cout << "num_sequences whose kmers all have the same abundance == mfa: "
+              << num_sequences_all_mfa << "/" << (data.num_sequences - num_sequences_diff_abs)
+              << " ("
+              << (num_sequences_all_mfa * 100.0) / (data.num_sequences - num_sequences_diff_abs)
+              << "%)" << std::endl;
+
+    /* computation of lower bound to the number of final abundance runs */
+    {
+        struct counts {
+            uint32_t freq;
+            uint32_t indegree;
+            uint32_t outdegree;
+        };
+
+        std::unordered_map<uint32_t, counts> endpoint_counts;
+
+        for (auto const& node : data.nodes) {
+            uint64_t front = node.front;
+            uint64_t back = node.back;
+            {
+                auto it = endpoint_counts.find(front);
+                if (it == endpoint_counts.cend()) {
+                    endpoint_counts[front] = {1, 0, 0};
+                } else {
+                    (*it).second.freq += 1;
+                }
+            }
+            {
+                auto it = endpoint_counts.find(back);
+                if (it == endpoint_counts.cend()) {
+                    endpoint_counts[back] = {1, 0, 0};
+                } else {
+                    (*it).second.freq += 1;
+                }
+            }
+            if (front != back) {
+                endpoint_counts[back].indegree += 1;
+                endpoint_counts[front].outdegree += 1;
+            }
+        }
+
+        uint64_t num_endpoints = 0;
+        for (auto const& p : endpoint_counts) {
+            uint64_t freq = p.second.freq;
+            uint64_t indegree = p.second.indegree;
+            uint64_t outdegree = p.second.outdegree;
+
+            /* special case: abundance will appear as singleton, so count twice */
+            if (indegree == 0 and outdegree == 0) {
+                num_endpoints += 2;
+                continue;
+            }
+
+            /* if the excess of frequency is odd, then the abundance will appear as end-point */
+            if (freq % 2 == 1) num_endpoints += 1;
+        }
+
+        uint64_t num_distinct_abundances = distinct_abundances.size();
+        uint64_t num_runs_abundances_internal = data.num_runs_abundances - data.nodes.size();
+        uint64_t num_walks = num_endpoints / 2;
+        std::cout << "(estimated) num_walks = " << num_walks << std::endl;
+        std::cout << "num_runs_abundances = " << data.num_runs_abundances << std::endl;
+        std::cout << "num_runs_abundances_internal = " << num_runs_abundances_internal << std::endl;
+
+        uint64_t R_lo =
+            std::max<uint64_t>(num_distinct_abundances, num_runs_abundances_internal + 1);
+        uint64_t R_hi = num_runs_abundances_internal + num_walks;
+        std::cout << "R_lo = " << R_lo << std::endl;
+        std::cout << "R_hi = " << R_hi << std::endl;
+    }
+}
+
+permute_data parse_file(std::string const& filename, build_configuration const& build_config) {
+    std::ifstream is(filename.c_str());
+    if (!is.good()) throw std::runtime_error("error in opening the file '" + filename + "'");
+    std::cout << "reading file '" << filename << "'..." << std::endl;
+    permute_data data;
+    if (util::ends_with(filename, ".gz")) {
+        zip_istream zis(is);
+        parse_file(zis, data, build_config);
+    } else {
+        parse_file(is, data, build_config);
+    }
+    is.close();
+    return data;
+}
+
+struct lines_iterator {
+    lines_iterator(uint8_t const* begin, uint8_t const* end) : m_begin(begin), m_end(end) {
+        advance_to_next();
+    }
+
+    void advance_to_next() {
+        m_header = read_line();
+        m_sequence = read_line();
+    }
+
+    std::string const& header() const { return m_header; }
+    std::string const& sequence() const { return m_sequence; }
+    bool has_next() const { return !m_header.empty() and !m_sequence.empty(); }
+
+private:
+    uint8_t const* m_begin;
+    uint8_t const* m_end;
+    std::string m_header;
+    std::string m_sequence;
+
+    std::string read_line() {
+        uint8_t const* begin = m_begin;
+        while (m_begin != m_end and *m_begin++ != '\n')
+            ;
+        if (begin == m_begin) return std::string("");
+        return std::string(reinterpret_cast<const char*>(begin), m_begin - begin - 1);
+    }
+};
+
+void reverse_header(std::string const& input, std::string& output, uint64_t k) {
+    // Example header:
+    // >2 LN:i:61 ab:Z:4 4 4 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1
+    // Expected output:
+    // >2 LN:i:61 ab:Z:1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 4 4 4
+
+    /* skip validation of input string */
+
+    uint64_t i = 0;
+    i = input.find_first_of(' ', i);
+    if (i == std::string::npos) throw parse_runtime_error();
+
+    i += 6;  // skip ' LN:i':
+    uint64_t j = input.find_first_of(' ', i);
+    if (j == std::string::npos) throw parse_runtime_error();
+
+    uint64_t seq_len = std::strtoull(input.data() + i, nullptr, 10);
+    i = j + 6;  // skip ' ab:Z:'
+    output.append(input.data(), input.data() + i);
+
+    std::vector<uint32_t> abundances;
+    abundances.reserve(seq_len - k + 1);
+    for (uint64_t j = 0; j != seq_len - k + 1; ++j) {
+        uint64_t abundance = std::strtoull(input.data() + i, nullptr, 10);
+        abundances.push_back(abundance);
+        i = input.find_first_of(' ', i) + 1;
+    }
+
+    std::reverse(abundances.begin(), abundances.end());
+    for (auto abundance : abundances) output.append(std::to_string(abundance) + " ");
+}
+
+void permute_and_write(std::istream& is, std::string const& output_filename,
+                       std::string const& tmp_dirname, pthash::compact_vector const& permutation,
+                       pthash::bit_vector const& signs, uint64_t k) {
+    constexpr uint64_t limit = 1 * essentials::GB;
+    std::vector<std::pair<std::string, std::string>> buffer;  // (header, dna)
+
+    std::string run_identifier =
+        std::to_string(pthash::clock_type::now().time_since_epoch().count());
+
+    std::string header_sequence;
+    std::string header_sequence_r;
+
+    std::string dna_sequence;
+    std::string dna_sequence_rc;
+
+    uint64_t num_sequences = permutation.size();
+    uint64_t num_bases = 0;
+    uint64_t bytes = 0;
+    uint64_t num_files_to_merge = 0;
+
+    auto get_tmp_output_filename = [&](uint64_t id) {
+        return tmp_dirname + "/tmp.run" + run_identifier + "." + std::to_string(id);
+    };
+
+    auto sort_and_flush = [&]() {
+        if (buffer.empty()) return;
+
+        std::cout << "sorting buffer..." << std::endl;
+        std::sort(buffer.begin(), buffer.end(), [&](auto const& p_x, auto const& p_y) {
+            assert(p_x.first.front() == '>');
+            assert(p_y.first.front() == '>');
+            uint64_t seq_id_x = std::strtoull(p_x.first.data() + 1, nullptr, 10);
+            uint64_t seq_id_y = std::strtoull(p_y.first.data() + 1, nullptr, 10);
+            return permutation[seq_id_x] < permutation[seq_id_y];
+        });
+
+        auto tmp_output_filename = get_tmp_output_filename(num_files_to_merge);
+        std::cout << "saving to file '" << tmp_output_filename << "'..." << std::endl;
+        std::ofstream out(tmp_output_filename.c_str());
+        if (!out.is_open()) throw std::runtime_error("cannot open file");
+        for (auto const& seq : buffer) out << seq.first << '\n' << seq.second << '\n';
+        out.close();
+
+        buffer.clear();
+        bytes = 0;
+        num_files_to_merge += 1;
+    };
+
+    for (uint64_t i = 0; i != num_sequences; ++i) {
+        std::getline(is, header_sequence);
+        std::getline(is, dna_sequence);
+
+        if (!signs[i]) {
+            /* compute reverse complement of dna_sequence
+               and reverse the abundances in header_sequence */
+            dna_sequence_rc.resize(dna_sequence.size());
+            header_sequence_r.clear();
+            util::compute_reverse_complement(dna_sequence.data(), dna_sequence_rc.data(),
+                                             dna_sequence.size());
+            reverse_header(header_sequence, header_sequence_r, k);
+            dna_sequence.swap(dna_sequence_rc);
+            header_sequence.swap(header_sequence_r);
+        }
+
+        uint64_t seq_bytes = header_sequence.size() + dna_sequence.size() + 16;
+        if (bytes + seq_bytes > limit) sort_and_flush();
+
+        bytes += seq_bytes;
+        buffer.emplace_back(header_sequence, dna_sequence);
+        num_bases += dna_sequence.size();
+
+        if (i != 0 and i % 100000 == 0) {
+            std::cout << "read " << i << " sequences, " << num_bases << " bases" << std::endl;
+        }
+    }
+    sort_and_flush();
+
+    std::cout << "read " << num_sequences << " sequences, " << num_bases << " bases" << std::endl;
+
+    /* merge */
+    {
+        assert(num_files_to_merge > 0);
+        std::cout << "files to merge = " << num_files_to_merge << std::endl;
+
+        std::vector<lines_iterator> iterators;
+        std::vector<uint32_t> idx_heap;
+        iterators.reserve(num_files_to_merge);
+        idx_heap.reserve(num_files_to_merge);
+        std::vector<mm::file_source<uint8_t>> mm_files(num_files_to_merge);
+
+        auto heap_idx_comparator = [&](uint32_t i, uint32_t j) {
+            assert(iterators[i].header().front() == '>');
+            assert(iterators[j].header().front() == '>');
+            uint64_t seq_id_x = std::strtoull(iterators[i].header().data() + 1, nullptr, 10);
+            uint64_t seq_id_y = std::strtoull(iterators[j].header().data() + 1, nullptr, 10);
+            return permutation[seq_id_x] > permutation[seq_id_y];
+        };
+
+        auto advance_heap_head = [&]() {
+            auto idx = idx_heap.front();
+            iterators[idx].advance_to_next();
+            if (iterators[idx].has_next()) {  // percolate down the head
+                uint64_t pos = 0;
+                uint64_t size = idx_heap.size();
+                while (2 * pos + 1 < size) {
+                    uint64_t i = 2 * pos + 1;
+                    if (i + 1 < size and heap_idx_comparator(idx_heap[i], idx_heap[i + 1])) ++i;
+                    if (heap_idx_comparator(idx_heap[i], idx_heap[pos])) break;
+                    std::swap(idx_heap[pos], idx_heap[i]);
+                    pos = i;
+                }
+            } else {
+                std::pop_heap(idx_heap.begin(), idx_heap.end(), heap_idx_comparator);
+                idx_heap.pop_back();
+            }
+        };
+
+        /* create the input iterators and make the heap */
+        for (uint64_t i = 0; i != num_files_to_merge; ++i) {
+            auto tmp_output_filename = get_tmp_output_filename(i);
+            mm_files[i].open(tmp_output_filename, mm::advice::sequential);
+            iterators.emplace_back(mm_files[i].data(), mm_files[i].data() + mm_files[i].size());
+            idx_heap.push_back(i);
+        }
+        std::make_heap(idx_heap.begin(), idx_heap.end(), heap_idx_comparator);
+
+        std::ofstream out(output_filename.c_str());
+        if (!out.is_open()) throw std::runtime_error("cannot open file");
+
+        uint64_t num_written_sequences = 0;
+        while (!idx_heap.empty()) {
+            auto const& it = iterators[idx_heap.front()];
+            out << it.header() << '\n' << it.sequence() << '\n';
+            num_written_sequences += 1;
+            if (num_written_sequences % 100000 == 0) {
+                std::cout << "written sequences = " << num_written_sequences << "/" << num_sequences
+                          << std::endl;
+            }
+            advance_heap_head();
+        }
+        std::cout << "written sequences = " << num_written_sequences << "/" << num_sequences
+                  << std::endl;
+        out.close();
+        assert(num_written_sequences == num_sequences);
+
+        /* remove tmp files */
+        for (uint64_t i = 0; i != num_files_to_merge; ++i) {
+            mm_files[i].close();
+            auto tmp_output_filename = get_tmp_output_filename(i);
+            std::remove(tmp_output_filename.c_str());
+        }
+    }
+}
+
+void permute_and_write(std::string const& input_filename, std::string const& output_filename,
+                       std::string const& tmp_dirname, pthash::compact_vector const& permutation,
+                       pthash::bit_vector const& signs, uint64_t k) {
+    std::ifstream is(input_filename.c_str());
+    if (!is.good()) throw std::runtime_error("error in opening the file '" + input_filename + "'");
+    std::cout << "reading file '" << input_filename << "'..." << std::endl;
+    if (util::ends_with(input_filename, ".gz")) {
+        zip_istream zis(is);
+        permute_and_write(zis, output_filename, tmp_dirname, permutation, signs, k);
+    } else {
+        permute_and_write(is, output_filename, tmp_dirname, permutation, signs, k);
+    }
+    is.close();
+}
+
+int main(int argc, char** argv) {
+    cmd_line_parser::parser parser(argc, argv);
+
+    /* mandatory arguments */
+    parser.add("input_filename",
+               "Must be a FASTA file (.fa/fasta extension) compressed with gzip (.gz) or not:\n"
+               "\t- without duplicate nor invalid kmers\n"
+               "\t- one DNA sequence per line\n"
+               "\t- with also kmers' abundances.\n"
+               "\tFor example, it could be the de Bruijn graph topology output by BCALM.");
+
+    parser.add("k", "K-mer length (must be <= " + std::to_string(constants::max_k) + ").");
+
+    /* optional arguments */
+    parser.add("output_filename", "Output file where the permuted collection will be written.",
+               "-o", false);
+    parser.add("tmp_dirname",
+               "Temporary directory used for merging in external memory. Default is directory '" +
+                   constants::default_tmp_dirname + "'.",
+               "-d", false);
+
+    if (!parser.parse()) return 1;
+
+    auto input_filename = parser.get<std::string>("input_filename");
+    auto k = parser.get<uint64_t>("k");
+
+    build_configuration build_config;
+    build_config.k = k;
+
+    std::string output_filename = input_filename + ".permuted";
+    if (parser.parsed("output_filename")) {
+        output_filename = parser.get<std::string>("output_filename");
+    }
+
+    std::string tmp_dirname = constants::default_tmp_dirname;
+    if (parser.parsed("tmp_dirname")) tmp_dirname = parser.get<std::string>("tmp_dirname");
+
+    std::string permutation_filename = tmp_dirname + "/tmp.permutation";
+
+    auto data = parse_file(input_filename, build_config);
+
+    {
+        /* compute cover */
+        cover c(data.num_sequences, data.num_runs_abundances);
+        assert(data.nodes.size() == data.num_sequences);
+        c.compute(data.nodes);
+        c.save(permutation_filename);
+        std::vector<node>().swap(data.nodes);
+    }
+
+    pthash::compact_vector permutation;
+    pthash::bit_vector signs;
+    {
+        std::ifstream is(permutation_filename.c_str());
+        if (!is.good()) {
+            throw std::runtime_error("error in opening the file '" + permutation_filename + "'");
+        }
+        pthash::compact_vector::builder cv_builder(
+            data.num_sequences,
+            data.num_sequences == 1 ? 1 : std::ceil(std::log2(data.num_sequences)));
+        pthash::bit_vector_builder bv_builder(data.num_sequences);
+        for (uint64_t i = 0; i != data.num_sequences; ++i) {
+            uint64_t position = 0;
+            bool sign = 0;
+            is >> position;
+            is >> sign;
+            cv_builder.set(position, i);
+            bv_builder.set(position, sign);
+        }
+        is.close();
+        cv_builder.build(permutation);
+        signs.build(&bv_builder);
+    }
+
+    /* permute and save to output file */
+    permute_and_write(input_filename, output_filename, tmp_dirname, permutation, signs, k);
+    std::remove(permutation_filename.c_str());
+
+    return 0;
+}