diff --git a/dccrg.hpp b/dccrg.hpp index 3bff081..f71c26a 100644 --- a/dccrg.hpp +++ b/dccrg.hpp @@ -41,6 +41,7 @@ dccrg::Dccrg for a starting point in the API. #include "iterator" #include "limits" #include "map" +#include #include "set" #include "stdexcept" #include "tuple" @@ -368,8 +369,11 @@ template < no_load_balancing(other.get_no_load_balancing()), reserved_options(other.get_reserved_options()), cell_weights(other.get_cell_weights()), + communication_weights(other.get_communication_weights()), + partitioning_neighborhoods(other.partitioning_neighborhoods), neighbor_processes(other.get_neighbor_processes()), - balancing_load(other.get_balancing_load()) + balancing_load(other.get_balancing_load()), + load_balance_norm(other.load_balance_norm) { if (other.get_balancing_load()) { std::cerr << __FILE__ << ":" << __LINE__ @@ -805,6 +809,28 @@ template < return NULL; } + const std::vector>> get_mutual_neighbors( + const uint64_t cell, + const int neighborhood_id = default_neighborhood_id + ) const { + std::vector>> neighbors; + for (auto& i : *get_neighbors_of(cell, neighborhood_id)) { + for (auto& j : *get_neighbors_to(cell, neighborhood_id)) { + if (i.first == j.first) { + neighbors.push_back(i); + } + } + } + return neighbors; + } + + void clear_partitioning_neighborhoods (uint64_t cell) { + partitioning_neighborhoods[cell].clear(); + } + + void add_partitioning_neighborhood (uint64_t cell, int neighborhood) { + partitioning_neighborhoods[cell].insert(neighborhood); + } /*! Returns a pointer to the cells that consider given cell as a neighbor. @@ -2688,6 +2714,16 @@ template < return ret_val; } + // Consider giving index as argument to reduce calculations + static std::array distance_to_double(const std::array& in) + { + std::array ret; + for (int i = 0; i < 3; ++i) { + ret[i] = static_cast(in[i]) / static_cast(in[3] > 0 ? in[3] : 1); + } + return ret; + } + /*! Returns true if given cell's neighbor types match given criterion, false otherwise. @@ -3251,8 +3287,7 @@ template < } // children of refined cells inherit their weight - if (this->rank == process_of_refined - && this->cell_weights.count(refined) > 0) { + if (this->rank == process_of_refined && this->cell_weights.count(refined) > 0) { for (const uint64_t child: children) { this->cell_weights[child] = this->cell_weights.at(refined); } @@ -3407,6 +3442,8 @@ template < this->pin_requests.erase(unrefined); this->new_pin_requests.erase(unrefined); this->cell_weights.erase(unrefined); + this->communication_weights.erase(unrefined); + this->partitioning_neighborhoods.erase(unrefined); // don't send unrefined cells' user data to self if (this->rank == process_of_unrefined @@ -4076,6 +4113,7 @@ template < this->cells_not_to_refine.clear(); this->cells_not_to_unrefine.clear(); this->cell_weights.clear(); + this->communication_weights.clear(); #ifdef DEBUG // check that there are no duplicate adds / removes @@ -5580,7 +5618,6 @@ template < - RETURN_LISTS - EDGE_WEIGHT_DIM - NUM_GID_ENTRIES - - OBJ_WEIGHT_DIM Call this with name = LB_METHOD and value = HIER to use hierarchial partitioning. @@ -5603,6 +5640,10 @@ template < ); } + if (name == "OBJ_WEIGHT_DIM") { + this->cell_weight_dim = std::stoi(value); + } + Zoltan_Set_Param(this->zoltan, name.c_str(), value.c_str()); return *this; } @@ -6112,7 +6153,7 @@ template < Returns true on success, does nothing and returns false otherwise. */ - bool set_cell_weight(const uint64_t cell, const double weight) + bool set_cell_weight(const uint64_t cell, const std::vector& weight) { if (this->cell_process.count(cell) == 0) { return false; @@ -6126,7 +6167,52 @@ template < return false; } - this->cell_weights[cell] = weight; + if (cell_weight_dim == 1 && weight.size() > 1) { + if (load_balance_norm == 0) { + // Infinity norm i.e. maximum + this->cell_weights[cell] = std::vector{*std::max_element(weight.begin(), weight.end())}; + } else if (load_balance_norm > 0) { + // p-norm, including Manhattan (sum!) and Euclidian + double sum {0.0}; + for (auto w : weight) { + sum += std::pow(w, load_balance_norm); + } + this->cell_weights[cell] = std::vector{pow(sum, 1.0 / load_balance_norm)}; + } else { + throw std::invalid_argument("Invalid load balance norm!"); + } + } else if (static_cast(cell_weight_dim) != weight.size()) { + throw std::invalid_argument("Invalid cell weight dimension!"); + } else { + this->cell_weights[cell] = weight; + } + + return true; + } + + bool set_cell_weight(const uint64_t cell, const double weight) { + return set_cell_weight(cell, std::vector({weight})); + } + + void set_load_balance_norm(int norm) { + this->load_balance_norm = norm; + } + + bool set_communication_weight(const uint64_t cell, const double weight) + { + if (this->cell_process.count(cell) == 0) { + return false; + } + + if (this->cell_process.at(cell) != this->rank) { + return false; + } + + if (cell != this->get_child(cell)) { + return false; + } + + this->communication_weights[cell] = weight; return true; } @@ -6137,7 +6223,30 @@ template < Returns a quiet nan if above conditions are not met. Unset cell weights are assumed to be 1. */ - double get_cell_weight(const uint64_t cell) const + std::vector get_cell_weight(const uint64_t cell) const + { + if (this->cell_process.count(cell) == 0) { + return std::vector(cell_weight_dim, std::numeric_limits::quiet_NaN()); + } + + if (this->cell_process.at(cell) != this->rank) { + return std::vector(cell_weight_dim, std::numeric_limits::quiet_NaN()); + } + + if (cell != this->get_child(cell)) { + return std::vector(cell_weight_dim, std::numeric_limits::quiet_NaN()); + } + + return this->cell_weights.count(cell) ? this->cell_weights.at(cell) : std::vector(cell_weight_dim, 1.0); + } + + /*! + Returns the weight of given local existing cell without children. + + Returns a quiet nan if above conditions are not met. + Unset cell weights are assumed to be 1. + */ + double get_communication_weight(const uint64_t cell) const { if (this->cell_process.count(cell) == 0) { return std::numeric_limits::quiet_NaN(); @@ -6151,14 +6260,9 @@ template < return std::numeric_limits::quiet_NaN(); } - if (this->cell_weights.count(cell) == 0) { - return 1; - } else { - return this->cell_weights.at(cell); - } + return this->communication_weights.count(cell) ? this->communication_weights.at(cell) : 1.0; } - /*! Returns the cells that will be added to this process by load balancing. */ @@ -6287,11 +6391,15 @@ template < /*! Returns cell weights that have been set. */ - const std::unordered_map& get_cell_weights() const + const std::unordered_map>& get_cell_weights() const { return this->cell_weights; } + const std::unordered_map& get_communication_weights() const + { + return this->communication_weights; + } /*! Adds a new neighborhood for updating Cell_Data between neighbors on different processes. @@ -7137,6 +7245,7 @@ template < Zoltan_Struct* zoltan; // number of processes per part in a hierarchy level (numbering starts from 0) std::vector processes_per_part; + std::vector partition_number; // options for each level of hierarchial load balancing (numbering start from 0) std::vector> partitioning_options; // record whether Zoltan_LB_Partition is expected to fail @@ -7145,8 +7254,16 @@ template < // reserved options that the user cannot change std::unordered_set reserved_options; - // optional user-given weights of cells on this process - std::unordered_map cell_weights; + // optional user-given processing weights of cells on this process + int cell_weight_dim {1}; + int load_balance_norm {1}; // Sum + std::unordered_map> cell_weights; + + // optional user-given communication weights of cells on this process + std::unordered_map communication_weights; + + // Set of neighborhoods each cell communicates in (hyper)graph partitioning + std::unordered_map> partitioning_neighborhoods; // processes which have cells close enough from cells of this process std::unordered_set neighbor_processes; @@ -7432,6 +7549,44 @@ template < this->no_load_balancing = false; } + // Hardcoded for now + if (this->load_balancing_method == "HIER") { + // Automagical hierarchical partition + std::hash hasher; + + //get name of this node + char nodename[MPI_MAX_PROCESSOR_NAME]; + int namelength, nodehash; + MPI_Get_processor_name(nodename,&namelength); + nodehash = static_cast(hasher(std::string(nodename, namelength)) % std::numeric_limits::max()); + + //intra-node communicator + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm nodeComm; + MPI_Comm_split(MPI_COMM_WORLD, nodehash, rank, &nodeComm); + + //inter-node communicator + int nodeRank; + MPI_Comm_rank(nodeComm,&nodeRank); + MPI_Comm interComm; + MPI_Comm_split(MPI_COMM_WORLD, nodeRank, rank, &interComm); + int interRank; + MPI_Comm_rank(interComm, &interRank); + + // Make sure everyone in node agrees on the number of the node + int nodeNumber {interRank}; + MPI_Bcast(&nodeNumber, 1, MPI_INT, 0, nodeComm); + + add_partitioning_level(1); // Level 0 - Nodes + partition_number.push_back(nodeNumber); + add_partitioning_option(0, "LB_METHOD", "GRAPH"); + + add_partitioning_level(1); // Level 1 - Processes + partition_number.push_back(nodeRank); + add_partitioning_option(1, "LB_METHOD", "RIB"); + } + // If environment variable DCCRG_PROCS is set, // use that for determining the number of DCCRG worker-processes int worker_procs = this->comm_size; @@ -7446,7 +7601,6 @@ template < this->reserved_options.insert("EDGE_WEIGHT_DIM"); this->reserved_options.insert("NUM_GID_ENTRIES"); this->reserved_options.insert("NUM_LID_ENTRIES"); - this->reserved_options.insert("OBJ_WEIGHT_DIM"); this->reserved_options.insert("RETURN_LISTS"); this->reserved_options.insert("NUM_GLOBAL_PARTS"); this->reserved_options.insert("NUM_LOCAL_PARTS"); @@ -7456,10 +7610,9 @@ template < Set reserved options */ // 0 because Zoltan crashes in hierarchial with larger values - Zoltan_Set_Param(this->zoltan, "EDGE_WEIGHT_DIM", "0"); + Zoltan_Set_Param(this->zoltan, "EDGE_WEIGHT_DIM", "1"); Zoltan_Set_Param(this->zoltan, "NUM_GID_ENTRIES", "1"); - Zoltan_Set_Param(this->zoltan, "NUM_LID_ENTRIES", "0"); - Zoltan_Set_Param(this->zoltan, "OBJ_WEIGHT_DIM", "1"); + Zoltan_Set_Param(this->zoltan, "NUM_LID_ENTRIES", "1"); Zoltan_Set_Param(this->zoltan, "RETURN_LISTS", "ALL"); // set other options @@ -7538,6 +7691,17 @@ template < this ); + Zoltan_Set_Obj_Size_Multi_Fn( + this->zoltan, + &Dccrg< + Cell_Data, + Geometry, + std::tuple, + std::tuple + >::fill_with_communication_weights, + this + ); + Zoltan_Set_HG_Size_CS_Fn( this->zoltan, &Dccrg< @@ -7960,8 +8124,7 @@ template < const std::string& get_load_balancing_method() const { return this->load_balancing_method; } - - + private: /*! Initializes local cells' neighbor lists and related data structures. @@ -10617,7 +10780,13 @@ template < >(data); *error = ZOLTAN_OK; + if (number_of_weights_per_object != dccrg_instance->cell_weight_dim) { + std::cerr << "ERROR Zoltan expected " << number_of_weights_per_object << " weights, dccrg has " << dccrg_instance->cell_weight_dim << std::endl; + abort(); + } + int i = 0; + int j = 0; for (const auto& item: dccrg_instance->cell_data) { #ifdef DEBUG @@ -10629,12 +10798,13 @@ template < global_ids[i] = item.first; - if (number_of_weights_per_object > 0) { - if (dccrg_instance->cell_weights.count(item.first) > 0) { - object_weights[i] = float(dccrg_instance->cell_weights.at(item.first)); - } else { - object_weights[i] = 1; - } + if (static_cast(number_of_weights_per_object) != dccrg_instance->get_cell_weight(item.first).size()) { + std::cerr << "ERROR Zoltan expected " << number_of_weights_per_object << " weights, cell has " << dccrg_instance->get_cell_weight(item.first).size() << std::endl; + abort(); + } + + for (auto w : dccrg_instance->get_cell_weight(item.first)) { + object_weights[j++] = w; } i++; @@ -10643,6 +10813,7 @@ template < /*! + Graph partitioning Writes the number of neighbors into number_of_neighbors for all cells given in global_ids. */ static void fill_number_of_neighbors_for_cells( @@ -10681,13 +10852,9 @@ template < } number_of_neighbors[i] = 0; - for (const auto& neighbor_i: dccrg_instance->neighbors_of.at(cell)) { - const auto& neighbor = neighbor_i.first; - if (neighbor != 0 - /* Zoltan 3.501 crashes in hierarchial - if a cell is a neighbor to itself */ - && neighbor != cell) { - number_of_neighbors[i]++; + for (auto neighborhood : dccrg_instance->partitioning_neighborhoods[cell]) { + for (const auto& [neighbor, dir] : *dccrg_instance->get_neighbors_to(cell, neighborhood)) { + number_of_neighbors[i] += neighbor > 0 && neighbor != cell; // We apparently have to do this stupid filtering here } } } @@ -10695,6 +10862,7 @@ template < /*! + Graph partitioning Writes neighbor lists of given cells into neighbors, etc. */ static void fill_neighbor_lists( @@ -10725,6 +10893,16 @@ template < >(data); *error = ZOLTAN_OK; + // TODO: this could be a different parameter, possibly later + // if ((unsigned int) number_of_weights_per_edge != dccrg_instance->cell_weight_dim) { + // std::cerr + // << "Zoltan is expecting wrong number of weights per edge: " << number_of_weights_per_edge + // << " instead of " << dccrg_instance->cell_weight_dim + // << std::endl; + // *error = ZOLTAN_FATAL; + // return; + // } + int current_neighbor_number = 0; for (int i = 0; i < number_of_cells; i++) { uint64_t cell = uint64_t(global_ids[i]); @@ -10736,37 +10914,79 @@ template < return; } - number_of_neighbors[i] = 0; + // number_of_neighbors[i] = 0; - for (const auto& neighbor_i: dccrg_instance->neighbors_of.at(cell)) { - const auto& neighbor = neighbor_i.first; + // We consider the communication weight from this cell to others + auto weight {dccrg_instance->get_communication_weight(cell)}; + for (auto neighborhood : dccrg_instance->partitioning_neighborhoods[cell]) { + for (const auto& [neighbor, dir] : *dccrg_instance->get_neighbors_to(cell, neighborhood)) { + // Zoltan 3.501 crashes in hierarchial if a cell is a neighbor to itself + if (neighbor == 0 || neighbor == cell) { + continue; + } - if (neighbor == 0 - /* Zoltan 3.501 crashes in hierarchial - if a cell is a neighbor to itself */ - || neighbor == cell) { - continue; - } + // number_of_neighbors[i]++; - number_of_neighbors[i]++; + neighbors[current_neighbor_number] = neighbor; + processes_of_neighbors[current_neighbor_number] = int(dccrg_instance->cell_process.at(neighbor)); - neighbors[current_neighbor_number] = neighbor; - processes_of_neighbors[current_neighbor_number] - = int(dccrg_instance->cell_process.at(neighbor)); + // weight of edge from cell to *neighbor + if(number_of_weights_per_edge) { + edge_weights[current_neighbor_number] = weight; + } - // weight of edge from cell to *neighbor - if (number_of_weights_per_edge > 0) { - edge_weights[current_neighbor_number] = 1.0; + ++current_neighbor_number; } + } + } + } + - current_neighbor_number++; + /*! + Fills sizes with communication weights of cells in global_ids + */ + static void fill_with_communication_weights( + void *data, + int /*global_id_size*/, + int /*local_id_size*/, + int number_of_cells, + ZOLTAN_ID_PTR global_ids, + ZOLTAN_ID_PTR /*local_ids*/, + int *sizes, + int *error + ) { + Dccrg< + Cell_Data, + Geometry, + std::tuple, + std::tuple + >* dccrg_instance = reinterpret_cast< + Dccrg< + Cell_Data, + Geometry, + std::tuple, + std::tuple + >* + >(data); + *error = ZOLTAN_OK; + + for (int i = 0; i < number_of_cells; i++) { + uint64_t cell = uint64_t(global_ids[i]); + if (dccrg_instance->cell_data.count(cell) == 0) { + *error = ZOLTAN_FATAL; + std::cerr << "Process " << dccrg_instance->rank + << ": Zoltan wanted the weight of a non-existing cell " << cell + << std::endl; + return; } + + sizes[i] = dccrg_instance->get_communication_weight(cell); } } /*! - Writes the number of hyperedges (self + one per neighbor cell) in the grid for all cells on this process. + Writes the number of connections (self + one per neighbor cell) in the grid for all cells on this process. */ static void fill_number_of_hyperedges( void* data, @@ -10789,24 +11009,21 @@ template < >(data); *error = ZOLTAN_OK; - *number_of_hyperedges = int(dccrg_instance->cell_data.size()); *format = ZOLTAN_COMPRESSED_EDGE; + *number_of_hyperedges = 0; *number_of_connections = 0; - for (const auto& item: dccrg_instance->cell_data) { - - (*number_of_connections)++; - - for (const auto& neighbor_i: dccrg_instance->neighbors_of.at(item.first)) { - const auto& neighbor = neighbor_i.first; - if (neighbor != 0 - /* Zoltan 3.501 crashes in hierarchial - if a cell is a neighbor to itself */ - && neighbor != item.first) { - (*number_of_connections)++; + for (const auto& [cell, data]: dccrg_instance->cell_data) { + for (auto neighborhood : dccrg_instance->partitioning_neighborhoods[cell]) { + ++*number_of_hyperedges; + ++*number_of_connections; + for (const auto& [neighbor, dir] : *dccrg_instance->get_neighbors_to(cell, neighborhood)) { + *number_of_connections += neighbor > 0 && neighbor != cell; // We apparently have to do this stupid filtering here } } } + + // std::cerr << "I have " + std::to_string(dccrg_instance->cell_data.size()) + "cells, " + std::to_string(*number_of_hyperedges) + " hedges and " + std::to_string(*number_of_connections) + " connections!\n"; } @@ -10846,37 +11063,35 @@ template < return; } - if ((unsigned int) number_of_hyperedges != dccrg_instance->cell_data.size()) { - std::cerr << "Zoltan is expecting wrong number of hyperedges: " << number_of_hyperedges - << " instead of " << dccrg_instance->cell_data.size() - << std::endl; - *error = ZOLTAN_FATAL; - return; - } - - int i = 0; int connection_number = 0; - for (const auto& item: dccrg_instance->cell_data) { - - hyperedges[i] = item.first; - hyperedge_connection_offsets[i] = connection_number; - - // add a connection to the cell itself from its hyperedge - connections[connection_number++] = item.first; + int hedge_number = 0; + for (const auto& [cell, data]: dccrg_instance->cell_data) { + for (auto neighborhood : dccrg_instance->partitioning_neighborhoods[cell]) { + hyperedges[hedge_number] = cell; + hyperedge_connection_offsets[hedge_number] = connection_number; + + // add a connection to the cell itself from its hyperedge + connections[connection_number++] = cell; + + for (const auto& [neighbor, dir]: *dccrg_instance->get_neighbors_to(cell, neighborhood)) { + // Zoltan 3.501 crashes in hierarchial if a cell is a neighbor to itself + if (neighbor == 0 || neighbor == cell) { + continue; + } - for (const auto& neighbor_i: dccrg_instance->neighbors_of.at(item.first)) { - const auto& neighbor = neighbor_i.first; - if (neighbor == 0 - /* Zoltan 3.501 crashes in hierarchial - if a cell is a neighbor to itself */ - || neighbor == item.first) { - continue; + connections[connection_number++] = neighbor; } - connections[connection_number++] = neighbor; + ++hedge_number; } + } - i++; + if (number_of_hyperedges != hedge_number) { + std::cerr << "Zoltan is expecting wrong number of hyperedges: " << number_of_hyperedges + << " instead of " << dccrg_instance->cell_data.size() + << std::endl; + *error = ZOLTAN_FATAL; + return; } if (connection_number != number_of_connections) { @@ -10953,28 +11168,23 @@ template < return; } - int i = 0; - for (const auto& item: dccrg_instance->cell_data) { + // TODO: this coudl be a different parameter, possibly later + // if ((unsigned int) number_of_weights_per_hyperedge != dccrg_instance->cell_weight_dim) { + // std::cerr + // << "Zoltan is expecting wrong number of weights per hyperedge: " << number_of_weights_per_hyperedge + // << " instead of " << dccrg_instance->cell_weight_dim + // << std::endl; + // *error = ZOLTAN_FATAL; + // return; + // } + int i = 0; + for (const auto& item : dccrg_instance->cell_data) { hyperedges[i] = item.first; - - if (number_of_weights_per_hyperedge > 0) { - int number_of_hyperedges = 0; - - for (const auto& neighbor_i: dccrg_instance->neighbors_of.at(item.first)) { - const auto neighbor = neighbor_i.first; - if (neighbor != 0 - /* Zoltan 3.501 crashes in hierarchial - if a cell is a neighbor to itself (periodic grid) */ - && neighbor != item.first) { - number_of_hyperedges++; - } - } - - hyperedge_weights[i] = float(1.0 * number_of_hyperedges); + if (number_of_weights_per_hyperedge) { + hyperedge_weights[i] = dccrg_instance->get_communication_weight(item.first); } - - i++; + ++i; } } @@ -11033,15 +11243,17 @@ template < *error = ZOLTAN_OK; } - int process = int(dccrg_instance->rank); - int part; + return dccrg_instance->partition_number[level]; - for (int i = 0; i <= level; i++) { - part = process / dccrg_instance->processes_per_part[i]; - process %= dccrg_instance->processes_per_part[i]; - } + // int process = int(dccrg_instance->rank); + // int part; - return part; + // for (int i = 0; i <= level; i++) { + // part = process / dccrg_instance->processes_per_part[i]; + // process %= dccrg_instance->processes_per_part[i]; + // } + + // return part; } @@ -11073,7 +11285,6 @@ template < std::tuple >* >(data); - if (level < 0 || level >= int(dccrg_instance->processes_per_part.size())) { std::cerr << "Zoltan wanted partitioning options for an invalid hierarchy "