diff --git a/application/CMakeLists.txt b/application/CMakeLists.txt
index 374d263..c920047 100644
--- a/application/CMakeLists.txt
+++ b/application/CMakeLists.txt
@@ -1,6 +1,9 @@
 cuda_add_executable(BFS bfs.cu)
 target_link_libraries(BFS gswitch)
 
+cuda_add_executable(MBFS mbfs.cu)
+target_link_libraries(MBFS gswitch)
+
 cuda_add_executable(CC cc.cu)
 target_link_libraries(CC gswitch)
 
diff --git a/application/mbfs.cu b/application/mbfs.cu
new file mode 100644
index 0000000..dd871cb
--- /dev/null
+++ b/application/mbfs.cu
@@ -0,0 +1,180 @@
+#include
+#include
+
+#include "gswitch.h"
+
+using G = device_graph_t;
+
+// actors
+inspector_t inspector;
+selector_t selector;
+executor_t executor;
+feature_t fets;
+config_t conf;
+stat_t stats;
+
+// BFS functor. The int read through wa_of() is treated as a vertex level;
+// presumably it is the array main() seeds via init_wa() (0 at root, else -1).
+struct BFS : Functor {
+  // Active when the stored level equals g.get_level(), Inactive when it is
+  // negative, Fixed otherwise.
+  __device__ Status filter(int vid, G g) {
+    int lvl = *wa_of(vid);
+    if (lvl == g.get_level())
+      return Active;
+    else if (lvl < 0)
+      return Inactive;
+    else
+      return Fixed;
+  }
+  // The emitted value is the current level.
+  __device__ int emit(int vid, Empty *w, G g) { return g.get_level(); }
+  // A vertex is a candidate only while its stored level is still -1.
+  __device__ bool cond(int v, int newv, G g) { return *wa_of(v) == -1; }
+  // comp and compAtomic both simply store newv and report success.
+  __device__ bool comp(int *v, int newv, G g) {
+    *v = newv;
+    return true;
+  }
+  __device__ bool compAtomic(int *v, int newv, G g) {
+    *v = newv;
+    return true;
+  }
+};
+
+// Host-side reference BFS over hg.start_pos / hg.adj_list.
+// Returns a malloc'ed array of hg.nvertexs levels (-1 = never reached from
+// root). The caller owns the array and must free() it.
+int *bfs_cpu(host_graph_t hg, int root) {
+  LOG("generate CPU BFS reference\n");
+  double ms = mwtime();
+  int *lvl = (int *)malloc(sizeof(int) * hg.nvertexs);
+  // memset with -1 sets every byte to 0xFF, which makes every int equal -1.
+  memset(lvl, -1, sizeof(int) * hg.nvertexs);
+  std::queue q;
+  lvl[root] = 0;
+  q.push(root);
+  while (!q.empty()) {
+    int v = q.front();
+    q.pop();
+    int s = hg.start_pos[v];
+    // Last vertex: the range ends at nedges instead of start_pos[v + 1].
+    int e = (v == (hg.nvertexs - 1) ? hg.nedges : hg.start_pos[v + 1]);
+    for (int j = s; j < e; ++j) {
+      int u = hg.adj_list[j];
+      if (lvl[u] == -1) {
+        lvl[u] = lvl[v] + 1;
+        q.push(u);
+      }
+    }
+  }
+  double me = mwtime();
+  LOG("CPU BFS: %.3f ms\n", me - ms);
+  return lvl;
+}
+
+// Compares the two level arrays element by element. Prints "passed", or
+// "failed" followed by the first mismatching index and both values.
+void validation(int *lCPU, int *lGPU, int N) {
+  bool flag = true;
+  for (int i = 0; i < N; ++i) {
+    if (lGPU[i] - lCPU[i] != 0) {
+      flag = false;
+      puts("failed");
+      std::cout << i << " " << lGPU[i] << " " << lCPU[i] << std::endl;
+      break;
+    }
+  }
+  if (flag)
+    puts("passed");
+}
+
+// Runs one BFS from root and returns the elapsed time, measured as the
+// difference of two mwtime() readings around the main loop.
+template
+double run_bfs(G g, F f, active_set_t &as, int root) {
+  // step 1: initializing
+  LOG(" -- Initializing\n");
+  as.init(root);
+
+  // step 2: Execute Algorithm
+  double s = mwtime();
+  int level;
+  for (level = 0;; level++) {
+    inspector.inspect(as, g, f, stats, fets, conf);
+    if (as.finish(g, f, conf))
+      break;
+    selector.select(stats, fets, conf);
+    executor.filter(as, g, f, stats, fets, conf);
+    g.update_level();
+    executor.expand(as, g, f, stats, fets, conf);
+    // fets.record(); // for training
+    if (as.finish(g, f, conf))
+      break;
+  }
+  double e = mwtime();
+
+  return e - s;
+}
+
+int main(int argc, char *argv[]) {
+  parse_cmd(argc, argv, "BFS");
+
+  // step 1 : set features
+  fets.centric = VC;
+  fets.pattern = Idem;
+  fets.fromall = false;
+  fets.toall = false;
+  conf.conf_pruning = true;
+
+  // step 2 : init Graph & Algorithm
+  auto g = build_graph(cmd_opt.path, fets, cmd_opt.with_header,
+                       cmd_opt.with_weight, cmd_opt.directed);
+  if (g.hg.nedges == 0)
+    return 1;
+  BFS f;
+  f.data.build(g.hg.nvertexs);
+
+  init_conf(stats, fets, conf, g, f);
+  active_set_t as = build_active_set(g.dg.nvertexs, conf);
+
+  // step 3 : choose root vertex
+  int root = cmd_opt.src;
+  if (root < 0)
+    root = g.hg.random_root();
+  LOG(" -- Root is: %d\n", root);
+  fets.use_root = root;
+
+  for (int iteration = 0; iteration < 3; ++iteration) {
+    f.data.init_wa([root](int i) { return i == root ? 0 : -1; });
+
+    // step 4 : execute Algorithm
+    LOG(" -- Launching BFS\n");
+    double time = run_bfs(g.dg, f, as, root);
+
+    // reset
+    reset_conf(stats, fets, conf, g, f);
+    as.reset();
+    g.dg.reset_level();
+
+    // step 5 : validation
+    f.data.sync_wa();
+    if (cmd_opt.validation) {
+      int *lvl = bfs_cpu(g.hg, root);
+      validation(lvl, f.data.h_wa, g.hg.nvertexs);
+      free(lvl); // bfs_cpu() mallocs a fresh array on every call
+    }
+
+    LOG("GPU BFS time: %.3f ms\n", time);
+    std::cout << time << std::endl;
+  }
+  // std::cout << fets.nvertexs << " "
+  //<< fets.nedges << " "
+  //<< fets.avg_deg << " "
+  //<< fets.std_deg << " "
+  //<< fets.range << " "
+  //<< fets.GI << " "
+  //<< fets.Her << " "
+  //<< time << std::endl;
+  return 0;
+}
diff --git a/script/mbfs.sh b/script/mbfs.sh
new file mode 100755
index 0000000..4db04bc
--- /dev/null
+++ b/script/mbfs.sh
@@ -0,0 +1,2 @@
+#cfgs="Push-Queue-CM-Standalone"
+../build/application/MBFS "$1" --with-header --src=0 --device=0 --validation --configs="$cfgs" --verbose
diff --git a/src/abstraction/config.cuh b/src/abstraction/config.cuh
index ff0e018..81e55ac 100644
--- a/src/abstraction/config.cuh
+++ b/src/abstraction/config.cuh
@@ -73,7 +73,14 @@ struct config_t {
     std::cout << "] ";
   }
 
-  void reset() {}
+  // Put these five fields back to fixed starting values (called by reset_conf()).
+  void reset() {
+    conf_first_round = true;
+    conf_switch_to_fusion = false;
+    conf_switch_to_standalone = false;
+    conf_fuse_inspect = false;
+    conf_idle = 0;
+  }
 
   __device__ __forceinline__ bool ignore_u_state() {
     return conf_ignore_u_state;
diff --git a/src/data_structures/graph.cuh b/src/data_structures/graph.cuh
index e3e527b..ddbeac4 100644
--- a/src/data_structures/graph.cuh
+++ b/src/data_structures/graph.cuh
@@ -31,6 +31,8 @@ template struct device_graph_t {
   // WARNING,TODO: This update is only vaild in the top level
   // since the primitives we have accept parameter by assignment
   inline void update_level(int inc = 1) { level += inc; }
+  // inc is unused here; the parameter only mirrors update_level()'s signature.
+  inline void reset_level(int inc = 1) { level = 0; }
 
   int64_t nvertexs;
   int64_t nedges;
@@ -146,6 +148,8 @@ template struct device_graph_t {
   // WARNING,TODO: This update is only vaild in the top level
   // since the primitives we have accept parameter by assignment
   __host__ __device__ inline void update_level(int inc = 1) { level += inc; }
+  // inc is unused here; the parameter only mirrors update_level()'s signature.
+  __host__ __device__ inline void reset_level(int inc = 1) { level = 0; }
 
   chunk_t dg_chunks;
 
diff --git a/src/gswitch.h b/src/gswitch.h
index 41fad40..536ac12 100644
--- a/src/gswitch.h
+++ b/src/gswitch.h
@@ -62,4 +62,51 @@ void init_conf(stat_t &stats, feature_t &fets, config_t &conf, G &g, F &f) {
   }
 }
 
+// Calls conf.reset() and then re-derives first_workload, the fuse/standalone
+// choice and the window settings, so the same graph can be run again.
+template
+void reset_conf(stat_t &stats, feature_t &fets, config_t &conf, G &g, F &f) {
+  conf.reset();
+  fets.architecture_features();
+  conf.conf_inherit = false;
+  if (fets.use_root >= 0)
+    fets.first_workload = g.dg.get_degree(fets.use_root);
+
+  if (fets.centric == VC) {
+    fets.flatten();
+    std::vector fv;
+    for (int i = 2; i < 7; ++i)
+      fv.push_back(fets.fv[i]);
+
+    if (select_fusion(fv))
+      conf.conf_fuse_inspect = true;
+    else
+      conf.conf_fuse_inspect = false;
+
+    if (fets.toall && fets.fromall)
+      conf.conf_fuse_inspect = false;
+
+    if (cmd_opt.ins.has_fusion) {
+      if (cmd_opt.ins.fusion == Fused)
+        conf.conf_fuse_inspect = true;
+      else
+        conf.conf_fuse_inspect = false;
+    }
+    if (conf.conf_fuse_inspect)
+      conf.conf_fusion = true;
+    if (!conf.conf_fuse_inspect) {
+      conf.conf_qmode = Normal;
+    }
+  }
+
+  if (conf.conf_window) {
+    f.data.window.enable =
+        true; // used in device function (which can only touch the as)
+    f.data.window.set_init_winsize(
+        DIV(8 * g.el.mean_weight * 32, g.hg.attr.avg_deg));
+    LOG(" -- window size: %f\n",
+        DIV(8 * g.el.mean_weight * 32, g.hg.attr.avg_deg));
+  }
+}
+
 #endif