From d019646b3da9907d86b69dd5bced378d0e6ed1b4 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Thu, 12 Oct 2023 19:57:55 +0800 Subject: [PATCH] use unordered_set rather than bitset --- flex/engines/hqps_db/core/context.h | 31 +++++++++++++ flex/engines/hqps_db/core/sync_engine.h | 40 ++++++++++++++--- flex/engines/hqps_db/core/utils/props.h | 44 +++++++++++++++++++ flex/engines/hqps_db/structures/collection.h | 21 +++++---- .../queries/ic/stored_procedure/ic9.cypher | 12 ++++- flex/tests/hqps/ic/ic10.h | 19 -------- flex/tests/hqps/ic/ic9.h | 29 ++++++++++++ flex/tests/hqps/validator.cc | 8 ++-- 8 files changed, 166 insertions(+), 38 deletions(-) diff --git a/flex/engines/hqps_db/core/context.h b/flex/engines/hqps_db/core/context.h index bb2571f4307d..5cc3b86539bd 100644 --- a/flex/engines/hqps_db/core/context.h +++ b/flex/engines/hqps_db/core/context.h @@ -72,6 +72,14 @@ class ContextIter> { return std::make_tuple(iter_.GetIndexElement()); } + template + auto GetIndexElements() const { + // check in_col_id is length 1, and the only item is base_tag or -1 + static_assert(sizeof...(in_col_id) == 1, + "GetIndexElements only accept one col_id"); + return std::make_tuple(iter_.GetIndexElement()); + } + auto GetAllIndexElementWithData() const { return std::make_tuple(iter_.GetIndexElementWithData()); } @@ -148,6 +156,29 @@ class ContextIter> { return get_index_ele_tuple_impl(index_seq); } + template ::type* = + nullptr> + inline auto get_index_element_impl() const { + return cur_iter_.GetIndexElement(); + } + + template ::type* = + nullptr> + inline auto get_index_element_impl() const { + return gs::get_from_tuple(others_iter_tuple_) + .GetIndexElement(); + } + + template + inline auto GetIndexElements() const { + // tie other_iter_tuple and cur_iter_ into a tuple + return std::make_tuple(this->get_index_element_impl()...); + } + auto GetAllIndexElementWithData() const { return get_index_ele_data_tuple_impl(index_seq); } diff --git a/flex/engines/hqps_db/core/sync_engine.h b/flex/engines/hqps_db/core/sync_engine.h index 2a21fde4d2d1..86f7ed0049dc 100644 --- a/flex/engines/hqps_db/core/sync_engine.h +++ b/flex/engines/hqps_db/core/sync_engine.h @@ -707,23 +707,41 @@ class SyncEngine : public BaseEngine { auto prop_descs = create_prop_descs_from_selectors(selectors); auto prop_getters_tuple = create_prop_getters_from_prop_desc(graph, ctx, prop_descs); + double t0 = -grape::GetCurrentTime(); + // for (auto iter : ctx) { + // auto eles = iter.GetAllElement(); + // // if (expr(eles)) { + // // if (std::apply(expr, props)) { + // if (run_expr_filter(expr, prop_getters_tuple, eles)) { + // select_indices.emplace_back(cur_ind); + // cur_offset += 1; + // } + // cur_ind += 1; + // new_offsets.emplace_back(cur_offset); + // } + + auto ind_seq = std::make_index_sequence(); for (auto iter : ctx) { - auto eles = iter.GetAllElement(); - // if (expr(eles)) { - // if (std::apply(expr, props)) { - if (run_expr_filter(expr, prop_getters_tuple, eles)) { + auto eles = iter.template GetIndexElements(); + if (run_expr_filter_v2(expr, prop_getters_tuple, eles, ind_seq)) { select_indices.emplace_back(cur_ind); cur_offset += 1; } cur_ind += 1; new_offsets.emplace_back(cur_offset); } + t0 += grape::GetCurrentTime(); + VLOG(10) << "Select " << select_indices.size() << ", out of " << cur_ind << " records" << ", head size: " << cur_.Size(); - + double t1 = -grape::GetCurrentTime(); cur_.SubSetWithIndices(select_indices); ctx.merge_offset_with_back(new_offsets); + t1 += grape::GetCurrentTime(); + LOG(INFO) << "[Select perf]: select from size: " << cur_.Size() + << " to size: " << select_indices.size() + << ", select time: " << t0 << ", reduce time: " << t1; return std::move(ctx); } @@ -744,6 +762,18 @@ class SyncEngine : public BaseEngine { return expr(std::get(prop_getter_tuple).get_from_all_element(eles)...); } + template + static inline bool run_expr_filter_v2( + const EXPR& expr, std::tuple& prop_getter_tuple, + std::tuple& eles, std::index_sequence) { + // for each prop getter, get the view, and apply expr on it. + static_assert(sizeof...(PROP_GETTER) == sizeof...(Is)); + static_assert(sizeof...(PROP_GETTER) == sizeof...(ELE)); + return expr(std::get(prop_getter_tuple) + .get_from_index_element(std::get(eles))...); + } + //////////////////////////////////////Group///////////////////////// // We currently support group with one key, and possiblely multiple values. // create a brand new context type. diff --git a/flex/engines/hqps_db/core/utils/props.h b/flex/engines/hqps_db/core/utils/props.h index 311bc39292d5..8a8a56785870 100644 --- a/flex/engines/hqps_db/core/utils/props.h +++ b/flex/engines/hqps_db/core/utils/props.h @@ -185,6 +185,11 @@ class InnerIdGetter { return gs::get_from_tuple(all_ele); } + inline auto get_from_index_element( + const std::tuple& ele) const { + return std::get<1>(ele); + } + private: const std::vector& vids_; }; @@ -210,6 +215,10 @@ class InnerIdDataGetter { return ind_ele; } + VID_T get_from_index_element(const std::tuple& tuple) const { + return std::get<1>(tuple); + } + private: const std::vector& vids_; const std::vector>& data_; @@ -254,6 +263,13 @@ class EdgeSetInnerIdGetter { auto dst_vid = std::get<1>(tuple); return Edge(src_vid, dst_vid); } + + template + inline auto get_from_index_element(const IND_ELE_T& tuple) const { + auto src_vid = std::get<0>(tuple); + auto dst_vid = std::get<1>(tuple); + return Edge(src_vid, dst_vid); + } }; template @@ -272,6 +288,10 @@ class CollectionPropGetter { return gs::get_from_tuple(all_ele); } + inline auto get_from_index_element(const std::tuple& tuple) const { + return std::get<1>(tuple); + } + template inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) { ind_ele_ = ind_ele; @@ -298,6 +318,10 @@ class CollectionPropGetter> { return gs::get_from_tuple(all_ele); } + inline auto get_from_index_element(const std::tuple& tuple) const { + return std::get<1>(tuple); + } + template inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) { ind_ele_ = ind_ele; @@ -327,6 +351,10 @@ class FlatEdgeSetPropGetter { return std::get<0>(std::get<2>(my_ele)); } + inline auto get_from_index_element(const index_ele_tuple_t& tuple) const { + return std::get<0>(std::get<2>(std::get<1>(tuple))); + } + template inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) { ind_ele_ = gs::get_from_tuple(ind_ele); @@ -356,6 +384,10 @@ class GeneralEdgeSetPropGetter { return std::get<0>(std::get<1>(my_ele).properties()); } + inline auto get_from_index_element(const index_ele_tuple_t& ind_ele) const { + return std::get<0>(std::get<2>(ind_ele).properties()); + } + template inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) { ind_ele_ = gs::get_from_tuple(ind_ele); @@ -392,6 +424,10 @@ class TwoLabelVertexSetImplPropGetter { return getters_[std::get<0>(ele)].get_view(std::get<1>(ele)); } + inline auto get_from_index_element(const IND_ELE_T& ind_ele) const { + return getters_[std::get<1>(ind_ele)].get_view(std::get<2>(ind_ele)); + } + template inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) { ind_ele_ = gs::get_from_tuple(ind_ele); @@ -428,6 +464,10 @@ class RowVertexSetPropGetter { return getter_.get_view(ele); } + inline auto get_from_index_element(const IND_ELE_T& ele) const { + return getter_.get_view(std::get<1>(ele)); + } + template inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) { ind_ele_ = gs::get_from_tuple(ind_ele); @@ -459,6 +499,10 @@ class KeyedRowVertexSetPropGetter { return getter_.get_view(my_ele); } + inline auto get_from_index_element(const IND_ELE_T& ele) const { + return getter_.get_view(std::get<1>(ele)); + } + template inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) { ind_ele_ = gs::get_from_tuple(ind_ele); diff --git a/flex/engines/hqps_db/structures/collection.h b/flex/engines/hqps_db/structures/collection.h index 4d50818cf613..3304fe67aea0 100644 --- a/flex/engines/hqps_db/structures/collection.h +++ b/flex/engines/hqps_db/structures/collection.h @@ -468,30 +468,33 @@ class DistinctCountBuilder<1, tag_id, T> { T>, "Type not match"); while (vec_.size() <= ind) { - vec_.emplace_back(grape::Bitset(range_size)); + // vec_.emplace_back(grape::Bitset(range_size)); + vec_.emplace_back(std::unordered_set()); } - auto& cur_bitset = vec_[ind]; + auto& cur_set = vec_[ind]; auto cur_v = std::get<1>(cur_ind_ele); if (cur_v == null_value) { return; } - cur_bitset.set_bit(cur_v - min_v); - VLOG(20) << "[DistinctCount]: tag id: " << tag_id - << "insert at ind: " << ind << ",value : " << cur_v - << ", res: " << cur_bitset.count(); + cur_set.insert(cur_v); + // cur_set.set_bit(cur_v - min_v); + // VLOG(20) << "[DistinctCount]: tag id: " << tag_id + // << "insert at ind: " << ind << ",value : " << cur_v + // << ", res: " << cur_set.count(); } Collection Build() { std::vector res; res.reserve(vec_.size()); - for (auto& bitset : vec_) { - res.emplace_back(bitset.count()); + for (auto& set : vec_) { + res.emplace_back(set.size()); } return Collection(std::move(res)); } private: - std::vector vec_; + // std::vector vec_; + std::vector> vec_; T min_v, max_v, range_size, null_value; }; diff --git a/flex/resources/queries/ic/stored_procedure/ic9.cypher b/flex/resources/queries/ic/stored_procedure/ic9.cypher index 099127ad30c0..355fcee5e766 100644 --- a/flex/resources/queries/ic/stored_procedure/ic9.cypher +++ b/flex/resources/queries/ic/stored_procedure/ic9.cypher @@ -1,2 +1,12 @@ MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT) -WHERE friend <> p and message.creationDate < $maxDate with friend,message ORDER BY message.creationDate DESC, message.id ASC LIMIT 20 RETURN friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS commentOrPostId, message.content AS messageContent, message.imageFile AS messageImageFile, message.creationDate AS commentOrPostCreationDate \ No newline at end of file +WHERE friend <> p and message.creationDate < $maxDate +with friend, message +ORDER BY message.creationDate DESC, message.id ASC +LIMIT 20 +RETURN friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + message.id AS commentOrPostId, + message.content AS messageContent, + message.imageFile AS messageImageFile, + message.creationDate AS commentOrPostCreationDate; \ No newline at end of file diff --git a/flex/tests/hqps/ic/ic10.h b/flex/tests/hqps/ic/ic10.h index 37f2ef58052a..cb51c5274945 100644 --- a/flex/tests/hqps/ic/ic10.h +++ b/flex/tests/hqps/ic/ic10.h @@ -193,12 +193,6 @@ class IC10 { std::move(left_left_left_left_ctx1), std::move(left_left_left_right_ctx2)); - { - for (auto iter : left_left_left_left_ctx2) { - auto eles = iter.GetAllElement(); - VLOG(10) << "After antijoin: " << gs::to_string(eles); - } - } auto left_left_left_left_expr2 = gs::make_filter(IC10left_left_left_left_expr1(), gs::PropertySelector("None"), @@ -215,12 +209,6 @@ class IC10 { gs::make_mapper_with_variable( gs::PropertySelector("birthday"))}); - { - for (auto iter : left_left_left_left_ctx4) { - auto eles = iter.GetAllElement(); - VLOG(10) << "before select with brithday: " << gs::to_string(eles); - } - } auto left_left_left_left_expr3 = gs::make_filter( IC10left_left_left_left_expr2(month), gs::PropertySelector("None"), gs::PropertySelector("None"), @@ -231,13 +219,6 @@ class IC10 { graph, std::move(left_left_left_left_ctx4), std::move(left_left_left_left_expr3)); - { - for (auto iter : left_left_left_left_ctx5) { - auto eles = iter.GetAllElement(); - VLOG(10) << "After select with brithday: " << gs::to_string(eles); - } - } - auto left_left_left_left_ctx6 = Engine::Project( graph, std::move(left_left_left_left_ctx5), std::tuple{gs::make_mapper_with_variable( diff --git a/flex/tests/hqps/ic/ic9.h b/flex/tests/hqps/ic/ic9.h index 63447fb1b71c..37eaebf493d7 100644 --- a/flex/tests/hqps/ic/ic9.h +++ b/flex/tests/hqps/ic/ic9.h @@ -50,6 +50,22 @@ struct IC9expr2 { // Auto generated query class definition class IC9 { + private: + mutable double expand_to_msg_time = 0.0; + mutable double filter_creation_date_time = 0.0; + mutable double filter_person_id_time = 0.0; + mutable double proj_time = 0.0; + mutable double sort_time = 0.0; + + public: + ~IC9() { + LOG(INFO) << "IC9 expand_to_msg_time: " << expand_to_msg_time; + LOG(INFO) << "IC9 filter_creation_date_time: " << filter_creation_date_time; + LOG(INFO) << "IC9 filter_person_id_time: " << filter_person_id_time; + LOG(INFO) << "IC9 proj_time: " << proj_time; + LOG(INFO) << "IC9 sort_time: " << sort_time; + } + public: using Engine = SyncEngine; using label_id_t = typename gs::MutableCSRInterface::label_id_t; @@ -66,6 +82,7 @@ class IC9 { auto ctx0 = Engine::template ScanVertexWithOid( graph, 1, personId); + double t0 = -grape::GetCurrentTime(); auto edge_expand_opt1 = gs::make_edge_expandv_opt( gs::Direction::Both, (label_id_t) 8, (label_id_t) 1); @@ -82,7 +99,9 @@ class IC9 { auto ctx2 = Engine::template EdgeExpandV( graph, std::move(ctx1), std::move(edge_expand_opt3)); + expand_to_msg_time += t0 + grape::GetCurrentTime(); + double t1 = -grape::GetCurrentTime(); auto expr3 = gs::make_filter(IC9expr1(maxDate), gs::PropertySelector("creationDate")); auto get_v_opt4 = @@ -91,23 +110,33 @@ class IC9 { std::move(expr3)); auto ctx3 = Engine::template GetV( graph, std::move(ctx2), std::move(get_v_opt4)); + filter_creation_date_time += t1 + grape::GetCurrentTime(); + + double t2 = -grape::GetCurrentTime(); auto expr4 = gs::make_filter( IC9expr2(), gs::PropertySelector("None"), gs::PropertySelector("None")); auto ctx4 = Engine::template Select( graph, std::move(ctx3), std::move(expr4)); + filter_person_id_time += t2 + grape::GetCurrentTime(); + double t3 = -grape::GetCurrentTime(); auto ctx5 = Engine::Project( graph, std::move(ctx4), std::tuple{gs::make_mapper_with_variable( gs::PropertySelector("")), gs::make_mapper_with_variable( gs::PropertySelector(""))}); + proj_time += t3 + grape::GetCurrentTime(); + + double t4 = -grape::GetCurrentTime(); auto ctx6 = Engine::Sort( graph, std::move(ctx5), gs::Range(0, 20), std::tuple{gs::OrderingPropPair( "creationDate"), gs::OrderingPropPair("id")}); + sort_time += t4 + grape::GetCurrentTime(); + auto ctx7 = Engine::Project( graph, std::move(ctx6), std::tuple{gs::make_mapper_with_variable( diff --git a/flex/tests/hqps/validator.cc b/flex/tests/hqps/validator.cc index 7978a6cffb70..d41340a6047a 100644 --- a/flex/tests/hqps/validator.cc +++ b/flex/tests/hqps/validator.cc @@ -1,7 +1,7 @@ #include "flex/engines/hqps_db/database/mutable_csr_interface.h" // #include "flex/tests/hqps/ic/ic1.h" -// #include "flex/tests/hqps/ic/ic10.h" +#include "flex/tests/hqps/ic/ic10.h" // #include "flex/tests/hqps/ic/ic11.h" // #include "flex/tests/hqps/ic/ic12.h" // #include "flex/tests/hqps/ic/ic2.h" @@ -10,7 +10,7 @@ // #include "flex/tests/hqps/ic/ic5.h" // #include "flex/tests/hqps/ic/ic5_v2.h" // #include "flex/tests/hqps/ic/ic6.h" -#include "flex/tests/hqps/ic/ic7.h" +// #include "flex/tests/hqps/ic/ic7.h" // #include "flex/tests/hqps/ic/ic8.h" // #include "flex/tests/hqps/ic/ic9.h" @@ -110,10 +110,10 @@ void validate_all(const gs::MutableCSRInterface& graph, // validate(graph, validate_dir + "/validation_params_ic4.csv"); // validate(graph, validate_dir + "/validation_params_ic5.csv"); // validate(graph, validate_dir + "/validation_params_ic6.csv"); - validate(graph, validate_dir + "/validation_params_ic7.csv"); + // validate(graph, validate_dir + "/validation_params_ic7.csv"); // validate(graph, validate_dir + "/validation_params_ic8.csv"); // validate(graph, validate_dir + "/validation_params_ic9.csv"); - // validate(graph, validate_dir + "/validation_params_ic10.csv"); + validate(graph, validate_dir + "/validation_params_ic10.csv"); // validate(graph, validate_dir + "/validation_params_ic11.csv"); // validate(graph, validate_dir + "/validation_params_ic12.csv"); LOG(INFO) << "Finish validating all tests";