Skip to content

Commit

Permalink
use unordered_set rather than bitset
Browse files Browse the repository at this point in the history
  • Loading branch information
zhanglei1949 committed Oct 12, 2023
1 parent 309e96c commit d019646
Show file tree
Hide file tree
Showing 8 changed files with 166 additions and 38 deletions.
31 changes: 31 additions & 0 deletions flex/engines/hqps_db/core/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ class ContextIter<base_tag, std::tuple<SET_T>> {
return std::make_tuple(iter_.GetIndexElement());
}

/// Returns the index element of the wrapped iterator `iter_` as a
/// one-element tuple.
///
/// Exactly one column id must be supplied. Only the *number* of ids is
/// verified at compile time; the value of the id is not inspected here.
template <int... in_col_id>
auto GetIndexElements() const {
  constexpr auto kRequestedColumns = sizeof...(in_col_id);
  static_assert(kRequestedColumns == 1,
                "GetIndexElements only accept one col_id");
  return std::make_tuple(iter_.GetIndexElement());
}

auto GetAllIndexElementWithData() const {
return std::make_tuple(iter_.GetIndexElementWithData());
}
Expand Down Expand Up @@ -148,6 +156,29 @@ class ContextIter<base_tag, std::tuple<SET_T, PREV_SETS...>> {
return get_index_ele_tuple_impl(index_seq);
}

template <int in_col_id,
typename std::enable_if<(
in_col_id == -1 || in_col_id == base_tag + num_others)>::type* =
nullptr>
inline auto get_index_element_impl() const {
return cur_iter_.GetIndexElement();
}

template <int in_col_id,
typename std::enable_if<(
in_col_id != -1 && in_col_id != base_tag + num_others)>::type* =
nullptr>
inline auto get_index_element_impl() const {
return gs::get_from_tuple<in_col_id - base_tag>(others_iter_tuple_)
.GetIndexElement();
}

/// Collects the index elements of the requested columns into one tuple.
///
/// Each id in `in_col_id...` is resolved through `get_index_element_impl`;
/// the results are placed in the tuple in the order the ids were given.
template <int... in_col_id>
inline auto GetIndexElements() const {
// Pack expansion: one get_index_element_impl call per requested column id.
return std::make_tuple(this->get_index_element_impl<in_col_id>()...);
}

auto GetAllIndexElementWithData() const {
return get_index_ele_data_tuple_impl(index_seq);
}
Expand Down
40 changes: 35 additions & 5 deletions flex/engines/hqps_db/core/sync_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -707,23 +707,41 @@ class SyncEngine : public BaseEngine {
auto prop_descs = create_prop_descs_from_selectors<in_col_id...>(selectors);
auto prop_getters_tuple =
create_prop_getters_from_prop_desc(graph, ctx, prop_descs);
double t0 = -grape::GetCurrentTime();
// for (auto iter : ctx) {
// auto eles = iter.GetAllElement();
// // if (expr(eles)) {
// // if (std::apply(expr, props)) {
// if (run_expr_filter(expr, prop_getters_tuple, eles)) {
// select_indices.emplace_back(cur_ind);
// cur_offset += 1;
// }
// cur_ind += 1;
// new_offsets.emplace_back(cur_offset);
// }

auto ind_seq = std::make_index_sequence<sizeof...(in_col_id)>();
for (auto iter : ctx) {
auto eles = iter.GetAllElement();
// if (expr(eles)) {
// if (std::apply(expr, props)) {
if (run_expr_filter(expr, prop_getters_tuple, eles)) {
auto eles = iter.template GetIndexElements<in_col_id...>();
if (run_expr_filter_v2(expr, prop_getters_tuple, eles, ind_seq)) {
select_indices.emplace_back(cur_ind);
cur_offset += 1;
}
cur_ind += 1;
new_offsets.emplace_back(cur_offset);
}
t0 += grape::GetCurrentTime();

VLOG(10) << "Select " << select_indices.size() << ", out of " << cur_ind
<< " records"
<< ", head size: " << cur_.Size();

double t1 = -grape::GetCurrentTime();
cur_.SubSetWithIndices(select_indices);
ctx.merge_offset_with_back(new_offsets);
t1 += grape::GetCurrentTime();
LOG(INFO) << "[Select perf]: select from size: " << cur_.Size()
<< " to size: " << select_indices.size()
<< ", select time: " << t0 << ", reduce time: " << t1;
return std::move(ctx);
}

Expand All @@ -744,6 +762,18 @@ class SyncEngine : public BaseEngine {
return expr(std::get<Is>(prop_getter_tuple).get_from_all_element(eles)...);
}

/// Evaluates `expr` on the values extracted from one row's index elements.
///
/// For every position `i` in the index sequence, the i-th getter of
/// `prop_getter_tuple` is called as
/// `get_from_index_element(std::get<i>(eles))`; the results are forwarded, in
/// order, as the arguments of `expr`, whose result is returned as `bool`.
/// The number of getters, elements and indices must agree (compile-time
/// checked).
template <typename EXPR, typename... GETTERS, typename... ELEMENTS,
          size_t... Idx>
static inline bool run_expr_filter_v2(
    const EXPR& expr, std::tuple<GETTERS...>& prop_getter_tuple,
    std::tuple<ELEMENTS...>& eles, std::index_sequence<Idx...>) {
  static_assert(sizeof...(ELEMENTS) == sizeof...(GETTERS),
                "one index element is required per property getter");
  static_assert(sizeof...(Idx) == sizeof...(GETTERS),
                "index sequence length must equal the number of getters");
  // Pair getter i with element i and hand every extracted value to expr.
  return expr(std::get<Idx>(prop_getter_tuple)
                  .get_from_index_element(std::get<Idx>(eles))...);
}

//////////////////////////////////////Group/////////////////////////
// We currently support group with one key, and possibly multiple values.
// create a brand new context type.
Expand Down
44 changes: 44 additions & 0 deletions flex/engines/hqps_db/core/utils/props.h
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,11 @@ class InnerIdGetter {
return gs::get_from_tuple<tag_id>(all_ele);
}

inline auto get_from_index_element(
const std::tuple<size_t, VID_T>& ele) const {
return std::get<1>(ele);
}

private:
const std::vector<VID_T>& vids_;
};
Expand All @@ -210,6 +215,10 @@ class InnerIdDataGetter {
return ind_ele;
}

/// Returns the `VID_T` component (slot 1) of an index element; the leading
/// `size_t` component (slot 0) is ignored.
VID_T get_from_index_element(const std::tuple<size_t, VID_T>& tuple) const {
  const auto& [index, vid] = tuple;
  return vid;
}

private:
const std::vector<VID_T>& vids_;
const std::vector<std::tuple<DATA_T...>>& data_;
Expand Down Expand Up @@ -254,6 +263,13 @@ class EdgeSetInnerIdGetter {
auto dst_vid = std::get<1>(tuple);
return Edge<VID_T, grape::EmptyType>(src_vid, dst_vid);
}

template <typename IND_ELE_T>
inline auto get_from_index_element(const IND_ELE_T& tuple) const {
auto src_vid = std::get<0>(tuple);
auto dst_vid = std::get<1>(tuple);
return Edge<VID_T, grape::EmptyType>(src_vid, dst_vid);
}
};

template <int tag_id, typename T>
Expand All @@ -272,6 +288,10 @@ class CollectionPropGetter {
return gs::get_from_tuple<tag_id>(all_ele);
}

inline auto get_from_index_element(const std::tuple<size_t, T>& tuple) const {
return std::get<1>(tuple);
}

template <typename ALL_IND_ELE_T>
inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
ind_ele_ = ind_ele;
Expand All @@ -298,6 +318,10 @@ class CollectionPropGetter<tag_id, std::tuple<T>> {
return gs::get_from_tuple<tag_id>(all_ele);
}

inline auto get_from_index_element(const std::tuple<size_t, T>& tuple) const {
return std::get<1>(tuple);
}

template <typename ALL_IND_ELE_T>
inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
ind_ele_ = ind_ele;
Expand Down Expand Up @@ -327,6 +351,10 @@ class FlatEdgeSetPropGetter {
return std::get<0>(std::get<2>(my_ele));
}

/// Drills into an index element: takes slot 1 of `tuple`, then slot 2 of
/// that, and returns a copy of its slot 0.
/// (Presumably slot 1 is the edge element and its slot 2 the property tuple
/// -- confirm against the definition of `index_ele_tuple_t`.)
inline auto get_from_index_element(const index_ele_tuple_t& tuple) const {
  const auto& element = std::get<1>(tuple);
  const auto& payload = std::get<2>(element);
  return std::get<0>(payload);
}

template <typename ALL_IND_ELE_T>
inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
ind_ele_ = gs::get_from_tuple<tag_id>(ind_ele);
Expand Down Expand Up @@ -356,6 +384,10 @@ class GeneralEdgeSetPropGetter {
return std::get<0>(std::get<1>(my_ele).properties());
}

/// Takes slot 2 of the index element, calls `properties()` on it and returns
/// a copy of the first entry of the result.
inline auto get_from_index_element(const index_ele_tuple_t& ind_ele) const {
  const auto& element = std::get<2>(ind_ele);
  return std::get<0>(element.properties());
}

template <typename ALL_IND_ELE_T>
inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
ind_ele_ = gs::get_from_tuple<tag_id>(ind_ele);
Expand Down Expand Up @@ -392,6 +424,10 @@ class TwoLabelVertexSetImplPropGetter {
return getters_[std::get<0>(ele)].get_view(std::get<1>(ele));
}

/// Uses slot 1 of the index element to choose an entry of `getters_`, and
/// passes slot 2 to that entry's `get_view`, returning the result.
inline auto get_from_index_element(const IND_ELE_T& ind_ele) const {
  const auto& getter_slot = std::get<1>(ind_ele);
  const auto& view_key = std::get<2>(ind_ele);
  return getters_[getter_slot].get_view(view_key);
}

template <typename ALL_IND_ELE_T>
inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
ind_ele_ = gs::get_from_tuple<tag_id>(ind_ele);
Expand Down Expand Up @@ -428,6 +464,10 @@ class RowVertexSetPropGetter {
return getter_.get_view(ele);
}

inline auto get_from_index_element(const IND_ELE_T& ele) const {
return getter_.get_view(std::get<1>(ele));
}

template <typename ALL_IND_ELE_T>
inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
ind_ele_ = gs::get_from_tuple<tag_id>(ind_ele);
Expand Down Expand Up @@ -459,6 +499,10 @@ class KeyedRowVertexSetPropGetter {
return getter_.get_view(my_ele);
}

inline auto get_from_index_element(const IND_ELE_T& ele) const {
return getter_.get_view(std::get<1>(ele));
}

template <typename ALL_IND_ELE_T>
inline void set_ind_ele(const ALL_IND_ELE_T& ind_ele) {
ind_ele_ = gs::get_from_tuple<tag_id>(ind_ele);
Expand Down
21 changes: 12 additions & 9 deletions flex/engines/hqps_db/structures/collection.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,30 +468,33 @@ class DistinctCountBuilder<1, tag_id, T> {
T>,
"Type not match");
while (vec_.size() <= ind) {
vec_.emplace_back(grape::Bitset(range_size));
// vec_.emplace_back(grape::Bitset(range_size));
vec_.emplace_back(std::unordered_set<T>());
}
auto& cur_bitset = vec_[ind];
auto& cur_set = vec_[ind];
auto cur_v = std::get<1>(cur_ind_ele);
if (cur_v == null_value) {
return;
}
cur_bitset.set_bit(cur_v - min_v);
VLOG(20) << "[DistinctCount]: tag id: " << tag_id
<< "insert at ind: " << ind << ",value : " << cur_v
<< ", res: " << cur_bitset.count();
cur_set.insert(cur_v);
// cur_set.set_bit(cur_v - min_v);
// VLOG(20) << "[DistinctCount]: tag id: " << tag_id
// << "insert at ind: " << ind << ",value : " << cur_v
// << ", res: " << cur_set.count();
}

Collection<size_t> Build() {
std::vector<size_t> res;
res.reserve(vec_.size());
for (auto& bitset : vec_) {
res.emplace_back(bitset.count());
for (auto& set : vec_) {
res.emplace_back(set.size());
}
return Collection<size_t>(std::move(res));
}

private:
std::vector<grape::Bitset> vec_;
// std::vector<grape::Bitset> vec_;
std::vector<std::unordered_set<T>> vec_;
T min_v, max_v, range_size, null_value;
};

Expand Down
12 changes: 11 additions & 1 deletion flex/resources/queries/ic/stored_procedure/ic9.cypher
Original file line number Diff line number Diff line change
@@ -1,2 +1,12 @@
MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON)<-[:HASCREATOR]-(message : POST | COMMENT)
WHERE friend <> p and message.creationDate < $maxDate with friend,message ORDER BY message.creationDate DESC, message.id ASC LIMIT 20 RETURN friend.id AS personId, friend.firstName AS personFirstName, friend.lastName AS personLastName, message.id AS commentOrPostId, message.content AS messageContent, message.imageFile AS messageImageFile, message.creationDate AS commentOrPostCreationDate
WHERE friend <> p and message.creationDate < $maxDate
with friend, message
ORDER BY message.creationDate DESC, message.id ASC
LIMIT 20
RETURN friend.id AS personId,
friend.firstName AS personFirstName,
friend.lastName AS personLastName,
message.id AS commentOrPostId,
message.content AS messageContent,
message.imageFile AS messageImageFile,
message.creationDate AS commentOrPostCreationDate;
19 changes: 0 additions & 19 deletions flex/tests/hqps/ic/ic10.h
Original file line number Diff line number Diff line change
Expand Up @@ -193,12 +193,6 @@ class IC10 {
std::move(left_left_left_left_ctx1),
std::move(left_left_left_right_ctx2));

{
for (auto iter : left_left_left_left_ctx2) {
auto eles = iter.GetAllElement();
VLOG(10) << "After antijoin: " << gs::to_string(eles);
}
}
auto left_left_left_left_expr2 =
gs::make_filter(IC10left_left_left_left_expr1(),
gs::PropertySelector<grape::EmptyType>("None"),
Expand All @@ -215,12 +209,6 @@ class IC10 {
gs::make_mapper_with_variable<INPUT_COL_ID(1)>(
gs::PropertySelector<Date>("birthday"))});

{
for (auto iter : left_left_left_left_ctx4) {
auto eles = iter.GetAllElement();
VLOG(10) << "before select with brithday: " << gs::to_string(eles);
}
}
auto left_left_left_left_expr3 = gs::make_filter(
IC10left_left_left_left_expr2(month),
gs::PropertySelector<Date>("None"), gs::PropertySelector<Date>("None"),
Expand All @@ -231,13 +219,6 @@ class IC10 {
graph, std::move(left_left_left_left_ctx4),
std::move(left_left_left_left_expr3));

{
for (auto iter : left_left_left_left_ctx5) {
auto eles = iter.GetAllElement();
VLOG(10) << "After select with brithday: " << gs::to_string(eles);
}
}

auto left_left_left_left_ctx6 = Engine::Project<PROJ_TO_NEW>(
graph, std::move(left_left_left_left_ctx5),
std::tuple{gs::make_mapper_with_variable<INPUT_COL_ID(0)>(
Expand Down
29 changes: 29 additions & 0 deletions flex/tests/hqps/ic/ic9.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,22 @@ struct IC9expr2 {

// Auto generated query class definition
class IC9 {
private:
mutable double expand_to_msg_time = 0.0;
mutable double filter_creation_date_time = 0.0;
mutable double filter_person_id_time = 0.0;
mutable double proj_time = 0.0;
mutable double sort_time = 0.0;

public:
/// Logs the value accumulated in each per-stage timing counter when the
/// query object is destroyed, one INFO record per counter.
~IC9() {
  struct StageTiming {
    const char* label;
    double elapsed;
  };
  const StageTiming stage_timings[] = {
      {"IC9 expand_to_msg_time: ", expand_to_msg_time},
      {"IC9 filter_creation_date_time: ", filter_creation_date_time},
      {"IC9 filter_person_id_time: ", filter_person_id_time},
      {"IC9 proj_time: ", proj_time},
      {"IC9 sort_time: ", sort_time},
  };
  for (const auto& stage : stage_timings) {
    LOG(INFO) << stage.label << stage.elapsed;
  }
}

public:
using Engine = SyncEngine<gs::MutableCSRInterface>;
using label_id_t = typename gs::MutableCSRInterface::label_id_t;
Expand All @@ -66,6 +82,7 @@ class IC9 {
auto ctx0 = Engine::template ScanVertexWithOid<gs::AppendOpt::Persist>(
graph, 1, personId);

double t0 = -grape::GetCurrentTime();
auto edge_expand_opt1 = gs::make_edge_expandv_opt(
gs::Direction::Both, (label_id_t) 8, (label_id_t) 1);

Expand All @@ -82,7 +99,9 @@ class IC9 {
auto ctx2 =
Engine::template EdgeExpandV<gs::AppendOpt::Temp, INPUT_COL_ID(1)>(
graph, std::move(ctx1), std::move(edge_expand_opt3));
expand_to_msg_time += t0 + grape::GetCurrentTime();

double t1 = -grape::GetCurrentTime();
auto expr3 = gs::make_filter(IC9expr1(maxDate),
gs::PropertySelector<int64_t>("creationDate"));
auto get_v_opt4 =
Expand All @@ -91,23 +110,33 @@ class IC9 {
std::move(expr3));
auto ctx3 = Engine::template GetV<gs::AppendOpt::Persist, INPUT_COL_ID(-1)>(
graph, std::move(ctx2), std::move(get_v_opt4));
filter_creation_date_time += t1 + grape::GetCurrentTime();

double t2 = -grape::GetCurrentTime();
auto expr4 = gs::make_filter(
IC9expr2(), gs::PropertySelector<grape::EmptyType>("None"),
gs::PropertySelector<grape::EmptyType>("None"));
auto ctx4 = Engine::template Select<INPUT_COL_ID(1), INPUT_COL_ID(0)>(
graph, std::move(ctx3), std::move(expr4));
filter_person_id_time += t2 + grape::GetCurrentTime();

double t3 = -grape::GetCurrentTime();
auto ctx5 = Engine::Project<PROJ_TO_NEW>(
graph, std::move(ctx4),
std::tuple{gs::make_mapper_with_variable<INPUT_COL_ID(1)>(
gs::PropertySelector<grape::EmptyType>("")),
gs::make_mapper_with_variable<INPUT_COL_ID(2)>(
gs::PropertySelector<grape::EmptyType>(""))});
proj_time += t3 + grape::GetCurrentTime();

double t4 = -grape::GetCurrentTime();
auto ctx6 = Engine::Sort(
graph, std::move(ctx5), gs::Range(0, 20),
std::tuple{gs::OrderingPropPair<gs::SortOrder::DESC, 1, int64_t>(
"creationDate"),
gs::OrderingPropPair<gs::SortOrder::ASC, 1, int64_t>("id")});
sort_time += t4 + grape::GetCurrentTime();

auto ctx7 = Engine::Project<PROJ_TO_NEW>(
graph, std::move(ctx6),
std::tuple{gs::make_mapper_with_variable<INPUT_COL_ID(0)>(
Expand Down
Loading

0 comments on commit d019646

Please sign in to comment.