diff --git a/.github/workflows/interactive.yml b/.github/workflows/interactive.yml index db8266f65c84..80bf939ab513 100644 --- a/.github/workflows/interactive.yml +++ b/.github/workflows/interactive.yml @@ -168,6 +168,20 @@ jobs: sed -i 's/default_graph: modern_graph/default_graph: ldbc/g' ./engine_config_test.yaml sed -i 's/temp_workspace/interactive_workspace/g' ./engine_config_test.yaml + - name: Proxy Server test + env: + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace + run: | + cd ${GITHUB_WORKSPACE}/flex/tests/hqps + bash hqps_proxy_server_test.sh ${INTERACTIVE_WORKSPACE} ./engine_config_test.yaml + + - name: Proxy Server test + env: + INTERACTIVE_WORKSPACE: /tmp/interactive_workspace + run: | + cd ${GITHUB_WORKSPACE}/flex/tests/hqps + bash hqps_proxy_server_test.sh ${INTERACTIVE_WORKSPACE} ./engine_config_test.yaml + - name: Sample Query test env: GS_TEST_DIR: ${{ github.workspace }}/gstest diff --git a/.gitignore b/.gitignore index dc6bce24ed8e..bcb3e75c77df 100644 --- a/.gitignore +++ b/.gitignore @@ -146,6 +146,8 @@ flex/interactive/sdk/python/gs_interactive/configuration.py flex/interactive/sdk/python/gs_interactive/exceptions.py flex/interactive/sdk/python/gs_interactive/rest.py !flex/interactive/sdk/python/gs_interactive/client/generated/__init__.py +!flex/interactive/sdk/python/gs_interactive/models/long_text.py **/.cache/ +*.so diff --git a/charts/graphscope-interactive.yaml b/charts/graphscope-interactive.yaml new file mode 100644 index 000000000000..1aa1bfb55d7e --- /dev/null +++ b/charts/graphscope-interactive.yaml @@ -0,0 +1,443 @@ +--- +# Source: graphscope-interactive/templates/configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: release-name-graphscope-interactive-config + namespace: kubetask + labels: + helm.sh/chart: graphscope-interactive-0.0.2 + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: configmap +data: + 
engine_config.yaml: |- + directories: + workspace: /tmp/interactive_workspace + subdirs: + data: data + logs: logs + conf: conf + log_level: INFO + default_graph: modern_graph + compute_engine: + type: hiactor + workers: + - localhost:10000 + thread_num_per_worker: 1 + compiler: + planner: + is_on: true + opt: RBO + rules: + - FilterIntoJoinRule + - FilterMatchRule + - NotMatchToAntiJoinRule + endpoint: + default_listen_address: localhost + bolt_connector: + disabled: false + port: 7687 + gremlin_connector: + disabled: true + port: 8182 + query_timeout: 30000 + http_service: + default_listen_address: localhost + admin_port: 7777 + query_port: 10000 +--- +# Source: graphscope-interactive/templates/primary/svc.yaml +apiVersion: v1 +kind: Service +metadata: + name: release-name-graphscope-interactive-primary + namespace: kubetask + labels: + helm.sh/chart: graphscope-interactive-0.0.2 + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: primary + annotations: +spec: + type: NodePort + ports: + - name: admin-port + port: 7777 + protocol: TCP + targetPort: 7777 + - name: query-port + port: 10000 + protocol: TCP + targetPort: 10000 + selector: + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: primary +--- +# Source: graphscope-interactive/templates/secondary/svc.yaml +apiVersion: v1 +kind: Service +metadata: + name: release-name-graphscope-interactive-secondary + namespace: kubetask + labels: + helm.sh/chart: graphscope-interactive-0.0.2 + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: secondary + annotations: +spec: + type: LoadBalancer + ports: + - name: admin-port + port: 7777 + protocol: TCP + targetPort: 7777 + - name: query-port + port: 10000 + protocol: TCP + targetPort: 10000 + 
selector: + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: secondary +--- +# Source: graphscope-interactive/templates/primary/statefulset.yaml +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: release-name-graphscope-interactive-primary + namespace: kubetask + labels: + helm.sh/chart: graphscope-interactive-0.0.2 + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: primary +spec: + replicas: 1 + selector: + matchLabels: + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/component: primary + serviceName: release-name-graphscope-interactive-primary + updateStrategy: + type: RollingUpdate + template: + metadata: + annotations: + labels: + helm.sh/chart: graphscope-interactive-0.0.2 + app.kubernetes.io/name: graphscope-interactive + app.kubernetes.io/instance: release-name + app.kubernetes.io/managed-by: Helm + app.kubernetes.io/component: primary + spec: + hostNetwork: + hostIPC: false + serviceAccountName: default + initContainers: + containers: + - name: proxy-admin + image: registry.cn-hongkong.aliyuncs.com/graphscope/interactive:debug + imagePullPolicy: "Always" + # command: ["sleep", "infinity"] + command: + - /bin/bash + - -c + - | + POD_NAME=$MY_POD_NAME + if [ -z "$POD_NAME" ]; then + POD_NAME=$(hostname) + fi + echo "POD_NAME: $POD_NAME" + secondary_pod_dns_names="" + # cnt=1 + # for i from 0 to $SECONDARY_REPLICAS + for ((i=0; i> /home/graphscope/daily_graph_import.log +echo "--------------------------------" >> /home/graphscope/daily_graph_import.log +eval $cmd >> /home/graphscope/daily_graph_import.log 2>&1 diff --git a/charts/graphscope-interactive/script/get_ds.py b/charts/graphscope-interactive/script/get_ds.py new file mode 100644 index 000000000000..dd8dea8c21d1 --- /dev/null +++ 
b/charts/graphscope-interactive/script/get_ds.py @@ -0,0 +1,23 @@ + +query=""" +SELECT ds + FROM onecomp_risk.ads_fin_rsk_fe_ent_rel_data_version + WHERE ds = MAX_PT("onecomp_risk.ads_fin_rsk_fe_ent_rel_data_version"); +""" +import os +from odps import ODPS +from odps.accounts import StsAccount +# Make sure environment variable ALIBABA_CLOUD_ACCESS_KEY_ID already set to acquired Access Key ID, +# environment variable ALIBABA_CLOUD_ACCESS_KEY_SECRET set to acquired Access Key Secret +# while environment variable ALIBABA_CLOUD_STS_TOKEN set to acquired STS token. +# Not recommended to hardcode Access Key ID or Access Key Secret in your code. +o = ODPS( + os.getenv('ALIBABA_CLOUD_ACCESS_KEY_ID'), + os.getenv('ALIBABA_CLOUD_ACCESS_KEY_SECRET'), + project='onecomp', + endpoint='http://service-corp.odps.aliyun-inc.com/api', +) + +with o.execute_sql(query).open_reader() as reader: + pd_df = reader.to_pandas() + print(pd_df['ds'][0]) diff --git a/charts/graphscope-interactive/script/get_service_status.py b/charts/graphscope-interactive/script/get_service_status.py new file mode 100644 index 000000000000..bf74891c66e1 --- /dev/null +++ b/charts/graphscope-interactive/script/get_service_status.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + + +import sys + +sys.path.append("../../../flex/interactive/sdk/python/") +import time + +from gs_interactive.models.edge_mapping_destination_vertex_mappings_inner import ( + EdgeMappingDestinationVertexMappingsInner, +) + +import gs_interactive +from gs_interactive.models.column_mapping import ColumnMapping +from gs_interactive.models.edge_mapping_source_vertex_mappings_inner import ( + EdgeMappingSourceVertexMappingsInner, +) +from gs_interactive.models.edge_mapping_source_vertex_mappings_inner_column import ( + EdgeMappingSourceVertexMappingsInnerColumn, +) +from gs_interactive.client.driver import Driver +from gs_interactive.client.session import Session +from gs_interactive.models import * + + +def restart_service(sess: Session, graph_id: str): + resp = sess.start_service( + start_service_request=StartServiceRequest(graph_id=graph_id) + ) + assert resp.is_ok() + print("restart service successfully") + + +def get_service_status(sess: Session): + resp = sess.get_service_status() + assert resp.is_ok() + print("service status: ", resp.get_value()) + status = resp.get_value() + print("service running is now running on graph", status.graph.id) + + +def get_current_running_graph(sess: Session): + resp = sess.get_service_status() + assert resp.is_ok() + status = resp.get_value() + return status.graph.id + + +def list_graph(sess: Session): + resp = sess.list_graphs() + assert resp.is_ok() + res = resp.get_value() + graph_id_arr = [graph.id for graph in res] + print("list graph: ", graph_id_arr) + + +if __name__ == "__main__": + # parse command line args + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--endpoint", type=str, default="http://localhost:7777") + + # finish + args = parser.parse_args() + print(args) + print("connecting to ", args.endpoint) + driver = Driver(args.endpoint) + sess = driver.session() + # get current running graph + + get_service_status(sess) + print("-----------------Finish getting service 
status-----------------") + + list_graph(sess) diff --git a/charts/graphscope-interactive/script/procedure.cc b/charts/graphscope-interactive/script/procedure.cc new file mode 100644 index 000000000000..36398842a08b --- /dev/null +++ b/charts/graphscope-interactive/script/procedure.cc @@ -0,0 +1,766 @@ +// #include "flex/engines/graph_db/app/app_base.h" +#include "flex/engines/graph_db/database/graph_db_session.h" +#include "flex/engines/hqps_db/app/interactive_app_base.h" +#include "grape/util.h" +#include "nlohmann/json.hpp" + +namespace gs { + +// TODO: make sure the max vid is less than 2^30 +inline vid_t encode_vid(label_t v_label, vid_t vid) { + // vid_t is uint32_t, use the first 4 bits to store label id + return ((vid_t) v_label << 31) | vid; +} + +inline label_t decode_label(vid_t encoded_vid) { return encoded_vid >> 31; } + +inline vid_t decode_vid(vid_t encoded_vid) { return encoded_vid & 0x7FFFFFFF; } + +inline int64_t get_oid_from_encoded_vid(ReadTransaction& txn, + vid_t encoded_vid) { + auto label = decode_label(encoded_vid); + auto vid = decode_vid(encoded_vid); + CHECK(encode_vid(label, vid) == encoded_vid) + << "vid: " << encoded_vid << ", label " << (int32_t) label + << ", local id " << vid; + return txn.GetVertexId(label, vid).AsInt64(); +} + +inline std::string status_to_str(int64_t status_code) { + if (status_code == 1) { + return "在营"; + } else if (status_code == 2) { + return "注销"; + } else if (status_code == 3) { + return "停业"; + } else if (status_code == 4) { + return "迁入"; + } else if (status_code == 5) { + return "清算"; + } else if (status_code == 6) { + return "吊销"; + } else if (status_code == 7) { + return "未知"; + } else if (status_code == 8) { + return "迁出"; + } else { + return std::to_string(status_code); + } +} + +inline std::string rel_type_to_string(int64_t rel_type) { + if (rel_type == 0) { + return "invest"; + } else if (rel_type == 1) { + return "shareholder"; + } else if (rel_type == 2) { + return "shareholder_his"; + } else if 
(rel_type == 3) { + return "legalperson"; + } else if (rel_type == 4) { + return "legalperson_his"; + } else if (rel_type == 5) { + return "executive"; + } else if (rel_type == 6) { + return "executive_his"; + } else { + LOG(ERROR) << "Unknown rel type: " << rel_type; + return "unknown"; + } +} + +struct Path { + std::vector vids; + std::vector rel_types; + std::vector weights; + std::vector rel_infos; + std::vector directions; + // The size of rel_types is one less than the size of vids. +}; + +// Contains all path to the end node. +struct Results { + uint32_t start_node_id; + std::unordered_map> path_to_end_node; + + void clear() { path_to_end_node.clear(); } +}; + +enum class AddResultRet { + Success = 0, + Fail = 1, + Full = 2, +}; + +struct ResultsCreator { + ResultsCreator( + label_t comp_label_id, label_t person_label_id, + std::shared_ptr> typed_comp_named_col, + std::shared_ptr> typed_comp_status_col, + std::shared_ptr> typed_comp_credit_code_col, + std::shared_ptr> + typed_comp_license_number_col, + std::shared_ptr> typed_person_named_col) + : comp_label_id_(comp_label_id), + person_label_id_(person_label_id), + typed_comp_named_col_(typed_comp_named_col), + typed_comp_status_col_(typed_comp_status_col), + typed_comp_credit_code_col_(typed_comp_credit_code_col), + typed_comp_license_number_col_(typed_comp_license_number_col), + typed_person_named_col_(typed_person_named_col) {} + + void set_start_vid(uint32_t start_vid) { results_.start_node_id = start_vid; } + + inline std::string get_vertex_label_str_from_encoded_vid( + vid_t encoded_vid) const { + auto label = decode_label(encoded_vid); + if (label == comp_label_id_) { + return "company"; + } else if (label == person_label_id_) { + return "oc_person"; + } else { + return "unknown"; + } + } + + inline nlohmann::json get_vertex_properties_from_encoded_vid( + ReadTransaction& txn, vid_t encoded_vid) const { + auto label = decode_label(encoded_vid); + auto vid = decode_vid(encoded_vid); + auto oid = 
get_oid_from_encoded_vid(txn, encoded_vid); + nlohmann::json properties; + if (label == comp_label_id_) { + properties["label"] = "company"; + properties["status"] = + status_to_str(typed_comp_status_col_->get_view(vid)); + properties["credit_code"] = typed_comp_credit_code_col_->get_view(vid); + properties["license_number"] = + typed_comp_license_number_col_->get_view(vid); + } else if (label == person_label_id_) { + properties["label"] = "oc_person"; + auto person_name = typed_person_named_col_->get_view(vid); + properties["status"] = ""; + properties["credit_code"] = ""; + properties["license_number"] = ""; + } else { + throw std::runtime_error("Invalid label"); + } + return properties; + } + + inline std::string_view get_vertex_name_from_encoded_vid( + vid_t encoded_vid) const { + auto label = decode_label(encoded_vid); + auto vid = decode_vid(encoded_vid); + if (label == comp_label_id_) { + auto comp_name = typed_comp_named_col_->get_view(vid); + return comp_name; + } else if (label == person_label_id_) { + auto person_name = typed_person_named_col_->get_view(vid); + return person_name; + } else { + throw std::runtime_error("Invalid label"); + } + } + + // TODO: support multiple properties on edge. 
+ AddResultRet add_result(int32_t result_limit, + const std::vector& cur_path, + const std::vector& weights, + const std::vector& rel_types, + const std::vector& rel_infos, + const std::vector& directions) { + if (cur_path.size() < 2) { + LOG(ERROR) << "Path size is less than 2"; + return AddResultRet::Fail; + } + if (rel_types.size() + 1 != cur_path.size()) { + LOG(ERROR) << "miss match between rel_types and cur_path size:" + << rel_types.size() << " " << cur_path.size(); + return AddResultRet::Fail; + } + if (directions.size() + 1 != cur_path.size()) { + LOG(ERROR) << "miss match between directions and cur_path size:" + << directions.size() << " " << cur_path.size(); + return AddResultRet::Fail; + } + auto start_node_id = cur_path[0]; + auto end_node_id = cur_path.back(); + auto iter = results_.path_to_end_node.find(end_node_id); + if (iter == results_.path_to_end_node.end()) { + results_.path_to_end_node[end_node_id] = std::vector(); + } + Path path; + path.vids = cur_path; + path.weights = weights; + path.rel_types = rel_types; + path.rel_infos = rel_infos; + path.directions = directions; + // LOG(INFO) << "emplace path: " << gs::to_string(path.vids) << ", " + // << gs::to_string(path.weights) << ", " + // << gs::to_string(path.rel_types) << ", " + // << gs::to_string(path.rel_infos) << ", " + // << gs::to_string(path.directions); + if (results_.path_to_end_node[end_node_id].size() >= result_limit) { + return AddResultRet::Full; + } + results_.path_to_end_node[end_node_id].push_back(path); + return AddResultRet::Success; + } + + inline std::string build_edge_id(int64_t encoded_start_id, + int64_t end_vid) const { + return std::to_string(encoded_start_id) + "->" + std::to_string(end_vid); + } + + inline nlohmann::json get_edge_properties( + double weight, int64_t rel_type, const std::string_view& rel_info) const { + nlohmann::json properties; + properties["label"] = rel_type_to_string(rel_type); + if (weight > 1) { + properties["weight"] = ""; + } else { + 
properties["weight"] = weight; + } + properties["rel_info"] = rel_info; + return properties; + } + + std::string get_result_as_json_string(ReadTransaction& txn) const { + nlohmann::json json = nlohmann::json::array(); + auto start_node_name = + get_vertex_name_from_encoded_vid(results_.start_node_id); + for (const auto& [end_node_id, paths_vec] : results_.path_to_end_node) { + auto end_node_name = get_vertex_name_from_encoded_vid(end_node_id); + nlohmann::json end_node_json; + end_node_json["endNodeName"] = end_node_name; + end_node_json["startNodeName"] = start_node_name; + int64_t prev_oid; + std::string prev_name; + nlohmann::json paths = nlohmann::json::array(); + LOG(INFO) << "paths vec size:" << paths_vec.size(); + for (const auto& path : paths_vec) { + nlohmann::json path_json = nlohmann::json::object(); + path_json["relationShips"] = nlohmann::json::array(); + path_json["nodes"] = nlohmann::json::array(); + for (size_t i = 0; i < path.vids.size(); i++) { + nlohmann::json node_json; + prev_oid = node_json["id"] = + get_oid_from_encoded_vid(txn, path.vids[i]); + prev_name = node_json["name"] = + get_vertex_name_from_encoded_vid(path.vids[i]); + + node_json["label"] = + get_vertex_label_str_from_encoded_vid(path.vids[i]); + node_json["properties"] = + get_vertex_properties_from_encoded_vid(txn, path.vids[i]); + path_json["nodes"].push_back(node_json); + + if (i < path.rel_types.size()) { + nlohmann::json rel_json; + rel_json["type"] = rel_type_to_string(path.rel_types[i]); + rel_json["name"] = rel_json["type"]; + auto& dir = path.directions[i]; + if (dir == Direction::Out) { + rel_json["startNode"] = prev_name; + rel_json["id"] = build_edge_id( + prev_oid, get_oid_from_encoded_vid(txn, path.vids[i + 1])); + rel_json["endNode"] = + get_vertex_name_from_encoded_vid(path.vids[i + 1]); + rel_json["properties"] = get_edge_properties( + path.weights[i], path.rel_types[i], path.rel_infos[i]); + } else { + rel_json["startNode"] = + 
get_vertex_name_from_encoded_vid(path.vids[i + 1]); + rel_json["id"] = build_edge_id( + get_oid_from_encoded_vid(txn, path.vids[i + 1]), prev_oid); + rel_json["endNode"] = prev_name; + rel_json["properties"] = get_edge_properties( + path.weights[i], path.rel_types[i], path.rel_infos[i]); + } + path_json["relationShips"].push_back(rel_json); + } + } + // json["paths"].push_back(path_json); + paths.push_back(path_json); + // VLOG(10) << "path_json: " << path_json.dump(); + } + end_node_json["paths"] = paths; + json.push_back(end_node_json); + } + return json.dump(); + } + + void clear() { results_.clear(); } + + label_t comp_label_id_; + label_t person_label_id_; + std::shared_ptr> typed_comp_named_col_; + std::shared_ptr> typed_comp_status_col_; + std::shared_ptr> typed_comp_credit_code_col_; + std::shared_ptr> typed_comp_license_number_col_; + std::shared_ptr> typed_person_named_col_; + + Results results_; // The results of the query. +}; + +/** + *@brief Return the investigation path from the given company to the target. + The input is 1 start company/person and a list of target companies. 
+ + The rel_label(or rel_type) has the following mapping relation + person-[]->company: + 1: shareholder; + 2: shareholder_his; + 3: legalperson; + 4: legalperson_his; + 5: executive; + 6: executive_his + company-[]->company: + 0: invest + */ + +class HuoYan : public WriteAppBase { + public: + static constexpr double timeout_sec = 90; + static constexpr int32_t REL_TYPE_MAX = 8; // 1 ~ 7 + HuoYan() : is_initialized_(false) {} + ~HuoYan() {} + bool is_simple(const std::vector& path) { + // to check whether there are same vid in the path + vis_.clear(); + for (auto& vid : path) { + if (vis_.find(vid) != vis_.end()) { + return false; + } + vis_.insert(vid); + } + return true; + } + + bool edge_expand(gs::ReadTransaction& txn, const std::vector& vid_vec, + label_t dst_label_id, const AdjListView& edges, + const std::vector& valid_rel_type_ids, int32_t cur_ind, + std::vector>& cur_paths, + std::vector>& cur_weights, + std::vector>& cur_rel_types, + std::vector>& cur_rel_infos, + std::vector>& cur_directions, + std::vector>& next_paths, + std::vector>& next_weights, + std::vector>& next_rel_types, + std::vector>& next_rel_infos, + std::vector>& next_directions, + size_t& result_size, int single_result_limit, + int32_t result_limit, Encoder& output, double& cur_time_left, + Direction direction) { + auto& cur_path = cur_paths[cur_ind]; + auto& cur_rel_type = cur_rel_types[cur_ind]; + auto& cur_weight = cur_weights[cur_ind]; + auto& cur_rel_info = cur_rel_infos[cur_ind]; + auto& cur_direction = cur_directions[cur_ind]; + double t0 = -grape::GetCurrentTime(); + // The direction is same for all edges. 
+ cur_direction.emplace_back(direction); + for (auto& edge : edges) { + auto dst = edge.get_neighbor(); + auto encoded_vid = encode_vid(dst_label_id, dst); + auto data = edge.get_data(); + auto edge_rel_type = data[1].AsInt64(); + if (!valid_rel_type_ids[edge_rel_type]) { + // filter edges by rel type + continue; + } + cur_path.emplace_back(encoded_vid); + CHECK(data.size() == 3) << "Expect 3 but got: " << data.size(); + // VLOG(10) << data[0].AsDouble() << "," << edge_rel_type << "," + // << data[2].AsStringView(); + cur_weight.emplace_back(data[0].AsDouble()); + cur_rel_type.emplace_back(edge_rel_type); + cur_rel_info.emplace_back(data[2].AsStringView()); + + // LOG(INFO) << "path is ? simple: " <add_result(single_result_limit, cur_path, + cur_weight, cur_rel_type, + cur_rel_info, cur_direction); + + if (res == AddResultRet::Fail) { + LOG(ERROR) << "Failed to add result"; + return false; + } + if (res == AddResultRet::Full) { + // then set dst to invalid. + valid_comp_vids_[dst] = false; + continue; + } + // for (auto k = 0; k < cur_rel_type.size(); ++k) { + // output.put_long(get_oid_from_encoded_vid(txn, cur_path[k])); + // output.put_long(get_oid_from_encoded_vid(txn, cur_path[k + 1])); + // VLOG(10) << "put src id " + // << get_oid_from_encoded_vid(txn, cur_path[k]) + // << ", dst id " + // << get_oid_from_encoded_vid(txn, cur_path[k + 1]); + // output.put_string_view( + // get_vertex_name_from_encoded_vid(cur_path[k])); + // output.put_string_view( + // get_vertex_name_from_encoded_vid(cur_path[k + 1])); + // VLOG(10) << "put name: " + // << get_vertex_name_from_encoded_vid(cur_path[k]) + // << ", dst name " + // << get_vertex_name_from_encoded_vid(cur_path[k + 1]); + // output.put_int(cur_rel_type[k]); + // } + + if (result_size >= result_limit) { + // output.put_int_at(begin_loc, result_size); + LOG(INFO) << "result limit exced: " << result_size; + output.put_string(results_creator_->get_result_as_json_string(txn)); + txn.Commit(); + for (auto& vid : 
vid_vec) { + valid_comp_vids_[vid] = false; + results_creator_->clear(); + } + return true; + } + } + } + cur_path.pop_back(); + cur_weight.pop_back(); + cur_rel_type.pop_back(); + cur_rel_info.pop_back(); + } + cur_direction.pop_back(); + + t0 += grape::GetCurrentTime(); + cur_time_left -= t0; + if (cur_time_left < 0) { + LOG(INFO) << "Timeout, result size: " << result_size; + output.put_string(results_creator_->get_result_as_json_string(txn)); + txn.Commit(); + for (auto& vid : vid_vec) { + valid_comp_vids_[vid] = false; + } + return true; + } + return false; + } + + bool initialize(GraphDBSession& graph) { + LOG(INFO) << "initializing..."; + comp_label_id_ = graph.schema().get_vertex_label_id("company"); + person_label_id_ = graph.schema().get_vertex_label_id("person"); + invest_label_id_ = graph.schema().get_edge_label_id("invest"); + person_invest_label_id_ = graph.schema().get_edge_label_id("personInvest"); + size_t num = graph.graph().vertex_num(comp_label_id_); + valid_comp_vids_.resize(num, false); + LOG(INFO) << "company num:" << num; + LOG(INFO) << "person num: " << graph.graph().vertex_num(person_label_id_); + auto comp_name_col = + graph.get_vertex_property_column(comp_label_id_, "vertex_name"); + auto comp_status_col = + graph.get_vertex_property_column(comp_label_id_, "status"); + auto comp_credit_code_col = + graph.get_vertex_property_column(comp_label_id_, "credit_code"); + auto comp_license_number_col = + graph.get_vertex_property_column(comp_label_id_, "license_number"); + auto person_name_col = + graph.get_vertex_property_column(person_label_id_, "vertex_name"); + if (!comp_name_col) { + LOG(ERROR) << "column vertex_name not found for company"; + return false; + } + if (!person_name_col) { + LOG(ERROR) << "column vertex_name not found for person"; + return false; + } + if (!comp_status_col) { + LOG(ERROR) << "column status not found for company"; + return false; + } + if (!comp_credit_code_col) { + LOG(ERROR) << "column credit_code not found for 
company"; + return false; + } + if (!comp_license_number_col) { + LOG(ERROR) << "column license_number not found for company"; + return false; + } + typed_comp_named_col_ = + std::dynamic_pointer_cast>(comp_name_col); + typed_comp_status_col_ = + std::dynamic_pointer_cast>(comp_status_col); + typed_comp_credit_code_col_ = + std::dynamic_pointer_cast>( + comp_credit_code_col); + typed_comp_license_number_col_ = + std::dynamic_pointer_cast>( + comp_license_number_col); + typed_person_named_col_ = + std::dynamic_pointer_cast>( + person_name_col); + if (!typed_comp_named_col_) { + LOG(ERROR) << "column vertex_name is not string type for company"; + } + if (!typed_person_named_col_) { + LOG(ERROR) << "column vertex_name is not string type for person"; + } + results_creator_ = std::make_shared( + comp_label_id_, person_label_id_, typed_comp_named_col_, + typed_comp_status_col_, typed_comp_credit_code_col_, + typed_comp_license_number_col_, typed_person_named_col_); + is_initialized_.store(true); + return true; + } + +#define DEBUG + bool Query(GraphDBSession& graph, Decoder& input, Encoder& output) { + //////////Initialization//////////////////////////// + if (!is_initialized_.load()) { + if (!initialize(graph)) { + LOG(ERROR) << "Failed to initialize"; + return false; + } else { + LOG(INFO) << "Successfully initialized"; + } + } else { + LOG(INFO) << "Already initialized"; + } + ////////////Initialization/////////////////////////// + results_creator_->clear(); + double cur_time_left = timeout_sec; + + auto txn = graph.GetReadTransaction(); + int32_t hop_limit = input.get_int(); + int32_t single_result_limit = input.get_int(); + // single_result_limit is the max number of paths for 1v1 relations. 
+ // the max number of all paths is result_limit * vid_vec.size() + int32_t rel_type_num = input.get_int(); + LOG(INFO) << "result limit: " << single_result_limit + << " rel type num: " << rel_type_num; + // valid rel type ids + std::vector valid_rel_type_ids(REL_TYPE_MAX, false); + for (int i = 0; i < rel_type_num; ++i) { + auto rel_type = input.get_int(); + if (rel_type < 0 || rel_type >= REL_TYPE_MAX) { + LOG(ERROR) << "Invalid rel type id: " << rel_type; + return false; + } + valid_rel_type_ids[rel_type] = true; + } + // Get the start node id. + auto start_oid = input.get_long(); + LOG(INFO) << "Got start oid: " << start_oid; + vid_t start_vid; + if (!txn.GetVertexIndex(comp_label_id_, Any::From(start_oid), start_vid)) { + LOG(WARNING) << "Start oid: " << start_oid << ", not found"; + output.put_string("{}"); + txn.Commit(); + return true; + } + results_creator_->set_start_vid(encode_vid(comp_label_id_, start_vid)); + LOG(INFO) << "start vid: " << start_vid; + + int32_t vec_size = input.get_int(); + LOG(INFO) << "Group Query: hop limit " << hop_limit << ", result limit " + << single_result_limit << ", ids size " << vec_size; + std::vector vid_vec; + int count = 0; + + for (int i = 0; i < vec_size; ++i) { + auto oid = input.get_long(); + // std::string_view oid = input.get_string(); + vid_t vid; + if (!txn.GetVertexIndex(comp_label_id_, Any::From(oid), vid)) { + LOG(INFO) << "Get oid: " << oid << ", not found"; + count++; + } else { + VLOG(10) << "Oid: " << oid << ", vid:" << vid; + auto encoded_vid = encode_vid(comp_label_id_, vid); +#ifdef DEBUG + auto label = decode_label(encoded_vid); + auto vid2 = decode_vid(encoded_vid); + CHECK_EQ(label, comp_label_id_); + CHECK_EQ(vid, vid2); +#endif + vid_vec.emplace_back(vid); + } + } + int32_t result_limit = single_result_limit * vec_size; + LOG(INFO) << count << " out of " << vec_size << " vertices not found"; + for (auto& vid : vid_vec) { + valid_comp_vids_[vid] = true; + } + + auto cmp_invest_outgoing_view = 
txn.GetOutgoingGraphView( + comp_label_id_, comp_label_id_, invest_label_id_); + auto cmp_invest_incoming_view = txn.GetIncomingGraphView( + comp_label_id_, comp_label_id_, invest_label_id_); + + auto person_invest_outgoing_view = txn.GetOutgoingGraphView( + person_label_id_, comp_label_id_, person_invest_label_id_); + auto person_invest_incoming_view = txn.GetIncomingGraphView( + comp_label_id_, person_label_id_, person_invest_label_id_); + + // Expand from vid_vec, until end_vertex is valid, or hop limit is reached. + std::vector> cur_paths; + std::vector> cur_weights; + std::vector> cur_rel_types; + std::vector> cur_rel_infos; + std::vector> cur_directions; + + std::vector> next_paths; + std::vector> next_weights; + std::vector> next_rel_types; + std::vector> next_rel_infos; + std::vector> next_directions; + // init cur_paths + cur_paths.emplace_back( + std::vector{encode_vid(comp_label_id_, start_vid)}); + cur_rel_types.emplace_back(std::vector{}); + cur_weights.emplace_back(std::vector{}); + cur_rel_infos.emplace_back(std::vector{}); + cur_directions.emplace_back(std::vector{}); + // size_t begin_loc = output.skip_int(); + size_t result_size = 0; + for (auto i = 1; i <= hop_limit; ++i) { + VLOG(10) << "hop: " << i; + // possible edges: + // company->company + // person->company + // company->person + + for (auto j = 0; j < cur_paths.size(); ++j) { + // VLOG(10) << "path: " << gs::to_string(cur_paths[j]); + auto last_vid_encoded = cur_paths[j].back(); + auto last_vid = decode_vid(last_vid_encoded); + auto label = decode_label(last_vid_encoded); + if (label == comp_label_id_) { + const auto& oedges = cmp_invest_outgoing_view.get_edges(last_vid); + if (edge_expand(txn, vid_vec, comp_label_id_, oedges, + valid_rel_type_ids, j, cur_paths, cur_weights, + cur_rel_types, cur_rel_infos, cur_directions, + next_paths, next_weights, next_rel_types, + next_rel_infos, next_directions, result_size, + single_result_limit, result_limit, output, + cur_time_left, 
Direction::Out)) { + return true; // early terminate. + } + + const auto& iedges = cmp_invest_incoming_view.get_edges(last_vid); + if (edge_expand(txn, vid_vec, comp_label_id_, iedges, + valid_rel_type_ids, j, cur_paths, cur_weights, + cur_rel_types, cur_rel_infos, cur_directions, + next_paths, next_weights, next_rel_types, + next_rel_infos, next_directions, result_size, + single_result_limit, result_limit, output, + cur_time_left, Direction::In)) { + return true; // early terminate. + } + + const auto& oedges2 = person_invest_incoming_view.get_edges(last_vid); + if (edge_expand(txn, vid_vec, person_label_id_, oedges2, + valid_rel_type_ids, j, cur_paths, cur_weights, + cur_rel_types, cur_rel_infos, cur_directions, + next_paths, next_weights, next_rel_types, + next_rel_infos, next_directions, result_size, + single_result_limit, result_limit, output, + cur_time_left, Direction::In)) { + return true; // early terminate. + } + } else if (label == person_label_id_) { + const auto& oedges = person_invest_outgoing_view.get_edges(last_vid); + double t0 = -grape::GetCurrentTime(); + if (edge_expand(txn, vid_vec, comp_label_id_, oedges, + valid_rel_type_ids, j, cur_paths, cur_weights, + cur_rel_types, cur_rel_infos, cur_directions, + next_paths, next_weights, next_rel_types, + next_rel_infos, next_directions, result_size, + single_result_limit, result_limit, output, + cur_time_left, Direction::Out)) { + return true; // early terminate. 
+ } + } else { + LOG(ERROR) << "Invalid label: " << label; + return false; + } + } + // LOG(INFO) << "Hop: " << i << ", result: " << final_results.size() + // << ", cur_paths: " << cur_paths.size() + // << ", next_paths: " << next_paths.size(); + cur_paths.swap(next_paths); + cur_rel_types.swap(next_rel_types); + cur_weights.swap(next_weights); + cur_rel_infos.swap(next_rel_infos); + cur_directions.swap(next_directions); + next_paths.clear(); + next_rel_types.clear(); + next_weights.clear(); + next_rel_infos.clear(); + next_directions.clear(); + } + + output.put_string(results_creator_->get_result_as_json_string(txn)); + txn.Commit(); + for (auto& vid : vid_vec) { + valid_comp_vids_[vid] = false; + results_creator_->clear(); + } + LOG(INFO) << "result size: " << result_size; + + return true; + } + + private: + std::atomic is_initialized_; + label_t comp_label_id_; + label_t person_label_id_; + label_t invest_label_id_; + label_t person_invest_label_id_; + // std::vector comp_vis_; + // std::vector person_vis_; + std::unordered_set vis_; + std::vector valid_comp_vids_; + + std::shared_ptr> typed_comp_named_col_; + std::shared_ptr> typed_comp_status_col_; + std::shared_ptr> typed_comp_credit_code_col_; + std::shared_ptr> typed_comp_license_number_col_; + std::shared_ptr> typed_person_named_col_; + + std::shared_ptr results_creator_; +}; + +#undef DEBUG + +} // namespace gs + +extern "C" { +void* CreateApp(gs::GraphDBSession& db) { + gs::HuoYan* app = new gs::HuoYan(); + return static_cast(app); +} + +void DeleteApp(void* app) { + gs::HuoYan* casted = static_cast(app); + delete casted; +} +} diff --git a/charts/graphscope-interactive/script/qidian.cc b/charts/graphscope-interactive/script/qidian.cc new file mode 100644 index 000000000000..091f2ebf53f4 --- /dev/null +++ b/charts/graphscope-interactive/script/qidian.cc @@ -0,0 +1,480 @@ +#include "flex/engines/graph_db/app/app_base.h" +#include "flex/engines/graph_db/database/graph_db_session.h" +#include "grape/util.h" 
+ +#define RAPIDJSON_HAS_STDSTRING 1 + +#include +#include +#include +#include "rapidjson/rapidjson.h" +#include "rapidjson/stringbuffer.h" +#include "rapidjson/writer.h" + +namespace gs { + +struct Path { + std::vector vids; + std::vector rel_types; + std::vector weights; + std::vector directions; + // The size of rel_types is one less than the size of vids. +}; + +struct CustomHash { + size_t operator()(const std::pair& key) const { + return std::hash()(key.first) ^ std::hash()(key.second); + } +}; + +struct Results { + std::unordered_map, std::vector, + CustomHash> + path_to_end_node; + + void clear() { path_to_end_node.clear(); } +}; + +std::string rel_type_to_string(int64_t rel_type_id) { + if (rel_type_id == 0) { + return "invest"; + } else if (rel_type_id == 1) { + return "shareholder"; + } else if (rel_type_id == 2) { + return "shareholder_his"; + } else if (rel_type_id == 3) { + return "legalperson"; + } else if (rel_type_id == 4) { + return "legalperson_his"; + } else if (rel_type_id == 5) { + return "executive"; + } else if (rel_type_id == 6) { + return "executive_his"; + } else { + return "unknown"; + } +} + +class ResultCreator { + public: + static constexpr int32_t VERTEX_LABEL_ID = 0; + ResultCreator(const ReadTransaction& txn) + : txn_(txn), page_id_(0), page_size_(0) {} + + void Init( + int32_t page_id, int32_t page_size, + std::shared_ptr> typed_comp_named_col) { + page_id_ = page_id; + page_size_ = page_size; + typed_comp_named_col_ = typed_comp_named_col; + } + + bool AddPath(const std::vector& cur_path, + const std::vector& cur_rel_type, + const std::vector& rel_weight, + const std::vector& directions) { + if (cur_path.size() < 2) { + LOG(ERROR) << "Path size is less than 2"; + return false; + } + if (cur_rel_type.size() + 1 != cur_path.size()) { + LOG(ERROR) << "miss match between cur_rel_type and cur_path size:" + << cur_rel_type.size() << " " << cur_path.size(); + return false; + } + if (directions.size() + 1 != cur_path.size()) { + 
LOG(ERROR) << "miss match between directions and cur_path size:" + << directions.size() << " " << cur_path.size(); + return false; + } + auto start_node_id = cur_path[0]; + auto end_node_id = cur_path.back(); + auto key = std::make_pair(start_node_id, end_node_id); + auto iter = results_.path_to_end_node.find(key); + if (iter == results_.path_to_end_node.end()) { + results_.path_to_end_node[key] = std::vector(); + } + Path path; + path.vids = cur_path; + path.weights = rel_weight; + path.rel_types = cur_rel_type; + path.directions = directions; + results_.path_to_end_node[key].push_back(path); + return true; + } + + std::string Dump() const { + rapidjson::Document document_(rapidjson::kObjectType); + document_.AddMember("currentPage", page_id_, document_.GetAllocator()); + document_.AddMember("pageSize", page_size_, document_.GetAllocator()); + // + // rapidjson::Document paths(rapidjson::kArrayType, + // document_.GetAllocator()); + document_.AddMember("data", rapidjson::kArrayType, + document_.GetAllocator()); + for (auto& [key, path_list] : results_.path_to_end_node) { + auto& src_vid = key.first; + auto& dst_vid = key.second; + rapidjson::Document paths_for_pair(rapidjson::kObjectType, + &document_.GetAllocator()); + { + paths_for_pair.AddMember("startNodeName", get_vertex_name(src_vid), + document_.GetAllocator()); + paths_for_pair.AddMember("endNodeName", get_vertex_name(dst_vid), + document_.GetAllocator()); + paths_for_pair.AddMember("startNodeId", get_vertex_id(src_vid), + document_.GetAllocator()); + paths_for_pair.AddMember("endNodeId", get_vertex_id(dst_vid), + document_.GetAllocator()); + paths_for_pair.AddMember("paths", rapidjson::kArrayType, + document_.GetAllocator()); + for (auto& path : path_list) { + paths_for_pair["paths"].PushBack( + to_json(path, document_.GetAllocator()), + document_.GetAllocator()); + } + } + + document_["data"].PushBack(paths_for_pair, document_.GetAllocator()); + } + rapidjson::StringBuffer buffer; + rapidjson::Writer 
writer(buffer); + document_.Accept(writer); + return buffer.GetString(); + } + + rapidjson::Document to_json( + const Path& path, rapidjson::Document::AllocatorType& allocator) const { + rapidjson::Document path_json(rapidjson::kObjectType, &allocator); + path_json.AddMember("nodes", rapidjson::kArrayType, allocator); + path_json.AddMember("relationships", rapidjson::kArrayType, allocator); + for (auto i = 0; i < path.vids.size(); ++i) { + rapidjson::Document node(rapidjson::kObjectType, &allocator); + node.AddMember("id", get_vertex_id(path.vids[i]), allocator); + node.AddMember("name", get_vertex_name(path.vids[i]), allocator); + path_json["nodes"].PushBack(node, allocator); + if (i < path.vids.size() - 1) { + auto relation_id = generate_relation_id(path.vids[i], path.vids[i + 1], + path.rel_types[i]); + rapidjson::Document rel(rapidjson::kObjectType, &allocator); + rel.AddMember("startNode", get_vertex_name(path.vids[i]), allocator); + rel.AddMember("endNode", get_vertex_name(path.vids[i + 1]), allocator); + rel.AddMember("type", get_rel_type_name(path.rel_types[i]), allocator); + rel.AddMember("name", get_rel_type_name(path.rel_types[i]), allocator); + rel.AddMember("id", relation_id, allocator); + rel.AddMember("properties", rapidjson::kObjectType, allocator); + rel["properties"].AddMember("weight", path.weights[i], allocator); + rel["properties"].AddMember( + "label", get_rel_type_name(path.rel_types[i]), allocator); + rel["properties"].AddMember("id", relation_id, allocator); + rel["properties"].AddMember( + "label", get_rel_type_name(path.rel_types[i]), allocator); + path_json["relationships"].PushBack(rel, allocator); + } + } + return path_json; + } + + private: + inline std::string get_vertex_name(vid_t vid) const { + return std::string(typed_comp_named_col_->get_view(vid)); + } + + inline int64_t get_vertex_id(vid_t vid) const { + return txn_.GetVertexId(VERTEX_LABEL_ID, vid).AsInt64(); + } + + inline std::string get_rel_type_name(int64_t rel_type) const 
{ + return rel_type_to_string(rel_type); + } + + inline std::string generate_relation_id(vid_t src, vid_t dst, + int64_t rel_type) const { + return std::to_string(get_vertex_id(src)) + "_" + + get_rel_type_name(rel_type) + "_" + + std::to_string(get_vertex_id(dst)); + } + + const ReadTransaction& txn_; + int32_t page_id_, page_size_; + std::shared_ptr> typed_comp_named_col_; + + Results results_; +}; + +class QiDian : public WriteAppBase { + public: + static constexpr double timeout_sec = 15; + static constexpr int32_t REL_TYPE_MAX = 19; // 0 ~ 18 + QiDian() {} + + void Init(GraphDBSession& graph) { + vertex_label_id_ = graph.schema().get_vertex_label_id("vertex"); + invest_label_id_ = graph.schema().get_edge_label_id("invest"); + size_t num = graph.graph().vertex_num(vertex_label_id_); + + LOG(INFO) << "vertex num:" << num; + valid_comp_vids_.resize(num, false); + + auto comp_name_col = + graph.get_vertex_property_column(vertex_label_id_, "vertex_name"); + if (!comp_name_col) { + LOG(ERROR) << "column vertex_name not found for company"; + } + typed_comp_named_col_ = + std::dynamic_pointer_cast>(comp_name_col); + if (!typed_comp_named_col_) { + LOG(ERROR) << "column vertex_name is not string type for company"; + } + } + ~QiDian() {} + bool is_simple(const std::vector& path) { + // to check whether there are same vid in the path + vis_.clear(); + for (auto& vid : path) { + if (vis_.find(vid) != vis_.end()) { + return false; + } + vis_.insert(vid); + } + return true; + } + + bool edge_expand(gs::ReadTransaction& txn, const std::vector& vid_vec, + const AdjListView& edges, + std::vector& valid_rel_type_ids, int32_t cur_ind, + std::vector>& cur_paths, + std::vector>& cur_rel_types, + std::vector>& cur_rel_weights, + std::vector>& cur_directions, + std::vector>& next_paths, + std::vector>& next_rel_types, + std::vector>& next_rel_weights, + std::vector>& next_directions, + size_t& result_size, int32_t left_bound, int32_t right_bound, + Encoder& output, ResultCreator& 
result_creator_, + Direction direction) { + auto& cur_path = cur_paths[cur_ind]; + auto& cur_rel_type = cur_rel_types[cur_ind]; + auto& cur_rel_weight = cur_rel_weights[cur_ind]; + auto& cur_direction = cur_directions[cur_ind]; + + for (auto& edge : edges) { + auto rel_type = edge.get_data()[1].AsInt64(); + if (rel_type >= REL_TYPE_MAX || !valid_rel_type_ids[rel_type]){ + continue ; + } + auto dst = edge.get_neighbor(); + cur_path.emplace_back(dst); + cur_rel_type.emplace_back(); + cur_rel_weight.emplace_back(edge.get_data()[0].AsDouble()); + cur_direction.emplace_back(direction); + if (is_simple(cur_path)) { + next_paths.emplace_back(cur_path); + next_rel_types.emplace_back(cur_rel_type); + next_rel_weights.emplace_back(cur_rel_weight); + next_directions.emplace_back(cur_direction); + if (valid_comp_vids_[dst]) { + // final_results.emplace_back(path); + if (result_size >= left_bound) { + // output.put_int(cur_rel_type.size()); + if (cur_path.size() != cur_rel_type.size() + 1) { + throw std::runtime_error("Error Internal state"); + } + if (!result_creator_.AddPath(cur_path, cur_rel_type, cur_rel_weight, + cur_direction)) { + LOG(ERROR) << "Add path failed"; + return false; + } + } + ++result_size; + + if (result_size >= right_bound) { + // output.put_int_at(begin_loc, result_size - left_bound); + output.put_string(result_creator_.Dump()); + return cleanUp(txn, vid_vec); + } + } + } + cur_path.pop_back(); + cur_rel_type.pop_back(); + cur_rel_weight.pop_back(); + cur_direction.pop_back(); + } + return false; + } + +#define DEBUG + bool Query(GraphDBSession& graph, Decoder& input, Encoder& output) { + Init(graph); + + double cur_time_left = timeout_sec; + + auto txn = graph.GetReadTransaction(); + int32_t hop_limit = input.get_int(); + int32_t page_id = input.get_int(); + int32_t page_limit = input.get_int(); + int32_t left_bound = page_id * page_limit; + int32_t right_bound = (page_id + 1) * page_limit; + // LOG(INFO) << "result limit: " << page_limit << "\n"; + 
int32_t rel_type_num = input.get_int(); + // valid rel type ids + std::vector valid_rel_type_ids(REL_TYPE_MAX, false); + for (int i = 0; i < rel_type_num; ++i) { + auto rel_type = input.get_int(); + if (rel_type < 0 || rel_type >= REL_TYPE_MAX) { + LOG(ERROR) << "Invalid rel type id: " << rel_type; + return false; + } + valid_rel_type_ids[rel_type] = true; + } + + int32_t vec_size = input.get_int(); + LOG(INFO) << "Group Query: hop limit " << hop_limit << ", result limit " + << page_limit << ", ids size " << vec_size + << ", range: " << left_bound << ", " << right_bound; + std::vector vid_vec; + int count = 0; + + for (int i = 0; i < vec_size; ++i) { + auto oid = input.get_long(); + // std::string_view oid = input.get_string(); + vid_t vid; + if (!txn.GetVertexIndex(vertex_label_id_, Any::From(oid), vid)) { + LOG(INFO) << "Get oid: " << oid << ", not found"; + count++; + } else { + VLOG(10) << "Oid: " << oid << ", vid:" << vid; + vid_vec.emplace_back(vid); + } + } + if (count > 0) { + LOG(INFO) << count << " out of " << vec_size << " vertices are not found"; + } + for (auto& vid : vid_vec) { + valid_comp_vids_[vid] = true; + } + +#if 0 + auto cmp_invest_outgoing_view = txn.GetOutgoingGraphView( + vertex_label_id_, vertex_label_id_, invest_label_id_); + auto cmp_invest_incoming_view = txn.GetIncomingGraphView( + vertex_label_id_, vertex_label_id_, invest_label_id_); +#else + auto cmp_invest_outgoing_view = txn.GetOutgoingGraphView( + vertex_label_id_, vertex_label_id_, invest_label_id_); + auto cmp_invest_incoming_view = txn.GetIncomingGraphView( + vertex_label_id_, vertex_label_id_, invest_label_id_); +#endif + ResultCreator result_creator_(txn); + result_creator_.Init(page_id, page_limit, typed_comp_named_col_); + + // Expand from vid_vec, until end_vertex is valid, or hop limit is reached. 
+ std::vector> cur_paths; + std::vector> cur_rel_types; + std::vector> cur_rel_weights; + std::vector> cur_directions; + std::vector> next_paths; + std::vector> next_rel_types; + std::vector> next_rel_weights; + std::vector> next_directions; + // init cur_paths + for (auto& vid : vid_vec) { + cur_paths.emplace_back(std::vector{vid}); + cur_rel_types.emplace_back(std::vector{}); + cur_rel_weights.emplace_back(std::vector{}); + cur_directions.emplace_back(std::vector{}); + } + // size_t begin_loc = output.skip_int(); + size_t result_size = 0; + for (auto i = 1; i <= hop_limit; ++i) { + VLOG(10) << "hop: " << i; + + for (auto j = 0; j < cur_paths.size(); ++j) { + // VLOG(10) << "path: " << gs::to_string(cur_paths[j]); + auto last_vid = cur_paths[j].back(); + const auto& oedges = cmp_invest_outgoing_view.get_edges(last_vid); + double t0 = -grape::GetCurrentTime(); + if (edge_expand(txn, vid_vec, oedges, valid_rel_type_ids, j, cur_paths, + cur_rel_types, cur_rel_weights, cur_directions, + next_paths, next_rel_types, next_rel_weights, + next_directions, result_size, left_bound, right_bound, + output, result_creator_, Direction::Out)) { + return true; // early terminate. + } + t0 += grape::GetCurrentTime(); + cur_time_left -= t0; + if (cur_time_left < 0) { + LOG(INFO) << "Timeout, result size: " << result_size - left_bound; + // output.put_int_at(begin_loc, result_size - left_bound); + output.put_string(result_creator_.Dump()); + return cleanUp(txn, vid_vec); + } + double t1 = -grape::GetCurrentTime(); + const auto& iedges = cmp_invest_incoming_view.get_edges(last_vid); + if (edge_expand(txn, vid_vec, iedges, valid_rel_type_ids, j, cur_paths, + cur_rel_types, cur_rel_weights, cur_directions, + next_paths, next_rel_types, next_rel_weights, + next_directions, result_size, left_bound, right_bound, + output, result_creator_, Direction::In)) { + return true; // early terminate. 
+ } + t1 += grape::GetCurrentTime(); + cur_time_left -= t1; + if (cur_time_left < 0) { + LOG(INFO) << "Timeout, result size: " << result_size - left_bound; + // output.put_int_at(begin_loc, result_size - left_bound); + output.put_string(result_creator_.Dump()); + return cleanUp(txn, vid_vec); + } + } + // LOG(INFO) << "Hop: " << i << ", result: " << final_results.size() + // << ", cur_paths: " << cur_paths.size() + // << ", next_paths: " << next_paths.size(); + cur_paths.swap(next_paths); + cur_rel_types.swap(next_rel_types); + cur_rel_weights.swap(next_rel_weights); + cur_directions.swap(next_directions); + next_paths.clear(); + next_rel_types.clear(); + next_rel_weights.clear(); + next_directions.clear(); + } + + // output.put_int_at(begin_loc, result_size - left_bound); + output.put_string(result_creator_.Dump()); + return cleanUp(txn, vid_vec); + } + + bool cleanUp(ReadTransaction& txn, const std::vector& vid_vec) { + txn.Commit(); + for (auto& vid : vid_vec) { + valid_comp_vids_[vid] = false; + } + return true; + } + + private: + label_t vertex_label_id_; + label_t invest_label_id_; + std::unordered_set vis_; + std::vector valid_comp_vids_; + + std::shared_ptr> typed_comp_named_col_; +}; + +#undef DEBUG + +} // namespace gs + +extern "C" { +void* CreateApp(gs::GraphDBSession& db) { + gs::QiDian* app = new gs::QiDian(); + return static_cast(app); +} + +void DeleteApp(void* app) { + gs::QiDian* casted = static_cast(app); + delete casted; +} +} diff --git a/charts/graphscope-interactive/script/switch_graph.py b/charts/graphscope-interactive/script/switch_graph.py new file mode 100644 index 000000000000..cb68ca354d8d --- /dev/null +++ b/charts/graphscope-interactive/script/switch_graph.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import time +import hmac +import hashlib +import base64 +import urllib.parse + +import sys + +# sys.path.append("../../../flex/interactive/sdk/python/") +import time + +from gs_interactive.models.edge_mapping_destination_vertex_mappings_inner import ( + EdgeMappingDestinationVertexMappingsInner, +) + +import gs_interactive +from gs_interactive.models.column_mapping import ColumnMapping +from gs_interactive.models.edge_mapping_source_vertex_mappings_inner import ( + EdgeMappingSourceVertexMappingsInner, +) +from gs_interactive.models.edge_mapping_source_vertex_mappings_inner_column import ( + EdgeMappingSourceVertexMappingsInnerColumn, +) +from gs_interactive.client.driver import Driver +from gs_interactive.client.session import Session +from gs_interactive.models import * + +query = """ +SELECT ds + FROM onecomp_risk.ads_fin_rsk_fe_ent_rel_data_version + WHERE ds = MAX_PT("onecomp_risk.ads_fin_rsk_fe_ent_rel_data_version"); +""" +import os + +script_directory = os.path.dirname(os.path.abspath(__file__)) +print("script directory", script_directory) + +uri = "https://oapi.dingtalk.com/robot/send?access_token=" +# read token from ${HOME}/.dingtalk_token +token = "" +with open(os.path.expanduser("~/.dingtalk_token"), "r") as f: + token = f.read().strip() +if token == "": + raise Exception("token is empty") + +secret = "" +with open(os.path.expanduser("~/.dingtalk_secret"), "r") as f: + secret = f.read().strip() +if secret == "": + raise Exception("secret is empty") + + +def get_full_uri(): + timestamp = str(round(time.time() * 1000)) + secret_enc = 
secret.encode("utf-8") + string_to_sign = "{}\n{}".format(timestamp, secret) + string_to_sign_enc = string_to_sign.encode("utf-8") + hmac_code = hmac.new( + secret_enc, string_to_sign_enc, digestmod=hashlib.sha256 + ).digest() + sign = urllib.parse.quote_plus(base64.b64encode(hmac_code)) + print(timestamp) + print(sign) + return uri + token + "&timestamp=" + timestamp + "&sign=" + sign + + +def report_message(message: str): + uri = get_full_uri() + print(uri) + real_msg = {"msgtype": "text", "text": {"content": message}} + print(real_msg) + import requests + + headers = {"Content-Type": "application/json"} + response = requests.post(uri, json=real_msg, headers=headers) + print(response.text) + + +def restart_service(sess: Session, graph_id: str, report_error: bool): + resp = sess.start_service( + start_service_request=StartServiceRequest(graph_id=graph_id) + ) + if not resp.is_ok(): + if report_error: + report_message(f"Failed to restart service, graph_id: {graph_id}") + raise Exception(f"Failed to restart service, graph_id: {graph_id}") + print("restart service successfully") + + +def get_service_status(sess: Session, report_error: bool): + resp = sess.get_service_status() + if not resp.is_ok(): + if report_error: + report_message("Failed to get service status") + raise Exception("Failed to get service status") + print("service status: ", resp.get_value()) + status = resp.get_value() + print("service running is now running on graph", status.graph.id) + + +def get_current_running_graph(sess: Session, report_error: bool): + resp = sess.get_service_status() + if not resp.is_ok(): + if report_error: + report_message("Failed to get service status") + raise Exception("Failed to get service status") + status = resp.get_value() + return status.graph.id + + +def list_graph(sess: Session): + resp = sess.list_graphs() + if not resp.is_ok(): + report_message("Failed to list graph") + res = resp.get_value() + graph_id_arr = [graph.id for graph in res] + print("list graph: ", 
graph_id_arr) + + +def check_graph_exits_and_ready(sess: Session, graph_id: str, report_error: bool): + resp = sess.get_graph_meta(graph_id=graph_id) + print("check graph exits: ", resp.is_ok()) + if not resp.is_ok(): + if report_error: + report_message(f"Failed to get graph schema, graph_id: {graph_id}") + raise Exception(f"Failed to get graph schema, graph_id: {graph_id}") + print("graph exits: ", resp.get_value()) + # check whether the graph contains loading config and has one procedures + meta = resp.get_value() + if meta.data_import_config is None: + if report_error: + report_message(f"Graph {graph_id} does not contain loading config") + raise Exception(f"Graph {graph_id} does not contain loading config") + print("graph has loading config") + + # check whether the graph has one procedures + if meta.stored_procedures is None or len(meta.stored_procedures) != 1: + if report_error: + report_message(f"Graph {graph_id} does not contain one procedures") + raise Exception(f"Graph {graph_id} does not contain one procedures") + + +if __name__ == "__main__": + # parse command line args + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--endpoint", type=str, default="http://localhost:7777") + parser.add_argument("--graph_id", type=str, default=None, required=False) + parser.add_argument("--validate-reporting", type=bool, default=False) + parser.add_argument("--report-error", type=bool, default=False) + + # finish + args = parser.parse_args() + print(args) + if args.validate_reporting: + report_message("Testing message") + sys.exit(0) + print("connecting to ", args.endpoint) + driver = Driver(args.endpoint) + sess = driver.session() + # get current running graph + old_graph = get_current_running_graph(sess, args.report_error) + print("-----------------Finish getting current running graph-----------------") + print("old graph: ", old_graph) + + if args.graph_id not in [None, ""]: + graph_id = args.graph_id + else: + # try to update service to 
the latest graph + graph_ids = list_graph(sess) + # pick the largest graph_id + graph_id = max(graph_ids) + print("pick the largest graph_id: ", graph_id, "from", graph_ids) + print("new graph: ", graph_id) + + # check if graph_id exists + check_graph_exits_and_ready(sess, graph_id, args.report_error) + + start_time = time.time() + restart_service(sess, graph_id, args.report_error) + end_time = time.time() + execution_time = end_time - start_time + print("-----------------Finish restarting service-----------------") + print(f"restart service cost {execution_time:.6f}seconds") + + get_service_status(sess, args.report_error) + print("-----------------Finish getting service status-----------------") + + list_graph(sess) + + # after switch to new graph, delete the old graph + delete_graph = sess.delete_graph(old_graph) + print("delete graph res: ", delete_graph) + if not delete_graph.is_ok(): + if args.report_error: + report_message(f"Failed to delete graph {old_graph}") + raise Exception(f"fail to delete graph {old_graph}") + + if args.report_error: + report_message( + f"Switched to graph {graph_id} successfully, restart service cost {execution_time:.6f}seconds" + ) diff --git a/charts/graphscope-interactive/script/switch_graph_to_newest.sh b/charts/graphscope-interactive/script/switch_graph_to_newest.sh new file mode 100755 index 000000000000..1ef8b3c07a3b --- /dev/null +++ b/charts/graphscope-interactive/script/switch_graph_to_newest.sh @@ -0,0 +1,5 @@ +#!/bin/bash +cmd="python3 /home/graphscope/work/k8s-test/gs/charts/graphscope-interactive/script/switch_graph.py --endpoint http://33.37.43.163:7777" +echo "date is $(date)" >> /home/graphscope/switch_graph.log +echo "--------------------------------" >> /home/graphscope/switch_graph.log +eval $cmd >> /home/graphscope/switch_graph.log 2>&1 diff --git a/charts/graphscope-interactive/script/tests/create_modern_graph_and_import.py b/charts/graphscope-interactive/script/tests/create_modern_graph_and_import.py new file 
mode 100644 index 000000000000..4bb0aed7568c --- /dev/null +++ b/charts/graphscope-interactive/script/tests/create_modern_graph_and_import.py @@ -0,0 +1,343 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +# +# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +import sys + +# sys.path.append("../../../flex/interactive/sdk/python/") +import time + +from gs_interactive.models.edge_mapping_destination_vertex_mappings_inner import ( + EdgeMappingDestinationVertexMappingsInner, +) + +import gs_interactive +from gs_interactive.models.column_mapping import ColumnMapping +from gs_interactive.models.edge_mapping_source_vertex_mappings_inner import ( + EdgeMappingSourceVertexMappingsInner, +) +from gs_interactive.models.edge_mapping_source_vertex_mappings_inner_column import ( + EdgeMappingSourceVertexMappingsInnerColumn, +) +from gs_interactive.client.driver import Driver +from gs_interactive.client.session import Session +from gs_interactive.models import * + +import os + +script_directory = os.path.dirname(os.path.abspath(__file__)) +print("script directory", script_directory) + +modern_graph = { + "name": "modern_graph", + "version": "v0.1", + "store_type": "mutable_csr", + "description": "A graph with 2 vertex types and 2 edge types", + "schema": { + "vertex_types": [ + { + "type_id": 0, + "type_name": "person", + "description": "A person vertex type", + "x_csr_params": {"max_vertex_num": 100}, + 
"properties": [ + { + "property_id": 0, + "property_name": "id", + "property_type": {"primitive_type": "DT_SIGNED_INT64"}, + }, + { + "property_id": 1, + "property_name": "name", + "property_type": {"string": {"long_text": ""}}, + }, + { + "property_id": 2, + "property_name": "age", + "property_type": {"primitive_type": "DT_SIGNED_INT32"}, + }, + ], + "primary_keys": ["id"], + }, + { + "type_id": 1, + "type_name": "software", + "description": "A software vertex type", + "x_csr_params": {"max_vertex_num": 100}, + "properties": [ + { + "property_id": 0, + "property_name": "id", + "property_type": {"primitive_type": "DT_SIGNED_INT64"}, + }, + { + "property_id": 1, + "property_name": "name", + "property_type": {"string": {"long_text": ""}}, + }, + { + "property_id": 2, + "property_name": "lang", + "property_type": {"string": {"long_text": ""}}, + }, + ], + "primary_keys": ["id"], + }, + ], + "edge_types": [ + { + "type_id": 0, + "type_name": "knows", + "description": "A knows edge type", + "vertex_type_pair_relations": [ + { + "source_vertex": "person", + "destination_vertex": "person", + "relation": "MANY_TO_MANY", + "x_csr_params": {"sort_on_compaction": "true"}, + } + ], + "properties": [ + { + "property_id": 0, + "property_name": "weight", + "property_type": {"primitive_type": "DT_DOUBLE"}, + } + ], + }, + { + "type_id": 1, + "type_name": "created", + "description": "A created edge type", + "vertex_type_pair_relations": [ + { + "source_vertex": "person", + "destination_vertex": "software", + "relation": "MANY_TO_MANY", + } + ], + "properties": [ + { + "property_id": 0, + "property_name": "weight", + "property_type": {"primitive_type": "DT_DOUBLE"}, + } + ], + }, + ], + }, +} + + +def create_graph(sess: Session, ds: str, report_error: bool): + # copied_huoyan_graph = huoyan_graph.copy() + graph_name = f"onecompany_{ds}" + create_graph_req = CreateGraphRequest.from_dict(modern_graph) + create_graph_res = sess.create_graph(create_graph_req) + # 
CreateGraphRequest.from_dict(copied_huoyan_graph) + + if not create_graph_res.is_ok(): + print("create graph failed: ", create_graph_res.get_status_message()) + raise Exception("fail to create graph") + create_graph_resp = create_graph_res.get_value() + print( + f"create graph {create_graph_resp.graph_id} successfully with name graph_name" + ) + return create_graph_resp.graph_id + + +def loading_graph(sess: Session, graph_id: str, report_error: bool): + schema_mapping = SchemaMapping( + loading_config=SchemaMappingLoadingConfig( + data_source=SchemaMappingLoadingConfigDataSource( + scheme="file", + location="@//home/graphscope/work/k8s-test/gs/flex/interactive/examples/modern_graph/", + ), + import_option="init", + format=SchemaMappingLoadingConfigFormat( + type="csv", + metadata={"batch_reader": True}, + ), + ), + vertex_mappings=[ + VertexMapping( + type_name="person", + inputs=[f"person.csv"], + ), + VertexMapping( + type_name="software", + inputs=[f"software.csv"], + ), + ], + edge_mappings=[ + EdgeMapping( + type_triplet=EdgeMappingTypeTriplet( + edge="knows", + source_vertex="person", + destination_vertex="person", + ), + inputs=["person_knows_person.csv"], + ), + EdgeMapping( + type_triplet=EdgeMappingTypeTriplet( + edge="created", + source_vertex="person", + destination_vertex="software", + ), + inputs=["person_created_software.csv"], + ), + ], + ) + resp = sess.bulk_loading(graph_id, schema_mapping) + if not resp.is_ok(): + print("resp: ", resp.get_status_message()) + raise Exception("fail to create loading job") + print(f"create loading job successfully: {resp.get_value().job_id}") + return resp.get_value().job_id + + +def wait_job_finish(sess: Session, job_id: str): + while True: + resp = sess.get_job(job_id) + status = resp.get_value().status + print("job status: ", status) + if status == "SUCCESS": + break + elif status == "FAILED": + print("job failed: ", resp.get_value()) + raise Exception("job failed") + else: + time.sleep(10) + print("Finish 
loading graph: ", job_id) + + +def create_procedure( + sess: Session, graph_id: str, file_path: str, proc_name, report_error: bool +): + # read file into string + with open(file_path, "r") as f: + content = f.read() + resp = sess.create_procedure( + graph_id, + CreateProcedureRequest( + name=proc_name, description="huo yan app", query=content, type="cpp" + ), + ) + print("create procedure result: ", resp) + if not resp.is_ok(): + raise Exception("fail to create procedure") + + +def restart_service(sess: Session, graph_id: str): + resp = sess.start_service( + start_service_request=StartServiceRequest(graph_id=graph_id) + ) + if not resp.is_ok(): + print("restart service failed: ", resp.get_status_message()) + print("restart service successfully") + + +def get_service_status(sess: Session, report_error: bool): + resp = sess.get_service_status() + if not resp.is_ok(): + print("get service status failed: ", resp.get_status_message()) + raise Exception("fail to get service status") + print("service status: ", resp.get_value()) + status = resp.get_value() + print("service running is now running on graph", status.graph.id) + + +def get_current_running_graph(sess: Session, report_error: bool): + resp = sess.get_service_status() + if not resp.is_ok(): + print("get service status failed: ", resp.get_status_message()) + raise Exception("fail to get service status") + status = resp.get_value() + return status.graph.id + + +def list_graph(sess: Session, report_error: bool): + resp = sess.list_graphs() + if not resp.is_ok(): + print("list graph failed: ", resp.get_status_message()) + raise Exception("fail to list graph") + res = resp.get_value() + graph_id_arr = [graph.id for graph in res] + print("list graph: ", graph_id_arr) + + +if __name__ == "__main__": + # parse command line args + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument("--endpoint", type=str, default="http://localhost:7777") + parser.add_argument("--proc-name", type=str, 
default="huoyan") + # parser.add_argument("--remove-old-graph", type=bool, default=True) + parser.add_argument("--ds", type=str) + parser.add_argument("--validate-reporting", type=bool, default=False) + parser.add_argument("--report-error", type=bool, default=False) + + # finish + args = parser.parse_args() + print(args) + + print("connecting to ", args.endpoint) + + report_error = args.report_error + + driver = Driver(args.endpoint) + sess = driver.session() + # get current running graph + old_graph = get_current_running_graph(sess, report_error) + print("-----------------Finish getting current running graph-----------------") + print("old graph: ", old_graph) + + graph_id = create_graph(sess, report_error) + print("-----------------Finish creating graph-----------------") + print("graph_id: ", graph_id) + + job_id = loading_graph(sess, graph_id, report_error) + wait_job_finish(sess, job_id) + print("-----------------Finish loading graph-----------------") + + create_procedure( + sess, graph_id, script_directory + "/procedure.cc", args.proc_name, report_error + ) + print("-----------------Finish creating procedure-----------------") + + start_time = time.time() + restart_service(sess, graph_id) + end_time = time.time() + execution_time = end_time - start_time + print("-----------------Finish restarting service-----------------") + print(f"restart service cost {execution_time:.6f}seconds") + + get_service_status(sess, report_error) + print("-----------------Finish getting service status-----------------") + + # if args.remove_old_graph: + # print("remove old graph") + # delete_graph = sess.delete_graph(old_graph) + # print("delete graph res: ", delete_graph) + # else: + # print("keep old graph", old_graph) + + list_graph(sess, report_error) + + print("Bulk loading modern graph finished successfully") diff --git a/charts/graphscope-interactive/settings.yaml b/charts/graphscope-interactive/settings.yaml new file mode 100644 index 000000000000..64f5c7838003 --- 
/dev/null +++ b/charts/graphscope-interactive/settings.yaml @@ -0,0 +1,81 @@ +Kata: &kata + hostNetwork: true + schedulerName: unified-batch-scheduler + podAnnotations: + alibabacloud.com/assign-in-virtual-node: odps-kata + alibabacloud.com/skip-kubelet-admission: "[\"cpu\",\"memory\",\"alibabacloud.com/acu\",\"pods\"]" + podLabels: &podlabels + alibabacloud/qos: LS + alibabacloud.com/quota-name: yuansi-child-quotaxy + +NoKata: &nokata + hostNetwork: false + schedulerName: default-scheduler + podAnnotations: + #alibabacloud.com/assign-in-virtual-node: odps-kata + alibabacloud.com/skip-kubelet-admission: "[\"cpu\",\"memory\",\"alibabacloud.com/acu\",\"pods\"]" + podLabels: + alibabacloud/qos: LS + #alibabacloud.com/quota-name: yuansi-child-quotaxy + +Defaults: &defaults + dnsPolicy: &dnspolicy None + dnsConfig: &dnsconfig + nameservers: ["33.18.9.125"] #118f + #nameservers: ["11.163.105.32"] #88n + searches: + - kubetask.svc.cluster.local + - svc.cluster.local + - cluster.local + options: + - name: ndots + value: "2" + + tolerations: &toleration + - effect: NoExecute + key: node.kubernetes.io/not-ready + operator: Exists + tolerationSeconds: 300 + - effect: NoExecute + key: node.kubernetes.io/unreachable + operator: Exists + tolerationSeconds: 300 + - effect: NoSchedule + key: sigma.ali/resource-pool + operator: Exists + #value: ackee_pool + - effect: NoSchedule + key: sigma.ali/is-ecs + operator: Exists + - effect: NoSchedule + key: alibabacloud.com/partition + operator: Exists + - effect: NoSchedule + key: sigma.ali/server-owner + operator: Exists + +resources: &resource_min + limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 1000m + memory: 1Gi + +global: + #imageRegistry: registry.cn-hongkong.aliyuncs.com + storageClass: csi-ultron-prjquota + +<<: *defaults + +primary: + <<: *nokata + +secondary: + <<: *nokata + +frontend: + <<: *nokata + +cronjob: + <<: *kata diff --git a/charts/graphscope-interactive/templates/_helpers.tpl 
b/charts/graphscope-interactive/templates/_helpers.tpl index 0880da16567e..47a42f811858 100644 --- a/charts/graphscope-interactive/templates/_helpers.tpl +++ b/charts/graphscope-interactive/templates/_helpers.tpl @@ -27,10 +27,21 @@ If release name contains chart name it will be used as a full name. {{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "frontend" | trunc 63 | trimSuffix "-" -}} {{- end -}} -{{- define "graphscope-interactive.engine.fullname" -}} -{{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "engine" | trunc 63 | trimSuffix "-" -}} +{{- define "graphscope-interactive.primary.fullname" -}} +{{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "primary" | trunc 63 | trimSuffix "-" -}} {{- end -}} +{{- define "graphscope-interactive.secondary.fullname" -}} +{{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "secondary" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "graphscope-interactive.ingress.fullname" -}} +{{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "ingress" | trunc 63 | trimSuffix "-" -}} +{{- end -}} + +{{- define "graphscope-interactive.cronjob.fullname" -}} +{{- printf "%s-%s" (include "graphscope-interactive.fullname" .) "cronjob" | trunc 63 | trimSuffix "-" -}}{{- end -}} + {{/* Create chart name and version as used by the chart label. 
@@ -77,11 +88,80 @@ Return the proper graphscope-interactive frontend image name {{- end -}} {{/* -Return the proper graphscope-interactive engine image name +Return the proper graphscope-interactive primary image name +*/}} +{{- define "graphscope-interactive.primary.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.primary.image -}} +{{- if .tag -}} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Return the proper graphscope-interactive nginx image name +*/}} +{{- define "graphscope-interactive.nginx.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.nginx.image -}} +{{- if .tag -}} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Return the proper graphscope-interactive openresty image name +*/}} +{{- define "graphscope-interactive.openresty.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.openresty.image -}} +{{- if .tag -}} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} +{{- end -}} +{{- end -}} + + +{{/* +Return the proper graphscope-interactive secondary image name +*/}} +{{- define "graphscope-interactive.secondary.image" -}} +{{- $tag := .Chart.AppVersion | toString -}} +{{- with .Values.secondary.image -}} +{{- if .tag -}} +{{- $tag = .tag | toString -}} +{{- end -}} +{{- if .registry -}} +{{- printf "%s/%s:%s" .registry .repository $tag -}} +{{- else -}} +{{- printf "%s:%s" .repository $tag -}} +{{- end -}} +{{- end -}} +{{- end -}} + +{{/* +Return the proper graphscope-interactive cron job image 
name */}} -{{- define "graphscope-interactive.engine.image" -}} +{{- define "graphscope-interactive.cronjob.image" -}} {{- $tag := .Chart.AppVersion | toString -}} -{{- with .Values.engine.image -}} +{{- with .Values.cronjob.image -}} {{- if .tag -}} {{- $tag = .tag | toString -}} {{- end -}} diff --git a/charts/graphscope-interactive/templates/admin_proxy_nginx_conf.yaml b/charts/graphscope-interactive/templates/admin_proxy_nginx_conf.yaml new file mode 100644 index 000000000000..05c87164e52f --- /dev/null +++ b/charts/graphscope-interactive/templates/admin_proxy_nginx_conf.yaml @@ -0,0 +1,138 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-admin-proxy-nginx-config + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} + app.kubernetes.io/component: configmap + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +data: + nginx.conf: | + events {} + http { + resolver {{ first .Values.dnsConfig.nameservers }}; + server { + {{- $adminPort := .Values.primary.service.adminPort | int }} + listen {{ $adminPort }}; + client_body_buffer_size 10M; + client_max_body_size 10M; + location / { + content_by_lua_block { + function arrayToString(arr, separator) + separator = separator or ", " -- Default separator if not provided + return table.concat(arr, separator) + end + local http = require "resty.http" + local res = {} + local success = true + local count = 0 + local status_code = 0 + local error_message = nil -- Initialize a variable to capture error messages + + + local urls = { + {{- $baseName := include "graphscope-interactive.secondary.fullname" . 
}} + {{- $replicaCount := .Values.secondary.replicaCount | int }} + {{- $serviceName := printf "%s.%s.svc.%s" (include "graphscope-interactive.secondary.fullname" .) .Release.Namespace .Values.clusterDomain }} + {{- $port := .Values.secondary.service.adminPort | int }} + {{- if eq $replicaCount 1 }} + {{ printf "http://%s-0.%s:%d" $baseName $serviceName $port | quote }} + {{- else }} + {{- range $i := until (sub $replicaCount 1 | int ) }} + {{ printf "\"http://%s-%d.%s:%d\"," $baseName $i $serviceName $port }} + {{- end }} + {{ printf "http://%s-%d.%s:%d" $baseName (sub $replicaCount 1) $serviceName $port | quote }} + {{- end }} + } + + local original_headers = ngx.req.get_headers() + local request_uri=ngx.var.request_uri + local method = ngx.req.get_method() + + -- Create a table for modified headers + local backend_headers = {} + + -- Copy the relevant headers, if needed, or modify them + for key, value in pairs(original_headers) do + -- ngx.req.get_headers() returns lower-cased header names by default, so the skip list must be lower case + if key ~= "host" and key ~= "user-agent" and key ~= "content-length" then + backend_headers[key] = value + end + end + + + for _, backend in ipairs(urls) do + -- full_uri is backend + request_uri + local full_uri = backend .. 
request_uri + local httpc = http.new() + local response, err + + if method == "GET" then + response, err = httpc:request_uri(full_uri, { + method = "GET", + }) + elseif method == "POST" then + ngx.req.read_body() -- Read the request body + local body_data = ngx.req.get_body_data() + response, err = httpc:request_uri(full_uri, { + method = "POST", + body = body_data, + headers = backend_headers + }) + elseif method == "PUT" then + ngx.req.read_body() -- Read the request body + local body_data = ngx.req.get_body_data() + response, err = httpc:request_uri(full_uri, { + method = "PUT", + body = body_data, + headers = backend_headers + }) + elseif method == "DELETE" then + response, err = httpc:request_uri(full_uri, { + method = "DELETE", + }) + end + + if response ~= nil then + status_code = response.status + res[#res + 1] = response.body + if response.status < 200 or response.status >= 300 then + success = false + if not error_message then -- Capture the error message from the first failed request + error_message = response.body or "Failed request without a body." + end + end + else + status_code = 500 + if err ~= nil then + ngx.log(ngx.ERR, "Failed to request: ", err) + success = false + if not error_message then -- Capture error when no response + error_message = "Error: " .. 
err + end + else + success = false + error_message = "Not found" + end + end + end + + ngx.header.content_type = 'application/json' + if success then + ngx.status = status_code + ngx.say(res[1]) + ngx.exit(status_code) + else + ngx.status = status_code + ngx.say(error_message) + ngx.exit(status_code) + end + } + } + } + } diff --git a/charts/graphscope-interactive/templates/configmap.yaml b/charts/graphscope-interactive/templates/configmap.yaml index 6c4c8153495e..f69978bee25d 100644 --- a/charts/graphscope-interactive/templates/configmap.yaml +++ b/charts/graphscope-interactive/templates/configmap.yaml @@ -19,13 +19,13 @@ data: data: data logs: logs conf: conf - log_level: {{ .Values.engine.logLevel }} + log_level: {{ .Values.primary.logLevel }} default_graph: {{ .Values.defaultGraph }} compute_engine: type: hiactor workers: - - ENGINE_SERVICE_HOST:10000 - thread_num_per_worker: {{ .Values.engine.threadNumPerWorker }} + - localhost:10000 + thread_num_per_worker: {{ .Values.primary.threadNumPerWorker }} compiler: planner: is_on: true @@ -35,7 +35,7 @@ data: - FilterMatchRule - NotMatchToAntiJoinRule endpoint: - default_listen_address: ENGINE_SERVICE_HOST + default_listen_address: localhost bolt_connector: disabled: false port: {{ .Values.frontend.service.cypherPort }} @@ -44,10 +44,6 @@ data: port: {{ .Values.frontend.service.gremlinPort }} query_timeout: {{ .Values.frontend.service.queryTimeout }} http_service: - default_listen_address: ENGINE_SERVICE_HOST - admin_port: {{ .Values.engine.service.adminPort }} - query_port: {{ .Values.engine.service.queryPort }} - setup.sh: |- - #!/bin/bash - sudo sed -e "s/ENGINE_SERVICE_HOST/${ENGINE_SERVICE_HOST}/g" ${ENGINE_CONFIG_PATH} > ${REAL_ENGINE_CONFIG_PATH} - echo "Finish set ENGINE_SERVICE_HOST to ${ENGINE_SERVICE_HOST}" \ No newline at end of file + default_listen_address: localhost + admin_port: {{ .Values.primary.service.adminPort }} + query_port: {{ .Values.primary.service.queryPort }} \ No newline at end of file 
diff --git a/charts/graphscope-interactive/templates/engine/statefulset.yaml b/charts/graphscope-interactive/templates/engine/statefulset.yaml deleted file mode 100644 index f9455b53fd71..000000000000 --- a/charts/graphscope-interactive/templates/engine/statefulset.yaml +++ /dev/null @@ -1,167 +0,0 @@ -{{- $frontendFullname := include "graphscope-interactive.frontend.fullname" . }} -{{- $engineFullName := include "graphscope-interactive.engine.fullname" . }} -{{- $releaseNamespace := .Release.Namespace }} -{{- $clusterDomain := .Values.clusterDomain }} - -apiVersion: apps/v1 -kind: StatefulSet -metadata: - name: {{ include "graphscope-interactive.engine.fullname" . }} - namespace: {{ .Release.Namespace }} - labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} - app.kubernetes.io/component: engine - {{- if .Values.commonLabels }} - {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} - {{- end }} - {{- if .Values.commonAnnotations }} - annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} - {{- end }} -spec: - replicas: {{ .Values.engine.replicaCount }} - selector: - matchLabels: {{ include "graphscope-interactive.selectorLabels" . | nindent 6 }} - app.kubernetes.io/component: engine - serviceName: {{ include "graphscope-interactive.engine.fullname" . }}-headless - updateStrategy: - type: {{ .Values.engine.updateStrategy }} - {{- if (eq "Recreate" .Values.engine.updateStrategy) }} - rollingUpdate: null - {{- end }} - template: - metadata: - annotations: - {{- if .Values.engine.podAnnotations }} - {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.podAnnotations "context" $) | nindent 8 }} - {{- end }} - labels: {{- include "graphscope-interactive.labels" . 
| nindent 8 }} - app.kubernetes.io/component: engine - {{- if .Values.commonLabels }} - {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "graphscope-interactive.serviceAccountName" . }} - {{- if .Values.engine.affinity }} - affinity: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.affinity "context" $) | nindent 8 }} - {{- end }} - initContainers: - {{- if .Values.engine.initContainers }} - {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.initContainers "context" $) | nindent 8 }} - {{- end }} - containers: - - name: engine - image: {{ include "graphscope-interactive.engine.image" . }} - imagePullPolicy: {{ .Values.engine.image.pullPolicy | quote }} - command: - - /bin/bash - - -c - - | - echo "Starting engine..." - # first check engine_config.yaml exists - if [ ! -f ${ENGINE_CONFIG_PATH} ]; then - #error exit - echo "${ENGINE_CONFIG_PATH} not found, exiting..." - exit 1 - fi - # then check interactive_server binary exists and executable - if [ ! -x ${ENGINE_BINARY_PATH} ]; then - #error exit - echo "${ENGINE_BINARY_PATH} binary not found or not executable, exiting..." - exit 1 - fi - # always try to load the built-in graph: gs_interactive_default_graph - # for case CURRENT_GRAPH is not the default_graph, we assume the data is already loaded. - # TODO. - builtin_graph_schema_path="${INTERACTIVE_WORKSPACE}/data/${DEFAULT_GRAPH_NAME}/graph.yaml" - builtin_graph_data_path="${INTERACTIVE_WORKSPACE}/data/${DEFAULT_GRAPH_NAME}/indices/" - builtin_graph_import_path="${INTERACTIVE_WORKSPACE}/data/${DEFAULT_GRAPH_NAME}/bulk_load.yaml" - # if builtin_graph_data_path exists, skip - if [ ! 
-d ${builtin_graph_data_path} ]; then - mkdir -p ${INTERACTIVE_WORKSPACE}/data/${DEFAULT_GRAPH_NAME} - echo "Loading builtin graph: ${DEFAULT_GRAPH_NAME} with command: $builtin_graph_loader_cmd" - cp /opt/flex/share/gs_interactive_default_graph/graph.yaml ${builtin_graph_schema_path} - cp /opt/flex/share/gs_interactive_default_graph/bulk_load.yaml ${builtin_graph_import_path} - export FLEX_DATA_DIR=/opt/flex/share/gs_interactive_default_graph/ - - builtin_graph_loader_cmd="${BULK_LOADER_BINARY_PATH} -g ${builtin_graph_schema_path} -d ${builtin_graph_data_path} -l ${builtin_graph_import_path}" - echo "Loading builtin graph: ${DEFAULT_GRAPH_NAME} with command: $builtin_graph_loader_cmd" - eval $builtin_graph_loader_cmd - fi - - bash /etc/interactive/setup.sh - cmd="GLOG_v=10 ${ENGINE_BINARY_PATH} -c ${REAL_ENGINE_CONFIG_PATH}" - #cmd="${cmd} --enable-admin-service false -w ${INTERACTIVE_WORKSPACE}" - cmd="${cmd} -g ${builtin_graph_schema_path} --data-path ${builtin_graph_data_path}" - echo "Starting engine with command: $cmd" - eval $cmd - env: - - name: INTERACTIVE_WORKSPACE - value: {{ .Values.workspace | quote }} - - name: ENGINE_SERVICE_HOST - value: {{ $engineFullName }}-headless.{{ $releaseNamespace }}.svc.{{ $clusterDomain }} - - name: ENGINE_CONFIG_PATH - value: {{ include "graphscope-interactive.engineConfigPath" . }} - - name: REAL_ENGINE_CONFIG_PATH - value: {{ include "graphscope-interactive.realEngineConfigPath" . }} - - name: ENGINE_BINARY_PATH - value: {{ include "graphscope-interactive.engineBinaryPath" . 
}} - - name: ENGINE_SHARD_NUM - value: {{ .Values.engine.threadNumPerWorker | quote }} - - name: BULK_LOADER_BINARY_PATH - value: /opt/flex/bin/bulk_loader - - name: DEFAULT_GRAPH_NAME - value: {{ .Values.defaultGraph }} - ports: - - name: admin-port - containerPort: {{ .Values.engine.service.adminPort }} - - name: query-port - containerPort: {{ .Values.engine.service.queryPort }} - {{- if .Values.engine.resources }} - resources: {{- toYaml .Values.engine.resources | nindent 12 }} - {{- end }} - volumeMounts: - - name: workspace - mountPath: {{ .Values.workspace }} - - name: config - mountPath: {{ include "graphscope-interactive.engineConfigPath" . }} - subPath: engine_config.yaml - - name: config - mountPath: /etc/interactive/setup.sh - subPath: setup.sh - volumes: - - name: config - configMap: - name: {{ include "graphscope-interactive.configmapName" . }} - defaultMode: 0755 - {{- if and .Values.engine.persistence.enabled .Values.engine.persistence.existingClaim }} - - name: workspace - persistentVolumeClaim: - claimName: {{ tpl .Values.engine.persistence.existingClaim . }} - {{- else if not .Values.engine.persistence.enabled }} - - name: workspace - emptyDir: {} - {{- else if and .Values.engine.persistence.enabled (not .Values.engine.persistence.existingClaim) }} - volumeClaimTemplates: - - metadata: - name: workspace - {{- if .Values.persistence.annotations }} - annotations: {{- include "common.tplvalues.render" (dict "value" .Values.persistence.annotations "context" $) | nindent 10 }} - {{- end }} - {{- if .Values.persistence.labels }} - labels: {{- include "common.tplvalues.render" (dict "value" .Values.persistence.labels "context" $) | nindent 10 }} - {{- end }} - spec: - accessModes: - {{- range .Values.persistence.accessModes }} - - {{ . | quote }} - {{- end }} - resources: - requests: - storage: {{ .Values.engine.persistence.size | quote }} - {{ include "graphscope-interactive.storageClass" . 
| nindent 8 }} - {{- if .Values.engine.persistence.selector }} - selector: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.engine.persistence.selector "context" $) | nindent 10 }} - {{- end -}} - {{- end }} diff --git a/charts/graphscope-interactive/templates/engine/svc-headless.yaml b/charts/graphscope-interactive/templates/engine/svc-headless.yaml deleted file mode 100644 index a49094e692d2..000000000000 --- a/charts/graphscope-interactive/templates/engine/svc-headless.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: {{ include "graphscope-interactive.engine.fullname" . }}-headless - namespace: {{ .Release.Namespace }} - labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} - app.kubernetes.io/component: engine - {{- if .Values.commonLabels }} - {{- include "common.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} - {{- end }} - annotations: - {{- if .Values.commonAnnotations }} - {{- include "common.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} - {{- end }} -spec: - type: {{ .Values.engine.service.type }} - {{- if and (eq .Values.engine.service.type "ClusterIP") .Values.engine.service.clusterIP }} - clusterIP: {{ .Values.engine.service.clusterIP }} - {{- end }} - {{- if and .Values.engine.service.loadBalancerIP (eq .Values.engine.service.type "LoadBalancer") }} - loadBalancerIP: {{ .Values.engine.service.loadBalancerIP }} - externalTrafficPolicy: {{ .Values.engine.service.externalTrafficPolicy | quote }} - {{- end }} - {{- if and (eq .Values.engine.service.type "LoadBalancer") .Values.engine.service.loadBalancerSourceRanges }} - loadBalancerSourceRanges: {{- toYaml .Values.engine.service.loadBalancerSourceRanges | nindent 4 }} - {{- end }} - ports: - - name: admin-port - port: {{ .Values.engine.service.adminPort }} - protocol: TCP - targetPort: admin-port - - name: query-port - port: {{ 
.Values.engine.service.queryPort }} - protocol: TCP - targetPort: query-port - selector: {{- include "graphscope-interactive.selectorLabels" . | nindent 4 }} - app.kubernetes.io/component: engine diff --git a/charts/graphscope-interactive/templates/primary/statefulset.yaml b/charts/graphscope-interactive/templates/primary/statefulset.yaml new file mode 100644 index 000000000000..3ecc225664e7 --- /dev/null +++ b/charts/graphscope-interactive/templates/primary/statefulset.yaml @@ -0,0 +1,154 @@ +{{- $frontendFullname := include "graphscope-interactive.frontend.fullname" . }} +{{- $primaryFullName := include "graphscope-interactive.primary.fullname" . }} +{{- $secondaryFullName := include "graphscope-interactive.secondary.fullname" . }} +{{- $releaseNamespace := .Release.Namespace }} +{{- $clusterDomain := .Values.clusterDomain }} + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "graphscope-interactive.primary.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} + app.kubernetes.io/component: primary + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.primary.replicaCount }} + selector: + matchLabels: {{ include "graphscope-interactive.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: primary + serviceName: {{ include "graphscope-interactive.primary.fullname" . 
}} + updateStrategy: + type: {{ .Values.primary.updateStrategy }} + {{- if (eq "Recreate" .Values.primary.updateStrategy) }} + rollingUpdate: null + {{- end }} + template: + metadata: + annotations: + {{- if .Values.primary.podAnnotations }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.primary.podAnnotations "context" $) | nindent 8 }} + {{- end }} + labels: {{- include "graphscope-interactive.labels" . | nindent 8 }} + app.kubernetes.io/component: primary + {{- if .Values.primary.podLabels }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.primary.podLabels "context" $) | nindent 8 }} + {{- end }} + # alibabacloud.com/custom-cni-plugin-type: "nimitz" + spec: + {{- if .Values.imagePullSecrets }}{{/* "if" does not rebind ".", so the value is referenced explicitly below */}} + imagePullSecrets: {{- toYaml .Values.imagePullSecrets | nindent 8 }} + {{- end }} + {{- if .Values.primary.hostAliases }} + hostAliases: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.primary.hostAliases "context" $) | nindent 8 }} + {{- end }} + hostNetwork: {{ .Values.primary.hostNetwork }} + hostIPC: {{ .Values.primary.hostIPC }} + {{- if .Values.primary.schedulerName }} + schedulerName: {{ .Values.primary.schedulerName | quote }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.nodeSelector "context" $) | nindent 8 }} + {{- end }} + {{- if .Values.dnsPolicy }} + dnsPolicy: {{ .Values.dnsPolicy | quote }} + {{- end }} + {{- if .Values.dnsConfig }} + dnsConfig: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.dnsConfig "context" $) | nindent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.tolerations "context" $) | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "graphscope-interactive.serviceAccountName" . 
}} + {{- if .Values.primary.affinity }} + affinity: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.primary.affinity "context" $) | nindent 8 }} + {{- end }} + initContainers: + {{- if .Values.primary.initContainers }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.primary.initContainers "context" $) | nindent 8 }} + {{- end }} + containers: + - name: admin-nginx + image: {{ include "graphscope-interactive.openresty.image" . }} + imagePullPolicy: {{ .Values.openresty.image.pullPolicy | quote }} + #command: ["sleep", "infinity"] + command: ["openresty", "-g", "daemon off;"] + ports: + - name: admin-port + containerPort: {{ .Values.primary.service.adminPort }} + {{- if .Values.primary.resources }} + resources: {{- toYaml .Values.primary.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: workspace + mountPath: {{ .Values.workspace }} + - name: config + mountPath: {{ include "graphscope-interactive.engineConfigPath" . }} + subPath: engine_config.yaml + - name: admin-proxy-nginx-config + mountPath: /usr/local/openresty/nginx/conf/nginx.conf + subPath: nginx.conf + - name: proxy-nginx + image: {{ include "graphscope-interactive.nginx.image" . }} + imagePullPolicy: {{ .Values.nginx.image.pullPolicy | quote }} + ports: + - name: query-port + containerPort: {{ .Values.primary.service.queryPort }} + {{- if .Values.primary.resources }} + resources: {{- toYaml .Values.primary.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: workspace + mountPath: {{ .Values.workspace }} + - name: config + mountPath: {{ include "graphscope-interactive.engineConfigPath" . 
}} + subPath: engine_config.yaml + - name: query-proxy-nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + volumes: + - name: query-proxy-nginx-config + configMap: + name: {{ .Release.Name }}-query-proxy-nginx-config + - name: admin-proxy-nginx-config + configMap: + name: {{ .Release.Name }}-admin-proxy-nginx-config + - name: config + configMap: + name: {{ include "graphscope-interactive.configmapName" . }} + defaultMode: 0755 + {{- if and .Values.primary.persistence.enabled .Values.primary.persistence.existingClaim }} + - name: workspace + persistentVolumeClaim: + claimName: {{ tpl .Values.primary.persistence.existingClaim . }} + {{- else if not .Values.primary.persistence.enabled }} + - name: workspace + emptyDir: {} + {{- else if and .Values.primary.persistence.enabled (not .Values.primary.persistence.existingClaim) }} + volumeClaimTemplates: + - metadata: + name: workspace + {{- if .Values.persistence.annotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.persistence.annotations "context" $) | nindent 10 }} + {{- end }} + {{- if .Values.persistence.labels }} + labels: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.persistence.labels "context" $) | nindent 10 }} + {{- end }} + spec: + accessModes: + {{- range .Values.persistence.accessModes }} + - {{ . | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.primary.persistence.size | quote }} + {{ include "graphscope-interactive.storageClass" . 
| nindent 8 }} + {{- if .Values.primary.persistence.selector }} + selector: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.primary.persistence.selector "context" $) | nindent 10 }} + {{- end -}} + {{- end }} diff --git a/charts/graphscope-interactive/templates/primary/svc.yaml b/charts/graphscope-interactive/templates/primary/svc.yaml new file mode 100644 index 000000000000..63783c1d2292 --- /dev/null +++ b/charts/graphscope-interactive/templates/primary/svc.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "graphscope-interactive.primary.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} + app.kubernetes.io/component: primary + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.commonAnnotations }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.primary.service.type }} + {{- if and (eq .Values.primary.service.type "ClusterIP") .Values.primary.service.clusterIP }} + clusterIP: {{ .Values.primary.service.clusterIP }} + {{- end }} + {{- if and .Values.primary.service.loadBalancerIP (eq .Values.primary.service.type "LoadBalancer") }} + loadBalancerIP: {{ .Values.primary.service.loadBalancerIP }} + externalTrafficPolicy: {{ .Values.primary.service.externalTrafficPolicy | quote }} + {{- end }} + {{- if and (eq .Values.primary.service.type "LoadBalancer") .Values.primary.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: {{- toYaml .Values.primary.service.loadBalancerSourceRanges | nindent 4 }} + {{- end }} + ports: + - name: admin-port + port: {{ .Values.primary.service.adminPort }} + protocol: TCP + targetPort: {{ .Values.primary.service.adminPort }} + - name: 
query-port + port: {{ .Values.primary.service.queryPort }} + protocol: TCP + targetPort: {{ .Values.primary.service.queryPort }} + selector: {{- include "graphscope-interactive.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: primary diff --git a/charts/graphscope-interactive/templates/query_proxy_nginx_conf.yaml b/charts/graphscope-interactive/templates/query_proxy_nginx_conf.yaml new file mode 100644 index 000000000000..bf3d69c812a9 --- /dev/null +++ b/charts/graphscope-interactive/templates/query_proxy_nginx_conf.yaml @@ -0,0 +1,41 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-query-proxy-nginx-config + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} + app.kubernetes.io/component: configmap + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +data: + nginx.conf: | + events {} + http { + upstream my_service_1 { + {{- $baseName := include "graphscope-interactive.secondary.fullname" . }} + {{- $replicaCount := .Values.secondary.replicaCount | int }} + {{- $serviceName := printf "%s.%s.svc.%s" (include "graphscope-interactive.secondary.fullname" .) 
.Release.Namespace .Values.clusterDomain }} + {{- $port := .Values.secondary.service.queryPort | int }} + {{- range $i := until $replicaCount }} + server {{ printf "%s-%d.%s:%d" $baseName $i $serviceName $port }}; + {{- end }} + } + + server { + {{- $queryPort := .Values.primary.service.queryPort | int }} + listen {{ $queryPort }}; + server_name localhost; + + location / { + proxy_pass http://my_service_1; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + } + } diff --git a/charts/graphscope-interactive/templates/secondary/statefulset.yaml b/charts/graphscope-interactive/templates/secondary/statefulset.yaml new file mode 100644 index 000000000000..bc0a6eb2fbef --- /dev/null +++ b/charts/graphscope-interactive/templates/secondary/statefulset.yaml @@ -0,0 +1,170 @@ +{{- $frontendFullname := include "graphscope-interactive.frontend.fullname" . }} +{{- $primaryFullName := include "graphscope-interactive.primary.fullname" . }} +{{- $secondaryFullName := include "graphscope-interactive.secondary.fullname" . }} +{{- $releaseNamespace := .Release.Namespace }} +{{- $clusterDomain := .Values.clusterDomain }} + +apiVersion: apps/v1 +kind: StatefulSet +metadata: + name: {{ include "graphscope-interactive.secondary.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . 
| nindent 4 }} + app.kubernetes.io/component: secondary + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + {{- if .Values.commonAnnotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + replicas: {{ .Values.secondary.replicaCount }} + selector: + matchLabels: {{ include "graphscope-interactive.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: secondary + serviceName: {{ include "graphscope-interactive.secondary.fullname" . }} + updateStrategy: + type: {{ .Values.secondary.updateStrategy }} + {{- if (eq "Recreate" .Values.secondary.updateStrategy) }} + rollingUpdate: null + {{- end }} + template: + metadata: + annotations: + {{- if .Values.secondary.podAnnotations }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.secondary.podAnnotations "context" $) | nindent 8 }} + {{- end }} + labels: {{- include "graphscope-interactive.labels" . | nindent 8 }} + app.kubernetes.io/component: secondary + {{- if .Values.secondary.podLabels }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.secondary.podLabels "context" $) | nindent 8 }} + {{- end }} + # alibabacloud.com/custom-cni-plugin-type: "nimitz" + spec: + {{- if .Values.imagePullSecrets }} + imagePullSecrets: {{- toYaml .Values.imagePullSecrets 
| nindent 8 }} + {{- end }} + {{- if .Values.secondary.hostAliases }} + hostAliases: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.secondary.hostAliases "context" $) | nindent 8 }} + {{- end }} + hostNetwork: {{ .Values.secondary.hostNetwork }} + hostIPC: {{ .Values.secondary.hostIPC }} + {{- if .Values.secondary.schedulerName }} + schedulerName: {{ .Values.secondary.schedulerName | quote }} + {{- end }} + {{- if .Values.nodeSelector }} + nodeSelector: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.nodeSelector "context" $) | nindent 8 }} + {{- end }} + {{- if .Values.dnsPolicy }} + dnsPolicy: {{ .Values.dnsPolicy | quote }} + {{- end }} + {{- if .Values.dnsConfig }} + dnsConfig: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.dnsConfig "context" $) | nindent 8 }} + {{- end }} + {{- if .Values.tolerations }} + tolerations: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.tolerations "context" $) | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "graphscope-interactive.serviceAccountName" . }} + {{- if .Values.secondary.affinity }} + affinity: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.secondary.affinity "context" $) | nindent 8 }} + {{- end }} + initContainers: + {{- if .Values.secondary.initContainers }} + {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.secondary.initContainers "context" $) | nindent 8 }} + {{- end }} + containers: + - name: secondary + image: {{ include "graphscope-interactive.secondary.image" . 
}} + imagePullPolicy: {{ .Values.secondary.image.pullPolicy | quote }} + #command: ["sleep", "infinity"] + command: + - /bin/bash + - -c + - | + POD_NAME=$MY_POD_NAME + if [ -z "$POD_NAME" ]; then + POD_NAME=$(hostname) + fi + echo "POD_NAME: $POD_NAME" + sudo chown -R graphscope:graphscope $INTERACTIVE_WORKSPACE + cmd="/opt/flex/bin/entrypoint.sh -t engine -w $INTERACTIVE_WORKSPACE --parallelism $ENGINE_SHARD_NUM" + echo "CMD: $cmd" + eval $cmd + # sleep infinity + env: + - name: INTERACTIVE_WORKSPACE + value: {{ .Values.workspace | quote }} + - name: primary_SERVICE_HOST + value: {{ $primaryFullName }}.{{ $releaseNamespace }}.svc.{{ $clusterDomain }} + - name: ENGINE_CONFIG_PATH + value: {{ include "graphscope-interactive.engineConfigPath" . }} + - name: REAL_ENGINE_CONFIG_PATH + value: {{ include "graphscope-interactive.realEngineConfigPath" . }} + - name: SECONDARY_QUERY_PORT + value: {{ .Values.secondary.service.queryPort | quote }} + - name: ENGINE_BINARY_PATH + value: {{ include "graphscope-interactive.engineBinaryPath" . }} + - name: ENGINE_SHARD_NUM + value: {{ .Values.secondary.threadNumPerWorker | quote }} + - name: BULK_LOADER_BINARY_PATH + value: /opt/flex/bin/bulk_loader + - name: DEFAULT_GRAPH_NAME + value: {{ .Values.defaultGraph }} + - name: ODPS_ACCESS_ID + value: {{ .Values.odps.access.id | quote}} + - name: ODPS_ACCESS_KEY + value: {{ .Values.odps.access.key | quote}} + - name: ODPS_ENDPOINT + value: {{ .Values.odps.endpoint | quote}} + ports: + - name: admin-port + containerPort: {{ .Values.secondary.service.adminPort }} + - name: query-port + containerPort: {{ .Values.secondary.service.queryPort }} + {{- if .Values.secondary.resources }} + resources: {{- toYaml .Values.secondary.resources | nindent 12 }} + {{- end }} + volumeMounts: + - name: workspace + mountPath: {{ .Values.workspace }} + - name: config + mountPath: {{ include "graphscope-interactive.engineConfigPath" . 
}} + subPath: engine_config.yaml + # - name: config + # mountPath: /etc/interactive/setup.sh + # subPath: setup.sh + volumes: + - name: config + configMap: + name: {{ include "graphscope-interactive.configmapName" . }} + defaultMode: 0755 + {{- if and .Values.secondary.persistence.enabled .Values.secondary.persistence.existingClaim }} + - name: workspace + persistentVolumeClaim: + claimName: {{ tpl .Values.secondary.persistence.existingClaim . }} + {{- else if not .Values.secondary.persistence.enabled }} + - name: workspace + emptyDir: {} + {{- else if and .Values.secondary.persistence.enabled (not .Values.secondary.persistence.existingClaim) }} + volumeClaimTemplates: + - metadata: + name: workspace + {{- if .Values.persistence.annotations }} + annotations: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.persistence.annotations "context" $) | nindent 10 }} + {{- end }} + {{- if .Values.persistence.labels }} + labels: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.persistence.labels "context" $) | nindent 10 }} + {{- end }} + spec: + accessModes: + {{- range .Values.persistence.accessModes }} + - {{ . | quote }} + {{- end }} + resources: + requests: + storage: {{ .Values.secondary.persistence.size | quote }} + {{ include "graphscope-interactive.storageClass" . | nindent 8 }} + {{- if .Values.secondary.persistence.selector }} + selector: {{- include "graphscope-interactive.tplvalues.render" (dict "value" .Values.secondary.persistence.selector "context" $) | nindent 10 }} + {{- end -}} + {{- end }} diff --git a/charts/graphscope-interactive/templates/secondary/svc.yaml b/charts/graphscope-interactive/templates/secondary/svc.yaml new file mode 100644 index 000000000000..50fb13d53e50 --- /dev/null +++ b/charts/graphscope-interactive/templates/secondary/svc.yaml @@ -0,0 +1,37 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "graphscope-interactive.secondary.fullname" . 
}} + namespace: {{ .Release.Namespace }} + labels: {{- include "graphscope-interactive.labels" . | nindent 4 }} + app.kubernetes.io/component: secondary + {{- if .Values.commonLabels }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonLabels "context" $ ) | nindent 4 }} + {{- end }} + annotations: + {{- if .Values.commonAnnotations }} + {{- include "graphscope-interactive.tplvalues.render" ( dict "value" .Values.commonAnnotations "context" $ ) | nindent 4 }} + {{- end }} +spec: + type: {{ .Values.secondary.service.type }} + {{- if and (eq .Values.secondary.service.type "ClusterIP") .Values.secondary.service.clusterIP }} + clusterIP: {{ .Values.secondary.service.clusterIP }} + {{- end }} + {{- if and .Values.secondary.service.loadBalancerIP (eq .Values.secondary.service.type "LoadBalancer") }} + loadBalancerIP: {{ .Values.secondary.service.loadBalancerIP }} + externalTrafficPolicy: {{ .Values.secondary.service.externalTrafficPolicy | quote }} + {{- end }} + {{- if and (eq .Values.secondary.service.type "LoadBalancer") .Values.secondary.service.loadBalancerSourceRanges }} + loadBalancerSourceRanges: {{- toYaml .Values.secondary.service.loadBalancerSourceRanges | nindent 4 }} + {{- end }} + ports: + - name: admin-port + port: {{ .Values.secondary.service.adminPort }} + protocol: TCP + targetPort: {{ .Values.secondary.service.adminPort }} + - name: query-port + port: {{ .Values.secondary.service.queryPort }} + protocol: TCP + targetPort: {{ .Values.secondary.service.queryPort }} + selector: {{- include "graphscope-interactive.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: secondary diff --git a/charts/graphscope-interactive/values.yaml b/charts/graphscope-interactive/values.yaml index ef424b655da5..9bca775eb93f 100644 --- a/charts/graphscope-interactive/values.yaml +++ b/charts/graphscope-interactive/values.yaml @@ -18,6 +18,17 @@ commonAnnotations: {} ## commonLabels: {} +global: + #imageRegistry: registry.cn-hongkong.aliyuncs.com + storageClass: "" + +odps: + secretName: "odps-secret" + access: + id: "" + key: "" + endpoint: "" + ## javaOpts: "" @@ -30,6 +41,7 @@ workspace: "/tmp/interactive_workspace" ## default graph defaultGraph: modern_graph +nodeSelector: {} hiactorWorkerNum: 1 @@ -79,33 +91,72 @@ persistence: ## labels: {} +## Ingress configuration +ingress: + hostname: "interactive.example" + paths: + - path: / + +cronjob: + enabled: true + schedule: "* * * * *" + image: + registry: reg.docker.alibaba-inc.com + repository: "7brs/busybox" + tag: "latest" + pullPolicy: IfNotPresent + command: | + echo "Current date: $(date +%Y-%m-%d)" > /tmp/current_date.sh + /bin/sh /tmp/current_date.sh + tolerations: [] + podAnnotations: {} + podLabels: {} + +nginx: + image: + registry: reg.docker.alibaba-inc.com + repository: "7brs/interactive" + tag: "nginx-debug" + pullPolicy: Always + +openresty: + image: + registry: reg.docker.alibaba-inc.com + repository: "7brs/interactive" + tag: "openresty-debug" + pullPolicy: Always + ## GraphScope Interactive parameters ## -engine: +primary: image: - registry: registry.cn-hongkong.aliyuncs.com - repository: graphscope/interactive + #registry: registry.cn-hongkong.aliyuncs.com + registry: reg.docker.alibaba-inc.com + #repository: graphscope/interactive + repository: 7brs/interactive # Overrides the image tag whose default is the chart appVersion. 
- tag: "v0.0.3" + tag: "debug" ## Specify a imagePullPolicy ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images ## - pullPolicy: IfNotPresent + pullPolicy: Always ## Optionally specify an array of imagePullSecrets (secrets must be manually created in the namespace) ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ ## Example: ## pullSecrets: ## - myRegistryKeySecretName ## - pullSecrets: [ ] + pullSecrets: [] replicaCount: 1 + hangUntilSuccess: false + logLevel: INFO # Number of thread each worker will use - threadNumPerWorker: 1 + threadNumPerWorker: 64 ## updateStrategy for GraphScope Interactive statefulset ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies @@ -133,6 +184,7 @@ engine: ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity ## Note: podAffinityPreset, podAntiAffinityPreset, and nodeAffinityPreset will be ignored when it's set ## + affinity: {} # affinity: # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: @@ -148,6 +200,10 @@ engine: ## nodeSelector: {} + hostAliases: {} + + hostIPC: false + ## Tolerations for GraphScope Interactive pods assignment ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ ## @@ -170,7 +226,13 @@ engine: ## GraphScope Interactive container's resource requests and limits ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ ## - resources: {} + resources: + limits: + cpu: 64000m + memory: 32Gi + requests: + cpu: 64000m + memory: 32Gi # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. 
If you do want to specify resources, uncomment the following @@ -210,8 +272,8 @@ engine: ## Name of existing PVC to hold GraphScope Interactive data ## NOTE: When it's set the rest of persistence parameters are ignored ## - existingClaim: "graphscope-interactive-pvc" - #existingClaim: "" + # existingClaim: "graphscope-interactive-pvc" + existingClaim: "" ## Persistent Volume Storage Class ## If defined, storageClassName: @@ -245,10 +307,14 @@ engine: service: ## Service type ## - type: ClusterIP + type: NodePort ## Service port ## - servicePort: 55557 + ports: + - name: query_port + port: 10000 + targetPort: 10000 + protocol: TCP queryPort: 10000 @@ -296,23 +362,269 @@ engine: ## # maxUnavailable: 1 - ## GraphScope Interactive pod label. If labels are same as commonLabels , this will take precedence. + # ## GraphScope Interactive pod label. If labels are same as commonLabels , this will take precedence. + # ## + podLabels: {} + + +## GraphScope Interactive parameters +## +secondary: + image: + #registry: registry.cn-hongkong.aliyuncs.com + registry: reg.docker.alibaba-inc.com + #repository: graphscope/interactive + repository: 7brs/interactive + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "debug" + ## Specify a imagePullPolicy + ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' + ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images + ## + pullPolicy: Always + ## Optionally specify an array of imagePullSecrets (secrets must be manually created in the namespace) + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ + ## Example: + ## pullSecrets: + ## - myRegistryKeySecretName + ## + pullSecrets: [] + + replicaCount: 2 + + logLevel: INFO + + # Number of thread each worker will use + threadNumPerWorker: 64 + + ## updateStrategy for GraphScope Interactive statefulset + ## ref: https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#update-strategies + ## + updateStrategy: RollingUpdate + + ## GraphScope Interactive pod annotations + ## ref: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ + ## + podAnnotations: {} + + ## GraphScope Interactive pod affinity preset + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity + ## Allowed values: soft, hard + ## + podAffinityPreset: "" + + ## GraphScope Interactive pod anti-affinity preset + ## ref: https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node/#inter-pod-affinity-and-anti-affinity + ## Allowed values: soft, hard + ## + podAntiAffinityPreset: soft + + ## Affinity for GraphScope Interactive pods assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity + ## Note: podAffinityPreset, podAntiAffinityPreset, and nodeAffinityPreset will be ignored when it's set + ## + affinity: {} + # affinity: + # nodeAffinity: + # requiredDuringSchedulingIgnoredDuringExecution: + # nodeSelectorTerms: + # - matchExpressions: + # - key: app + # operator: In + # values: + # - interactive_single_node + + ## Node labels for GraphScope Interactive pods 
assignment + ## ref: https://kubernetes.io/docs/user-guide/node-selection/ + ## + nodeSelector: {} + + hostAliases: {} + + hostIPC: false + + ## Tolerations for GraphScope Interactive pods assignment + ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ + ## + tolerations: [] + + ## GraphScope Interactive Pod security context + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-pod + ## + podSecurityContext: + enabled: false + fsGroup: 1001 + + ## GraphScope Interactive container security context + ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/security-context/#set-the-security-context-for-a-container + ## + containerSecurityContext: + enabled: false + runAsUser: 1001 + + ## GraphScope Interactive container's resource requests and limits + ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ + ## + resources: + limits: + cpu: 320000m + #memory: 400Gi + memory: 150Gi + requests: + cpu: 32000m + #memory: 300Gi + memory: 150Gi + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + + ## GraphScope Interactive container's liveness and readiness probes + ## ref: https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-probes + ## + livenessProbe: + enabled: false + initialDelaySeconds: 120 + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + successThreshold: 1 + readinessProbe: + enabled: false + initialDelaySeconds: 30 + periodSeconds: 10 + timeoutSeconds: 1 + failureThreshold: 3 + successThreshold: 1 + + ## Enable persistence using Persistent Volume Claims + ## ref: http://kubernetes.io/docs/user-guide/persistent-volumes/ + ## + persistence: + ## If true, use a Persistent Volume Claim, If false, use emptyDir + ## + enabled: true + ## Name of existing PVC to hold GraphScope Interactive data + ## NOTE: When it's set the rest of persistence parameters are ignored + ## + # existingClaim: "graphscope-interactive-pvc" + existingClaim: "" + + ## Persistent Volume Storage Class + ## If defined, storageClassName: + ## If set to "-", storageClassName: "", which disables dynamic provisioning + ## If undefined (the default) or set to null, no storageClassName spec is + ## set, choosing the default provisioner. (gp2 on AWS, standard on + ## GKE, AWS & OpenStack) + ## + # storageClass: "manual" + ## Persistent Volume Claim annotations + ## + annotations: {} + ## Persistent Volume Access Mode + ## + accessModes: + - ReadWriteOnce # read and write by a single node. 
+ ## Persistent Volume size + ## + # size: 300Gi + size: 150Gi + ## selector can be used to match an existing PersistentVolume + ## selector: + ## matchLabels: + ## app: my-app + ## + selector: {} + + initContainers: [] + + ## GraphScope interactive Service parameters + ## + service: + ## Service type + ## + #type: NodePort + type: ClusterIP + # type: LoadBalancer + ## Service port + ## + ports: + - name: query_port + port: 10000 + targetPort: 10000 + protocol: TCP + + queryPort: 10000 + + adminPort: 7777 + + ## Specify the nodePort value for the LoadBalancer and NodePort service types. + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#type-nodeport + ## + nodePorts: + service: "" + query: "" + admin: "" + ## Service clusterIP + ## + clusterIP: None + #clusterIP: "" + ## Set the LoadBalancer service type to internal only. + ## ref: https://kubernetes.io/docs/concepts/services-networking/service/#internal-load-balancer + ## + loadBalancerIP: "" + ## Enable client source IP preservation + ## ref http://kubernetes.io/docs/tasks/access-application-cluster/create-external-load-balancer/#preserving-the-client-source-ip + ## + externalTrafficPolicy: Cluster + ## Load Balancer sources + ## https://kubernetes.io/docs/tasks/access-application-cluster/configure-cloud-provider-firewall/#restrict-access-for-loadbalancer-service + ## E.g. + ## loadBalancerSourceRanges: + ## - 10.10.10.0/24 + ## + loadBalancerSourceRanges: [] + ## Provide any additional annotations which may be required + ## + annotations: {} + + ## GraphScope Interactive Pod Disruption Budget configuration + ## ref: https://kubernetes.io/docs/tasks/run-application/configure-pdb/ ## + pdb: + enabled: false + ## Min number of pods that must still be available after the eviction + ## + minAvailable: 1 + ## Max number of pods that can be unavailable after the eviction + ## + # maxUnavailable: 1 + + # ## GraphScope Interactive pod label. 
If labels are same as commonLabels , this will take precedence. + # ## podLabels: {} ## GraphScope Frontend parameters ## frontend: image: - registry: registry.cn-hongkong.aliyuncs.com - repository: graphscope/interactive + #registry: registry.cn-hongkong.aliyuncs.com + registry: reg.docker.alibaba-inc.com + #repository: graphscope/interactive + repository: 7brs/interactive # Overrides the image tag whose default is the chart appVersion. - tag: "v0.0.3" + tag: "debug" ## Specify a imagePullPolicy ## Defaults to 'Always' if image tag is 'latest', else set to 'IfNotPresent' ## ref: http://kubernetes.io/docs/user-guide/images/#pre-pulling-images ## - pullPolicy: IfNotPresent + pullPolicy: Always ## Optionally specify an array of imagePullSecrets (secrets must be manually created in the namespace) ## ref: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ ## Example: @@ -349,7 +661,7 @@ frontend: ## ref: https://kubernetes.io/docs/concepts/configuration/assign-pod-node/#affinity-and-anti-affinity ## Note: podAffinityPreset, podAntiAffinityPreset, and nodeAffinityPreset will be ignored when it's set ## - ## affinity: {} + affinity: {} # affinity: # nodeAffinity: # requiredDuringSchedulingIgnoredDuringExecution: @@ -365,15 +677,25 @@ frontend: ## nodeSelector: {} + hostIPC: false + + hostAliases: {} + ## Tolerations for GraphScope Interactive pods assignment ## ref: https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/ ## - tolerations: [] + # tolerations: [] ## GraphScope Interactive container's resource requests and limits ## ref: http://kubernetes.io/docs/user-guide/compute-resources/ ## - resources: {} + resources: + limits: + cpu: 1000m + memory: 1Gi + requests: + cpu: 1000m + memory: 1Gi # We usually recommend not to specify default resources and to leave this as a conscious # choice for the user. This also increases chances charts run on environments with little # resources, such as Minikube. 
If you do want to specify resources, uncomment the following @@ -410,10 +732,7 @@ frontend: service: ## Service type ## - type: LoadBalancer - ## Service port - ## - servicePort: 55556 + type: ClusterIP ## Gremlin console port ## @@ -471,5 +790,3 @@ frontend: ## podLabels: {} -global: - storageClass: "" diff --git a/docs/flex/interactive/data_import.md b/docs/flex/interactive/data_import.md index 704fa55f610f..9dba375cbc8b 100644 --- a/docs/flex/interactive/data_import.md +++ b/docs/flex/interactive/data_import.md @@ -34,6 +34,10 @@ To illustrate, let's examine the `examples/modern_import_full.yaml` file. This c ``` yaml loading_config: + x_csr_params: + parallelism: 1 + build_csr_in_mem: true + use_mmap_vector: true data_source: scheme: file location: /home/modern_graph/ @@ -227,6 +231,9 @@ The table below offers a detailed breakdown of each configuration item. In this | loading_config.format.metadata.escaping | false | Whether escaping is used | No | | loading_config.format.metadata.escape_char | '\\' | Escaping character (if `escaping` is true) | No | | loading_config.format.metadata.batch_size | 4MB | The size of batch for reading from files | No | +| loading_config.x_csr_params.parallelism | 1 | Number of threads used for bulk loading | No | +| loading_config.x_csr_params.build_csr_in_mem | false | Whether to build csr fully in memory | No | +| loading_config.x_csr_params.use_mmap_vector | false | Whether to use mmap_vector rather than mmap_array for building | No | | | | | | | **vertex_mappings** | N/A | Define how to map the raw data into a graph vertex in the schema | Yes | | vertex_mappings.type_name | N/A | Name of the vertex type | Yes | diff --git a/flex/CMakeLists.txt b/flex/CMakeLists.txt index 36f4228931d5..10ca5dae3d04 100644 --- a/flex/CMakeLists.txt +++ b/flex/CMakeLists.txt @@ -15,6 +15,7 @@ option(BUILD_TEST "Whether to build test" ON) option(BUILD_DOC "Whether to build doc" OFF) option(BUILD_ODPS_FRAGMENT_LOADER "Whether to build odps fragment 
loader" OFF) option(USE_PTHASH "Whether to use pthash" OFF) +option(BUILD_PROXY "Whether to build proxy" ON) #print options message(STATUS "Build HighQPS Engine: ${BUILD_HQPS}") diff --git a/flex/bin/CMakeLists.txt b/flex/bin/CMakeLists.txt index cb3b1a271188..e3ab4b8ee009 100644 --- a/flex/bin/CMakeLists.txt +++ b/flex/bin/CMakeLists.txt @@ -32,6 +32,12 @@ if(BUILD_HQPS) install(PROGRAMS load_plan_and_gen.sh DESTINATION bin) endif() +if (BUILD_PROXY) + add_executable(proxy_server proxy_server.cc) + target_link_libraries(proxy_server flex_utils flex_server ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + install_without_export_flex_target(proxy_server) +endif() + include_directories(${Boost_INCLUDE_DIRS}) add_executable(bulk_loader bulk_loader.cc) target_link_libraries(bulk_loader flex_rt_mutable_graph flex_utils ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${Boost_LIBRARIES}) diff --git a/flex/bin/bulk_loader.cc b/flex/bin/bulk_loader.cc index d462b5e97440..68450667e935 100644 --- a/flex/bin/bulk_loader.cc +++ b/flex/bin/bulk_loader.cc @@ -37,10 +37,10 @@ void signal_handler(int signal) { << ",Clearing directory: " << work_dir << ", exiting..."; // remove all files in work_dir std::filesystem::remove_all(work_dir); - exit(0); + exit(signal); } else { LOG(ERROR) << "Received unexpected signal " << signal << ", exiting..."; - exit(1); + exit(signal); } } @@ -64,8 +64,7 @@ int main(int argc, char** argv) { * */ desc.add_options()("help", "Display help message")( - "version,v", "Display version")("parallelism,p", - bpo::value()->default_value(1), + "version,v", "Display version")("parallelism,p", bpo::value(), "parallelism of bulk loader")( "data-path,d", bpo::value(), "data directory path")( "graph-config,g", bpo::value(), "graph schema config file")( @@ -90,7 +89,6 @@ int main(int argc, char** argv) { return 0; } - uint32_t parallelism = vm["parallelism"].as(); std::string data_path = ""; std::string bulk_load_config_path = ""; std::string graph_schema_path = ""; @@ -110,17 +108,6 
@@ int main(int argc, char** argv) { return -1; } bulk_load_config_path = vm["bulk-load"].as(); - bool build_csr_in_mem = false; - if (vm.count("build-csr-in-mem")) { - build_csr_in_mem = vm["build-csr-in-mem"].as(); - LOG(INFO) << "batch init in memory: " << static_cast(build_csr_in_mem); - } - - bool use_mmap_vector = false; - if (vm.count("use-mmap-vector")) { - use_mmap_vector = vm["use-mmap-vector"].as(); - LOG(INFO) << "use mmap vector: " << static_cast(use_mmap_vector); - } setenv("TZ", "Asia/Shanghai", 1); tzset(); @@ -141,6 +128,19 @@ int main(int argc, char** argv) { return -1; } + // check whether parallelism, build_csr_in_mem, use_mmap_vector are overriden + if (vm.count("parallelism")) { + loading_config_res.value().SetParallelism(vm["parallelism"].as()); + } + if (vm.count("build-csr-in-mem")) { + loading_config_res.value().SetBuildCsrInMem( + vm["build-csr-in-mem"].as()); + } + if (vm.count("use-mmap-vector")) { + loading_config_res.value().SetUseMmapVector( + vm["use-mmap-vector"].as()); + } + std::filesystem::path data_dir_path(data_path); if (!std::filesystem::exists(data_dir_path)) { std::filesystem::create_directory(data_dir_path); @@ -163,8 +163,7 @@ int main(int argc, char** argv) { std::signal(SIGABRT, signal_handler); auto loader = gs::LoaderFactory::CreateFragmentLoader( - data_dir_path.string(), schema_res.value(), loading_config_res.value(), - parallelism, build_csr_in_mem, use_mmap_vector); + data_dir_path.string(), schema_res.value(), loading_config_res.value()); loader->LoadFragment(); t += grape::GetCurrentTime(); diff --git a/flex/bin/proxy_server.cc b/flex/bin/proxy_server.cc new file mode 100644 index 000000000000..e83197159394 --- /dev/null +++ b/flex/bin/proxy_server.cc @@ -0,0 +1,135 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include "stdlib.h"
+
+#include "flex/engines/hqps_db/core/utils/hqps_utils.h"
+#include "flex/engines/http_server/service/proxy_service.h"
+#include "flex/utils/service_utils.h"
+
+#include
+
+#include
+
+namespace bpo = boost::program_options;
+
+namespace gs {
+// Parse "host:port,..."; entries without ':' are skipped, a bad port fails.
+bool parse_endpoints(const std::string& input_string,
+                     std::vector>& endpoints) {
+  std::istringstream iss(input_string);
+  std::string endpoint;
+
+  while (std::getline(iss, endpoint, ',')) {
+    // Split the endpoint into host and port using ':'
+    size_t delimiter_pos = endpoint.find(':');
+    if (delimiter_pos == std::string::npos) {
+      std::cerr << "Invalid endpoint: " << endpoint << ", missing delimiter ':'"
+                << std::endl;
+      continue;
+    }
+
+    std::string host = endpoint.substr(0, delimiter_pos);
+    std::string port_str = endpoint.substr(delimiter_pos + 1);
+    unsigned long long port;  // wider than uint16_t so the range check works
+    try {
+      port = std::stoull(port_str);
+    } catch (const std::logic_error& e) {  // invalid_argument or out_of_range
+      LOG(ERROR) << "Invalid port: " << port_str << ", must be a number"
+                 << std::endl;
+      return false;
+    }
+
+    // Check for valid port range
+    if (port < 1 || port > 65535) {
+      LOG(ERROR) << "Invalid port: " << port << ", must be between 1 and 65535"
+                 << std::endl;
+      return false;
+    }
+    endpoints.push_back({host, static_cast<uint16_t>(port)});
+  }
+  return true;
+}
+}  // namespace gs
+
+/**
+ * The main entrance for ProxyServer.
+ * The ProxyServer will block if one request is not executed by the server.
+ */
+int main(int argc, char** argv) {
+  bpo::options_description desc("Usage:");
+  desc.add_options()("help,h", "Display help messages")(
+      "endpoints,e", bpo::value()->required(),
+      "The endpoints of the proxy server, e.g., {ip}:{port},{ip}:{port},...")(
+      "heartbeat-interval,i", bpo::value()->default_value(1),
+      "The interval of heartbeat check in seconds")(
+      "enable-heartbeat-check", bpo::value()->default_value(false),
+      "Enable heartbeat check or not")(
+      "port,p", bpo::value()->default_value(9999),
+      "The port of the proxy server")(
+      "hang-until-success", bpo::value()->default_value(true),
+      "Hang until the request is successfully forwarded")(
+      "parallelism", bpo::value()->default_value(1),
+      "The number of threads to handle requests");
+
+  setenv("TZ", "Asia/Shanghai", 1);
+  tzset();
+
+  bpo::variables_map vm;
+  bpo::store(bpo::command_line_parser(argc, argv).options(desc).run(), vm);
+  // --help must be served before notify(), which enforces required options.
+  if (vm.count("help")) {
+    std::cout << desc << std::endl;
+    return 0;
+  }
+
+  try {
+    bpo::notify(vm);
+  } catch (const bpo::error& e) {
+    LOG(FATAL) << "Invalid command line arguments: " << e.what();
+  }
+  std::vector> endpoints;
+  if (!gs::parse_endpoints(vm["endpoints"].as(), endpoints)) {
+    LOG(FATAL) << "Failed to parse endpoints";
+    return 0;
+  }
+
+  LOG(INFO) << "got endpoints of size: " << endpoints.size()
+            << ", :" << gs::to_string(endpoints);
+
+  uint32_t shard_num = 1;
+  uint16_t http_port = 9999;
+  if (vm.count("port")) {
+    http_port = vm["port"].as();
+  }
+  if (vm.count("parallelism")) {
+    shard_num = vm["parallelism"].as();
+  }
+
+  if (!server::ProxyService::get()
+           .init(shard_num, http_port, endpoints,
+                 vm["enable-heartbeat-check"].as(),
+                 vm["heartbeat-interval"].as(),
+                 vm["hang-until-success"].as())
+           .ok()) {
+    LOG(FATAL) << "Failed to init ProxyService";
+    return 0;
+  }
+  server::ProxyService::get().run_and_wait_for_exit();
+
+  return 0;
+}
diff --git a/flex/engines/graph_db/database/graph_db.cc b/flex/engines/graph_db/database/graph_db.cc
index
e761a83c3551..4a8c142b0a97 100644 --- a/flex/engines/graph_db/database/graph_db.cc +++ b/flex/engines/graph_db/database/graph_db.cc @@ -42,7 +42,14 @@ struct SessionLocalContext { char _padding2[4096 - sizeof(GraphDBSession) % 4096]; }; -GraphDB::GraphDB() = default; +GraphDB::GraphDB() + : work_dir_(""), + contexts_(nullptr), + thread_num_(0), + monitor_thread_running_(false), + last_compaction_ts_(0), + compact_thread_running_(false) {} + GraphDB::~GraphDB() { if (compact_thread_running_) { compact_thread_running_ = false; @@ -78,6 +85,7 @@ Result GraphDB::Open(const Schema& schema, const std::string& data_dir, } Result GraphDB::Open(const GraphDBConfig& config) { + graph_ = std::make_unique(); const std::string& data_dir = config.data_dir; const Schema& schema = config.schema; if (!std::filesystem::exists(data_dir)) { @@ -88,19 +96,19 @@ Result GraphDB::Open(const GraphDBConfig& config) { bool create_empty_graph = false; if (!std::filesystem::exists(schema_file)) { create_empty_graph = true; - graph_.mutable_schema() = schema; + graph_->mutable_schema() = schema; } work_dir_ = data_dir; thread_num_ = config.thread_num; try { - graph_.Open(data_dir, config.memory_level); + graph_->Open(data_dir, config.memory_level); } catch (std::exception& e) { LOG(ERROR) << "Exception: " << e.what(); return Result(StatusCode::InternalError, "Exception: " + std::string(e.what()), false); } - if ((!create_empty_graph) && (!graph_.schema().Equals(schema))) { + if ((!create_empty_graph) && (!graph_->schema().Equals(schema))) { LOG(ERROR) << "Schema inconsistent..\n"; return Result(StatusCode::InternalError, "Schema of work directory is not compatible with the " @@ -109,7 +117,7 @@ Result GraphDB::Open(const GraphDBConfig& config) { } // Set the plugin info from schema to graph_.schema(), since the plugin info // is not serialized and deserialized. 
-  auto& mutable_schema = graph_.mutable_schema();
+  auto& mutable_schema = graph_->mutable_schema();
   mutable_schema.SetPluginDir(schema.GetPluginDir());
   std::vector> plugin_name_paths;
   const auto& plugins = schema.GetPlugins();
@@ -136,7 +144,7 @@ Result GraphDB::Open(const GraphDBConfig& config) {
   openWalAndCreateContexts(data_dir, allocator_strategy);
 
   if ((!create_empty_graph) && config.warmup) {
-    graph_.Warmup(thread_num_);
+    graph_->Warmup(thread_num_);
   }
 
   if (config.enable_monitoring) {
@@ -219,8 +227,9 @@ Result GraphDB::Open(const GraphDBConfig& config) {
           VLOG(10) << "Trigger auto compaction";
           last_compaction_at = query_num_after;
           timestamp_t ts = this->version_manager_.acquire_update_timestamp();
-          auto txn = CompactTransaction(this->graph_, this->contexts_[0].logger,
-                                        this->version_manager_, ts);
+          auto txn =
+              CompactTransaction(*this->graph_, this->contexts_[0].logger,
+                                 this->version_manager_, ts);
           txn.Commit();
           VLOG(10) << "Finish compaction";
         }
@@ -231,18 +240,62 @@ Result GraphDB::Open(const GraphDBConfig& config) {
   return Result(true);
 }
 
+// Exchanges the complete state of this database with `other`. After the
+// swap, the sessions in the contexts taken over from `other` are re-pointed
+// at this object.
+void GraphDB::Swap(GraphDB& other) {
+  std::swap(work_dir_, other.work_dir_);
+  std::swap(contexts_, other.contexts_);
+  // thread_num_ is the number of entries in contexts_; swap it before the
+  // loop so the bound matches the array that was just taken over.
+  std::swap(thread_num_, other.thread_num_);
+  // NOTE: the graph db has changed, so the session should be updated.
+  if (contexts_ != nullptr) {
+    for (int i = 0; i < thread_num_; ++i) {
+      contexts_[i].session.set_db(*this);
+    }
+  }
+
+  std::swap(graph_, other.graph_);
+  version_manager_.swap(other.version_manager_);
+
+  std::swap(app_paths_, other.app_paths_);
+  std::swap(app_factories_, other.app_factories_);
+  std::swap(monitor_thread_running_, other.monitor_thread_running_);
+  std::swap(monitor_thread_, other.monitor_thread_);
+  std::swap(last_compaction_ts_, other.last_compaction_ts_);
+  std::swap(compact_thread_running_, other.compact_thread_running_);
+  std::swap(compact_thread_, other.compact_thread_);
+
+  // std::this_thread::sleep_for(std::chrono::seconds(10));
+  if (graph_) {
+    for (const auto& plugin : graph_->schema().GetPlugins()) {
+      LOG(INFO) << "plugin: " << plugin.first << ", " << plugin.second.first
+                << ", " << plugin.second.second;
+    }
+  }
+}
+
 void GraphDB::Close() {
+  LOG(INFO) << "closing: " << static_cast(monitor_thread_running_);
   if (monitor_thread_running_) {
     monitor_thread_running_ = false;
     monitor_thread_.join();
   }
+  LOG(INFO) << "Close monitor thread: "
+            << static_cast(monitor_thread_running_);
   if (compact_thread_running_) {
     compact_thread_running_ = false;
     compact_thread_.join();
   }
+  LOG(INFO) << "Close compact thread";
   //-----------Clear graph_db----------------
-  graph_.Clear();
+  if (graph_) {
+    graph_->Clear();
+  }
+  LOG(INFO) << "Clear graph db";
   version_manager_.clear();
+  LOG(INFO) << "Clear version manager";
   if (contexts_ != nullptr) {
     for (int i = 0; i < thread_num_; ++i) {
       contexts_[i].~SessionLocalContext();
@@ -250,13 +303,14 @@ void GraphDB::Close() {
     free(contexts_);
     contexts_ = nullptr;
   }
+  LOG(INFO) << "Clear contexts";
   std::fill(app_paths_.begin(), app_paths_.end(), "");
   std::fill(app_factories_.begin(), app_factories_.end(), nullptr);
 }
 
 ReadTransaction GraphDB::GetReadTransaction() {
   uint32_t ts = version_manager_.acquire_read_timestamp();
-  return {graph_, version_manager_, ts};
+  return
{*graph_, version_manager_, ts}; } InsertTransaction GraphDB::GetInsertTransaction(int thread_id) { @@ -294,17 +340,23 @@ timestamp_t GraphDB::GetLastCompactionTimestamp() const { return last_compaction_ts_; } -const MutablePropertyFragment& GraphDB::graph() const { return graph_; } -MutablePropertyFragment& GraphDB::graph() { return graph_; } +const MutablePropertyFragment& GraphDB::graph() const { return *graph_; } +MutablePropertyFragment& GraphDB::graph() { return *graph_; } -const Schema& GraphDB::schema() const { return graph_.schema(); } +const Schema& GraphDB::schema() const { return graph_->schema(); } std::shared_ptr GraphDB::get_vertex_property_column( uint8_t label, const std::string& col_name) const { - return graph_.get_vertex_table(label).get_column(col_name); + return graph_->get_vertex_table(label).get_column(col_name); } AppWrapper GraphDB::CreateApp(uint8_t app_type, int thread_id) { + for (size_t i = 0; i < app_factories_.size(); ++i) { + if (app_factories_[i] != nullptr) { + LOG(INFO) << "App factory: " << i << ", " << app_paths_[i] << ", " + << app_factories_[i]; + } + } if (app_factories_[app_type] == nullptr) { LOG(ERROR) << "Stored procedure " << static_cast(app_type) << " is not registered."; @@ -376,19 +428,19 @@ void GraphDB::ingestWals(const std::vector& wals, for (auto& update_wal : parser.update_wals()) { uint32_t to_ts = update_wal.timestamp; if (from_ts < to_ts) { - IngestWalRange(contexts_, graph_, parser, from_ts, to_ts, thread_num); + IngestWalRange(contexts_, *graph_, parser, from_ts, to_ts, thread_num); } if (update_wal.size == 0) { - graph_.Compact(update_wal.timestamp); + graph_->Compact(update_wal.timestamp); last_compaction_ts_ = update_wal.timestamp; } else { - UpdateTransaction::IngestWal(graph_, work_dir, to_ts, update_wal.ptr, + UpdateTransaction::IngestWal(*graph_, work_dir, to_ts, update_wal.ptr, update_wal.size, contexts_[0].allocator); } from_ts = to_ts + 1; } if (from_ts <= parser.last_ts()) { - 
IngestWalRange(contexts_, graph_, parser, from_ts, parser.last_ts() + 1, + IngestWalRange(contexts_, *graph_, parser, from_ts, parser.last_ts() + 1, thread_num); } version_manager_.init_ts(parser.last_ts(), thread_num); @@ -448,7 +500,7 @@ void GraphDB::openWalAndCreateContexts(const std::string& data_dir, contexts_[i].logger.open(wal_dir_path, i); } - initApps(graph_.schema().GetPlugins()); + initApps(graph_->schema().GetPlugins()); VLOG(1) << "Successfully restore load plugins"; } diff --git a/flex/engines/graph_db/database/graph_db.h b/flex/engines/graph_db/database/graph_db.h index 502710abd012..e377fcf74bcb 100644 --- a/flex/engines/graph_db/database/graph_db.h +++ b/flex/engines/graph_db/database/graph_db.h @@ -89,6 +89,8 @@ class GraphDB { Result Open(const GraphDBConfig& config); + void Swap(GraphDB& new_db); + /** * @brief Close the current opened graph. */ @@ -149,6 +151,19 @@ class GraphDB { void UpdateCompactionTimestamp(timestamp_t ts); timestamp_t GetLastCompactionTimestamp() const; + inline int ThreadNum() const { return thread_num_; } + + inline VersionManager& version_manager() { return version_manager_; } + + inline void printAppFactory() { + for (size_t i = 0; i < app_factories_.size(); ++i) { + if (app_factories_[i] != nullptr) { + LOG(INFO) << "App factory: " << i << ", " << app_paths_[i] << ", " + << app_factories_[i]; + } + } + } + private: bool registerApp(const std::string& path, uint8_t index = 0); @@ -173,7 +188,7 @@ class GraphDB { int thread_num_; - MutablePropertyFragment graph_; + std::unique_ptr graph_; VersionManager version_manager_; std::array app_paths_; diff --git a/flex/engines/graph_db/database/graph_db_session.cc b/flex/engines/graph_db/database/graph_db_session.cc index 979b14ec55a4..f1e15b3fb00b 100644 --- a/flex/engines/graph_db/database/graph_db_session.cc +++ b/flex/engines/graph_db/database/graph_db_session.cc @@ -27,34 +27,36 @@ namespace gs { +void GraphDBSession::set_db(GraphDB& db) { db_ = db; } + ReadTransaction 
GraphDBSession::GetReadTransaction() const { - uint32_t ts = db_.version_manager_.acquire_read_timestamp(); - return ReadTransaction(db_.graph_, db_.version_manager_, ts); + uint32_t ts = db_.get().version_manager_.acquire_read_timestamp(); + return ReadTransaction(*db_.get().graph_, db_.get().version_manager_, ts); } InsertTransaction GraphDBSession::GetInsertTransaction() { - uint32_t ts = db_.version_manager_.acquire_insert_timestamp(); - return InsertTransaction(db_.graph_, alloc_, logger_, db_.version_manager_, - ts); + uint32_t ts = db_.get().version_manager_.acquire_insert_timestamp(); + return InsertTransaction(*db_.get().graph_, alloc_, logger_, + db_.get().version_manager_, ts); } SingleVertexInsertTransaction GraphDBSession::GetSingleVertexInsertTransaction() { - uint32_t ts = db_.version_manager_.acquire_insert_timestamp(); - return SingleVertexInsertTransaction(db_.graph_, alloc_, logger_, - db_.version_manager_, ts); + uint32_t ts = db_.get().version_manager_.acquire_insert_timestamp(); + return SingleVertexInsertTransaction(*db_.get().graph_, alloc_, logger_, + db_.get().version_manager_, ts); } SingleEdgeInsertTransaction GraphDBSession::GetSingleEdgeInsertTransaction() { - uint32_t ts = db_.version_manager_.acquire_insert_timestamp(); - return SingleEdgeInsertTransaction(db_.graph_, alloc_, logger_, - db_.version_manager_, ts); + uint32_t ts = db_.get().version_manager_.acquire_insert_timestamp(); + return SingleEdgeInsertTransaction(*db_.get().graph_, alloc_, logger_, + db_.get().version_manager_, ts); } UpdateTransaction GraphDBSession::GetUpdateTransaction() { - uint32_t ts = db_.version_manager_.acquire_update_timestamp(); - return UpdateTransaction(db_.graph_, alloc_, work_dir_, logger_, - db_.version_manager_, ts); + uint32_t ts = db_.get().version_manager_.acquire_update_timestamp(); + return UpdateTransaction(*db_.get().graph_, alloc_, work_dir_, logger_, + db_.get().version_manager_, ts); } bool GraphDBSession::BatchUpdate(UpdateBatch& 
batch) { @@ -63,46 +65,47 @@ bool GraphDBSession::BatchUpdate(UpdateBatch& batch) { } const MutablePropertyFragment& GraphDBSession::graph() const { - return db_.graph(); + return db_.get().graph(); } -const GraphDB& GraphDBSession::db() const { return db_; } +const GraphDB& GraphDBSession::db() const { return db_.get(); } -MutablePropertyFragment& GraphDBSession::graph() { return db_.graph(); } +MutablePropertyFragment& GraphDBSession::graph() { return db_.get().graph(); } -const Schema& GraphDBSession::schema() const { return db_.schema(); } +const Schema& GraphDBSession::schema() const { return db_.get().schema(); } std::shared_ptr GraphDBSession::get_vertex_property_column( uint8_t label, const std::string& col_name) const { - return db_.get_vertex_property_column(label, col_name); + return db_.get().get_vertex_property_column(label, col_name); } std::shared_ptr GraphDBSession::get_vertex_id_column( uint8_t label) const { - if (db_.graph().lf_indexers_[label].get_type() == PropertyType::kInt64) { + if (db_.get().graph().lf_indexers_[label].get_type() == + PropertyType::kInt64) { return std::make_shared>( dynamic_cast&>( - db_.graph().lf_indexers_[label].get_keys())); - } else if (db_.graph().lf_indexers_[label].get_type() == + db_.get().graph().lf_indexers_[label].get_keys())); + } else if (db_.get().graph().lf_indexers_[label].get_type() == PropertyType::kInt32) { return std::make_shared>( dynamic_cast&>( - db_.graph().lf_indexers_[label].get_keys())); - } else if (db_.graph().lf_indexers_[label].get_type() == + db_.get().graph().lf_indexers_[label].get_keys())); + } else if (db_.get().graph().lf_indexers_[label].get_type() == PropertyType::kUInt64) { return std::make_shared>( dynamic_cast&>( - db_.graph().lf_indexers_[label].get_keys())); - } else if (db_.graph().lf_indexers_[label].get_type() == + db_.get().graph().lf_indexers_[label].get_keys())); + } else if (db_.get().graph().lf_indexers_[label].get_type() == PropertyType::kUInt32) { return 
std::make_shared>( dynamic_cast&>( - db_.graph().lf_indexers_[label].get_keys())); - } else if (db_.graph().lf_indexers_[label].get_type() == + db_.get().graph().lf_indexers_[label].get_keys())); + } else if (db_.get().graph().lf_indexers_[label].get_type() == PropertyType::kStringView) { return std::make_shared>( dynamic_cast&>( - db_.graph().lf_indexers_[label].get_keys())); + db_.get().graph().lf_indexers_[label].get_keys())); } else { return nullptr; } @@ -174,19 +177,22 @@ Result> GraphDBSession::Eval(const std::string& input) { result_buffer); } -void GraphDBSession::GetAppInfo(Encoder& result) { db_.GetAppInfo(result); } +void GraphDBSession::GetAppInfo(Encoder& result) { + db_.get().GetAppInfo(result); +} int GraphDBSession::SessionId() const { return thread_id_; } CompactTransaction GraphDBSession::GetCompactTransaction() { - timestamp_t ts = db_.version_manager_.acquire_update_timestamp(); - return CompactTransaction(db_.graph_, logger_, db_.version_manager_, ts); + timestamp_t ts = db_.get().version_manager_.acquire_update_timestamp(); + return CompactTransaction(*db_.get().graph_, logger_, + db_.get().version_manager_, ts); } bool GraphDBSession::Compact() { auto txn = GetCompactTransaction(); - if (txn.timestamp() > db_.GetLastCompactionTimestamp() + 100000) { - db_.UpdateCompactionTimestamp(txn.timestamp()); + if (txn.timestamp() > db_.get().GetLastCompactionTimestamp() + 100000) { + db_.get().UpdateCompactionTimestamp(txn.timestamp()); txn.Commit(); return true; } else { @@ -214,7 +220,7 @@ AppBase* GraphDBSession::GetApp(int type) { if (likely(apps_[type] != nullptr)) { app = apps_[type]; } else { - app_wrappers_[type] = db_.CreateApp(type, thread_id_); + app_wrappers_[type] = db_.get().CreateApp(type, thread_id_); if (app_wrappers_[type].app() == NULL) { LOG(ERROR) << "[Query-" + std::to_string((int) type) << "] is not registered..."; @@ -229,6 +235,29 @@ AppBase* GraphDBSession::GetApp(int type) { #undef likely // likely +// bool 
GraphDBSession::SwapGraphData(const Schema& schema, +// const std::string& data_dir) { +// // auto update_transaction = GetUpdateTransaction(); +// LOG(INFO) << "Acquire update timestamp..."; +// auto ts = db_.get().version_manager_.acquire_update_timestamp(); +// // Use a update transaction to avoid new transaction come. +// GraphDB new_db; +// auto open_res = new_db.Open(schema, data_dir, db_.get().thread_num_); +// if (!open_res.ok()) { +// return false; +// } +// LOG(INFO) << "Successfully open new db..."; +// db_.get().Swap(new_db); +// LOG(INFO) << "Successfully swap db..."; + +// // NOW the version manager is in the new db. +// new_db.version_manager_.release_update_timestamp(ts); +// LOG(INFO) << "Successfully release update timestamp..."; +// new_db.Close(); +// LOG(INFO) << "Successfully close new db..."; +// return true; +// } + #ifdef BUILD_HQPS Result> GraphDBSession::parse_query_type_from_cypher_json( diff --git a/flex/engines/graph_db/database/graph_db_session.h b/flex/engines/graph_db/database/graph_db_session.h index 5d21f13ec28c..38662d19bc19 100644 --- a/flex/engines/graph_db/database/graph_db_session.h +++ b/flex/engines/graph_db/database/graph_db_session.h @@ -28,6 +28,8 @@ #include "flex/utils/property/column.h" #include "flex/utils/result.h" +#include + namespace gs { class GraphDB; @@ -67,6 +69,8 @@ class GraphDBSession { } ~GraphDBSession() {} + void set_db(GraphDB& db); + ReadTransaction GetReadTransaction() const; InsertTransaction GetInsertTransaction(); @@ -109,6 +113,8 @@ class GraphDBSession { AppBase* GetApp(int idx); + // bool SwapGraphData(const Schema& schema, const std::string& data_dir); + private: #ifdef BUILD_HQPS Result> @@ -183,7 +189,7 @@ class GraphDBSession { "Invalid input tag: " + std::to_string(input_tag))); } } - GraphDB& db_; + std::reference_wrapper db_; Allocator& alloc_; WalWriter& logger_; std::string work_dir_; diff --git a/flex/engines/graph_db/database/version_manager.cc 
b/flex/engines/graph_db/database/version_manager.cc index 9ff538b67505..8e9aa32c7938 100644 --- a/flex/engines/graph_db/database/version_manager.cc +++ b/flex/engines/graph_db/database/version_manager.cc @@ -140,6 +140,28 @@ bool VersionManager::revert_update_timestamp(uint32_t ts) { return false; } +void VersionManager::swap(VersionManager& vm) { + auto tmp = vm.write_ts_.load(); + vm.write_ts_.store(write_ts_.load()); + write_ts_.store(tmp); + + tmp = vm.read_ts_.load(); + vm.read_ts_.store(read_ts_.load()); + read_ts_.store(tmp); + + tmp = vm.pending_reqs_.load(); + vm.pending_reqs_.store(pending_reqs_.load()); + pending_reqs_.store(tmp); + + tmp = vm.pending_update_reqs_.load(); + vm.pending_update_reqs_.store(pending_update_reqs_.load()); + pending_update_reqs_.store(tmp); + + buf_.swap(vm.buf_); + // std::swap(lock_, vm.lock_); + std::swap(thread_num_, vm.thread_num_); +} + } // namespace gs #undef likely diff --git a/flex/engines/graph_db/database/version_manager.h b/flex/engines/graph_db/database/version_manager.h index 2a7d4f3fd85a..8a65d2b70643 100644 --- a/flex/engines/graph_db/database/version_manager.h +++ b/flex/engines/graph_db/database/version_manager.h @@ -51,6 +51,8 @@ class VersionManager { void release_update_timestamp(uint32_t ts); bool revert_update_timestamp(uint32_t ts); + void swap(VersionManager& rhs); + private: std::atomic write_ts_{1}; std::atomic read_ts_{0}; diff --git a/flex/engines/hqps_db/core/utils/hqps_utils.h b/flex/engines/hqps_db/core/utils/hqps_utils.h index 5cae552776e0..781693e74e49 100644 --- a/flex/engines/hqps_db/core/utils/hqps_utils.h +++ b/flex/engines/hqps_db/core/utils/hqps_utils.h @@ -803,6 +803,13 @@ struct to_string_impl { } }; +template <> +struct to_string_impl { + static inline std::string to_string(const uint16_t& empty) { + return std::to_string((int32_t) empty); + } +}; + template <> struct to_string_impl { static inline std::string to_string(const int64_t& empty) { diff --git 
a/flex/engines/http_server/CMakeLists.txt b/flex/engines/http_server/CMakeLists.txt
index 07cd7034d813..a7c80d3d1419 100644
--- a/flex/engines/http_server/CMakeLists.txt
+++ b/flex/engines/http_server/CMakeLists.txt
@@ -11,6 +11,10 @@ if (Hiactor_FOUND)
         list(FILTER server_actor_autogen_files EXCLUDE REGEX ".*codegen.*")
     endif ()
 
+    if (NOT BUILD_PROXY)
+        list(FILTER server_actor_autogen_files EXCLUDE REGEX ".*proxy.*")
+    endif ()
+
     # get all .cc files in current directory, except for generated/
     file(GLOB_RECURSE SERVER_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.cc")
     list(FILTER SERVER_FILES EXCLUDE REGEX ".*generated.*")
@@ -22,6 +26,14 @@ if (Hiactor_FOUND)
         list(FILTER SERVER_FILES EXCLUDE REGEX ".*workdir_manipulator*")
     endif ()
 
+    if (NOT BUILD_PROXY)
+        list(FILTER SERVER_FILES EXCLUDE REGEX ".*proxy_actor*")
+        list(FILTER SERVER_FILES EXCLUDE REGEX ".*proxy_http*")
+        list(FILTER SERVER_FILES EXCLUDE REGEX ".*proxy_service*")
+    endif ()
+
+    message(STATUS "SERVER_FILES: ${SERVER_FILES}")
+
     add_library(flex_server STATIC ${SERVER_FILES} ${server_actor_autogen_files})
     add_dependencies(flex_server server_actor_autogen)
     target_compile_options (flex_server
diff --git a/flex/engines/http_server/actor/admin_actor.act.cc b/flex/engines/http_server/actor/admin_actor.act.cc
index 77c6c142c7cc..fad7d710992c 100644
--- a/flex/engines/http_server/actor/admin_actor.act.cc
+++ b/flex/engines/http_server/actor/admin_actor.act.cc
@@ -29,6 +29,68 @@
 
 namespace server {
 
+// Wraps `message` in a {"message": "..."} JSON object. Quotes, backslashes
+// and control characters are escaped so the result is always valid JSON.
+std::string to_message_json(const std::string& message) {
+  static const char kHex[] = "0123456789abcdef";
+  std::string escaped;
+  escaped.reserve(message.size());
+  for (const char c : message) {
+    const auto uc = static_cast<unsigned char>(c);
+    if (c == '"' || c == '\\') {
+      escaped += '\\';
+      escaped += c;
+    } else if (uc < 0x20) {
+      // Control characters are not allowed raw inside a JSON string.
+      escaped += "\\u00";
+      escaped += kHex[uc >> 4];
+      escaped += kHex[uc & 0xF];
+    } else {
+      escaped += c;
+    }
+  }
+  return "{\"message\":\"" + escaped + "\"}";
+}
+
+// Replaces the data served by the GraphDB returned by gs::GraphDB::get() with
+// the graph stored in `data_dir`. Resolves to true on success (or when the
+// requested graph is already the running one), false if it cannot be opened.
+seastar::future SwapGraphData(const std::string& in_graph_name,
+                              const std::string& cur_graph_name,
+                              const gs::Schema& schema,
+                              const std::string& data_dir) {
+  if (in_graph_name == cur_graph_name) {
+    return seastar::make_ready_future(true);
+  }
+
+  return seastar::sleep(std::chrono::seconds(1))
+      .then([in_graph_name, cur_graph_name, data_dir, schema]() {
+        // auto update_transaction = GetUpdateTransaction();
+        LOG(INFO) << "Acquire update timestamp...";
+        auto& old_db = gs::GraphDB::get();
+        auto ts = old_db.version_manager().acquire_update_timestamp();
+        // Use a update transaction to avoid new transaction come.
+        gs::GraphDB new_db;
+        auto open_res = new_db.Open(schema, data_dir, old_db.ThreadNum());
+        if (!open_res.ok()) {
+          // Nothing was swapped, so the timestamp is still held by old_db;
+          // give it back or it stays acquired after this failure.
+          old_db.version_manager().release_update_timestamp(ts);
+          return seastar::make_ready_future(false);
+        }
+        LOG(INFO) << "Successfully open new db...";
+        old_db.Swap(new_db);
+        LOG(INFO) << "Successfully swap db...";
+        // NOW the version manager is in the new db.
+        new_db.version_manager().release_update_timestamp(ts);
+        LOG(INFO) << "Successfully release update timestamp...";
+        new_db.Close();
+        old_db.printAppFactory();
+        LOG(INFO) << "Successfully close new db...";
+        return seastar::make_ready_future(true);
+      });
+}
+
 gs::GraphStatistics get_graph_statistics(const gs::GraphDBSession& sess) {
   gs::GraphStatistics stat;
   const auto& graph = sess.graph();
@@ -481,19 +543,23 @@ seastar::future admin_actor::run_get_graph_meta(
       VLOG(10) << "Successfully get all procedures: "
                << get_all_procedure_res.value().size();
       auto& all_plugin_metas = get_all_procedure_res.value();
-      for (auto& plugin_meta : all_plugin_metas) {
-        add_runnable_info(plugin_meta);
-      }
-      auto& graph_meta = meta_res.value();
-      // There can also be procedures that builtin in the graph meta.
-      for (auto& plugin_meta : graph_meta.plugin_metas) {
-        add_runnable_info(plugin_meta);
+      {
+        std::lock_guard lock(mtx_);
+        for (auto& plugin_meta : all_plugin_metas) {
+          add_runnable_info(plugin_meta);
+        }
+        auto& graph_meta = meta_res.value();
+        // There can also be procedures that builtin in the graph meta.
+ for (auto& plugin_meta : graph_meta.plugin_metas) { + add_runnable_info(plugin_meta); + } + + graph_meta.plugin_metas.insert(graph_meta.plugin_metas.end(), + all_plugin_metas.begin(), + all_plugin_metas.end()); + return seastar::make_ready_future( + gs::Result(std::move(graph_meta.ToJson()))); } - graph_meta.plugin_metas.insert(graph_meta.plugin_metas.end(), - all_plugin_metas.begin(), - all_plugin_metas.end()); - return seastar::make_ready_future( - gs::Result(std::move(graph_meta.ToJson()))); } else { LOG(ERROR) << "Fail to get all procedures: " << get_all_procedure_res.status().error_message() << " for " @@ -569,8 +610,8 @@ seastar::future admin_actor::run_delete_graph( } WorkDirManipulator::DeleteGraph(query_param.content); return seastar::make_ready_future( - gs::Result("Successfully delete graph: " + - query_param.content)); + gs::Result(to_message_json( + "Successfully delete graph: " + query_param.content))); } else { LOG(ERROR) << "Fail to delete graph: " << delete_res.status().error_message(); @@ -609,7 +650,7 @@ seastar::future admin_actor::run_graph_loading( gs::StatusCode::InvalidImportFile, "Fail to parse json: "))); } - int32_t loading_thread_num = 1; + int32_t loading_thread_num = 16; if (yaml["loading_thread_num"]) { loading_thread_num = yaml["loading_thread_num"].as(); } @@ -809,8 +850,8 @@ seastar::future admin_actor::delete_procedure( VLOG(10) << "Successfully delete procedure: " << procedure_id; return seastar::make_ready_future( - gs::Result("Successfully delete procedure: " + - procedure_id)); + gs::Result( + to_message_json("Successfully delete procedure: " + procedure_id))); } // update a procedure by graph name and procedure name @@ -867,8 +908,8 @@ seastar::future admin_actor::update_procedure( if (update_res.ok()) { VLOG(10) << "Successfully update procedure: " << procedure_id; return seastar::make_ready_future( - gs::Result("Successfully update procedure: " + - procedure_id)); + gs::Result( + to_message_json("Successfully update 
procedure: " + procedure_id))); } else { LOG(ERROR) << "Fail to create procedure: " << update_res.status().error_message(); @@ -1006,75 +1047,77 @@ seastar::future admin_actor::start_service( // First Stop query_handler's actors. auto& hqps_service = HQPSService::get(); - return hqps_service.stop_query_actors().then([this, prev_lock, graph_name, - schema_value, cur_running_graph, - data_dir_value, &hqps_service] { - LOG(INFO) << "Successfully stopped query handler"; - - { - std::lock_guard lock(mtx_); - auto& db = gs::GraphDB::get(); - LOG(INFO) << "Update service running on graph:" << graph_name; - - // use the previous thread num - auto thread_num = db.SessionNum(); - db.Close(); - VLOG(10) << "Closed the previous graph db"; - if (!db.Open(schema_value, data_dir_value, thread_num).ok()) { - LOG(ERROR) << "Fail to load graph from data directory: " - << data_dir_value; - if (!prev_lock) { // If the graph is not locked before, and we - // fail at some steps after locking, we should - // unlock it. 
- metadata_store_->UnlockGraphIndices(graph_name); - } - return seastar::make_ready_future( - gs::Result(gs::Status( - gs::StatusCode::InternalError, - "Fail to load graph from data directory: " + data_dir_value))); - } - LOG(INFO) << "Successfully load graph from data directory: " - << data_dir_value; - // unlock the previous graph - if (graph_name != cur_running_graph) { - auto unlock_res = - metadata_store_->UnlockGraphIndices(cur_running_graph); - if (!unlock_res.ok()) { - LOG(ERROR) << "Fail to unlock graph: " << cur_running_graph; - if (!prev_lock) { + LOG(INFO) << "Successfully stopped query handler"; + + metadata_store_->ClearRunningGraph(); + VLOG(10) << "Closed the previous graph db"; + + LOG(INFO) << "Update service running on graph:" << graph_name; + + return SwapGraphData(graph_name, cur_running_graph, schema_value, + data_dir_value) + .then([this, &hqps_service, graph_name, cur_running_graph, data_dir_value, + prev_lock](bool res) { + if (!res) { + LOG(ERROR) << "Fail to load graph from data directory: " + << data_dir_value; + if (!prev_lock) { // If the graph is not locked before, and we + // fail at some steps after locking, we should + // unlock it. 
metadata_store_->UnlockGraphIndices(graph_name); } return seastar::make_ready_future( - gs::Result(unlock_res.status())); - } - } - LOG(INFO) << "Update running graph to: " << graph_name; - auto set_res = metadata_store_->SetRunningGraph(graph_name); - if (!set_res.ok()) { - LOG(ERROR) << "Fail to set running graph: " << graph_name; - if (!prev_lock) { - metadata_store_->UnlockGraphIndices(graph_name); + gs::Result( + gs::Status(gs::StatusCode::InternalError, + "Fail to load graph from data directory: " + + data_dir_value))); + } else { + LOG(INFO) << "After swap"; + gs::GraphDB::get().printAppFactory(); + LOG(INFO) << "Successfully load graph from data directory: " + << data_dir_value; + // unlock the previous graph + if (graph_name != cur_running_graph) { + auto unlock_res = + metadata_store_->UnlockGraphIndices(cur_running_graph); + if (!unlock_res.ok()) { + LOG(ERROR) << "Fail to unlock graph: " << cur_running_graph; + if (!prev_lock) { + metadata_store_->UnlockGraphIndices(graph_name); + } + return seastar::make_ready_future( + gs::Result(unlock_res.status())); + } + } + LOG(INFO) << "Update running graph to: " << graph_name; + auto set_res = metadata_store_->SetRunningGraph(graph_name); + if (!set_res.ok()) { + LOG(ERROR) << "Fail to set running graph: " << graph_name; + if (!prev_lock) { + metadata_store_->UnlockGraphIndices(graph_name); + } + return seastar::make_ready_future( + gs::Result(set_res.status())); + } + + LOG(INFO) << "Successfully restart query actors"; + // now start the compiler + auto schema_path = + server::WorkDirManipulator::GetGraphSchemaPath(graph_name); + if (!hqps_service.start_compiler_subprocess(schema_path)) { + LOG(ERROR) << "Fail to start compiler"; + return seastar::make_ready_future( + gs::Result(gs::Status( + gs::StatusCode::InternalError, "Fail to start compiler"))); + } + LOG(INFO) << "Successfully started service with graph: " + << graph_name; + hqps_service.reset_start_time(); + return seastar::make_ready_future( + 
gs::Result( + std::move(to_message_json("Successfully start service")))); } - return seastar::make_ready_future( - gs::Result(set_res.status())); - } - } - hqps_service.start_query_actors(); // start on a new scope. - LOG(INFO) << "Successfully restart query actors"; - // now start the compiler - auto schema_path = - server::WorkDirManipulator::GetGraphSchemaPath(graph_name); - if (!hqps_service.start_compiler_subprocess(schema_path)) { - LOG(ERROR) << "Fail to start compiler"; - return seastar::make_ready_future( - gs::Result(gs::Status(gs::StatusCode::InternalError, - "Fail to start compiler"))); - } - LOG(INFO) << "Successfully started service with graph: " << graph_name; - hqps_service.reset_start_time(); - return seastar::make_ready_future( - gs::Result("Successfully start service")); - }); + }); } // Stop service. @@ -1111,7 +1154,8 @@ seastar::future admin_actor::stop_service( if (hqps_service.stop_compiler_subprocess()) { LOG(INFO) << "Successfully stop compiler"; return seastar::make_ready_future( - gs::Result("Successfully stop service")); + gs::Result( + to_message_json("Successfully stop service"))); } else { LOG(ERROR) << "Fail to stop compiler"; return seastar::make_ready_future( @@ -1125,6 +1169,7 @@ seastar::future admin_actor::stop_service( // get service status seastar::future admin_actor::service_status( query_param&& query_param) { + LOG(INFO) << "Get service status."; auto& hqps_service = HQPSService::get(); auto query_port = hqps_service.get_query_port(); auto running_graph_res = metadata_store_->GetRunningGraph(); @@ -1143,10 +1188,10 @@ seastar::future admin_actor::service_status( auto get_all_procedure_res = metadata_store_->GetAllPluginMeta(running_graph_res.value()); if (get_all_procedure_res.ok()) { - VLOG(10) << "Successfully get all procedures: " - << get_all_procedure_res.value().size(); + // VLOG(10) << "Successfully get all procedures: " + // << get_all_procedure_res.value().size(); auto& all_plugin_metas = 
get_all_procedure_res.value(); - VLOG(10) << "original all plugins : " << all_plugin_metas.size(); + // VLOG(10) << "original all plugins : " << all_plugin_metas.size(); for (auto& plugin_meta : all_plugin_metas) { add_runnable_info(plugin_meta); } @@ -1154,13 +1199,14 @@ seastar::future admin_actor::service_status( add_runnable_info(plugin_meta); } - VLOG(10) << "original graph meta: " << graph_meta.plugin_metas.size(); + // VLOG(10) << "original graph meta: " << + // graph_meta.plugin_metas.size(); for (auto& plugin_meta : all_plugin_metas) { if (plugin_meta.runnable) { graph_meta.plugin_metas.emplace_back(plugin_meta); } } - VLOG(10) << "got graph meta: " << graph_meta.ToJson(); + // VLOG(10) << "got graph meta: " << graph_meta.ToJson(); res["graph"] = nlohmann::json::parse(graph_meta.ToJson()); } else { LOG(ERROR) << "Fail to get all procedures: " @@ -1301,7 +1347,8 @@ seastar::future admin_actor::cancel_job( if (cancel_meta_res.ok()) { VLOG(10) << "Successfully cancel job: " << job_id; return seastar::make_ready_future( - gs::Result("Successfully cancel job: " + job_id)); + gs::Result( + to_message_json("Successfully cancel job: " + job_id))); } else { LOG(ERROR) << "Fail to cancel job: " << job_id << ", error message: " << cancel_meta_res.status().error_message(); @@ -1330,10 +1377,13 @@ seastar::future admin_actor::run_get_graph_statistic( "The queried graph is not running: " + graph_id + ", current running graph is: " + queried_graph))); } - auto statistics = get_graph_statistics( - gs::GraphDB::get().GetSession(hiactor::local_shard_id())); - return seastar::make_ready_future( - gs::Result(statistics.ToJson())); + { + std::lock_guard lock(mtx_); + auto statistics = get_graph_statistics( + gs::GraphDB::get().GetSession(hiactor::local_shard_id())); + return seastar::make_ready_future( + gs::Result(std::move(statistics.ToJson()))); + } } seastar::future admin_actor::upload_file( diff --git a/flex/engines/http_server/actor/executor.act.cc 
b/flex/engines/http_server/actor/executor.act.cc index aba73eca0f74..ff54c7e93ff3 100644 --- a/flex/engines/http_server/actor/executor.act.cc +++ b/flex/engines/http_server/actor/executor.act.cc @@ -43,7 +43,7 @@ seastar::future executor::run_graph_db_query( if (!ret.ok()) { LOG(ERROR) << "Eval failed: " << ret.status().error_message(); return seastar::make_exception_future( - "Query failed: " + ret.status().error_message()); + "Query failed: " + ret.status().error_message()); } auto result = ret.value(); seastar::sstring content(result.data(), result.size()); diff --git a/flex/engines/http_server/actor/proxy_actor.act.cc b/flex/engines/http_server/actor/proxy_actor.act.cc new file mode 100644 index 000000000000..3c87fad5790a --- /dev/null +++ b/flex/engines/http_server/actor/proxy_actor.act.cc @@ -0,0 +1,109 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "flex/engines/http_server/actor/proxy_actor.act.h" +#include "flex/engines/http_server/service/proxy_service.h" + +#include "nlohmann/json.hpp" + +#include + +namespace server { + +proxy_actor::~proxy_actor() { + // finalization + // ... +} + +proxy_actor::proxy_actor(hiactor::actor_base* exec_ctx, + const hiactor::byte_t* addr) + : hiactor::actor(exec_ctx, addr) { + set_max_concurrency(1); // set max concurrency for task reentrancy (stateful) + // initialization + // ... 
+} + +seastar::future proxy_actor::do_query( + proxy_request&& request_payload) { + auto& request = request_payload.content; + VLOG(10) << "proxy_actor::forward_request, method: " << request->_method + << ", path: " << request->_url << ", query: " << request->content; + + // recover the old url with paramters in request + auto& proxy_service = ProxyService::get(); + auto& client = proxy_service.get_client(); + return client + .forward_request(request->_url, request->_method, request->content, + request->_headers) + .then([&proxy_service](gs::Result&& result) { + if (!result.ok()) { + return seastar::make_ready_future( + proxy_query_result{(result.status())}); + } + auto& content = result.value(); + if (content.size() == 0) { + return seastar::make_exception_future( + std::runtime_error("Got no responses when forwarding request " + "to interactive servers.")); + } + // Check all responses are ok, if not ok, return error + seastar::sstring res_string; + size_t error_count = 0; + for (size_t i = 0; i < content.size(); ++i) { + auto& response = content[i]; + if (response.first != 200) { + error_count++; + } + } + if (error_count == 0) { + res_string = content[0].second; + return seastar::make_ready_future( + proxy_query_result{std::move(res_string)}); + } else { + res_string = + "Got error response when forwarding request " + "to interactive servers, error count: " + + std::to_string(error_count) + "\n"; + for (size_t i = 0; i < content.size(); ++i) { + auto& response = content[i]; + if (response.first != 200) { + LOG(ERROR) << "Got error response when forwarding request " + "to interactive servers at index: " + << std::to_string(i) << ", endpoint: " + << proxy_service.get_endpoints()[i].first + ":" + << std::to_string( + proxy_service.get_endpoints()[i].second) + << std::to_string(response.first) + ", msg:" + << response.second; + std::string tmp = + "Got error response when forwarding request " + "to interactive servers at index: " + + std::to_string(i) + + ", 
endpoint: " + proxy_service.get_endpoints()[i].first + + ":" + + std::to_string(proxy_service.get_endpoints()[i].second) + + ", code: " + std::to_string(response.first) + + ", msg: " + response.second + "\n"; + res_string += tmp; + } + } + return seastar::make_ready_future( + proxy_query_result{gs::Result(gs::Status( + gs::StatusCode::QueryFailed, std::move(res_string)))}); + } + }); +} + +} // namespace server diff --git a/flex/engines/http_server/actor/proxy_actor.act.h b/flex/engines/http_server/actor/proxy_actor.act.h new file mode 100644 index 000000000000..05f606d16d6f --- /dev/null +++ b/flex/engines/http_server/actor/proxy_actor.act.h @@ -0,0 +1,44 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef ENGINES_HTTP_SERVER_ACTOR_PROXY_ACTOR_H_ +#define ENGINES_HTTP_SERVER_ACTOR_PROXY_ACTOR_H_ + + +#include "flex/engines/http_server/types.h" + +#include +#include +#include + +namespace server { + +class ANNOTATION(actor:impl) proxy_actor : public hiactor::actor { + public: + proxy_actor(hiactor::actor_base* exec_ctx, const hiactor::byte_t* addr); + ~proxy_actor() override; + + seastar::future ANNOTATION(actor:method) do_query(proxy_request&& param); + + // DECLARE_RUN_QUERIES; + /// Declare `do_work` func here, no need to implement. 
+ ACTOR_DO_WORK() + + private: + int32_t your_private_members_ = 0; +}; +} + +#endif // ENGINES_HTTP_SERVER_ACTOR_PROXY_ACTOR_H_ \ No newline at end of file diff --git a/flex/engines/http_server/handler/admin_http_handler.cc b/flex/engines/http_server/handler/admin_http_handler.cc index e0e874640b14..c97c9bca4e5e 100644 --- a/flex/engines/http_server/handler/admin_http_handler.cc +++ b/flex/engines/http_server/handler/admin_http_handler.cc @@ -19,6 +19,7 @@ #include #include +#include #include #include "flex/engines/http_server/generated/actor/admin_actor_ref.act.autogen.h" #include "flex/engines/http_server/types.h" @@ -240,7 +241,7 @@ class admin_http_graph_handler_impl : public seastar::httpd::handler_base { auto dst_executor = executor_idx_; executor_idx_ = (executor_idx_ + 1) % shard_concurrency_; - LOG(INFO) << "Handling path:" << path << ", method: " << req->_method; + // LOG(INFO) << "Handling path:" << path << ", method: " << req->_method; auto& method = req->_method; if (method == "POST") { if (path.find("dataloading") != seastar::sstring::npos) { @@ -488,7 +489,7 @@ class admin_http_service_handler_impl : public seastar::httpd::handler_base { auto dst_executor = executor_idx_; executor_idx_ = (executor_idx_ + 1) % shard_concurrency_; - LOG(INFO) << "Handling path:" << path << ", method: " << req->_method; + // LOG(INFO) << "Handling path:" << path << ", method: " << req->_method; auto& method = req->_method; if (method == "POST") { // Then param[action] should exists @@ -525,7 +526,7 @@ class admin_http_service_handler_impl : public seastar::httpd::handler_base { } } else { // get status - LOG(INFO) << "GET with action: status"; + // LOG(INFO) << "GET with action: status"; return admin_actor_refs_[dst_executor] .service_status(query_param{std::move(req->content)}) .then_wrapped([rep = std::move(rep)]( diff --git a/flex/engines/http_server/handler/admin_http_handler.h b/flex/engines/http_server/handler/admin_http_handler.h index 14465e459a95..c7774b56934f 
100644 --- a/flex/engines/http_server/handler/admin_http_handler.h +++ b/flex/engines/http_server/handler/admin_http_handler.h @@ -16,13 +16,15 @@ #ifndef ENGINES_HTTP_SERVER_HANDLER_ADMIN_HTTP_HANDLER_H_ #define ENGINES_HTTP_SERVER_HANDLER_ADMIN_HTTP_HANDLER_H_ -#include -#include #include #include "flex/engines/http_server/handler/http_utils.h" #include "flex/engines/http_server/types.h" #include "flex/utils/service_utils.h" +#include +#include +#include + namespace server { class InteractiveAdminService; diff --git a/flex/engines/http_server/handler/hqps_http_handler.cc b/flex/engines/http_server/handler/hqps_http_handler.cc index 2ddd83c984ab..dbf51b70b19f 100644 --- a/flex/engines/http_server/handler/hqps_http_handler.cc +++ b/flex/engines/http_server/handler/hqps_http_handler.cc @@ -20,6 +20,7 @@ #include "opentelemetry/trace/span_startoptions.h" #endif // HAVE_OPENTELEMETRY_CPP +#include #include "flex/engines/graph_db/database/graph_db_session.h" #include "flex/engines/http_server/executor_group.actg.h" #include "flex/engines/http_server/options.h" @@ -72,6 +73,54 @@ class optional_param_matcher : public matcher { namespace server { +hqps_heartbeat_handler::hqps_heartbeat_handler() {} + +hqps_heartbeat_handler::~hqps_heartbeat_handler() = default; + +// TODO: return snapshot_id. 
+seastar::future> +hqps_heartbeat_handler::handle(const seastar::sstring& path, + std::unique_ptr req, + std::unique_ptr rep) { + if (path.find("sampleQuery") != seastar::sstring::npos) { + using namespace std::chrono_literals; + LOG(INFO) << "Before sampleQuery"; + return seastar::sleep(3s).then([rep = std::move(rep)]() mutable { + rep->write_body("bin", seastar::sstring{"OK"}); + rep->done(); + LOG(INFO) << "Finish sampleQuery"; + return seastar::make_ready_future>( + std::move(rep)); + }); + } else if (path.find("ready") != seastar::sstring::npos) { + LOG(INFO) << "/ready:" << hiactor::local_shard_id(); + auto& hqps_service = HQPSService::get(); + auto metadata_store = hqps_service.get_metadata_store(); + if (!metadata_store) { + rep->write_body("bin", seastar::sstring{"Metadata store is not ready"}); + rep->done(); + return seastar::make_ready_future>( + std::move(rep)); + } + auto service_status = metadata_store->GetRunningGraph(); + if (service_status.ok()) { + rep->write_body("bin", seastar::sstring{"Ready"}); + rep->done(); + return seastar::make_ready_future>( + std::move(rep)); + } else { + return seastar::make_exception_future< + std::unique_ptr>( + std::runtime_error("Service not ready")); + } + } else { + rep->write_body("bin", seastar::sstring{"Heartbeat OK"}); + rep->done(); + return seastar::make_ready_future>( + std::move(rep)); + } +} + hqps_ic_handler::hqps_ic_handler(uint32_t init_group_id, uint32_t max_group_id, uint32_t group_inc_step, uint32_t shard_concurrency) @@ -537,6 +586,7 @@ hqps_http_handler::hqps_http_handler(uint16_t http_port, int32_t shard_num) : http_port_(http_port), actors_running_(true) { ic_handlers_.resize(shard_num); adhoc_query_handlers_.resize(shard_num); + heart_beat_handlers_.resize(shard_num); } hqps_http_handler::~hqps_http_handler() { @@ -586,24 +636,25 @@ void hqps_http_handler::stop() { seastar::future<> hqps_http_handler::stop_query_actors() { // First cancel the scope. 
- return ic_handlers_[hiactor::local_shard_id()] - ->cancel_current_scope() - .then([this] { - LOG(INFO) << "Cancelled ic scope"; - return adhoc_query_handlers_[hiactor::local_shard_id()] - ->cancel_current_scope(); - }) - .then([this] { - LOG(INFO) << "Cancelled proc scope"; - actors_running_.store(false); - return seastar::make_ready_future<>(); - }); + // return ic_handlers_[hiactor::local_shard_id()] + // ->cancel_current_scope() + // .then([this] { + // LOG(INFO) << "Cancelled ic scope"; + // return adhoc_query_handlers_[hiactor::local_shard_id()] + // ->cancel_current_scope(); + // }) + // .then([this] { + // LOG(INFO) << "Cancelled proc scope"; + // actors_running_.store(false); + // return seastar::make_ready_future<>(); + // }); + return seastar::make_ready_future<>(); } void hqps_http_handler::start_query_actors() { - ic_handlers_[hiactor::local_shard_id()]->create_actors(); - adhoc_query_handlers_[hiactor::local_shard_id()]->create_actors(); - actors_running_.store(true); + // ic_handlers_[hiactor::local_shard_id()]->create_actors(); + // adhoc_query_handlers_[hiactor::local_shard_id()]->create_actors(); + // actors_running_.store(true); } seastar::future<> hqps_http_handler::set_routes() { @@ -614,6 +665,7 @@ seastar::future<> hqps_http_handler::set_routes() { auto adhoc_query_handler = new hqps_adhoc_query_handler( ic_adhoc_group_id, codegen_group_id, max_group_id, group_inc_step, shard_adhoc_concurrency); + auto heart_beat_handler = new hqps_heartbeat_handler(); auto rule_proc = new seastar::httpd::match_rule(ic_handler); rule_proc->add_str("/v1/graph") @@ -627,6 +679,13 @@ seastar::future<> hqps_http_handler::set_routes() { ic_handlers_[hiactor::local_shard_id()] = ic_handler; adhoc_query_handlers_[hiactor::local_shard_id()] = adhoc_query_handler; + heart_beat_handlers_[hiactor::local_shard_id()] = heart_beat_handler; + r.add(seastar::httpd::operation_type::GET, + seastar::httpd::url("/heartbeat"), heart_beat_handler); + 
r.add(seastar::httpd::operation_type::GET, seastar::httpd::url("/ready"), + heart_beat_handler); + r.add(seastar::httpd::operation_type::GET, + seastar::httpd::url("/sampleQuery"), heart_beat_handler); return seastar::make_ready_future<>(); }); diff --git a/flex/engines/http_server/handler/hqps_http_handler.h b/flex/engines/http_server/handler/hqps_http_handler.h index a89e97dfe6e5..484303fda8eb 100644 --- a/flex/engines/http_server/handler/hqps_http_handler.h +++ b/flex/engines/http_server/handler/hqps_http_handler.h @@ -29,6 +29,19 @@ namespace server { +class hqps_heartbeat_handler : public seastar::httpd::handler_base { + public: + hqps_heartbeat_handler(); + ~hqps_heartbeat_handler() override; + + seastar::future> handle( + const seastar::sstring& path, + std::unique_ptr req, + std::unique_ptr rep) override; + + private: +}; + class hqps_ic_handler : public seastar::httpd::handler_base { public: // extra headers @@ -133,6 +146,7 @@ class hqps_http_handler { std::vector ic_handlers_; std::vector adhoc_query_handlers_; + std::vector heart_beat_handlers_; }; } // namespace server diff --git a/flex/engines/http_server/handler/http_proxy.cc b/flex/engines/http_server/handler/http_proxy.cc new file mode 100644 index 000000000000..834afd7839f8 --- /dev/null +++ b/flex/engines/http_server/handler/http_proxy.cc @@ -0,0 +1,280 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "flex/engines/http_server/handler/http_proxy.h" +#include "flex/engines/hqps_db/core/utils/hqps_utils.h" + +#include + +namespace server { + +HeartBeatChecker::HeartBeatChecker( + // std::vector& clients, + const std::vector>& endpoints, + int32_t heart_beat_interval) + : running_(false), + heart_beat_interval_(DEFAULT_HEART_BEAT_INTERVAL), + // clients_(clients), + endpoints_(endpoints) { + endpoint_status_.resize(endpoints.size(), true); +} + +HeartBeatChecker::~HeartBeatChecker() { + if (running_) { + stop(); + } +} + +gs::Status HeartBeatChecker::start() { + running_ = true; + heartbeat_thread_ = std::thread(&HeartBeatChecker::check_heartbeat, this); + VLOG(10) << "HeartBeatChecker started"; + return gs::Status::OK(); +} + +gs::Status HeartBeatChecker::stop() { + running_ = false; + VLOG(10) << "Stopping HeartBeatChecker"; + while (!heartbeat_thread_.joinable()) { + std::this_thread::sleep_for(std::chrono::milliseconds(100)); + } + heartbeat_thread_.join(); + VLOG(10) << "HeartBeatChecker stopped"; + return gs::Status::OK(); +} + +void HeartBeatChecker::check_heartbeat() { + while (running_) { + for (size_t i = 0; i < endpoints_.size(); ++i) { + httplib::Client client(endpoints_[i].first, endpoints_[i].second); + auto res = client.Get("/"); + if (!res) { + LOG(ERROR) << "Failed to connect to endpoint at index: " << i; + endpoint_status_[i] = false; + } else { + VLOG(10) << "Heartbeat check to " << i << " is OK"; + endpoint_status_[i] = true; + } + } + std::this_thread::sleep_for(std::chrono::seconds(heart_beat_interval_)); + } +} + +const std::vector& HeartBeatChecker::get_endpoint_status() const { + return endpoint_status_; +} + +// Utils functions + +HttpForwardingResponse to_response(const httplib::Result& res) { + if (res.error() != httplib::Error::Success) { + LOG(ERROR) << "Failed to send request: " << res.error() + << ", response body: " << res.value().body; + for (auto& header : res.value().headers) { + LOG(ERROR) << "Header: " << 
header.first << ", " << header.second; + } + return std::make_pair(static_cast(res.error()), + httplib::to_string(res.error())); + } + return std::make_pair(res->status, res->body); +} + +// std::multimap; +httplib::Headers to_httplib_headers(const seastar_http_headers_t& headers) { + httplib::Headers httplib_headers; + for (auto& header : headers) { + // Those headers should not be forwarded, otherwise will cause error. + if (header.first == "Host" || header.first == "User-Agent" || + header.first == "Content-Length") { + continue; + } + httplib_headers.emplace(std::string(header.first.c_str()), + std::string(header.second.c_str())); + } + return httplib_headers; +} + +HttpProxy::HttpProxy() : initialized_(false), enable_heart_beat_check_(false) {} + +HttpProxy::~HttpProxy() { close(); } + +void HttpProxy::close() { + if (initialized_) { + if (heartbeat_checker_) { + heartbeat_checker_->stop(); + } + // for (auto& client : clients_) { + // client.stop(); + // } + initialized_ = false; + } +} + +gs::Status HttpProxy::init( + const std::vector>& endpoints, + bool enable_heart_beat_check, int32_t heart_beat_interval, + bool hang_until_success) { + enable_heart_beat_check_ = enable_heart_beat_check; + hang_until_success_ = hang_until_success; + endpoints_ = endpoints; + if (endpoints_.empty()) { + return gs::Status(gs::StatusCode::InValidArgument, "No endpoint provided"); + } + // TODO: check connection to endpoint, if not connected, return error + // clients_.reserve(endpoints_.size()); + // for (auto& endpoint : endpoints_) { + // httplib::Client client(endpoint.first, endpoint.second); + // client.set_connection_timeout(CONNECTION_TIMEOUT, 0); // 5s + // client.set_read_timeout(READ_TIMEOUT, 0); // 10s + // client.set_write_timeout(WRITE_TIMEOUT, 0); // 10s + // clients_.emplace_back(std::move(client)); + // } + // // test connection + // for (auto& client : clients_) { + // auto res = client.Get("/heartbeat"); + // if (!res) { + // return 
gs::Status(gs::StatusCode::InternalError, + // "Failed to connect to endpoint"); + // } + // } + // start heart beat check + if (enable_heart_beat_check_) { + heartbeat_checker_ = std::make_unique(endpoints_); + RETURN_IF_NOT_OK(heartbeat_checker_->start()); + } + initialized_ = true; + return gs::Status::OK(); +} + +seastar::future> HttpProxy::forward_request( + const std::string& path, const std::string& method, const std::string& body, + const seastar_http_headers_t& headers) { + LOG(INFO) << "Forwarding request to " << path << ", method: " << method + << ", body: " << body.size() << ", headers: " << headers.size(); + if (!initialized_) { + return seastar::make_ready_future>( + HttpForwardingResponses{}); + } + // std::vector> reply_futs; + // Get the status of the endpoints from last heartbeat check + { + bool all_endpoints_ready = true; + if (heartbeat_checker_) { + const auto& endpoint_status = heartbeat_checker_->get_endpoint_status(); + // First check if all the endpoints + // for (size_t i = 0; i < clients_.size(); ++i) { + // if (!endpoint_status[i]) { + // LOG(WARNING) << "Endpoint at index " << i << " is not available"; + // all_endpoints_ready = false; + // } + // } + } + if (!all_endpoints_ready) { + // TODO: add results to indicate the endpoint is not available + return seastar::make_ready_future>( + HttpForwardingResponses{}); + } + } + // HttpForwardingResponses replies; + // First send to client 0 and then send to client 1 + return do_send_requests(path, method, body, headers) + .then_wrapped([](seastar::future&& fut) { + try { + auto responses = fut.get(); + return gs::Result(std::move(responses)); + } catch (const std::exception& e) { + return gs::Result( + gs::Status(gs::StatusCode::InternalError, e.what())); + } + }); +} + +seastar::future HttpProxy::do_send_request( + const std::string& path, const std::string& method, const std::string& body, + const seastar_http_headers_t& headers, + // std::vector& clients, + size_t ind, 
HttpForwardingResponses&& responses) { + if (ind >= endpoints_.size()) { + return seastar::make_ready_future( + std::move(responses)); + } + + if (method != "GET" && method != "POST" && method != "DELETE" && + method != "PUT") { + LOG(ERROR) << "Unsupported method: " << method; + return seastar::make_exception_future( + std::runtime_error("Unsupported method: " + method)); + } + + HttpForwardingResponse response; + + auto lambda = [this, &path, &method, &body, &headers, ind, &responses]() { + seastar::sstring content_type = "application/json"; + seastar::sstring sstr("Content-Type"); + if (headers.find(sstr) != headers.end()) { + content_type = headers.at(sstr); + } + httplib::Client client(endpoints_[ind].first, endpoints_[ind].second); + client.set_connection_timeout(CONNECTION_TIMEOUT, 0); // 5s + client.set_read_timeout(READ_TIMEOUT, 0); // 10s + client.set_write_timeout(WRITE_TIMEOUT, 0); // 10s + if (method == "GET") { + VLOG(10) << "Forwarding GET request to " << path; + return to_response(client.Get(path.c_str(), to_httplib_headers(headers))); + } else if (method == "POST") { + return to_response(client.Post(path.c_str(), to_httplib_headers(headers), + body, content_type)); + } else if (method == "DELETE") { + return to_response( + client.Delete(path.c_str(), to_httplib_headers(headers))); + } else { // must be put + return to_response(client.Put(path.c_str(), to_httplib_headers(headers), + body, content_type)); + } + }; + + if (hang_until_success_) { + while (true) { + response = lambda(); + if (response.first == 200) { + responses.emplace_back(std::move(response)); + break; + } else { + LOG(ERROR) << "Failed to send request to endpoint at index " << ind + << ", status: " << response.first + << ", msg: " << response.second; + if (response.first == 404) { + LOG(ERROR) << "Endpoint not found, skip it"; + responses.emplace_back(std::move(response)); + break; + } + std::this_thread::sleep_for(std::chrono::seconds(3)); + } + } + } else { + response = lambda(); 
+ responses.emplace_back(std::move(response)); + } + return do_send_request(path, method, body, headers, ind + 1, + std::move(responses)); +} +seastar::future HttpProxy::do_send_requests( + const std::string& path, const std::string& method, const std::string& body, + const seastar_http_headers_t& headers) { + HttpForwardingResponses responses; + return do_send_request(path, method, body, headers, 0, std::move(responses)); +} + +} // namespace server \ No newline at end of file diff --git a/flex/engines/http_server/handler/http_proxy.h b/flex/engines/http_server/handler/http_proxy.h new file mode 100644 index 000000000000..0a84ba1637da --- /dev/null +++ b/flex/engines/http_server/handler/http_proxy.h @@ -0,0 +1,110 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef ENGINES_HTTP_SERVER_HANDLER_FORWARD_HTTP_CLIENT_H_ +#define ENGINES_HTTP_SERVER_HANDLER_FORWARD_HTTP_CLIENT_H_ + +#include + +#include "flex/third_party/httplib.h" +#include "flex/utils/result.h" + +#include +#include + +namespace server { + +class HeartBeatChecker { + public: + static constexpr int32_t DEFAULT_HEART_BEAT_INTERVAL = 2; // 2s + HeartBeatChecker( + // std::vector& clients, + const std::vector>& endpoints, + int32_t heart_beat_interval = DEFAULT_HEART_BEAT_INTERVAL); + ~HeartBeatChecker(); + + gs::Status start(); + + gs::Status stop(); + + const std::vector& get_endpoint_status() const; + + private: + void check_heartbeat(); + + std::atomic running_; + int32_t heart_beat_interval_; + // std::vector& clients_; + const std::vector>& endpoints_; + std::vector endpoint_status_; // to mark whether the endpoint is alive + std::thread heartbeat_thread_; +}; + +using HttpForwardingResponse = std::pair; +using HttpForwardingResponses = std::vector; +using seastar_http_headers_t = + std::unordered_map; + +// A wrapped http client which will send request to multiple endpoints and +// return the summary of the responses. +// It will do heartbeat check to the endpoints to make sure the endpoints are +// available. +// Currently, we don't distinguish the read/write requests, we just +// send the request to all the endpoints. 
+class HttpProxy { + public: + static constexpr int32_t CONNECTION_TIMEOUT = 5; // 5s + static constexpr int32_t READ_TIMEOUT = 300; // 5s + static constexpr int32_t WRITE_TIMEOUT = 300; // 10s + HttpProxy(); + ~HttpProxy(); + + gs::Status init( + const std::vector>& endpoints, + bool enable_heart_beat_check = false, + int32_t heart_beat_interval = + HeartBeatChecker::DEFAULT_HEART_BEAT_INTERVAL, + bool hang_until_success = true); + + void close(); + + seastar::future> forward_request( + const std::string& path, const std::string& method, + const std::string& body, const seastar_http_headers_t& headers); + + private: + seastar::future do_send_request( + const std::string& path, const std::string& method, + const std::string& body, const seastar_http_headers_t& headers, + size_t ind, HttpForwardingResponses&& responses); + + seastar::future do_send_requests( + const std::string& path, const std::string& method, + const std::string& body, const seastar_http_headers_t& headers); + + std::atomic initialized_; + bool enable_heart_beat_check_; + bool hang_until_success_; + std::vector> endpoints_; // ip and ports + + // std::vector clients_; + + std::unique_ptr heartbeat_checker_; +}; + +} // namespace server + +#endif // ENGINES_HTTP_SERVER_HANDLER_FORWARD_HTTP_CLIENT_H_ diff --git a/flex/engines/http_server/handler/http_utils.h b/flex/engines/http_server/handler/http_utils.h index 376537eb3132..3c69a710ca00 100644 --- a/flex/engines/http_server/handler/http_utils.h +++ b/flex/engines/http_server/handler/http_utils.h @@ -12,6 +12,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + #include "flex/engines/http_server/types.h" #include "flex/utils/result.h" #include "seastar/http/common.hh" diff --git a/flex/engines/http_server/handler/proxy_http_handler.cc b/flex/engines/http_server/handler/proxy_http_handler.cc new file mode 100644 index 000000000000..9f9e51359198 --- /dev/null +++ b/flex/engines/http_server/handler/proxy_http_handler.cc @@ -0,0 +1,95 @@ +/** Copyright 2020 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "flex/engines/http_server/handler/proxy_http_handler.h"
+
+#include "flex/engines/http_server/executor_group.actg.h"
+#include "flex/engines/http_server/handler/http_utils.h"
+#include "flex/engines/http_server/options.h"
+
+#include "flex/engines/http_server/types.h"
+
+namespace server {
+
+// Builds shard_concurrency actor references on the local shard, inside
+// sub-scope 0 and then sub-scope group_id, and stores them in executor_refs_.
+proxy_http_forward_handler::proxy_http_forward_handler(
+    uint32_t group_id, uint32_t shard_concurrency)
+    : executor_idx_(0), shard_concurrency_(shard_concurrency) {
+  executor_refs_.reserve(shard_concurrency_);
+  hiactor::scope_builder builder;
+  builder.set_shard(hiactor::local_shard_id())
+      .enter_sub_scope(hiactor::scope(0))
+      .enter_sub_scope(hiactor::scope(group_id));
+  for (unsigned i = 0; i < shard_concurrency_; ++i) {
+    executor_refs_.emplace_back(builder.build_ref(i));
+  }
+}
+
+// Dispatches the request to the next actor reference in round-robin order
+// and turns the actor's future into the HTTP reply.
+// NOTE(review): the modulo below is undefined when shard_concurrency_ is 0;
+// callers presumably always pass a positive value - confirm.
+seastar::future>
+proxy_http_forward_handler::handle(const seastar::sstring& path,
+                                   std::unique_ptr req,
+                                   std::unique_ptr rep) {
+  auto dst_executor = executor_idx_;
+  executor_idx_ = (executor_idx_ + 1) % shard_concurrency_;
+
+  return executor_refs_[dst_executor]
+      .do_query(proxy_request{std::move(req)})
+      .then_wrapped([rep = std::move(rep)](
+                        seastar::future&& fut) mutable {
+        // Success and failure are both handled by the shared helper.
+        return return_reply_with_result(std::move(rep), std::move(fut));
+      });
+}
+
+proxy_http_handler::proxy_http_handler(uint16_t http_port)
+    : http_port_(http_port) {}
+
+// Starts the HTTP server on shard 0, installs the routes and listens on
+// http_port_. The work is submitted through seastar::alien::submit_to and
+// this call blocks on the returned future until listening has begun.
+void proxy_http_handler::start() {
+  auto fut = seastar::alien::submit_to(
+      *seastar::alien::internal::default_instance, 0, [this] {
+        return server_.start()
+            .then([this] { return set_routes(); })
+            .then([this] { return server_.listen(http_port_); })
+            .then([this] {
+              fmt::print("Http handler is listening on port {} ...\n",
+                         http_port_);
+            });
+      });
+  fut.wait();
+}
+
+// Stops the HTTP server on shard 0 and blocks until it has stopped.
+void proxy_http_handler::stop() {
+  auto fut =
+      seastar::alien::submit_to(*seastar::alien::internal::default_instance, 0,
+                                [this] { return server_.stop(); });
+  fut.wait();
+}
+
+// Installs a single default handler, so every path is forwarded.
+// NOTE(review): the handler is created with a raw new; ownership presumably
+// passes to seastar's routes object - confirm it is released there.
+seastar::future<> proxy_http_handler::set_routes() {
+  return server_.set_routes([](seastar::httpd::routes& r) {
+    r.add_default_handler(new proxy_http_forward_handler(
+        proxy_group_id, shard_proxy_concurrency));
+    return seastar::make_ready_future<>();
+  });
+}
+
+}  // namespace server
\ No newline at end of file
diff --git a/flex/engines/http_server/handler/proxy_http_handler.h b/flex/engines/http_server/handler/proxy_http_handler.h
new file mode 100644
index 000000000000..b360f9390608
--- /dev/null
+++ b/flex/engines/http_server/handler/proxy_http_handler.h
@@ -0,0 +1,63 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENGINES_HTTP_SERVER_HANDLER_PROXY_HTTP_HANDLER_H_
+#define ENGINES_HTTP_SERVER_HANDLER_PROXY_HTTP_HANDLER_H_
+
+#include
+#include
+#include
+#include
+
+#include "flex/engines/http_server/generated/actor/proxy_actor_ref.act.autogen.h"
+
+namespace server {
+
+// seastar HTTP handler that hands every request to one of
+// shard_concurrency proxy actor references, chosen in round-robin order.
+class proxy_http_forward_handler : public seastar::httpd::handler_base {
+ public:
+  // group_id selects the actor group scope; shard_concurrency is the number
+  // of actor references created on the local shard.
+  proxy_http_forward_handler(uint32_t group_id, uint32_t shard_concurrency);
+
+  ~proxy_http_forward_handler() = default;
+
+  seastar::future> handle(
+      const seastar::sstring& path,
+      std::unique_ptr req,
+      std::unique_ptr rep) override;
+
+ private:
+  uint32_t executor_idx_;             // index of the next reference to use
+  const uint32_t shard_concurrency_;  // number of entries in executor_refs_
+  std::vector executor_refs_;
+};
+
+// Owns the seastar HTTP server of the proxy and listens on http_port.
+// TODO: How to distinguish between read requests and write requests?
+class proxy_http_handler {
+ public:
+  // explicit: a port number must not convert silently into a handler.
+  explicit proxy_http_handler(uint16_t http_port);
+
+  // Both calls block until the server has started / stopped.
+  void start();
+  void stop();
+
+ private:
+  seastar::future<> set_routes();
+
+ private:
+  const uint16_t http_port_;
+  seastar::httpd::http_server_control server_;
+};
+
+}  // namespace server
+
+#endif  // ENGINES_HTTP_SERVER_HANDLER_PROXY_HTTP_HANDLER_H_
\ No newline at end of file
diff --git a/flex/engines/http_server/options.cc b/flex/engines/http_server/options.cc
index 2f7c0441acf3..0bc4fbb74117 100644
--- a/flex/engines/http_server/options.cc
+++ b/flex/engines/http_server/options.cc
@@ -20,10 +20,11 @@ namespace server {
 uint32_t shard_query_concurrency = 16;
 uint32_t shard_update_concurrency = 4;
 uint32_t shard_adhoc_concurrency = 4;
-uint32_t shard_admin_graph_concurrency = 1;
+uint32_t shard_admin_graph_concurrency = 16;
 uint32_t shard_admin_procedure_concurrency = 1;
 uint32_t shard_admin_node_concurrency = 1;
 uint32_t shard_admin_job_concurrency = 1;
-uint32_t shard_admin_service_concurrency = 1;
+uint32_t shard_admin_service_concurrency = 16;
+uint32_t shard_proxy_concurrency = 16;  // same as shard_query_concurrency
 
 }  // namespace server
diff --git a/flex/engines/http_server/options.h b/flex/engines/http_server/options.h
index b7110a9d6620..62256b32bf36 100644
--- a/flex/engines/http_server/options.h
+++ b/flex/engines/http_server/options.h
@@ -28,6 +28,7 @@ const uint32_t ic_update_group_id = 3;
 const uint32_t ic_adhoc_group_id = 4;
 const uint32_t codegen_group_id = 5;
 const uint32_t proc_query_group_id = 6;
+const uint32_t proxy_group_id = 7;
 const uint32_t max_group_id = std::numeric_limits::max();
 const uint32_t group_inc_step =
@@ -42,6 +43,7 @@ extern uint32_t shard_admin_node_concurrency;
 extern uint32_t shard_admin_service_concurrency;
 extern uint32_t shard_admin_job_concurrency;
 extern uint32_t shard_admin_procedure_concurrency;
+extern uint32_t shard_proxy_concurrency;
 
 }  // namespace server
diff --git a/flex/engines/http_server/service/proxy_service.cc b/flex/engines/http_server/service/proxy_service.cc
new file mode 100644
index 000000000000..aa9685d7f10e
--- /dev/null
+++ b/flex/engines/http_server/service/proxy_service.cc
@@ -0,0 +1,63 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "flex/engines/http_server/service/proxy_service.h"
+
+namespace server {
+
+// Records the port and endpoints, creates the actor system and the HTTP
+// handler (neither is started here) and initialises the forwarding client.
+// Returns an InternalError status carrying the client's error message when
+// the client cannot be initialised, OK otherwise.
+// NOTE(review): actor_sys_ and http_hdl_ stay set even when client.init()
+// fails, so run_and_wait_for_exit() would still start them - confirm that
+// callers stop on a non-OK status.
+gs::Status ProxyService::init(
+    uint32_t num_shards, uint16_t http_port,
+    const std::vector>& endpoints,
+    bool enable_heartbeat, int32_t heart_beat_interval,
+    bool hang_until_success) {
+  proxy_port_ = http_port;
+  endpoints_ = endpoints;
+  actor_sys_ = std::make_unique(num_shards, false);
+  http_hdl_ = std::make_unique(http_port);
+  auto init_res = client.init(endpoints, enable_heartbeat, heart_beat_interval,
+                              hang_until_success);
+  if (!init_res.ok()) {
+    // Keep the underlying reason in both the log and the returned status.
+    LOG(ERROR) << "Failed to init HttpProxy: " << init_res.error_message();
+    return gs::Status(gs::StatusCode::InternalError,
+                      "Failed to init HttpProxy: " + init_res.error_message());
+  }
+  return gs::Status::OK();
+}
+
+// Launches the actor system and the HTTP handler, then polls running_ every
+// 100 ms until set_exit_state() clears it; afterwards stops the handler and
+// terminates the actor system. Prints an error and returns immediately when
+// init() has not created them.
+// NOTE(review): a set_exit_state() call that arrives before
+// running_.store(true) below is overwritten and therefore lost.
+void ProxyService::run_and_wait_for_exit() {
+  if (!actor_sys_ || !http_hdl_) {
+    std::cerr << "Proxy service has not been inited!" << std::endl;
+    return;
+  }
+  actor_sys_->launch();
+  http_hdl_->start();
+  running_.store(true);
+  while (running_.load(std::memory_order_relaxed)) {
+    std::this_thread::sleep_for(std::chrono::milliseconds(100));
+  }
+  http_hdl_->stop();
+  actor_sys_->terminate();
+}
+
+// Returns the endpoints passed to init().
+const std::vector>&
+ProxyService::get_endpoints() const {
+  return endpoints_;
+}
+
+// Returns the forwarding client owned by this service.
+HttpProxy& ProxyService::get_client() { return client; }
+
+// Asks run_and_wait_for_exit() to leave its polling loop.
+void ProxyService::set_exit_state() { running_.store(false); }
+
+}  // namespace server
diff --git a/flex/engines/http_server/service/proxy_service.h b/flex/engines/http_server/service/proxy_service.h
new file mode 100644
index 000000000000..9632818cdafe
--- /dev/null
+++ b/flex/engines/http_server/service/proxy_service.h
@@ -0,0 +1,66 @@
+/** Copyright 2020 Alibaba Group Holding Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ENGINES_HTTP_SERVER_SERVICE_PROXY_SERVICE_H_
+#define ENGINES_HTTP_SERVER_SERVICE_PROXY_SERVICE_H_
+
+#include
+#include
+#include
+#include
+
+#include "flex/engines/http_server/actor_system.h"
+#include "flex/engines/http_server/handler/http_proxy.h"
+#include "flex/engines/http_server/handler/proxy_http_handler.h"
+#include "flex/utils/result.h"
+#include "flex/utils/service_utils.h"
+
+namespace server {
+// Process-wide singleton that owns the actor system, the proxy HTTP handler
+// and the forwarding client (HttpProxy).
+class ProxyService {
+ public:
+  // Returns the single instance (function-local static).
+  static ProxyService& get() {
+    static ProxyService instance;
+    return instance;
+  }
+
+  ~ProxyService() = default;
+
+  // Must be called before run_and_wait_for_exit(); the last three arguments
+  // are passed on to HttpProxy::init().
+  gs::Status init(
+      uint32_t num_shards, uint16_t http_port,
+      const std::vector>& endpoints,
+      bool enable_heartbeat = false,
+      int32_t heart_beat_interval =
+          HeartBeatChecker::DEFAULT_HEART_BEAT_INTERVAL,
+      bool hang_until_success = true);
+  // Runs the service until set_exit_state() is called.
+  void run_and_wait_for_exit();
+  const std::vector>& get_endpoints() const;
+  void set_exit_state();
+
+  HttpProxy& get_client();
+
+ private:
+  ProxyService() = default;
+
+ private:
+  // Initialised in place so the value is defined before init() runs.
+  uint32_t proxy_port_{0};
+  std::vector> endpoints_;
+  std::unique_ptr actor_sys_;
+  std::unique_ptr http_hdl_;
+  std::atomic running_{false};
+  HttpProxy client;
+};
+}  // namespace server
+
+#endif  // ENGINES_HTTP_SERVER_SERVICE_PROXY_SERVICE_H_
diff --git a/flex/engines/http_server/types.h b/flex/engines/http_server/types.h
index f43cd6d4fe08..44a39b0dd83f 100644
--- a/flex/engines/http_server/types.h
+++ b/flex/engines/http_server/types.h
@@ -20,8 +20,10 @@
 #include
 #include
 #include
+#include
 
 #include "flex/utils/service_utils.h"
+#include
 #include
namespace server { @@ -54,7 +56,9 @@ struct payload { }; using query_param = payload; +using proxy_request = payload>; using query_result = payload; +using proxy_query_result = payload>; using admin_query_result = payload>; // url_path, query_param using graph_management_param = diff --git a/flex/interactive/docker/Makefile b/flex/interactive/docker/Makefile index 4eba0d8a62e4..b55f4cd832b0 100644 --- a/flex/interactive/docker/Makefile +++ b/flex/interactive/docker/Makefile @@ -31,7 +31,7 @@ interactive-runtime: --target final_image \ --build-arg ARCH=$(ARCH) \ --build-arg ENABLE_COORDINATOR=${ENABLE_COORDINATOR} \ - --no-cache -t registry.cn-hongkong.aliyuncs.com/graphscope/interactive:${SHORT_SHA}-${ARCH} . + -t registry.cn-hongkong.aliyuncs.com/graphscope/interactive:${SHORT_SHA}-${ARCH} . hqps-server-base: docker build \ diff --git a/flex/interactive/docker/entrypoint.sh b/flex/interactive/docker/entrypoint.sh index 6616f8ce15d1..099c8fee53bf 100644 --- a/flex/interactive/docker/entrypoint.sh +++ b/flex/interactive/docker/entrypoint.sh @@ -18,10 +18,11 @@ set -e DEFAULT_GRAPH_NAME=gs_interactive_default_graph BULK_LOADER_BINARY_PATH=/opt/flex/bin/bulk_loader INTERACTIVE_SERVER_BIN=/opt/flex/bin/interactive_server +PROXY_SERVER_BIN=/opt/flex/bin/proxy_server function usage() { cat << EOF - Usage: $0 -w[--workspace] + Usage: $0 -w[--workspace] -t[--type] -e[--endpoints] This is the entrypoint script for the interactive container. Options: -h, --help: show this help message and exit @@ -32,28 +33,42 @@ function usage() { -c, --enable-coordinator: Launch the Interactive service along with Coordinator. Must enable this option if you want to use `gsctl` command-line tool. + -t, --type: Specify the type of the service to start. + Default is "engine", which means start the engine service. + Other options are "proxy", which means start the proxy service. + -e, --endpoints: Specify the endpoints of the engine service. i.e. + the address of the engine service. 
For example, "localhost:9190,localhost:9191" EOF } function prepare_workspace() { #receive args + if [ $# -ne 2 ]; then + echo "Usage: prepare_workspace " + exit 1 + fi local workspace=$1 if [ -z "${workspace}" ]; then workspace="/tmp/interactive_workspace" fi + local PARALLELISM=$2 + echo "Preparing workspace: ${workspace}, parallelism: ${PARALLELISM}" #if workspace is not exist, create it - if [ ! -d "${workspace}" ]; then - mkdir -p ${workspace} - mkdir -p ${workspace}/conf/ - else - echo "Workspace ${workspace} already exists" + mkdir -p ${workspace} + mkdir -p ${workspace}/conf/ + # prepare engine_config.yaml + builtin_graph_directory="${workspace}/data/${DEFAULT_GRAPH_NAME}" + if [ -d "${builtin_graph_directory}" ]; then + echo "The builtin graph: ${DEFAULT_GRAPH_NAME} already exists, skip preparing the workspace" return 0 fi - # prepare engine_config.yaml engine_config_path="${workspace}/conf/engine_config.yaml" - cp /opt/flex/share/engine_config.yaml $engine_config_path + if [ ! 
-f "${engine_config_path}" ]; then + cp /opt/flex/share/engine_config.yaml $engine_config_path + fi #make sure the line which start with default_graph is changed to default_graph: ${DEFAULT_GRAPH_NAME} sed -i "s/default_graph:.*/default_graph: ${DEFAULT_GRAPH_NAME}/" $engine_config_path + sed -i "s/thread_num_per_worker:.*/thread_num_per_worker: ${PARALLELISM}/" $engine_config_path echo "Using default graph: ${DEFAULT_GRAPH_NAME} to start the service" # copy the builtin graph @@ -83,7 +98,7 @@ function launch_service() { start_cmd="${INTERACTIVE_SERVER_BIN} -c ${engine_config_path}" start_cmd="${start_cmd} -w ${workspace}" start_cmd="${start_cmd} --enable-admin-service true" - start_cmd="${start_cmd} --start-compiler true" + # start_cmd="${start_cmd} --start-compiler true" echo "Starting the service with command: $start_cmd" if $ENABLE_COORDINATOR; then start_cmd="${start_cmd} &"; fi eval $start_cmd @@ -106,39 +121,92 @@ EOF fi } +function launch_proxy_service() { + #expect 1 arg + if [ $# -ne 4 ]; then + echo "Usage: launch_proxy_service " + echo " number of args: $#" + exit 1 + fi + local endpoints=$1 + local port=$2 + local hang_until_success=$3 + local parallelism=$4 + start_cmd="${PROXY_SERVER_BIN} -e '${endpoints}' -p ${port} --hang-until-success ${hang_until_success} --parallelism ${parallelism}" + echo "Starting the proxy service with command: $start_cmd" + eval $start_cmd +} + #################### Entry #################### ENABLE_COORDINATOR=false WORKSPACE=/tmp/interactive_workspace +SERVICE_TYPE="engine" +PROXY_PORT=10000 +HANG_UNTIL_SUCCESS=false +PARALLELISM=32 while [[ $# -gt 0 ]]; do - case $1 in - -w | --workspace) - shift - if [[ $# -eq 0 || $1 == -* ]]; then - echo "Option -w requires an argument." 
>&2 - exit 1 - fi - WORKSPACE=$1 - shift - ;; - -c | --enable-coordinator) - ENABLE_COORDINATOR=true - shift - ;; - -h | --help) - usage - exit 0 - ;; - *) - echo "Invalid option: $1" >&2 - usage - exit 1 - ;; + key="$1" + + case $key in + -h | --help) + usage + exit + ;; + -w | --workspace) + shift + WORKSPACE="$1" + shift + ;; + -c | --enable-coordinator) + ENABLE_COORDINATOR=true + shift + ;; + -t | --type) + shift + SERVICE_TYPE="$1" + shift + ;; + -e | --endpoints) + shift + ENDPOINTS="$1" + shift + ;; + -p | --port) + shift + PROXY_PORT="$1" + shift + ;; + --hang-until-success) + shift + HANG_UNTIL_SUCCESS="$1" + shift + ;; + --parallelism) + shift + PARALLELISM="$1" + shift + ;; + *) # unknown option + echo "unknown option $1" + usage + exit 1 + ;; esac done - -prepare_workspace $WORKSPACE -launch_service $WORKSPACE -launch_coordinator +if [ "${SERVICE_TYPE}" != "engine" ] && [ "${SERVICE_TYPE}" != "proxy" ]; then + echo "Invalid service type: ${SERVICE_TYPE}" + usage + exit 1 +fi +if [ "${SERVICE_TYPE}" == "proxy" ]; then + echo "Start the proxy service" + launch_proxy_service $ENDPOINTS $PROXY_PORT $HANG_UNTIL_SUCCESS $PARALLELISM +else + echo "Start the engine service" + prepare_workspace $WORKSPACE $PARALLELISM + launch_service $WORKSPACE + launch_coordinator +fi diff --git a/flex/interactive/docker/interactive-runtime.Dockerfile b/flex/interactive/docker/interactive-runtime.Dockerfile index 36cca0b52aba..454ce4370402 100644 --- a/flex/interactive/docker/interactive-runtime.Dockerfile +++ b/flex/interactive/docker/interactive-runtime.Dockerfile @@ -4,11 +4,12 @@ FROM registry.cn-hongkong.aliyuncs.com/graphscope/interactive-base:v0.0.4 AS bui ARG ARCH ARG ENABLE_COORDINATOR="false" -COPY --chown=graphscope:graphscope . 
/home/graphscope/GraphScope
-
 # change bash as default
 SHELL ["/bin/bash", "-c"]
 
+# install debug tools
+RUN sudo apt-get update && sudo apt-get install -y vim iputils-ping curl
+
 # install arrow
 RUN cd /tmp && sudo apt-get update && sudo apt-get install -y -V ca-certificates lsb-release wget libcurl4-openssl-dev && \
     curl -o apache-arrow-apt-source-latest.deb https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb && \
@@ -31,8 +32,10 @@ cmake . -DCMAKE_INSTALL_PREFIX=/opt/flex -DCMAKE_BUILD_TYPE=Release -DCMAKE_CXX_
     -DBUILD_TESTING=OFF -DWITH_EXAMPLES=OFF && make -j && make install && rm -rf /tmp/opentelemetry-cpp
 
 # install flex
+# The source tree is copied only here, after the dependency layers above.
+COPY --chown=graphscope:graphscope . /home/graphscope/GraphScope
+
 RUN . ${HOME}/.cargo/env && cd ${HOME}/GraphScope/flex && \
-    git submodule update --init && mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX=/opt/flex -DBUILD_DOC=OFF -DBUILD_TEST=OFF && make -j && make install && \
+    git submodule update --init && mkdir build && cd build && cmake .. -DCMAKE_INSTALL_PREFIX=/opt/flex -DBUILD_DOC=OFF -DBUILD_ODPS_FRAGMENT_LOADER=ON && make -j && make install && \
     cd ~/GraphScope/interactive_engine/ && mvn clean package -Pexperimental -DskipTests && \
     cd ~/GraphScope/interactive_engine/compiler && cp target/compiler-0.0.1-SNAPSHOT.jar /opt/flex/lib/ && \
     cp target/libs/*.jar /opt/flex/lib/ && \
@@ -62,13 +65,19 @@ ARG ENABLE_COORDINATOR="false"
 ENV DEBIAN_FRONTEND=noninteractive
 
 # g++ + jre 500MB
-RUN apt-get update && apt-get -y install sudo locales g++ cmake openjdk-11-jre-headless tzdata && \
+RUN apt-get update && apt-get -y install sudo locales g++ cmake openjdk-11-jre-headless tzdata iputils-ping curl rapidjson-dev && \
     locale-gen en_US.UTF-8 && apt-get clean -y && rm -rf /var/lib/apt/lists/*
 
 # shanghai zoneinfo
 ENV TZ=Asia/Shanghai
 RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone
 
+# install kubectl (pinned to KUBE_VER)
+# NOTE(review): the download URL hard-codes linux/amd64 although the image is
+# built with --build-arg ARCH - confirm this is intended for non-amd64 builds.
+RUN cd /tmp && export KUBE_VER=v1.19.2 && \
+    curl -LO https://storage.googleapis.com/kubernetes-release/release/${KUBE_VER}/bin/linux/amd64/kubectl && \
+    chmod +x ./kubectl && \
+    cd /tmp && \
+    mv ./kubectl /usr/local/bin/kubectl
+
 # python3
 RUN if [ "${ENABLE_COORDINATOR}" = "true" ]; then \
     apt-get update && apt-get -y install python3 python3-pip && \
diff --git a/flex/interactive/sdk/examples/python/get_service_status.py b/flex/interactive/sdk/examples/python/get_service_status.py
new file mode 100644
index 000000000000..b6dc8f7fef19
--- /dev/null
+++ b/flex/interactive/sdk/examples/python/get_service_status.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright 2020 Alibaba Group Holding Limited. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+import sys
+
+# Make the SDK importable when the script is run from its own directory.
+sys.path.append("../../python/")
+import time
+import argparse
+import os
+# NOTE(review): the SDK package touched elsewhere in this patch is named
+# gs_interactive - confirm that interactive_sdk is importable from the path
+# appended above.
+from interactive_sdk.client.driver import Driver
+from interactive_sdk.client.session import Session
+from interactive_sdk.openapi.models.query_request import QueryRequest
+from interactive_sdk.openapi.models.gs_data_type import GSDataType
+from interactive_sdk.openapi.models.typed_value import TypedValue
+from interactive_sdk.openapi.models.primitive_type import PrimitiveType
+
+
+def get_service_status(sess: Session):
+    """Print the value returned by ``sess.get_service_status()``."""
+    print("Get service status")
+    status = sess.get_service_status()
+    print(status)
+
+
+def get_procedures(sess: Session):
+    """Print the procedures listed for the graph with id "1"."""
+    print("Get procedures")
+    procedures = sess.list_procedures("1")
+    print(procedures)
+
+
+def call_procedure(sess: Session):
+    """Call procedure "QueryName" on graph "1" with one int32 argument (1)."""
+    print("Call procedure")
+    req = QueryRequest(
+        query_name="QueryName",
+        arguments=[
+            TypedValue(
+                type=GSDataType(PrimitiveType(primitive_type="DT_SIGNED_INT32")),
+                value=1,
+            )
+        ],
+    )
+    resp = sess.call_procedure("1", req)
+    print(resp)
+
+
+if __name__ == "__main__":
+    # expect one required argument: --endpoint
+    parser = argparse.ArgumentParser(description="Example Python3 script")
+
+    # Add arguments
+    # The option is required, so no default is given: argparse would never
+    # use one. An example value is
+    # https://virtserver.swaggerhub.com/GRAPHSCOPE/interactive/1.0.0/
+    parser.add_argument(
+        "--endpoint",
+        type=str,
+        help="The interactive endpoint to connect",
+        required=True,
+    )
+
+    # Parse the arguments
+    args = parser.parse_args()
+
+    driver = Driver(endpoint=args.endpoint)
+    with driver.session() as sess:
+        get_service_status(sess)
+        get_procedures(sess)
+        call_procedure(sess)
diff --git a/flex/interactive/sdk/java/pom.xml b/flex/interactive/sdk/java/pom.xml
index aa59b5cdb629..f3df413f5325 100644
--- a/flex/interactive/sdk/java/pom.xml
+++ b/flex/interactive/sdk/java/pom.xml
@@ -5,7 +5,7 @@ interactive jar interactive - 0.3 + 0.4-SNAPSHOT https://github.com/alibaba/GraphScope/tree/main/flex/interactive GraphScope Interactive Java SDK
@@ -46,14 +46,23 @@ - - ossrh - https://oss.sonatype.org/content/repositories/snapshots - - ossrh - https://oss.sonatype.org/service/local/staging/deploy/maven2/ + releases + http://mvnrepo.alibaba-inc.com/mvn/releases + + + snapshots + http://mvnrepo.alibaba-inc.com/mvn/snapshots + + + + + + + + +
diff --git a/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/Driver.java b/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/Driver.java
index e27fd5dff33b..dc230feed8ae 100644
--- a/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/Driver.java
+++ b/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/Driver.java
@@ -44,6 +44,10 @@ public static Driver connect(String uri) {
         return new Driver(uri);
     }
 
+    public static ProcedureInterface procedureOnly(String uri) {
+        return DefaultSession.queryServiceOnly(uri);
+    }
+
     private Driver(String uri) {
         // Parse uri
         String[] parts = uri.split(":");
diff --git a/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/impl/DefaultSession.java b/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/impl/DefaultSession.java
index 82dda3f359f1..ee9de32cf80e 100644
--- a/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/impl/DefaultSession.java
+++ b/flex/interactive/sdk/java/src/main/java/com/alibaba/graphscope/interactive/client/impl/DefaultSession.java
@@ -41,16 +41,19 @@ public class DefaultSession implements Session {
     private static String JSON_FORMAT_STRING = "json";
     private static String PROTO_FORMAT_STRING = "proto";
     private static String ENCODER_FORMAT_STRING = "encoder";
-    private final AdminServiceGraphManagementApi graphApi;
-    private final AdminServiceJobManagementApi jobApi;
-    private final AdminServiceProcedureManagementApi procedureApi;
-    private final AdminServiceServiceManagementApi serviceApi;
-    private final GraphServiceVertexManagementApi vertexApi;
-    private final GraphServiceEdgeManagementApi edgeApi;
-    private final QueryServiceApi queryApi;
-    private final UtilsApi utilsApi;
-    private final ApiClient client, queryClient;
+    private AdminServiceGraphManagementApi graphApi;
+    private AdminServiceJobManagementApi jobApi;
+    private AdminServiceProcedureManagementApi procedureApi;
+    private AdminServiceServiceManagementApi serviceApi;
+    private GraphServiceVertexManagementApi vertexApi;
+    private GraphServiceEdgeManagementApi edgeApi;
+    private QueryServiceApi queryApi;
+    private UtilsApi utilsApi;
+    private ApiClient client, queryClient;
 
+    private DefaultSession(){
+
+    }
     /**
      * Create a default GraphScope Interactive Session.
      *
@@ -89,6 +92,16 @@ private DefaultSession(String uri) {
         queryApi = new QueryServiceApi(queryClient);
     }
 
+    /**
+     * Create a session that can only talk to the query service.
+     * Only queryClient and queryApi are initialised; every other API field
+     * and the admin client stay null, so only query-service calls may be
+     * used on the returned object.
+     *
+     * @param queryUri base path of the query service
+     * @return a session with only the query API set up
+     */
+    public static DefaultSession queryServiceOnly(String queryUri) {
+        DefaultSession sess = new DefaultSession();
+        sess.queryClient = new ApiClient();
+        sess.queryClient.setBasePath(queryUri);
+        sess.queryClient.setReadTimeout(DEFAULT_READ_TIMEOUT);
+        sess.queryClient.setWriteTimeout(DEFAULT_WRITE_TIMEOUT);
+        sess.queryApi = new QueryServiceApi(sess.queryClient);
+        return sess;
+    }
+
     public static DefaultSession newInstance(String uri) {
         return new DefaultSession(uri);
     }
diff --git a/flex/interactive/sdk/java/src/test/resources/sample_app.cc b/flex/interactive/sdk/java/src/test/resources/sample_app.cc
index 0c2b7cf49b2c..e222fb388171 100644
--- a/flex/interactive/sdk/java/src/test/resources/sample_app.cc
+++ b/flex/interactive/sdk/java/src/test/resources/sample_app.cc
@@ -17,28 +17,17 @@
 #include "flex/utils/app_utils.h"
 
 namespace gs {
-class ExampleQuery : public CypherReadAppBase {
+class ExampleQuery : public WriteAppBase {
  public:
   using Engine = SyncEngine;
   using label_id_t = typename gs::MutableCSRInterface::label_id_t;
   using vertex_id_t = typename gs::MutableCSRInterface::vertex_id_t;
   ExampleQuery() {}
   // Query function for query class
-  results::CollectiveResults Query(const gs::GraphDBSession& sess,
-                                   int32_t param1) override {
-    LOG(INFO) << "param1: " << param1;
-    gs::MutableCSRInterface graph(sess);
-    auto ctx0 = Engine::template ScanVertex(
-        graph, 0, Filter());
-
-    auto ctx1 = Engine::Project(
-        graph, std::move(ctx0),
-        std::tuple{gs::make_mapper_with_variable(
-            gs::PropertySelector("id"))});
-    auto ctx2 = Engine::Limit(std::move(ctx1), 0, 5);
-    auto res = Engine::Sink(graph, ctx2, std::array{0});
-    LOG(INFO) << "res: " << res.DebugString();
-    return res;
+  // Sleeps for five seconds, logs, and returns true; input and output are
+  // not touched.
+  // NOTE(review): std::this_thread and std::chrono need <thread> and
+  // <chrono>; only flex/utils/app_utils.h is visible here - confirm.
+  bool Query(GraphDBSession &graph, Decoder &input, Encoder &output) {
+    std::this_thread::sleep_for(std::chrono::seconds(5));
+    LOG(INFO) << "after sleep";
+    return true;
   }
 };
 }  // namespace gs
diff --git a/flex/interactive/sdk/python/.openapi-generator-ignore b/flex/interactive/sdk/python/.openapi-generator-ignore
index 4a910045dd30..521c88ae7648 100644
--- a/flex/interactive/sdk/python/.openapi-generator-ignore
+++ b/flex/interactive/sdk/python/.openapi-generator-ignore
@@ -26,6 +26,7 @@
 README.md
 setup.py
 gs_interactive/client
+gs_interactive/models/long_text.py
 requirements.txt
 test-requirements.txt
 test/
diff --git a/flex/interactive/sdk/python/gs_interactive/client/driver.py b/flex/interactive/sdk/python/gs_interactive/client/driver.py
index f9396eee8c08..c588b181f908 100644
--- a/flex/interactive/sdk/python/gs_interactive/client/driver.py
+++ b/flex/interactive/sdk/python/gs_interactive/client/driver.py
@@ -18,10 +18,8 @@
 
 import sys
 
-from gremlin_python import statics
 from gremlin_python.driver.client import Client
-from gremlin_python.driver.driver_remote_connection import \
-    DriverRemoteConnection
+from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
 from gremlin_python.process.graph_traversal import __
 from gremlin_python.process.strategies import *
 from gremlin_python.structure.graph import Graph
diff --git a/flex/interactive/sdk/python/gs_interactive/models/long_text.py b/flex/interactive/sdk/python/gs_interactive/models/long_text.py
new file mode 100644
index 000000000000..dd81dd566576
--- /dev/null
+++ b/flex/interactive/sdk/python/gs_interactive/models/long_text.py
@@ -0,0 +1,99 @@
+# coding: utf-8
+
+"""
+    GraphScope Interactive API v0.3
+
+    This is the definition of GraphScope Interactive API, including - AdminService API - Vertex/Edge API - QueryService AdminService API (with tag AdminService) defines the API for GraphManagement, ProcedureManagement and Service Management. Vertex/Edge API (with tag GraphService) defines the API for Vertex/Edge management, including creation/updating/delete/retrive. QueryService API (with tag QueryService) defines the API for procedure_call, Ahodc query.
+
+    The version of the OpenAPI document: 1.0.0
+    Contact: graphscope@alibaba-inc.com
+    Generated by OpenAPI Generator (https://openapi-generator.tech)
+
+    Do not edit the class manually.
+"""  # noqa: E501
+
+
+from __future__ import annotations
+import pprint
+import re  # noqa: F401
+import json
+
+from pydantic import BaseModel, ConfigDict, StrictStr
+from typing import Any, ClassVar, Dict, List, Optional
+from typing import Optional, Set
+from typing_extensions import Self
+
+class LongText(BaseModel):
+    """
+    LongText
+
+    Model with a single nullable string field ``long_text``. Unknown fields
+    are rejected, both by ``extra="forbid"`` in ``model_config`` and by the
+    explicit key check in ``from_dict``.
+    """ # noqa: E501
+    long_text: Optional[StrictStr]
+    # Names of the fields accepted by from_dict.
+    __properties: ClassVar[List[str]] = ["long_text"]
+
+    model_config = ConfigDict(
+        populate_by_name=True,
+        validate_assignment=True,
+        protected_namespaces=(),
+        extra= "forbid",
+    )
+
+
+    def to_str(self) -> str:
+        """Returns the string representation of the model using alias"""
+        return pprint.pformat(self.model_dump(by_alias=True))
+
+    def to_json(self) -> str:
+        """Returns the JSON representation of the model using alias"""
+        # TODO: pydantic v2: use .model_dump_json(by_alias=True, exclude_unset=True) instead
+        return json.dumps(self.to_dict())
+
+    @classmethod
+    def from_json(cls, json_str: str) -> Optional[Self]:
+        """Create an instance of LongText from a JSON string"""
+        return cls.from_dict(json.loads(json_str))
+
+    def to_dict(self) -> Dict[str, Any]:
+        """Return the dictionary representation of the model using alias.
+
+        This has the following differences from calling pydantic's
+        `self.model_dump(by_alias=True)`:
+
+        * `None` is only added to the output dict for nullable fields that
+          were set at model initialization. Other fields with value `None`
+          are ignored.
+        """
+        excluded_fields: Set[str] = set([
+        ])
+
+        _dict = self.model_dump(
+            by_alias=True,
+            exclude=excluded_fields,
+            exclude_none=True,
+        )
+        # set to None if long_text (nullable) is None
+        # and model_fields_set contains the field
+        if self.long_text is None and "long_text" in self.model_fields_set:
+            _dict['long_text'] = None
+
+        return _dict
+
+    @classmethod
+    def from_dict(cls, obj: Optional[Dict[str, Any]]) -> Optional[Self]:
+        """Create an instance of LongText from a dict
+
+        Returns None for a None input, validates non-dict input directly,
+        and raises ValueError when the dict has a key other than long_text.
+        """
+        if obj is None:
+            return None
+
+        if not isinstance(obj, dict):
+            return cls.model_validate(obj)
+
+        # Forbid extra fields, in order to avoid matching var_char
+        for key in obj:
+            if key not in cls.__properties:
+                raise ValueError(f"Unexpected field {key} for LongText")
+
+        _obj = cls.model_validate({
+            "long_text": obj.get("long_text")
+        })
+        return _obj
+
+
diff --git a/flex/interactive/sdk/python/test/test_driver.py b/flex/interactive/sdk/python/test/test_driver.py
index bc36dbb71086..bffdaaffd7a4 100644
--- a/flex/interactive/sdk/python/test/test_driver.py
+++ b/flex/interactive/sdk/python/test/test_driver.py
@@ -24,43 +24,8 @@
 import pytest
 
 from gs_interactive.client.driver import Driver
-from gs_interactive.models.base_edge_type_vertex_type_pair_relations_inner import (
-    BaseEdgeTypeVertexTypePairRelationsInner,
-)
-from gs_interactive.models.create_edge_type import CreateEdgeType
-from gs_interactive.models.create_graph_request import CreateGraphRequest
-from gs_interactive.models.create_graph_schema_request import (
-    CreateGraphSchemaRequest,
-)
-from gs_interactive.models.create_procedure_request import (
-    CreateProcedureRequest,
-)
-from gs_interactive.models.create_property_meta import CreatePropertyMeta
-from gs_interactive.models.create_vertex_type import CreateVertexType
-from gs_interactive.models.edge_mapping import EdgeMapping
-from gs_interactive.models.edge_mapping_type_triplet import (
-    EdgeMappingTypeTriplet,
-)
-from gs_interactive.models.gs_data_type import GSDataType
-from gs_interactive.models.typed_value import TypedValue
-from gs_interactive.models.job_status import JobStatus
-from gs_interactive.models.long_text import LongText
-from gs_interactive.models.primitive_type import PrimitiveType
-from gs_interactive.models.schema_mapping import SchemaMapping
-from gs_interactive.models.schema_mapping_loading_config import (
-    SchemaMappingLoadingConfig,
-)
-from gs_interactive.models.schema_mapping_loading_config_format import (
-    SchemaMappingLoadingConfigFormat,
-)
-from gs_interactive.models.schema_mapping_loading_config_data_source import (
-    SchemaMappingLoadingConfigDataSource,
-)
-from gs_interactive.models.start_service_request import StartServiceRequest
-from gs_interactive.models.string_type import StringType
-from gs_interactive.models.string_type_string import StringTypeString
-from gs_interactive.models.vertex_mapping import VertexMapping
-from gs_interactive.models.query_request import QueryRequest
+# NOTE(review): the wildcard import replaces the explicit model imports, so
+# the test now depends on whatever gs_interactive.models exports - confirm.
+from gs_interactive.models import *
+
 
 class TestDriver(unittest.TestCase):
     """Test usage of driver"""
@@ -79,36 +44,36 @@ def setUp(self):
         self._cpp_proc_name = None
         print("finish setup")
 
-    def tearDown(self):
-        if self._graph_id is not None:
-            if self._cypher_proc_name is not None:
-                print("delete procedure: ")
-                rep1 = self._sess.delete_procedure(self._graph_id, self._cypher_proc_name)
-                print("delete procedure: ", rep1)
-            if self._cpp_proc_name is not None:
-                print("delete procedure: ")
-                rep1 = self._sess.delete_procedure(self._graph_id, self._cpp_proc_name)
-                print("delete procedure: ", rep1)
-            print("delete graph: ", self._graph_id)
-            rep2 = self._sess.delete_graph(self._graph_id)
-            print("delete graph: ", rep2)
+    # NOTE(review): tearDown is disabled, so the graph and procedures created
+    # by the test are no longer deleted - confirm this is intentional.
+    # def tearDown(self):
+    #     if self._graph_id is not None:
+    #         if self._cypher_proc_name is not None:
+    #             print("delete procedure: ")
+    #             rep1 = self._sess.delete_procedure(self._graph_id, self._cypher_proc_name)
+    #             print("delete procedure: ", rep1)
+    #         if self._cpp_proc_name is not None:
+    #             print("delete procedure: ")
+    #             rep1 = self._sess.delete_procedure(self._graph_id, self._cpp_proc_name)
+    #             print("delete procedure: ", rep1)
+    #         print("delete graph: ", self._graph_id)
+    #         rep2 = self._sess.delete_graph(self._graph_id)
+    #         print("delete graph: ", rep2)
 
     def test_example(self):
         self._graph_id = self.createGraph()
         self.bulkLoading()
         self.bulkLoadingUploading()
         self.waitJobFinish()
-        self.list_graph()
-        self.runCypherQuery()
-        self.runGremlinQuery()
-        self.createCypherProcedure()
+        # self.list_graph()
+        # self.runCypherQuery()
+        # self.runGremlinQuery()
+        # self.createCypherProcedure()
         self.createCppProcedure()
         self.restart()
-        self.restartOnNewGraph()
-        self.getStatistics()
-        self.callProcedure()
-        self.callProcedureWithHttp()
-        self.callProcedureWithHttpCurrent()
+        # self.restartOnNewGraph()
+        # self.getStatistics()
+        # self.callProcedure()
+        # self.callProcedureWithHttp()
+        # self.callProcedureWithHttpCurrent()
 
     def createGraph(self):
         create_graph = CreateGraphRequest(name="test_graph", description="test graph")
@@ -169,6 +134,11 @@ def bulkLoading(self):
                 data_source=SchemaMappingLoadingConfigDataSource(scheme="file", location=location),
                 import_option="init",
                 format=SchemaMappingLoadingConfigFormat(type="csv"),
+                x_csr_params=SchemaMappingLoadingConfigXCsrParams(
+                    parallelism=1,
+                    build_csr_in_mem=True,
+                    use_mmap_vector=True
+                ),
             ),
             vertex_mappings=[
                 VertexMapping(type_name="person", inputs=[person_csv_path])
diff --git a/flex/openapi/openapi_interactive.yaml b/flex/openapi/openapi_interactive.yaml
index af3c5aaf721a..f64e363b419b 100644
--- a/flex/openapi/openapi_interactive.yaml
+++ b/flex/openapi/openapi_interactive.yaml
@@ -1058,6 +1058,7 @@ components:
           example: DT_SIGNED_INT32
     LongText:
       x-body-name: long_text
+      additionalProperties: false
       type: object
       required:
         - long_text
@@ -1067,6 +1068,7 @@ components:
           nullable: true
     FixedChar:
       x-body-name: fixed_char
+      additionalProperties: false
       type: object
       required:
         - char
@@ -1074,11
+1076,12 @@ components: char: type: object required: - - fixed_char + - fixed_length properties: fixed_char: type: integer VarChar: + additionalProperties: false x-body-name: var_char type: object required: @@ -1105,8 +1108,6 @@ components: TimeStampType: x-body-name: time_stamp_type type: object - required: - - timestamp properties: timestamp: type: string @@ -1602,6 +1603,16 @@ components: loading_config: type: object properties: + x_csr_params: + type: object + description: mutable_csr specific parameters + properties: + parallelism: # how many thread used for bulk loading + type: integer + build_csr_in_mem: # whether to build csr in memory + type: boolean + use_mmap_vector: # whether to use mmap vector + type: boolean data_source: type: object properties: diff --git a/flex/storages/metadata/default_graph_meta_store.cc b/flex/storages/metadata/default_graph_meta_store.cc index 59f8b9fee3f8..ebb76499ae34 100644 --- a/flex/storages/metadata/default_graph_meta_store.cc +++ b/flex/storages/metadata/default_graph_meta_store.cc @@ -144,15 +144,15 @@ Result> DefaultGraphMetaStore::GetAllPluginMeta( return Result>(res.status()); } std::vector metas; - VLOG(10) << "Found plugin metas: " << res.move_value().size(); + // VLOG(10) << "Found plugin metas: " << res.move_value().size(); for (auto& pair : res.move_value()) { auto plugin_meta = PluginMeta::FromJson(pair.second); if (plugin_meta.bound_graph == graph_id) { metas.push_back(plugin_meta); } } - VLOG(10) << "Found plugin metas belong to graph " << graph_id << ": " - << metas.size(); + // VLOG(10) << "Found plugin metas belong to graph " << graph_id << ": " + // << metas.size(); return Result>(metas); } diff --git a/flex/storages/metadata/graph_meta_store.cc b/flex/storages/metadata/graph_meta_store.cc index 44801276ab91..89023ad6f45b 100644 --- a/flex/storages/metadata/graph_meta_store.cc +++ b/flex/storages/metadata/graph_meta_store.cc @@ -65,7 +65,11 @@ std::string GraphMeta::ToJson() const { json["creation_time"] = 
creation_time; json["data_update_time"] = data_update_time; if (!data_import_config.empty()) { - json["data_import_config"] = nlohmann::json::parse(data_import_config); + try { + json["data_import_config"] = nlohmann::json::parse(data_import_config); + } catch (const std::exception& e) { + LOG(ERROR) << "Invalid data_import_config: " << data_import_config; + } } json["schema"] = nlohmann::json::parse(schema); json["stored_procedures"] = nlohmann::json::array(); diff --git a/flex/storages/metadata/local_file_metadata_store.cc b/flex/storages/metadata/local_file_metadata_store.cc index 0ef323e18ef5..1ad3e08ee153 100644 --- a/flex/storages/metadata/local_file_metadata_store.cc +++ b/flex/storages/metadata/local_file_metadata_store.cc @@ -38,7 +38,7 @@ Result LocalFileMetadataStore::CreateMeta( std::unique_lock lock(meta_mutex_); meta_key_t meta_key; ASSIGN_AND_RETURN_IF_RESULT_NOT_OK(meta_key, get_next_meta_key(meta_kind)); - VLOG(10) << "got next meta key: " << meta_key; + LOG(INFO) << "got next meta key: " << meta_key << ", for " << meta_kind; if (is_key_exist(meta_kind, meta_key)) { return Status(StatusCode::InternalError, "When creating meta, got an existing key"); @@ -86,7 +86,7 @@ Result>> LocalFileMetadataStore::GetAllMeta(const meta_kind_t& meta_kind) { std::unique_lock lock(meta_mutex_); - VLOG(10) << "Getting all meta for: " << meta_kind; + // VLOG(10) << "Getting all meta for: " << meta_kind; std::vector> meta_values; auto meta_dir = get_meta_kind_dir(meta_kind); for (auto& p : std::filesystem::directory_iterator(meta_dir)) { @@ -100,7 +100,7 @@ LocalFileMetadataStore::GetAllMeta(const meta_kind_t& meta_kind) { continue; } auto id_str = file_name.substr(strlen(META_FILE_PREFIX)); - VLOG(10) << "Reading meta file: " << file_name; + // VLOG(10) << "Reading meta file: " << file_name; auto meta_file = get_meta_file(meta_kind, id_str); auto meta_value_res = read_file(meta_file); if (meta_value_res.ok()) { diff --git a/flex/storages/rt_mutable_graph/file_names.h 
b/flex/storages/rt_mutable_graph/file_names.h index 7b01b1fbe613..64e7d25c758a 100644 --- a/flex/storages/rt_mutable_graph/file_names.h +++ b/flex/storages/rt_mutable_graph/file_names.h @@ -166,6 +166,7 @@ inline std::string get_latest_snapshot(const std::string& work_dir) { { FILE* fin = fopen((snapshots_dir + "/VERSION").c_str(), "r"); CHECK_EQ(fread(&version, sizeof(uint32_t), 1, fin), 1); + fclose(fin); } return snapshots_dir + "/" + std::to_string(version); } diff --git a/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc b/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc index eb93989c6765..2331151e5ce2 100644 --- a/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc +++ b/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.cc @@ -62,7 +62,7 @@ void set_column_from_string_array(gs::ColumnBase* col, auto casted = std::static_pointer_cast(array->chunk(j)); for (auto k = 0; k < casted->length(); ++k) { - auto str = casted->GetView(k); + auto str = casted->IsNull(k) ? "" : casted->GetView(k); std::string_view sw(str.data(), str.size()); if (offset[cur_ind] >= size) { cur_ind++; @@ -123,8 +123,11 @@ void set_column_from_timestamp_array(gs::ColumnBase* col, if (offset[cur_ind] >= size) { cur_ind++; } else { - col->set_any(offset[cur_ind++], - std::move(AnyConverter::to_any(casted->Value(k)))); + col->set_any( + offset[cur_ind++], + std::move(AnyConverter::to_any( + casted->IsNull(k) ? Date(std::numeric_limits::max()) + : Date(casted->Value(k))))); } } } @@ -149,8 +152,11 @@ void set_column_from_timestamp_array_to_day( if (offset[cur_ind] >= size) { cur_ind++; } else { - col->set_any(offset[cur_ind++], - std::move(AnyConverter::to_any(casted->Value(k)))); + col->set_any( + offset[cur_ind++], + std::move(AnyConverter::to_any( + casted->IsNull(k) ? 
Day(std::numeric_limits::max()) + : Day(casted->Value(k))))); } } } diff --git a/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.h b/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.h index d918efd0686d..4c0e0e93b0cb 100644 --- a/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.h +++ b/flex/storages/rt_mutable_graph/loader/abstract_arrow_fragment_loader.h @@ -55,9 +55,18 @@ void set_column(gs::ColumnBase* col, std::shared_ptr array, size_t cur_ind = 0; for (auto j = 0; j < array->num_chunks(); ++j) { auto casted = std::static_pointer_cast(array->chunk(j)); + size_t size = col->size(); for (auto k = 0; k < casted->length(); ++k) { - col->set_any(offset[cur_ind++], - std::move(AnyConverter::to_any(casted->Value(k)))); + if (offset[cur_ind] >= size) { + cur_ind++; + } else { + col->set_any( + offset[cur_ind++], + std::move(casted->IsNull(k) + ? AnyConverter::to_any( + std::numeric_limits::max()) + : AnyConverter::to_any(casted->Value(k)))); + } } } } @@ -105,9 +114,12 @@ struct _add_vertex { } auto casted_array = std::static_pointer_cast(col); for (size_t i = 0; i < row_num; ++i) { + if (casted_array->IsNull(i)) { + LOG(FATAL) << "Null value in primary key column: "; + } if (!indexer.add(casted_array->Value(i), vid)) { VLOG(2) << "Duplicate vertex id: " << casted_array->Value(i) << ".."; - offset.emplace_back(std::numeric_limits::max()); + offset.emplace_back(std::numeric_limits::max()); } else { offset.emplace_back(vid); } @@ -116,11 +128,11 @@ struct _add_vertex { if (col->type()->Equals(arrow::utf8())) { auto casted_array = std::static_pointer_cast(col); for (size_t i = 0; i < row_num; ++i) { - auto str = casted_array->GetView(i); + auto str = casted_array->IsNull(i) ? 
"" : casted_array->GetView(i); std::string_view str_view(str.data(), str.size()); if (!indexer.add(str_view, vid)) { VLOG(2) << "Duplicate vertex id: " << str_view << ".."; - offset.emplace_back(std::numeric_limits::max()); + offset.emplace_back(std::numeric_limits::max()); } else { offset.emplace_back(vid); } @@ -129,11 +141,11 @@ struct _add_vertex { auto casted_array = std::static_pointer_cast(col); for (size_t i = 0; i < row_num; ++i) { - auto str = casted_array->GetView(i); + auto str = casted_array->IsNull(i) ? "" : casted_array->GetView(i); std::string_view str_view(str.data(), str.size()); if (!indexer.add(str_view, vid)) { VLOG(2) << "Duplicate vertex id: " << str_view << ".."; - offset.emplace_back(std::numeric_limits::max()); + offset.emplace_back(std::numeric_limits::max()); } else { offset.emplace_back(vid); } @@ -159,13 +171,16 @@ struct _add_vertex { } auto casted_array = std::static_pointer_cast(col); for (size_t i = 0; i < row_num; ++i) { + if (casted_array->IsNull(i)) { + LOG(FATAL) << "Null value in primary key column: "; + } indexer.add_vertex(casted_array->Value(i)); } } else { if (col->type()->Equals(arrow::utf8())) { auto casted_array = std::static_pointer_cast(col); for (size_t i = 0; i < row_num; ++i) { - auto str = casted_array->GetView(i); + auto str = casted_array->IsNull(i) ? "" : casted_array->GetView(i); std::string_view str_view(str.data(), str.size()); indexer.add_vertex(str_view); } @@ -173,7 +188,7 @@ struct _add_vertex { auto casted_array = std::static_pointer_cast(col); for (size_t i = 0; i < row_num; ++i) { - auto str = casted_array->GetView(i); + auto str = casted_array->IsNull(i) ? 
"" : casted_array->GetView(i); std::string_view str_view(str.data(), str.size()); indexer.add_vertex(str_view); } @@ -194,7 +209,7 @@ void _append(bool is_dst, size_t cur_ind, std::shared_ptr col, if (col->type()->Equals(arrow::utf8())) { auto casted = std::static_pointer_cast(col); for (auto j = 0; j < casted->length(); ++j) { - auto str = casted->GetView(j); + auto str = casted->IsNull(j) ? "" : casted->GetView(j); std::string_view str_view(str.data(), str.size()); auto vid = indexer.get_index(Any::From(str_view)); if (is_dst) { @@ -210,7 +225,7 @@ void _append(bool is_dst, size_t cur_ind, std::shared_ptr col, // must be large utf8 auto casted = std::static_pointer_cast(col); for (auto j = 0; j < casted->length(); ++j) { - auto str = casted->GetView(j); + auto str = casted->IsNull(j) ? "" : casted->GetView(j); std::string_view str_view(str.data(), str.size()); auto vid = indexer.get_index(Any::From(str_view)); if (is_dst) { @@ -227,6 +242,9 @@ void _append(bool is_dst, size_t cur_ind, std::shared_ptr col, using arrow_array_type = typename gs::TypeConverter::ArrowArrayType; auto casted = std::static_pointer_cast(col); for (auto j = 0; j < casted->length(); ++j) { + if (casted->IsNull(j)) { + LOG(FATAL) << "Null value in primary key column: "; + } auto vid = indexer.get_index(Any::From(casted->Value(j))); if (is_dst) { std::get<1>(parsed_edges[cur_ind++]) = vid; @@ -302,11 +320,17 @@ static void append_edges(std::shared_ptr src_col, arrow::StringArray>::value || std::is_same::value) { - auto str = data->GetView(j); - std::string_view str_view(str.data(), str.size()); - std::get<2>(parsed_edges[cur_ind++]) = str_view; + if (data->IsNull(j)) { + std::get<2>(parsed_edges[cur_ind++]) = ""; + } else { + auto str =
data->GetView(j); // non-null here; GetView borrows from the Arrow array, whereas GetString() would copy into a scope-local std::string and leave str_view dangling + std::string_view str_view(str.data(), str.size()); + std::get<2>(parsed_edges[cur_ind++]) = str_view; + } } else { - std::get<2>(parsed_edges[cur_ind++]) = data->Value(j); + std::get<2>(parsed_edges[cur_ind++]) = + data->IsNull(j) ? std::numeric_limits::max() + : data->Value(j); } } VLOG(10) << "Finish inserting: " << src_col->length() << " edges"; @@ -331,14 +355,12 @@ static void append_edges(std::shared_ptr src_col, class AbstractArrowFragmentLoader : public IFragmentLoader { public: AbstractArrowFragmentLoader(const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, - int32_t thread_num, bool build_csr_in_mem, - bool use_mmap_vector) + const LoadingConfig& loading_config) : loading_config_(loading_config), schema_(schema), - thread_num_(thread_num), - build_csr_in_mem_(build_csr_in_mem), - use_mmap_vector_(use_mmap_vector), + thread_num_(loading_config_.GetParallelism()), + build_csr_in_mem_(loading_config_.GetBuildCsrInMem()), + use_mmap_vector_(loading_config_.GetUseMmapVector()), basic_fragment_loader_(schema_, work_dir) { vertex_label_num_ = schema_.vertex_label_num(); edge_label_num_ = schema_.edge_label_num(); @@ -599,6 +621,9 @@ class AbstractArrowFragmentLoader : public IFragmentLoader { auto casted_array = std::static_pointer_cast(primary_key_column); for (size_t i = 0; i < row_num; ++i) { + if (casted_array->IsNull(i)) { + LOG(FATAL) << "Null value in primary key column: "; + } vids.emplace_back(indexer.get_index(casted_array->Value(i))); } } else { @@ -607,7 +632,8 @@ class AbstractArrowFragmentLoader : public IFragmentLoader { std::static_pointer_cast( primary_key_column); for (size_t i = 0; i < row_num; ++i) { - auto str = casted_array->GetView(i); + auto str = + casted_array->IsNull(i) ? 
"" : casted_array->GetView(i); std::string_view str_view(str.data(), str.size()); vids.emplace_back(indexer.get_index(str_view)); } @@ -617,7 +643,8 @@ class AbstractArrowFragmentLoader : public IFragmentLoader { std::static_pointer_cast( primary_key_column); for (size_t i = 0; i < row_num; ++i) { - auto str = casted_array->GetView(i); + auto str = + casted_array->IsNull(i) ? "" : casted_array->GetView(i); std::string_view str_view(str.data(), str.size()); vids.emplace_back(indexer.get_index(str_view)); } diff --git a/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc b/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc index 6e7d0e4873eb..0b6b938ec339 100644 --- a/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc +++ b/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.cc @@ -230,11 +230,9 @@ static void put_column_names_option(const LoadingConfig& loading_config, std::shared_ptr CSVFragmentLoader::Make( const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int32_t thread_num, - bool build_csr_in_mem, bool use_mmap_vector) { + const LoadingConfig& loading_config) { return std::shared_ptr( - new CSVFragmentLoader(work_dir, schema, loading_config, thread_num, - build_csr_in_mem, use_mmap_vector)); + new CSVFragmentLoader(work_dir, schema, loading_config)); } void CSVFragmentLoader::addVertices(label_t v_label_id, @@ -317,8 +315,8 @@ void CSVFragmentLoader::loadVertices() { ++iter) { vertex_files.emplace_back(iter->first, iter->second); } - LOG(INFO) << "Parallel loading with " << thread_num_ << " threads, " << " " - << vertex_files.size() << " vertex files, "; + LOG(INFO) << "Parallel loading with " << thread_num_ << " threads, " + << " " << vertex_files.size() << " vertex files, "; std::atomic v_ind(0); std::vector threads(thread_num_); for (int i = 0; i < thread_num_; ++i) { diff --git a/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.h 
b/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.h index 0127cb150095..cea7555caa87 100644 --- a/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.h +++ b/flex/storages/rt_mutable_graph/loader/csv_fragment_loader.h @@ -66,16 +66,12 @@ class CSVTableRecordBatchSupplier : public IRecordBatchSupplier { class CSVFragmentLoader : public AbstractArrowFragmentLoader { public: CSVFragmentLoader(const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int32_t thread_num, - bool build_csr_in_mem, bool use_mmap_vector) - : AbstractArrowFragmentLoader(work_dir, schema, loading_config, - thread_num, build_csr_in_mem, - use_mmap_vector) {} + const LoadingConfig& loading_config) + : AbstractArrowFragmentLoader(work_dir, schema, loading_config) {} static std::shared_ptr Make( const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int32_t thread_num, - bool build_csr_in_mem, bool use_mmap_vector); + const LoadingConfig& loading_config); ~CSVFragmentLoader() {} diff --git a/flex/storages/rt_mutable_graph/loader/loader_factory.cc b/flex/storages/rt_mutable_graph/loader/loader_factory.cc index 9d257309cc94..2036ba98b611 100644 --- a/flex/storages/rt_mutable_graph/loader/loader_factory.cc +++ b/flex/storages/rt_mutable_graph/loader/loader_factory.cc @@ -46,16 +46,14 @@ void LoaderFactory::Finalize() {} std::shared_ptr LoaderFactory::CreateFragmentLoader( const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int thread_num, bool build_csr_in_mem, - bool use_mmap_vector) { + const LoadingConfig& loading_config) { auto scheme = loading_config.GetScheme(); auto format = loading_config.GetFormat(); auto key = scheme + format; auto& known_loaders_ = getKnownLoaders(); auto iter = known_loaders_.find(key); if (iter != known_loaders_.end()) { - return iter->second(work_dir, schema, loading_config, thread_num, - build_csr_in_mem, use_mmap_vector); + return 
iter->second(work_dir, schema, loading_config); } else { LOG(FATAL) << "Unsupported format: " << format; } diff --git a/flex/storages/rt_mutable_graph/loader/loader_factory.h b/flex/storages/rt_mutable_graph/loader/loader_factory.h index 95034482a58a..4f2536cfd8f8 100644 --- a/flex/storages/rt_mutable_graph/loader/loader_factory.h +++ b/flex/storages/rt_mutable_graph/loader/loader_factory.h @@ -30,8 +30,7 @@ class LoaderFactory { public: using loader_initializer_t = std::shared_ptr (*)( const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int thread_num, - bool build_csr_in_mem, bool use_mmap_vector); + const LoadingConfig& loading_config); static void Init(); @@ -39,8 +38,7 @@ class LoaderFactory { static std::shared_ptr CreateFragmentLoader( const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int thread_num, - bool build_csr_in_mem, bool use_mmap_vector); + const LoadingConfig& loading_config); static bool Register(const std::string& scheme_type, const std::string& format_type, diff --git a/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.cc b/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.cc index 18c1a73baf82..1971faa67512 100644 --- a/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.cc +++ b/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.cc @@ -120,6 +120,7 @@ void ODPSReadClient::CreateReadSession( while (resp.status_ != apsara::odps::sdk::storage_api::Status::OK && resp.status_ != apsara::odps::sdk::storage_api::Status::WAIT) { LOG(ERROR) << "CreateReadSession failed" << resp.error_message_; + std::this_thread::sleep_for(std::chrono::seconds(2)); resp = createReadSession(table_identifier, selected_cols, partition_cols, selected_partitions); } @@ -323,6 +324,10 @@ ODPSTableRecordBatchSupplier::ODPSTableRecordBatchSupplier( // Read the table. 
table_ = odps_read_client_.ReadTable(session_id, split_count, table_identifier, thread_num); + + LOG(INFO) << "Successfully read table: " << table_identifier.table_ + << " with " << table_->num_rows() << " rows, " + << table_->num_columns() << " columns"; reader_ = std::make_shared(*table_); } @@ -341,11 +346,9 @@ ODPSTableRecordBatchSupplier::GetNextBatch() { std::shared_ptr ODPSFragmentLoader::Make( const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int32_t thread_num, - bool build_csr_in_mem, bool use_mmap_vector) { + const LoadingConfig& loading_config) { return std::shared_ptr( - new ODPSFragmentLoader(work_dir, schema, loading_config, thread_num, - build_csr_in_mem, use_mmap_vector)); + new ODPSFragmentLoader(work_dir, schema, loading_config)); } void ODPSFragmentLoader::init() { odps_read_client_.init(); } @@ -451,8 +454,8 @@ void ODPSFragmentLoader::loadVertices() { ++iter) { vertex_files.emplace_back(iter->first, iter->second); } - LOG(INFO) << "Parallel loading with " << thread_num_ << " threads, " << " " - << vertex_files.size() << " vertex files, "; + LOG(INFO) << "Parallel loading with " << thread_num_ << " threads, " + << " " << vertex_files.size() << " vertex files, "; std::atomic v_ind(0); std::vector threads(thread_num_); for (int i = 0; i < thread_num_; ++i) { diff --git a/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.h b/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.h index 8b08b140ed4d..1da86c5a27d0 100644 --- a/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.h +++ b/flex/storages/rt_mutable_graph/loader/odps_fragment_loader.h @@ -171,16 +171,12 @@ class ODPSTableRecordBatchSupplier : public IRecordBatchSupplier { class ODPSFragmentLoader : public AbstractArrowFragmentLoader { public: ODPSFragmentLoader(const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int32_t thread_num, - bool build_csr_in_mem, bool use_mmap_vector) - : 
AbstractArrowFragmentLoader(work_dir, schema, loading_config, - thread_num, build_csr_in_mem, - use_mmap_vector) {} + const LoadingConfig& loading_config) + : AbstractArrowFragmentLoader(work_dir, schema, loading_config) {} static std::shared_ptr Make( const std::string& work_dir, const Schema& schema, - const LoadingConfig& loading_config, int32_t thread_num, - bool build_csr_in_mem, bool); + const LoadingConfig& loading_config); ~ODPSFragmentLoader() {} diff --git a/flex/storages/rt_mutable_graph/loading_config.cc b/flex/storages/rt_mutable_graph/loading_config.cc index 71077a45a9f0..7817637873cf 100644 --- a/flex/storages/rt_mutable_graph/loading_config.cc +++ b/flex/storages/rt_mutable_graph/loading_config.cc @@ -151,8 +151,7 @@ static bool parse_column_mappings( int32_t column_id = -1; if (!get_scalar(column_mapping, "index", column_id)) { VLOG(10) << "Column index for column mapping is not set, skip"; - } - else { + } else { if (column_id < 0) { LOG(ERROR) << "Column index for column mapping should be non-negative"; return false; @@ -555,6 +554,25 @@ Status parse_bulk_load_config_yaml(const YAML::Node& root, const Schema& schema, get_scalar(data_source_node, "location", data_location); } + if (loading_config_node["x_csr_params"]) { + if (get_scalar(loading_config_node["x_csr_params"], + loader_options::PARALLELISM, load_config.parallelism_)) { + VLOG(10) << "Parallelism is set to: " << load_config.parallelism_; + } + if (get_scalar(loading_config_node["x_csr_params"], + loader_options::BUILD_CSR_IN_MEM, + load_config.build_csr_in_mem_)) { + VLOG(10) << "Build csr in memory is set to: " + << load_config.build_csr_in_mem_; + } + if (get_scalar(loading_config_node["x_csr_params"], + loader_options::USE_MMAP_VECTOR, + load_config.use_mmap_vector_)) { + VLOG(10) << "Use mmap vector is set to: " + << load_config.use_mmap_vector_; + } + } + RETURN_IF_NOT_OK( parse_bulk_load_method(loading_config_node, load_config.method_)); auto format_node = 
loading_config_node["format"]; @@ -717,14 +735,23 @@ LoadingConfig::LoadingConfig(const Schema& schema) : schema_(schema), scheme_("file"), method_(BulkLoadMethod::kInit), - format_("csv") {} + format_("csv"), + parallelism_(loader_options::DEFAULT_PARALLELISM), + build_csr_in_mem_(loader_options::DEFAULT_BUILD_CSR_IN_MEM), + use_mmap_vector_(loader_options::DEFAULT_USE_MMAP_VECTOR) {} LoadingConfig::LoadingConfig(const Schema& schema, const std::string& data_source, const std::string& delimiter, const BulkLoadMethod& method, const std::string& format) - : schema_(schema), scheme_(data_source), method_(method), format_(format) { + : schema_(schema), + scheme_(data_source), + method_(method), + format_(format), + parallelism_(loader_options::DEFAULT_PARALLELISM), + build_csr_in_mem_(loader_options::DEFAULT_BUILD_CSR_IN_MEM), + use_mmap_vector_(loader_options::DEFAULT_USE_MMAP_VECTOR) { metadata_[reader_options::DELIMITER] = delimiter; } diff --git a/flex/storages/rt_mutable_graph/loading_config.h b/flex/storages/rt_mutable_graph/loading_config.h index 44d41f683719..04c3754838e9 100644 --- a/flex/storages/rt_mutable_graph/loading_config.h +++ b/flex/storages/rt_mutable_graph/loading_config.h @@ -59,6 +59,15 @@ static const std::unordered_set CSV_META_KEY_WORDS = { } // namespace reader_options +namespace loader_options { +static constexpr const char* PARALLELISM = "parallelism"; +static constexpr const char* BUILD_CSR_IN_MEM = "build_csr_in_mem"; +static constexpr const char* USE_MMAP_VECTOR = "use_mmap_vector"; +static constexpr const int32_t DEFAULT_PARALLELISM = 1; +static constexpr const bool DEFAULT_BUILD_CSR_IN_MEM = false; +static constexpr const bool DEFAULT_USE_MMAP_VECTOR = false; +} // namespace loader_options + class LoadingConfig; namespace config_parsing { @@ -146,11 +155,27 @@ class LoadingConfig { GetEdgeSrcDstCol(label_t src_label_id, label_t dst_label_id, label_t edge_label_id) const; + inline void SetParallelism(int32_t parallelism) { + 
parallelism_ = parallelism; + } + inline void SetBuildCsrInMem(bool build_csr_in_mem) { + build_csr_in_mem_ = build_csr_in_mem; + } + inline void SetUseMmapVector(bool use_mmap_vector) { + use_mmap_vector_ = use_mmap_vector; + } + inline int32_t GetParallelism() const { return parallelism_; } + inline bool GetBuildCsrInMem() const { return build_csr_in_mem_; } + inline bool GetUseMmapVector() const { return use_mmap_vector_; } + private: const Schema& schema_; std::string scheme_; // "file", "hdfs", "oss", "s3" BulkLoadMethod method_; // init, append, overwrite std::string format_; // csv, tsv, json, parquet + int32_t parallelism_; // Number of thread should be used in loading + bool build_csr_in_mem_; // Whether to build csr in memory + bool use_mmap_vector_; // Whether to use mmap vector // meta_data, stores all the meta info about loading std::unordered_map metadata_; diff --git a/flex/storages/rt_mutable_graph/mutable_property_fragment.h b/flex/storages/rt_mutable_graph/mutable_property_fragment.h index f2bc198c9576..f17c3f5b8af3 100644 --- a/flex/storages/rt_mutable_graph/mutable_property_fragment.h +++ b/flex/storages/rt_mutable_graph/mutable_property_fragment.h @@ -109,6 +109,8 @@ class MutablePropertyFragment { void loadSchema(const std::string& filename); + void swap(MutablePropertyFragment& rhs); + Schema schema_; std::vector lf_indexers_; std::vector ie_, oe_; diff --git a/flex/tests/hqps/engine_config_test_2.yaml b/flex/tests/hqps/engine_config_test_2.yaml new file mode 100644 index 000000000000..3e4973392edd --- /dev/null +++ b/flex/tests/hqps/engine_config_test_2.yaml @@ -0,0 +1,39 @@ +directories: + workspace: /tmp/interactive_workspace + subdirs: + data: data + logs: logs + conf: conf +log_level: INFO +default_graph: ldbc +compute_engine: + type: hiactor + workers: + - localhost:10000 + thread_num_per_worker: 1 + store: + type: cpp-mcsr + metadata_store: + type: file # file/sqlite/etcd +compiler: + planner: + is_on: true + opt: RBO + rules: + - 
FilterIntoJoinRule + - FilterMatchRule + - NotMatchToAntiJoinRule + endpoint: + default_listen_address: localhost + bolt_connector: + disabled: false + port: 7687 + gremlin_connector: + disabled: false + port: 8182 + query_timeout: 40000 + gremlin_script_language_name: antlr_gremlin_calcite +http_service: + default_listen_address: localhost + admin_port: 7777 + query_port: 10001 diff --git a/flex/tests/hqps/hqps_proxy_server_test.sh b/flex/tests/hqps/hqps_proxy_server_test.sh new file mode 100644 index 000000000000..89d2c82df762 --- /dev/null +++ b/flex/tests/hqps/hqps_proxy_server_test.sh @@ -0,0 +1,161 @@ +#!/bin/bash +# Copyright 2020 Alibaba Group Holding Limited. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+set -e + +RED='\033[0;31m' +GREEN='\033[0;32m' +NC='\033[0m' # No Color +err() { + echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] -ERROR- $* ${NC}" >&2 +} + +info() { + echo -e "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] -INFO- $* ${NC}" +} + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +FLEX_HOME=${SCRIPT_DIR}/../../ +INTERACITIVE_SERVER_BIN=${FLEX_HOME}/build/bin/interactive_server +PROXY_SERVER_BIN=${FLEX_HOME}/build/bin/proxy_server +GIE_HOME=${FLEX_HOME}/../interactive_engine/ +ENGINE_CONFIG_PATH_WORKER1=/tmp/interactive_engine_config_worker1.yaml +ENGINE_CONFIG_PATH_WORKER2=/tmp/interactive_engine_config_worker2.yaml + +if [ $# -lt 2 ] || [ $# -ge 3 ]; then + echo "Receives: $# args, need 2 args" + echo "Usage: $0 " + exit 1 +fi + +INTERACTIVE_WORKSPACE=$1 +ENGINE_CONFIG_PATH=$2 +info "INTERACTIVE_WORKSPACE: ${INTERACTIVE_WORKSPACE}" +info "ENGINE_CONFIG_PATH: ${ENGINE_CONFIG_PATH}" + +kill_service(){ + info "Kill Service first" + ps -ef | grep "com.alibaba.graphscope.GraphServer" | awk '{print $2}' | xargs kill -9 || true + ps -ef | grep "interactive_server" | awk '{print $2}' | xargs kill -9 || true + ps -ef | grep "/bin/proxy_server" | awk '{print $2}' | xargs kill -9 || true + sleep 3 + # give the killed processes time to exit + info "Kill Service success" +} + +# kill services on exit +trap kill_service EXIT + +# create two copies of the engine config, one per worker. 
+prepare_engine_config() {
+  local worker_cfg
+  # Remove stale per-worker configs left over from a previous run.
+  for worker_cfg in ${ENGINE_CONFIG_PATH_WORKER1} ${ENGINE_CONFIG_PATH_WORKER2}; do
+    if [ -f ${worker_cfg} ]; then
+      rm -f ${worker_cfg}
+    fi
+  done
+  # Clone the base config once per worker.
+  for worker_cfg in ${ENGINE_CONFIG_PATH_WORKER1} ${ENGINE_CONFIG_PATH_WORKER2}; do
+    cp ${ENGINE_CONFIG_PATH} ${worker_cfg}
+  done
+  # Worker 2 gets its own worker address and ports; worker 1 keeps the
+  # values from the base config.
+  sed -i \
+    -e "s/localhost:10000/localhost:10001/g" \
+    -e "s/port: 7687/port: 7688/g" \
+    -e "s/port: 8182/port: 8183/g" \
+    -e "s/admin_port: 7777/admin_port: 7778/g" \
+    -e "s/query_port: 10000/query_port: 10001/g" \
+    ${ENGINE_CONFIG_PATH_WORKER2}
+}
+
+# Launch two interactive_server processes in the background, one per worker
+# config, and exit with an error unless exactly two are running afterwards.
+start_worker() {
+  info "start worker1"
+  graph_yaml=${INTERACTIVE_WORKSPACE}/data/modern_graph/graph.yaml
+  indices_path=${INTERACTIVE_WORKSPACE}/data/modern_graph/indices
+  # The command line is assembled in the same steps as before, so the logged
+  # command text (including its leading blanks) stays unchanged.
+  base_cmd="${INTERACITIVE_SERVER_BIN} -g ${graph_yaml}"
+  base_cmd=" ${base_cmd} --data-path ${indices_path}"
+  cmd1=" ${base_cmd} -c ${ENGINE_CONFIG_PATH_WORKER1}"
+  cmd2=" ${base_cmd} -c ${ENGINE_CONFIG_PATH_WORKER2}"
+
+  info "Start worker1 with command: ${cmd1}"
+  # Intentionally unquoted: the string is word-split into the command line.
+  ${cmd1} &
+  sleep 2
+
+  info "Start worker2 with command: ${cmd2}"
+  ${cmd2} &
+  sleep 2
+
+  # Both workers must be visible in the process table.
+  cnt=$(ps -ef | grep "bin/interactive_server" | grep -v grep | wc -l)
+  if (( cnt != 2 )); then
+    err "Start worker failed, expect 2 interactive_server process, but got ${cnt}"
+    exit 1
+  fi
+  info "Start worker success"
+}
+
+# Launch the proxy server in the background, pointed at both worker
+# endpoints, and exit with an error unless exactly one instance is running.
+start_proxy() {
+  info "start proxy server"
+  cmd="${PROXY_SERVER_BIN} -e localhost:10000,localhost:10001"
+  info "Start proxy server with command: ${cmd}"
+  ${cmd} &
+  sleep 2
+
+  # Exactly one proxy_server must be visible in the process table.
+  cnt=$(ps -ef | grep "bin/proxy_server" | grep -v grep | wc -l)
+  if (( cnt != 1 )); then
+    err "Start proxy server failed, expect 1 proxy_server process, but got ${cnt}"
+    exit 1
+  fi
+  info "Start proxy server success"
+}
+
+
+# Check the proxy's /heartbeat endpoint while the workers are killed one by
+# one: it must answer "OK" with two workers and with one worker, and must not
+# answer "OK" once both workers are gone.
+test_proxy() {
+  local heartbeat_url=http://localhost:9999/heartbeat
+
+  # Bail out early unless exactly one proxy server is running.
+  cnt=$(ps -ef | grep "bin/proxy_server" | grep -v grep | wc -l)
+  if (( cnt != 1 )); then
+    err "Proxy server is not running, got cnt ${cnt}, expect 1"
+    exit 1
+  fi
+
+  info "Test proxy server"
+
+  # Both workers alive: the heartbeat must be OK.
+  res=$(curl -X GET ${heartbeat_url})
+  case "${res}" in
+    OK) ;;
+    *)
+      err "Test proxy server failed, expect OK, but got ${res}"
+      exit 1
+      ;;
+  esac
+
+  # Kill worker2 (identified by its config path); the heartbeat should
+  # still be OK.
+  ps -ef | grep "bin/interactive_server" | grep -v grep | grep ${ENGINE_CONFIG_PATH_WORKER2} | awk '{print $2}' | xargs kill -9
+  sleep 2
+  res=$(curl -X GET ${heartbeat_url})
+  case "${res}" in
+    OK) ;;
+    *)
+      err "Test proxy server failed, expect OK, but got ${res}"
+      exit 1
+      ;;
+  esac
+
+  # Kill worker1 as well; the heartbeat should no longer be OK.
+  ps -ef | grep "bin/interactive_server" | grep -v grep | grep ${ENGINE_CONFIG_PATH_WORKER1} | awk '{print $2}' | xargs kill -9
+  sleep 2
+  res=$(curl -X GET ${heartbeat_url})
+  case "${res}" in
+    OK)
+      err "Test proxy server failed, expect not OK, but got ${res}"
+      exit 1
+      ;;
+  esac
+
+  info "Test proxy server success"
+}
+
+
+# Start from a clean slate.
+kill_service
+
+prepare_engine_config
+start_worker # start interactive worker first
+start_proxy # start the proxy server
+test_proxy
+
+kill_service
\ No newline at end of file diff --git a/flex/utils/mmap_array.h b/flex/utils/mmap_array.h index 3cb5d74128d0..d752f9def362 100644 --- a/flex/utils/mmap_array.h +++ b/flex/utils/mmap_array.h @@ -79,7 +79,7 @@ class mmap_array { } mmap_array(mmap_array&& rhs) : mmap_array() { swap(rhs); } - ~mmap_array() {} + ~mmap_array() { reset(); } void reset() { if (data_ != NULL && mmap_size_ != 0) { diff --git a/flex/utils/result.h b/flex/utils/result.h index 6d553b4c3b23..865fbadde1b4 100644 --- a/flex/utils/result.h +++ b/flex/utils/result.h @@ -86,6
+86,9 @@ class Result { Result(const Status& status, ValueType&& value) : status_(status), value_(std::move(value)) {} + Result(Status&& status, ValueType&& value) + : status_(std::move(status)), value_(std::move(value)) {} + Result(const Status& status) : status_(status) {} Result(const Status& status, const ValueType& value)