diff --git a/.github/workflows/gss.yml b/.github/workflows/gss.yml index 5bf0558997ff..ffd550578596 100644 --- a/.github/workflows/gss.yml +++ b/.github/workflows/gss.yml @@ -79,6 +79,8 @@ jobs: . ${HOME}/.graphscope_env export SCCACHE_DIR=~/.cache/sccache export RUSTC_WRAPPER=/usr/local/bin/sccache + rustup toolchain install 1.81.0 + rustup default 1.81.0 cd ${GITHUB_WORKSPACE}/interactive_engine mvn clean install -P groot -Drust.compile.mode=debug -DskipTests --quiet mvn clean install -Pgroot-data-load --quiet diff --git a/.github/workflows/k8s-ci.yml b/.github/workflows/k8s-ci.yml index 2eff66e0a1af..66d6909d6b7d 100644 --- a/.github/workflows/k8s-ci.yml +++ b/.github/workflows/k8s-ci.yml @@ -282,6 +282,12 @@ jobs: ~/.cache/sccache key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: 1.81.0 + override: true + - name: Build Artifact run: | . ${HOME}/.graphscope_env @@ -640,6 +646,12 @@ jobs: ~/.cache/sccache key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} + - name: Install Rust + uses: actions-rs/toolchain@v1 + with: + toolchain: 1.81.0 + override: true + - name: Build GIE Experimental Artifacts run: | . ~/.graphscope_env diff --git a/.github/workflows/pr-check.yml b/.github/workflows/pr-check.yml index 7f59f2182263..a66be7b6d96c 100644 --- a/.github/workflows/pr-check.yml +++ b/.github/workflows/pr-check.yml @@ -213,11 +213,15 @@ jobs: python3 -m black --check --diff . python3 -m flake8 . popd - pushd flex/interactive/sdk/python + # we need to generate the code first + pushd flex/interactive/sdk + bash generate_sdk.sh -g python + pushd python python3 -m isort --check --diff . python3 -m black --check --diff . python3 -m flake8 . 
popd + popd - name: Generate Docs shell: bash diff --git a/docs/zh/frequently_asked_questions.rst b/docs/zh/frequently_asked_questions.rst index 66be7aefef79..f0428aa941b4 100644 --- a/docs/zh/frequently_asked_questions.rst +++ b/docs/zh/frequently_asked_questions.rst @@ -69,7 +69,7 @@ - ``vineyard_shared_mem``: 存储数据集的内存。我们发现将其设置为数据集在磁盘上的大小的 5 倍通常是一个合理的值。 它相当于 graphscope 的 helm chart 中的 ``vineyard.shared_mem``。 - - ``k8s_engine_mem`: ``engine`` 容器的内存大小。一般将其设置为 ``vineyard_shared_mem`` 的同样大小。它相当于 graphscope 的 helm chart 中的 ``engines.resources.memory.requests`` 和 ``engines.resources.memory.requests``。 + - ``k8s_engine_mem``: ``engine`` 容器的内存大小。一般将其设置为 ``vineyard_shared_mem`` 的同样大小。它相当于 graphscope 的 helm chart 中的 ``engines.resources.memory.requests`` 和 ``engines.resources.memory.requests``。 10. 导致在 Apple M1 python3.8 环境下安装 GraphScope 失败的原因可能有哪些? diff --git a/flex/engines/graph_db/runtime/adhoc/expr_impl.h b/flex/engines/graph_db/runtime/adhoc/expr_impl.h index 20a764de0696..91046e31e19c 100644 --- a/flex/engines/graph_db/runtime/adhoc/expr_impl.h +++ b/flex/engines/graph_db/runtime/adhoc/expr_impl.h @@ -85,17 +85,23 @@ class WithInExpr : public ExprBase { WithInExpr(const ReadTransaction& txn, const Context& ctx, std::unique_ptr&& key, const common::Value& array) : key_(std::move(key)) { - if constexpr (std::is_same_v) { - CHECK(array.item_case() == common::Value::kI64Array); - size_t len = array.i64_array().item_size(); - for (size_t idx = 0; idx < len; ++idx) { - container_.push_back(array.i64_array().item(idx)); - } - } else if constexpr (std::is_same_v) { - CHECK(array.item_case() == common::Value::kI32Array); - size_t len = array.i32_array().item_size(); - for (size_t idx = 0; idx < len; ++idx) { - container_.push_back(array.i32_array().item(idx)); + if constexpr ((std::is_same_v) || + (std::is_same_v) ) { + // Implicitly convert to T + if (array.item_case() == common::Value::kI64Array) { + size_t len = array.i64_array().item_size(); + for (size_t idx = 0; idx < 
len; ++idx) { + container_.push_back(array.i64_array().item(idx)); + } + } else if (array.item_case() == common::Value::kI32Array) { + size_t len = array.i32_array().item_size(); + for (size_t idx = 0; idx < len; ++idx) { + container_.push_back(array.i32_array().item(idx)); + } + } else { + LOG(FATAL) << "Fail to construct WithInExpr of type " + << typeid(T).name() << " with array of type " + << array.item_case(); } } else if constexpr (std::is_same_v) { CHECK(array.item_case() == common::Value::kStrArray); diff --git a/flex/engines/http_server/handler/graph_db_http_handler.cc b/flex/engines/http_server/handler/graph_db_http_handler.cc index 319920459058..4ff8c9e540ff 100644 --- a/flex/engines/http_server/handler/graph_db_http_handler.cc +++ b/flex/engines/http_server/handler/graph_db_http_handler.cc @@ -176,7 +176,7 @@ class stored_proc_handler : public StoppableHandler { bool start() override { if (get_executors()[StoppableHandler::shard_id()].size() > 0) { - LOG(ERROR) << "The actors have been already created!"; + VLOG(10) << "The actors have been already created!"; return false; } return StoppableHandler::start_scope( diff --git a/flex/engines/http_server/handler/graph_db_http_handler.h b/flex/engines/http_server/handler/graph_db_http_handler.h index 6bc5c906910e..22090e66dc41 100644 --- a/flex/engines/http_server/handler/graph_db_http_handler.h +++ b/flex/engines/http_server/handler/graph_db_http_handler.h @@ -69,7 +69,7 @@ class StoppableHandler : public seastar::httpd::handler_base { } catch (const std::exception& e) { // In case the scope is already cancelled, we should ignore the // exception. 
- LOG(INFO) << "Failed to cancel IC scope: " << e.what(); + VLOG(1) << "Failed to cancel IC scope: " << e.what(); } func(); return seastar::make_ready_future<>(); diff --git a/flex/interactive/sdk/python/gs_interactive/client/status.py b/flex/interactive/sdk/python/gs_interactive/client/status.py index 61d772421ddc..26745c230f5f 100644 --- a/flex/interactive/sdk/python/gs_interactive/client/status.py +++ b/flex/interactive/sdk/python/gs_interactive/client/status.py @@ -17,6 +17,7 @@ # from urllib3.exceptions import MaxRetryError +from urllib3.exceptions import ProtocolError from gs_interactive.api_response import ApiResponse from gs_interactive.client.generated.interactive_pb2 import Code as StatusCode @@ -108,6 +109,8 @@ def from_exception(exception: ApiException): return Status(StatusCode.INTERNAL_ERROR, exception.body) elif isinstance(exception, MaxRetryError): return Status(StatusCode.INTERNAL_ERROR, exception) + elif isinstance(exception, ProtocolError): + return Status(StatusCode.INTERNAL_ERROR, exception) return Status( StatusCode.UNKNOWN, "Unknown Error from exception " + exception.body ) diff --git a/flex/interactive/sdk/python/gs_interactive/tests/conftest.py b/flex/interactive/sdk/python/gs_interactive/tests/conftest.py index 32baf98060f1..4bfe06bbcec7 100644 --- a/flex/interactive/sdk/python/gs_interactive/tests/conftest.py +++ b/flex/interactive/sdk/python/gs_interactive/tests/conftest.py @@ -17,6 +17,7 @@ # # get the directory of the current file +import copy import os import time @@ -27,6 +28,7 @@ from gs_interactive.client.session import Session from gs_interactive.models import CreateGraphRequest from gs_interactive.models import CreateProcedureRequest +from gs_interactive.models import GetGraphSchemaResponse from gs_interactive.models import SchemaMapping from gs_interactive.models import StartServiceRequest from gs_interactive.models import UpdateProcedureRequest @@ -39,7 +41,7 @@ modern_graph_full = { - "name": "modern_graph", + "name": 
"full_graph", "description": "This is a test graph", "schema": { "vertex_types": [ @@ -120,7 +122,7 @@ } modern_graph_vertex_only = { - "name": "modern_graph", + "name": "vertex_only", "description": "This is a test graph, only contains vertex", "schema": { "vertex_types": [ @@ -148,7 +150,7 @@ } modern_graph_partial = { - "name": "modern_graph", + "name": "partial_graph", "description": "This is a test graph", "schema": { "vertex_types": [ @@ -336,7 +338,7 @@ def create_partial_modern_graph(interactive_session): @pytest.fixture(scope="function") def create_graph_with_custom_pk_name(interactive_session): - modern_graph_custom_pk_name = modern_graph_full.copy() + modern_graph_custom_pk_name = copy.deepcopy(modern_graph_full) for vertex_type in modern_graph_custom_pk_name["schema"]["vertex_types"]: vertex_type["properties"][0]["property_name"] = "custom_id" vertex_type["primary_keys"] = ["custom_id"] @@ -492,3 +494,24 @@ def start_service_on_graph(interactive_session, graph_id: str): assert resp.is_ok() # wait three second to let compiler get the new graph time.sleep(3) + + +def ensure_compiler_schema_ready( + interactive_session, neo4j_session: Neo4jSession, graph_id: str +): + rel_graph_meta = interactive_session.get_graph_schema(graph_id).get_value() + max_times = 10 + while True: + if max_times == 0: + raise Exception("compiler schema is not ready") + res = neo4j_session.run("CALL gs.procedure.meta.schema();") + val = res.single().value() + compiler_graph_schema = GetGraphSchemaResponse.from_json(val) + # print("compiler_graph_schema: ", compiler_graph_schema) + # print("rel_graph_meta: ", rel_graph_meta) + if compiler_graph_schema == rel_graph_meta: + break + print("compiler schema is not ready, wait for 1 second") + time.sleep(1) + max_times -= 1 + print("compiler schema is ready") diff --git a/flex/interactive/sdk/python/gs_interactive/tests/test_robustness.py b/flex/interactive/sdk/python/gs_interactive/tests/test_robustness.py index 
fabfdf20d7e4..fb5d3abcdef4 100644 --- a/flex/interactive/sdk/python/gs_interactive/tests/test_robustness.py +++ b/flex/interactive/sdk/python/gs_interactive/tests/test_robustness.py @@ -27,6 +27,7 @@ from gs_interactive.tests.conftest import call_procedure # noqa: E402 from gs_interactive.tests.conftest import create_procedure from gs_interactive.tests.conftest import delete_procedure +from gs_interactive.tests.conftest import ensure_compiler_schema_ready from gs_interactive.tests.conftest import import_data_to_full_modern_graph from gs_interactive.tests.conftest import import_data_to_partial_modern_graph from gs_interactive.tests.conftest import import_data_to_vertex_only_modern_graph @@ -61,6 +62,9 @@ def test_query_on_vertex_only_graph( """ print("[Query on vertex only graph]") start_service_on_graph(interactive_session, create_vertex_only_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_vertex_only_modern_graph + ) run_cypher_test_suite( neo4j_session, create_vertex_only_modern_graph, vertex_only_cypher_queries ) @@ -69,6 +73,10 @@ def test_query_on_vertex_only_graph( import_data_to_vertex_only_modern_graph( interactive_session, create_vertex_only_modern_graph ) + start_service_on_graph(interactive_session, create_vertex_only_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_vertex_only_modern_graph + ) run_cypher_test_suite( neo4j_session, create_vertex_only_modern_graph, vertex_only_cypher_queries ) @@ -83,12 +91,19 @@ def test_query_on_partial_graph( print("[Query on partial graph]") # start service on new graph start_service_on_graph(interactive_session, create_partial_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_partial_modern_graph + ) # try to query on the graph run_cypher_test_suite(neo4j_session, create_partial_modern_graph, cypher_queries) start_service_on_graph(interactive_session, "1") import_data_to_partial_modern_graph( 
interactive_session, create_partial_modern_graph ) + start_service_on_graph(interactive_session, create_partial_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_partial_modern_graph + ) run_cypher_test_suite(neo4j_session, create_partial_modern_graph, cypher_queries) @@ -100,10 +115,17 @@ def test_query_on_full_modern_graph( """ print("[Query on full modern graph]") start_service_on_graph(interactive_session, create_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_modern_graph + ) # try to query on the graph run_cypher_test_suite(neo4j_session, create_modern_graph, cypher_queries) start_service_on_graph(interactive_session, "1") import_data_to_full_modern_graph(interactive_session, create_modern_graph) + start_service_on_graph(interactive_session, create_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_modern_graph + ) run_cypher_test_suite(neo4j_session, create_modern_graph, cypher_queries) @@ -129,6 +151,9 @@ def test_service_switching( ) print("Procedure id: ", a_proc_id) start_service_on_graph(interactive_session, create_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_modern_graph + ) call_procedure(neo4j_session, create_modern_graph, a_proc_id) # create procedure on graph_b_id @@ -139,6 +164,9 @@ def test_service_switching( "MATCH(n: person) return count(n);", ) start_service_on_graph(interactive_session, create_vertex_only_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_vertex_only_modern_graph + ) call_procedure(neo4j_session, create_vertex_only_modern_graph, b_proc_id) @@ -156,6 +184,9 @@ def test_procedure_creation(interactive_session, neo4j_session, create_modern_gr ) print("Procedure id: ", a_proc_id) start_service_on_graph(interactive_session, create_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, 
create_modern_graph + ) call_procedure(neo4j_session, create_modern_graph, a_proc_id) # create procedure with name containing space, @@ -202,6 +233,9 @@ def test_builtin_procedure(interactive_session, neo4j_session, create_modern_gra ) # Call the builtin procedure start_service_on_graph(interactive_session, create_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_modern_graph + ) call_procedure( neo4j_session, create_modern_graph, @@ -259,6 +293,10 @@ def test_list_jobs(interactive_session, create_vertex_only_modern_graph): def test_call_proc_in_cypher(interactive_session, neo4j_session, create_modern_graph): print("[Test call procedure in cypher]") import_data_to_full_modern_graph(interactive_session, create_modern_graph) + start_service_on_graph(interactive_session, create_modern_graph) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_modern_graph + ) result = neo4j_session.run( 'MATCH(p: person) with p.id as oid CALL k_neighbors("person", oid, 1) return label_name, vertex_oid;' ) @@ -276,6 +314,9 @@ def test_custom_pk_name( interactive_session, create_graph_with_custom_pk_name ) start_service_on_graph(interactive_session, create_graph_with_custom_pk_name) + ensure_compiler_schema_ready( + interactive_session, neo4j_session, create_graph_with_custom_pk_name + ) result = neo4j_session.run( "MATCH (n: person) where n.custom_id = 4 return n.custom_id;" ) @@ -289,7 +330,6 @@ def test_custom_pk_name( ) records = result.fetch(1) assert len(records) == 1 and records[0]["$f0"] == 2 - start_service_on_graph(interactive_session, "1") def test_x_csr_params( diff --git a/flex/interactive/sdk/python/setup.cfg b/flex/interactive/sdk/python/setup.cfg index 1e1839250158..255781291114 100644 --- a/flex/interactive/sdk/python/setup.cfg +++ b/flex/interactive/sdk/python/setup.cfg @@ -3,7 +3,7 @@ profile = black ensure_newline_before_comments = True line_length = 88 force_single_line = True -skip = 
build/,dist/,gs_interactive/api/,gs_interactive/api_response.py,gs_interactive/configuration.py,gs_interactive/exceptions.py,gs_interactive/models/,gs_interactiverest.py, +skip = build/,dist/,gs_interactive/api/,gs_interactive/api_response.py,gs_interactive/configuration.py,gs_interactive/exceptions.py,gs_interactive/models/,gs_interactiverest.py,gs_interactive/api_client.py,gs_interactive/__init__.py,gs_interactive/rest.py skip_glob = *_pb2.py,*_pb2_grpc.py,build/* [flake8] diff --git a/flex/storages/metadata/local_file_metadata_store.cc b/flex/storages/metadata/local_file_metadata_store.cc index 5f77d0cc2e7a..bcff4bf4ed38 100644 --- a/flex/storages/metadata/local_file_metadata_store.cc +++ b/flex/storages/metadata/local_file_metadata_store.cc @@ -181,8 +181,8 @@ Result LocalFileMetadataStore::UpdateMeta(const meta_kind_t& meta_kind, Result LocalFileMetadataStore::get_next_meta_key( - const LocalFileMetadataStore::meta_kind_t& meta_kind) const { - return std::to_string(get_max_id(meta_kind) + 1); + const LocalFileMetadataStore::meta_kind_t& meta_kind) { + return std::to_string(increase_and_get_id(meta_kind)); } std::string LocalFileMetadataStore::get_root_meta_dir() const { @@ -208,29 +208,38 @@ std::string LocalFileMetadataStore::get_meta_file(const meta_kind_t& meta_kind, return ret; } -int32_t LocalFileMetadataStore::get_max_id(const meta_kind_t& meta_kind) const { - // iterate all files in the directory, get the max id. - int max_id_ = 0; +// Guarded by meta_mutex_ outside. 
+int32_t LocalFileMetadataStore::increase_and_get_id( + const meta_kind_t& meta_kind) { auto dir = get_meta_kind_dir(meta_kind); - for (auto& p : std::filesystem::directory_iterator(dir)) { - if (std::filesystem::is_directory(p)) { - continue; - } - auto file_name = p.path().filename().string(); - if (file_name.find(META_FILE_PREFIX) != std::string::npos) { - auto id_str = file_name.substr(strlen(META_FILE_PREFIX)); - int32_t id; - try { - id = std::stoi(id_str); - } catch (std::invalid_argument& e) { - LOG(ERROR) << "Invalid id: " << id_str; - continue; - } - if (id > max_id_) { - max_id_ = id; - } + int max_id_ = 0; + // In the directory, we expect a file with name CUR_ID_FILE_NAME. + // If the file does not exist, we will create one with content "0". + auto cur_id_file = dir + "/" + CUR_ID_FILE_NAME; + if (!std::filesystem::exists(cur_id_file)) { + std::ofstream out_file(cur_id_file); + if (!out_file.is_open()) { + LOG(ERROR) << "Failed to create file: " << cur_id_file; + return -1; } + out_file << "0"; + out_file.close(); + } + std::ifstream in_file(cur_id_file); + if (!in_file.is_open()) { + LOG(ERROR) << "Failed to open file: " << cur_id_file; + return -1; } + in_file >> max_id_; + in_file.close(); + max_id_++; + std::ofstream out_file(cur_id_file); + if (!out_file.is_open()) { + LOG(ERROR) << "Failed to open file: " << cur_id_file; + return -1; + } + out_file << max_id_; + out_file.close(); return max_id_; } diff --git a/flex/storages/metadata/local_file_metadata_store.h b/flex/storages/metadata/local_file_metadata_store.h index 186aa68e7efd..bada3778d535 100644 --- a/flex/storages/metadata/local_file_metadata_store.h +++ b/flex/storages/metadata/local_file_metadata_store.h @@ -48,6 +48,7 @@ class LocalFileMetadataStore : public IMetaStore { static constexpr const char* METADATA_DIR = "METADATA"; static constexpr const char* META_FILE_PREFIX = "META_"; + static constexpr const char* CUR_ID_FILE_NAME = "CUR_ID"; LocalFileMetadataStore(const std::string& 
path); @@ -110,12 +111,15 @@ class LocalFileMetadataStore : public IMetaStore { update_func_t update_func) override; private: - Result get_next_meta_key(const meta_kind_t& meta_kind) const; + Result get_next_meta_key(const meta_kind_t& meta_kind); std::string get_root_meta_dir() const; std::string get_meta_kind_dir(const meta_kind_t& meta_kind) const; std::string get_meta_file(const meta_kind_t& meta_kind, const meta_key_t& meta_key) const; - int32_t get_max_id(const meta_kind_t& meta_kind) const; + /** + * For the specified meta_kind, increase the id and return the new id. + */ + int32_t increase_and_get_id(const meta_kind_t& meta_kind); bool is_key_exist(const meta_kind_t& meta_kind, const meta_key_t& meta_key) const; diff --git a/flex/tests/hqps/hqps_robust_test.sh b/flex/tests/hqps/hqps_robust_test.sh index 8090d8be8164..af21a57459ac 100644 --- a/flex/tests/hqps/hqps_robust_test.sh +++ b/flex/tests/hqps/hqps_robust_test.sh @@ -82,7 +82,7 @@ start_engine_service(){ fi cmd="${SERVER_BIN} -c ${config_path} --enable-admin-service true " - cmd="${cmd} -w ${INTERACTIVE_WORKSPACE} --start-compiler true &" + cmd="${cmd} -w ${INTERACTIVE_WORKSPACE} --start-compiler true > /tmp/engine.log 2>&1 & " echo "Start engine service with command: ${cmd}" eval ${cmd} diff --git a/flex/tests/hqps/interactive_config_test.yaml b/flex/tests/hqps/interactive_config_test.yaml index 0e1f9f15ed21..6013b0079d90 100644 --- a/flex/tests/hqps/interactive_config_test.yaml +++ b/flex/tests/hqps/interactive_config_test.yaml @@ -5,7 +5,7 @@ compute_engine: type: hiactor workers: - localhost:10000 - thread_num_per_worker: 1 + thread_num_per_worker: 4 store: type: cpp-mcsr metadata_store: @@ -26,6 +26,7 @@ compiler: statistics: uri: http://localhost:7777/v1/graph/%s/statistics interval: 86400000 # ms + timeout: 1000 # ms endpoint: default_listen_address: localhost bolt_connector: diff --git a/flex/utils/yaml_utils.cc b/flex/utils/yaml_utils.cc index 775c4245ea2a..3c36a0e85535 100644 --- 
a/flex/utils/yaml_utils.cc +++ b/flex/utils/yaml_utils.cc @@ -52,12 +52,20 @@ void convert_yaml_node_to_json(const YAML::Node& node, json.SetInt(node.as()); } catch (const YAML::BadConversion& e) { try { - json.SetDouble(node.as()); + json.SetInt64(node.as()); } catch (const YAML::BadConversion& e) { try { - json.SetBool(node.as()); + json.SetUint64(node.as()); } catch (const YAML::BadConversion& e) { - json.SetString(node.as().c_str(), allocator); + try { + json.SetDouble(node.as()); + } catch (const YAML::BadConversion& e) { + try { + json.SetBool(node.as()); + } catch (const YAML::BadConversion& e) { + json.SetString(node.as().c_str(), allocator); + } + } } } } diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI1.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI1.cypher new file mode 100644 index 000000000000..b43ca43e5448 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI1.cypher @@ -0,0 +1,40 @@ +MATCH (message:COMMENT) +WHERE message.creationDate < $datetime +WITH count(message) AS totalMessageCount + +MATCH (message:COMMENT) +WHERE message.creationDate < $datetime +AND message.length > 0 +WITH + totalMessageCount, + message, + date(datetime({epochMillis: message.creationDate})) AS date +WITH + totalMessageCount, + date.year AS year, + CASE + WHEN 'POST' in labels(message) THEN 0 + ELSE 1 + END AS isComment, + CASE + WHEN message.length < 40 THEN 0 + WHEN message.length < 80 THEN 1 + WHEN message.length < 160 THEN 2 + ELSE 3 + END AS lengthCategory, + count(message) AS messageCount, + sum(message.length) / count(message) AS averageMessageLength, + count(message.length) AS sumMessageLength + +RETURN + year, + isComment, + lengthCategory, + messageCount, + averageMessageLength, + sumMessageLength, + messageCount / totalMessageCount AS percentageOfMessages + ORDER BY + year DESC, + isComment ASC, + lengthCategory ASC; \ No newline at end of file diff --git 
a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI10.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI10.cypher new file mode 100644 index 000000000000..f866c6672629 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI10.cypher @@ -0,0 +1,14 @@ +MATCH (p1:PERSON {id : $personId})-[:KNOWS*1..4]-(expert:PERSON), + (expert)-[:ISLOCATEDIN]->(:PLACE)-[:ISPARTOF]->(country:PLACE {name: $country}), + (expert)<-[:HASCREATOR]-(message)-[:HASTAG]->(:TAG)-[:HASTYPE]->(:TAGCLASS {name: $tagClass}) +WITH DISTINCT expert, message +MATCH (message)-[:HASTAG]->(tag:TAG) +RETURN + expert.id as id, + tag.name as name, + count(message) AS messageCount +ORDER BY + messageCount DESC, + name ASC, + id ASC +LIMIT 100; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI11.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI11.cypher new file mode 100644 index 000000000000..dbaa9b4d87dc --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI11.cypher @@ -0,0 +1,13 @@ +MATCH (a:PERSON)-[:ISLOCATEDIN]->(:PLACE)-[:ISPARTOF]->(country:PLACE {name: $country}), + (b)-[:ISLOCATEDIN]->(:PLACE)-[:ISPARTOF]->(country), + (c)-[:ISLOCATEDIN]->(:PLACE)-[:ISPARTOF]->(country), + (a)-[k1:KNOWS]-(b:PERSON), + (b)-[k2:KNOWS]-(c:PERSON), + (c)-[k3:KNOWS]-(a) +WHERE a.id < b.id + AND b.id < c.id + AND $startDate <= k1.creationDate AND k1.creationDate <= $endDate + AND $startDate <= k2.creationDate AND k2.creationDate <= $endDate + AND $startDate <= k3.creationDate AND k3.creationDate <= $endDate +WITH DISTINCT country, a, b +RETURN count(*) AS count; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI12.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI12.cypher new file mode 100644 index 000000000000..be0a0fa7855f --- /dev/null +++ 
b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI12.cypher @@ -0,0 +1,10 @@ +MATCH (person:PERSON)<-[:HASCREATOR]-(message), + (message)-[:REPLYOF * 0..30]->(post:POST) +WHERE message.length < $lengthThreshold + AND message.creationDate > $startDate + AND post.language IN languages +WITH person, count(message) as msgCnt +RETURN msgCnt, count(person) as personCnt +ORDER BY + personCnt DESC, + msgCnt DESC; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI13.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI13.cypher new file mode 100644 index 000000000000..ba813dc7befe --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI13.cypher @@ -0,0 +1,44 @@ +MATCH (country:PLACE {name: $country})<-[:ISPARTOF]-(:PLACE)<-[:ISLOCATEDIN]-(zombie:PERSON) +WHERE zombie.creationDate < $endDate +OPTIONAL MATCH (zombie)<-[:HASCREATOR]-(message) +WHERE message.creationDate < $endDate +WITH + country, + zombie, + date(datetime({epochMillis: $endDate})) as idate, + date(datetime({epochMillis: zombie.creationDate})) as zdate, + count(message) AS messageCount +WITH + country, + zombie, + 12 * (idate.year - zdate.year ) + + (idate.month - zdate.month) + + 1 AS months, + messageCount +WHERE messageCount / months < 1 +WITH + country, + collect(zombie) AS zombies +UNWIND zombies AS zombie +MATCH // Match1 + (zombie)<-[:HASCREATOR]-()<-[:LIKES]-(likerZombie:PERSON) +WHERE likerZombie IN zombies +MATCH // Match2 + (zombie)<-[:HASCREATOR]-()<-[:LIKES]-(likerPerson:PERSON) +WHERE likerPerson.creationDate < $endDate +WITH + zombie, + count(distinct likerZombie) AS zombieLikeCount, // Aggregate1 + count(distinct likerPerson) AS totalLikeCount // Aggregate2 +RETURN + zombie.id AS zid, + zombieLikeCount, + totalLikeCount, + CASE totalLikeCount + WHEN 0 THEN 0.0 + ELSE zombieLikeCount / totalLikeCount + END AS zombieScore +ORDER BY + zombieScore DESC, + zid ASC +LIMIT 
100; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI14.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI14.cypher new file mode 100644 index 000000000000..dce89ab3c336 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI14.cypher @@ -0,0 +1,17 @@ +MATCH + (country1:PLACE {name: $country1})<-[:ISPARTOF]-(city1:PLACE)<-[:ISLOCATEDIN]-(person1:PERSON), + (country2:PLACE {name: $country2})<-[:ISPARTOF]-(city2:PLACE)<-[:ISLOCATEDIN]-(person2:PERSON), + (person1)-[knows:KNOWS]-(person2) +// Match1 +MATCH (person1)<-[:HASCREATOR]-(c:COMMENT)-[:REPLYOF]->()-[:HASCREATOR]->(person2:PERSON) +WITH person1, person2, city1, 4 as score1 +// Match2 +MATCH (person1)-[:LIKES]->(m)-[:HASCREATOR]->(person2) +WITH person1, person2, city1, score1, 10 as score2 +WITH + person1, + person2, + city1, + sum(distinct score1) as score1, // Aggregate1 + sum(distinct score2) as score2 // Aggregate2 +RETURN count(*); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI16.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI16.cypher new file mode 100644 index 000000000000..ee047b7f8543 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI16.cypher @@ -0,0 +1,8 @@ +MATCH (person1:PERSON)<-[:HASCREATOR]-(message1)-[:HASTAG]->(tag:TAG {name: $tagName}) +WHERE message1.creationDate > $date +OPTIONAL MATCH (person1)-[:KNOWS]-(person2:PERSON)<-[:HASCREATOR]-(message2)-[:HASTAG]->(tag) +WHERE message2.creationDate = $date +WITH person1, count(DISTINCT message1) AS cm, count(DISTINCT person2) AS cp2 +WHERE cp2 <= 4 +// return count +RETURN person1, cm; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI17.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI17.cypher new file mode 100644 index 
000000000000..c9f75979beb4 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI17.cypher @@ -0,0 +1,8 @@ +MATCH + (comment)-[:HASTAG]->(tag:TAG {name: $tag}), + (comment)-[:REPLYOF]->(message2), + (message2)-[:HASTAG]->(tag), + (message1)-[:HASTAG]->(tag:TAG {name: $tag}), + (message1)-[:REPLYOF*0..10]->(post1:POST)<-[:CONTAINEROF]-(forum1:FORUM), + (forum1)-[:HASMEMBER]->(person3:PERSON)<-[:HASCREATOR]-(message2) +RETURN count(*); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI18.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI18.cypher new file mode 100644 index 000000000000..e35d58a141d0 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI18.cypher @@ -0,0 +1,6 @@ +MATCH (tag:TAG {name: $tag})<-[:HASINTEREST]-(person1:PERSON)-[:KNOWS]-(mutualFriend:PERSON)-[:KNOWS]-(person2:PERSON)-[:HASINTEREST]->(tag2 {name: $tag}) +WHERE person1 <> person2 + AND NOT (person1)-[:KNOWS]-(person2) +RETURN person1.id AS person1Id, person2.id AS person2Id, count(DISTINCT mutualFriend) AS mutualFriendCount +ORDER BY mutualFriendCount DESC, person1Id ASC, person2Id ASC +LIMIT 20; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI2.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI2.cypher new file mode 100644 index 000000000000..bf692b8b435d --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI2.cypher @@ -0,0 +1,26 @@ +MATCH (tag:TAG)-[:HASTYPE]->(:TAGCLASS {name: $tagClass}), (tag:TAG)<-[:HASTAG]-(message) +WITH + tag, + CASE + WHEN message.creationDate < $dateEnd1 + AND message.creationDate >= $date THEN 1 + ELSE 0 + END AS count1, + CASE + WHEN message.creationDate < $dateEnd2 + AND message.creationDate >= $dateEnd1 THEN 1 + ELSE 0 + END AS count2 +WITH + tag, + sum(count1) AS countWindow1, + sum(count2) AS 
countWindow2 +RETURN + tag.name as name, + countWindow1, + countWindow2, + abs(countWindow1 - countWindow2) AS diff +ORDER BY +diff DESC, +name ASC +LIMIT 100; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI3.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI3.cypher new file mode 100644 index 000000000000..e82ef50a581c --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI3.cypher @@ -0,0 +1,13 @@ +MATCH + (country:PLACE {name: $country})<-[:ISPARTOF]-()<-[:ISLOCATEDIN]- + (person:PERSON)<-[:HASMODERATOR]-(forum:FORUM)-[:CONTAINEROF]->(post:POST)<-[:REPLYOF*0..30]-(message)-[:HASTAG]->(:TAG)-[:HASTYPE]->(:TAGCLASS {name: $tagClass}) +RETURN + forum.id as id, + forum.title, + forum.creationDate, + person.id as personId, + count(DISTINCT message) AS messageCount + ORDER BY + messageCount DESC, + id ASC + LIMIT 20; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI4.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI4.cypher new file mode 100644 index 000000000000..e339ad7402e0 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI4.cypher @@ -0,0 +1,25 @@ +MATCH (country:PLACE)<-[:ISPARTOF]-(:PLACE)<-[:ISLOCATEDIN]-(person:PERSON)<-[:HASMEMBER]-(forum:FORUM) +WHERE forum.creationDate > $date +WITH country, forum, count(person) AS numberOfMembers +ORDER BY numberOfMembers DESC, forum.id ASC, country.id +WITH DISTINCT forum AS topForum +LIMIT 100 + +WITH collect(topForum) AS topForums + +UNWIND topForums AS topForum2 +MATCH (topForum1)-[:CONTAINEROF]->(post:POST)<-[:REPLYOF*0..30]-(message)-[:HASCREATOR]->(person:PERSON)<-[:HASMEMBER]-(topForum2:FORUM) +WITH person, message, topForum1 +WHERE topForum1 IN topForums +WITH person, count(DISTINCT message) AS messageCount + +RETURN + person.id AS personId, + person.firstName AS personFirstName, + 
person.lastName AS personLastName, + person.creationDate AS personCreationDate, + sum(messageCount) AS messageCount +ORDER BY + messageCount DESC, + person.id ASC +LIMIT 100; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI5.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI5.cypher new file mode 100644 index 000000000000..ba069a89352d --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI5.cypher @@ -0,0 +1,20 @@ +Match (tag:TAG {name: $tag})<-[:HASTAG]-(message) +OPTIONAL MATCH (message)<-[:LIKES]-(liker:PERSON) +OPTIONAL MATCH (message)<-[:REPLYOF]-(comment:COMMENT) +MATCH (message)-[:HASCREATOR]->(person:PERSON) +WITH message, person, count(distinct liker) as likeCount, count(distinct comment) as replyCount +WITH + person.id AS id, + sum(replyCount) as replyCount, + sum(likeCount) as likeCount, + count(message) as messageCount +RETURN + id, + replyCount, + likeCount, + messageCount, + 1*messageCount + 2*replyCount + 10*likeCount AS score +ORDER BY + score DESC, + id ASC +LIMIT 100; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI6.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI6.cypher new file mode 100644 index 000000000000..790c11f98a2f --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI6.cypher @@ -0,0 +1,11 @@ +MATCH (tag:TAG {name: $tag})<-[:HASTAG]-(message1)-[:HASCREATOR]->(person1:PERSON), + (message1)<-[:LIKES]-(person2:PERSON), + (person2)<-[:HASCREATOR]-(message2)<-[like:LIKES]-(person3:PERSON) +RETURN + person1.id, + // Using 'DISTINCT like' here ensures that each person2's popularity score is only added once for each person1 + count(DISTINCT like) AS authorityScore +ORDER BY + authorityScore DESC, + person1.id ASC +LIMIT 100; \ No newline at end of file diff --git 
a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI7.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI7.cypher new file mode 100644 index 000000000000..97ece1da7c6e --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI7.cypher @@ -0,0 +1,12 @@ +MATCH + (tag:TAG {name: $tag})<-[:HASTAG]-(message:COMMENT), + (message)<-[:REPLYOF]-(comment:COMMENT), + (comment:COMMENT)-[:HASTAG]->(relatedTag:TAG) +WHERE NOT (comment:COMMENT)-[:HASTAG]->(tag:TAG {name: $tag}) +RETURN + relatedTag.name as name, + count(DISTINCT comment) AS count +ORDER BY + count DESC, + name ASC +LIMIT 100; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI8.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI8.cypher new file mode 100644 index 000000000000..a8c63514cf65 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI8.cypher @@ -0,0 +1,7 @@ +MATCH (tag:TAG {name: $tag}) +// score +OPTIONAL MATCH (tag)<-[interest:HASINTEREST]-(person:PERSON) +OPTIONAL MATCH (tag)<-[:HASTAG]-(message)-[:HASCREATOR]->(person:PERSON) +WHERE $startDate < message.creationDate + AND message.creationDate < $endDate +RETURN tag, count(person) AS totalCount; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/BI9.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI9.cypher new file mode 100644 index 000000000000..753336cd49e2 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/BI9.cypher @@ -0,0 +1,19 @@ +MATCH + (person:PERSON)<-[:HASCREATOR]-(post:POST)<-[:REPLYOF*0..7]-(message) +WHERE + post.creationDate >= $startDate AND post.creationDate <= $endDate AND + message.creationDate >= $startDate AND message.creationDate <= $endDate +WITH + person, + count(distinct post) as threadCnt, + count(message) as msgCnt +RETURN + person.id 
as id, + person.firstName, + person.lastName, + threadCnt, + msgCnt + ORDER BY + msgCnt DESC, + id ASC + LIMIT 100; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC1.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC1.cypher new file mode 100644 index 000000000000..bf378b9f488a --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC1.cypher @@ -0,0 +1,52 @@ +MATCH k = shortestPath((p: PERSON{id: $personId})-[:KNOWS*1..4]-(f: PERSON {firstName: $firstName})) +MATCH (f:PERSON)-[:ISLOCATEDIN]->(locationCity:PLACE) + +WHERE + p <> f + +OPTIONAL MATCH (f: PERSON)-[workAt:WORKAT]->(company:ORGANISATION)-[:ISLOCATEDIN]->(country:PLACE) +// append one new column +WITH + f, k, locationCity, + CASE + WHEN company is null Then null + ELSE [company.name, workAt.workFrom, country.name] + END as companies + +WITH f, k, locationCity, collect(companies) as company_info + +OPTIONAL MATCH (f: PERSON)-[studyAt:STUDYAT]->(university)-[:ISLOCATEDIN]->(universityCity:PLACE) +// append one new column +WITH f, k, locationCity, company_info, + CASE + WHEN university is null Then null + ELSE [university.name, studyAt.classYear, universityCity.name] + END as universities + +WITH f, k, locationCity, company_info, collect(universities) as university_info + +// append one new column +WITH + f, + k, + locationCity, + company_info, + university_info, + length(k) as distance + +ORDER BY distance ASC, f.lastName ASC, f.id ASC +LIMIT 20 + +return f.id AS friendId, + f.lastName AS friendLastName, + distance AS distanceFromPerson, + f.birthday AS friendBirthday, + f.creationDate AS friendCreationDate, + f.gender AS friendGender, + f.browserUsed AS friendBrowserUsed, + f.locationIP AS friendLocationIp, + f.email AS friendEmail, + f.language AS friendLanguage, + locationCity.name AS friendCityName, + university_info AS friendUniversities, + company_info AS friendCompanies; \ No newline at
end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC10.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC10.cypher new file mode 100644 index 000000000000..dc727f2a7179 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC10.cypher @@ -0,0 +1,36 @@ +MATCH (person:PERSON {id: $personId})-[:KNOWS*2..3]-(friend: PERSON) +OPTIONAL MATCH (friend : PERSON)<-[:HASCREATOR]-(post:POST) +OPTIONAL MATCH (friend)<-[:HASCREATOR]-(post1:POST)-[:HASTAG]->(tag:TAG)<-[:HASINTEREST]-(person: PERSON {id: $personId}) + +// Anti-Pattern +WHERE + NOT friend=person + AND NOT (friend:PERSON)-[:KNOWS]-(person :PERSON {id: $personId}) + +WITH + person, + friend, + post, + post1, + date(datetime({epochMillis: friend.birthday})) as birthday + +// datetime(friend.birthday) as birthday + +WHERE (birthday.month=$month AND birthday.day>=21) OR + (birthday.month=($month%12)+1 AND birthday.day<22) + +// Aggregate +WITH friend, count(distinct post) as postCount, count(distinct post1) as commonPostCount + +WITH friend, commonPostCount - (postCount - commonPostCount) AS commonInterestScore +ORDER BY commonInterestScore DESC, friend.id ASC +LIMIT 10 + +MATCH (friend:PERSON)-[:ISLOCATEDIN]->(city:PLACE) + +RETURN friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + commonInterestScore, + friend.gender AS personGender, + city.name AS personCityName; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC11.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC11.cypher new file mode 100644 index 000000000000..4f4e440552eb --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC11.cypher @@ -0,0 +1,19 @@ +MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON), + (friend:PERSON)-[wa:WORKAT]->(com:ORGANISATION)-[:ISLOCATEDIN]->(:PLACE {name: $countryName}) 
+WHERE p <> friend + AND wa.workFrom < $workFromYear + +WITH DISTINCT friend as friend, + com AS com, + wa.workFrom as organizationWorkFromYear + +ORDER BY + organizationWorkFromYear ASC, + friend.id ASC, com.name DESC +LIMIT 10 +return + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + com.name as organizationName, + organizationWorkFromYear as organizationWorkFromYear; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC12.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC12.cypher new file mode 100644 index 000000000000..57596896d5f4 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC12.cypher @@ -0,0 +1,17 @@ +MATCH + (unused:PERSON {id: $personId })-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(comments:COMMENT)-[:REPLYOF]->(:POST)-[:HASTAG]->(tags:TAG), + (tags:TAG)-[:HASTYPE]->(:TAGCLASS)-[:ISSUBCLASSOF*0..10]->(:TAGCLASS {name: $tagClassName}) +WITH + friend AS friend, + collect(DISTINCT tags.name) AS tagNames, + count(DISTINCT comments) AS replyCount +ORDER BY + replyCount DESC, + friend.id ASC +LIMIT 20 +RETURN + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + tagNames, + replyCount \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC2.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC2.cypher new file mode 100644 index 000000000000..0f7aa8814cb3 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC2.cypher @@ -0,0 +1,17 @@ +MATCH (p :PERSON {id: $personId})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(message) +WHERE + message.creationDate <= $maxDate +WITH + friend, + message +ORDER BY + message.creationDate DESC, + message.id ASC LIMIT 20 +return + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS 
personLastName, + message.id AS postOrCommentId, + message.content AS content, + message.imageFile AS imageFile, + message.creationDate AS postOrCommentCreationDate; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC3.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC3.cypher new file mode 100644 index 000000000000..f5f483c1250a --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC3.cypher @@ -0,0 +1,28 @@ +MATCH + (p:PERSON {id: $personId})-[:KNOWS*1..3]-(otherP:PERSON) +MATCH (country:PLACE)<-[:ISLOCATEDIN]-(message)-[:HASCREATOR]->(otherP:PERSON)-[:ISLOCATEDIN]->(city:PLACE)-[:ISPARTOF]-> (country2:PLACE) +WHERE + otherP.id<> $personId + AND (country.name = $countryXName OR country.name = $countryYName) + AND (country2.name <> $countryXName AND country2.name <> $countryYName) + AND message.creationDate >= $startDate + AND message.creationDate < $endDate +WITH + DISTINCT + message, + otherP, + country + +WITH otherP, + CASE WHEN country.name=$countryXName THEN 1 ELSE 0 END AS messageX, + CASE WHEN country.name=$countryYName THEN 1 ELSE 0 END AS messageY +WITH otherP, sum(messageX) AS xCount, sum(messageY) AS yCount +WHERE xCount > 0 AND yCount > 0 +RETURN + otherP.id as id, + otherP.firstName as firstName, + otherP.lastName as lastName, + xCount, + yCount, + xCount + yCount as total +ORDER BY total DESC, id ASC LIMIT 20; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC4.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC4.cypher new file mode 100644 index 000000000000..096916c2f243 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC4.cypher @@ -0,0 +1,17 @@ +MATCH (person:PERSON {id: $personId})-[:KNOWS]-(friend:PERSON)<-[:HASCREATOR]-(post:POST)-[:HASTAG]->(tag: TAG) +WITH DISTINCT tag, post +WITH tag, + CASE + WHEN post.creationDate
< $endDate AND post.creationDate >= $startDate THEN 1 + ELSE 0 + END AS valid, + CASE + WHEN $startDate > post.creationDate THEN 1 + ELSE 0 + END AS inValid +WITH tag, sum(valid) AS postCount, sum(inValid) AS inValidPostCount +WHERE postCount>0 AND inValidPostCount=0 + +RETURN tag.name AS tagName, postCount +ORDER BY postCount DESC, tagName ASC +LIMIT 10; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC5.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC5.cypher new file mode 100644 index 000000000000..7b6a0727ae13 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC5.cypher @@ -0,0 +1,22 @@ +MATCH (person:PERSON { id: $personId })-[:KNOWS*1..3]-(friend), + (friend)<-[membership:HASMEMBER]-(forum) +OPTIONAL MATCH (friend)<-[:HASCREATOR]-(post)<-[:CONTAINEROF]-(forum) +WHERE + NOT friend.id = $personId + AND membership.joinDate > $minDate +WITH + DISTINCT + friend AS friend, + forum AS forum, + post as post + +WITH + forum, + count(post) AS postCount +ORDER BY + postCount DESC, + forum.id ASC +LIMIT 20 +RETURN + forum.title AS forumName, + postCount; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC6.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC6.cypher new file mode 100644 index 000000000000..8aa2b01d0830 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC6.cypher @@ -0,0 +1,17 @@ +MATCH (p_:PERSON {id: $personId})-[:KNOWS*1..3]-(other:PERSON), + (other)<-[:HASCREATOR]-(p:POST)-[:HASTAG]->(t:TAG {name: $tagName}), + (p:POST)-[:HASTAG]->(otherTag:TAG) + +WHERE other.id <> $personId AND otherTag <> t + +WITH DISTINCT + otherTag, + p + +RETURN + otherTag.name as name, + count(p) as postCnt +ORDER BY + postCnt desc, + name asc +LIMIT 10; \ No newline at end of file diff --git 
a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC7.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC7.cypher new file mode 100644 index 000000000000..c3bdea288f40 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC7.cypher @@ -0,0 +1,23 @@ + MATCH (person:PERSON {id: $personId})<-[:HASCREATOR]-(message)<-[like:LIKES]-(liker:PERSON) + OPTIONAL MATCH (liker: PERSON)-[k:KNOWS]-(p2: PERSON {id: $personId}) + WITH liker, message, like.creationDate AS likeTime, person, + CASE + WHEN k is null THEN true + ELSE false + END AS isNew + ORDER BY likeTime DESC, message.id ASC + WITH liker, person, head(collect(message)) as message, head(collect(likeTime)) AS likeTime, isNew + RETURN + liker.id AS personId, + liker.firstName AS personFirstName, + liker.lastName AS personLastName, + likeTime AS likeCreationDate, + message.id AS commentOrPostId, + message.content AS messageContent, + message.imageFile AS messageImageFile, + (likeTime - message.creationDate)/1000/60 AS minutesLatency, + isNew + ORDER BY + likeCreationDate DESC, + personId ASC + LIMIT 20; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC8.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC8.cypher new file mode 100644 index 000000000000..71c8d15659b5 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC8.cypher @@ -0,0 +1,14 @@ +MATCH(p:PERSON {id: $personId}) <-[:HASCREATOR] -(msg) <- [:REPLYOF] - (cmt: COMMENT) - [:HASCREATOR] -> (author : PERSON) +WITH + p, msg, cmt, author +ORDER BY + cmt.creationDate DESC, + cmt.id ASC +LIMIT 20 +RETURN + author.id, + author.firstName, + author.lastName, + cmt.creationDate, + cmt.id, + cmt.content; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/IC9.cypher 
b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC9.cypher new file mode 100644 index 000000000000..a2e5d30ba94d --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/IC9.cypher @@ -0,0 +1,19 @@ +MATCH (p:PERSON {id: $personId})-[:KNOWS*1..3]-(friend:PERSON) +MATCH (message)-[:HASCREATOR]->(friend:PERSON) +where message.creationDate < $maxDate + AND friend.id <> $personId + +WITH DISTINCT friend, message + +RETURN + friend.id AS personId, + friend.firstName AS personFirstName, + friend.lastName AS personLastName, + message.id AS commentOrPostId, + message.content AS messageContent, + message.imageFile AS messageImageFile, + message.creationDate AS commentOrPostCreationDate +ORDER BY + commentOrPostCreationDate DESC, + commentOrPostId ASC +LIMIT 20; \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc1(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc1(a).cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc1(a).cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qc1(a).cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc1(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc1(b).cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc1(b).cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qc1(b).cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc2(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc2(a).cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc2(a).cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qc2(a).cypher diff --git 
a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc2(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc2(b).cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc2(b).cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qc2(b).cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc3(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc3(a).cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc3(a).cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qc3(a).cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc3(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc3(b).cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc3(b).cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qc3(b).cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc4(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc4(a).cypher new file mode 100644 index 000000000000..a8e2e91696c0 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc4(a).cypher @@ -0,0 +1,7 @@ +Match (c:PLACE {name: $name})<-[:ISLOCATEDIN]-(p1:PERSON), + (c)<-[:ISLOCATEDIN]-(p2:PERSON), + (p1)<-[:HASCREATOR]-(m1:COMMENT)<-[:LIKES]->(p2:PERSON), + (c:PLACE {name: $name})<-[:ISLOCATEDIN]-(p3:PERSON), + (c)<-[:ISLOCATEDIN]-(p4:PERSON), + (p3)<-[:HASCREATOR]-(m2:COMMENT)<-[:LIKES]->(p4:PERSON) +RETURN count(c) \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc4(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc4(b).cypher new file 
mode 100644 index 000000000000..3aaaa5989227 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qc4(b).cypher @@ -0,0 +1,7 @@ +Match (c:PLACE {name: $name})<-[:ISLOCATEDIN]-(p1:PERSON), + (c)<-[:ISLOCATEDIN]-(p2:PERSON), + (p1)<-[:HASCREATOR]-(m1:COMMENT)<-[:LIKES]->(p2:PERSON), + (c:PLACE {name: $name})<-[:ISLOCATEDIN]-(p3:PERSON), + (c)<-[:ISLOCATEDIN]-(p4:PERSON), + (p3)-[:KNOWS|HASMODERATOR]-(m2:FORUM|PERSON)-[:KNOWS|HASMODERATOR]-(p4:PERSON) +RETURN count(c) \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr5.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr1.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr5.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qr1.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr1.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr1.gremlin new file mode 100644 index 000000000000..a812f447c1c5 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr1.gremlin @@ -0,0 +1,3 @@ +g.V().match( + __.as('p1').hasLabel('PERSON').out('KNOWS').hasLabel('PERSON').as('p2') +).select('p1').has('id', $id1).select('p2').has('id', $id2).count() \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr6.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr2.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr6.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qr2.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr2.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr2.gremlin new file mode 100644 index 
000000000000..97b8a42c17a7 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr2.gremlin @@ -0,0 +1,4 @@ +g.V().match( + __.as('p1').hasLabel('PERSON').out('KNOWS').as('p2'), + __.as('p2').out('LIKES').hasLabel('COMMENT').as('c1')). +select('p1').has('id', $id1).select('p2').has('id', $id2).select('c1').has('length', P.gt($len)).count() \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr1.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr3.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr1.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qr3.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr3.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr3.gremlin new file mode 100644 index 000000000000..6028e32c5b87 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr3.gremlin @@ -0,0 +1 @@ +g.V().hasLabel('FORUM').as('forum').out('CONTAINEROF').as('c1').in('REPLYOF').as('r1').out('HASCREATOR').as('h1').select('forum').out('HASMEMBER').as('h2').count() \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr2.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr4.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr2.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qr4.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr4.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr4.gremlin new file mode 100644 index 000000000000..b8c93bd1f806 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr4.gremlin @@ -0,0 +1 @@ 
+g.V().hasLabel('PLACE').as('p1').in('ISLOCATEDIN').as('i1').in('HASCREATOR').hasLabel('COMMENT').as('c1').in('LIKES', 'REPLYOF').as('r1').out('HASTAG', 'HASINTEREST').as('h1').count() \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr5.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr5.cypher new file mode 100644 index 000000000000..e318e289dfdd --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr5.cypher @@ -0,0 +1,4 @@ +MATCH (p: PERSON{id: $personId})-[k:KNOWS*1..4]-(f: PERSON {firstName: $firstName}) +WITH f, p +MATCH (f: PERSON {firstName: $firstName})-[:ISLOCATEDIN]->(locationCity:PLACE) +RETURN count(p); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr5.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr5.gremlin new file mode 100644 index 000000000000..76d061578b7c --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr5.gremlin @@ -0,0 +1,7 @@ +g.V(). +match( + __.as('p').has('PERSON', 'id', 6592).out('KNOWS').out('KNOWS').out('KNOWS').has('PERSON', 'firstName', 'Mikhail').as('f') +).select('f').as('f').select('p').as('p'). 
+match( + __.as('f').out('ISLOCATEDIN').as('e2') +).count() \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr6.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr6.cypher new file mode 100644 index 000000000000..d8b97cccb8aa --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr6.cypher @@ -0,0 +1,5 @@ +MATCH (p: PERSON)-[k:KNOWS]-(f: PERSON) +WITH f +LIMIT 1000 +MATCH (f:PERSON)-[:ISLOCATEDIN]->(locationCity:PLACE) +RETURN count(f); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr6.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr6.gremlin new file mode 100644 index 000000000000..7f4520ca845d --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr6.gremlin @@ -0,0 +1,6 @@ +g.V().match( + __.as('p').hasLabel('PERSON').out('KNOWS').hasLabel('PERSON').as('f') +).select('f').as('f').limit(1000). 
+match( + __.as('f').out('ISLOCATEDIN').as('p') +).count() \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr7.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr7.cypher new file mode 100644 index 000000000000..5fd54d184b81 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr7.cypher @@ -0,0 +1,3 @@ +Match (p1: PERSON {id:$id})-[:KNOWS*3..4]->(:PERSON) +UNION (p1: PERSON {id:$id})-[:KNOWS*4..5]->(:PERSON), +Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr7.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr7.gremlin new file mode 100644 index 000000000000..769ee91e4838 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr7.gremlin @@ -0,0 +1,3 @@ +g.V().union( + has('PERSON', 'id', $id).out('3..4', 'KNOWS').hasLabel('PERSON'), + has('PERSON', 'id', $id).out('4..5', 'KNOWS').hasLabel('PERSON')).count() \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr8.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr8.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr8.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qr8.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr8.gremlin b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr8.gremlin new file mode 100644 index 000000000000..2898411049d3 --- /dev/null +++ b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qr8.gremlin @@ -0,0 +1,3 @@ +g.V().union( + has('PERSON', 'id', $id).out('4..5', 'KNOWS').hasLabel('PERSON'), + has('PERSON', 'id', $id).out('5..6', 'KNOWS').hasLabel('PERSON')).count() \ No newline at end of file diff --git 
a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt1.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qt1.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt1.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qt1.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt2.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qt2.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt2.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qt2.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt3.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qt3.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt3.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qt3.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt4.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qt4.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt4.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qt4.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt5.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/Qt5.cypher similarity index 100% rename from interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qt5.cypher rename to interactive_engine/benchmark/queries/cypher_queries/experiments/Qt5.cypher diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc4(a).cypher 
b/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc4(a).cypher deleted file mode 100644 index 25e085e2afa2..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc4(a).cypher +++ /dev/null @@ -1,7 +0,0 @@ -Match (forum:FORUM)-[:CONTAINEROF]->(post:POST), -(forum:FORUM)-[:HASMEMBER]->(person1:PERSON), -(forum:FORUM)-[:HASMEMBER]->(person2:PERSON), -(person1:PERSON)-[:KNOWS]->(person2:PERSON), -(person1:PERSON)-[:LIKES]->(post:POST), -(person2:PERSON)-[:LIKES]->(post:POST) -Return count(person1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc4(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc4(b).cypher deleted file mode 100644 index f77da135517e..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qc4(b).cypher +++ /dev/null @@ -1,7 +0,0 @@ -Match (forum:FORUM)-[:HASTAG]->(post:TAG), -(forum:FORUM)-[:HASMODERATOR]->(person1:PERSON), -(forum:FORUM)-[:HASMODERATOR|CONTAINEROF]->(person2:PERSON|POST), -(person1:PERSON)-[:KNOWS|LIKES]->(person2:PERSON|POST), -(person1:PERSON)-[:HASINTEREST]->(post:TAG), -(person2:PERSON|POST)-[:HASINTEREST|HASTAG]->(post:TAG) -Return count(person1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr3.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr3.cypher deleted file mode 100644 index 8ab6bc52abf1..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr3.cypher +++ /dev/null @@ -1,2 +0,0 @@ -Match (author:PERSON)<-[:HASCREATOR]-(msg1:POST|COMMENT) -Return count(author); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr4.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr4.cypher deleted file mode 100644 index 
cab0b9fd3181..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr4.cypher +++ /dev/null @@ -1,2 +0,0 @@ -Match (author:PERSON)<-[:HASCREATOR]-(msg1:POST|COMMENT)<-[:REPLYOF]-(msg2:POST|COMMENT) -Return count(author); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr7.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr7.cypher deleted file mode 100644 index 4c6be7d078c6..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/gopt/Qr7.cypher +++ /dev/null @@ -1,3 +0,0 @@ -Match (p1: PERSON {id:1243})-[:KNOWS*3..4]->(:PERSON) -UNION (p1: PERSON {id:1243})-[:KNOWS*4..5]->(:PERSON), -Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc1(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc1(a).cypher deleted file mode 100644 index f0602e726e02..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc1(a).cypher +++ /dev/null @@ -1,4 +0,0 @@ -Match (message:Post)-[:HAS_CREATOR]->(person), - (message:Post)-[:HAS_TAG]->(tag:Tag), - (person)-[:HAS_INTEREST]->(tag:Tag) -Return count(person); diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc1(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc1(b).cypher deleted file mode 100644 index a0330fee7c19..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc1(b).cypher +++ /dev/null @@ -1,4 +0,0 @@ -Match (message)-[:KNOWS|HAS_MODERATOR]->(person:Person), - (message)-[]->(tag:Tag), - (person)-[]->(tag) -Return count(person); diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc2(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc2(a).cypher deleted file mode 100644 
index 7095af2b83ea..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc2(a).cypher +++ /dev/null @@ -1,5 +0,0 @@ -Match (person1:Person)-[:LIKES]->(message:Post), - (message:Post)<-[:CONTAINER_OF]-(person2:Forum), - (person1:Person)-[:KNOWS]->(place), - (person2:Forum)-[:HAS_MODERATOR]->(place) -Return count(person1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc2(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc2(b).cypher deleted file mode 100644 index 40900bd64928..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc2(b).cypher +++ /dev/null @@ -1,5 +0,0 @@ -Match (person1:Person)-[:LIKES]->(message:Post), - (message:Post)<-[:CONTAINER_OF]-(person2:Forum), - (person1:Person)-[:KNOWS|HAS_INTEREST]->(place), - (person2:Forum)-[:HAS_MODERATOR|HAS_TAG]->(place) -Return count(person1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc3(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc3(a).cypher deleted file mode 100644 index 8744f49df441..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc3(a).cypher +++ /dev/null @@ -1,5 +0,0 @@ -Match (person1)<-[:HAS_CREATOR]-(comment:Comment), - (comment:Comment)-[:REPLY_OF]->(post:Post), - (post:Post)<-[:CONTAINER_OF]-(forum), - (forum)-[:HAS_MEMBER]->(person2) -Return count(person1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc3(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc3(b).cypher deleted file mode 100644 index 50b5115a0f1a..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc3(b).cypher +++ /dev/null @@ -1,4 +0,0 @@ -Match (p:Comment)-[]->(:Person)-[]->(:Place), 
- (p)<-[]-(message), - (message)-[]->(tag:Tag) -Return count(p); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc4(a).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc4(a).cypher deleted file mode 100644 index 0efd33b6581a..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc4(a).cypher +++ /dev/null @@ -1,7 +0,0 @@ -Match (forum)-[:CONTAINER_OF]->(post:Post), - (forum)-[:HAS_MEMBER]->(person1:Person), - (forum)-[:HAS_MEMBER]->(person2:Person), - (person1:Person)-[:KNOWS]->(person2:Person), - (person1:Person)-[:LIKES]->(post:Post), - (person2:Person)-[:LIKES]->(post:Post) -Return count(person1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc4(b).cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc4(b).cypher deleted file mode 100644 index 4f18cdb43be9..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qc4(b).cypher +++ /dev/null @@ -1,7 +0,0 @@ -Match (forum)-[:HAS_TAG]->(post:Tag), - (forum)-[:HAS_MODERATOR]->(person1), - (forum)-[:HAS_MODERATOR|CONTAINER_OF]->(person2), - (person1)-[:KNOWS|LIKES]->(person2), - (person1)-[:HAS_INTEREST]->(post:Tag), - (person2)-[:HAS_INTEREST|HAS_TAG]->(post:Tag) -Return count(person1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr1.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr1.cypher deleted file mode 100644 index 63eb639aae0a..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr1.cypher +++ /dev/null @@ -1,3 +0,0 @@ -Match (p1:Person)-[:KNOWS]->(p2:Person) -Where p1.id < 933 -Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr2.cypher 
b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr2.cypher deleted file mode 100644 index 1b9e67c2329e..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr2.cypher +++ /dev/null @@ -1,5 +0,0 @@ -Match (p1:Person)<-[:HAS_MODERATOR]-(forum:Forum), - (p1:Person)<-[:HAS_CREATOR]-(post:Post), - (post)<-[:CONTAINER_OF]-(forum) -Where p1.id < 933 -Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr3.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr3.cypher deleted file mode 100644 index 33a6aeabcaa1..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr3.cypher +++ /dev/null @@ -1 +0,0 @@ -Match (p1:Person)-[:KNOWS]->(p2:Person)-[:KNOWS]->(p3:Person) Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr4.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr4.cypher deleted file mode 100644 index e669e04dc6a0..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qr4.cypher +++ /dev/null @@ -1 +0,0 @@ -Match (p1:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]->(post:Post)-[:HAS_CREATOR]->(p2:Person) Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt1.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt1.cypher deleted file mode 100644 index a685b75f3ded..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt1.cypher +++ /dev/null @@ -1 +0,0 @@ -Match (p1:Person)<-[:HAS_CREATOR]-()<-[:CONTAINER_OF]-(p2) Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt2.cypher 
b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt2.cypher deleted file mode 100644 index 4bacf99d040b..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt2.cypher +++ /dev/null @@ -1 +0,0 @@ -Match (p)-[]->(:Organisation)-[]->(:Place) Return count(p); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt3.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt3.cypher deleted file mode 100644 index d12671012651..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt3.cypher +++ /dev/null @@ -1 +0,0 @@ -Match (p1)<-[:IS_LOCATED_IN]-(p2:Comment)-[]->(:Tag) Return count(p1); \ No newline at end of file diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt4.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt4.cypher deleted file mode 100644 index 223fddf6f96d..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt4.cypher +++ /dev/null @@ -1,3 +0,0 @@ -Match (p1)<-[]-(p2:Post), - (p1)<-[:HAS_MODERATOR]-()-[]->(p2) -Return count(p1); diff --git a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt5.cypher b/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt5.cypher deleted file mode 100644 index 2543e68b708b..000000000000 --- a/interactive_engine/benchmark/queries/cypher_queries/experiments/neo4j/Qt5.cypher +++ /dev/null @@ -1 +0,0 @@ -Match (p:Post)-[]->(p1), (p1)-[]->(:Place) Return count(p1); \ No newline at end of file diff --git a/interactive_engine/compiler/README.md b/interactive_engine/compiler/README.md new file mode 100644 index 000000000000..7f9cf0a488eb --- /dev/null +++ b/interactive_engine/compiler/README.md @@ -0,0 +1,34 @@ +# GOpt: A Modular Graph-Native Query Optimization Framework + +## Introducing GOpt + +GOpt is a modular, 
graph-native query optimization framework designed to accelerate graph query execution on industrial-scale graph systems. It excels in handling complex graph patterns that combine graph pattern matching with relational operations on large graphs. GOpt is not aware of the underlying storage data and focuses solely on computation on top of the data, which makes it easy and fast to be integrated into other graph or relational databases. + +### Core Features + +1. **Multiple Query Languages Support**: GOpt supports standard [Gremlin](https://tinkerpop.apache.org/gremlin.html) and [Cypher](https://neo4j.com/docs/cypher-manual/current/introduction/) languages, with upcoming [GQL](https://www.gqlstandards.org/) support. +2. **Lightweight, Serverless Integration**: GOpt provides modular interfaces to integrate with various platforms. It has already been integrated into GraphScope and Neo4j. +3. **Advanced Graph-native Optimization**: GOpt introduces a comprehensive set of heuristic rules, an automatic type inference algorithm, and advanced cost-based optimization techniques. + +:::{figure-md} + + + +GOpt System Overview +::: + +### Why GOpt + +1. **High Performance** + + GOpt is designed and implemented based on years of academic research, with key techniques published in prestigious systems conferences. Our experiments, as documented in our [papers](https://arxiv.org/abs/2401.17786), demonstrate that GOpt outperforms most graph and relational databases in both standard ([LDBC](https://ldbcouncil.org/)) and real-world (Alibaba) graph workloads. +2. **User-Friendly Interface** + + GOpt offers different layers of SDK tailored to various user requirements. It provides Cypher and Gremlin language support to lower the barrier of entry. User-provided Cypher or Gremlin queries can be more flexible and ambiguous, with GOpt automatically validating and completing the query information based on property graph modeling. 
Additionally, it provides lower-level APIs for developers who require deeper integration. +3. **Seamless Integration** + + GOpt is lightweight and serverless, facilitating seamless integration into other databases through a small-sized JAR file deployment. Built on the Calcite framework, GOpt leverages Calcite's extensive range of adapters, simplifying the integration with various data formats. This advantage allows GOpt to seamlessly integrate with mainstream relational databases that have been powered by Calcite. Additionally, GOpt is equipped with graph-native algorithms, enhancing its compatibility with graph-native database APIs. + +For more details, please refer to the [GOpt Documentation](https://graphscope.io/docs/latest/interactive_engine/gopt). diff --git a/interactive_engine/compiler/conf/ir.compiler.properties b/interactive_engine/compiler/conf/ir.compiler.properties index 992f828a2b2e..2ab6058f579c 100644 --- a/interactive_engine/compiler/conf/ir.compiler.properties +++ b/interactive_engine/compiler/conf/ir.compiler.properties @@ -68,5 +68,8 @@ calcite.default.charset: UTF-8 # set the interval in milliseconds to fetch graph schema # graph.meta.schema.fetch.interval.ms: 1000 -# set the timeout in milliseconds to fetch graph statistics +# set the interval in milliseconds to fetch graph statistics # graph.meta.statistics.fetch.interval.ms: 86400000l + +# set the timeout in milliseconds to fetch graph schema or statistics +# graph.meta.fetch.timeout.ms: 1000 diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/GraphConfig.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/GraphConfig.java index 9ab065f61366..74a7086599bc 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/GraphConfig.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/GraphConfig.java @@ -31,6 +31,9 @@ public class GraphConfig { public static final
Config GRAPH_META_STATISTICS_FETCH_INTERVAL_MS = Config.longConfig("graph.meta.statistics.fetch.interval.ms", 24 * 3600 * 1000l); + public static final Config GRAPH_META_FETCH_TIMEOUT_MS = + Config.longConfig("graph.meta.fetch.timeout.ms", 1000); + // an intermediate solution to support foreign key, will be integrated into schema public static final Config GRAPH_FOREIGN_KEY_URI = Config.stringConfig("graph.foreign.key", ""); diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java index e55682a86cdb..ec35991327ad 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/config/YamlConfigs.java @@ -72,6 +72,9 @@ public class YamlConfigs extends Configs { "graph.meta.statistics.fetch.interval.ms", (Configs configs) -> configs.get("compiler.meta.reader.statistics.interval")) + .put( + "graph.meta.fetch.timeout.ms", + (Configs configs) -> configs.get("compiler.meta.reader.timeout")) .put( "graph.store", (Configs configs) -> { diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/GraphId.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/GraphId.java index 8ddf0d75a511..4c63425263cd 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/GraphId.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/GraphId.java @@ -51,4 +51,9 @@ public boolean equals(Object o) { public int hashCode() { return Objects.hashCode(id); } + + @Override + public String toString() { + return "GraphId{" + "id=" + id + '}'; + } } diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/HttpIrMetaReader.java 
b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/HttpIrMetaReader.java index 2e12792d7bf1..ba6cfb089d87 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/HttpIrMetaReader.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/common/ir/meta/reader/HttpIrMetaReader.java @@ -57,7 +57,9 @@ public HttpIrMetaReader(Configs configs) { public IrMeta readMeta() throws IOException { try { HttpResponse response = - sendRequest(GraphConfig.GRAPH_META_SCHEMA_URI.get(configs)); + sendRequest( + GraphConfig.GRAPH_META_SCHEMA_URI.get(configs), + GraphConfig.GRAPH_META_FETCH_TIMEOUT_MS.get(configs)); String res = response.body(); Preconditions.checkArgument( response.statusCode() == 200, @@ -91,7 +93,8 @@ public IrGraphStatistics readStats(GraphId graphId) throws IOException { sendRequest( String.format( GraphConfig.GRAPH_META_STATISTICS_URI.get(configs), - graphId.getId())); + graphId.getId()), + GraphConfig.GRAPH_META_FETCH_TIMEOUT_MS.get(configs)); String res = response.body(); Preconditions.checkArgument( response.statusCode() == 200, @@ -109,7 +112,9 @@ public IrGraphStatistics readStats(GraphId graphId) throws IOException { public boolean syncStatsEnabled(GraphId graphId) throws IOException { try { HttpResponse response = - sendRequest(GraphConfig.GRAPH_META_SCHEMA_URI.get(configs)); + sendRequest( + GraphConfig.GRAPH_META_SCHEMA_URI.get(configs), + GraphConfig.GRAPH_META_FETCH_TIMEOUT_MS.get(configs)); String res = response.body(); Preconditions.checkArgument( response.statusCode() == 200, @@ -122,13 +127,14 @@ public boolean syncStatsEnabled(GraphId graphId) throws IOException { } } - private HttpResponse sendRequest(String requestUri) + private HttpResponse sendRequest(String requestUri, long timeOut) throws IOException, InterruptedException { HttpRequest request = HttpRequest.newBuilder() .uri(URI.create(requestUri)) .headers(CONTENT_TYPE, APPLICATION_JSON) 
.GET() + .timeout(java.time.Duration.ofMillis(timeOut)) .build(); return httpClient.send(request, HttpResponse.BodyHandlers.ofString()); } diff --git a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java index a6cff80461b6..f027b644fcf1 100644 --- a/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java +++ b/interactive_engine/compiler/src/main/java/com/alibaba/graphscope/cypher/integration/suite/simple/SimpleMatchQueries.java @@ -219,4 +219,15 @@ public static QueryContext get_simple_match_query_17_test() { List expected = Arrays.asList("Record<{$f0: 851}>"); return new QueryContext(query, expected); } + + public static QueryContext get_simple_match_query_18_test() { + String query = + "MATCH (country:PLACE {name:" + + " \"India\"})<-[:ISPARTOF]-(:PLACE)<-[:ISLOCATEDIN]-(zombie:PERSON)\n" + + "OPTIONAL MATCH (zombie)<-[:HASCREATOR]-(message)\n" + + "WHERE message.length < 100\n" + + " Return count(country);"; + List expected = Arrays.asList("Record<{$f0: 39783}>"); + return new QueryContext(query, expected); + } } diff --git a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java index aa503ae020cc..48b8e60fcfbf 100644 --- a/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java +++ b/interactive_engine/compiler/src/test/java/com/alibaba/graphscope/cypher/integration/ldbc/SimpleMatchTest.java @@ -162,6 +162,13 @@ public void run_simple_match_17_test() { Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); } + @Test + public void run_simple_match_18_test() 
{ + QueryContext testQuery = SimpleMatchQueries.get_simple_match_query_18_test(); + Result result = session.run(testQuery.getQuery()); + Assert.assertEquals(testQuery.getExpectedResult().toString(), result.list().toString()); + } + @AfterClass public static void afterClass() { if (session != null) { diff --git a/interactive_engine/executor/ir/graph_proxy/src/apis/graph/mod.rs b/interactive_engine/executor/ir/graph_proxy/src/apis/graph/mod.rs index ecd0837e5c1e..fe58fee9bfdc 100644 --- a/interactive_engine/executor/ir/graph_proxy/src/apis/graph/mod.rs +++ b/interactive_engine/executor/ir/graph_proxy/src/apis/graph/mod.rs @@ -30,6 +30,8 @@ use crate::utils::expr::eval_pred::PEvaluator; pub mod element; pub type ID = i64; +// a special id for Null graph elements. +pub const NULL_ID: ID = ID::MAX; pub fn read_id(reader: &mut R) -> io::Result { reader.read_i64() diff --git a/interactive_engine/executor/ir/graph_proxy/src/utils/expr/eval.rs b/interactive_engine/executor/ir/graph_proxy/src/utils/expr/eval.rs index 2b27f09c1751..7de63b7da164 100644 --- a/interactive_engine/executor/ir/graph_proxy/src/utils/expr/eval.rs +++ b/interactive_engine/executor/ir/graph_proxy/src/utils/expr/eval.rs @@ -833,7 +833,7 @@ impl InnerOpr { mod tests { use ahash::HashMap; use dyn_type::DateTimeFormats; - use ir_common::{expr_parse::str_to_expr_pb, generated::physical::physical_opr::operator}; + use ir_common::expr_parse::str_to_expr_pb; use super::*; use crate::apis::{DynDetails, Vertex}; diff --git a/interactive_engine/executor/ir/integrated/tests/optional_expand_test.rs b/interactive_engine/executor/ir/integrated/tests/optional_expand_test.rs index 95abc4fee9d4..81a064d20541 100644 --- a/interactive_engine/executor/ir/integrated/tests/optional_expand_test.rs +++ b/interactive_engine/executor/ir/integrated/tests/optional_expand_test.rs @@ -21,7 +21,6 @@ mod common; mod test { use std::sync::Arc; - use dyn_type::Object; use graph_proxy::apis::{register_graph, GraphElement}; use 
graph_proxy::create_exp_store; use graph_store::ldbc::LDBCVertexParser; @@ -96,8 +95,7 @@ mod test { while let Some(Ok(record)) = result.next() { if let Some(element) = record.get(None).unwrap().as_vertex() { result_ids.push(element.id() as usize) - } else if let Some(obj) = record.get(None).unwrap().as_object() { - assert_eq!(obj, &Object::None); + } else if record.get(None).unwrap().is_none() { none_cnt += 1; } } @@ -131,8 +129,7 @@ mod test { println!("record: {:?}", record); if let Some(element) = record.get(None).unwrap().as_vertex() { result_ids.push(element.id() as usize) - } else if let Some(obj) = record.get(None).unwrap().as_object() { - assert_eq!(obj, &Object::None); + } else if record.get(None).unwrap().is_none() { none_cnt += 1; } } @@ -168,8 +165,7 @@ mod test { while let Some(Ok(record)) = result.next() { if let Some(e) = record.get(None).unwrap().as_edge() { result_edges.push((e.src_id as usize, e.dst_id as usize)); - } else if let Some(obj) = record.get(None).unwrap().as_object() { - assert_eq!(obj, &Object::None); + } else if record.get(None).unwrap().is_none() { none_cnt += 1; } } @@ -268,11 +264,8 @@ mod test { while let Some(Ok(record)) = result.next() { if let Some(element) = record.get(None).unwrap().as_vertex() { result_ids.push(element.id() as usize); - } else if let Some(obj) = record.get(None).unwrap().as_object() { - assert_eq!(obj, &Object::None); + } else if record.get(None).unwrap().is_none() { none_cnt += 1; - } else { - unreachable!() } } result_ids.sort(); diff --git a/interactive_engine/executor/ir/runtime/src/process/entry.rs b/interactive_engine/executor/ir/runtime/src/process/entry.rs index ef73406c2c9d..3cd47355ed69 100644 --- a/interactive_engine/executor/ir/runtime/src/process/entry.rs +++ b/interactive_engine/executor/ir/runtime/src/process/entry.rs @@ -24,6 +24,7 @@ use std::sync::Arc; use ahash::HashMap; use dyn_type::{BorrowObject, Object}; +use graph_proxy::apis::graph::NULL_ID; use graph_proxy::apis::VertexOrEdge; use 
graph_proxy::apis::{Edge, Element, GraphElement, GraphPath, PropertyValue, Vertex, ID}; use ir_common::error::ParsePbError; @@ -51,6 +52,8 @@ pub enum EntryType { Intersection, /// Type of collection consisting of entries Collection, + /// A Null graph element entry type + Null, } pub trait Entry: Debug + Send + Sync + AsAny + Element { @@ -104,6 +107,7 @@ impl DynEntry { .as_object() .map(|obj| obj.eq(&Object::None)) .unwrap_or(false), + EntryType::Null => true, _ => false, } } @@ -184,6 +188,9 @@ impl Encode for DynEntry { .unwrap() .write_to(writer)?; } + EntryType::Null => { + writer.write_u8(9)?; + } } Ok(()) } @@ -225,6 +232,7 @@ impl Decode for DynEntry { let general_intersect = GeneralIntersectionEntry::read_from(reader)?; Ok(DynEntry::new(general_intersect)) } + 9 => Ok(DynEntry::new(NullEntry)), _ => unreachable!(), } } @@ -247,7 +255,7 @@ impl Element for DynEntry { impl GraphElement for DynEntry { fn id(&self) -> ID { match self.get_type() { - EntryType::Vertex | EntryType::Edge | EntryType::Path => { + EntryType::Vertex | EntryType::Edge | EntryType::Path | EntryType::Null => { self.inner.as_graph_element().unwrap().id() } _ => unreachable!(), @@ -256,7 +264,7 @@ impl GraphElement for DynEntry { fn label(&self) -> Option { match self.get_type() { - EntryType::Vertex | EntryType::Edge | EntryType::Path => { + EntryType::Vertex | EntryType::Edge | EntryType::Path | EntryType::Null => { self.inner.as_graph_element().unwrap().label() } _ => unreachable!(), @@ -265,7 +273,7 @@ impl GraphElement for DynEntry { fn get_property(&self, key: &NameOrId) -> Option { match self.get_type() { - EntryType::Vertex | EntryType::Edge | EntryType::Path => self + EntryType::Vertex | EntryType::Edge | EntryType::Path | EntryType::Null => self .inner .as_graph_element() .unwrap() @@ -276,7 +284,7 @@ impl GraphElement for DynEntry { fn get_all_properties(&self) -> Option> { match self.get_type() { - EntryType::Vertex | EntryType::Edge | EntryType::Path => self + 
EntryType::Vertex | EntryType::Edge | EntryType::Path | EntryType::Null => self .inner .as_graph_element() .unwrap() @@ -306,6 +314,7 @@ impl Hash for DynEntry { .as_any_ref() .downcast_ref::() .hash(state), + EntryType::Null => self.hash(state), } } } @@ -335,6 +344,7 @@ impl PartialEq for DynEntry { .as_any_ref() .downcast_ref::() .eq(&other.as_any_ref().downcast_ref::()), + EntryType::Null => other.get_type() == EntryType::Null, } } else { false @@ -373,6 +383,7 @@ impl PartialOrd for DynEntry { .as_any_ref() .downcast_ref::() .partial_cmp(&other.as_any_ref().downcast_ref::()), + EntryType::Null => None, } } else { None @@ -548,6 +559,50 @@ impl Decode for CollectionEntry { } } +// NullEntry represents a null graph element, e.g., a null vertex generated by optional edge_expand. +#[derive(Debug, Clone, Default, PartialEq, PartialOrd, Eq, Hash)] +pub struct NullEntry; + +impl_as_any!(NullEntry); + +impl Entry for NullEntry { + fn get_type(&self) -> EntryType { + EntryType::Null + } +} + +impl Element for NullEntry { + fn as_graph_element(&self) -> Option<&dyn GraphElement> { + Some(self) + } + + fn len(&self) -> usize { + 0 + } + + fn as_borrow_object(&self) -> BorrowObject { + BorrowObject::None + } +} + +impl GraphElement for NullEntry { + fn id(&self) -> ID { + NULL_ID + } + + fn label(&self) -> Option { + None + } + + fn get_property(&self, _key: &NameOrId) -> Option { + None + } + + fn get_all_properties(&self) -> Option> { + None + } +} + impl TryFrom for DynEntry { type Error = ParsePbError; fn try_from(e: result_pb::Element) -> Result { diff --git a/interactive_engine/executor/ir/runtime/src/process/operator/flatmap/edge_expand.rs b/interactive_engine/executor/ir/runtime/src/process/operator/flatmap/edge_expand.rs index 3b9104b38711..4675c99d596a 100644 --- a/interactive_engine/executor/ir/runtime/src/process/operator/flatmap/edge_expand.rs +++ b/interactive_engine/executor/ir/runtime/src/process/operator/flatmap/edge_expand.rs @@ -25,7 +25,7 @@ use 
ir_common::KeyId; use pegasus::api::function::{DynIter, FlatMapFunction, FnResult}; use crate::error::{FnExecError, FnGenError, FnGenResult}; -use crate::process::entry::{Entry, EntryType}; +use crate::process::entry::{Entry, EntryType, NullEntry}; use crate::process::operator::flatmap::FlatMapFuncGen; use crate::process::record::{Record, RecordExpandIter, RecordPathExpandIter}; @@ -50,7 +50,7 @@ impl FlatMapFunction for EdgeExpandOperator< // the case of expand edge, and get end vertex; ExpandOpt::Vertex => { if self.is_optional && iter.peek().is_none() { - input.append(Object::None, self.alias); + input.append(NullEntry, self.alias); Ok(Box::new(vec![input].into_iter())) } else { let neighbors_iter = iter.map(|e| { @@ -74,7 +74,7 @@ impl FlatMapFunction for EdgeExpandOperator< // the case of expand neighbors, including edges/vertices ExpandOpt::Edge => { if self.is_optional && iter.peek().is_none() { - input.append(Object::None, self.alias); + input.append(NullEntry, self.alias); Ok(Box::new(vec![input].into_iter())) } else { Ok(Box::new(RecordExpandIter::new( diff --git a/interactive_engine/executor/ir/runtime/src/process/operator/map/get_v.rs b/interactive_engine/executor/ir/runtime/src/process/operator/map/get_v.rs index ef68632cab80..5d44af1b7d40 100644 --- a/interactive_engine/executor/ir/runtime/src/process/operator/map/get_v.rs +++ b/interactive_engine/executor/ir/runtime/src/process/operator/map/get_v.rs @@ -15,7 +15,6 @@ use std::convert::TryInto; -use dyn_type::Object; use graph_proxy::apis::GraphElement; use graph_proxy::apis::{get_graph, DynDetails, GraphPath, QueryParams, Vertex}; use graph_proxy::utils::expr::eval_pred::EvalPred; @@ -26,7 +25,7 @@ use ir_common::{KeyId, LabelId}; use pegasus::api::function::{FilterMapFunction, FnResult}; use crate::error::{FnExecError, FnExecResult, FnGenError, FnGenResult}; -use crate::process::entry::{DynEntry, Entry}; +use crate::process::entry::{DynEntry, Entry, NullEntry}; use 
crate::process::operator::map::FilterMapFuncGen; use crate::process::record::Record; @@ -118,16 +117,9 @@ impl FilterMapFunction for GetVertexOperator { } else { Err(FnExecError::unexpected_data_error("unreachable path end entry in GetV"))? } - } else if let Some(obj) = entry.as_object() { - if Object::None.eq(obj) { - input.append(Object::None, self.alias); - Ok(Some(input)) - } else { - Err(FnExecError::unexpected_data_error(&format!( - "Can only apply `GetV` on an object that is not None. The entry is {:?}", - entry - )))? - } + } else if entry.is_none() { + input.append(NullEntry, self.alias); + Ok(Some(input)) } else { Err(FnExecError::unexpected_data_error( &format!( "Can only apply `GetV` (`Auxilia` instead) on an edge or path entry, while the entry is {:?}", entry @@ -251,27 +243,20 @@ impl FilterMapFunction for AuxiliaOperator { } else { return Ok(None); } - } else if let Some(obj) = entry.as_object() { - if Object::None.eq(obj) { - if let Some(predicate) = &self.query_params.filter { - let res = predicate - .eval_bool(Some(&input)) - .map_err(|e| FnExecError::from(e))?; - if res { - input.append(Object::None, self.alias); - return Ok(Some(input)); - } else { - return Ok(None); - } - } else { - input.append(Object::None, self.alias); + } else if entry.is_none() { + if let Some(predicate) = &self.query_params.filter { + let res = predicate + .eval_bool(Some(&input)) + .map_err(|e| FnExecError::from(e))?; + if res { + input.append(NullEntry, self.alias); return Ok(Some(input)); + } else { + return Ok(None); } } else { - Err(FnExecError::unexpected_data_error(&format!( - "neither Vertex nor Edge entry is accessed in `Auxilia` operator, the entry is {:?}", - entry - )))? 
+ input.append(NullEntry, self.alias); + return Ok(Some(input)); } } else { Err(FnExecError::unexpected_data_error(&format!( diff --git a/interactive_engine/executor/ir/runtime/src/process/operator/sink/sink.rs b/interactive_engine/executor/ir/runtime/src/process/operator/sink/sink.rs index 426a2c2e782a..9fee81e91205 100644 --- a/interactive_engine/executor/ir/runtime/src/process/operator/sink/sink.rs +++ b/interactive_engine/executor/ir/runtime/src/process/operator/sink/sink.rs @@ -252,6 +252,7 @@ impl RecordSinkEncoder { EntryType::Pair => { unreachable!() } + EntryType::Null => Some(result_pb::element::Inner::Object(Object::None.into())), }; result_pb::Element { inner } } diff --git a/interactive_engine/pom.xml b/interactive_engine/pom.xml index f69d68faca14..e5f55ca86aec 100644 --- a/interactive_engine/pom.xml +++ b/interactive_engine/pom.xml @@ -253,7 +253,7 @@ 4.4.0 4.4.0 - 0.4.2 + 0.4.3 no-gaia-ir diff --git a/k8s/dockerfiles/coordinator.Dockerfile b/k8s/dockerfiles/coordinator.Dockerfile index d12cbe99c043..3b36d8e6811a 100644 --- a/k8s/dockerfiles/coordinator.Dockerfile +++ b/k8s/dockerfiles/coordinator.Dockerfile @@ -36,10 +36,17 @@ FROM ubuntu:22.04 AS coordinator ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y && \ - apt-get install -y sudo python3-pip openmpi-bin curl tzdata netcat && \ + apt-get install -y sudo python3-pip openmpi-bin curl locales tzdata netcat && \ + locale-gen en_US.UTF-8 && \ apt-get clean -y && \ rm -rf /var/lib/apt/lists/* +ENV LANG en_US.UTF-8 +ENV LANGUAGE en_US:en +ENV LC_ALL en_US.UTF-8 +ENV TZ=Asia/Shanghai +RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone + ENV GRAPHSCOPE_HOME=/opt/graphscope RUN useradd -m graphscope -u 1001 \ diff --git a/k8s/dockerfiles/graphscope-store.Dockerfile b/k8s/dockerfiles/graphscope-store.Dockerfile index c079b2aac43b..a2466cf11a51 100644 --- a/k8s/dockerfiles/graphscope-store.Dockerfile +++ b/k8s/dockerfiles/graphscope-store.Dockerfile @@ -14,7 +14,7 @@ 
COPY --chown=graphscope:graphscope . /home/graphscope/graphscope COPY --chown=graphscope:graphscope ./interactive_engine/assembly/src/conf/maven.settings.xml /home/graphscope/.m2/settings.xml USER graphscope -RUN rustup toolchain install 1.76.0 && rustup default 1.76.0 +RUN rustup toolchain install 1.81.0 && rustup default 1.81.0 RUN cd /home/graphscope/graphscope \ && . ~/.graphscope_env \ diff --git a/k8s/dockerfiles/interactive-entrypoint.sh b/k8s/dockerfiles/interactive-entrypoint.sh index 009323f6c31f..76715ec609d6 100644 --- a/k8s/dockerfiles/interactive-entrypoint.sh +++ b/k8s/dockerfiles/interactive-entrypoint.sh @@ -57,6 +57,9 @@ function prepare_workspace() { cp /opt/flex/share/interactive_config.yaml $engine_config_path #make sure the line which start with default_graph is changed to default_graph: ${DEFAULT_GRAPH_NAME} sed -i "s/default_graph:.*/default_graph: ${DEFAULT_GRAPH_NAME}/" $engine_config_path + # By default, we occupy the all available cpus + cpus=$(grep -c ^processor /proc/cpuinfo) + sed -i "s/thread_num_per_worker:.*/thread_num_per_worker: ${cpus}/" $engine_config_path echo "Using default graph: ${DEFAULT_GRAPH_NAME} to start the service" # copy the builtin graph diff --git a/k8s/dockerfiles/interactive.Dockerfile b/k8s/dockerfiles/interactive.Dockerfile index 4a818174e5f3..10fb05880510 100644 --- a/k8s/dockerfiles/interactive.Dockerfile +++ b/k8s/dockerfiles/interactive.Dockerfile @@ -18,7 +18,7 @@ RUN cd /home/graphscope/GraphScope/ && \ else \ mkdir /home/graphscope/install; \ . 
/home/graphscope/.graphscope_env; \ - rustup toolchain install 1.76.0 && rustup default 1.76.0; \ + rustup toolchain install 1.81.0 && rustup default 1.81.0; \ make interactive-install BUILD_TYPE="$profile" INSTALL_PREFIX=/home/graphscope/install; \ fi diff --git a/k8s/internal/Makefile b/k8s/internal/Makefile index 81736ddf4b52..fd3f5b8ea26f 100644 --- a/k8s/internal/Makefile +++ b/k8s/internal/Makefile @@ -110,7 +110,7 @@ graphscope-manylinux2014-py3-nodocker: sudo sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* && \ sudo yum install java-11-openjdk-devel -y && \ sudo yum remove java-1.8.0-openjdk-devel java-1.8.0-openjdk java-1.8.0-openjdk-headless -y && \ - rustup toolchain install 1.76.0 && rustup default 1.76.0 && \ + rustup toolchain install 1.81.0 && rustup default 1.81.0 && \ cd $(WORKING_DIR)/../.. && \ if [[ "${PLATFORM}" == "aarch64" ]]; then \ export AUDITWHEEL_PLAT=manylinux2014_${PLATFORM}; \ diff --git a/python/graphscope/analytical/udf/patch.py b/python/graphscope/analytical/udf/patch.py index 03aba6e882ab..c6c50400744e 100644 --- a/python/graphscope/analytical/udf/patch.py +++ b/python/graphscope/analytical/udf/patch.py @@ -134,7 +134,7 @@ def patch_cython_codewriter(writer): # noqa: C901 13. Add for `visit_CascadedCmpNode` node. Patch for `visit_PrimaryCmpNode` node. - Examaples: + Examples: ---------- >>> 5 < 6 <= 7 > 4 >= 3 > 2 != 1 diff --git a/python/graphscope/client/utils.py b/python/graphscope/client/utils.py index a6e25021c40d..5494b091fe9a 100644 --- a/python/graphscope/client/utils.py +++ b/python/graphscope/client/utils.py @@ -143,7 +143,7 @@ def handle_grpc_error_with_retry(fn, retry=True): This function will retry max times with specific GRPC status. See detail in `GRPC_MAX_RETRIES_BY_CODE`. 
- Refer and specical thanks to: + Refer and special thanks to: https://github.com/googleapis/google-cloud-python/issues/2583 """ @@ -179,7 +179,7 @@ def with_grpc_catch(*args, **kwargs): def handle_grpc_error(fn_or_retry): - """Decorator to handle grpc error, and accepts an optional arugment to control + """Decorator to handle grpc error, and accepts an optional argument to control whether the function should be retried for certain errors. This decorator can be used as diff --git a/python/graphscope/deploy/hosts/cluster.py b/python/graphscope/deploy/hosts/cluster.py index 1f3affb14cc6..b35d01839021 100644 --- a/python/graphscope/deploy/hosts/cluster.py +++ b/python/graphscope/deploy/hosts/cluster.py @@ -91,7 +91,7 @@ def _launch_coordinator(self): # Param `start_new_session=True` is for putting child process to a new process group # so it won't get the signals from parent. - # In notebook environment, we need to accept the signal from kernel restarted/stoped. + # In notebook environment, we need to accept the signal from kernel restarted/stopped. process = subprocess.Popen( cmd, start_new_session=False if in_notebook() else True, diff --git a/python/graphscope/framework/app.py b/python/graphscope/framework/app.py index 7eb43a16bdf1..565dce8d6a4e 100644 --- a/python/graphscope/framework/app.py +++ b/python/graphscope/framework/app.py @@ -501,7 +501,7 @@ def load_app(gar=None, algo=None, context=None, **kwargs): specified path or bytes. For java apps, gar can be none to indicate we should find the app in - previouse added libs. + previously added libs. Returns: Instance of diff --git a/python/graphscope/framework/context.py b/python/graphscope/framework/context.py index c41604e0d4bf..61ea2e97e3b1 100644 --- a/python/graphscope/framework/context.py +++ b/python/graphscope/framework/context.py @@ -146,7 +146,7 @@ def to_numpy(self, selector, vertex_range=None, axis=0): identical with vertices' oid type. 
Omitting the first index starts the slice at the beginning of the vertices, and omitting the second index extends the slice to the end of the vertices. - Note the comparision is not based on numeric order, but on alphabetic order. + Note the comparison is not based on numeric order, but on alphabetic order. axis (int): optional, default to 0. Returns: @@ -170,7 +170,7 @@ def to_dataframe(self, selector, vertex_range=None): of vertices from `m` to, but not including `n`. Type of `m`, `n` must be identical with vertices' oid type. Only the sub-ranges of vertices data will be retrieved. - Note the comparision is not based on numeric order, but on alphabetic order. + Note the comparison is not based on numeric order, but on alphabetic order. Returns: :class:`graphscope.framework.context.ResultDAGNode`: diff --git a/python/graphscope/framework/dag.py b/python/graphscope/framework/dag.py index f2acb529cd41..4668ce38387b 100644 --- a/python/graphscope/framework/dag.py +++ b/python/graphscope/framework/dag.py @@ -29,7 +29,7 @@ class Dag(object): """Class represented as a GraphScope dataflow dag. - A :class:`Dag` is always belongs to a session and containes a set of + A :class:`Dag` is always belongs to a session and contains a set of :class:`Operation` object, which performs computations on tensors. """ diff --git a/python/graphscope/framework/graph_builder.py b/python/graphscope/framework/graph_builder.py index 17530a7e26a8..8daa73294993 100644 --- a/python/graphscope/framework/graph_builder.py +++ b/python/graphscope/framework/graph_builder.py @@ -159,11 +159,11 @@ def load_from( generate_eid (bool, optional): Whether to generate a unique edge id for each edge. Generated eid will be placed in third column. This feature is for cooperating with interactive engine. If you only need to work with analytical engine, set it to False. Defaults to True. - retain_oid (bool, optional): Whether to keep the orignal ID column as the last column of vertex table. 
+ retain_oid (bool, optional): Whether to keep the original ID column as the last column of vertex table. This feature is for cooperating with interactive engine. If you only need to work with analytical engine, set it to False. Defaults to True. vertex_map (str, optional): Indicate use global vertex map or local vertex map. Can be "global" or "local". - compact_edges (bool, optional): Compact edges (CSR) using varint and delta encoding. Defaults to False. + compact_edges (bool, optional): Compact edges (CSR) using varint and delta encoding. Defaults to False. Note that compact edges helps to half the memory usage of edges in graph data structure, but may cause at most 10%~20% performance degeneration in some algorithms. use_perfect_hash (bool, optional): Use perfect hashmap in vertex map to optimize the memory usage. diff --git a/python/graphscope/framework/loader.py b/python/graphscope/framework/loader.py index 7ea61b92bc52..6a4fe88fd008 100644 --- a/python/graphscope/framework/loader.py +++ b/python/graphscope/framework/loader.py @@ -43,7 +43,7 @@ class CSVOptions(object): """Options to read from CSV files. - Avaiable options are: + Available options are: - column delimiters - include a subset of columns - types of each columns @@ -165,7 +165,7 @@ def __repr__(self) -> str: return self.__str__() def resolve(self, source): - """Dispatch resolver based on type of souce. + """Dispatch resolver based on type of source. 
Args: source: Different data sources diff --git a/python/graphscope/gsctl/scripts/install_deps.sh b/python/graphscope/gsctl/scripts/install_deps.sh index 1de02edf06bb..dd351f2e019f 100755 --- a/python/graphscope/gsctl/scripts/install_deps.sh +++ b/python/graphscope/gsctl/scripts/install_deps.sh @@ -54,7 +54,7 @@ get_os_version() { OS_VERSION=$(uname -r) fi if [[ "${PLATFORM}" != *"Ubuntu"* && "${PLATFORM}" != *"CentOS"* && "${PLATFORM}" != *"Darwin"* && "${PLATFORM}" != *"Aliyun"* ]];then - err "Only suppport on Ubuntu/CentOS/macOS/AliyunOS platform." + err "Only support on Ubuntu/CentOS/macOS/AliyunOS platform." exit 1 fi if [[ "${PLATFORM}" == *"Ubuntu"* && "${OS_VERSION:0:2}" -lt "20" ]]; then @@ -882,8 +882,8 @@ install_interactive_dependencies() { if ! command -v rustup &>/dev/null; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y source $HOME/.cargo/env - rustup install 1.76.0 - rustup default 1.76.0 + rustup install 1.81.0 + rustup default 1.81.0 rustc --version fi # opentelemetry diff --git a/python/graphscope/nx/algorithms/builtin.py b/python/graphscope/nx/algorithms/builtin.py index 2b1b27f64186..1ace74ccfdd4 100644 --- a/python/graphscope/nx/algorithms/builtin.py +++ b/python/graphscope/nx/algorithms/builtin.py @@ -414,7 +414,7 @@ def all_pairs_shortest_path_length(G, weight=None): ---------- G : networkx graph - weight : string (defualt=None) + weight : string (default=None) edge weights will be accessed via the edge attribute with this key (that is, the weight of the edge joining `u` to `v` will be ``G.edges[u, v][weight]``). If is None, every edge is assume to be one. 
diff --git a/python/graphscope/nx/algorithms/tests/forward/test_isomorphism.py b/python/graphscope/nx/algorithms/tests/forward/test_isomorphism.py index e26319f55754..ec739206b11e 100644 --- a/python/graphscope/nx/algorithms/tests/forward/test_isomorphism.py +++ b/python/graphscope/nx/algorithms/tests/forward/test_isomorphism.py @@ -60,12 +60,12 @@ class TestGenericMultiEdgeMatch(): pass -@pytest.mark.skip(reason="not supoort time object as attribute") +@pytest.mark.skip(reason="not support time object as attribute") class TestTimeRespectingGraphMatcher(object): pass -@pytest.mark.skip(reason="not supoort time object as attribute") +@pytest.mark.skip(reason="not support time object as attribute") class TestDiTimeRespectingGraphMatcher(object): pass diff --git a/python/graphscope/nx/algorithms/tests/forward/test_simple_paths.py b/python/graphscope/nx/algorithms/tests/forward/test_simple_paths.py index ec5009f8863c..191a29a1ae3e 100644 --- a/python/graphscope/nx/algorithms/tests/forward/test_simple_paths.py +++ b/python/graphscope/nx/algorithms/tests/forward/test_simple_paths.py @@ -21,8 +21,8 @@ def test_shortest_simple_paths(): ) @pytest.mark.usefixtures("graphscope_session") -@pytest.mark.skipif(nxa.__version__ < "2.5", reason="netowrkx2.4 does not support weight funtion.") -def test_shortest_simple_paths_directed_with_weight_fucntion(): +@pytest.mark.skipif(nxa.__version__ < "2.5", reason="netowrkx2.4 does not support weight function.") +def test_shortest_simple_paths_directed_with_weight_function(): def cost(u, v, x): return 1 diff --git a/python/graphscope/nx/classes/graph.py b/python/graphscope/nx/classes/graph.py index d3249a7e82ab..4042ea27a150 100644 --- a/python/graphscope/nx/classes/graph.py +++ b/python/graphscope/nx/classes/graph.py @@ -431,7 +431,7 @@ def session_id(self): @property def key(self): - """Key of the coresponding engine graph.""" + """Key of the corresponding engine graph.""" if hasattr(self, "_graph") and self._is_client_view: return ( 
self._graph.key diff --git a/scripts/launch_cluster.py b/scripts/launch_cluster.py index 64035af9692a..a315f5c28318 100755 --- a/scripts/launch_cluster.py +++ b/scripts/launch_cluster.py @@ -146,7 +146,7 @@ def _get_cluster_config(self): default="1.21", ) config["instance_type"] = click.prompt( - "Worker node instance type, defalut", default="t2.micro" + "Worker node instance type, default", default="t2.micro" ) config["node_num"] = click.prompt( "Worker node num, default", type=int, default=4 diff --git a/tutorials/02_graph_manipulations_with_networkx_compatible_apis.ipynb b/tutorials/02_graph_manipulations_with_networkx_compatible_apis.ipynb index 50f260efe340..d7b9ce7d70e1 100644 --- a/tutorials/02_graph_manipulations_with_networkx_compatible_apis.ipynb +++ b/tutorials/02_graph_manipulations_with_networkx_compatible_apis.ipynb @@ -236,7 +236,7 @@ "metadata": {}, "outputs": [], "source": [ - "list(G.edges.data()) # shows the edge arrtibutes" + "list(G.edges.data()) # shows the edge attributes" ] }, { @@ -972,7 +972,7 @@ "metadata": {}, "outputs": [], "source": [ - "H = DG.to_undirected() # return a \"deepcopy\" of undirected represetation of DG.\n", + "H = DG.to_undirected() # return a \"deepcopy\" of undirected representation of DG.\n", "list(H.edges)" ] }, @@ -1000,7 +1000,7 @@ "metadata": {}, "outputs": [], "source": [ - "K = DG.reverse() # retrun a \"deepcopy\" of reversed copy.\n", + "K = DG.reverse() # return a \"deepcopy\" of reversed copy.\n", "list(K.edges)" ] }, diff --git a/tutorials/zh/02_graph_manipulations_with_networkx_compatible_apis.ipynb b/tutorials/zh/02_graph_manipulations_with_networkx_compatible_apis.ipynb index 69c449b6ab3b..4589a7be1d22 100644 --- a/tutorials/zh/02_graph_manipulations_with_networkx_compatible_apis.ipynb +++ b/tutorials/zh/02_graph_manipulations_with_networkx_compatible_apis.ipynb @@ -235,7 +235,7 @@ "metadata": {}, "outputs": [], "source": [ - "list(G.edges.data()) # shows the edge arrtibutes" + "list(G.edges.data()) # 
shows the edge attributes" ] }, { @@ -962,7 +962,7 @@ "metadata": {}, "outputs": [], "source": [ - "H = DG.to_undirected() # return a \"deepcopy\" of undirected represetation of DG.\n", + "H = DG.to_undirected() # return a \"deepcopy\" of undirected representation of DG.\n", "list(H.edges)" ] }, @@ -991,7 +991,7 @@ "metadata": {}, "outputs": [], "source": [ - "K = DG.reverse() # retrun a \"deepcopy\" of reversed copy.\n", + "K = DG.reverse() # return a \"deepcopy\" of reversed copy.\n", "list(K.edges)" ] }, diff --git a/tutorials/zh/07_interactive_query_with_gremlin.ipynb b/tutorials/zh/07_interactive_query_with_gremlin.ipynb index 4cee0feebc8b..92bcb4a4202b 100644 --- a/tutorials/zh/07_interactive_query_with_gremlin.ipynb +++ b/tutorials/zh/07_interactive_query_with_gremlin.ipynb @@ -50,7 +50,7 @@ "metadata": {}, "outputs": [], "source": [ - "# Greate GIE engine with Gremlin\n", + "# Create GIE engine with Gremlin\n", "\n", "interactive = graphscope.gremlin(modern_graph)" ]