From 68ebe69c5eb7709b430b1761d1ad6006bf8445ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Wed, 29 May 2024 17:30:55 +0800 Subject: [PATCH 01/52] todo: test user-writable --- .../giraph/MessageAppWithUserWritable.java | 87 +++++++++++++ .../P2PEdgeMultipleLongInputFormat.java | 115 ++++++++++++++++++ .../P2PVertexMultipleLongInputFormat.java | 86 +++++++++++++ 3 files changed, 288 insertions(+) create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java new file mode 100644 index 000000000000..f53aba267205 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java @@ -0,0 +1,87 @@ +/* + * Copyright 2022 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.alibaba.graphscope.example.giraph; + +import com.alibaba.graphscope.example.giraph.writable.MultipleLongWritable; +import org.apache.giraph.conf.LongConfOption; +import org.apache.giraph.graph.BasicComputation; +import org.apache.giraph.graph.Vertex; +import org.apache.hadoop.io.LongWritable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Objects; + +/** + * Only send msg. + */ +public class MessageAppWithUserWritable + extends BasicComputation { + + public static LongConfOption MAX_SUPER_STEP; + private static Logger logger = LoggerFactory.getLogger(MessageApp.class); + + static { + String maxSuperStep = System.getenv("MAX_SUPER_STEP"); + if (Objects.isNull(maxSuperStep) || maxSuperStep.isEmpty()) { + MAX_SUPER_STEP = new LongConfOption("maxSuperStep", 1, "max super step"); + } else { + MAX_SUPER_STEP = + new LongConfOption( + "maxSuperStep", Long.valueOf(maxSuperStep), "max super step"); + } + } + + /** + * Must be defined by user to do computation on a single Vertex. + * + * @param vertex Vertex + * @param messages Messages that were sent to this vertex in the previous superstep. 
Each + * message is only guaranteed to have + */ + @Override + public void compute( + Vertex vertex, + Iterable messages) + throws IOException { + if (getSuperstep() == 0) { + logger.info("There should be no messages in step0, " + vertex.getId()); + boolean flag = false; + for (MultipleLongWritable message : messages) { + flag = true; + } + if (flag) { + throw new IllegalStateException( + "Expect no msg received in step 1, but actually received"); + } + MultipleLongWritable msg = new MultipleLongWritable(vertex.getId().get()); + sendMessageToAllEdges(vertex, msg); + } else if (getSuperstep() < MAX_SUPER_STEP.get(getConf())) { + logger.info("step [{}] Checking received msg", getSuperstep()); + int msgCnt = 0; + for (MultipleLongWritable message : messages) { + msgCnt += 1; + } + vertex.setValue(new MultipleLongWritable(msgCnt)); + } else if (getSuperstep() == MAX_SUPER_STEP.get(getConf())) { + vertex.voteToHalt(); + } else { + logger.info("Impossible: " + getSuperstep()); + } + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java new file mode 100644 index 000000000000..f58d210f63fa --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java @@ -0,0 +1,115 @@ +/* + * Copyright 2022 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.alibaba.graphscope.example.giraph.format; + +import com.alibaba.graphscope.example.giraph.writable.MultipleLongWritable; +import org.apache.giraph.io.EdgeReader; +import org.apache.giraph.io.formats.TextEdgeInputFormat; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +import java.io.IOException; + +public class P2PEdgeMultipleLongInputFormat extends TextEdgeInputFormat { + + /** + * Create an edge reader for a given split. The framework will call {@link + * EdgeReader#initialize(InputSplit, TaskAttemptContext)} before the split is used. 
+ * + * @param split the split to be read + * @param context the information about the task + * @return a new record reader + * @throws IOException + */ + @Override + public EdgeReader createEdgeReader( + InputSplit split, TaskAttemptContext context) throws IOException { + return new P2PEdgeReader(); + } + + public class P2PEdgeReader extends TextEdgeReaderFromEachLineProcessed { + + String SEPARATOR = " "; + /** + * Cached vertex id for the current line + */ + private LongWritable srcId; + + private LongWritable dstId; + private MultipleLongWritable edgeValue; + + /** + * Preprocess the line so other methods can easily read necessary information for creating + * edge + * + * @param line the current line to be read + * @return the preprocessed object + * @throws IOException exception that can be thrown while reading + */ + @Override + protected String[] preprocessLine(Text line) throws IOException { + // logger.debug("line: " + line.toString()); + String[] tokens = line.toString().split(SEPARATOR); + if (tokens.length != 3) { + throw new IllegalStateException("expect 3 ele in edge line"); + } + // logger.debug(String.join(",", tokens)); + srcId = new LongWritable(Long.parseLong(tokens[0])); + dstId = new LongWritable(Long.parseLong(tokens[1])); + edgeValue = new MultipleLongWritable(Long.parseLong(tokens[2])); + return tokens; + } + + /** + * Reads target vertex id from the preprocessed line. + * + * @param line the object obtained by preprocessing the line + * @return the target vertex id + * @throws IOException exception that can be thrown while reading + */ + @Override + protected LongWritable getTargetVertexId(String[] line) throws IOException { + return dstId; + } + + /** + * Reads source vertex id from the preprocessed line. 
+ * + * @param line the object obtained by preprocessing the line + * @return the source vertex id + * @throws IOException exception that can be thrown while reading + */ + @Override + protected LongWritable getSourceVertexId(String[] line) throws IOException { + return srcId; + } + + /** + * Reads edge value from the preprocessed line. + * + * @param line the object obtained by preprocessing the line + * @return the edge value + * @throws IOException exception that can be thrown while reading + */ + @Override + protected MultipleLongWritable getValue(String[] line) throws IOException { + return edgeValue; + } + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java new file mode 100644 index 000000000000..19c059f185ac --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java @@ -0,0 +1,86 @@ +/* + * Copyright 2022 Alibaba Group Holding Limited. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.alibaba.graphscope.example.giraph.format; + +import com.alibaba.graphscope.example.giraph.writable.MultipleLongWritable; +import com.google.common.collect.Lists; + +import org.apache.giraph.edge.Edge; +import org.apache.giraph.io.formats.TextVertexInputFormat; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +import java.io.IOException; +import java.util.List; + +public class P2PVertexMultipleLongInputFormat + extends TextVertexInputFormat { + + /** + * The factory method which produces the {@link TextVertexReader} used by this input format. + * + * @param split the split to be read + * @param context the information about the task + * @return the text vertex reader to be used + */ + @Override + public TextVertexInputFormat.TextVertexReader + createVertexReader(InputSplit split, TaskAttemptContext context) throws IOException { + return new P2PVertexReader(); + } + + public class P2PVertexReader extends TextVertexReaderFromEachLineProcessed { + + String SEPARATOR = " "; + + /** + * Cached vertex id for the current line + */ + private LongWritable id; + + private MultipleLongWritable value; + + @Override + protected String[] preprocessLine(Text line) throws IOException { + // logger.debug("line: " + line.toString()); + String[] tokens = line.toString().split(SEPARATOR); + // logger.debug(String.join(",", tokens)); + id = new LongWritable(Long.parseLong(tokens[0])); + value = new MultipleLongWritable(Long.parseLong(tokens[1])); + return tokens; + } + + @Override + protected LongWritable getId(String[] tokens) throws IOException { + return id; + } + + @Override + protected MultipleLongWritable getValue(String[] tokens) throws IOException { + return value; + } + + @Override + protected Iterable> getEdges(String[] tokens) + throws IOException { + List> edges = Lists.newArrayListWithCapacity(0); + return edges; + } + } 
+} From 355cecd67cc4ff780964dbbe907ee528c29d384f Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 3 Jun 2024 15:40:43 +0800 Subject: [PATCH 02/52] testing Committed-by: xiaolei.zl from Dev container --- analytical_engine/CMakeLists.txt | 70 +++++------ .../graphscope/utils/ConfigurationUtils.java | 3 + .../alibaba/graphscope/utils/GrapeTypes.java | 2 + .../graphscope/utils/AppBaseParser.java | 5 +- analytical_engine/test/app_tests.sh | 71 ++++++----- analytical_engine/test/giraph_runner.h | 119 ++++++++++++++---- 6 files changed, 178 insertions(+), 92 deletions(-) diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index fa5853dce7d4..3b07d3d9f244 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -367,10 +367,10 @@ endif() # Test targets if (BUILD_TESTS) - add_executable(run_app test/run_app.cc core/object/dynamic.cc) - target_include_directories(run_app PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps utils apps) - target_link_libraries(run_app ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} ${VINEYARD_LIBRARIES}) - target_link_libraries(run_app OpenMP::OpenMP_CXX) + #add_executable(run_app test/run_app.cc core/object/dynamic.cc) + #target_include_directories(run_app PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps utils apps) + #target_link_libraries(run_app ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} ${VINEYARD_LIBRARIES}) + #target_link_libraries(run_app OpenMP::OpenMP_CXX) if (ENABLE_JAVA_SDK) add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) @@ -428,43 +428,43 @@ if (BUILD_TESTS) target_link_libraries(${target} OpenMP::OpenMP_CXX) endmacro() - add_vineyard_app(run_vy_app SRCS test/run_vy_app.cc) - add_vineyard_app(run_vy_app_compact SRCS test/run_vy_app_compact.cc) - add_vineyard_app(run_vy_app_local_vm SRCS test/run_vy_app_local_vm.cc) +# 
add_vineyard_app(run_vy_app SRCS test/run_vy_app.cc) +# add_vineyard_app(run_vy_app_compact SRCS test/run_vy_app_compact.cc) +# add_vineyard_app(run_vy_app_local_vm SRCS test/run_vy_app_local_vm.cc) - add_vineyard_app(run_load_from_stream SRCS test/run_load_from_stream.cc) + #add_vineyard_app(run_load_from_stream SRCS test/run_load_from_stream.cc) - add_vineyard_app(run_vy_ldbc SRCS test/run_vy_ldbc.cc) - target_include_directories(run_vy_ldbc PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + #add_vineyard_app(run_vy_ldbc SRCS test/run_vy_ldbc.cc) + #target_include_directories(run_vy_ldbc PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - add_vineyard_app(run_ctx SRCS test/run_ctx.cc) - target_include_directories(run_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - target_link_libraries(run_ctx gs_proto) +# add_vineyard_app(run_ctx SRCS test/run_ctx.cc) +# target_include_directories(run_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) +# target_link_libraries(run_ctx gs_proto) - add_vineyard_app(run_property_ctx SRCS test/run_property_ctx.cc) - target_include_directories(run_property_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - target_link_libraries(run_property_ctx gs_proto) +# add_vineyard_app(run_property_ctx SRCS test/run_property_ctx.cc) +# target_include_directories(run_property_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) +# target_link_libraries(run_property_ctx gs_proto) - add_vineyard_app(run_pregel_app SRCS test/run_pregel_app.cc) +# add_vineyard_app(run_pregel_app SRCS test/run_pregel_app.cc) - add_vineyard_app(run_string_oid SRCS test/run_string_oid.cc) - target_include_directories(run_string_oid PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) +# add_vineyard_app(run_string_oid SRCS test/run_string_oid.cc) +# target_include_directories(run_string_oid PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - add_vineyard_app(run_empty_property 
SRCS test/run_empty_property.cc) - target_include_directories(run_empty_property PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) +# add_vineyard_app(run_empty_property SRCS test/run_empty_property.cc) +# target_include_directories(run_empty_property PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - add_vineyard_app(test_project_string SRCS test/test_project_string.cc) +# add_vineyard_app(test_project_string SRCS test/test_project_string.cc) - add_vineyard_app(basic_graph_benchmarks SRCS benchmarks/basic_graph_benchmarks.cc) +# add_vineyard_app(basic_graph_benchmarks SRCS benchmarks/basic_graph_benchmarks.cc) - add_vineyard_app(property_graph_loader SRCS benchmarks/property_graph_loader.cc) +# add_vineyard_app(property_graph_loader SRCS benchmarks/property_graph_loader.cc) - add_vineyard_app(property_graph_benchmarks SRCS benchmarks/property_graph_benchmarks.cc) +# add_vineyard_app(property_graph_benchmarks SRCS benchmarks/property_graph_benchmarks.cc) - add_vineyard_app(projected_graph_benchmarks SRCS benchmarks/projected_graph_benchmarks.cc) +# add_vineyard_app(projected_graph_benchmarks SRCS benchmarks/projected_graph_benchmarks.cc) if (NETWORKX) - add_vineyard_app(test_convert SRCS test/test_convert.cc) + # add_vineyard_app(test_convert SRCS test/test_convert.cc) endif () endif () @@ -509,14 +509,14 @@ if(ENABLE_JAVA_SDK) set(GAE_JAVA_JNI_LIB "${GAE_JAVA_DIR}/grape-runtime/target/native/libgrape-jni.so") endif() - add_custom_command( - OUTPUT "${GAE_JAVA_RUNTIME_JAR}" - COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet - DEPENDS gs_proto - WORKING_DIRECTORY ${GAE_JAVA_DIR} - COMMENT "Building GAE-java..." - VERBATIM - ) + # add_custom_command( + # OUTPUT "${GAE_JAVA_RUNTIME_JAR}" + # COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet + # DEPENDS gs_proto + # WORKING_DIRECTORY ${GAE_JAVA_DIR} + # COMMENT "Building GAE-java..." 
+ # VERBATIM + # ) add_custom_target(grape_jni ALL DEPENDS "${GAE_JAVA_RUNTIME_JAR}" ) diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java index f3dbd9d36195..fed8f0c22604 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java @@ -295,6 +295,9 @@ public static boolean checkTypeConsistency( if (grapeTypeClass.equals(Float.class)) { return giraphTypeClass.equals(FloatWritable.class); } + if (grapeTypeClass.equals(String.class)) { + return true; + } logger.error( "Unsupported grape type and giraph type: " + grapeTypeClass.getName() diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java index b03174c8a35f..baee2fcdf024 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java @@ -80,6 +80,8 @@ private Class cppType2JavaType(String typeString) { return Double.class; } else if (typeString.equals("float")) { return Float.class; + } else if (typeString.equals("std::string")){ + return String.class; } throw new IllegalStateException("Not supported type string" + typeString); } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java index 4359195dd250..def06e66842b 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java +++ 
b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java @@ -183,7 +183,10 @@ private static String writableToJava(String typeName) { return "java.lang.Integer"; } else if (typeName.contains("LongWritable")) { return "java.lang.Long"; - } else throw new IllegalStateException("Not recognized writable " + typeName); + } else { + return "org.apache.hadoop.io.Writable"; + } + // throw new IllegalStateException("Not recognized writable " + typeName); } private static Method getMethod(Class clz) { diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index d6717cd26162..3e59da1acecc 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -399,60 +399,65 @@ pushd "${ENGINE_HOME}"/build get_test_data -for app in "${ldbc_apps[@]}"; do - run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 - exact_verify "${test_dir}"/property/ldbc/p2p-31-"${app^^}" -done +# for app in "${ldbc_apps[@]}"; do +# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 +# exact_verify "${test_dir}"/property/ldbc/p2p-31-"${app^^}" +# done -for app in "${other_apps[@]}"; do - run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 - exact_verify "${test_dir}"/p2p-31-"${app}" -done +# for app in "${other_apps[@]}"; do +# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 +# exact_verify "${test_dir}"/p2p-31-"${app}" +# done -for app in "${apps_with_directed[@]}"; do - run ${np} ./run_app --vfile 
"${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --directed - exact_verify "${test_dir}"/p2p-31-"${app}" -done +# for app in "${apps_with_directed[@]}"; do +# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --directed +# exact_verify "${test_dir}"/p2p-31-"${app}" +# done start_vineyard -run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 -run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 -run_lpa ${np} ./run_vy_app "${socket_file}" 1 "${test_dir}"/property/lpa_dataset/lpa_3000_e 2 "${test_dir}"/property/lpa_dataset/lpa_3000_v 0 1 lpa -run_sampling_path 2 ./run_vy_app "${socket_file}" "${test_dir}"/property/sampling_path 0 1 sampling_path 0-0-1-4-2 +#run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 +#run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 +#run_lpa ${np} ./run_vy_app "${socket_file}" 1 "${test_dir}"/property/lpa_dataset/lpa_3000_e 2 "${test_dir}"/property/lpa_dataset/lpa_3000_v 0 1 lpa +#run_sampling_path 2 ./run_vy_app "${socket_file}" "${test_dir}"/property/sampling_path 0 1 sampling_path 0-0-1-4-2 # local vm -run_vy_2 ${np} ./run_vy_app_local_vm "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 +#run_vy_2 ${np} ./run_vy_app_local_vm "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 # compact edges -run_vy_2 ${np} ./run_vy_app_compact "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 
+#run_vy_2 ${np} ./run_vy_app_compact "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 -run_vy ${np} ./run_pregel_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v -rm -rf ./test_output/* -cp ./outputs_pregel_sssp/* ./test_output -exact_verify "${test_dir}"/twitter-sssp-4 +#run_vy ${np} ./run_pregel_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v +#rm -rf ./test_output/* +# cp ./outputs_pregel_sssp/* ./test_output +# exact_verify "${test_dir}"/twitter-sssp-4 -run ${np} ./run_pregel_app tc "${test_dir}"/p2p-31.e "${test_dir}"/p2p-31.v ./test_output -exact_verify "${test_dir}/p2p-31"-triangles +# run ${np} ./run_pregel_app tc "${test_dir}"/p2p-31.e "${test_dir}"/p2p-31.v ./test_output +# exact_verify "${test_dir}/p2p-31"-triangles if [[ "${RUN_JAVA_TESTS}" == "ON" ]]; then - run_vy_2 ${np} ./projected_fragment_mapper_test "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v + # run_vy_2 ${np} ./projected_fragment_mapper_test "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v if [[ "${USER_JAR_PATH}"x != ""x ]] then echo "Running Java tests..." 
- run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS - GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ - 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ - 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ - com.alibaba.graphscope.example.stringApp.StringApp + # run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS + # GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ + # 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ + # 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ + # com.alibaba.graphscope.example.stringApp.StringApp echo "Running girpah tests..." 
- ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ - --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ + # GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ + # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ + # --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + # --user_app_class com.alibaba.graphscope.example.giraph.SSSP + + GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat \ + --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat --vfile "${test_dir}"/p2p-31.v \ --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ - --user_app_class com.alibaba.graphscope.example.giraph.SSSP + --user_app_class com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable fi fi diff --git a/analytical_engine/test/giraph_runner.h b/analytical_engine/test/giraph_runner.h index 9a10f2f0c6de..5633aa2eed88 100644 --- a/analytical_engine/test/giraph_runner.h +++ b/analytical_engine/test/giraph_runner.h @@ -39,6 +39,10 @@ limitations under the License. 
#include "core/io/property_parser.h" #include "core/java/utils.h" #include "core/loader/arrow_fragment_loader.h" +#include "vineyard/common/util/json.h" + +#include +#include namespace bl = boost::leaf; @@ -46,12 +50,9 @@ namespace gs { using FragmentType = vineyard::ArrowFragment; -using ProjectedFragmentType = - ArrowProjectedFragment; using FragmentLoaderType = ArrowFragmentLoader; -using APP_TYPE = JavaPIEProjectedDefaultApp; // using LOADER_TYPE = grape::GiraphFragmentLoader; void Init(const std::string& params) { @@ -63,6 +64,46 @@ void Init(const std::string& params) { } } +std::pair parse_property_type( + const vineyard::ObjectMeta& metadata) { + vineyard::json json; + metadata.GetKeyValue("schema_json_", json); + LOG(INFO) << "schema_json_: " << json; + std::string vertex_type_name, edge_type_name; + + if (json.contains("types")) { + auto types = json["types"]; + if (types.size() == 2) { + for (auto type : types) { + if (type["label"] == "vertex_label") { + if (type.contains("propertyDefList")) { + auto properties = type["propertyDefList"]; + CHECK(properties.size() == 1); + auto data_type = properties[0]["data_type"]; + vertex_type_name = data_type.get(); + } else { + LOG(FATAL) << "No propertyDefList found in schema"; + } + } else if (type["label"] == "edge_label") { + if (type.contains("propertyDefList")) { + auto properties = type["propertyDefList"]; + CHECK(properties.size() == 1); + auto data_type = properties[0]["data_type"]; + edge_type_name = data_type.get(); + } else { + LOG(FATAL) << "No propertyDefList found in schema"; + } + } else { + LOG(FATAL) << "Unknown type label"; + } + } + } + } else { + LOG(FATAL) << "No types found in schema"; + } + return std::make_pair(vertex_type_name, edge_type_name); +} + vineyard::ObjectID LoadGiraphFragment( const grape::CommSpec& comm_spec, const std::string& vfile, const std::string& efile, const std::string& vertex_input_format_class, @@ -75,7 +116,7 @@ vineyard::ObjectID LoadGiraphFragment( 
graph->retain_oid = false; auto vertex = std::make_shared(); - vertex->label = "label1"; + vertex->label = "vertex_label"; vertex->vid = "0"; vertex->protocol = "file"; vertex->values = vfile; @@ -84,11 +125,11 @@ vineyard::ObjectID LoadGiraphFragment( graph->vertices.push_back(vertex); auto edge = std::make_shared(); - edge->label = "label2"; + edge->label = "edge_label"; auto subLabel = std::make_shared(); - subLabel->src_label = "label1"; + subLabel->src_label = "vertex_label"; subLabel->src_vid = "0"; - subLabel->dst_label = "label1"; + subLabel->dst_label = "vertex_label"; subLabel->dst_vid = "0"; subLabel->protocol = "file"; subLabel->values = efile; @@ -129,6 +170,7 @@ void Query(grape::CommSpec& comm_spec, std::shared_ptr fragment, int query_times) { std::vector query_time(query_times, 0.0); double total_time = 0.0; + using APP_TYPE = JavaPIEProjectedDefaultApp; for (auto i = 0; i < query_times; ++i) { auto app = std::make_shared(); @@ -169,6 +211,20 @@ void Query(grape::CommSpec& comm_spec, std::shared_ptr fragment, VLOG(1) << "Separate time: " << oss.str(); } +template +void ProjectAndQuery(grape::CommSpec& comm_spec, + std::shared_ptr fragment, + const std::string& frag_name, + const std::string& new_params, + const std::string& user_lib_path, int query_times) { + // Project + std::shared_ptr projected_fragment = + ProjectedFragmentType::Project(fragment, 0, 0, 0, 0); + + Query(comm_spec, projected_fragment, new_params, + user_lib_path, query_times); +} + void CreateAndQuery(std::string params) { grape::CommSpec comm_spec; comm_spec.Init(MPI_COMM_WORLD); @@ -221,18 +277,23 @@ void CreateAndQuery(std::string params) { } int query_times = getFromPtree(pt, OPTION_QUERY_TIMES); + vineyard::ObjectMeta metadata; + VINEYARD_CHECK_OK(client.GetMetaData(fragment_id, metadata)); + LOG(INFO) << "got metadata: " << metadata.ToString(); + + // chose different type according to the schema + std::string vertex_data_type, edge_data_type; + std::tie(vertex_data_type, 
edge_data_type) = parse_property_type(metadata); + std::shared_ptr fragment = std::dynamic_pointer_cast(client.GetObject(fragment_id)); - VLOG(10) << "fid: " << fragment->fid() << "fnum: " << fragment->fnum() << "v label num: " << fragment->vertex_label_num() << "e label num: " << fragment->edge_label_num() << "total v num: " << fragment->GetTotalVerticesNum(); VLOG(1) << "inner vertices: " << fragment->GetInnerVerticesNum(0); - - std::string frag_name = - "gs::ArrowProjectedFragment"; - pt.put("frag_name", frag_name); + VLOG(1) << "vertex_data_type: " << vertex_data_type + << " edge_data_type: " << edge_data_type; std::string jar_name; if (getenv("USER_JAR_PATH")) { @@ -249,19 +310,31 @@ void CreateAndQuery(std::string params) { return; } pt.put("jar_name", jar_name); - - std::stringstream ss; - boost::property_tree::json_parser::write_json(ss, pt); - std::string new_params = ss.str(); - std::string user_lib_path = getFromPtree(pt, OPTION_LIB_PATH); - // Project - std::shared_ptr projected_fragment = - ProjectedFragmentType::Project(fragment, 0, 0, 0, 0); - - Query(comm_spec, projected_fragment, new_params, - user_lib_path, query_times); + if (vertex_data_type == "STRING" && edge_data_type == "STRING") { + std::string frag_name = + "gs::ArrowProjectedFragment"; + pt.put("frag_name", frag_name); + std::stringstream ss; + boost::property_tree::json_parser::write_json(ss, pt); + std::string new_params = ss.str(); + using ProjectedFragmentType = + ArrowProjectedFragment; + ProjectAndQuery( + comm_spec, fragment, frag_name, new_params, user_lib_path, query_times); + } else { + std::string frag_name = + "gs::ArrowProjectedFragment"; + pt.put("frag_name", frag_name); + std::stringstream ss; + boost::property_tree::json_parser::write_json(ss, pt); + std::string new_params = ss.str(); + using ProjectedFragmentType = + ArrowProjectedFragment; + ProjectAndQuery( + comm_spec, fragment, frag_name, new_params, user_lib_path, query_times); + } } void Finalize() { 
grape::FinalizeMPIComm(); From e34441964fdf44dcdacfe67983a234fb9b426b3d Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 3 Jun 2024 17:52:55 +0800 Subject: [PATCH 03/52] todo: resolve edata Committed-by: xiaolei.zl from Dev container --- analytical_engine/CMakeLists.txt | 48 +++++++++---------- .../graph/impl/VertexDataManagerImpl.java | 11 ++++- .../graphscope/utils/ConfigurationUtils.java | 3 +- .../alibaba/graphscope/utils/GrapeTypes.java | 3 +- .../graphscope/graph/AbstractEdgeManager.java | 17 +++++++ .../FFIByteVectorOutputStream.java | 11 +++++ .../graphscope/utils/AppBaseParser.java | 2 +- .../utils/FFITypeFactoryhelper.java | 7 ++- .../graphscope/utils/JavaClassName.java | 2 + .../alibaba/graphscope/utils/TypeUtils.java | 7 ++- .../utils/array/PrimitiveArray.java | 3 +- .../annotation/AnnotationInvoker.java | 21 ++++++++ analytical_engine/java/install.sh | 3 ++ 13 files changed, 106 insertions(+), 32 deletions(-) create mode 100644 analytical_engine/java/install.sh diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index 3b07d3d9f244..525c99eac094 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -373,17 +373,17 @@ if (BUILD_TESTS) #target_link_libraries(run_app OpenMP::OpenMP_CXX) if (ENABLE_JAVA_SDK) - add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) - target_include_directories(run_java_app PRIVATE core utils apps) - target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} - ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) + # add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) + # target_include_directories(run_java_app PRIVATE core utils apps) + # target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(run_java_app gs_proto 
${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} + # ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) # java app benchmark - add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) - target_include_directories(property_graph_java_app_benchmarks PRIVATE core utils apps) - target_compile_definitions(property_graph_java_app_benchmarks PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(property_graph_java_app_benchmarks gs_proto ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES}) + # add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) + # target_include_directories(property_graph_java_app_benchmarks PRIVATE core utils apps) + # target_compile_definitions(property_graph_java_app_benchmarks PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(property_graph_java_app_benchmarks gs_proto ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES}) # giraph runner add_executable(giraph_runner test/giraph_runner.cc core/java/javasdk.cc) @@ -392,25 +392,25 @@ if (BUILD_TESTS) target_link_libraries(giraph_runner ${CMAKE_DL_LIBS} gs_proto ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) # graphx related test - add_executable(projected_fragment_mapper_test test/projected_fragment_mapper_test.cc) - target_include_directories(projected_fragment_mapper_test PRIVATE core utils apps) - target_link_libraries(projected_fragment_mapper_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + # add_executable(projected_fragment_mapper_test test/projected_fragment_mapper_test.cc) + # target_include_directories(projected_fragment_mapper_test PRIVATE core utils apps) + # target_link_libraries(projected_fragment_mapper_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} 
${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) - add_executable(graphx_loader_test test/graphx_loader_test.cc) - target_include_directories(graphx_loader_test PRIVATE core utils apps) - target_compile_definitions(graphx_loader_test PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(graphx_loader_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + # add_executable(graphx_loader_test test/graphx_loader_test.cc) + # target_include_directories(graphx_loader_test PRIVATE core utils apps) + # target_compile_definitions(graphx_loader_test PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(graphx_loader_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) - add_executable(run_java_string_app test/run_java_string_app.cc core/java/javasdk.cc) - target_include_directories(run_java_string_app PRIVATE core utils apps) - target_compile_definitions(run_java_string_app PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(run_java_string_app ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) + # add_executable(run_java_string_app test/run_java_string_app.cc core/java/javasdk.cc) + # target_include_directories(run_java_string_app PRIVATE core utils apps) + # target_compile_definitions(run_java_string_app PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(run_java_string_app ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) if (${LIBUNWIND_FOUND}) - target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) - target_link_libraries(property_graph_java_app_benchmarks ${LIBUNWIND_LIBRARIES}) - target_link_libraries(giraph_runner ${LIBUNWIND_LIBRARIES}) - target_link_libraries(projected_fragment_mapper_test ${LIBUNWIND_LIBRARIES}) + # target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) + # target_link_libraries(property_graph_java_app_benchmarks 
${LIBUNWIND_LIBRARIES}) + # target_link_libraries(giraph_runner ${LIBUNWIND_LIBRARIES}) + # target_link_libraries(projected_fragment_mapper_test ${LIBUNWIND_LIBRARIES}) endif () endif() diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java index 0b64dd0cb404..4a16aa4d16ee 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java @@ -20,6 +20,7 @@ import com.alibaba.graphscope.graph.VertexDataManager; import com.alibaba.graphscope.serialization.FFIByteVectorInputStream; import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; +import com.alibaba.graphscope.ds.StringView; import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; import org.apache.hadoop.io.Writable; @@ -143,8 +144,14 @@ private void readVertexDataFromIFragment(FFIByteVectorOutputStream outputStream) String value = (String) fragment.getData(vertex); outputStream.writeBytes(value); } - } else { - logger.error("Unsupported oid class: " + conf.getGrapeOidClass().getName()); + } else if (conf.getGrapeVdataClass().equals(StringView.class)){ + for (Vertex vertex : iterable) { + StringView value = (StringView) fragment.getData(vertex); + outputStream.writeBytes(value); + } + } + else { + logger.error("Unsupported vdata class: " + conf.getGrapeVdataClass().getName()); } // else if (conf.getGrapeVdataClass().equals the userDefined class... 
outputStream.finishSetting(); diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java index fed8f0c22604..a30714e2c54a 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java @@ -21,6 +21,7 @@ import com.alibaba.fastjson.JSONObject; import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.ds.StringView; import org.apache.giraph.combiner.MessageCombiner; import org.apache.giraph.conf.GiraphConfiguration; @@ -295,7 +296,7 @@ public static boolean checkTypeConsistency( if (grapeTypeClass.equals(Float.class)) { return giraphTypeClass.equals(FloatWritable.class); } - if (grapeTypeClass.equals(String.class)) { + if (grapeTypeClass.equals(String.class) || grapeTypeClass.equals(StringView.class)) { return true; } logger.error( diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java index baee2fcdf024..0d307b6da992 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java @@ -20,6 +20,7 @@ import com.alibaba.graphscope.fragment.ImmutableEdgecutFragment; import com.alibaba.graphscope.fragment.adaptor.ArrowProjectedAdaptor; import com.alibaba.graphscope.fragment.adaptor.ImmutableEdgecutFragmentAdaptor; +import com.alibaba.graphscope.ds.StringView; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,7 +82,7 @@ private Class cppType2JavaType(String typeString) { } else if (typeString.equals("float")) { return 
Float.class; } else if (typeString.equals("std::string")){ - return String.class; + return StringView.class; } throw new IllegalStateException("Not supported type string" + typeString); } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java index 91f330f2b12f..96358673a9db 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java @@ -30,6 +30,7 @@ import com.alibaba.graphscope.utils.LongPointerAccessor; import com.alibaba.graphscope.utils.array.PrimitiveArray; import com.google.common.collect.Lists; +import com.alibaba.graphscope.ds.StringView; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -215,6 +216,7 @@ public CSRHolder( // totalNumOfEdges); // dstLids = (VID_T[]) Array.newInstance(vidClass, (int) totalNumOfEdges); // dstLids = (VID_T[]) new Object[(int) totalNumOfEdges]; + logger.info("edatas class: {}", bizEdataClass.getClass().getName()); edatas = PrimitiveArray.create(bizEdataClass, (int) totalNumOfEdges); dstOids = PrimitiveArray.create(bizOidClass, (int) totalNumOfEdges); dstLids = PrimitiveArray.create(vidClass, (int) totalNumOfEdges); @@ -367,6 +369,10 @@ private int grapeEdata2Int() { logger.info("edata: String"); return 4; } + else if (edataClass.equals(StringView.class)) { + logger.info("edata: StringView"); + return 5; + } throw new IllegalStateException("Cannot recognize edata type " + edataClass); } @@ -434,6 +440,17 @@ private FFIByteVector generateEdataString( curAddr += nbrUnitEleSize; } } + case 5: + for (int lid = 0; lid < innerVerticesNum; ++lid) { + long curAddr = nbrUnitAddrs[lid]; + for (int j = 0; j < numOfEdges[lid]; ++j) { + long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); + GRAPE_ED_T edata 
= edataArray.get(eid); + StringView longValue = (StringView) edata; + outputStream.writeBytes(longValue); + curAddr += nbrUnitEleSize; + } + } break; default: throw new IllegalStateException("Unexpected edata type: " + edata_t); diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java index c42b89e792fa..69613c61ada3 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java @@ -17,6 +17,7 @@ import com.alibaba.graphscope.stdcxx.FFIByteVector; import com.alibaba.graphscope.stdcxx.FFIByteVectorFactory; +import com.alibaba.graphscope.ds.StringView; import java.io.DataOutput; import java.io.IOException; @@ -274,6 +275,16 @@ public void writeBytes(String s) throws IOException { offset += len; } + public void writeBytes(StringView s) throws IOException { + int len = (int) s.size(); + vector.ensure(offset, len); + for (int i = 0; i < len; i++) { + // UnsafeHolder.U.putByte(newBase + i, (byte) data.charAt(i)); + vector.setRawByte(offset + i, (byte) s.byteAt(i)); + } + offset += len; + } + /** * Writes every character in the string s, to the output stream, in order, two * bytes per character. 
If s is null, a NullPointerException diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java index def06e66842b..2596f34fef98 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java @@ -184,7 +184,7 @@ private static String writableToJava(String typeName) { } else if (typeName.contains("LongWritable")) { return "java.lang.Long"; } else { - return "org.apache.hadoop.io.Writable"; + return "com.alibaba.graphscope.ds.StringView"; } // throw new IllegalStateException("Not recognized writable " + typeName); } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java index e881d6eec44d..edfb4cfbcdfc 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java @@ -26,6 +26,7 @@ import com.alibaba.fastffi.FFIPointer; import com.alibaba.fastffi.FFITypeFactory; import com.alibaba.fastffi.FFIVector; +import com.alibaba.graphscope.ds.StringView; import com.alibaba.fastffi.impl.CXXStdVector; import com.alibaba.graphscope.arrow.array.PrimitiveArrowArrayBuilder; import com.alibaba.graphscope.ds.DenseVertexSet; @@ -80,8 +81,12 @@ public static String javaType2CppType(Class clz) { return "int32_t"; } else if (clz.getName() == Double.class.getName()) { return "double"; + } else if (clz.getName() == String.class.getName()) { + return "std::string"; + } else if (clz.getName() == StringView.class.getName()){ + return "std::string"; } else { - logger.error("Must be one of 
long, double, integer"); + logger.error("Must be one of long, double, integer, but got: " + clz.getName()); return "null"; } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/JavaClassName.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/JavaClassName.java index 803cfdc0a25a..638255a8197b 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/JavaClassName.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/JavaClassName.java @@ -17,4 +17,6 @@ public class JavaClassName { public static final String JAVA_FFI_BYTE_STRING = "com.alibaba.fastffi.FFIByteString"; public static final String STRING_VIEW = "com.alibaba.graphscope.ds.StringView"; + + public static final String STD_STRING = "com.alibaba.graphscope.stdcxx.StdString"; } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java index 2156c6814273..0a353628f6e6 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java @@ -1,5 +1,7 @@ package com.alibaba.graphscope.utils; +import com.alibaba.graphscope.ds.StringView; + public class TypeUtils { public static boolean isPrimitive(Class javaClass) { @@ -29,7 +31,10 @@ public static String primitiveClass2CppStr(Class javaClass, boolean sign) { return "double"; } else if (javaClass.equals(Float.class) || javaClass.equals(float.class)) { return "float"; - } else { + } else if (javaClass.equals(StringView.class)){ + return "std::string"; + } + else { throw new IllegalStateException("Not recognized class " + javaClass.getName()); } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java 
b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java index 471036d4eaa9..4f5dfc6ecf05 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java @@ -48,7 +48,8 @@ static PrimitiveArray create(Class clz, int len) { return (PrimitiveArray) new LongArray(len); } else if (clz.equals(int.class) || clz.equals(Integer.class)) { return (PrimitiveArray) new IntArray(len); - } else { + } + else { return (PrimitiveArray) new ObjectArray(clz, len); } } diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index 2f2d5bc22dfe..39c96421fe36 100644 --- a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -32,6 +32,7 @@ import static com.alibaba.graphscope.utils.JavaClassName.LONG; import static com.alibaba.graphscope.utils.JavaClassName.STRING; import static com.alibaba.graphscope.utils.JavaClassName.STRING_VIEW; +import static com.alibaba.graphscope.utils.JavaClassName.STD_STRING; import com.alibaba.fastffi.CXXHead; import com.alibaba.fastffi.CXXTemplate; @@ -142,6 +143,7 @@ @CXXTemplate(cxx = "int64_t", java = "Long"), @CXXTemplate(cxx = "double", java = "Double"), @CXXTemplate(cxx = "int32_t", java = "Integer"), + @CXXTemplate(cxx = "std::string", java = STD_STRING) }), @FFIGen( type = "com.alibaba.graphscope.ds.VertexRange", @@ -1163,6 +1165,25 @@ + ">", "Integer" }), + @CXXTemplate( + cxx = { + CPP_ARROW_PROJECTED_FRAGMENT + + "", + "std::string" + }, + java = { + JAVA_ARROW_PROJECTED_FRAGMENT + + "<" + + LONG + + "," + 
+ LONG + + "," + + STRING_VIEW + + "," + + STRING_VIEW + + ">", + STD_STRING + }), }), @FFIGen( type = "com.alibaba.graphscope.parallel.DefaultMessageManager", diff --git a/analytical_engine/java/install.sh b/analytical_engine/java/install.sh new file mode 100644 index 000000000000..059dd5b7ab64 --- /dev/null +++ b/analytical_engine/java/install.sh @@ -0,0 +1,3 @@ +cp grape-runtime/target/native/libgrape-jni.so /opt/graphscope/lib +cp grape-runtime/target/grape-runtime-0.28.0-shaded.jar /opt/graphscope/lib +cp grape-giraph/target/grape-giraph-0.28.0-shaded.jar /opt/graphscope/lib From 542edd340dde16ba199a62a62e6ba01317d41be6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 3 Jun 2024 18:07:08 +0800 Subject: [PATCH 04/52] use baseTypedArray --- .../graphscope/graph/AbstractEdgeManager.java | 51 ++++++++++++------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java index 96358673a9db..09577f2116c3 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java @@ -17,9 +17,7 @@ package com.alibaba.graphscope.graph; import com.alibaba.fastffi.llvm4jni.runtime.JavaRuntime; -import com.alibaba.graphscope.ds.PrimitiveTypedArray; -import com.alibaba.graphscope.ds.PropertyNbrUnit; -import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.ds.*; import com.alibaba.graphscope.fragment.ArrowProjectedFragment; import com.alibaba.graphscope.fragment.IFragment; import com.alibaba.graphscope.fragment.adaptor.ArrowProjectedAdaptor; @@ -30,7 +28,6 @@ import com.alibaba.graphscope.utils.LongPointerAccessor; import com.alibaba.graphscope.utils.array.PrimitiveArray; import 
com.google.common.collect.Lists; -import com.alibaba.graphscope.ds.StringView; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -95,7 +92,7 @@ public void init( } this.vidClass = vidClass; edata_t = grapeEdata2Int(); - PrimitiveTypedArray newTypedArray = + BaseTypedArray newTypedArray = FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); newTypedArray.setAddress(this.fragment.getEdataArrayAccessor().getAddress()); csrHolder = new CSRHolder(newTypedArray, consumer); @@ -200,7 +197,7 @@ public class CSRHolder { private BiConsumer> consumer; public CSRHolder( - PrimitiveTypedArray edataArray, + BaseTypedArray edataArray, BiConsumer> consumer) { this.consumer = consumer; totalNumOfEdges = getTotalNumOfEdges(); @@ -235,7 +232,7 @@ private long getTotalNumOfEdges() { return largest - smallest; } - private void initArrays(PrimitiveTypedArray edataArray) throws IOException { + private void initArrays(BaseTypedArray edataArray) throws IOException { int tmpSum = 0; long oeBeginOffset, oeEndOffset; for (long lid = 0; lid < innerVerticesNum; ++lid) { @@ -271,17 +268,19 @@ private void initArrays(PrimitiveTypedArray edataArray) throws IOExc fillInEdataArray(edataArray); } - private void fillInEdataArray(PrimitiveTypedArray edataArray) + private void fillInEdataArray(BaseTypedArray edataArray) throws IOException { // first try to set directly. 
int index = 0; if (bizEdataClass.equals(edataClass)) { - logger.info("biz edata {} == grape edata, try to read direct", edata_t); + logger.info("biz edata {} == grape edata, try to read direct, biz edata class {}, edata class {}", edata_t, bizEdataClass, edataClass); + PrimitiveTypedArray primitiveTypedArray = FFITypeFactoryhelper.newPrimitiveTypedArray(bizEdataClass); + primitiveTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid] + VID_SIZE_IN_BYTE; for (int j = 0; j < numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr); - edatas.set(index++, (BIZ_EDATA_T) edataArray.get(eid)); + edatas.set(index++, primitiveTypedArray.get(eid)); curAddr += nbrUnitEleSize; } } @@ -377,16 +376,19 @@ else if (edataClass.equals(StringView.class)) { } private FFIByteVector generateEdataString( - long[] nbrUnitAddrs, long[] numOfEdges, PrimitiveTypedArray edataArray) + long[] nbrUnitAddrs, long[] numOfEdges, BaseTypedArray edataArray) throws IOException { FFIByteVectorOutputStream outputStream = new FFIByteVectorOutputStream(); switch (edata_t) { case 0: + PrimitiveTypedArray longTypedArray = + FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + longTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; for (int j = 0; j < numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); - GRAPE_ED_T edata = edataArray.get(eid); + GRAPE_ED_T edata = longTypedArray.get(eid); Long longValue = (Long) edata; outputStream.writeLong(longValue); curAddr += nbrUnitEleSize; @@ -394,11 +396,14 @@ private FFIByteVector generateEdataString( } break; case 1: + PrimitiveTypedArray intTypedArray = + FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + intTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; for (int j = 0; j 
< numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); - GRAPE_ED_T edata = edataArray.get(eid); + GRAPE_ED_T edata = intTypedArray.get(eid); Integer longValue = (Integer) edata; outputStream.writeInt(longValue); curAddr += nbrUnitEleSize; @@ -406,11 +411,14 @@ private FFIByteVector generateEdataString( } break; case 2: + PrimitiveTypedArray doubleTypedArray = + FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + doubleTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; for (int j = 0; j < numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); - GRAPE_ED_T edata = edataArray.get(eid); + GRAPE_ED_T edata = doubleTypedArray.get(eid); Double longValue = (Double) edata; outputStream.writeDouble(longValue); curAddr += nbrUnitEleSize; @@ -418,11 +426,14 @@ private FFIByteVector generateEdataString( } break; case 3: + PrimitiveTypedArray floatTypedArray = + FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + floatTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; for (int j = 0; j < numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); - GRAPE_ED_T edata = edataArray.get(eid); + GRAPE_ED_T edata = floatTypedArray.get(eid); Float longValue = (Float) edata; outputStream.writeFloat(longValue); curAddr += nbrUnitEleSize; @@ -430,22 +441,28 @@ private FFIByteVector generateEdataString( } break; case 4: + PrimitiveTypedArray stringTypedArray = + FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + stringTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; for (int j = 0; j < numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); - GRAPE_ED_T edata = edataArray.get(eid); + GRAPE_ED_T edata = 
stringTypedArray.get(eid); String longValue = (String) edata; outputStream.writeBytes(longValue); curAddr += nbrUnitEleSize; } } case 5: + PrimitiveTypedArray stringViewTypedArray = + FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + stringViewTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; for (int j = 0; j < numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); - GRAPE_ED_T edata = edataArray.get(eid); + GRAPE_ED_T edata = stringViewTypedArray.get(eid); StringView longValue = (StringView) edata; outputStream.writeBytes(longValue); curAddr += nbrUnitEleSize; From 86bccd23d1267265db5d874e838388152aaba515 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 3 Jun 2024 19:27:57 +0800 Subject: [PATCH 05/52] fix --- .../com/alibaba/graphscope/graph/AbstractEdgeManager.java | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java index 09577f2116c3..76c427f17d19 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java @@ -92,8 +92,12 @@ public void init( } this.vidClass = vidClass; edata_t = grapeEdata2Int(); - BaseTypedArray newTypedArray = - FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + BaseTypedArray newTypedArray; + if (edataClass.equals(StringView.class)){ + newTypedArray = (BaseTypedArray) FFITypeFactoryhelper.newStringTypedArray(); + } else { + newTypedArray = FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + } newTypedArray.setAddress(this.fragment.getEdataArrayAccessor().getAddress()); csrHolder = new 
CSRHolder(newTypedArray, consumer); edgeIterable = new TupleIterable(csrHolder); From 0be87747e814cd6ad4b75abf3310c84b84e01a61 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 3 Jun 2024 20:25:57 +0800 Subject: [PATCH 06/52] Todo: solve python entrance problem Committed-by: xiaolei.zl from Dev container Committed-by: xiaolei.zl from Dev container --- .../giraph/MessageAppWithUserWritable.java | 10 ++++--- .../GiraphComputationAdaptorContext.java | 27 ++++++++++++++++--- .../graph/impl/GiraphVertexIdManagerImpl.java | 2 +- .../graphscope/graph/AbstractEdgeManager.java | 9 +++---- .../annotation/AnnotationInvoker.java | 1 + 5 files changed, 35 insertions(+), 14 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java index f53aba267205..b6f3fa7afe24 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java @@ -34,12 +34,12 @@ public class MessageAppWithUserWritable extends BasicComputation { public static LongConfOption MAX_SUPER_STEP; - private static Logger logger = LoggerFactory.getLogger(MessageApp.class); + private static Logger logger = LoggerFactory.getLogger(MessageAppWithUserWritable.class); static { String maxSuperStep = System.getenv("MAX_SUPER_STEP"); if (Objects.isNull(maxSuperStep) || maxSuperStep.isEmpty()) { - MAX_SUPER_STEP = new LongConfOption("maxSuperStep", 1, "max super step"); + MAX_SUPER_STEP = new LongConfOption("maxSuperStep", 3, "max super step"); } else { MAX_SUPER_STEP = new LongConfOption( @@ -60,7 +60,7 @@ public void compute( Iterable messages) throws IOException { if (getSuperstep() == 0) { - logger.info("There should be no messages 
in step0, " + vertex.getId()); + // logger.info("There should be no messages in step0, " + vertex.getId()); boolean flag = false; for (MultipleLongWritable message : messages) { flag = true; @@ -72,7 +72,9 @@ public void compute( MultipleLongWritable msg = new MultipleLongWritable(vertex.getId().get()); sendMessageToAllEdges(vertex, msg); } else if (getSuperstep() < MAX_SUPER_STEP.get(getConf())) { - logger.info("step [{}] Checking received msg", getSuperstep()); + if (vertex.getId().get() < 20){ + logger.info("step [{}] Checking received msg", getSuperstep()); + } int msgCnt = 0; for (MultipleLongWritable message : messages) { msgCnt += 1; diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java index 56f664b0c1d6..82fc8382263e 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java @@ -39,8 +39,10 @@ import com.alibaba.graphscope.serialization.FFIByteVectorInputStream; import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; import com.alibaba.graphscope.stdcxx.FFIByteVector; +import com.alibaba.graphscope.stdcxx.StdString; import com.alibaba.graphscope.utils.ConfigurationUtils; import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import com.alibaba.graphscope.ds.StringView; import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; @@ -248,7 +250,24 @@ public void writeBackVertexData() { // This string is not readable. 
vertexArray.setValue(grapeVertex, new String(bytes)); } - } else { + } else if (conf.getGrapeVdataClass().equals(StringView.class)) { + byte[] bytes = new byte[(int) maxOffset]; + for (long lid = 0; lid < innerVerticesNum; ++lid) { + grapeVertex.setValue((VID_T) (Long) lid); + if (inputStream.longAvailable() <= 0) { + throw new IllegalStateException( + "Input stream too short for " + innerVerticesNum + " vertices"); + } + if (inputStream.read(bytes, 0, (int) offsets[(int) lid]) == -1) { + throw new IllegalStateException("read input stream failed"); + } + // This string is not readable. + StdString value = (StdString) vertexArray.get(grapeVertex); + //TODO: can be optimized without creating a java string + value.fromJavaString(new String(bytes)); + } + } + else { throw new IllegalStateException( "Unrecognized vdata class:" + conf.getGrapeVdataClass().getName()); } @@ -548,11 +567,11 @@ private ImmutableClassesGiraphConfiguration generateConfiguration( */ private boolean checkConsistency(ImmutableClassesGiraphConfiguration configuration) { return ConfigurationUtils.checkTypeConsistency( - configuration.getGrapeOidClass(), configuration.getVertexIdClass()) + configuration.getGrapeOidClass(), configuration.getVertexIdClass()) && ConfigurationUtils.checkTypeConsistency( - configuration.getGrapeEdataClass(), configuration.getEdgeValueClass()) + configuration.getGrapeEdataClass(), configuration.getEdgeValueClass()) && ConfigurationUtils.checkTypeConsistency( - configuration.getGrapeVdataClass(), configuration.getVertexValueClass()); + configuration.getGrapeVdataClass(), configuration.getVertexValueClass()); } private static String urlsToString(URL[] urls) { diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/GiraphVertexIdManagerImpl.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/GiraphVertexIdManagerImpl.java index b058fd3823ed..e2ab384d525b 100644 --- 
a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/GiraphVertexIdManagerImpl.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/GiraphVertexIdManagerImpl.java @@ -147,7 +147,7 @@ private FFIByteVectorInputStream generateVertexIdStream() { } outputStream.finishSetting(); logger.info( - "Vertex data stream size: " + "Vertex id stream size: " + outputStream.bytesWriten() + ", vertices: " + vertexNum); diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java index 76c427f17d19..c3250b3941d8 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java @@ -459,16 +459,15 @@ private FFIByteVector generateEdataString( } } case 5: - PrimitiveTypedArray stringViewTypedArray = - FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); + StringTypedArray stringViewTypedArray = + FFITypeFactoryhelper.newStringTypedArray(); stringViewTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; for (int j = 0; j < numOfEdges[lid]; ++j) { long eid = JavaRuntime.getLong(curAddr + VID_SIZE_IN_BYTE); - GRAPE_ED_T edata = stringViewTypedArray.get(eid); - StringView longValue = (StringView) edata; - outputStream.writeBytes(longValue); + StringView edata = stringViewTypedArray.get(eid); + outputStream.writeBytes(edata); curAddr += nbrUnitEleSize; } } diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index 39c96421fe36..196b01443e0b 100644 --- 
a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -47,6 +47,7 @@ */ @FFIGenBatch( value = { + @FFIGen(type = "com.alibaba.graphscope.stdcxx.StdString"), @FFIGen(type = "com.alibaba.graphscope.ds.FidPointer"), @FFIGen(type = "com.alibaba.graphscope.ds.DestList"), @FFIGen(type = "com.alibaba.graphscope.stdcxx.CCharPointer"), From 4e48e22068e3c91b955e29967f8bdf44a686c4a2 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Tue, 4 Jun 2024 10:11:59 +0800 Subject: [PATCH 07/52] code minor refine Committed-by: xiaolei.zl from Dev container --- .../graphscope/context/GiraphComputationAdaptorContext.java | 6 +++--- .../graphscope/serialization/FFIByteVectorOutputStream.java | 1 - .../java/com/alibaba/graphscope/utils/AppBaseParser.java | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java index 82fc8382263e..6d3b6659142b 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java @@ -567,11 +567,11 @@ private ImmutableClassesGiraphConfiguration generateConfiguration( */ private boolean checkConsistency(ImmutableClassesGiraphConfiguration configuration) { return ConfigurationUtils.checkTypeConsistency( - configuration.getGrapeOidClass(), configuration.getVertexIdClass()) + configuration.getGrapeOidClass(), configuration.getVertexIdClass()) && ConfigurationUtils.checkTypeConsistency( - configuration.getGrapeEdataClass(), configuration.getEdgeValueClass()) + 
configuration.getGrapeEdataClass(), configuration.getEdgeValueClass()) && ConfigurationUtils.checkTypeConsistency( - configuration.getGrapeVdataClass(), configuration.getVertexValueClass()); + configuration.getGrapeVdataClass(), configuration.getVertexValueClass()); } private static String urlsToString(URL[] urls) { diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java index 69613c61ada3..55a58514881b 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java @@ -279,7 +279,6 @@ public void writeBytes(StringView s) throws IOException { int len = (int) s.size(); vector.ensure(offset, len); for (int i = 0; i < len; i++) { - // UnsafeHolder.U.putByte(newBase + i, (byte) data.charAt(i)); vector.setRawByte(offset + i, (byte) s.byteAt(i)); } offset += len; diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java index 2596f34fef98..0426b0d8fc5d 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java @@ -186,7 +186,6 @@ private static String writableToJava(String typeName) { } else { return "com.alibaba.graphscope.ds.StringView"; } - // throw new IllegalStateException("Not recognized writable " + typeName); } private static Method getMethod(Class clz) { From bb020451e57e2a7c785f405fab064edf2115d2cd Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Tue, 4 Jun 2024 11:30:32 +0800 Subject: [PATCH 08/52] 
minor Committed-by: xiaolei.zl from Dev container --- analytical_engine/test/giraph_runner.h | 1 - 1 file changed, 1 deletion(-) diff --git a/analytical_engine/test/giraph_runner.h b/analytical_engine/test/giraph_runner.h index 5633aa2eed88..907bb32b49eb 100644 --- a/analytical_engine/test/giraph_runner.h +++ b/analytical_engine/test/giraph_runner.h @@ -279,7 +279,6 @@ void CreateAndQuery(std::string params) { vineyard::ObjectMeta metadata; VINEYARD_CHECK_OK(client.GetMetaData(fragment_id, metadata)); - LOG(INFO) << "got metadata: " << metadata.ToString(); // chose different type according to the schema std::string vertex_data_type, edge_data_type; From 6d3076507603ec5e20eea2495d5c46165aefeed0 Mon Sep 17 00:00:00 2001 From: zhanglei1949 Date: Wed, 5 Jun 2024 14:41:14 +0800 Subject: [PATCH 09/52] reverts changes on CMakeLists --- analytical_engine/CMakeLists.txt | 118 +++++++++++++++---------------- 1 file changed, 59 insertions(+), 59 deletions(-) diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index 525c99eac094..fa5853dce7d4 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -367,23 +367,23 @@ endif() # Test targets if (BUILD_TESTS) - #add_executable(run_app test/run_app.cc core/object/dynamic.cc) - #target_include_directories(run_app PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps utils apps) - #target_link_libraries(run_app ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} ${VINEYARD_LIBRARIES}) - #target_link_libraries(run_app OpenMP::OpenMP_CXX) + add_executable(run_app test/run_app.cc core/object/dynamic.cc) + target_include_directories(run_app PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps utils apps) + target_link_libraries(run_app ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} ${VINEYARD_LIBRARIES}) + target_link_libraries(run_app OpenMP::OpenMP_CXX) if (ENABLE_JAVA_SDK) - # 
add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) - # target_include_directories(run_java_app PRIVATE core utils apps) - # target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) - # target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} - # ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) + add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) + target_include_directories(run_java_app PRIVATE core utils apps) + target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) + target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} + ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) # java app benchmark - # add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) - # target_include_directories(property_graph_java_app_benchmarks PRIVATE core utils apps) - # target_compile_definitions(property_graph_java_app_benchmarks PUBLIC ENABLE_JAVA_SDK) - # target_link_libraries(property_graph_java_app_benchmarks gs_proto ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES}) + add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) + target_include_directories(property_graph_java_app_benchmarks PRIVATE core utils apps) + target_compile_definitions(property_graph_java_app_benchmarks PUBLIC ENABLE_JAVA_SDK) + target_link_libraries(property_graph_java_app_benchmarks gs_proto ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES}) # giraph runner add_executable(giraph_runner test/giraph_runner.cc core/java/javasdk.cc) @@ -392,25 +392,25 @@ if (BUILD_TESTS) target_link_libraries(giraph_runner ${CMAKE_DL_LIBS} gs_proto 
${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) # graphx related test - # add_executable(projected_fragment_mapper_test test/projected_fragment_mapper_test.cc) - # target_include_directories(projected_fragment_mapper_test PRIVATE core utils apps) - # target_link_libraries(projected_fragment_mapper_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + add_executable(projected_fragment_mapper_test test/projected_fragment_mapper_test.cc) + target_include_directories(projected_fragment_mapper_test PRIVATE core utils apps) + target_link_libraries(projected_fragment_mapper_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) - # add_executable(graphx_loader_test test/graphx_loader_test.cc) - # target_include_directories(graphx_loader_test PRIVATE core utils apps) - # target_compile_definitions(graphx_loader_test PUBLIC ENABLE_JAVA_SDK) - # target_link_libraries(graphx_loader_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + add_executable(graphx_loader_test test/graphx_loader_test.cc) + target_include_directories(graphx_loader_test PRIVATE core utils apps) + target_compile_definitions(graphx_loader_test PUBLIC ENABLE_JAVA_SDK) + target_link_libraries(graphx_loader_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) - # add_executable(run_java_string_app test/run_java_string_app.cc core/java/javasdk.cc) - # target_include_directories(run_java_string_app PRIVATE core utils apps) - # target_compile_definitions(run_java_string_app PUBLIC ENABLE_JAVA_SDK) - # target_link_libraries(run_java_string_app ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) + add_executable(run_java_string_app test/run_java_string_app.cc core/java/javasdk.cc) + target_include_directories(run_java_string_app 
PRIVATE core utils apps) + target_compile_definitions(run_java_string_app PUBLIC ENABLE_JAVA_SDK) + target_link_libraries(run_java_string_app ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) if (${LIBUNWIND_FOUND}) - # target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) - # target_link_libraries(property_graph_java_app_benchmarks ${LIBUNWIND_LIBRARIES}) - # target_link_libraries(giraph_runner ${LIBUNWIND_LIBRARIES}) - # target_link_libraries(projected_fragment_mapper_test ${LIBUNWIND_LIBRARIES}) + target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) + target_link_libraries(property_graph_java_app_benchmarks ${LIBUNWIND_LIBRARIES}) + target_link_libraries(giraph_runner ${LIBUNWIND_LIBRARIES}) + target_link_libraries(projected_fragment_mapper_test ${LIBUNWIND_LIBRARIES}) endif () endif() @@ -428,43 +428,43 @@ if (BUILD_TESTS) target_link_libraries(${target} OpenMP::OpenMP_CXX) endmacro() -# add_vineyard_app(run_vy_app SRCS test/run_vy_app.cc) -# add_vineyard_app(run_vy_app_compact SRCS test/run_vy_app_compact.cc) -# add_vineyard_app(run_vy_app_local_vm SRCS test/run_vy_app_local_vm.cc) + add_vineyard_app(run_vy_app SRCS test/run_vy_app.cc) + add_vineyard_app(run_vy_app_compact SRCS test/run_vy_app_compact.cc) + add_vineyard_app(run_vy_app_local_vm SRCS test/run_vy_app_local_vm.cc) - #add_vineyard_app(run_load_from_stream SRCS test/run_load_from_stream.cc) + add_vineyard_app(run_load_from_stream SRCS test/run_load_from_stream.cc) - #add_vineyard_app(run_vy_ldbc SRCS test/run_vy_ldbc.cc) - #target_include_directories(run_vy_ldbc PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + add_vineyard_app(run_vy_ldbc SRCS test/run_vy_ldbc.cc) + target_include_directories(run_vy_ldbc PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) -# add_vineyard_app(run_ctx SRCS test/run_ctx.cc) -# target_include_directories(run_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) -# 
target_link_libraries(run_ctx gs_proto) + add_vineyard_app(run_ctx SRCS test/run_ctx.cc) + target_include_directories(run_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + target_link_libraries(run_ctx gs_proto) -# add_vineyard_app(run_property_ctx SRCS test/run_property_ctx.cc) -# target_include_directories(run_property_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) -# target_link_libraries(run_property_ctx gs_proto) + add_vineyard_app(run_property_ctx SRCS test/run_property_ctx.cc) + target_include_directories(run_property_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + target_link_libraries(run_property_ctx gs_proto) -# add_vineyard_app(run_pregel_app SRCS test/run_pregel_app.cc) + add_vineyard_app(run_pregel_app SRCS test/run_pregel_app.cc) -# add_vineyard_app(run_string_oid SRCS test/run_string_oid.cc) -# target_include_directories(run_string_oid PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + add_vineyard_app(run_string_oid SRCS test/run_string_oid.cc) + target_include_directories(run_string_oid PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) -# add_vineyard_app(run_empty_property SRCS test/run_empty_property.cc) -# target_include_directories(run_empty_property PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + add_vineyard_app(run_empty_property SRCS test/run_empty_property.cc) + target_include_directories(run_empty_property PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) -# add_vineyard_app(test_project_string SRCS test/test_project_string.cc) + add_vineyard_app(test_project_string SRCS test/test_project_string.cc) -# add_vineyard_app(basic_graph_benchmarks SRCS benchmarks/basic_graph_benchmarks.cc) + add_vineyard_app(basic_graph_benchmarks SRCS benchmarks/basic_graph_benchmarks.cc) -# add_vineyard_app(property_graph_loader SRCS benchmarks/property_graph_loader.cc) + add_vineyard_app(property_graph_loader SRCS benchmarks/property_graph_loader.cc) -# 
add_vineyard_app(property_graph_benchmarks SRCS benchmarks/property_graph_benchmarks.cc) + add_vineyard_app(property_graph_benchmarks SRCS benchmarks/property_graph_benchmarks.cc) -# add_vineyard_app(projected_graph_benchmarks SRCS benchmarks/projected_graph_benchmarks.cc) + add_vineyard_app(projected_graph_benchmarks SRCS benchmarks/projected_graph_benchmarks.cc) if (NETWORKX) - # add_vineyard_app(test_convert SRCS test/test_convert.cc) + add_vineyard_app(test_convert SRCS test/test_convert.cc) endif () endif () @@ -509,14 +509,14 @@ if(ENABLE_JAVA_SDK) set(GAE_JAVA_JNI_LIB "${GAE_JAVA_DIR}/grape-runtime/target/native/libgrape-jni.so") endif() - # add_custom_command( - # OUTPUT "${GAE_JAVA_RUNTIME_JAR}" - # COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet - # DEPENDS gs_proto - # WORKING_DIRECTORY ${GAE_JAVA_DIR} - # COMMENT "Building GAE-java..." - # VERBATIM - # ) + add_custom_command( + OUTPUT "${GAE_JAVA_RUNTIME_JAR}" + COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet + DEPENDS gs_proto + WORKING_DIRECTORY ${GAE_JAVA_DIR} + COMMENT "Building GAE-java..." 
+ VERBATIM + ) add_custom_target(grape_jni ALL DEPENDS "${GAE_JAVA_RUNTIME_JAR}" ) From f457e3de155de10f1cfa0d64b23148dd2a45d3cc Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 5 Jun 2024 14:45:54 +0800 Subject: [PATCH 10/52] fix tests Committed-by: xiaolei.zl from Dev container --- analytical_engine/test/app_tests.sh | 71 +++++++++++++++-------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index 3e59da1acecc..649fb6bd8984 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -399,62 +399,63 @@ pushd "${ENGINE_HOME}"/build get_test_data -# for app in "${ldbc_apps[@]}"; do -# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 -# exact_verify "${test_dir}"/property/ldbc/p2p-31-"${app^^}" -# done +for app in "${ldbc_apps[@]}"; do + run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 + exact_verify "${test_dir}"/property/ldbc/p2p-31-"${app^^}" +done -# for app in "${other_apps[@]}"; do -# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 -# exact_verify "${test_dir}"/p2p-31-"${app}" -# done +for app in "${other_apps[@]}"; do + run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 + exact_verify "${test_dir}"/p2p-31-"${app}" +done -# for app in "${apps_with_directed[@]}"; do -# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --directed -# 
exact_verify "${test_dir}"/p2p-31-"${app}" -# done +for app in "${apps_with_directed[@]}"; do + run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --directed + exact_verify "${test_dir}"/p2p-31-"${app}" +done start_vineyard -#run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 -#run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 -#run_lpa ${np} ./run_vy_app "${socket_file}" 1 "${test_dir}"/property/lpa_dataset/lpa_3000_e 2 "${test_dir}"/property/lpa_dataset/lpa_3000_v 0 1 lpa -#run_sampling_path 2 ./run_vy_app "${socket_file}" "${test_dir}"/property/sampling_path 0 1 sampling_path 0-0-1-4-2 +run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 +run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 +run_lpa ${np} ./run_vy_app "${socket_file}" 1 "${test_dir}"/property/lpa_dataset/lpa_3000_e 2 "${test_dir}"/property/lpa_dataset/lpa_3000_v 0 1 lpa +run_sampling_path 2 ./run_vy_app "${socket_file}" "${test_dir}"/property/sampling_path 0 1 sampling_path 0-0-1-4-2 # local vm -#run_vy_2 ${np} ./run_vy_app_local_vm "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 +run_vy_2 ${np} ./run_vy_app_local_vm "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 # compact edges -#run_vy_2 ${np} ./run_vy_app_compact "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 +run_vy_2 ${np} ./run_vy_app_compact "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e 
"${test_dir}"/property/p2p-31_property_v 1 -#run_vy ${np} ./run_pregel_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v -#rm -rf ./test_output/* -# cp ./outputs_pregel_sssp/* ./test_output -# exact_verify "${test_dir}"/twitter-sssp-4 +run_vy ${np} ./run_pregel_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v +rm -rf ./test_output/* +cp ./outputs_pregel_sssp/* ./test_output +exact_verify "${test_dir}"/twitter-sssp-4 -# run ${np} ./run_pregel_app tc "${test_dir}"/p2p-31.e "${test_dir}"/p2p-31.v ./test_output -# exact_verify "${test_dir}/p2p-31"-triangles +run ${np} ./run_pregel_app tc "${test_dir}"/p2p-31.e "${test_dir}"/p2p-31.v ./test_output +exact_verify "${test_dir}/p2p-31"-triangles if [[ "${RUN_JAVA_TESTS}" == "ON" ]]; then - # run_vy_2 ${np} ./projected_fragment_mapper_test "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v + run_vy_2 ${np} ./projected_fragment_mapper_test "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v if [[ "${USER_JAR_PATH}"x != ""x ]] then echo "Running Java tests..." 
- # run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS - # GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ - # 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ - # 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ - # com.alibaba.graphscope.example.stringApp.StringApp + run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS + GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ + 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ + 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ + com.alibaba.graphscope.example.stringApp.StringApp echo "Running girpah tests..." 
- # GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ - # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ - # --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ - # --user_app_class com.alibaba.graphscope.example.giraph.SSSP + ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ + --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ + --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + --user_app_class com.alibaba.graphscope.example.giraph.SSSP - GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat \ + echo "Test Giraph app user Customized Writable" + ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat \ --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat --vfile "${test_dir}"/p2p-31.v \ --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ --user_app_class com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable From 69ecac9f336e27f5e3b4fd98ccc4ac13285b8d13 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 5 Jun 2024 14:46:45 +0800 Subject: [PATCH 11/52] format Committed-by: xiaolei.zl from Dev container --- .../giraph/MessageAppWithUserWritable.java | 6 ++++-- .../P2PEdgeMultipleLongInputFormat.java | 4 +++- .../P2PVertexMultipleLongInputFormat.java | 8 ++++--- .../GiraphComputationAdaptorContext.java | 7 +++---- .../graph/impl/VertexDataManagerImpl.java | 7 
+++---- .../graphscope/utils/ConfigurationUtils.java | 2 +- .../alibaba/graphscope/utils/GrapeTypes.java | 4 ++-- .../graphscope/graph/AbstractEdgeManager.java | 21 +++++++++++-------- .../FFIByteVectorOutputStream.java | 2 +- .../utils/FFITypeFactoryhelper.java | 4 ++-- .../alibaba/graphscope/utils/TypeUtils.java | 5 ++--- .../utils/array/PrimitiveArray.java | 3 +-- .../annotation/AnnotationInvoker.java | 2 +- 13 files changed, 40 insertions(+), 35 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java index b6f3fa7afe24..7597867b624d 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java @@ -17,6 +17,7 @@ package com.alibaba.graphscope.example.giraph; import com.alibaba.graphscope.example.giraph.writable.MultipleLongWritable; + import org.apache.giraph.conf.LongConfOption; import org.apache.giraph.graph.BasicComputation; import org.apache.giraph.graph.Vertex; @@ -31,7 +32,8 @@ * Only send msg. 
*/ public class MessageAppWithUserWritable - extends BasicComputation { + extends BasicComputation< + LongWritable, MultipleLongWritable, MultipleLongWritable, MultipleLongWritable> { public static LongConfOption MAX_SUPER_STEP; private static Logger logger = LoggerFactory.getLogger(MessageAppWithUserWritable.class); @@ -72,7 +74,7 @@ public void compute( MultipleLongWritable msg = new MultipleLongWritable(vertex.getId().get()); sendMessageToAllEdges(vertex, msg); } else if (getSuperstep() < MAX_SUPER_STEP.get(getConf())) { - if (vertex.getId().get() < 20){ + if (vertex.getId().get() < 20) { logger.info("step [{}] Checking received msg", getSuperstep()); } int msgCnt = 0; diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java index f58d210f63fa..a6052f692242 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java @@ -17,6 +17,7 @@ package com.alibaba.graphscope.example.giraph.format; import com.alibaba.graphscope.example.giraph.writable.MultipleLongWritable; + import org.apache.giraph.io.EdgeReader; import org.apache.giraph.io.formats.TextEdgeInputFormat; import org.apache.hadoop.io.LongWritable; @@ -26,7 +27,8 @@ import java.io.IOException; -public class P2PEdgeMultipleLongInputFormat extends TextEdgeInputFormat { +public class P2PEdgeMultipleLongInputFormat + extends TextEdgeInputFormat { /** * Create an edge reader for a given split. 
The framework will call {@link diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java index 19c059f185ac..d6bb19dac955 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java @@ -40,8 +40,9 @@ public class P2PVertexMultipleLongInputFormat * @return the text vertex reader to be used */ @Override - public TextVertexInputFormat.TextVertexReader - createVertexReader(InputSplit split, TaskAttemptContext context) throws IOException { + public TextVertexInputFormat + .TextVertexReader + createVertexReader(InputSplit split, TaskAttemptContext context) throws IOException { return new P2PVertexReader(); } @@ -79,7 +80,8 @@ protected MultipleLongWritable getValue(String[] tokens) throws IOException { @Override protected Iterable> getEdges(String[] tokens) throws IOException { - List> edges = Lists.newArrayListWithCapacity(0); + List> edges = + Lists.newArrayListWithCapacity(0); return edges; } } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java index 6d3b6659142b..43931d6e77fb 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java @@ -23,6 +23,7 @@ import com.alibaba.fastjson.JSONObject; import com.alibaba.graphscope.communication.Communicator; 
import com.alibaba.graphscope.ds.GSVertexArray; +import com.alibaba.graphscope.ds.StringView; import com.alibaba.graphscope.factory.GiraphComputationFactory; import com.alibaba.graphscope.fragment.IFragment; import com.alibaba.graphscope.graph.AggregatorManager; @@ -42,7 +43,6 @@ import com.alibaba.graphscope.stdcxx.StdString; import com.alibaba.graphscope.utils.ConfigurationUtils; import com.alibaba.graphscope.utils.FFITypeFactoryhelper; -import com.alibaba.graphscope.ds.StringView; import org.apache.giraph.conf.GiraphConfiguration; import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; @@ -263,11 +263,10 @@ public void writeBackVertexData() { } // This string is not readable. StdString value = (StdString) vertexArray.get(grapeVertex); - //TODO: can be optimized without creating a java string + // TODO: can be optimized without creating a java string value.fromJavaString(new String(bytes)); } - } - else { + } else { throw new IllegalStateException( "Unrecognized vdata class:" + conf.getGrapeVdataClass().getName()); } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java index 4a16aa4d16ee..3390dbe3a356 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java @@ -15,12 +15,12 @@ */ package com.alibaba.graphscope.graph.impl; +import com.alibaba.graphscope.ds.StringView; import com.alibaba.graphscope.ds.Vertex; import com.alibaba.graphscope.fragment.IFragment; import com.alibaba.graphscope.graph.VertexDataManager; import com.alibaba.graphscope.serialization.FFIByteVectorInputStream; import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; -import 
com.alibaba.graphscope.ds.StringView; import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; import org.apache.hadoop.io.Writable; @@ -144,13 +144,12 @@ private void readVertexDataFromIFragment(FFIByteVectorOutputStream outputStream) String value = (String) fragment.getData(vertex); outputStream.writeBytes(value); } - } else if (conf.getGrapeVdataClass().equals(StringView.class)){ + } else if (conf.getGrapeVdataClass().equals(StringView.class)) { for (Vertex vertex : iterable) { StringView value = (StringView) fragment.getData(vertex); outputStream.writeBytes(value); } - } - else { + } else { logger.error("Unsupported vdata class: " + conf.getGrapeVdataClass().getName()); } // else if (conf.getGrapeVdataClass().equals the userDefined class... diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java index a30714e2c54a..7303e47a5413 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/ConfigurationUtils.java @@ -20,8 +20,8 @@ import static org.apache.giraph.conf.GiraphConstants.TYPES_HOLDER_CLASS; import com.alibaba.fastjson.JSONObject; -import com.alibaba.graphscope.fragment.IFragment; import com.alibaba.graphscope.ds.StringView; +import com.alibaba.graphscope.fragment.IFragment; import org.apache.giraph.combiner.MessageCombiner; import org.apache.giraph.conf.GiraphConfiguration; diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java index 0d307b6da992..1022a56f7cf7 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java +++ 
b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/utils/GrapeTypes.java @@ -15,12 +15,12 @@ */ package com.alibaba.graphscope.utils; +import com.alibaba.graphscope.ds.StringView; import com.alibaba.graphscope.fragment.ArrowProjectedFragment; import com.alibaba.graphscope.fragment.IFragment; import com.alibaba.graphscope.fragment.ImmutableEdgecutFragment; import com.alibaba.graphscope.fragment.adaptor.ArrowProjectedAdaptor; import com.alibaba.graphscope.fragment.adaptor.ImmutableEdgecutFragmentAdaptor; -import com.alibaba.graphscope.ds.StringView; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -81,7 +81,7 @@ private Class cppType2JavaType(String typeString) { return Double.class; } else if (typeString.equals("float")) { return Float.class; - } else if (typeString.equals("std::string")){ + } else if (typeString.equals("std::string")) { return StringView.class; } throw new IllegalStateException("Not supported type string" + typeString); diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java index c3250b3941d8..7c22b726e3d8 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java @@ -93,7 +93,7 @@ public void init( this.vidClass = vidClass; edata_t = grapeEdata2Int(); BaseTypedArray newTypedArray; - if (edataClass.equals(StringView.class)){ + if (edataClass.equals(StringView.class)) { newTypedArray = (BaseTypedArray) FFITypeFactoryhelper.newStringTypedArray(); } else { newTypedArray = FFITypeFactoryhelper.newPrimitiveTypedArray(edataClass); @@ -272,13 +272,18 @@ private void initArrays(BaseTypedArray edataArray) throws IOExceptio fillInEdataArray(edataArray); } - private void fillInEdataArray(BaseTypedArray 
edataArray) - throws IOException { + private void fillInEdataArray(BaseTypedArray edataArray) throws IOException { // first try to set directly. int index = 0; if (bizEdataClass.equals(edataClass)) { - logger.info("biz edata {} == grape edata, try to read direct, biz edata class {}, edata class {}", edata_t, bizEdataClass, edataClass); - PrimitiveTypedArray primitiveTypedArray = FFITypeFactoryhelper.newPrimitiveTypedArray(bizEdataClass); + logger.info( + "biz edata {} == grape edata, try to read direct, biz edata class {}, edata" + + " class {}", + edata_t, + bizEdataClass, + edataClass); + PrimitiveTypedArray primitiveTypedArray = + FFITypeFactoryhelper.newPrimitiveTypedArray(bizEdataClass); primitiveTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid] + VID_SIZE_IN_BYTE; @@ -371,8 +376,7 @@ private int grapeEdata2Int() { } else if (edataClass.equals(String.class)) { logger.info("edata: String"); return 4; - } - else if (edataClass.equals(StringView.class)) { + } else if (edataClass.equals(StringView.class)) { logger.info("edata: StringView"); return 5; } @@ -459,8 +463,7 @@ private FFIByteVector generateEdataString( } } case 5: - StringTypedArray stringViewTypedArray = - FFITypeFactoryhelper.newStringTypedArray(); + StringTypedArray stringViewTypedArray = FFITypeFactoryhelper.newStringTypedArray(); stringViewTypedArray.setAddress(edataArray.getAddress()); for (int lid = 0; lid < innerVerticesNum; ++lid) { long curAddr = nbrUnitAddrs[lid]; diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java index 55a58514881b..8956d8bd87b4 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java +++ 
b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java @@ -15,9 +15,9 @@ */ package com.alibaba.graphscope.serialization; +import com.alibaba.graphscope.ds.StringView; import com.alibaba.graphscope.stdcxx.FFIByteVector; import com.alibaba.graphscope.stdcxx.FFIByteVectorFactory; -import com.alibaba.graphscope.ds.StringView; import java.io.DataOutput; import java.io.IOException; diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java index edfb4cfbcdfc..1e2020cef981 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java @@ -26,7 +26,6 @@ import com.alibaba.fastffi.FFIPointer; import com.alibaba.fastffi.FFITypeFactory; import com.alibaba.fastffi.FFIVector; -import com.alibaba.graphscope.ds.StringView; import com.alibaba.fastffi.impl.CXXStdVector; import com.alibaba.graphscope.arrow.array.PrimitiveArrowArrayBuilder; import com.alibaba.graphscope.ds.DenseVertexSet; @@ -34,6 +33,7 @@ import com.alibaba.graphscope.ds.GSVertexArray; import com.alibaba.graphscope.ds.PrimitiveTypedArray; import com.alibaba.graphscope.ds.StringTypedArray; +import com.alibaba.graphscope.ds.StringView; import com.alibaba.graphscope.ds.Vertex; import com.alibaba.graphscope.ds.VertexArray; import com.alibaba.graphscope.ds.VertexRange; @@ -83,7 +83,7 @@ public static String javaType2CppType(Class clz) { return "double"; } else if (clz.getName() == String.class.getName()) { return "std::string"; - } else if (clz.getName() == StringView.class.getName()){ + } else if (clz.getName() == StringView.class.getName()) { return "std::string"; } else { logger.error("Must be one of long, double, integer, but got: " + 
clz.getName()); diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java index 0a353628f6e6..be79f0756c01 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/TypeUtils.java @@ -31,10 +31,9 @@ public static String primitiveClass2CppStr(Class javaClass, boolean sign) { return "double"; } else if (javaClass.equals(Float.class) || javaClass.equals(float.class)) { return "float"; - } else if (javaClass.equals(StringView.class)){ + } else if (javaClass.equals(StringView.class)) { return "std::string"; - } - else { + } else { throw new IllegalStateException("Not recognized class " + javaClass.getName()); } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java index 4f5dfc6ecf05..471036d4eaa9 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/array/PrimitiveArray.java @@ -48,8 +48,7 @@ static PrimitiveArray create(Class clz, int len) { return (PrimitiveArray) new LongArray(len); } else if (clz.equals(int.class) || clz.equals(Integer.class)) { return (PrimitiveArray) new IntArray(len); - } - else { + } else { return (PrimitiveArray) new ObjectArray(clz, len); } } diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index 196b01443e0b..e1a6a46fc7da 100644 --- 
a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -30,9 +30,9 @@ import static com.alibaba.graphscope.utils.JavaClassName.JAVA_ARROW_PROJECTED_FRAGMENT_GETTER; import static com.alibaba.graphscope.utils.JavaClassName.JAVA_ARROW_PROJECTED_FRAGMENT_MAPPER; import static com.alibaba.graphscope.utils.JavaClassName.LONG; +import static com.alibaba.graphscope.utils.JavaClassName.STD_STRING; import static com.alibaba.graphscope.utils.JavaClassName.STRING; import static com.alibaba.graphscope.utils.JavaClassName.STRING_VIEW; -import static com.alibaba.graphscope.utils.JavaClassName.STD_STRING; import com.alibaba.fastffi.CXXHead; import com.alibaba.fastffi.CXXTemplate; From ec92cf3b2e32693411fa85c010915bd8d544df43 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 5 Jun 2024 14:47:56 +0800 Subject: [PATCH 12/52] remove test script Committed-by: xiaolei.zl from Dev container --- analytical_engine/java/install.sh | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 analytical_engine/java/install.sh diff --git a/analytical_engine/java/install.sh b/analytical_engine/java/install.sh deleted file mode 100644 index 059dd5b7ab64..000000000000 --- a/analytical_engine/java/install.sh +++ /dev/null @@ -1,3 +0,0 @@ -cp grape-runtime/target/native/libgrape-jni.so /opt/graphscope/lib -cp grape-runtime/target/grape-runtime-0.28.0-shaded.jar /opt/graphscope/lib -cp grape-giraph/target/grape-giraph-0.28.0-shaded.jar /opt/graphscope/lib From 44d57c7b6b7d9bd1562eeadd36678df2675ed4fb Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 5 Jun 2024 14:50:56 +0800 Subject: [PATCH 13/52] format Committed-by: xiaolei.zl from Dev container --- .../java/com/alibaba/graphscope/graph/AbstractEdgeManager.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java index 7c22b726e3d8..805ca9d0a50f 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/graph/AbstractEdgeManager.java @@ -278,7 +278,7 @@ private void fillInEdataArray(BaseTypedArray edataArray) throws IOEx if (bizEdataClass.equals(edataClass)) { logger.info( "biz edata {} == grape edata, try to read direct, biz edata class {}, edata" - + " class {}", + + " class {}", edata_t, bizEdataClass, edataClass); From 2bbcb2dc5ad8b2f93ef1d8427b29d274cadf0153 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 5 Jun 2024 15:53:01 +0800 Subject: [PATCH 14/52] cpp format Committed-by: xiaolei.zl from Dev container --- analytical_engine/test/giraph_runner.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analytical_engine/test/giraph_runner.h b/analytical_engine/test/giraph_runner.h index 907bb32b49eb..88eda0c1c553 100644 --- a/analytical_engine/test/giraph_runner.h +++ b/analytical_engine/test/giraph_runner.h @@ -78,7 +78,7 @@ std::pair parse_property_type( if (type["label"] == "vertex_label") { if (type.contains("propertyDefList")) { auto properties = type["propertyDefList"]; - CHECK(properties.size() == 1); + CHECK_EQ(properties.size(), 1); auto data_type = properties[0]["data_type"]; vertex_type_name = data_type.get(); } else { @@ -87,7 +87,7 @@ std::pair parse_property_type( } else if (type["label"] == "edge_label") { if (type.contains("propertyDefList")) { auto properties = type["propertyDefList"]; - CHECK(properties.size() == 1); + CHECK_EQ(properties.size(), 1); auto data_type = properties[0]["data_type"]; edge_type_name = data_type.get(); } else { From 8cc6860c238d0f9c30f959cf928eccde5ae89ef2 Mon Sep 17 
00:00:00 2001 From: "xiaolei.zl" Date: Fri, 7 Jun 2024 15:24:21 +0800 Subject: [PATCH 15/52] add python test Committed-by: xiaolei.zl from Dev container --- .../core/context/java_pie_projected_context.h | 9 +++++++++ .../graph/impl/VertexDataManagerImpl.java | 4 ++-- .../graphscope/utils/AppBaseParser.java | 14 +++++++++---- .../graphscope/utils/AppContextGetter.java | 3 +++ coordinator/gscoordinator/utils.py | 4 ++++ .../tests/unittest/test_java_app.py | 20 +++++++++++++++++++ 6 files changed, 48 insertions(+), 6 deletions(-) diff --git a/analytical_engine/core/context/java_pie_projected_context.h b/analytical_engine/core/context/java_pie_projected_context.h index fbac9ccbb381..e5308533b862 100644 --- a/analytical_engine/core/context/java_pie_projected_context.h +++ b/analytical_engine/core/context/java_pie_projected_context.h @@ -144,6 +144,15 @@ class JavaPIEProjectedContext : public JavaContextBase { std::shared_ptr inner_ctx_impl_shared(inner_ctx_impl); return std::make_shared(id, frag_wrapper, inner_ctx_impl_shared); + } else if (data_type == "std::string") { + using inner_ctx_type = grape::VertexDataContext; + using inner_ctx_wrapper_type = + VertexDataContextWrapper; + auto inner_ctx_impl = + reinterpret_cast(this->inner_context_addr()); + std::shared_ptr inner_ctx_impl_shared(inner_ctx_impl); + return std::make_shared(id, frag_wrapper, + inner_ctx_impl_shared); } else { LOG(ERROR) << "Unrecognizable data type: " << data_type; } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java index 3390dbe3a356..e145b130cfd0 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/graph/impl/VertexDataManagerImpl.java @@ -112,9 +112,9 @@ private 
void readVertexDataFromIFragment(FFIByteVectorOutputStream outputStream) // We need to form all vdata as a stream, so java writables can read from this stream. Iterable> iterable; if (conf.getGrapeVidClass().equals(Long.class)) { - iterable = (Iterable>) fragment.vertices().longIterable(); + iterable = (Iterable>) fragment.innerVertices().longIterable(); } else if (conf.getGrapeVidClass().equals(Integer.class)) { - iterable = (Iterable>) fragment.vertices().intIterable(); + iterable = (Iterable>) fragment.innerVertices().intIterable(); } else { throw new IllegalStateException( "No recognizable vid" + conf.getGrapeVidClass().getName()); diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java index 0426b0d8fc5d..0e318335a68b 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java @@ -120,9 +120,15 @@ private static boolean tryGiraphClass(Class claz) { } else { return false; } + // The type params are: I, V, E, M, C, we need to return uint64_t for vid. 
String typeParamNames[] = new String[types.length]; for (int i = 0; i < types.length; ++i) { - typeParamNames[i] = writableToJava(types[i].getTypeName()); + if (i == 1) { + typeParamNames[1] = "java.lang.Long"; + } + else { + typeParamNames[i] = writableToJava(types[i].getTypeName()); + } } logger.info("TypeParams: " + String.join(",", typeParamNames)); logger.info("ContextType:vertex_data"); @@ -177,11 +183,11 @@ private static Type[] getTypeParams(Class clz, int size) { } private static String writableToJava(String typeName) { - if (typeName.contains("DoubleWritable")) { + if (typeName.contains("org.apache.hadoop.io.DoubleWritable")) { return "java.lang.Double"; - } else if (typeName.contains("IntWritable")) { + } else if (typeName.contains("org.apache.hadoop.io.IntWritable")) { return "java.lang.Integer"; - } else if (typeName.contains("LongWritable")) { + } else if (typeName.contains("org.apache.hadoop.io.LongWritable")) { return "java.lang.Long"; } else { return "com.alibaba.graphscope.ds.StringView"; diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java index 28a34b528a41..3d8c0da684d3 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java @@ -201,6 +201,9 @@ public static String getVertexDataContextDataType(VertexDataContext ctxObj) { } else if (ret.getName() == "java.lang.Long") { return "int64_t"; } + else if (ret.getName() == "com.alibaba.graphscope.ds.StringView") { + return "std::string"; + } return null; } } diff --git a/coordinator/gscoordinator/utils.py b/coordinator/gscoordinator/utils.py index 78bbf68833dc..25c903dc1a08 100644 --- a/coordinator/gscoordinator/utils.py +++ b/coordinator/gscoordinator/utils.py @@ -695,6 +695,10 @@ def 
_type_param_consistent(graph_actucal_type_param, java_app_type_param): if graph_actucal_type_param in {"int32_t", "uint32_t"}: return True return False + if java_app_type_param == "com.alibaba.graphscope.ds.StringView": + if graph_actucal_type_param in {"std::string"}: + return True + return False return False diff --git a/python/graphscope/tests/unittest/test_java_app.py b/python/graphscope/tests/unittest/test_java_app.py index 548cdd101f05..79aadbb6f895 100644 --- a/python/graphscope/tests/unittest/test_java_app.py +++ b/python/graphscope/tests/unittest/test_java_app.py @@ -58,6 +58,9 @@ def demo_jar(): def projected_graph_sssp_class(): return "com.alibaba.graphscope.example.sssp.SSSP" +@pytest.fixture(scope="module") +def projected_graph_giraph_app_class(): + return "com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable" @pytest.fixture(scope="module") def non_exist_java_class(): @@ -175,3 +178,20 @@ def test_giraph_app( giraph_sssp = load_app(algo="giraph:com.alibaba.graphscope.example.giraph.SSSP") giraph_sssp(g, sourceId=6) del g + +@pytest.mark.timeout(3600) +def test_giraph_app_user_writable( + demo_jar, + graphscope_session, + projected_graph_giraph_app_class, +): + graphscope_session.add_lib(demo_jar) + vformat = "giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat" + eformat = "giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat" + g = projected_p2p_graph_loaded_by_giraph( + graphscope_session, demo_jar, vformat, eformat + ) + + user_app = load_app(algo="giraph:com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable") + user_app(g) + del g From 99bb1aa3b6c67b629be48457ddf185ae5b5098e5 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 5 Jun 2024 15:02:46 +0800 Subject: [PATCH 16/52] fix dockerfile and precompile.py Committed-by: xiaolei.zl from Dev container --- k8s/dockerfiles/analytical.Dockerfile | 2 +- k8s/utils/precompile.py | 15 ++++++++++++++- 2 files changed, 
15 insertions(+), 2 deletions(-) diff --git a/k8s/dockerfiles/analytical.Dockerfile b/k8s/dockerfiles/analytical.Dockerfile index 5a9cc1e794fe..9cc0614861aa 100644 --- a/k8s/dockerfiles/analytical.Dockerfile +++ b/k8s/dockerfiles/analytical.Dockerfile @@ -73,7 +73,7 @@ RUN cd /home/graphscope/GraphScope/ && \ strip ${INSTALL_DIR}/bin/grape_engine; \ strip ${INSTALL_DIR}/lib/*.so; \ sudo cp -rs ${INSTALL_DIR}/* ${GRAPHSCOPE_HOME}/; \ - python3 ./k8s/utils/precompile.py --graph --output_dir ${INSTALL_DIR}/builtin; \ + python3 ./k8s/utils/precompile.py --graph --output_dir ${INSTALL_DIR}/builtin --enable_java_sdk ON; \ strip ${INSTALL_DIR}/builtin/*/*.so || true; \ fi diff --git a/k8s/utils/precompile.py b/k8s/utils/precompile.py index 4def24e505e9..21818ff33631 100755 --- a/k8s/utils/precompile.py +++ b/k8s/utils/precompile.py @@ -41,6 +41,7 @@ def compute_sig(s): else "/opt/graphscope" ) + def cmake_and_make(cmake_commands): try: cmake_process = subprocess.run( @@ -92,7 +93,12 @@ def cmake_graph(graph_class): ) with open(cmakelists_file, mode="w") as f: f.write(content) - cmake_commands = ["cmake", ".", "-DNETWORKX=" + NETWORKX] + cmake_commands = [ + "cmake", + ".", + "-DNETWORKX=" + NETWORKX, + "-DENABLE_JAVA_SDK=" + ENABLE_JAVA_SDK, + ] if "ArrowFragment" in graph_class: cmake_commands.append("-DPROPERTY_GRAPH_FRAME=True") else: @@ -453,6 +459,12 @@ def parse_sys_args(): default=WORKSPACE, help="Output directory." 
) + parser.add_argument( + "--enable_java_sdk", + type=str, + default="OFF", + help="Enable Java SDK support or not", + ) return parser.parse_args() WORKSPACE = Path(os.path.join("/", tempfile.gettempprefix(), "gs", "builtin")).resolve() @@ -462,6 +474,7 @@ def parse_sys_args(): print("Launching with args", args) WORKSPACE = args.output_dir WORKSPACE = Path(WORKSPACE).resolve() + ENABLE_JAVA_SDK = args.enable_java_sdk print("Will output libraries to", WORKSPACE) os.makedirs(WORKSPACE, exist_ok=True) if args.graph: From 8ab68c8fe575ee50b2cf41eaa38c165323cb4760 Mon Sep 17 00:00:00 2001 From: zhanglei1949 Date: Fri, 7 Jun 2024 16:37:41 +0800 Subject: [PATCH 17/52] able to run --- VERSION | 2 +- .../graphscope/utils/AppBaseParser.java | 6 +-- analytical_engine/java/pom.xml | 2 +- coordinator/gscoordinator/op_executor.py | 7 +-- k8s/dockerfiles/analytical.Dockerfile | 1 + k8s/dockerfiles/coordinator.Dockerfile | 8 +++- python/test_gae_java.py | 44 +++++++++++++++++++ 7 files changed, 60 insertions(+), 10 deletions(-) create mode 100644 python/test_gae_java.py diff --git a/VERSION b/VERSION index 697f087f376a..1b58cc10180e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.28.0 +0.27.0 diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java index 0e318335a68b..330c177d13b9 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java @@ -183,11 +183,11 @@ private static Type[] getTypeParams(Class clz, int size) { } private static String writableToJava(String typeName) { - if (typeName.contains("org.apache.hadoop.io.DoubleWritable")) { + if (typeName.equals("org.apache.hadoop.io.DoubleWritable")) { return "java.lang.Double"; - } else if 
(typeName.contains("org.apache.hadoop.io.IntWritable")) { + } else if (typeName.equals("org.apache.hadoop.io.IntWritable")) { return "java.lang.Integer"; - } else if (typeName.contains("org.apache.hadoop.io.LongWritable")) { + } else if (typeName.equals("org.apache.hadoop.io.LongWritable")) { return "java.lang.Long"; } else { return "com.alibaba.graphscope.ds.StringView"; diff --git a/analytical_engine/java/pom.xml b/analytical_engine/java/pom.xml index 59a9b286f4d0..d6cb9d97b879 100644 --- a/analytical_engine/java/pom.xml +++ b/analytical_engine/java/pom.xml @@ -64,7 +64,7 @@ - 0.28.0 + 0.27.0 0.1.2 0.19 3.3.11 diff --git a/coordinator/gscoordinator/op_executor.py b/coordinator/gscoordinator/op_executor.py index 8e9f33f27492..90eb181f3f7d 100644 --- a/coordinator/gscoordinator/op_executor.py +++ b/coordinator/gscoordinator/op_executor.py @@ -416,9 +416,10 @@ def _compile_lib_and_distribute(self, compile_func, lib_name, op, *args, **kwarg **kwargs, ) # for java app compilation, we need to distribute the jar and ffi generated - if app_type == "java_pie": - self._launcher.distribute_file(java_jar_path) - self._launcher.distribute_file(java_ffi_path) + # if app_type == "java_pie": + # self._launcher.distribute_file(java_jar_path) + # currently no ffi path is generated. 
+ # self._launcher.distribute_file(java_ffi_path) self._launcher.distribute_file(lib_path) return lib_path diff --git a/k8s/dockerfiles/analytical.Dockerfile b/k8s/dockerfiles/analytical.Dockerfile index 9cc0614861aa..0f5e494cc1bf 100644 --- a/k8s/dockerfiles/analytical.Dockerfile +++ b/k8s/dockerfiles/analytical.Dockerfile @@ -90,6 +90,7 @@ ENV GRAPHSCOPE_HOME=/opt/graphscope ENV PATH=$PATH:$GRAPHSCOPE_HOME/bin LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GRAPHSCOPE_HOME/lib USER root +RUN apt-get update && apt-get install -y default-jdk COPY ./k8s/utils/kube_ssh /usr/local/bin/kube_ssh COPY --from=builder-java /home/graphscope/install /opt/graphscope/ RUN mkdir -p /tmp/gs && (mv /opt/graphscope/builtin /tmp/gs/builtin || true) && chown -R graphscope:graphscope /tmp/gs diff --git a/k8s/dockerfiles/coordinator.Dockerfile b/k8s/dockerfiles/coordinator.Dockerfile index a2719ce6e68f..8a25efb1b8e1 100644 --- a/k8s/dockerfiles/coordinator.Dockerfile +++ b/k8s/dockerfiles/coordinator.Dockerfile @@ -24,7 +24,11 @@ RUN cd /home/graphscope/GraphScope/ && \ cp wheelhouse/*.whl /home/graphscope/install/ && \ cd ../coordinator && \ python3 setup.py bdist_wheel && \ - cp dist/*.whl /home/graphscope/install/; \ + cp dist/*.whl /home/graphscope/install/ && \ + cd ../analytical_engine/java/ && \ + mvn clean package -DskipTests -Dmaven.antrun.skip=true && \ + cp grape-runtime/target/grape-runtime-*-shaded.jar /home/graphscope/install/lib/ && \ + cp grape-giraph/target/grape-giraph-*-shaded.jar /home/graphscope/install/lib/; \ fi ############### RUNTIME: Coordinator ####################### @@ -34,7 +38,7 @@ FROM ubuntu:22.04 AS coordinator ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y && \ - apt-get install -y sudo python3-pip openmpi-bin curl tzdata && \ + apt-get install -y sudo python3-pip openmpi-bin curl tzdata default-jdk && \ apt-get clean -y && \ rm -rf /var/lib/apt/lists/* diff --git a/python/test_gae_java.py b/python/test_gae_java.py new file mode 100644 index 
000000000000..e0b54f6bb941 --- /dev/null +++ b/python/test_gae_java.py @@ -0,0 +1,44 @@ +import graphscope +import os +from graphscope.framework.app import load_app + +graphscope.set_option(show_log=True) +graphscope.set_option(log_level="DEBUG") +k8s_volumes = { + "data": { + "type": "hostPath", + "field": { + "path": "/data", + "type": "Directory" + }, + "mounts": { + "mountPath": "/data" + } + } +} +sess = graphscope.session(cluster_type="k8s", enabled_engines="gae-java", k8s_volumes=k8s_volumes, k8s_image_tag="0.27.0") +sess.add_lib('/mnt/zhanglei/grape-demo-0.27.0-shaded.jar') +# vformat = "giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat" +# eformat = "giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat" +# graph = sess.load_from( +# vertices="/data/gstest/p2p-31.v", +# vformat=vformat, +# edges="/data/gstest/p2p-31.e", +# eformat=eformat, +# ) +# proj_g = graph._project_to_simple(v_prop="vdata", e_prop="data") +# giraph_sssp = load_app(algo="giraph:com.alibaba.graphscope.example.giraph.SSSP") +# res = giraph_sssp(proj_g, sourceId=6) + +vformat2 = "giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat" +eformat2 = "giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat" +graph2 = sess.load_from( + vertices="/data/gstest/p2p-31.v", + vformat=vformat2, + edges="/data/gstest/p2p-31.e", + eformat=eformat2, +) +proj_g2 = graph2._project_to_simple(v_prop="vdata", e_prop="data") + +user_app = load_app(algo="giraph:com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable") +res = user_app(proj_g2) \ No newline at end of file From 0b81735742defb8fda554fc579c4c5b68e4da8b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 11 Jun 2024 14:14:00 +0800 Subject: [PATCH 18/52] fix --- .../graphscope/utils/AppBaseParser.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git 
a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java index 330c177d13b9..d91592b05103 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java @@ -122,14 +122,18 @@ private static boolean tryGiraphClass(Class claz) { } // The type params are: I, V, E, M, C, we need to return uint64_t for vid. String typeParamNames[] = new String[types.length]; - for (int i = 0; i < types.length; ++i) { - if (i == 1) { - typeParamNames[1] = "java.lang.Long"; - } - else { - typeParamNames[i] = writableToJava(types[i].getTypeName()); - } - } + typeParamNames[0] = writableToJava(types[0].getTypeName()); + typeParamNames[1] = "java.lang.Long"; + typeParamNames[2] = writableToJava(types[1].getTypeName()); + typeParamNames[3] = writableToJava(types[2].getTypeName()); +// for (int i = 0; i < types.length; ++i) { +// if (i == 1) { +// typeParamNames[1] = "java.lang.Long"; +// } +// else { +// typeParamNames[i] = writableToJava(types[i].getTypeName()); +// } +// } logger.info("TypeParams: " + String.join(",", typeParamNames)); logger.info("ContextType:vertex_data"); logger.info("VertexData: " + typeParamNames[1]); From 1ee9c3bb0ad403f201196e9d812f5b350b153fb3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 11 Jun 2024 14:29:04 +0800 Subject: [PATCH 19/52] we can't assume edata/vdata must be string --- .../graphscope/example/giraph/MessageAppWithUserWritable.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java index 
7597867b624d..fb55672f924a 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/MessageAppWithUserWritable.java @@ -33,7 +33,7 @@ */ public class MessageAppWithUserWritable extends BasicComputation< - LongWritable, MultipleLongWritable, MultipleLongWritable, MultipleLongWritable> { + LongWritable, MultipleLongWritable, LongWritable, MultipleLongWritable> { public static LongConfOption MAX_SUPER_STEP; private static Logger logger = LoggerFactory.getLogger(MessageAppWithUserWritable.class); @@ -58,7 +58,7 @@ public class MessageAppWithUserWritable */ @Override public void compute( - Vertex vertex, + Vertex vertex, Iterable messages) throws IOException { if (getSuperstep() == 0) { From eb0454cb6dcb62ee8f9dd119b3b0c0deb939f707 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 11 Jun 2024 14:34:54 +0800 Subject: [PATCH 20/52] fix edge input format --- .../giraph/format/P2PEdgeMultipleLongInputFormat.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java index a6052f692242..6850c98c4f8d 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PEdgeMultipleLongInputFormat.java @@ -28,7 +28,7 @@ import java.io.IOException; public class P2PEdgeMultipleLongInputFormat - extends TextEdgeInputFormat { + extends TextEdgeInputFormat { /** * Create an edge reader for a given split. 
The framework will call {@link @@ -40,7 +40,7 @@ public class P2PEdgeMultipleLongInputFormat * @throws IOException */ @Override - public EdgeReader createEdgeReader( + public EdgeReader createEdgeReader( InputSplit split, TaskAttemptContext context) throws IOException { return new P2PEdgeReader(); } @@ -54,7 +54,7 @@ public class P2PEdgeReader extends TextEdgeReaderFromEachLineProcessed private LongWritable srcId; private LongWritable dstId; - private MultipleLongWritable edgeValue; + private LongWritable edgeValue; /** * Preprocess the line so other methods can easily read necessary information for creating @@ -74,7 +74,7 @@ protected String[] preprocessLine(Text line) throws IOException { // logger.debug(String.join(",", tokens)); srcId = new LongWritable(Long.parseLong(tokens[0])); dstId = new LongWritable(Long.parseLong(tokens[1])); - edgeValue = new MultipleLongWritable(Long.parseLong(tokens[2])); + edgeValue = new LongWritable(Long.parseLong(tokens[2])); return tokens; } @@ -110,7 +110,7 @@ protected LongWritable getSourceVertexId(String[] line) throws IOException { * @throws IOException exception that can be thrown while reading */ @Override - protected MultipleLongWritable getValue(String[] line) throws IOException { + protected LongWritable getValue(String[] line) throws IOException { return edgeValue; } } From 456650861f80903e3922bdd3b84a5c522697e91a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 11 Jun 2024 15:38:07 +0800 Subject: [PATCH 21/52] minor fix --- .../giraph/format/P2PVertexMultipleLongInputFormat.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java index d6bb19dac955..10c381b98df5 100644 --- 
a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java @@ -30,7 +30,7 @@ import java.util.List; public class P2PVertexMultipleLongInputFormat - extends TextVertexInputFormat { + extends TextVertexInputFormat { /** * The factory method which produces the {@link TextVertexReader} used by this input format. @@ -40,7 +40,7 @@ public class P2PVertexMultipleLongInputFormat * @return the text vertex reader to be used */ @Override - public TextVertexInputFormat + public TextVertexInputFormat .TextVertexReader createVertexReader(InputSplit split, TaskAttemptContext context) throws IOException { return new P2PVertexReader(); @@ -78,7 +78,7 @@ protected MultipleLongWritable getValue(String[] tokens) throws IOException { } @Override - protected Iterable> getEdges(String[] tokens) + protected Iterable> getEdges(String[] tokens) throws IOException { List> edges = Lists.newArrayListWithCapacity(0); From 9e2614dac16304e862377a5222a537caa3ab64ef Mon Sep 17 00:00:00 2001 From: zhanglei1949 Date: Tue, 11 Jun 2024 15:46:33 +0800 Subject: [PATCH 22/52] fixed --- .../example/giraph/format/P2PVertexMultipleLongInputFormat.java | 2 +- .../main/java/com/alibaba/graphscope/utils/AppBaseParser.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java index 10c381b98df5..79c3b4785f5a 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java +++ 
b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/format/P2PVertexMultipleLongInputFormat.java @@ -80,7 +80,7 @@ protected MultipleLongWritable getValue(String[] tokens) throws IOException { @Override protected Iterable> getEdges(String[] tokens) throws IOException { - List> edges = + List> edges = Lists.newArrayListWithCapacity(0); return edges; } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java index d91592b05103..7f2a51a77891 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppBaseParser.java @@ -136,7 +136,7 @@ private static boolean tryGiraphClass(Class claz) { // } logger.info("TypeParams: " + String.join(",", typeParamNames)); logger.info("ContextType:vertex_data"); - logger.info("VertexData: " + typeParamNames[1]); + logger.info("VertexData: " + typeParamNames[2]); return true; } From 3c8a93e5f8f230959f64811d9a881c52c2e7b971 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 11 Jun 2024 17:02:39 +0800 Subject: [PATCH 23/52] add template specification --- .../annotation/AnnotationInvoker.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index e1a6a46fc7da..5b9bddf94509 100644 --- a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -1185,6 +1185,25 @@ + ">", STD_STRING }), + @CXXTemplate( + cxx = { 
+ CPP_ARROW_PROJECTED_FRAGMENT + + "", + "std::string" + }, + java = { + JAVA_ARROW_PROJECTED_FRAGMENT + + "<" + + LONG + + "," + + LONG + + "," + + STRING_VIEW + + "," + + LONG + + ">", + STD_STRING + }), }), @FFIGen( type = "com.alibaba.graphscope.parallel.DefaultMessageManager", From 4e430df72b7c2fe5d044300dfe56cd6be066793a Mon Sep 17 00:00:00 2001 From: zhanglei1949 Date: Tue, 11 Jun 2024 17:23:34 +0800 Subject: [PATCH 24/52] add more template specialization --- .../annotation/AnnotationInvoker.java | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index 5b9bddf94509..7cbcb1b8f61f 100644 --- a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -1204,6 +1204,25 @@ + ">", STD_STRING }), + @CXXTemplate( + cxx = { + CPP_ARROW_PROJECTED_FRAGMENT + + "", + "std::string" + }, + java = { + JAVA_ARROW_PROJECTED_FRAGMENT + + "<" + + LONG + + "," + + LONG + + "," + + STRING_VIEW + + "," + + DOUBLE + + ">", + STD_STRING + }), }), @FFIGen( type = "com.alibaba.graphscope.parallel.DefaultMessageManager", From 5de153142baeddad59b4fed24f53441b0e455df0 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 17 Jun 2024 20:08:42 +0800 Subject: [PATCH 25/52] for user writable, output with toString method Committed-by: xiaolei.zl from Dev container --- .../context/GiraphComputationAdaptorContext.java | 9 ++++++--- python/graphscope/tests/unittest/test_java_app.py | 14 +++++++++++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java 
b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java index 43931d6e77fb..b28345f3f90a 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/context/GiraphComputationAdaptorContext.java @@ -171,9 +171,11 @@ public void writeBackVertexData() { { long previous = 0; try { - if (conf.getGrapeVdataClass().equals(String.class)) { + if (conf.getGrapeVdataClass().equals(String.class) || conf.getGrapeVdataClass().equals(StringView.class)) { for (long lid = 0; lid < innerVerticesNum; ++lid) { - vertexDataManager.getVertexData(lid).write(outputStream); + // vertexDataManager.getVertexData(lid).write(outputStream); + // Write the output of toString(). + outputStream.writeBytes(vertexDataManager.getVertexData(lid).toString()); long cur = outputStream.bytesWriten(); offsets[(int) lid] = cur - previous; maxOffset = Math.max(offsets[(int) lid], maxOffset); @@ -264,7 +266,8 @@ public void writeBackVertexData() { // This string is not readable. 
StdString value = (StdString) vertexArray.get(grapeVertex); // TODO: can be optimized without creating a java string - value.fromJavaString(new String(bytes)); + String javaStr = new String(bytes); + value.fromJavaString(javaStr); } } else { throw new IllegalStateException( diff --git a/python/graphscope/tests/unittest/test_java_app.py b/python/graphscope/tests/unittest/test_java_app.py index 79aadbb6f895..20391fade8d5 100644 --- a/python/graphscope/tests/unittest/test_java_app.py +++ b/python/graphscope/tests/unittest/test_java_app.py @@ -163,7 +163,7 @@ def projected_p2p_graph_loaded_by_giraph( reason="Java SDK is disabled, skip this test.", ) @pytest.mark.timeout(3600) -def test_giraph_app( +def test_giraph_app_sssp( demo_jar, graphscope_session, projected_graph_sssp_class, @@ -176,9 +176,15 @@ def test_giraph_app( ) giraph_sssp = load_app(algo="giraph:com.alibaba.graphscope.example.giraph.SSSP") - giraph_sssp(g, sourceId=6) + ctx = giraph_sssp(g, sourceId=6) + dataframe = ctx.to_dataframe({"node": "v.id", "r": "r"}) + print (dataframe) del g +@pytest.mark.skipif( + os.environ.get("RUN_JAVA_TESTS") != "ON", + reason="Java SDK is disabled, skip this test.", +) @pytest.mark.timeout(3600) def test_giraph_app_user_writable( demo_jar, @@ -193,5 +199,7 @@ def test_giraph_app_user_writable( ) user_app = load_app(algo="giraph:com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable") - user_app(g) + ctx = user_app(g) + dataframe = ctx.to_dataframe({"node": "v.id", "r": "r"}) + print (dataframe) del g From 8d51d1e552644162150f550ae6ec78dec6b2bc50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Fri, 21 Jun 2024 16:09:30 +0800 Subject: [PATCH 26/52] todo: impl hdfs reader --- .../alibaba/graphscope/loader/LoaderBase.java | 31 ++ .../graphscope/loader/LoaderFactory.java | 18 + .../loader/impl/AbstractLoader.java | 494 ++++++++++++++++++ .../graphscope/loader/impl/FileLoader.java | 443 +--------------- .../graphscope/loader/impl/HDFSLoader.java | 
50 ++ 5 files changed, 620 insertions(+), 416 deletions(-) create mode 100644 analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java create mode 100644 analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java create mode 100644 analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java index 07a01825706a..609713f8a213 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java @@ -15,16 +15,47 @@ */ package com.alibaba.graphscope.loader; +import com.alibaba.graphscope.stdcxx.FFIByteVecVector; +import com.alibaba.graphscope.stdcxx.FFIIntVecVector; + +import java.util.concurrent.ExecutionException; + /** * Base interface defines behavior for a loader. */ public interface LoaderBase { + void init( + int workerId, + int workerNum, + int threadNum, + FFIByteVecVector vidBuffers, + FFIByteVecVector vertexDataBuffers, + FFIByteVecVector edgeSrcIdBuffers, + FFIByteVecVector edgeDstIdBuffers, + FFIByteVecVector edgeDataBuffers, + FFIIntVecVector vidOffsets, + FFIIntVecVector vertexDataOffsets, + FFIIntVecVector edgeSrcIdOffsets, + FFIIntVecVector edgeDstIdOffsets, + FFIIntVecVector edgeDataOffsets); + + /** + * @param inputPath The path of input file. + * @param vformatClass The class name of vertex format. + * @return Return an integer contains type params info. 
+ */ + int loadVertices(String inputPath, String vformatClass) + throws ExecutionException, InterruptedException, ClassNotFoundException; + + void loadEdges(String inputPath, String eformatClass)throws ExecutionException, InterruptedException, ClassNotFoundException; + LoaderBase.TYPE loaderType(); int concurrency(); enum TYPE { FileLoader, + HDFSLoader, } } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java new file mode 100644 index 000000000000..b5b47c4bf357 --- /dev/null +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java @@ -0,0 +1,18 @@ +package com.alibaba.graphscope.loader; + +import com.alibaba.graphscope.loader.impl.FileLoader; +import com.alibaba.graphscope.loader.impl.HDFSLoader; + +import java.net.URLClassLoader; + +public class LoaderFactory { + public static LoaderBase createLoader(LoaderBase.TYPE type, int id, URLClassLoader classLoader) { + if (type == LoaderBase.TYPE.FileLoader) { + return new FileLoader(id, classLoader); + } + if (type == LoaderBase.TYPE.HDFSLoader){ + return new HDFSLoader(id, classLoader); + } + throw new IllegalArgumentException("Unsupported loader type: " + type); + } +} diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java new file mode 100644 index 000000000000..55181b784ff9 --- /dev/null +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java @@ -0,0 +1,494 @@ +package com.alibaba.graphscope.loader.impl; + +import com.alibaba.graphscope.graph.impl.VertexImpl; +import com.alibaba.graphscope.loader.GraphDataBufferManager; +import com.alibaba.graphscope.loader.LoaderBase; +import 
com.alibaba.graphscope.stdcxx.FFIByteVecVector; +import com.alibaba.graphscope.stdcxx.FFIIntVecVector; +import com.google.common.base.Preconditions; +import org.apache.giraph.conf.GiraphConfiguration; +import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.graph.Vertex; +import org.apache.giraph.io.EdgeInputFormat; +import org.apache.giraph.io.EdgeReader; +import org.apache.giraph.io.VertexInputFormat; +import org.apache.giraph.io.VertexReader; +import org.apache.giraph.io.formats.TextEdgeInputFormat; +import org.apache.giraph.io.formats.TextVertexInputFormat; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; +import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; +import java.lang.reflect.Field; +import java.lang.reflect.Method; +import java.net.URLClassLoader; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicInteger; + +import static com.alibaba.graphscope.loader.LoaderUtils.generateTypeInt; +import static com.alibaba.graphscope.utils.FileUtils.getNumLinesOfFile; +import static org.apache.giraph.utils.ReflectionUtils.getTypeArguments; + +public abstract class AbstractLoader implements LoaderBase { + private static Logger logger = LoggerFactory.getLogger(AbstractLoader.class); + + protected int loaderId; + protected int threadNum; + protected int workerId; + protected int workerNum; + + protected Class vertexInputFormatClz; + protected Class edgeInputFormatClz; + + protected static AtomicInteger LOADER_ID = new AtomicInteger(0); + protected static AtomicInteger V_CALLABLE_ID = new AtomicInteger(0); + 
protected static AtomicInteger E_CALLABLE_ID = new AtomicInteger(0); + + + protected VertexInputFormat vertexInputFormat; + protected EdgeInputFormat edgeInputFormat; + + protected ExecutorService executor; + + protected Method createVertexReaderMethod; + protected Method createEdgeReaderMethod; + + protected GraphDataBufferManager proxy; + protected Field vertexIdField; + protected Field vertexValueField; + protected Field vertexEdgesField; + protected Field VIFBufferedReaderField; + protected Field EIFBufferedReaderField; + protected InputSplit inputSplit = + new InputSplit() { + @Override + public long getLength() throws IOException, InterruptedException { + return 0; + } + + @Override + public String[] getLocations() throws IOException, InterruptedException { + return new String[0]; + } + }; + + protected Configuration configuration = new Configuration(); + protected GiraphConfiguration giraphConfiguration = new GiraphConfiguration(configuration); + protected TaskAttemptID taskAttemptID = new TaskAttemptID(); + protected TaskAttemptContext taskAttemptContext = + new TaskAttemptContext(configuration, taskAttemptID); + + protected Class giraphOidClass; + protected Class giraphVDataClass; + protected Class giraphEDataClass; + protected URLClassLoader classLoader; + + public AbstractLoader(int id, URLClassLoader classLoader) { + this.classLoader = classLoader; + logger.info("FileLoader using classLoader {} to load vif and eif", classLoader); + this.giraphConfiguration.setClassLoader(this.classLoader); + loaderId = id; + try { + vertexIdField = VertexImpl.class.getDeclaredField("initializeOid"); + vertexIdField.setAccessible(true); + vertexValueField = VertexImpl.class.getDeclaredField("initializeVdata"); + vertexValueField.setAccessible(true); + vertexEdgesField = VertexImpl.class.getDeclaredField("initializeEdges"); + vertexEdgesField.setAccessible(true); + VIFBufferedReaderField = TextVertexInputFormat.class.getDeclaredField("fileReader"); + 
VIFBufferedReaderField.setAccessible(true); + EIFBufferedReaderField = TextEdgeInputFormat.class.getDeclaredField("fileReader"); + EIFBufferedReaderField.setAccessible(true); + } catch (NoSuchFieldException e) { + throw new IllegalStateException(e.getMessage()); + } + } + + @Override + public int concurrency() { + return threadNum; + } + + @Override + public void init( + int workerId, + int workerNum, + int threadNum, + FFIByteVecVector vidBuffers, + FFIByteVecVector vertexDataBuffers, + FFIByteVecVector edgeSrcIdBuffers, + FFIByteVecVector edgeDstIdBuffers, + FFIByteVecVector edgeDataBuffers, + FFIIntVecVector vidOffsets, + FFIIntVecVector vertexDataOffsets, + FFIIntVecVector edgeSrcIdOffsets, + FFIIntVecVector edgeDstIdOffsets, + FFIIntVecVector edgeDataOffsets) { + this.workerId = workerId; + this.workerNum = workerNum; + logger.info("worker id: {}, worker num: {}", workerId, workerNum); + + this.threadNum = threadNum; + this.executor = Executors.newFixedThreadPool(threadNum); + // Create a proxy form adding vertex and adding edges + proxy = + new GraphDataBufferManagerImpl( + workerId, + threadNum, + vidBuffers, + vertexDataBuffers, + edgeSrcIdBuffers, + edgeDstIdBuffers, + edgeDataBuffers, + vidOffsets, + vertexDataOffsets, + edgeSrcIdOffsets, + edgeDstIdOffsets, + edgeDataOffsets); + } + + /** + * @param inputPath + * @return Return an integer contains type params info. 
+ */ + @Override + public int loadVertices(String inputPath, String vformatClass) + throws ExecutionException, InterruptedException, ClassNotFoundException { + logger.info("vertex input path {}, vformat class{}", inputPath, vformatClass.toString()); + giraphConfiguration.setVertexInputFormatClass( + (Class) this.classLoader.loadClass(vformatClass)); + ImmutableClassesGiraphConfiguration conf = + new ImmutableClassesGiraphConfiguration(giraphConfiguration); + conf.setClassLoader(this.classLoader); + try { + vertexInputFormatClz = conf.getVertexInputFormatClass(); + + inferGiraphTypesFromJSON(vertexInputFormatClz); + + vertexInputFormat = vertexInputFormatClz.newInstance(); + vertexInputFormat.setConf(conf); + createVertexReaderMethod = + vertexInputFormatClz.getDeclaredMethod( + "createVertexReader", InputSplit.class, TaskAttemptContext.class); + + } catch (Exception e) { + e.printStackTrace(); + logger.error(e.getMessage()); + } + loadVerticesImpl(inputPath); + + // Finish output stream, such that offset == size; + proxy.finishAdding(); + return generateTypeInt(giraphOidClass, giraphVDataClass, giraphEDataClass); + } + + @Override + public void loadEdges(String inputPath, String eformatClass) + throws ExecutionException, InterruptedException, ClassNotFoundException { + logger.debug("edge input path {}", inputPath); + giraphConfiguration.setEdgeInputFormatClass( + (Class) this.classLoader.loadClass(eformatClass)); + + ImmutableClassesGiraphConfiguration conf = + new ImmutableClassesGiraphConfiguration(giraphConfiguration); + try { + edgeInputFormatClz = conf.getEdgeInputFormatClass(); + + edgeInputFormat = edgeInputFormatClz.newInstance(); + edgeInputFormat.setConf(conf); + createEdgeReaderMethod = + edgeInputFormatClz.getDeclaredMethod( + "createEdgeReader", InputSplit.class, TaskAttemptContext.class); + + } catch (Exception e) { + e.printStackTrace(); + logger.error(e.getMessage()); + } + loadEdgesImpl(inputPath); + + // Finish output stream, such that offset == 
size; + proxy.finishAdding(); + } + + private void inferGiraphTypesFromJSON(Class child) { + Class[] classList = getTypeArguments(VertexInputFormat.class, child); + Preconditions.checkArgument(classList.length == 3); + giraphOidClass = (Class) classList[0]; + giraphVDataClass = (Class) classList[1]; + giraphEDataClass = (Class) classList[2]; + logger.info( + "infer from json params: oid {}, vdata {}, edata {}", + giraphOidClass.getName(), + giraphVDataClass.getName(), + giraphEDataClass.getName()); + } + + protected void loadVerticesImpl(String inputPath) throws ExecutionException, InterruptedException { + // Try to get number of lines + long numOfLines = getNumLinesOfFile(inputPath); + logger.info( + "file {} has {} lines, workerId {}, workerNum {}", + inputPath, + numOfLines, + workerId, + workerNum); + long linesPerWorker = (numOfLines + (workerNum - 1)) / workerNum; + long start = Math.min(linesPerWorker * workerId, numOfLines); + long end = Math.min(linesPerWorker * (workerId + 1), numOfLines); + long chunkSize = (end - start + threadNum - 1) / threadNum; + proxy.reserveNumVertices((int) (end - start)); + logger.debug( + "[reading vertex] total lines {}, worker {} read {}, thread num {}, chunkSize {}", + numOfLines, + workerId, + end - start, + threadNum, + chunkSize); + long cur = start; + + Future[] futures = new Future[threadNum]; + + for (int i = 0; i < threadNum; ++i) { + AbstractVertexLoaderCallable vertexLoaderCallable = +// new VertexLoaderCallable( +// i, inputPath, Math.min(cur, end), Math.min(cur + chunkSize, end)); + createVertexLoaderCallable(i, inputPath, Math.min(cur, end), Math.min(cur + chunkSize, end)); + futures[i] = executor.submit(vertexLoaderCallable); + cur += chunkSize; + } + + long sum = 0; + for (int i = 0; i < threadNum; ++i) { + sum += (Long) futures[i].get(); + } + logger.info("[vertices] worker {} loaded {} lines ", workerId, sum); + } + + public abstract class AbstractVertexLoaderCallable implements Callable { + private int 
threadId; + private int callableId; + private BufferedReader bufferedReader; + private long start; + private long end; // exclusive + private VertexReader vertexReader; + + abstract BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException; + + public AbstractVertexLoaderCallable(int threadId, String inputPath, long startLine, long endLine) { + callableId = V_CALLABLE_ID.getAndAdd(1); + try { + FileReader fileReader = new FileReader(inputPath); +// bufferedReader = new BufferedReader(fileReader); + bufferedReader = createBufferedReader(inputPath); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + + try { + // create vertex reader + vertexReader = + (VertexReader) + createVertexReaderMethod.invoke( + vertexInputFormat, inputSplit, taskAttemptContext); + logger.info("vertex reader: " + vertexReader); + // vertexReaderClz = vertexReader.getClass(); + } catch (Exception e) { + e.printStackTrace(); + } + + this.threadId = threadId; + this.start = startLine; + this.end = endLine; + // proxy.reserveNumVertices((int) this.end - (int) this.start); + logger.info( + "Abstract loader {} creating vertex loader callable: {}, file : {}, reader {}," + + " thread id {}, from {} to {}", + AbstractLoader.this, + AbstractVertexLoaderCallable.this, + inputPath, + bufferedReader, + threadId, + startLine, + endLine); + } + + /** + * Computes a result, or throws an exception if unable to do so. + * + * @return computed result + * @throws Exception if unable to compute a result + */ + @Override + public Long call() throws Exception { + long cnt = 0; + while (cnt < start) { + bufferedReader.readLine(); + cnt += 1; + } + logger.info("worker {} thread {} skipped lines {}", workerId, threadId, cnt); + // For text vertex reader, we set the data source manually. 
+ VIFBufferedReaderField.set(vertexInputFormat, bufferedReader); + logger.info( + "worker {} thread {} has set the field {} to {}", + workerId, + threadId, + VIFBufferedReaderField, + bufferedReader); + vertexReader.initialize(inputSplit, taskAttemptContext); + vertexReader.setConf(vertexInputFormat.getConf()); + + while (cnt < end && vertexReader.nextVertex()) { + Vertex vertex = vertexReader.getCurrentVertex(); + Writable vertexId = (Writable) vertexIdField.get(vertex); + Writable vertexValue = (Writable) vertexValueField.get(vertex); + Iterable vertexEdges = (Iterable) vertexEdgesField.get(vertex); + proxy.addVertex(threadId, vertexId, vertexValue); + // suppose directed. + proxy.addEdges(threadId, vertexId, vertexEdges); + cnt += 1; + } + + bufferedReader.close(); + + return cnt - start; + } + } + + protected void loadEdgesImpl(String filePath) throws ExecutionException, InterruptedException { + // Try to get number of lines + long numOfLines = getNumLinesOfFile(filePath); + long linesPerWorker = (numOfLines + (workerNum - 1)) / workerNum; + long start = Math.min(linesPerWorker * workerId, numOfLines); + long end = Math.min(linesPerWorker * (workerId + 1), numOfLines); + long chunkSize = (end - start + threadNum - 1) / threadNum; + proxy.reserveNumEdges((int) (end - start)); + logger.debug( + "[reading edge] total lines {}, worker {} read {}, thread num {}, chunkSize {}", + numOfLines, + workerId, + end - start, + threadNum, + chunkSize); + long cur = start; + + Future[] futures = new Future[threadNum]; + + for (int i = 0; i < threadNum; ++i) { + AbstractLoader.AbstractEdgeLoaderCallable edgeLoaderCallable = + createEdgeLoaderCallable( + i, filePath, Math.min(cur, end), Math.min(cur + chunkSize, end)); +// new AbstractLoader.AbstractEdgeLoaderCallable( +// i, filePath, Math.min(cur, end), Math.min(cur + chunkSize, end)); + futures[i] = executor.submit(edgeLoaderCallable); + cur += chunkSize; + } + + long sum = 0; + for (int i = 0; i < threadNum; ++i) { + sum += 
(Long) futures[i].get(); + } + logger.info("[edges] worker {} loaded {} lines ", workerId, sum); + } + + public abstract class AbstractEdgeLoaderCallable implements Callable { + private int threadId; + private int callableId; + private BufferedReader bufferedReader; + private long start; + private long end; // exclusive + private EdgeReader edgeReader; + + abstract BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException; + + public AbstractEdgeLoaderCallable(int threadId, String inputPath, long startLine, long endLine) { + callableId = E_CALLABLE_ID.getAndAdd(1); + try { + +// bufferedReader = new BufferedReader(fileReader); + bufferedReader = createBufferedReader(inputPath); + } catch (FileNotFoundException e) { + e.printStackTrace(); + } + + try { + // create vertex reader + edgeReader = + (EdgeReader) + createEdgeReaderMethod.invoke( + edgeInputFormat, inputSplit, taskAttemptContext); + logger.info("edge reader: " + edgeReader); + // vertexReaderClz = edgeReader.getClass(); + } catch (Exception e) { + e.printStackTrace(); + } + + this.threadId = threadId; + this.start = startLine; + this.end = endLine; + // proxy.reserveNumVertices((int) this.end - (int) this.start); + logger.info( + "File loader {} creating edge callable: {}, file : {}, reader {}, thread id {}," + + " from {} to {}", + AbstractLoader.this, + AbstractEdgeLoaderCallable.this, + inputPath, + bufferedReader, + threadId, + startLine, + endLine); + } + + /** + * Computes a result, or throws an exception if unable to do so. + * + * @return computed result + * @throws Exception if unable to compute a result + */ + @Override + public Long call() throws Exception { + long cnt = 0; + while (cnt < start) { + bufferedReader.readLine(); + cnt += 1; + } + logger.info("worker {} thread {} skipped lines {}", workerId, threadId, cnt); + // For text vertex reader, we set the data source manually. 
+ EIFBufferedReaderField.set(edgeInputFormat, bufferedReader); + logger.info( + "worker {} thread {} has set the field {} to {}", + workerId, + threadId, + EIFBufferedReaderField, + bufferedReader); + edgeReader.initialize(inputSplit, taskAttemptContext); + edgeReader.setConf(edgeInputFormat.getConf()); + + while (cnt < end && edgeReader.nextEdge()) { + WritableComparable sourceId = edgeReader.getCurrentSourceId(); + Edge edge = edgeReader.getCurrentEdge(); + proxy.addEdge(threadId, sourceId, edge.getTargetVertexId(), edge.getValue()); + cnt += 1; + } + + bufferedReader.close(); + return cnt - start; + } + } + +// protected abstract void loadVerticesImpl(String inputPath) throws ExecutionException, InterruptedException; + + protected abstract AbstractVertexLoaderCallable createVertexLoaderCallable(int i, String inputPath, long min, long min1); + + protected abstract AbstractEdgeLoaderCallable createEdgeLoaderCallable(int i, String inputPath, long min, long min1); + +} diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java index 4078e512500f..1bcf9932d40e 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java @@ -63,461 +63,72 @@ /** * Load from a file on system. 
*/ -public class FileLoader implements LoaderBase { +public class FileLoader extends AbstractLoader { private static Logger logger = LoggerFactory.getLogger(FileLoader.class); - private static AtomicInteger LOADER_ID = new AtomicInteger(0); - private static AtomicInteger V_CALLABLE_ID = new AtomicInteger(0); - private static AtomicInteger E_CALLABLE_ID = new AtomicInteger(0); - - private int loaderId; - private int threadNum; - private int workerId; - private int workerNum; - private Class vertexInputFormatClz; - private Class edgeInputFormatClz; - // private Class vertexReaderClz; - private VertexInputFormat vertexInputFormat; - private EdgeInputFormat edgeInputFormat; - - private ExecutorService executor; - // private static String inputPath; - - private Method createVertexReaderMethod; - private Method createEdgeReaderMethod; - - private GraphDataBufferManager proxy; - private Field vertexIdField; - private Field vertexValueField; - private Field vertexEdgesField; - private Field VIFBufferedReaderField; - private Field EIFBufferedReaderField; - private InputSplit inputSplit = - new InputSplit() { - @Override - public long getLength() throws IOException, InterruptedException { - return 0; - } - - @Override - public String[] getLocations() throws IOException, InterruptedException { - return new String[0]; - } - }; - private Configuration configuration = new Configuration(); - private GiraphConfiguration giraphConfiguration = new GiraphConfiguration(configuration); - private TaskAttemptID taskAttemptID = new TaskAttemptID(); - private TaskAttemptContext taskAttemptContext = - new TaskAttemptContext(configuration, taskAttemptID); - - private Class giraphOidClass; - private Class giraphVDataClass; - private Class giraphEDataClass; - private URLClassLoader classLoader; - public FileLoader(int id, URLClassLoader classLoader) { - this.classLoader = classLoader; - logger.info("FileLoader using classLoader {} to load vif and eif", classLoader); - 
this.giraphConfiguration.setClassLoader(this.classLoader); - loaderId = id; - try { - vertexIdField = VertexImpl.class.getDeclaredField("initializeOid"); - vertexIdField.setAccessible(true); - vertexValueField = VertexImpl.class.getDeclaredField("initializeVdata"); - vertexValueField.setAccessible(true); - vertexEdgesField = VertexImpl.class.getDeclaredField("initializeEdges"); - vertexEdgesField.setAccessible(true); - VIFBufferedReaderField = TextVertexInputFormat.class.getDeclaredField("fileReader"); - VIFBufferedReaderField.setAccessible(true); - EIFBufferedReaderField = TextEdgeInputFormat.class.getDeclaredField("fileReader"); - EIFBufferedReaderField.setAccessible(true); - } catch (NoSuchFieldException e) { - throw new IllegalStateException(e.getMessage()); - } + super(id, classLoader); } - public static FileLoader create(URLClassLoader cl) { - synchronized (FileLoader.class) { - return new FileLoader(LOADER_ID.getAndAdd(1), cl); - } - } - - public void init( - int workerId, - int workerNum, - int threadNum, - FFIByteVecVector vidBuffers, - FFIByteVecVector vertexDataBuffers, - FFIByteVecVector edgeSrcIdBuffers, - FFIByteVecVector edgeDstIdBuffers, - FFIByteVecVector edgeDataBuffers, - FFIIntVecVector vidOffsets, - FFIIntVecVector vertexDataOffsets, - FFIIntVecVector edgeSrcIdOffsets, - FFIIntVecVector edgeDstIdOffsets, - FFIIntVecVector edgeDataOffsets) { - this.workerId = workerId; - this.workerNum = workerNum; - logger.info("worker id: {}, worker num: {}", workerId, workerNum); - - this.threadNum = threadNum; - this.executor = Executors.newFixedThreadPool(threadNum); - // Create a proxy form adding vertex and adding edges - proxy = - new GraphDataBufferManagerImpl( - workerId, - threadNum, - vidBuffers, - vertexDataBuffers, - edgeSrcIdBuffers, - edgeDstIdBuffers, - edgeDataBuffers, - vidOffsets, - vertexDataOffsets, - edgeSrcIdOffsets, - edgeDstIdOffsets, - edgeDataOffsets); - } - - /** - * @param inputPath - * @return Return an integer contains type params 
info. - */ - public int loadVerticesAndEdges(String inputPath, String vformatClass) - throws ExecutionException, InterruptedException, ClassNotFoundException { - logger.info("vertex input path {}, vformat class{}", inputPath, vformatClass.toString()); - giraphConfiguration.setVertexInputFormatClass( - (Class) this.classLoader.loadClass(vformatClass)); - ImmutableClassesGiraphConfiguration conf = - new ImmutableClassesGiraphConfiguration(giraphConfiguration); - conf.setClassLoader(this.classLoader); - try { - vertexInputFormatClz = conf.getVertexInputFormatClass(); - - inferGiraphTypesFromJSON(vertexInputFormatClz); - - vertexInputFormat = vertexInputFormatClz.newInstance(); - vertexInputFormat.setConf(conf); - createVertexReaderMethod = - vertexInputFormatClz.getDeclaredMethod( - "createVertexReader", InputSplit.class, TaskAttemptContext.class); - - } catch (Exception e) { - e.printStackTrace(); - logger.error(e.getMessage()); - } - loadVertices(inputPath); - - // Finish output stream, such that offset == size; - proxy.finishAdding(); - return generateTypeInt(giraphOidClass, giraphVDataClass, giraphEDataClass); - } - - public void loadEdges(String inputPath, String eformatClass) - throws ExecutionException, InterruptedException, ClassNotFoundException { - logger.debug("edge input path {}", inputPath); - giraphConfiguration.setEdgeInputFormatClass( - (Class) this.classLoader.loadClass(eformatClass)); - - ImmutableClassesGiraphConfiguration conf = - new ImmutableClassesGiraphConfiguration(giraphConfiguration); - try { - edgeInputFormatClz = conf.getEdgeInputFormatClass(); - - edgeInputFormat = edgeInputFormatClz.newInstance(); - edgeInputFormat.setConf(conf); - createEdgeReaderMethod = - edgeInputFormatClz.getDeclaredMethod( - "createEdgeReader", InputSplit.class, TaskAttemptContext.class); - - } catch (Exception e) { - e.printStackTrace(); - logger.error(e.getMessage()); - } - loadEdgesImpl(inputPath); - - // Finish output stream, such that offset == size; - 
proxy.finishAdding(); + @Override + protected AbstractVertexLoaderCallable createVertexLoaderCallable(int i, String inputPath, long min, long min1) { + return new FileVertexLoaderCallable(i, inputPath, min, min1); } - private void loadVertices(String inputPath) throws ExecutionException, InterruptedException { - // Try to get number of lines - long numOfLines = getNumLinesOfFile(inputPath); - logger.info( - "file {} has {} lines, workerId {}, workerNum {}", - inputPath, - numOfLines, - workerId, - workerNum); - long linesPerWorker = (numOfLines + (workerNum - 1)) / workerNum; - long start = Math.min(linesPerWorker * workerId, numOfLines); - long end = Math.min(linesPerWorker * (workerId + 1), numOfLines); - long chunkSize = (end - start + threadNum - 1) / threadNum; - proxy.reserveNumVertices((int) (end - start)); - logger.debug( - "[reading vertex] total lines {}, worker {} read {}, thread num {}, chunkSize {}", - numOfLines, - workerId, - end - start, - threadNum, - chunkSize); - long cur = start; - - Future[] futures = new Future[threadNum]; - - for (int i = 0; i < threadNum; ++i) { - VertexLoaderCallable vertexLoaderCallable = - new VertexLoaderCallable( - i, inputPath, Math.min(cur, end), Math.min(cur + chunkSize, end)); - futures[i] = executor.submit(vertexLoaderCallable); - cur += chunkSize; - } - - long sum = 0; - for (int i = 0; i < threadNum; ++i) { - sum += (Long) futures[i].get(); - } - logger.info("[vertices] worker {} loaded {} lines ", workerId, sum); + @Override + protected AbstractEdgeLoaderCallable createEdgeLoaderCallable(int i, String inputPath, long min, long min1) { + return new FileEdgeLoaderCallable(i, inputPath, min, min1); } - private void loadEdgesImpl(String filePath) throws ExecutionException, InterruptedException { - // Try to get number of lines - long numOfLines = getNumLinesOfFile(filePath); - long linesPerWorker = (numOfLines + (workerNum - 1)) / workerNum; - long start = Math.min(linesPerWorker * workerId, numOfLines); - long end 
= Math.min(linesPerWorker * (workerId + 1), numOfLines); - long chunkSize = (end - start + threadNum - 1) / threadNum; - proxy.reserveNumEdges((int) (end - start)); - logger.debug( - "[reading edge] total lines {}, worker {} read {}, thread num {}, chunkSize {}", - numOfLines, - workerId, - end - start, - threadNum, - chunkSize); - long cur = start; - - Future[] futures = new Future[threadNum]; - - for (int i = 0; i < threadNum; ++i) { - EdgeLoaderCallable edgeLoaderCallable = - new EdgeLoaderCallable( - i, filePath, Math.min(cur, end), Math.min(cur + chunkSize, end)); - futures[i] = executor.submit(edgeLoaderCallable); - cur += chunkSize; - } - - long sum = 0; - for (int i = 0; i < threadNum; ++i) { - sum += (Long) futures[i].get(); - } - logger.info("[edges] worker {} loaded {} lines ", workerId, sum); - } @Override public LoaderBase.TYPE loaderType() { return TYPE.FileLoader; } - @Override - public int concurrency() { - return threadNum; - } - - private void inferGiraphTypesFromJSON(Class child) { - Class[] classList = getTypeArguments(VertexInputFormat.class, child); - Preconditions.checkArgument(classList.length == 3); - giraphOidClass = (Class) classList[0]; - giraphVDataClass = (Class) classList[1]; - giraphEDataClass = (Class) classList[2]; - logger.info( - "infer from json params: oid {}, vdata {}, edata {}", - giraphOidClass.getName(), - giraphVDataClass.getName(), - giraphEDataClass.getName()); - } - @Override public String toString() { return FileLoader.class.toString() + "@" + loaderId; } - class VertexLoaderCallable implements Callable { - private int threadId; - private int callableId; - private BufferedReader bufferedReader; - private long start; - private long end; // exclusive - private VertexReader vertexReader; - public VertexLoaderCallable(int threadId, String inputPath, long startLine, long endLine) { - callableId = V_CALLABLE_ID.getAndAdd(1); - try { - FileReader fileReader = new FileReader(inputPath); - bufferedReader = new 
BufferedReader(fileReader); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } + public class FileVertexLoaderCallable extends AbstractVertexLoaderCallable { - try { - // create vertex reader - vertexReader = - (VertexReader) - createVertexReaderMethod.invoke( - vertexInputFormat, inputSplit, taskAttemptContext); - logger.info("vertex reader: " + vertexReader); - // vertexReaderClz = vertexReader.getClass(); - } catch (Exception e) { - e.printStackTrace(); - } - - this.threadId = threadId; - this.start = startLine; - this.end = endLine; - // proxy.reserveNumVertices((int) this.end - (int) this.start); - logger.info( - "File loader {} creating vertex loader callable: {}, file : {}, reader {}," - + " thread id {}, from {} to {}", - FileLoader.this, - VertexLoaderCallable.this, - inputPath, - bufferedReader, - threadId, - startLine, - endLine); + public FileVertexLoaderCallable(int id, String inputPath, long start, long end) { + super(id,inputPath,start,end); } @Override - public String toString() { - return VertexLoaderCallable.class.toString() + "@" + callableId; + BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { + FileReader fileReader = new FileReader(inputPath); + return new BufferedReader(fileReader); } - /** - * Computes a result, or throws an exception if unable to do so. - * - * @return computed result - * @throws Exception if unable to compute a result - */ @Override - public Long call() throws Exception { - long cnt = 0; - while (cnt < start) { - bufferedReader.readLine(); - cnt += 1; - } - logger.info("worker {} thread {} skipped lines {}", workerId, threadId, cnt); - // For text vertex reader, we set the data source manually. 
- VIFBufferedReaderField.set(vertexInputFormat, bufferedReader); - logger.info( - "worker {} thread {} has set the field {} to {}", - workerId, - threadId, - VIFBufferedReaderField, - bufferedReader); - vertexReader.initialize(inputSplit, taskAttemptContext); - vertexReader.setConf(vertexInputFormat.getConf()); - - while (cnt < end && vertexReader.nextVertex()) { - Vertex vertex = vertexReader.getCurrentVertex(); - Writable vertexId = (Writable) vertexIdField.get(vertex); - Writable vertexValue = (Writable) vertexValueField.get(vertex); - Iterable vertexEdges = (Iterable) vertexEdgesField.get(vertex); - proxy.addVertex(threadId, vertexId, vertexValue); - // suppose directed. - proxy.addEdges(threadId, vertexId, vertexEdges); - cnt += 1; - } - - bufferedReader.close(); - - return cnt - start; + public String toString() { + return FileVertexLoaderCallable.class.toString() + "@" + loaderId; } - } - class EdgeLoaderCallable implements Callable { - private int threadId; - private int callableId; - private BufferedReader bufferedReader; - private long start; - private long end; // exclusive - private EdgeReader edgeReader; + } - public EdgeLoaderCallable(int threadId, String inputPath, long startLine, long endLine) { - callableId = E_CALLABLE_ID.getAndAdd(1); - try { - FileReader fileReader = new FileReader(inputPath); - bufferedReader = new BufferedReader(fileReader); - } catch (FileNotFoundException e) { - e.printStackTrace(); - } + public class FileEdgeLoaderCallable extends AbstractEdgeLoaderCallable { - try { - // create vertex reader - edgeReader = - (EdgeReader) - createEdgeReaderMethod.invoke( - edgeInputFormat, inputSplit, taskAttemptContext); - logger.info("edge reader: " + edgeReader); - // vertexReaderClz = edgeReader.getClass(); - } catch (Exception e) { - e.printStackTrace(); - } + public FileEdgeLoaderCallable(int id, String inputPath, long start, long end) { + super(id,inputPath,start,end); + } - this.threadId = threadId; - this.start = startLine; - 
this.end = endLine; - // proxy.reserveNumVertices((int) this.end - (int) this.start); - logger.info( - "File loader {} creating edge callable: {}, file : {}, reader {}, thread id {}," - + " from {} to {}", - FileLoader.this, - EdgeLoaderCallable.this, - inputPath, - bufferedReader, - threadId, - startLine, - endLine); + @Override + BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { + FileReader fileReader = new FileReader(inputPath); + return new BufferedReader(fileReader); } @Override public String toString() { - return EdgeLoaderCallable.class.toString() + "@" + callableId; + return FileEdgeLoaderCallable.class.toString() + "@" + loaderId; } - /** - * Computes a result, or throws an exception if unable to do so. - * - * @return computed result - * @throws Exception if unable to compute a result - */ - @Override - public Long call() throws Exception { - long cnt = 0; - while (cnt < start) { - bufferedReader.readLine(); - cnt += 1; - } - logger.info("worker {} thread {} skipped lines {}", workerId, threadId, cnt); - // For text vertex reader, we set the data source manually. 
- EIFBufferedReaderField.set(edgeInputFormat, bufferedReader); - logger.info( - "worker {} thread {} has set the field {} to {}", - workerId, - threadId, - EIFBufferedReaderField, - bufferedReader); - edgeReader.initialize(inputSplit, taskAttemptContext); - edgeReader.setConf(edgeInputFormat.getConf()); + } - while (cnt < end && edgeReader.nextEdge()) { - WritableComparable sourceId = edgeReader.getCurrentSourceId(); - Edge edge = edgeReader.getCurrentEdge(); - proxy.addEdge(threadId, sourceId, edge.getTargetVertexId(), edge.getValue()); - cnt += 1; - } - bufferedReader.close(); - return cnt - start; - } - } } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java new file mode 100644 index 000000000000..703bd7525564 --- /dev/null +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java @@ -0,0 +1,50 @@ +package com.alibaba.graphscope.loader.impl; + +import java.io.BufferedReader; +import java.io.FileNotFoundException; +import java.net.URLClassLoader; + +public class HDFSLoader extends AbstractLoader { + + public HDFSLoader(int id, URLClassLoader classLoader) { + super(id, classLoader); + } + + @Override + protected AbstractVertexLoaderCallable createVertexLoaderCallable(int i, String inputPath, long min, long min1) { + return new HDFSVertexLoaderCallable(i, inputPath, min, min1); + } + + @Override + protected AbstractEdgeLoaderCallable createEdgeLoaderCallable(int i, String inputPath, long min, long min1) { + return new HDFSEdgeLoaderCallable(i, inputPath, min, min1); + } + + + @Override + public TYPE loaderType() { + return TYPE.HDFSLoader; + } + + public class HDFSVertexLoaderCallable extends AbstractVertexLoaderCallable { + @Override + BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { + return null; + } + + public 
HDFSVertexLoaderCallable(int id, String inputPath, long min, long min1) { + super(id, inputPath, min, min1); + } + } + + public class HDFSEdgeLoaderCallable extends AbstractEdgeLoaderCallable { + @Override + BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { + return null; + } + + public HDFSEdgeLoaderCallable(int id, String inputPath, long min, long min1) { + super(id, inputPath, min, min1); + } + } +} From 4ed32fe865ae645b8a5f94095abafcc67d139717 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 24 Jun 2024 14:32:33 +0800 Subject: [PATCH 27/52] impl the file loader --- analytical_engine/java/grape-giraph/pom.xml | 4 +++ .../alibaba/graphscope/loader/LoaderBase.java | 5 +-- .../loader/impl/AbstractLoader.java | 10 +++--- .../graphscope/loader/impl/HDFSLoader.java | 29 +++++++++++++++++ analytical_engine/java/grape-jdk/pom.xml | 4 +++ .../alibaba/graphscope/utils/FileUtils.java | 32 ++++++++++++++++++- analytical_engine/java/pom.xml | 6 ++++ 7 files changed, 83 insertions(+), 7 deletions(-) diff --git a/analytical_engine/java/grape-giraph/pom.xml b/analytical_engine/java/grape-giraph/pom.xml index 4b27f16718d2..8b3369cbd795 100644 --- a/analytical_engine/java/grape-giraph/pom.xml +++ b/analytical_engine/java/grape-giraph/pom.xml @@ -70,6 +70,10 @@ giraph-core 1.3.0-hadoop2 + + org.apache.hadoop + hadoop-client + diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java index 609713f8a213..ac227d4ca556 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java @@ -18,6 +18,7 @@ import com.alibaba.graphscope.stdcxx.FFIByteVecVector; import com.alibaba.graphscope.stdcxx.FFIIntVecVector; +import java.io.IOException; 
import java.util.concurrent.ExecutionException; /** @@ -46,9 +47,9 @@ void init( * @return Return an integer contains type params info. */ int loadVertices(String inputPath, String vformatClass) - throws ExecutionException, InterruptedException, ClassNotFoundException; + throws ExecutionException, InterruptedException, ClassNotFoundException, IOException; - void loadEdges(String inputPath, String eformatClass)throws ExecutionException, InterruptedException, ClassNotFoundException; + void loadEdges(String inputPath, String eformatClass) throws ExecutionException, InterruptedException, ClassNotFoundException, IOException; LoaderBase.TYPE loaderType(); diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java index 55181b784ff9..5a3f1df8fe80 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java @@ -42,6 +42,8 @@ public abstract class AbstractLoader implements LoaderBase { private static Logger logger = LoggerFactory.getLogger(AbstractLoader.class); + private static int BATCH_SIZE = 1024; + protected int loaderId; protected int threadNum; protected int workerId; @@ -163,7 +165,7 @@ public void init( */ @Override public int loadVertices(String inputPath, String vformatClass) - throws ExecutionException, InterruptedException, ClassNotFoundException { + throws ExecutionException, InterruptedException, ClassNotFoundException, IOException { logger.info("vertex input path {}, vformat class{}", inputPath, vformatClass.toString()); giraphConfiguration.setVertexInputFormatClass( (Class) this.classLoader.loadClass(vformatClass)); @@ -194,7 +196,7 @@ public int loadVertices(String inputPath, String vformatClass) @Override public void 
loadEdges(String inputPath, String eformatClass) - throws ExecutionException, InterruptedException, ClassNotFoundException { + throws ExecutionException, InterruptedException, ClassNotFoundException, IOException { logger.debug("edge input path {}", inputPath); giraphConfiguration.setEdgeInputFormatClass( (Class) this.classLoader.loadClass(eformatClass)); @@ -233,7 +235,7 @@ private void inferGiraphTypesFromJSON(Class child) giraphEDataClass.getName()); } - protected void loadVerticesImpl(String inputPath) throws ExecutionException, InterruptedException { + protected void loadVerticesImpl(String inputPath) throws ExecutionException, InterruptedException, IOException { // Try to get number of lines long numOfLines = getNumLinesOfFile(inputPath); logger.info( @@ -364,7 +366,7 @@ public Long call() throws Exception { } } - protected void loadEdgesImpl(String filePath) throws ExecutionException, InterruptedException { + protected void loadEdgesImpl(String filePath) throws ExecutionException, InterruptedException, IOException { // Try to get number of lines long numOfLines = getNumLinesOfFile(filePath); long linesPerWorker = (numOfLines + (workerNum - 1)) / workerNum; diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java index 703bd7525564..a7944affce87 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java @@ -1,7 +1,12 @@ package com.alibaba.graphscope.loader.impl; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; + import java.io.BufferedReader; import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStreamReader; import java.net.URLClassLoader; public class HDFSLoader extends 
AbstractLoader { @@ -29,22 +34,46 @@ public TYPE loaderType() { public class HDFSVertexLoaderCallable extends AbstractVertexLoaderCallable { @Override BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { + //Expect a string with format: hdfs://host:port/path + Path path = new Path(inputPath); + try { + return new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); + } catch (IOException e) { + e.printStackTrace(); + } return null; } public HDFSVertexLoaderCallable(int id, String inputPath, long min, long min1) { super(id, inputPath, min, min1); } + + @Override + public String toString() { + return HDFSVertexLoaderCallable.class.toString() + "@" + loaderId; + } } public class HDFSEdgeLoaderCallable extends AbstractEdgeLoaderCallable { @Override BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { + //Expect a string with format: hdfs://host:port/path + Path path = new Path(inputPath); + try { + return new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); + } catch (IOException e) { + e.printStackTrace(); + } return null; } public HDFSEdgeLoaderCallable(int id, String inputPath, long min, long min1) { super(id, inputPath, min, min1); } + + @Override + public String toString() { + return HDFSEdgeLoaderCallable.class.toString() + "@" + loaderId; + } } } diff --git a/analytical_engine/java/grape-jdk/pom.xml b/analytical_engine/java/grape-jdk/pom.xml index 3874417b76ba..282486e36d29 100644 --- a/analytical_engine/java/grape-jdk/pom.xml +++ b/analytical_engine/java/grape-jdk/pom.xml @@ -77,6 +77,10 @@ scala-library provided + + org.apache.hadoop + hadoop-client + diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java index 7c0a794f080a..448bc8aa799b 100644 --- 
a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java @@ -16,9 +16,13 @@ package com.alibaba.graphscope.utils; +import org.apache.hadoop.conf.Configuration; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; @@ -26,7 +30,17 @@ public class FileUtils { private static Logger logger = LoggerFactory.getLogger(FileUtils.class.getName()); - public static long getNumLinesOfFile(String path) { + public static long getNumLinesOfFile(String path) throws IOException { + //if path start with hdfs://, we should use hadoop api to get the number of lines + if (path.startsWith("hdfs://")) { + return getNumLinesOfHdfsFile(path); + } + else { + return getNumLinesOfLocalFile(path); + } + } + + public static long getNumLinesOfLocalFile(String path) { long count = 0; try { Path p = Paths.get(path); @@ -36,4 +50,20 @@ public static long getNumLinesOfFile(String path) { } return count; } + + public static long getNumLinesOfHdfsFile(String input) throws IOException { + org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(input); + BufferedReader reader = new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); + long count = 0; + try { + while (reader.readLine() != null) { + count++; + } + reader.close(); + } catch (IOException e) { + logger.error("Failed to read file: " + input); + e.printStackTrace(); + } + return count; + } } diff --git a/analytical_engine/java/pom.xml b/analytical_engine/java/pom.xml index d6cb9d97b879..9107463d0948 100644 --- a/analytical_engine/java/pom.xml +++ b/analytical_engine/java/pom.xml @@ -101,6 +101,7 @@ 1.3.2 2.3.5 2.9.0 + 2.7.3 @@ -273,6 +274,11 @@ ${grpc.version} test + + org.apache.hadoop + 
hadoop-client + ${hadoop.version} + From aeb2da9a5c3dbe5ef20584a45d9168dc3663d023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 24 Jun 2024 17:48:44 +0800 Subject: [PATCH 28/52] move to giraph sdk --- .../graphscope/loader/LoaderUtils.java | 43 +++++++++--- .../loader/impl/AbstractLoader.java | 2 +- .../graphscope/loader/impl/FileLoader.java | 37 +--------- .../alibaba/graphscope/utils/FileUtils.java | 69 ------------------- 4 files changed, 35 insertions(+), 116 deletions(-) delete mode 100644 analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java index ff1094fb1859..37aa176a8432 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java @@ -15,6 +15,7 @@ */ package com.alibaba.graphscope.loader; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -28,6 +29,9 @@ import java.io.File; import java.io.IOException; import java.io.InputStreamReader; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; public class LoaderUtils { @@ -46,22 +50,41 @@ public static boolean checkFileExist(String path) { temp = new File(path); return temp.exists(); } + public static long getNumLinesOfFile(String path) throws IOException { + //if path start with hdfs://, we should use hadoop api to get the number of lines + if (path.startsWith("hdfs://")) { + return getNumLinesOfHdfsFile(path); + } + else { + return getNumLinesOfLocalFile(path); + } + } + + public static long getNumLinesOfLocalFile(String path) { + long 
count = 0; + try { + Path p = Paths.get(path); + count = Files.lines(p).count(); + } catch (Exception e) { + e.printStackTrace(); + } + return count; + } - public static long getNumLinesOfFile(String path) { - ProcessBuilder builder = new ProcessBuilder("wc", "-l", path); - builder.inheritIO().redirectOutput(ProcessBuilder.Redirect.PIPE); - Process process = null; + public static long getNumLinesOfHdfsFile(String input) throws IOException { + org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(input); + BufferedReader reader = new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); + long count = 0; try { - process = builder.start(); - try (BufferedReader reader = - new BufferedReader(new InputStreamReader(process.getInputStream()))) { - String res = reader.readLine().trim().split("\\s+")[0]; - return Long.parseLong(res); + while (reader.readLine() != null) { + count++; } + reader.close(); } catch (IOException e) { + logger.error("Failed to read file: " + input); e.printStackTrace(); } - return 0; + return count; } /** diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java index 5a3f1df8fe80..17989cc27ac6 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java @@ -36,7 +36,7 @@ import java.util.concurrent.atomic.AtomicInteger; import static com.alibaba.graphscope.loader.LoaderUtils.generateTypeInt; -import static com.alibaba.graphscope.utils.FileUtils.getNumLinesOfFile; +import static com.alibaba.graphscope.loader.LoaderUtils.getNumLinesOfFile; import static org.apache.giraph.utils.ReflectionUtils.getTypeArguments; public abstract class AbstractLoader implements LoaderBase { diff 
--git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java index 1bcf9932d40e..99c8ec252a71 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java @@ -15,50 +15,15 @@ */ package com.alibaba.graphscope.loader.impl; -import static com.alibaba.graphscope.loader.LoaderUtils.generateTypeInt; -import static com.alibaba.graphscope.utils.FileUtils.getNumLinesOfFile; - -import static org.apache.giraph.utils.ReflectionUtils.getTypeArguments; - -import com.alibaba.graphscope.graph.impl.VertexImpl; -import com.alibaba.graphscope.loader.GraphDataBufferManager; import com.alibaba.graphscope.loader.LoaderBase; -import com.alibaba.graphscope.stdcxx.FFIByteVecVector; -import com.alibaba.graphscope.stdcxx.FFIIntVecVector; -import com.google.common.base.Preconditions; - -import org.apache.giraph.conf.GiraphConfiguration; -import org.apache.giraph.conf.ImmutableClassesGiraphConfiguration; -import org.apache.giraph.edge.Edge; -import org.apache.giraph.graph.Vertex; -import org.apache.giraph.io.EdgeInputFormat; -import org.apache.giraph.io.EdgeReader; -import org.apache.giraph.io.VertexInputFormat; -import org.apache.giraph.io.VertexReader; -import org.apache.giraph.io.formats.TextEdgeInputFormat; -import org.apache.giraph.io.formats.TextVertexInputFormat; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.mapreduce.InputSplit; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.TaskAttemptID; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.BufferedReader; import java.io.FileNotFoundException; import 
java.io.FileReader; -import java.io.IOException; -import java.lang.reflect.Field; -import java.lang.reflect.Method; import java.net.URLClassLoader; -import java.util.concurrent.Callable; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; -import java.util.concurrent.Future; -import java.util.concurrent.atomic.AtomicInteger; /** * Load from a file on system. diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java deleted file mode 100644 index 448bc8aa799b..000000000000 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FileUtils.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Copyright 2022 Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.alibaba.graphscope.utils; - -import org.apache.hadoop.conf.Configuration; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; - -public class FileUtils { - private static Logger logger = LoggerFactory.getLogger(FileUtils.class.getName()); - - public static long getNumLinesOfFile(String path) throws IOException { - //if path start with hdfs://, we should use hadoop api to get the number of lines - if (path.startsWith("hdfs://")) { - return getNumLinesOfHdfsFile(path); - } - else { - return getNumLinesOfLocalFile(path); - } - } - - public static long getNumLinesOfLocalFile(String path) { - long count = 0; - try { - Path p = Paths.get(path); - count = Files.lines(p).count(); - } catch (Exception e) { - e.printStackTrace(); - } - return count; - } - - public static long getNumLinesOfHdfsFile(String input) throws IOException { - org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(input); - BufferedReader reader = new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); - long count = 0; - try { - while (reader.readLine() != null) { - count++; - } - reader.close(); - } catch (IOException e) { - logger.error("Failed to read file: " + input); - e.printStackTrace(); - } - return count; - } -} From bb579ba11a2ae8f8654b88270887edc3edb6858a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 24 Jun 2024 20:09:19 +0800 Subject: [PATCH 29/52] refactor loader --- .../alibaba/graphscope/loader/LoaderBase.java | 6 - .../graphscope/loader/LoaderFactory.java | 13 +- ...AbstractLoader.java => DefaultLoader.java} | 126 ++++++++---------- .../graphscope/loader/impl/FileLoader.java | 99 -------------- .../graphscope/loader/impl/HDFSLoader.java | 79 ----------- 5 files changed, 61 insertions(+), 262 deletions(-) 
rename analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/{AbstractLoader.java => DefaultLoader.java} (91%) delete mode 100644 analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java delete mode 100644 analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java index ac227d4ca556..c09646de227a 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderBase.java @@ -51,12 +51,6 @@ int loadVertices(String inputPath, String vformatClass) void loadEdges(String inputPath, String eformatClass) throws ExecutionException, InterruptedException, ClassNotFoundException, IOException; - LoaderBase.TYPE loaderType(); - int concurrency(); - enum TYPE { - FileLoader, - HDFSLoader, - } } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java index b5b47c4bf357..361a7a68759a 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderFactory.java @@ -1,18 +1,11 @@ package com.alibaba.graphscope.loader; -import com.alibaba.graphscope.loader.impl.FileLoader; -import com.alibaba.graphscope.loader.impl.HDFSLoader; +import com.alibaba.graphscope.loader.impl.DefaultLoader; import java.net.URLClassLoader; public class LoaderFactory { - public static LoaderBase createLoader(LoaderBase.TYPE type, int id, URLClassLoader classLoader) { - 
if (type == LoaderBase.TYPE.FileLoader) { - return new FileLoader(id, classLoader); - } - if (type == LoaderBase.TYPE.HDFSLoader){ - return new HDFSLoader(id, classLoader); - } - throw new IllegalArgumentException("Unsupported loader type: " + type); + public static LoaderBase createLoader(int id, URLClassLoader classLoader) { + return new DefaultLoader(id, classLoader); } } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java similarity index 91% rename from analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java rename to analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java index 17989cc27ac6..24f4af11d97a 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/AbstractLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java @@ -26,9 +26,9 @@ import org.slf4j.LoggerFactory; import java.io.BufferedReader; -import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; +import java.io.InputStreamReader; import java.lang.reflect.Field; import java.lang.reflect.Method; import java.net.URLClassLoader; @@ -39,24 +39,18 @@ import static com.alibaba.graphscope.loader.LoaderUtils.getNumLinesOfFile; import static org.apache.giraph.utils.ReflectionUtils.getTypeArguments; -public abstract class AbstractLoader implements LoaderBase { - private static Logger logger = LoggerFactory.getLogger(AbstractLoader.class); - +public class DefaultLoader implements LoaderBase { + protected static AtomicInteger LOADER_ID = new AtomicInteger(0); + protected static AtomicInteger V_CALLABLE_ID = new AtomicInteger(0); + protected static AtomicInteger E_CALLABLE_ID = new AtomicInteger(0); + 
private static Logger logger = LoggerFactory.getLogger(DefaultLoader.class); private static int BATCH_SIZE = 1024; - protected int loaderId; protected int threadNum; protected int workerId; protected int workerNum; - protected Class vertexInputFormatClz; protected Class edgeInputFormatClz; - - protected static AtomicInteger LOADER_ID = new AtomicInteger(0); - protected static AtomicInteger V_CALLABLE_ID = new AtomicInteger(0); - protected static AtomicInteger E_CALLABLE_ID = new AtomicInteger(0); - - protected VertexInputFormat vertexInputFormat; protected EdgeInputFormat edgeInputFormat; @@ -95,7 +89,7 @@ public String[] getLocations() throws IOException, InterruptedException { protected Class giraphEDataClass; protected URLClassLoader classLoader; - public AbstractLoader(int id, URLClassLoader classLoader) { + public DefaultLoader(int id, URLClassLoader classLoader) { this.classLoader = classLoader; logger.info("FileLoader using classLoader {} to load vif and eif", classLoader); this.giraphConfiguration.setClassLoader(this.classLoader); @@ -262,9 +256,7 @@ protected void loadVerticesImpl(String inputPath) throws ExecutionException, Int for (int i = 0; i < threadNum; ++i) { AbstractVertexLoaderCallable vertexLoaderCallable = -// new VertexLoaderCallable( -// i, inputPath, Math.min(cur, end), Math.min(cur + chunkSize, end)); - createVertexLoaderCallable(i, inputPath, Math.min(cur, end), Math.min(cur + chunkSize, end)); + new AbstractVertexLoaderCallable(i, inputPath, Math.min(cur, end), Math.min(cur + chunkSize, end)); futures[i] = executor.submit(vertexLoaderCallable); cur += chunkSize; } @@ -276,7 +268,51 @@ protected void loadVerticesImpl(String inputPath) throws ExecutionException, Int logger.info("[vertices] worker {} loaded {} lines ", workerId, sum); } - public abstract class AbstractVertexLoaderCallable implements Callable { + BufferedReader createBufferedReader(String inputPath) throws IOException { + if (inputPath.startsWith("hdfs://")) { + 
org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(inputPath); + return new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); + } else { + FileReader fileReader = new FileReader(inputPath); + return new BufferedReader(fileReader); + } + } + + protected void loadEdgesImpl(String filePath) throws ExecutionException, InterruptedException, IOException { + // Try to get number of lines + long numOfLines = getNumLinesOfFile(filePath); + long linesPerWorker = (numOfLines + (workerNum - 1)) / workerNum; + long start = Math.min(linesPerWorker * workerId, numOfLines); + long end = Math.min(linesPerWorker * (workerId + 1), numOfLines); + long chunkSize = (end - start + threadNum - 1) / threadNum; + proxy.reserveNumEdges((int) (end - start)); + logger.debug( + "[reading edge] total lines {}, worker {} read {}, thread num {}, chunkSize {}", + numOfLines, + workerId, + end - start, + threadNum, + chunkSize); + long cur = start; + + Future[] futures = new Future[threadNum]; + + for (int i = 0; i < threadNum; ++i) { + DefaultLoader.AbstractEdgeLoaderCallable edgeLoaderCallable = + new DefaultLoader.AbstractEdgeLoaderCallable( + i, filePath, Math.min(cur, end), Math.min(cur + chunkSize, end)); + futures[i] = executor.submit(edgeLoaderCallable); + cur += chunkSize; + } + + long sum = 0; + for (int i = 0; i < threadNum; ++i) { + sum += (Long) futures[i].get(); + } + logger.info("[edges] worker {} loaded {} lines ", workerId, sum); + } + + public class AbstractVertexLoaderCallable implements Callable { private int threadId; private int callableId; private BufferedReader bufferedReader; @@ -284,15 +320,13 @@ public abstract class AbstractVertexLoaderCallable implements Callable { private long end; // exclusive private VertexReader vertexReader; - abstract BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException; - public AbstractVertexLoaderCallable(int threadId, String inputPath, long startLine, long 
endLine) { callableId = V_CALLABLE_ID.getAndAdd(1); try { FileReader fileReader = new FileReader(inputPath); // bufferedReader = new BufferedReader(fileReader); bufferedReader = createBufferedReader(inputPath); - } catch (FileNotFoundException e) { + } catch (IOException e) { e.printStackTrace(); } @@ -315,7 +349,7 @@ public AbstractVertexLoaderCallable(int threadId, String inputPath, long startLi logger.info( "Abstract loader {} creating vertex loader callable: {}, file : {}, reader {}," + " thread id {}, from {} to {}", - AbstractLoader.this, + DefaultLoader.this, AbstractVertexLoaderCallable.this, inputPath, bufferedReader, @@ -366,43 +400,7 @@ public Long call() throws Exception { } } - protected void loadEdgesImpl(String filePath) throws ExecutionException, InterruptedException, IOException { - // Try to get number of lines - long numOfLines = getNumLinesOfFile(filePath); - long linesPerWorker = (numOfLines + (workerNum - 1)) / workerNum; - long start = Math.min(linesPerWorker * workerId, numOfLines); - long end = Math.min(linesPerWorker * (workerId + 1), numOfLines); - long chunkSize = (end - start + threadNum - 1) / threadNum; - proxy.reserveNumEdges((int) (end - start)); - logger.debug( - "[reading edge] total lines {}, worker {} read {}, thread num {}, chunkSize {}", - numOfLines, - workerId, - end - start, - threadNum, - chunkSize); - long cur = start; - - Future[] futures = new Future[threadNum]; - - for (int i = 0; i < threadNum; ++i) { - AbstractLoader.AbstractEdgeLoaderCallable edgeLoaderCallable = - createEdgeLoaderCallable( - i, filePath, Math.min(cur, end), Math.min(cur + chunkSize, end)); -// new AbstractLoader.AbstractEdgeLoaderCallable( -// i, filePath, Math.min(cur, end), Math.min(cur + chunkSize, end)); - futures[i] = executor.submit(edgeLoaderCallable); - cur += chunkSize; - } - - long sum = 0; - for (int i = 0; i < threadNum; ++i) { - sum += (Long) futures[i].get(); - } - logger.info("[edges] worker {} loaded {} lines ", workerId, sum); - } 
- - public abstract class AbstractEdgeLoaderCallable implements Callable { + public class AbstractEdgeLoaderCallable implements Callable { private int threadId; private int callableId; private BufferedReader bufferedReader; @@ -410,15 +408,13 @@ public abstract class AbstractEdgeLoaderCallable implements Callable { private long end; // exclusive private EdgeReader edgeReader; - abstract BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException; - public AbstractEdgeLoaderCallable(int threadId, String inputPath, long startLine, long endLine) { callableId = E_CALLABLE_ID.getAndAdd(1); try { // bufferedReader = new BufferedReader(fileReader); bufferedReader = createBufferedReader(inputPath); - } catch (FileNotFoundException e) { + } catch (IOException e) { e.printStackTrace(); } @@ -441,7 +437,7 @@ public AbstractEdgeLoaderCallable(int threadId, String inputPath, long startLine logger.info( "File loader {} creating edge callable: {}, file : {}, reader {}, thread id {}," + " from {} to {}", - AbstractLoader.this, + DefaultLoader.this, AbstractEdgeLoaderCallable.this, inputPath, bufferedReader, @@ -487,10 +483,4 @@ public Long call() throws Exception { } } -// protected abstract void loadVerticesImpl(String inputPath) throws ExecutionException, InterruptedException; - - protected abstract AbstractVertexLoaderCallable createVertexLoaderCallable(int i, String inputPath, long min, long min1); - - protected abstract AbstractEdgeLoaderCallable createEdgeLoaderCallable(int i, String inputPath, long min, long min1); - } diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java deleted file mode 100644 index 99c8ec252a71..000000000000 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/FileLoader.java +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright 2021 
Alibaba Group Holding Limited. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package com.alibaba.graphscope.loader.impl; - -import com.alibaba.graphscope.loader.LoaderBase; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.FileReader; -import java.net.URLClassLoader; - -/** - * Load from a file on system. - */ -public class FileLoader extends AbstractLoader { - private static Logger logger = LoggerFactory.getLogger(FileLoader.class); - - public FileLoader(int id, URLClassLoader classLoader) { - super(id, classLoader); - } - - @Override - protected AbstractVertexLoaderCallable createVertexLoaderCallable(int i, String inputPath, long min, long min1) { - return new FileVertexLoaderCallable(i, inputPath, min, min1); - } - - @Override - protected AbstractEdgeLoaderCallable createEdgeLoaderCallable(int i, String inputPath, long min, long min1) { - return new FileEdgeLoaderCallable(i, inputPath, min, min1); - } - - - @Override - public LoaderBase.TYPE loaderType() { - return TYPE.FileLoader; - } - - @Override - public String toString() { - return FileLoader.class.toString() + "@" + loaderId; - } - - - public class FileVertexLoaderCallable extends AbstractVertexLoaderCallable { - - public FileVertexLoaderCallable(int id, String inputPath, long start, long end) { - super(id,inputPath,start,end); - } - - @Override - BufferedReader createBufferedReader(String 
inputPath) throws FileNotFoundException { - FileReader fileReader = new FileReader(inputPath); - return new BufferedReader(fileReader); - } - - @Override - public String toString() { - return FileVertexLoaderCallable.class.toString() + "@" + loaderId; - } - - } - - public class FileEdgeLoaderCallable extends AbstractEdgeLoaderCallable { - - public FileEdgeLoaderCallable(int id, String inputPath, long start, long end) { - super(id,inputPath,start,end); - } - - @Override - BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { - FileReader fileReader = new FileReader(inputPath); - return new BufferedReader(fileReader); - } - - @Override - public String toString() { - return FileEdgeLoaderCallable.class.toString() + "@" + loaderId; - } - - } - - -} diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java deleted file mode 100644 index a7944affce87..000000000000 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/HDFSLoader.java +++ /dev/null @@ -1,79 +0,0 @@ -package com.alibaba.graphscope.loader.impl; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.net.URLClassLoader; - -public class HDFSLoader extends AbstractLoader { - - public HDFSLoader(int id, URLClassLoader classLoader) { - super(id, classLoader); - } - - @Override - protected AbstractVertexLoaderCallable createVertexLoaderCallable(int i, String inputPath, long min, long min1) { - return new HDFSVertexLoaderCallable(i, inputPath, min, min1); - } - - @Override - protected AbstractEdgeLoaderCallable createEdgeLoaderCallable(int i, String inputPath, long min, long min1) { - return new 
HDFSEdgeLoaderCallable(i, inputPath, min, min1); - } - - - @Override - public TYPE loaderType() { - return TYPE.HDFSLoader; - } - - public class HDFSVertexLoaderCallable extends AbstractVertexLoaderCallable { - @Override - BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { - //Expect a string with format: hdfs://host:port/path - Path path = new Path(inputPath); - try { - return new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); - } catch (IOException e) { - e.printStackTrace(); - } - return null; - } - - public HDFSVertexLoaderCallable(int id, String inputPath, long min, long min1) { - super(id, inputPath, min, min1); - } - - @Override - public String toString() { - return HDFSVertexLoaderCallable.class.toString() + "@" + loaderId; - } - } - - public class HDFSEdgeLoaderCallable extends AbstractEdgeLoaderCallable { - @Override - BufferedReader createBufferedReader(String inputPath) throws FileNotFoundException { - //Expect a string with format: hdfs://host:port/path - Path path = new Path(inputPath); - try { - return new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); - } catch (IOException e) { - e.printStackTrace(); - } - return null; - } - - public HDFSEdgeLoaderCallable(int id, String inputPath, long min, long min1) { - super(id, inputPath, min, min1); - } - - @Override - public String toString() { - return HDFSEdgeLoaderCallable.class.toString() + "@" + loaderId; - } - } -} From 7296fd47d6d0e9e7eb2efdf73f9e01a2a049993f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 25 Jun 2024 11:40:34 +0800 Subject: [PATCH 30/52] update to 2.10.2 --- analytical_engine/java/grape-giraph/pom.xml | 12 ++++++------ .../graphscope/loader/impl/DefaultLoader.java | 3 ++- analytical_engine/java/pom.xml | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/analytical_engine/java/grape-giraph/pom.xml 
b/analytical_engine/java/grape-giraph/pom.xml index 8b3369cbd795..9babfe7ea082 100644 --- a/analytical_engine/java/grape-giraph/pom.xml +++ b/analytical_engine/java/grape-giraph/pom.xml @@ -41,16 +41,16 @@ json 1 0 - 1.2.1 + 1.21 true - - org.apache.hadoop - hadoop-core - ${hadoop-core.version} - + + + + + com.alibaba.graphscope grape-jdk diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java index 24f4af11d97a..91d69bcd29ce 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java @@ -22,6 +22,7 @@ import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; +import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -82,7 +83,7 @@ public String[] getLocations() throws IOException, InterruptedException { protected GiraphConfiguration giraphConfiguration = new GiraphConfiguration(configuration); protected TaskAttemptID taskAttemptID = new TaskAttemptID(); protected TaskAttemptContext taskAttemptContext = - new TaskAttemptContext(configuration, taskAttemptID); + new TaskAttemptContextImpl(configuration, taskAttemptID); protected Class giraphOidClass; protected Class giraphVDataClass; diff --git a/analytical_engine/java/pom.xml b/analytical_engine/java/pom.xml index 9107463d0948..6f9236a1d921 100644 --- a/analytical_engine/java/pom.xml +++ b/analytical_engine/java/pom.xml @@ -101,7 +101,7 @@ 1.3.2 2.3.5 2.9.0 - 2.7.3 + 2.10.2 From c5bf0d4e9cb569a57ecab468edb67d68ad4476f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 25 Jun 2024 12:07:22 +0800 Subject: 
[PATCH 31/52] add dummy impl --- .../graphscope/loader/impl/DefaultLoader.java | 3 +- .../loader/impl/DummyTaskAttemptContext.java | 245 ++++++++++++++++++ 2 files changed, 246 insertions(+), 2 deletions(-) create mode 100644 analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DummyTaskAttemptContext.java diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java index 91d69bcd29ce..cc30e0885435 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java @@ -22,7 +22,6 @@ import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.TaskAttemptID; -import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -83,7 +82,7 @@ public String[] getLocations() throws IOException, InterruptedException { protected GiraphConfiguration giraphConfiguration = new GiraphConfiguration(configuration); protected TaskAttemptID taskAttemptID = new TaskAttemptID(); protected TaskAttemptContext taskAttemptContext = - new TaskAttemptContextImpl(configuration, taskAttemptID); + new DummyTaskAttemptContext(); protected Class giraphOidClass; protected Class giraphVDataClass; diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DummyTaskAttemptContext.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DummyTaskAttemptContext.java new file mode 100644 index 000000000000..7dee3236f913 --- /dev/null +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DummyTaskAttemptContext.java 
@@ -0,0 +1,245 @@ +package com.alibaba.graphscope.loader.impl; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.RawComparator; +import org.apache.hadoop.mapreduce.*; +import org.apache.hadoop.security.Credentials; + +import java.io.IOException; +import java.net.URI; + +public class DummyTaskAttemptContext implements TaskAttemptContext { + private String status; + public DummyTaskAttemptContext() { + } + + @Override + public TaskAttemptID getTaskAttemptID() { + return null; + } + + @Override + public void setStatus(String s) { + this.status = s; + } + + @Override + public String getStatus() { + return this.status; + } + + @Override + public float getProgress() { + return 0; + } + + @Override + public Counter getCounter(Enum anEnum) { + return null; + } + + @Override + public Counter getCounter(String s, String s1) { + return null; + } + + @Override + public Configuration getConfiguration() { + return null; + } + + @Override + public Credentials getCredentials() { + return null; + } + + @Override + public JobID getJobID() { + return null; + } + + @Override + public int getNumReduceTasks() { + return 0; + } + + @Override + public Path getWorkingDirectory() throws IOException { + return null; + } + + @Override + public Class getOutputKeyClass() { + return null; + } + + @Override + public Class getOutputValueClass() { + return null; + } + + @Override + public Class getMapOutputKeyClass() { + return null; + } + + @Override + public Class getMapOutputValueClass() { + return null; + } + + @Override + public String getJobName() { + return null; + } + + @Override + public Class> getInputFormatClass() throws ClassNotFoundException { + return null; + } + + @Override + public Class> getMapperClass() throws ClassNotFoundException { + return null; + } + + @Override + public Class> getCombinerClass() throws ClassNotFoundException { + return null; + } + + @Override + public Class> getReducerClass() throws 
ClassNotFoundException { + return null; + } + + @Override + public Class> getOutputFormatClass() throws ClassNotFoundException { + return null; + } + + @Override + public Class> getPartitionerClass() throws ClassNotFoundException { + return null; + } + + @Override + public RawComparator getSortComparator() { + return null; + } + + @Override + public String getJar() { + return null; + } + + @Override + public RawComparator getCombinerKeyGroupingComparator() { + return null; + } + + @Override + public RawComparator getGroupingComparator() { + return null; + } + + @Override + public boolean getJobSetupCleanupNeeded() { + return false; + } + + @Override + public boolean getTaskCleanupNeeded() { + return false; + } + + @Override + public boolean getProfileEnabled() { + return false; + } + + @Override + public String getProfileParams() { + return null; + } + + @Override + public Configuration.IntegerRanges getProfileTaskRange(boolean b) { + return null; + } + + @Override + public String getUser() { + return null; + } + + /** + * @deprecated + */ + @Override + public boolean getSymlink() { + return false; + } + + @Override + public Path[] getArchiveClassPaths() { + return new Path[0]; + } + + @Override + public URI[] getCacheArchives() throws IOException { + return new URI[0]; + } + + @Override + public URI[] getCacheFiles() throws IOException { + return new URI[0]; + } + + /** + * @deprecated + */ + @Override + public Path[] getLocalCacheArchives() throws IOException { + return new Path[0]; + } + + /** + * @deprecated + */ + @Override + public Path[] getLocalCacheFiles() throws IOException { + return new Path[0]; + } + + @Override + public Path[] getFileClassPaths() { + return new Path[0]; + } + + @Override + public String[] getArchiveTimestamps() { + return new String[0]; + } + + @Override + public String[] getFileTimestamps() { + return new String[0]; + } + + @Override + public int getMaxMapAttempts() { + return 0; + } + + @Override + public int getMaxReduceAttempts() 
{ + return 0; + } + + @Override + public void progress() { + + } +} From e122746ef3a62447405f3ebf5bbb35f609ae470f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 25 Jun 2024 12:23:20 +0800 Subject: [PATCH 32/52] try fix waiting --- .../graphscope/loader/LoaderUtils.java | 20 +++++++++++++++++-- .../graphscope/loader/impl/DefaultLoader.java | 6 ++---- 2 files changed, 20 insertions(+), 6 deletions(-) diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java index 37aa176a8432..02b22c03d415 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java @@ -71,9 +71,25 @@ public static long getNumLinesOfLocalFile(String path) { return count; } - public static long getNumLinesOfHdfsFile(String input) throws IOException { + public static String getEndpointFromPath(String path) { + if (path.startsWith("hdfs://")) { + int index = path.indexOf("/", 7); + return path.substring(0, index); + } + return null; + } + + public static BufferedReader createHdfsBufferedReader(String input) throws IOException { org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(input); - BufferedReader reader = new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); + Configuration conf = new Configuration(); + String endpoint = getEndpointFromPath(input); + logger.info("endpoint: " + endpoint); + conf.set("fs.defaultFS", endpoint); + return new BufferedReader(new InputStreamReader(path.getFileSystem(conf).open(path))); + } + + public static long getNumLinesOfHdfsFile(String input) throws IOException { + BufferedReader reader = createHdfsBufferedReader(input); long count = 0; try { while (reader.readLine() != null) { diff --git 
a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java index cc30e0885435..a1693886dce9 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java @@ -35,8 +35,7 @@ import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicInteger; -import static com.alibaba.graphscope.loader.LoaderUtils.generateTypeInt; -import static com.alibaba.graphscope.loader.LoaderUtils.getNumLinesOfFile; +import static com.alibaba.graphscope.loader.LoaderUtils.*; import static org.apache.giraph.utils.ReflectionUtils.getTypeArguments; public class DefaultLoader implements LoaderBase { @@ -270,8 +269,7 @@ protected void loadVerticesImpl(String inputPath) throws ExecutionException, Int BufferedReader createBufferedReader(String inputPath) throws IOException { if (inputPath.startsWith("hdfs://")) { - org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(inputPath); - return new BufferedReader(new InputStreamReader(path.getFileSystem(new Configuration()).open(path))); + return createHdfsBufferedReader(inputPath); } else { FileReader fileReader = new FileReader(inputPath); return new BufferedReader(fileReader); From 73de18f30d9058fd045d9ec7b99d209b11bd94c7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Tue, 25 Jun 2024 15:22:45 +0800 Subject: [PATCH 33/52] try to catch error --- .../com/alibaba/graphscope/loader/LoaderUtils.java | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java index 02b22c03d415..e5d58063a148 
100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java @@ -16,6 +16,7 @@ package com.alibaba.graphscope.loader; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.IntWritable; @@ -85,7 +86,16 @@ public static BufferedReader createHdfsBufferedReader(String input) throws IOExc String endpoint = getEndpointFromPath(input); logger.info("endpoint: " + endpoint); conf.set("fs.defaultFS", endpoint); - return new BufferedReader(new InputStreamReader(path.getFileSystem(conf).open(path))); + FileSystem fileSystem = null; + try { + fileSystem = FileSystem.get(conf); + } + catch (Exception e) { + logger.error("Failed to get file system: " + input); + e.printStackTrace(); + return null; + } + return new BufferedReader(new InputStreamReader(fileSystem.open(path))); } public static long getNumLinesOfHdfsFile(String input) throws IOException { From 11fcd4b91ca7d4504fde09c81a47f83ce7c36251 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 26 Jun 2024 12:24:18 +0800 Subject: [PATCH 34/52] support reading from hdfs Committed-by: xiaolei.zl from Dev container --- .../core/java/java_loader_invoker.h | 38 ++++++++++--------- analytical_engine/core/java/javasdk.cc | 3 ++ analytical_engine/java/grape-demo/pom.xml | 4 +- analytical_engine/java/grape-giraph/pom.xml | 9 +++++ .../graphscope/loader/LoaderUtils.java | 1 + .../graphscope/loader/impl/DefaultLoader.java | 4 -- analytical_engine/java/grape-jdk/pom.xml | 4 +- analytical_engine/java/grape-runtime/pom.xml | 12 +++++- .../src/main/resources/log4j.properties | 8 ++++ analytical_engine/java/grape_jvm_opts | 23 +++++------ analytical_engine/java/pom.xml | 5 +++ 11 files changed, 74 insertions(+), 37 deletions(-) create mode 100644 
analytical_engine/java/grape-runtime/src/main/resources/log4j.properties diff --git a/analytical_engine/core/java/java_loader_invoker.h b/analytical_engine/core/java/java_loader_invoker.h index c572eb2fd054..e3afa3761d55 100644 --- a/analytical_engine/core/java/java_loader_invoker.h +++ b/analytical_engine/core/java/java_loader_invoker.h @@ -137,14 +137,15 @@ boost::leaf::result BuildArray( return {}; } -static constexpr const char* JAVA_LOADER_CLASS = - "com/alibaba/graphscope/loader/impl/FileLoader"; -static constexpr const char* JAVA_LOADER_CREATE_METHOD = "create"; +static constexpr const char* JAVA_LOADER_FACTORY_CLASS = + "com/alibaba/graphscope/loader/LoaderFactory"; +static constexpr const char* JAVA_BASE_LOADER_CLASS = + "com/alibaba/graphscope/loader/LoaderBase"; +static constexpr const char* JAVA_LOADER_CREATE_METHOD = "createLoader"; static constexpr const char* JAVA_LOADER_CREATE_SIG = - "(Ljava/net/URLClassLoader;)Lcom/alibaba/graphscope/loader/impl/" - "FileLoader;"; -static constexpr const char* JAVA_LOADER_LOAD_VE_METHOD = - "loadVerticesAndEdges"; + "(ILjava/net/" + "URLClassLoader;)Lcom/alibaba/graphscope/loader/LoaderBase;"; +static constexpr const char* JAVA_LOADER_LOAD_VE_METHOD = "loadVertices"; static constexpr const char* JAVA_LOADER_LOAD_VE_SIG = "(Ljava/lang/String;Ljava/lang/String;)I"; static constexpr const char* JAVA_LOADER_LOAD_E_METHOD = "loadEdges"; @@ -389,16 +390,19 @@ class JavaLoaderInvoker { gs::JNIEnvMark m; if (m.env()) { JNIEnv* env = m.env(); - jclass loader_class = - LoadClassWithClassLoader(env, gs_class_loader_obj, JAVA_LOADER_CLASS); - CHECK_NOTNULL(loader_class); + jclass factory_class = LoadClassWithClassLoader( + env, gs_class_loader_obj, JAVA_LOADER_FACTORY_CLASS); + jclass base_loader_class = LoadClassWithClassLoader( + env, gs_class_loader_obj, JAVA_BASE_LOADER_CLASS); + CHECK_NOTNULL(factory_class); + CHECK_NOTNULL(base_loader_class); // construct java loader obj. 
jmethodID create_method = env->GetStaticMethodID( - loader_class, JAVA_LOADER_CREATE_METHOD, JAVA_LOADER_CREATE_SIG); + factory_class, JAVA_LOADER_CREATE_METHOD, JAVA_LOADER_CREATE_SIG); CHECK(create_method); java_loader_obj = env->NewGlobalRef(env->CallStaticObjectMethod( - loader_class, create_method, gs_class_loader_obj)); + factory_class, create_method, worker_id_, gs_class_loader_obj)); if (env->ExceptionCheck()) { env->ExceptionDescribe(); @@ -409,7 +413,7 @@ class JavaLoaderInvoker { CHECK(java_loader_obj); jmethodID loader_method = env->GetMethodID( - loader_class, JAVA_LOADER_INIT_METHOD, JAVA_LOADER_INIT_SIG); + base_loader_class, JAVA_LOADER_INIT_METHOD, JAVA_LOADER_INIT_SIG); CHECK_NOTNULL(loader_method); env->CallVoidMethod(java_loader_obj, loader_method, worker_id_, @@ -524,8 +528,8 @@ class JavaLoaderInvoker { gs::JNIEnvMark m; if (m.env()) { JNIEnv* env = m.env(); - jclass loader_class = - LoadClassWithClassLoader(env, gs_class_loader_obj, JAVA_LOADER_CLASS); + jclass loader_class = LoadClassWithClassLoader(env, gs_class_loader_obj, + JAVA_BASE_LOADER_CLASS); CHECK_NOTNULL(loader_class); jmethodID loader_method = env->GetMethodID( @@ -560,8 +564,8 @@ class JavaLoaderInvoker { gs::JNIEnvMark m; if (m.env()) { JNIEnv* env = m.env(); - jclass loader_class = - LoadClassWithClassLoader(env, gs_class_loader_obj, JAVA_LOADER_CLASS); + jclass loader_class = LoadClassWithClassLoader(env, gs_class_loader_obj, + JAVA_BASE_LOADER_CLASS); CHECK_NOTNULL(loader_class); jmethodID loader_method = env->GetMethodID( diff --git a/analytical_engine/core/java/javasdk.cc b/analytical_engine/core/java/javasdk.cc index ee072bfceb72..64ee7468853c 100644 --- a/analytical_engine/core/java/javasdk.cc +++ b/analytical_engine/core/java/javasdk.cc @@ -254,6 +254,9 @@ JavaVM* GetJavaVM() { } } _jvm = CreateJavaVM(); + if (_jvm == NULL) { + LOG(ERROR) << "Fail to create JVM."; + } VLOG(1) << "Created JVM " << reinterpret_cast(_jvm); } return _jvm; diff --git 
a/analytical_engine/java/grape-demo/pom.xml b/analytical_engine/java/grape-demo/pom.xml index e1543b88de31..1c3a107ae213 100644 --- a/analytical_engine/java/grape-demo/pom.xml +++ b/analytical_engine/java/grape-demo/pom.xml @@ -62,10 +62,10 @@ com.alibaba.fastffi annotation-processor - + diff --git a/analytical_engine/java/grape-giraph/pom.xml b/analytical_engine/java/grape-giraph/pom.xml index 9babfe7ea082..df7f1a7597a6 100644 --- a/analytical_engine/java/grape-giraph/pom.xml +++ b/analytical_engine/java/grape-giraph/pom.xml @@ -98,6 +98,12 @@ package + true + + + org.slf4j:* + + *:* @@ -108,6 +114,9 @@ org/python/** org/eclipse/** it/unimi/dsi/fastutil/** + org/apache/logging/** + org/apache/log4j/** + org/sl4j/** diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java index e5d58063a148..d5343021a612 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/LoaderUtils.java @@ -86,6 +86,7 @@ public static BufferedReader createHdfsBufferedReader(String input) throws IOExc String endpoint = getEndpointFromPath(input); logger.info("endpoint: " + endpoint); conf.set("fs.defaultFS", endpoint); + conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName()); FileSystem fileSystem = null; try { fileSystem = FileSystem.get(conf); diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java index a1693886dce9..fcdd9b4829c2 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java +++ 
b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/loader/impl/DefaultLoader.java @@ -321,8 +321,6 @@ public class AbstractVertexLoaderCallable implements Callable { public AbstractVertexLoaderCallable(int threadId, String inputPath, long startLine, long endLine) { callableId = V_CALLABLE_ID.getAndAdd(1); try { - FileReader fileReader = new FileReader(inputPath); -// bufferedReader = new BufferedReader(fileReader); bufferedReader = createBufferedReader(inputPath); } catch (IOException e) { e.printStackTrace(); @@ -409,8 +407,6 @@ public class AbstractEdgeLoaderCallable implements Callable { public AbstractEdgeLoaderCallable(int threadId, String inputPath, long startLine, long endLine) { callableId = E_CALLABLE_ID.getAndAdd(1); try { - -// bufferedReader = new BufferedReader(fileReader); bufferedReader = createBufferedReader(inputPath); } catch (IOException e) { e.printStackTrace(); diff --git a/analytical_engine/java/grape-jdk/pom.xml b/analytical_engine/java/grape-jdk/pom.xml index 282486e36d29..a47d04c86ded 100644 --- a/analytical_engine/java/grape-jdk/pom.xml +++ b/analytical_engine/java/grape-jdk/pom.xml @@ -58,11 +58,11 @@ com.alibaba fastjson - + com.alibaba.fastffi diff --git a/analytical_engine/java/grape-runtime/pom.xml b/analytical_engine/java/grape-runtime/pom.xml index 47119dbb7e59..981f488b080c 100644 --- a/analytical_engine/java/grape-runtime/pom.xml +++ b/analytical_engine/java/grape-runtime/pom.xml @@ -72,6 +72,10 @@ org.apache.logging.log4j log4j-slf4j-impl + + org.slf4j + slf4j-log4j12 + @@ -132,11 +136,17 @@ com.alibaba.graphscope:* com.alibaba.fastffi:* com.alibaba:fastjson - org.slf4j:* + org.slf4j:slf4j-api org.apache.logging.log4j:* + org.slf4j:slf4j-reload4j + org.slf4j:slf4j-log4j12 + org.apache.log4j:* org.ow2.asm:* com.google.testing.compile:* + log4j:log4j + + diff --git a/analytical_engine/java/grape-runtime/src/main/resources/log4j.properties 
b/analytical_engine/java/grape-runtime/src/main/resources/log4j.properties new file mode 100644 index 000000000000..dc217f5e4f8a --- /dev/null +++ b/analytical_engine/java/grape-runtime/src/main/resources/log4j.properties @@ -0,0 +1,8 @@ +log4j.rootLogger=INFO, A1 + +# A1 is set to be a ConsoleAppender. +log4j.appender.A1=org.apache.log4j.ConsoleAppender + +# A1 uses PatternLayout. +log4j.appender.A1.layout=org.apache.log4j.PatternLayout +log4j.appender.A1.layout.ConversionPattern=%-4r [%t] %-5p %c %x - %m%n \ No newline at end of file diff --git a/analytical_engine/java/grape_jvm_opts b/analytical_engine/java/grape_jvm_opts index f3e490e9ffa6..a296e828ab38 100644 --- a/analytical_engine/java/grape_jvm_opts +++ b/analytical_engine/java/grape_jvm_opts @@ -21,16 +21,15 @@ then echo "Inferred GRAPHSCOPE_HOME "${gs_runtime} fi -GRAPHX_GRAPE_SDK=`ls ${GRAPHSCOPE_HOME}/lib/grape-graphx-*.jar` GRAPE_RUNTIME_JAR=`ls ${GRAPHSCOPE_HOME}/lib/grape-runtime-*.jar` GIRAPH_GRAPE_SDK=`ls ${GRAPHSCOPE_HOME}/lib/grape-giraph-*.jar` # This env point to the directory where the output for llvm4jni run.sh on Grape-runtime.jar resides if [ ! -z "${RUNTIME_LLVM4JNI_OUTPUT}" ]; then echo "find env RUNTIME_LLVM4JNI_OUTPUT, append to init java class path" - class_path=${RUNTIME_LLVM4JNI_OUTPUT}:${GRAPE_RUNTIME_JAR}:${GRAPHX_GRAPE_SDK}:${GIRAPH_GRAPE_SDK} + class_path=${RUNTIME_LLVM4JNI_OUTPUT}:${GIRAPH_GRAPE_SDK}:${GRAPE_RUNTIME_JAR} else - class_path=${GRAPE_RUNTIME_JAR}:${GRAPHX_GRAPE_SDK}:${GIRAPH_GRAPE_SDK} + class_path=${GRAPE_RUNTIME_JAR}:${GIRAPH_GRAPE_SDK} fi #include jars in spark/jars @@ -42,19 +41,21 @@ if [ ! -z "${SPARK_HOME}" ]; then fi jvm_version=$(${JAVA_HOME}/bin/javac -version 2>&1 | awk -F ' ' '{print $2}' | awk -F '.' 
'{print $1}') -if [ -z "${jvm_version##*11*}" ];then - _JVM_OPTS="-Xlog:gc*" - _JVM_OPTS=${_JVM_OPTS}" --illegal-access=warn" -else +#check whether java -Xlog:gc is supported +log_gc_check=$(${JAVA_HOME}/bin/java -Xlog:gc -version 2>&1 | grep "Unrecognized option") +if [ -z "${log_gc_check}"];then + _JVM_OPTS="-Xlog:gc" +else _JVM_OPTS="-XX:+PrintGCDateStamps" _JVM_OPTS=${_JVM_OPTS}" -XX:+PrintGCDetails" fi export GRAPE_JVM_OPTS="-Dcom.alibaba.fastffi.rvBuffer=2147483648 -XX:+StartAttachListener -XX:+PreserveFramePointer \ +-Djava.library.path=${GRAPHSCOPE_HOME}/lib \ +-Djava.class.path=${class_path} \ +-XX:+IgnoreUnrecognizedVMOptions \ -XX:+UseG1GC -XX:G1HeapRegionSize=32M \ -${_JVM_OPTS} \ +${_JVM_OPTS} --illegal-access=warn \ -XX:+UnlockDiagnosticVMOptions -XX:LoopUnrollLimit=1 \ --XX:+IgnoreUnrecognizedVMOptions -XX:-TieredCompilation \ --Djava.library.path=${GRAPHSCOPE_HOME}/lib \ --Djava.class.path=${class_path}" +-XX:-TieredCompilation" echo "GRAPE_JVM_OPTS=${GRAPE_JVM_OPTS}" diff --git a/analytical_engine/java/pom.xml b/analytical_engine/java/pom.xml index 6f9236a1d921..493011f2ff69 100644 --- a/analytical_engine/java/pom.xml +++ b/analytical_engine/java/pom.xml @@ -177,6 +177,11 @@ log4j-slf4j-impl ${log4j2.version} + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + com.alibaba fastjson From 871a912cce151a50f27ce919102b64a9bdc5abb1 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Wed, 3 Jul 2024 14:17:53 +0800 Subject: [PATCH 35/52] fix reading from hdfs and build image Committed-by: xiaolei.zl from Dev container --- .devcontainer/devcontainer.json | 37 ++--- analytical_engine/CMakeLists.txt | 130 +++++++++--------- .../core/loader/arrow_fragment_loader.h | 6 +- analytical_engine/test/app_tests.sh | 83 +++++------ coordinator/gscoordinator/op_executor.py | 16 +++ k8s/dockerfiles/analytical.Dockerfile | 11 +- k8s/dockerfiles/coordinator.Dockerfile | 11 +- python/requirements.txt | 2 +- python/test_gae_java.py | 43 ++++-- 9 files changed, 190 
insertions(+), 149 deletions(-) diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 4e5ffa8b324d..e98ba7ee2f1c 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,20 +1,18 @@ - // For format details, see https://aka.ms/devcontainer.json. For config options, see the // README at: https://github.com/devcontainers/templates/tree/main/src/javascript-node { - "name": "GraphScope", + "name": "GraphScope-Giraph", // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile "image": "registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:latest", - // Features to add to the dev container. More info: https://containers.dev/features. - "features": { - "ghcr.io/devcontainers/features/common-utils:2":{ - "installZsh": "true", + "features": { + "ghcr.io/devcontainers/features/common-utils:2": { + "installZsh": "true", "configureZshAsDefaultShell": "true", "installOhMyZsh": true, - "upgradePackages": "false" - } - }, + "upgradePackages": "false" + } + }, // Configure tool-specific properties. "customizations": { // Configure properties specific to VS Code. @@ -28,20 +26,15 @@ ] } }, - // Set `remoteUser` to `root` to connect as root instead. More info: https://aka.ms/vscode-remote/containers/non-root. "remoteUser": "graphscope", - // Use 'postCreateCommand' to run commands before the container is created. "initializeCommand": "sudo docker pull registry.cn-hongkong.aliyuncs.com/graphscope/graphscope-dev:latest", - // Uncomment this to enable C++ and Rust debugging in containers // "capAdd": ["SYS_PTRACE"], // "securityOpt": ["seccomp=unconfined"], - // Use 'forwardPorts' to make a list of ports inside the container available locally. // "forwardPorts": [3000], - // Use 'portsAttributes' to set default properties for specific forwarded ports. 
// More info: https://containers.dev/implementors/json_reference/#port-attributes // "portsAttributes": { @@ -50,23 +43,21 @@ // "onAutoForward": "notify" // } // }, - // Use 'postCreateCommand' to run commands after the container is created. // "postCreateCommand": "yarn install" - // Improve performance - // Uncomment these to mount a folder to a volume // https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-targeted-named-volume - // "mounts": [ - // "source=${localWorkspaceFolderBasename}-node_modules,target=${containerWorkspaceFolder}/node_modules,type=volume" - // ], + "mounts": [ + "source=/mnt/zhanglei/dev_mounts,target=/home/graphscope/dev_mounts,type=bind,consistency=cached" + ], + "runArgs": [ + "--network=host", + ], // "postCreateCommand": "sudo chown graphscope node_modules" - - // Uncomment these to use a named volume for your entire source tree // https://code.visualstudio.com/remote/advancedcontainers/improve-performance#_use-a-named-volume-for-your-entire-source-tree // "workspaceMount": "source=gs,target=/workspaces,type=volume", // "workspaceFolder": "/workspaces" "postCreateCommand": "sudo chown -R graphscope /workspaces && bash pre-commit/install-hook.sh && bash pre-commit/prepare-commit-msg" -} +} \ No newline at end of file diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index fa5853dce7d4..5440ee83f43c 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -354,63 +354,63 @@ endif () # An executable workaround for graphx pregel. 
if (ENABLE_JAVA_SDK) - add_executable(graphx_runner core/java/graphx_runner.cc core/java/javasdk.cc) - target_include_directories(graphx_runner PRIVATE core utils apps) - target_compile_definitions(graphx_runner PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(graphx_runner PRIVATE ${CMAKE_DL_LIBS} gs_proto ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) - - if (${LIBUNWIND_FOUND}) - target_link_libraries(graphx_runner PRIVATE ${LIBUNWIND_LIBRARIES}) - endif() + # add_executable(graphx_runner core/java/graphx_runner.cc core/java/javasdk.cc) + # target_include_directories(graphx_runner PRIVATE core utils apps) + # target_compile_definitions(graphx_runner PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(graphx_runner PRIVATE ${CMAKE_DL_LIBS} gs_proto ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) + + # if (${LIBUNWIND_FOUND}) + # target_link_libraries(graphx_runner PRIVATE ${LIBUNWIND_LIBRARIES}) + # endif() endif() # Test targets if (BUILD_TESTS) - add_executable(run_app test/run_app.cc core/object/dynamic.cc) - target_include_directories(run_app PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps utils apps) - target_link_libraries(run_app ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} ${VINEYARD_LIBRARIES}) - target_link_libraries(run_app OpenMP::OpenMP_CXX) + # add_executable(run_app test/run_app.cc core/object/dynamic.cc) + # target_include_directories(run_app PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps utils apps) + # target_link_libraries(run_app ${LIBGRAPELITE_LIBRARIES} ${GFLAGS_LIBRARIES} ${CMAKE_DL_LIBS} ${Boost_LIBRARIES} ${VINEYARD_LIBRARIES}) + # target_link_libraries(run_app OpenMP::OpenMP_CXX) if (ENABLE_JAVA_SDK) - add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) - target_include_directories(run_java_app PRIVATE core utils apps) - target_compile_definitions(run_java_app PUBLIC 
ENABLE_JAVA_SDK) - target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} - ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) - - # java app benchmark - add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) - target_include_directories(property_graph_java_app_benchmarks PRIVATE core utils apps) - target_compile_definitions(property_graph_java_app_benchmarks PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(property_graph_java_app_benchmarks gs_proto ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES}) - - # giraph runner + # add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) + # target_include_directories(run_java_app PRIVATE core utils apps) + # target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} + # ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) + + # # java app benchmark + # add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) + # target_include_directories(property_graph_java_app_benchmarks PRIVATE core utils apps) + # target_compile_definitions(property_graph_java_app_benchmarks PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(property_graph_java_app_benchmarks gs_proto ${JNI_LIBRARIES} ${Boost_LIBRARIES} ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES}) + + # # giraph runner add_executable(giraph_runner test/giraph_runner.cc core/java/javasdk.cc) target_include_directories(giraph_runner PRIVATE core utils apps) target_compile_definitions(giraph_runner PUBLIC ENABLE_JAVA_SDK) target_link_libraries(giraph_runner ${CMAKE_DL_LIBS} gs_proto ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GFLAGS_LIBRARIES} 
${JNI_LIBRARIES}) - # graphx related test - add_executable(projected_fragment_mapper_test test/projected_fragment_mapper_test.cc) - target_include_directories(projected_fragment_mapper_test PRIVATE core utils apps) - target_link_libraries(projected_fragment_mapper_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + # # graphx related test + # add_executable(projected_fragment_mapper_test test/projected_fragment_mapper_test.cc) + # target_include_directories(projected_fragment_mapper_test PRIVATE core utils apps) + # target_link_libraries(projected_fragment_mapper_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) - add_executable(graphx_loader_test test/graphx_loader_test.cc) - target_include_directories(graphx_loader_test PRIVATE core utils apps) - target_compile_definitions(graphx_loader_test PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(graphx_loader_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) + # add_executable(graphx_loader_test test/graphx_loader_test.cc) + # target_include_directories(graphx_loader_test PRIVATE core utils apps) + # target_compile_definitions(graphx_loader_test PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(graphx_loader_test ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES}) - add_executable(run_java_string_app test/run_java_string_app.cc core/java/javasdk.cc) - target_include_directories(run_java_string_app PRIVATE core utils apps) - target_compile_definitions(run_java_string_app PUBLIC ENABLE_JAVA_SDK) - target_link_libraries(run_java_string_app ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) + # add_executable(run_java_string_app test/run_java_string_app.cc core/java/javasdk.cc) + # target_include_directories(run_java_string_app PRIVATE core utils apps) + # 
target_compile_definitions(run_java_string_app PUBLIC ENABLE_JAVA_SDK) + # target_link_libraries(run_java_string_app ${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) if (${LIBUNWIND_FOUND}) - target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) - target_link_libraries(property_graph_java_app_benchmarks ${LIBUNWIND_LIBRARIES}) + # target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) + # target_link_libraries(property_graph_java_app_benchmarks ${LIBUNWIND_LIBRARIES}) target_link_libraries(giraph_runner ${LIBUNWIND_LIBRARIES}) - target_link_libraries(projected_fragment_mapper_test ${LIBUNWIND_LIBRARIES}) + # target_link_libraries(projected_fragment_mapper_test ${LIBUNWIND_LIBRARIES}) endif () endif() @@ -428,43 +428,43 @@ if (BUILD_TESTS) target_link_libraries(${target} OpenMP::OpenMP_CXX) endmacro() - add_vineyard_app(run_vy_app SRCS test/run_vy_app.cc) - add_vineyard_app(run_vy_app_compact SRCS test/run_vy_app_compact.cc) - add_vineyard_app(run_vy_app_local_vm SRCS test/run_vy_app_local_vm.cc) + # add_vineyard_app(run_vy_app SRCS test/run_vy_app.cc) + # add_vineyard_app(run_vy_app_compact SRCS test/run_vy_app_compact.cc) + # add_vineyard_app(run_vy_app_local_vm SRCS test/run_vy_app_local_vm.cc) - add_vineyard_app(run_load_from_stream SRCS test/run_load_from_stream.cc) + # add_vineyard_app(run_load_from_stream SRCS test/run_load_from_stream.cc) - add_vineyard_app(run_vy_ldbc SRCS test/run_vy_ldbc.cc) - target_include_directories(run_vy_ldbc PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + # add_vineyard_app(run_vy_ldbc SRCS test/run_vy_ldbc.cc) + # target_include_directories(run_vy_ldbc PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - add_vineyard_app(run_ctx SRCS test/run_ctx.cc) - target_include_directories(run_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - target_link_libraries(run_ctx gs_proto) + # add_vineyard_app(run_ctx SRCS 
test/run_ctx.cc) + # target_include_directories(run_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + # target_link_libraries(run_ctx gs_proto) - add_vineyard_app(run_property_ctx SRCS test/run_property_ctx.cc) - target_include_directories(run_property_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - target_link_libraries(run_property_ctx gs_proto) + # add_vineyard_app(run_property_ctx SRCS test/run_property_ctx.cc) + # target_include_directories(run_property_ctx PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + # target_link_libraries(run_property_ctx gs_proto) - add_vineyard_app(run_pregel_app SRCS test/run_pregel_app.cc) + # add_vineyard_app(run_pregel_app SRCS test/run_pregel_app.cc) - add_vineyard_app(run_string_oid SRCS test/run_string_oid.cc) - target_include_directories(run_string_oid PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + # add_vineyard_app(run_string_oid SRCS test/run_string_oid.cc) + # target_include_directories(run_string_oid PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - add_vineyard_app(run_empty_property SRCS test/run_empty_property.cc) - target_include_directories(run_empty_property PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) + # add_vineyard_app(run_empty_property SRCS test/run_empty_property.cc) + # target_include_directories(run_empty_property PRIVATE ${LIBGRAPELITE_INCLUDE_DIRS}/grape/analytical_apps) - add_vineyard_app(test_project_string SRCS test/test_project_string.cc) + # add_vineyard_app(test_project_string SRCS test/test_project_string.cc) - add_vineyard_app(basic_graph_benchmarks SRCS benchmarks/basic_graph_benchmarks.cc) + # add_vineyard_app(basic_graph_benchmarks SRCS benchmarks/basic_graph_benchmarks.cc) - add_vineyard_app(property_graph_loader SRCS benchmarks/property_graph_loader.cc) + # add_vineyard_app(property_graph_loader SRCS benchmarks/property_graph_loader.cc) - add_vineyard_app(property_graph_benchmarks SRCS 
benchmarks/property_graph_benchmarks.cc) + # add_vineyard_app(property_graph_benchmarks SRCS benchmarks/property_graph_benchmarks.cc) - add_vineyard_app(projected_graph_benchmarks SRCS benchmarks/projected_graph_benchmarks.cc) + # add_vineyard_app(projected_graph_benchmarks SRCS benchmarks/projected_graph_benchmarks.cc) if (NETWORKX) - add_vineyard_app(test_convert SRCS test/test_convert.cc) + # add_vineyard_app(test_convert SRCS test/test_convert.cc) endif () endif () @@ -511,7 +511,7 @@ if(ENABLE_JAVA_SDK) add_custom_command( OUTPUT "${GAE_JAVA_RUNTIME_JAR}" - COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet + COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} install -DskipTests --quiet DEPENDS gs_proto WORKING_DIRECTORY ${GAE_JAVA_DIR} COMMENT "Building GAE-java..." @@ -578,7 +578,7 @@ install_gsa_binary(gs_proto) install_gsa_binary(gs_util) if (ENABLE_JAVA_SDK) - install_gsa_binary(graphx_runner) + # install_gsa_binary(graphx_runner) endif() install_gsa_headers("${PROJECT_SOURCE_DIR}/apps") diff --git a/analytical_engine/core/loader/arrow_fragment_loader.h b/analytical_engine/core/loader/arrow_fragment_loader.h index 2b179a8d0a95..f7526f4cb2ec 100644 --- a/analytical_engine/core/loader/arrow_fragment_loader.h +++ b/analytical_engine/core/loader/arrow_fragment_loader.h @@ -479,7 +479,8 @@ class ArrowFragmentLoader : public vineyard::ArrowFragmentLoader { client_, sourceId, table, comm_spec_.local_id(), comm_spec_.local_num())); #ifdef ENABLE_JAVA_SDK - } else if (vertices[i]->protocol == "file" && + } else if ((vertices[i]->protocol == "file" || + vertices[i]->protocol == "hdfs") && vertices[i]->vformat.find("giraph") != std::string::npos) { BOOST_LEAF_ASSIGN( table, readTableFromGiraph( @@ -597,7 +598,8 @@ class ArrowFragmentLoader : public vineyard::ArrowFragmentLoader { << table->schema()->ToString(); } #ifdef ENABLE_JAVA_SDK - } else if (sub_labels[j].protocol == "file" && + } else if 
((sub_labels[j].protocol == "file" || + sub_labels[j].protocol == "hdfs") && sub_labels[j].eformat.find("giraph") != std::string::npos) { BOOST_LEAF_ASSIGN( diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index 649fb6bd8984..3db1e44db335 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -99,6 +99,7 @@ function start_vineyard() { timestamp=$(date +%Y-%m-%d_%H-%M-%S) vineyardd \ -socket ${socket_file} \ + -rpc_socket_port 9601 \ -meta local & set +m sleep 5 @@ -399,66 +400,66 @@ pushd "${ENGINE_HOME}"/build get_test_data -for app in "${ldbc_apps[@]}"; do - run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 - exact_verify "${test_dir}"/property/ldbc/p2p-31-"${app^^}" -done +# for app in "${ldbc_apps[@]}"; do +# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 +# exact_verify "${test_dir}"/property/ldbc/p2p-31-"${app^^}" +# done -for app in "${other_apps[@]}"; do - run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 - exact_verify "${test_dir}"/p2p-31-"${app}" -done +# for app in "${other_apps[@]}"; do +# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --sssp_source=6 --sssp_target=10 --bfs_source=6 +# exact_verify "${test_dir}"/p2p-31-"${app}" +# done -for app in "${apps_with_directed[@]}"; do - run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --directed - exact_verify "${test_dir}"/p2p-31-"${app}" -done +# for app in 
"${apps_with_directed[@]}"; do +# run ${np} ./run_app --vfile "${test_dir}"/p2p-31.v --efile "${test_dir}"/p2p-31.e --application "${app}" --out_prefix ./test_output --directed +# exact_verify "${test_dir}"/p2p-31-"${app}" +# done start_vineyard -run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 -run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 -run_lpa ${np} ./run_vy_app "${socket_file}" 1 "${test_dir}"/property/lpa_dataset/lpa_3000_e 2 "${test_dir}"/property/lpa_dataset/lpa_3000_v 0 1 lpa -run_sampling_path 2 ./run_vy_app "${socket_file}" "${test_dir}"/property/sampling_path 0 1 sampling_path 0-0-1-4-2 +# run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 +# run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 +# run_lpa ${np} ./run_vy_app "${socket_file}" 1 "${test_dir}"/property/lpa_dataset/lpa_3000_e 2 "${test_dir}"/property/lpa_dataset/lpa_3000_v 0 1 lpa +# run_sampling_path 2 ./run_vy_app "${socket_file}" "${test_dir}"/property/sampling_path 0 1 sampling_path 0-0-1-4-2 -# local vm -run_vy_2 ${np} ./run_vy_app_local_vm "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 +# # local vm +# run_vy_2 ${np} ./run_vy_app_local_vm "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 -# compact edges -run_vy_2 ${np} ./run_vy_app_compact "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 +# # compact edges +# run_vy_2 ${np} ./run_vy_app_compact "${socket_file}" 1 "${test_dir}"/property/p2p-31_property_e "${test_dir}"/property/p2p-31_property_v 1 -run_vy 
${np} ./run_pregel_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v -rm -rf ./test_output/* -cp ./outputs_pregel_sssp/* ./test_output -exact_verify "${test_dir}"/twitter-sssp-4 +# run_vy ${np} ./run_pregel_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v +# rm -rf ./test_output/* +# cp ./outputs_pregel_sssp/* ./test_output +# exact_verify "${test_dir}"/twitter-sssp-4 -run ${np} ./run_pregel_app tc "${test_dir}"/p2p-31.e "${test_dir}"/p2p-31.v ./test_output -exact_verify "${test_dir}/p2p-31"-triangles +# run ${np} ./run_pregel_app tc "${test_dir}"/p2p-31.e "${test_dir}"/p2p-31.v ./test_output +# exact_verify "${test_dir}/p2p-31"-triangles if [[ "${RUN_JAVA_TESTS}" == "ON" ]]; then - run_vy_2 ${np} ./projected_fragment_mapper_test "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v + # run_vy_2 ${np} ./projected_fragment_mapper_test "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v if [[ "${USER_JAR_PATH}"x != ""x ]] then echo "Running Java tests..." 
- run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS - GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ - 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ - 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ - com.alibaba.graphscope.example.stringApp.StringApp + # run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS + # GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ + # 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ + # 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ + # com.alibaba.graphscope.example.stringApp.StringApp echo "Running girpah tests..." 
- ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ - --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ - --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + # GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ + # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ + # --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + # --user_app_class com.alibaba.graphscope.example.giraph.SSSP + + # echo "Test Giraph app user Customized Writable" + GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ + --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile hdfs://localhost:9000/test/p2p-31.v \ + --efile hdfs://localhost:9000/test/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ --user_app_class com.alibaba.graphscope.example.giraph.SSSP - - echo "Test Giraph app user Customized Writable" - ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat \ - --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat --vfile "${test_dir}"/p2p-31.v \ - --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ - --user_app_class com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable fi fi diff --git a/coordinator/gscoordinator/op_executor.py b/coordinator/gscoordinator/op_executor.py index 90eb181f3f7d..631549f2faae 
100644 --- a/coordinator/gscoordinator/op_executor.py +++ b/coordinator/gscoordinator/op_executor.py @@ -963,6 +963,13 @@ def _process_loader_func(loader, vineyard_endpoint, vineyard_ipc_socket): # loader is type of attr_value_pb2.Chunk protocol = loader.attr[types_pb2.PROTOCOL].s.decode() source = loader.attr[types_pb2.SOURCE].s.decode() + if loader.attr[types_pb2.CHUNK_NAME]: + chunk_name = loader.attr[types_pb2.CHUNK_NAME].s.decode() + if chunk_name == "vertex": + input_format = loader.attr[types_pb2.VFORMAT].s.decode() + elif chunk_name == "edge": + input_format = loader.attr[types_pb2.EFORMAT].s.decode() + print(f"chunk_name = {chunk_name}, _format = {input_format}") try: storage_options = json.loads( loader.attr[types_pb2.STORAGE_OPTIONS].s.decode() @@ -975,6 +982,14 @@ def _process_loader_func(loader, vineyard_endpoint, vineyard_ipc_socket): read_options = {} filetype = read_options.get("filetype", None) filetype = str(filetype).upper() + # giraph app support reading from hdfs + if chunk_name and input_format: + if chunk_name == "vertex" and input_format.startswith('giraph') and protocol == "hdfs": + print("No need to spawn vineyard stream for giraph vertex") + return + if chunk_name == "edge" and input_format.startswith('giraph') and protocol == "hdfs": + print("No need to spawn vineyard stream for giraph edge") + return if ( protocol in ("hdfs", "hive", "oss", "s3") or protocol == "file" @@ -1003,6 +1018,7 @@ def _process_loader_func(loader, vineyard_endpoint, vineyard_ipc_socket): for loader in op.large_attr.chunk_meta_list.items: # handle vertex or edge loader + print("loader attr: ", loader.attr) if loader.attr[types_pb2.CHUNK_TYPE].s.decode() == "loader": # set op bodies, this is for loading graph from numpy/pandas op_bodies = [] diff --git a/k8s/dockerfiles/analytical.Dockerfile b/k8s/dockerfiles/analytical.Dockerfile index 0f5e494cc1bf..e6e3d4761b28 100644 --- a/k8s/dockerfiles/analytical.Dockerfile +++ b/k8s/dockerfiles/analytical.Dockerfile @@ 
-62,6 +62,12 @@ FROM $REGISTRY/graphscope/graphscope-dev:$BUILDER_VERSION AS builder-java COPY --chown=graphscope:graphscope . /home/graphscope/GraphScope +RUN sudo apt purge -y openjdk* && sudo apt purge -y default-jre* && \ + sudo apt-get update && sudo apt-get install -y openjdk-8-jdk && sudo ln -s /usr/lib/jvm/java-8-openjdk-amd64/ /usr/lib/jvm/default-java && \ + sudo update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && java -version && \ + sudo apt-get install -y maven + + RUN cd /home/graphscope/GraphScope/ && \ if [ "${CI}" = "true" ]; then \ cp -r artifacts/analytical-java /home/graphscope/install; \ @@ -90,7 +96,10 @@ ENV GRAPHSCOPE_HOME=/opt/graphscope ENV PATH=$PATH:$GRAPHSCOPE_HOME/bin LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GRAPHSCOPE_HOME/lib USER root -RUN apt-get update && apt-get install -y default-jdk +RUN apt purge -y openjdk* && apt purge -y default-jre* && \ + apt-get update && apt-get install -y openjdk-8-jdk && sudo ln -s /usr/lib/jvm/java-8-openjdk-amd64/ /usr/lib/jvm/default-java && \ + update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && java -version && \ + sudo apt-get install -y maven COPY ./k8s/utils/kube_ssh /usr/local/bin/kube_ssh COPY --from=builder-java /home/graphscope/install /opt/graphscope/ RUN mkdir -p /tmp/gs && (mv /opt/graphscope/builtin /tmp/gs/builtin || true) && chown -R graphscope:graphscope /tmp/gs diff --git a/k8s/dockerfiles/coordinator.Dockerfile b/k8s/dockerfiles/coordinator.Dockerfile index 8a25efb1b8e1..04f19bd0a968 100644 --- a/k8s/dockerfiles/coordinator.Dockerfile +++ b/k8s/dockerfiles/coordinator.Dockerfile @@ -8,6 +8,11 @@ ARG CI=false COPY --chown=graphscope:graphscope . 
/home/graphscope/GraphScope +# uninstall openjdk-11 and install openjdk-8 +RUN sudo apt purge -y openjdk* && sudo apt purge -y default-jre* && \ + sudo apt-get update && sudo apt-get install -y openjdk-8-jdk && \ + sudo update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && java -version + RUN cd /home/graphscope/GraphScope/ && \ if [ "${CI}" = "true" ]; then \ cp -r artifacts/learning /home/graphscope/install; \ @@ -38,10 +43,14 @@ FROM ubuntu:22.04 AS coordinator ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update -y && \ - apt-get install -y sudo python3-pip openmpi-bin curl tzdata default-jdk && \ + apt-get install -y sudo python3-pip openmpi-bin curl tzdata && \ apt-get clean -y && \ rm -rf /var/lib/apt/lists/* +RUN sudo apt purge -y openjdk* && sudo apt purge -y default-jre* && \ + apt-get update && apt-get install -y openjdk-8-jdk && \ + update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && java -version + ENV GRAPHSCOPE_HOME=/opt/graphscope RUN useradd -m graphscope -u 1001 \ diff --git a/python/requirements.txt b/python/requirements.txt index c898a22c2a79..c80a66b91036 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -9,7 +9,7 @@ neo4j==5.10.0 nest_asyncio networkx==2.8.0;python_version>="3.8" networkx==2.6.0;python_version<"3.8" -numpy +numpy<2.0.0 orjson packaging pandas<=2.0.3 diff --git a/python/test_gae_java.py b/python/test_gae_java.py index e0b54f6bb941..86bf1111b487 100644 --- a/python/test_gae_java.py +++ b/python/test_gae_java.py @@ -4,20 +4,21 @@ graphscope.set_option(show_log=True) graphscope.set_option(log_level="DEBUG") -k8s_volumes = { - "data": { - "type": "hostPath", - "field": { - "path": "/data", - "type": "Directory" - }, - "mounts": { - "mountPath": "/data" - } - } -} -sess = graphscope.session(cluster_type="k8s", enabled_engines="gae-java", k8s_volumes=k8s_volumes, k8s_image_tag="0.27.0") +# k8s_volumes = { +# "data": { +# "type": "hostPath", +# "field": { +# 
"path": "/data", +# "type": "Directory" +# }, +# "mounts": { +# "mountPath": "/data" +# } +# } +# } +sess = graphscope.session(cluster_type="k8s", enabled_engines="gae-java",k8s_image_tag="0.27.0") # k8s_volumes=k8s_volumes, sess.add_lib('/mnt/zhanglei/grape-demo-0.27.0-shaded.jar') +# sess.add_lib('/workspaces/GraphScope/analytical_engine/java/grape-demo/target/grape-demo-0.27.0-shaded.jar') # vformat = "giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat" # eformat = "giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat" # graph = sess.load_from( @@ -32,10 +33,22 @@ vformat2 = "giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat" eformat2 = "giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat" +# graph2 = sess.load_from( +# vertices="/data/gstest/p2p-31.v", +# vformat=vformat2, +# edges="/data/gstest/p2p-31.e", +# eformat=eformat2, +# ) +# graph2 = sess.load_from( +# vertices="hdfs://localhost:9000/test/p2p-31.v", +# vformat=vformat2, +# edges="hdfs://localhost:9000/test/p2p-31.e", +# eformat=eformat2, +# ) graph2 = sess.load_from( - vertices="/data/gstest/p2p-31.v", + vertices="hdfs://host.minikube.internal:9000/test/p2p-31.v", vformat=vformat2, - edges="/data/gstest/p2p-31.e", + edges="hdfs://host.minikube.internal:9000/test/p2p-31.e", eformat=eformat2, ) proj_g2 = graph2._project_to_simple(v_prop="vdata", e_prop="data") From 9630ab0626192eb4fc0b160b1ad62734b305673a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Fri, 12 Jul 2024 14:54:53 +0800 Subject: [PATCH 36/52] impl circel --- .../graphscope/example/circle/CirclePIE.java | 193 ++++++++++++++++++ .../example/circle/CirclePIEContext.java | 97 +++++++++ .../graphscope/example/circle/Path.java | 77 +++++++ .../giraph/circle/CircleInputFormat.java | 55 +++++ .../example/giraph/circle/MsgWritable.java | 123 +++++++++++ .../giraph/circle/VertexAttrWritable.java | 72 +++++++ 
.../example/giraph/myCircle/Circle.java | 34 +++ .../example/giraph/myCircle/Path.java | 27 +++ 8 files changed, 678 insertions(+) create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/MsgWritable.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/VertexAttrWritable.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Path.java diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java new file mode 100644 index 000000000000..ca412f2f3322 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java @@ -0,0 +1,193 @@ +package com.alibaba.graphscope.example.circle; + +import com.alibaba.graphscope.app.DefaultAppBase; +import com.alibaba.graphscope.context.DefaultContextBase; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.ds.adaptor.AdjList; +import com.alibaba.graphscope.ds.adaptor.Nbr; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.DefaultMessageManager; +import 
com.alibaba.graphscope.serialization.FFIByteVectorInputStream; +import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; +import com.alibaba.graphscope.stdcxx.FFIByteVector; +import com.alibaba.graphscope.stdcxx.FFIByteVectorFactory; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; + +public class CirclePIE implements DefaultAppBase< + Long, + Long, + Long, + Long, + CirclePIEContext> { + private static final Logger logger = LoggerFactory.getLogger(CirclePIE.class); + private static FFIByteVectorOutputStream msgVector = new FFIByteVectorOutputStream(); + + /** + * Partial Evaluation to implement. + * + * @param graph fragment. The graph fragment providing accesses to graph data. + * @param context context. User defined context which manages data during the whole + * computations. + * @param messageManager The message manger which manages messages between fragments. + * @see IFragment + * @see DefaultContextBase + * @see DefaultMessageManager + */ + @Override + public void PEval(IFragment graph, DefaultContextBase context, DefaultMessageManager messageManager) { + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + CirclePIEContext ctx = (CirclePIEContext) context; + for (long i = 0; i < graph.getInnerVerticesNum(); ++i) { + vertex.setValue(i); + Long globalId = graph.getInnerVertexGid(vertex); + Path path = new Path(globalId); + AdjList adjList = graph.getOutgoingAdjList(vertex); + for (Nbr nbr : adjList.iterable()) { + path.add(graph.vertex2Gid(nbr.neighbor())); + if (graph.isOuterVertex(nbr.neighbor())) { + // send path to outer vertex. 
+ try { + sendMessageToOuterVertex(graph, messageManager, nbr.neighbor(), path); + } catch (IOException e) { + e.printStackTrace(); + } + } else { + ctx.addToNextPath(nbr.neighbor(), path); + } + path.pop(); + } + } + //No need to check circels + ctx.swapPaths(); + logger.info("After PEval: cur_path: " + ctx.curPaths.toString()); + logger.info("After PEval: next_path: " + ctx.nextPaths.toString()); + messageManager.forceContinue(); + } + + /** + * Incremental Evaluation to implement. + * + * @param graph fragment. The graph fragment providing accesses to graph data. + * @param context context. User defined context which manages data during the whole + * computations. + * @param messageManager The message manger which manages messages between fragments. + * @see IFragment + * @see DefaultContextBase + * @see DefaultMessageManager + */ + @Override + public void IncEval(IFragment graph, DefaultContextBase context, DefaultMessageManager messageManager) { + CirclePIEContext ctx = (CirclePIEContext) context; + //Receive msg and merge + try { + receiveMessage(graph, messageManager, ctx); + } catch (IOException e) { + e.printStackTrace(); + } + + logger.info("In super step {}, cur path {}", ctx.curStep, ctx.curPaths); + logger.info("In super step {}, next path {}", ctx.curStep, ctx.nextPaths); + // For received msg, check if it is already circle, if true, add to the final results. + ctx.persistCirclePathInCurrent(); + + // Implement vertex program + vprog(); + + if (ctx.curStep < ctx.maxStep - 1){ + // send msg + sendMessageThroughOE(graph, ctx, messageManager); + ctx.swapPaths(); + } + else if (ctx.curStep == ctx.maxStep - 1){ + // check whether received paths start with the nbr. + // No work + messageManager.forceContinue(); + } else { + // maybe receive message, but not sending message. 
+ logger.info("Max step reached, " + ctx.curStep); + } + } + + void sendMessageThroughOE(IFragment graph, CirclePIEContext ctx, DefaultMessageManager messageManager) { + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + for (long i = 0; i < graph.getInnerVerticesNum(); ++i) { + vertex.setValue(i); + Long globalId = graph.getInnerVertexGid(vertex); + List paths = ctx.curPaths.get((int) i); + for (int j = 0; j < paths.size(); ++j) { + Path path = paths.get(j); + //Check whether the last node is exactly current vertex. + if (path.top() != globalId){ + logger.error("Invalid path, ending at {}, but collected by {}", path.top(), globalId); + } + AdjList adjList = graph.getOutgoingAdjList(vertex); + for (Nbr nbr : adjList.iterable()) { + path.add(graph.vertex2Gid(nbr.neighbor())); + if (graph.isOuterVertex(nbr.neighbor())) { + // send path to outer vertex. + try { + sendMessageToOuterVertex(graph, messageManager, nbr.neighbor(), path); + } catch (IOException e) { + e.printStackTrace(); + } + } else { + ctx.addToNextPath(nbr.neighbor(), path); + } + path.pop(); + } + } + } + } + + /** + * Send a message to the outer vertex (to other fragment) + * @param neighbor the outer vertex vid + */ + void sendMessageToOuterVertex(IFragment graph, DefaultMessageManager mm, Vertex neighbor, Path path) throws IOException { + logger.info("Send path {} to vertex {}, dst frag {}", path, neighbor.getValue(), graph.fid()); + msgVector.reset(); + msgVector.writeLong(graph.getOuterVertexGid(neighbor)); + path.write(msgVector); + mm.sendToFragment(graph.getFragId(neighbor), msgVector.getVector()); + } + + void receiveMessage(IFragment graph, DefaultMessageManager messageManager, CirclePIEContext ctx)throws IOException { + FFIByteVector tmpVector = (FFIByteVector) FFIByteVectorFactory.INSTANCE.create(); + long bytesOfReceivedMsg = 0; + Vertex tmpVertex = FFITypeFactoryhelper.newVertexLong(); + while (messageManager.getPureMessage(tmpVector)) { + // The retrieved tmp vector has been 
resized, so the cached objAddress is not available. + // trigger the refresh + tmpVector.touch(); + bytesOfReceivedMsg += tmpVector.size(); + logger.info("Frag [{}] digest message of size {}", graph.fid(), tmpVector.size()); + Path path = new Path(); + FFIByteVectorInputStream inputStream = new FFIByteVectorInputStream(tmpVector); + long gid = inputStream.readLong(); + if (!graph.innerVertexGid2Vertex(gid, tmpVertex)){ + logger.error("Fail to get lid from gid {}", gid); + } + logger.info("Got msg to lid {}", tmpVertex.getValue()); + path.read(inputStream); + // Add the tail node of new path here. +// path.add(gid); + digestMessage(ctx, tmpVertex, path); + tmpVector.clear(); + } + logger.info("total message received by frag {} bytes {}", graph.fid(), bytesOfReceivedMsg); + tmpVector.delete(); + } + + void digestMessage(CirclePIEContext ctx, Vertex vertex, Path path ) { + ctx.addToCurrentPath(vertex,path); + } + + void vprog() { + + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java new file mode 100644 index 000000000000..9c2a590ad5f8 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java @@ -0,0 +1,97 @@ +package com.alibaba.graphscope.example.circle; + +import com.alibaba.fastjson.JSONObject; +import com.alibaba.graphscope.context.DefaultContextBase; +import com.alibaba.graphscope.context.VertexDataContext; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.DefaultMessageManager; +import com.alibaba.graphscope.utils.LongIdParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class CirclePIEContext extends VertexDataContext, Long> + implements 
DefaultContextBase { + private static final Logger logger = LoggerFactory.getLogger(CirclePIEContext.class); + + public int maxStep = 3; + public int curStep = 0; + public List> curPaths; // Paths ending at vertex. + public List> nextPaths; // New generated path end at vertex this round, + public List> results; // Paths that are in circle. + public LongIdParser parser; + + /** + * Called by grape framework, before any PEval. You can initiating data structures need during + * super steps here. + * + * @param frag The graph fragment providing accesses to graph data. + * @param messageManager The message manger which manages messages between fragments. + * @param jsonObject String args from cmdline. + * @see IFragment + * @see DefaultMessageManager + * @see JSONObject + */ + @Override + public void Init(IFragment frag, DefaultMessageManager messageManager, JSONObject jsonObject) { + long innerVertexNum = frag.getInnerVerticesNum(); + curPaths = new ArrayList>((int) innerVertexNum); + nextPaths = new ArrayList<>((int) innerVertexNum); + results = new ArrayList<>((int) innerVertexNum); + for (int i = 0; i < innerVertexNum; ++i ){ + curPaths.set(i, new ArrayList<>()); + nextPaths.set(i, new ArrayList<>()); + results.set(i, new ArrayList<>()); + } + parser = new LongIdParser(frag.fnum(), 1); + } + + public void addToCurrentPath(Vertex vertex, Path path) { + curPaths.get(vertex.getValue().intValue()).add(path); + } + + public void addToNextPath(Vertex vertex, Path path) { + nextPaths.get(vertex.getValue().intValue()).add(path); + } + + public void persistCirclePathInCurrent() { + for (int i = 0; i < nextPaths.size(); ++i) { + for (int j = 0; j < nextPaths.get(i).size(); ++j) { + Path path = nextPaths.get(i).get(j); + tryToFindCircle(path); + //Do we need to remove the path? + } + } + } + + public void swapPaths() { + List> tmp = curPaths; + curPaths = nextPaths; + nextPaths = tmp; + nextPaths.clear(); + } + + /** + * Output will be executed when the computations finalizes. 
Data maintained in this context + * shall be outputted here. + * + * @param frag The graph fragment contains the graph info. + * @see IFragment + */ + @Override + public void Output(IFragment frag) { + logger.info("finally cur path {}", curPaths); + logger.info("finally next path {}", nextPaths); + } + + public void tryToFindCircle(Path path) { + if (path.isCircle()){ + logger.info("path is circle {}", path); + long lid = parser.getOffset(path.top()); + this.results.get((int)lid).add(path); + } + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java new file mode 100644 index 000000000000..a27807da0f40 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java @@ -0,0 +1,77 @@ +package com.alibaba.graphscope.example.circle; + +import com.alibaba.graphscope.serialization.FFIByteVectorInputStream; +import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.Stack; + +public class Path { + private static final Logger logger = LoggerFactory.getLogger(Path.class); + private Stack vertexInnerIds; + + public Path() { + vertexInnerIds = new Stack<>(); + } + + public Path(long vid) { + vertexInnerIds = new Stack<>(); + vertexInnerIds.add(vid); + } + + public void add(long vid) { + vertexInnerIds.add(vid); + } + + public boolean isCircle() { + if (vertexInnerIds.size() <= 2){ + return false; + } + if (vertexInnerIds.peek().equals(vertexInnerIds.get(0))){ + return true; + } + return false; + } + + public long top() { + return vertexInnerIds.peek(); + } + + public void pop() { + vertexInnerIds.pop(); + } + + public void write(FFIByteVectorOutputStream output) throws IOException { + output.writeInt(vertexInnerIds.size()); + for (int i = 0; i < 
vertexInnerIds.size(); ++i) { + output.writeLong(vertexInnerIds.get(i)); + } + } + + public void read(FFIByteVectorInputStream input) throws IOException { + if (vertexInnerIds.size() != 0) { + throw new RuntimeException("The Path is not empty"); + } + int len = input.readInt(); + logger.info("reading {} elements from stream", len); + for (int i = 0; i < len; ++i) { + vertexInnerIds.push(input.readLong()); + } + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append("Path{"); + for (int i = 0; i < vertexInnerIds.size(); ++i){ + sb.append(i); + if (i < vertexInnerIds.size() - 1){ + sb.append(","); + } + } + sb.append("}"); + return sb.toString(); + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java new file mode 100644 index 000000000000..2f3ebebf3df1 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java @@ -0,0 +1,55 @@ +// +// Source code recreated from a .class file by IntelliJ IDEA +// (powered by FernFlower decompiler) +// + +package com.alibaba.graphscope.example.giraph.circle; + +import com.alibaba.graphscope.example.giraph.format.VertexAttrWritable; +import com.google.common.collect.Lists; +import java.io.IOException; +import java.util.List; +import org.apache.giraph.edge.Edge; +import org.apache.giraph.io.formats.TextVertexInputFormat; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.TaskAttemptContext; + +public class CircleInputFormat extends TextVertexInputFormat { + public CircleInputFormat() { + } + + public TextVertexInputFormat.TextVertexReader createVertexReader(InputSplit split, TaskAttemptContext context) throws 
IOException { + return new CircleInputFormat.P2PVertexReader(); + } + + public class P2PVertexReader extends TextVertexInputFormat.TextVertexReaderFromEachLineProcessed { + String SEPARATOR = ","; + private LongWritable id; + private VertexAttrWritable value; + + public P2PVertexReader() { + } + + protected String[] preprocessLine(Text line) throws IOException { + String[] tokens = line.toString().split(this.SEPARATOR); + this.id = new LongWritable(Long.parseLong(tokens[0])); + this.value = new VertexAttrWritable(); + return tokens; + } + + protected LongWritable getId(String[] tokens) throws IOException { + return this.id; + } + + protected VertexAttrWritable getValue(String[] tokens) throws IOException { + return this.value; + } + + protected Iterable> getEdges(String[] tokens) throws IOException { + List> edges = Lists.newArrayListWithCapacity(0); + return edges; + } + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/MsgWritable.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/MsgWritable.java new file mode 100644 index 000000000000..3b8fd3fea399 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/MsgWritable.java @@ -0,0 +1,123 @@ +// +// Source code recreated from a .class file by IntelliJ IDEA +// (powered by FernFlower decompiler) +// + +package com.alibaba.graphscope.example.giraph.circle; + +import com.alibaba.fastjson.JSONObject; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.stream.Collectors; +import org.apache.hadoop.io.Writable; + +public class MsgWritable implements Writable { + private List vertexPath; + private List edgePath; + + public MsgWritable() { + this.vertexPath = new ArrayList(); + this.edgePath = new 
ArrayList(); + } + + public MsgWritable(List vertexPath, List edgePath) { + this.vertexPath = vertexPath; + this.edgePath = edgePath; + } + + public static boolean isCircle(List vertexList) { + int size = vertexList.size(); + return size > 1 && (Long)vertexList.get(0) == (Long)vertexList.get(size - 1); + } + + public List getVertexPath() { + return this.vertexPath; + } + + public void setVertexPath(List vertexPath) { + this.vertexPath = vertexPath; + } + + public List getEdgePath() { + return this.edgePath; + } + + public void setEdgePath(List edgePath) { + this.edgePath = edgePath; + } + + public void write(DataOutput dataOutput) throws IOException { + int vSize = this.vertexPath.size(); + dataOutput.writeInt(vSize); + Iterator var3 = this.vertexPath.iterator(); + + while(var3.hasNext()) { + long v = (Long)var3.next(); + dataOutput.writeLong(v); + } + + int eSize = this.edgePath.size(); + dataOutput.writeInt(eSize); + Iterator var8 = this.edgePath.iterator(); + + while(var8.hasNext()) { + long e = (Long)var8.next(); + dataOutput.writeLong(e); + } + + } + + public void readFields(DataInput dataInput) throws IOException { + this.vertexPath = this.readLongList(dataInput); + this.edgePath = this.readLongList(dataInput); + } + + private List readLongList(DataInput dataInput) throws IOException { + int size = dataInput.readInt(); + List list = new ArrayList(); + if (size != 0) { + for(int i = 0; i < size; ++i) { + list.add(dataInput.readLong()); + } + } + + return list; + } + + public String toString() { + JSONObject json = new JSONObject(); + json.put("v", this.vertexPath); + json.put("e", this.edgePath); + return json.toJSONString(); + } + + public boolean equals(Object otherObj) { + if (!(otherObj instanceof MsgWritable)) { + return false; + } else { + MsgWritable other = (MsgWritable)otherObj; + return ((String)this.vertexPath.stream().map((i) -> { + return i + ""; + }).collect(Collectors.joining(","))).equals(other.vertexPath.stream().map((i) -> { + return i + ""; 
+ }).collect(Collectors.joining(","))) && ((String)this.edgePath.stream().map((i) -> { + return i + ""; + }).collect(Collectors.joining(","))).equals(other.edgePath.stream().map((i) -> { + return i + ""; + }).collect(Collectors.joining(","))); + } + } + + public int hashCode() { + return Objects.hash(new Object[]{this.vertexPath.stream().map((i) -> { + return i + ""; + }).collect(Collectors.joining(",")), this.edgePath.stream().map((i) -> { + return i + ""; + }).collect(Collectors.joining(","))}); + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/VertexAttrWritable.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/VertexAttrWritable.java new file mode 100644 index 000000000000..285fed6b66e3 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/VertexAttrWritable.java @@ -0,0 +1,72 @@ +// +// Source code recreated from a .class file by IntelliJ IDEA +// (powered by FernFlower decompiler) +// + +package com.alibaba.graphscope.example.giraph.format; + +import com.alibaba.graphscope.example.giraph.circle.MsgWritable; +import com.google.common.collect.Lists; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.stream.Collectors; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.io.Writable; + +public class VertexAttrWritable implements Writable { + private List vertexAttr; + + public VertexAttrWritable() { + this.vertexAttr = Lists.newArrayList(new MsgWritable[]{new MsgWritable()}); + } + + public VertexAttrWritable(List values) { + this.vertexAttr = values; + } + + public List getVertexAttr() { + return this.vertexAttr; + } + + public void setVertexAttr(List vertexAttr) { + this.vertexAttr = vertexAttr; + } + + public void readFields(DataInput in) 
throws IOException { + int size = in.readInt(); + List vertexAttr = new ArrayList(); + if (size != 0) { + for(int i = 0; i < size; ++i) { + MsgWritable msgWritable = new MsgWritable(); + msgWritable.readFields(in); + vertexAttr.add(msgWritable); + } + } + + this.vertexAttr = vertexAttr; + } + + public void write(DataOutput out) throws IOException { + out.writeInt(this.vertexAttr.size()); + Iterator var2 = this.vertexAttr.iterator(); + + while(var2.hasNext()) { + MsgWritable msgWritable = (MsgWritable)var2.next(); + msgWritable.write(out); + } + + } + + public String toString() { + List pathList = (List)this.vertexAttr.stream().filter((path) -> { + return MsgWritable.isCircle(path.getVertexPath()); + }).map((path) -> { + return StringUtils.join(path.getEdgePath(), "&"); + }).collect(Collectors.toList()); + return !pathList.isEmpty() ? StringUtils.join(pathList, "|") : ""; + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java new file mode 100644 index 000000000000..1cf9ce6b55fe --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java @@ -0,0 +1,34 @@ +package com.alibaba.graphscope.example.giraph.myCircle; + +import com.alibaba.graphscope.example.giraph.format.VertexAttrWritable; +import org.apache.giraph.graph.BasicComputation; +import org.apache.giraph.graph.Vertex; +import org.apache.hadoop.io.LongWritable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +public class Circle extends BasicComputation { + private static final Logger logger = LoggerFactory.getLogger(com.alibaba.graphscope.example.giraph.circle.Circle.class); + int maxIteration = 3; + + public Circle() { + } + + public void preSuperstep() { + this.maxIteration = Integer.parseInt(this.getConf().get("max", 
"3")); + logger.info("[preSuperstep] max is {}", this.maxIteration); + } + /** + * Must be defined by user to do computation on a single Vertex. + * + * @param vertex Vertex + * @param messages Messages that were sent to this vertex in the previous + * superstep. Each message is only guaranteed to have + */ + @Override + public void compute(Vertex vertex, Iterable messages) throws IOException { + + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Path.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Path.java new file mode 100644 index 000000000000..09578546c3e3 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Path.java @@ -0,0 +1,27 @@ +package com.alibaba.graphscope.example.giraph.myCircle; + +import org.apache.hadoop.io.Writable; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class Path implements Writable { + private List vertexIds; + + public Path() { + vertexIds = new ArrayList<>(); + } + + @Override + public void write(DataOutput dataOutput) throws IOException { + + } + + @Override + public void readFields(DataInput dataInput) throws IOException { + + } +} From 87453fe0e42afa953e5546be3af01f10438338a8 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Fri, 12 Jul 2024 14:56:31 +0800 Subject: [PATCH 37/52] stash Committed-by: xiaolei.zl from Dev container Committed-by: xiaolei.zl from Dev container --- analytical_engine/CMakeLists.txt | 16 ++++++++-------- .../mm/impl/GiraphMpiMessageManager.java | 7 ++++++- .../serialization/FFIByteVectorOutputStream.java | 5 +++++ k8s/Makefile | 3 ++- k8s/dockerfiles/analytical.Dockerfile | 2 +- 5 files changed, 22 insertions(+), 11 deletions(-) diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index 
5440ee83f43c..a6414b328e4a 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -509,14 +509,14 @@ if(ENABLE_JAVA_SDK) set(GAE_JAVA_JNI_LIB "${GAE_JAVA_DIR}/grape-runtime/target/native/libgrape-jni.so") endif() - add_custom_command( - OUTPUT "${GAE_JAVA_RUNTIME_JAR}" - COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} install -DskipTests --quiet - DEPENDS gs_proto - WORKING_DIRECTORY ${GAE_JAVA_DIR} - COMMENT "Building GAE-java..." - VERBATIM - ) + # add_custom_command( + # OUTPUT "${GAE_JAVA_RUNTIME_JAR}" + # COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} install -DskipTests --quiet + # DEPENDS gs_proto + # WORKING_DIRECTORY ${GAE_JAVA_DIR} + # COMMENT "Building GAE-java..." + # VERBATIM + # ) add_custom_target(grape_jni ALL DEPENDS "${GAE_JAVA_RUNTIME_JAR}" ) diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java index a24cbcc670a1..93ec0002f40b 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java +++ b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java @@ -113,6 +113,7 @@ public void receiveMessages() { /////////////////////////////////////////// bytesOfReceivedMsg += tmpVector.size(); } + tmpVector.close(); logger.info( "Frag [{}] totally Received [{}] bytes from others starting deserialization", fragId, @@ -255,5 +256,9 @@ public void postSuperstep() { } @Override - public void postApplication() {} + public void postApplication() { + for (int i = 0; i < fragNum; ++i) { + cacheOut[i].close(); + } + } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java 
b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java index 8956d8bd87b4..eb62dc095530 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/serialization/FFIByteVectorOutputStream.java @@ -38,6 +38,11 @@ public FFIByteVectorOutputStream(FFIByteVector vector) { offset = 0; } + @Override + public void close() { + vector.delete(); + } + public void resize(long size) { vector.resize(size); } diff --git a/k8s/Makefile b/k8s/Makefile index dc7491925bb7..70e976e237b2 100644 --- a/k8s/Makefile +++ b/k8s/Makefile @@ -10,7 +10,8 @@ endif ARCH := $(shell uname -m) VERSION ?= latest -VINEYARD_VERSION ?= v0.22.0 +#VINEYARD_VERSION ?= v0.22.0 +VINEYARD_VERSION ?= main # This is the version of builder base image in most cases, except for graphscope-dev BUILDER_VERSION ?= $(VINEYARD_VERSION) # This is the version of runtime base image diff --git a/k8s/dockerfiles/analytical.Dockerfile b/k8s/dockerfiles/analytical.Dockerfile index e6e3d4761b28..89bc7b838a07 100644 --- a/k8s/dockerfiles/analytical.Dockerfile +++ b/k8s/dockerfiles/analytical.Dockerfile @@ -87,7 +87,7 @@ RUN cd /home/graphscope/GraphScope/ && \ FROM vineyardcloudnative/manylinux-llvm:2014-11.0.0 AS llvm -FROM $REGISTRY/graphscope/vineyard-dev:$RUNTIME_VERSION AS analytical-java +FROM graphscope/vineyard-dev:main-x86_64 AS analytical-java COPY --from=llvm /opt/llvm11.0.0 /opt/llvm11 ENV LLVM11_HOME=/opt/llvm11 ENV LIBCLANG_PATH=$LLVM11_HOME/lib LLVM_CONFIG_PATH=$LLVM11_HOME/bin/llvm-config From a4edc7f49024159b102b2f614a8615c6102a3c96 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Sat, 13 Jul 2024 10:37:25 +0800 Subject: [PATCH 38/52] fixing circlePIE Committed-by: xiaolei.zl from Dev container --- analytical_engine/CMakeLists.txt | 12 ++++----- .../graphscope/example/circle/CirclePIE.java | 15 ++++++++++- 
.../example/circle/CirclePIEContext.java | 25 +++++++++++++------ .../graphscope/example/circle/Path.java | 7 +++++- .../example/giraph/myCircle/Circle.java | 2 +- .../mm/impl/GiraphMpiMessageManager.java | 2 +- analytical_engine/test/app_tests.sh | 17 +++++++++---- analytical_engine/test/modern_graph/knows.csv | 10 ++++++++ .../test/modern_graph/person.csv | 9 +++++++ analytical_engine/test/run_java_app.cc | 8 +++--- 10 files changed, 81 insertions(+), 26 deletions(-) create mode 100644 analytical_engine/test/modern_graph/knows.csv create mode 100644 analytical_engine/test/modern_graph/person.csv diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index a6414b328e4a..f428c8d59bd8 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -373,11 +373,11 @@ if (BUILD_TESTS) # target_link_libraries(run_app OpenMP::OpenMP_CXX) if (ENABLE_JAVA_SDK) - # add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) - # target_include_directories(run_java_app PRIVATE core utils apps) - # target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) - # target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} - # ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) + add_executable(run_java_app test/run_java_app.cc core/java/javasdk.cc core/object/dynamic.cc) + target_include_directories(run_java_app PRIVATE core utils apps) + target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) + target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} + ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) # # java app benchmark # add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) @@ -407,7 +407,7 @@ if (BUILD_TESTS) # target_link_libraries(run_java_string_app 
${CMAKE_DL_LIBS} ${VINEYARD_LIBRARIES} ${Boost_LIBRARIES} ${GLOG_LIBRARIES} ${GFLAGS_LIBRARIES} ${JNI_LIBRARIES}) if (${LIBUNWIND_FOUND}) - # target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) + target_link_libraries(run_java_app ${LIBUNWIND_LIBRARIES}) # target_link_libraries(property_graph_java_app_benchmarks ${LIBUNWIND_LIBRARIES}) target_link_libraries(giraph_runner ${LIBUNWIND_LIBRARIES}) # target_link_libraries(projected_fragment_mapper_test ${LIBUNWIND_LIBRARIES}) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java index ca412f2f3322..883010add2f5 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java @@ -43,6 +43,7 @@ public void PEval(IFragment graph, DefaultContextBase vertex = FFITypeFactoryhelper.newVertexLong(); CirclePIEContext ctx = (CirclePIEContext) context; for (long i = 0; i < graph.getInnerVerticesNum(); ++i) { + logger.info("vertex {}" ,i); vertex.setValue(i); Long globalId = graph.getInnerVertexGid(vertex); Path path = new Path(globalId); @@ -57,6 +58,7 @@ public void PEval(IFragment graph, DefaultContextBase graph, DefaultContextBase graph, DefaultContextBase graph, DefaultContextBase context, DefaultMessageManager messageManager) { CirclePIEContext ctx = (CirclePIEContext) context; + if (ctx.curStep >= ctx.maxStep){ + return ; + } //Receive msg and merge try { receiveMessage(graph, messageManager, ctx); @@ -114,8 +120,10 @@ else if (ctx.curStep == ctx.maxStep - 1){ } void sendMessageThroughOE(IFragment graph, CirclePIEContext ctx, DefaultMessageManager messageManager) { + logger.info("Send message through oe"); Vertex vertex = FFITypeFactoryhelper.newVertexLong(); for (long i = 0; i < 
graph.getInnerVerticesNum(); ++i) { + logger.info("vertex {}" ,i); vertex.setValue(i); Long globalId = graph.getInnerVertexGid(vertex); List paths = ctx.curPaths.get((int) i); @@ -131,17 +139,22 @@ void sendMessageThroughOE(IFragment graph, CirclePIEContext if (graph.isOuterVertex(nbr.neighbor())) { // send path to outer vertex. try { - sendMessageToOuterVertex(graph, messageManager, nbr.neighbor(), path); + if (!path.isCircle()){ // If circle path already found, skip. + logger.info("send msg to outer vertex: {} , path {}",nbr.neighbor(), path); + sendMessageToOuterVertex(graph, messageManager, nbr.neighbor(), path); + } } catch (IOException e) { e.printStackTrace(); } } else { + logger.info("send msg to inner vertex: {} , path {}", nbr.neighbor().getValue(), path); ctx.addToNextPath(nbr.neighbor(), path); } path.pop(); } } } + ctx.curStep += 1; } /** diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java index 9c2a590ad5f8..d16ffca4c42c 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java @@ -17,7 +17,7 @@ public class CirclePIEContext extends VertexDataContext { private static final Logger logger = LoggerFactory.getLogger(CirclePIEContext.class); - public int maxStep = 3; + public int maxStep = 4; public int curStep = 0; public List> curPaths; // Paths ending at vertex. 
public List> nextPaths; // New generated path end at vertex this round, @@ -38,23 +38,28 @@ public class CirclePIEContext extends VertexDataContext frag, DefaultMessageManager messageManager, JSONObject jsonObject) { long innerVertexNum = frag.getInnerVerticesNum(); + createFFIContext(frag, Long.class, false); + if (jsonObject.containsKey("maxStep")) { + maxStep = jsonObject.getInteger("maxStep"); + return; + } curPaths = new ArrayList>((int) innerVertexNum); - nextPaths = new ArrayList<>((int) innerVertexNum); - results = new ArrayList<>((int) innerVertexNum); + nextPaths = new ArrayList>((int) innerVertexNum); + results = new ArrayList>((int) innerVertexNum); for (int i = 0; i < innerVertexNum; ++i ){ - curPaths.set(i, new ArrayList<>()); - nextPaths.set(i, new ArrayList<>()); - results.set(i, new ArrayList<>()); + curPaths.add(new ArrayList()); + nextPaths.add(new ArrayList()); + results.add(new ArrayList()); } parser = new LongIdParser(frag.fnum(), 1); } public void addToCurrentPath(Vertex vertex, Path path) { - curPaths.get(vertex.getValue().intValue()).add(path); + curPaths.get(vertex.getValue().intValue()).add(new Path(path)); } public void addToNextPath(Vertex vertex, Path path) { - nextPaths.get(vertex.getValue().intValue()).add(path); + nextPaths.get(vertex.getValue().intValue()).add(new Path(path)); } public void persistCirclePathInCurrent() { @@ -71,7 +76,11 @@ public void swapPaths() { List> tmp = curPaths; curPaths = nextPaths; nextPaths = tmp; + int size = nextPaths.size(); nextPaths.clear(); + for (int i = 0; i < size; ++i) { + nextPaths.add(new ArrayList()); + } } /** diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java index a27807da0f40..bbff41e5c7e0 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java +++ 
b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java @@ -16,6 +16,11 @@ public Path() { vertexInnerIds = new Stack<>(); } + public Path(Path path) { + vertexInnerIds = new Stack<>(); + vertexInnerIds.addAll(path.vertexInnerIds); + } + public Path(long vid) { vertexInnerIds = new Stack<>(); vertexInnerIds.add(vid); @@ -66,7 +71,7 @@ public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Path{"); for (int i = 0; i < vertexInnerIds.size(); ++i){ - sb.append(i); + sb.append(vertexInnerIds.get(i)); if (i < vertexInnerIds.size() - 1){ sb.append(","); } diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java index 1cf9ce6b55fe..5d5a26bccb4e 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java @@ -10,7 +10,7 @@ import java.io.IOException; public class Circle extends BasicComputation { - private static final Logger logger = LoggerFactory.getLogger(com.alibaba.graphscope.example.giraph.circle.Circle.class); + private static final Logger logger = LoggerFactory.getLogger(com.alibaba.graphscope.example.giraph.myCircle.Circle.class); int maxIteration = 3; public Circle() { diff --git a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java index 93ec0002f40b..b76c6cf1bc9b 100644 --- a/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java +++ 
b/analytical_engine/java/grape-giraph/src/main/java/com/alibaba/graphscope/parallel/mm/impl/GiraphMpiMessageManager.java @@ -113,7 +113,7 @@ public void receiveMessages() { /////////////////////////////////////////// bytesOfReceivedMsg += tmpVector.size(); } - tmpVector.close(); + tmpVector.delete(); logger.info( "Frag [{}] totally Received [{}] bytes from others starting deserialization", fragId, diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index 3db1e44db335..724a680bcc99 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -415,7 +415,7 @@ get_test_data # exact_verify "${test_dir}"/p2p-31-"${app}" # done -start_vineyard +#start_vineyard # run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 # run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 @@ -449,6 +449,13 @@ then # 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ # com.alibaba.graphscope.example.stringApp.StringApp + # run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS + GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" \ + 1 "../test/modern_graph/knows.csv#header_row=True#delimiter=|#src_label=v0&dst_label=v0&label=e" \ + 1 "../test/modern_graph/person.csv#header_row=True#delimiter=|#label=v0" 1 0 1 \ + com.alibaba.graphscope.example.circle.CirclePIE + + echo "Running girpah tests..." 
# GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ @@ -456,10 +463,10 @@ then # --user_app_class com.alibaba.graphscope.example.giraph.SSSP # echo "Test Giraph app user Customized Writable" - GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ - --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile hdfs://localhost:9000/test/p2p-31.v \ - --efile hdfs://localhost:9000/test/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ - --user_app_class com.alibaba.graphscope.example.giraph.SSSP + # GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ + # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile hdfs://localhost:9000/test/p2p-31.v \ + # --efile hdfs://localhost:9000/test/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + # --user_app_class com.alibaba.graphscope.example.giraph.SSSP fi fi diff --git a/analytical_engine/test/modern_graph/knows.csv b/analytical_engine/test/modern_graph/knows.csv new file mode 100644 index 000000000000..89dc3f247074 --- /dev/null +++ b/analytical_engine/test/modern_graph/knows.csv @@ -0,0 +1,10 @@ +src.id|dst.id|weight +1|2|5 +1|4|1 +2|3|1 +2|4|6 +4|1|1 +3|6|6 +6|7|7 +7|8|9 +8|2|1 \ No newline at end of file diff --git a/analytical_engine/test/modern_graph/person.csv b/analytical_engine/test/modern_graph/person.csv new file mode 100644 index 000000000000..82e0eb1dff2b --- /dev/null +++ b/analytical_engine/test/modern_graph/person.csv @@ -0,0 +1,9 @@ +id|age +1|29 +2|27 +3|12 +4|32 +5|41 +6|35 +7|100 
+8|200 \ No newline at end of file diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index 571080829ac8..3a7218182840 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -49,6 +49,7 @@ #include "core/loader/arrow_fragment_loader.h" #include "core/object/fragment_wrapper.h" #include "core/utils/transform_utils.h" +#include "java_pie/java_pie_projected_default_app.h" #include "java_pie/java_pie_projected_parallel_app.h" #include "java_pie/java_pie_property_parallel_app.h" #include "proto/graph_def.pb.h" @@ -59,7 +60,7 @@ using FragmentType = vineyard::ArrowFragment; using ProjectedFragmentType = - gs::ArrowProjectedFragment; + gs::ArrowProjectedFragment; void output_nd_array(const grape::CommSpec& comm_spec, std::unique_ptr arc, const std::string& output_path, int data_type_expected) { @@ -332,7 +333,8 @@ void QueryProjected(vineyard::Client& client, const std::string& basic_params, const std::string& selector_string, const std::string& selectors_string) { - using AppType = gs::JavaPIEProjectedParallelAppOE; + // using AppType = gs::JavaPIEProjectedParallelAppOE; + using AppType = gs::JavaPIEProjectedDefaultApp; auto app = std::make_shared(); auto worker = AppType::CreateWorker(app, fragment); auto spec = grape::DefaultParallelEngineSpec(); @@ -502,7 +504,7 @@ void Run(vineyard::Client& client, const grape::CommSpec& comm_spec, selector_string, selectors_string); } else { // 3. 
run projected pt.put("frag_name", - "gs::ArrowProjectedFragment"); + "gs::ArrowProjectedFragment"); std::stringstream ss; boost::property_tree::json_parser::write_json(ss, pt); std::string basic_params = ss.str(); From 6727e98f26ff62083bffb6bebde3f9fb395319b0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Sat, 13 Jul 2024 11:00:38 +0800 Subject: [PATCH 39/52] add annotation invoker --- .../example/circle/CirclePIEContext.java | 16 ++++++++++++++-- .../annotation/AnnotationInvoker.java | 19 +++++++++++++++++++ 2 files changed, 33 insertions(+), 2 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java index d16ffca4c42c..1f6fab1652eb 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java @@ -1,11 +1,15 @@ package com.alibaba.graphscope.example.circle; +import com.alibaba.fastffi.impl.CXXStdString; import com.alibaba.fastjson.JSONObject; import com.alibaba.graphscope.context.DefaultContextBase; import com.alibaba.graphscope.context.VertexDataContext; +import com.alibaba.graphscope.ds.GSVertexArray; import com.alibaba.graphscope.ds.Vertex; import com.alibaba.graphscope.fragment.IFragment; import com.alibaba.graphscope.parallel.DefaultMessageManager; +import com.alibaba.graphscope.stdcxx.StdString; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; import com.alibaba.graphscope.utils.LongIdParser; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -13,7 +17,7 @@ import java.util.ArrayList; import java.util.List; -public class CirclePIEContext extends VertexDataContext, Long> +public class CirclePIEContext extends VertexDataContext, StdString> implements 
DefaultContextBase { private static final Logger logger = LoggerFactory.getLogger(CirclePIEContext.class); @@ -38,7 +42,7 @@ public class CirclePIEContext extends VertexDataContext frag, DefaultMessageManager messageManager, JSONObject jsonObject) { long innerVertexNum = frag.getInnerVerticesNum(); - createFFIContext(frag, Long.class, false); + createFFIContext(frag, StdString.class, false); if (jsonObject.containsKey("maxStep")) { maxStep = jsonObject.getInteger("maxStep"); return; @@ -94,6 +98,14 @@ public void swapPaths() { public void Output(IFragment frag) { logger.info("finally cur path {}", curPaths); logger.info("finally next path {}", nextPaths); + + GSVertexArray vertexArray = data(); + Vertex cur = FFITypeFactoryhelper.newVertexLong(); + for (long vid = 0; vid < frag.getInnerVerticesNum(); ++vid) { + cur.setValue(vid); + StdString value = (StdString) vertexArray.get(cur); + value.fromJavaString(results.get((int) vid).toString()); + } } public void tryToFindCircle(Path path) { diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index 7cbcb1b8f61f..03734a6e8f28 100644 --- a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -973,6 +973,25 @@ + ">", "Integer" }), + @CXXTemplate( + cxx = { + CPP_ARROW_PROJECTED_FRAGMENT + + "", + "std::string" + }, + java = { + JAVA_ARROW_PROJECTED_FRAGMENT + + "<" + + LONG + + "," + + LONG + + "," + + LONG + + "," + + LONG + + ">", + STD_STRING + }), @CXXTemplate( cxx = { CPP_ARROW_PROJECTED_FRAGMENT From 0c0ce2c905fbb521352c212c40c5ac219c874492 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Sun, 14 Jul 2024 10:25:00 +0800 Subject: [PATCH 40/52] fix new example Committed-by: 
xiaolei.zl from Dev container --- analytical_engine/CMakeLists.txt | 16 ++++++------ .../graphscope/example/circle/CirclePIE.java | 4 ++- .../example/circle/CirclePIEContext.java | 1 + .../graphscope/utils/AppContextGetter.java | 3 +++ .../utils/FFITypeFactoryhelper.java | 2 +- analytical_engine/test/run_java_app.cc | 25 ++++++++++++++----- .../template/CMakeLists.template | 16 ++++++------ k8s/dockerfiles/coordinator.Dockerfile | 3 +-- python/test_java_app.py | 24 ++++++++++++++++++ 9 files changed, 68 insertions(+), 26 deletions(-) create mode 100644 python/test_java_app.py diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index f428c8d59bd8..79aaac011a51 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -509,14 +509,14 @@ if(ENABLE_JAVA_SDK) set(GAE_JAVA_JNI_LIB "${GAE_JAVA_DIR}/grape-runtime/target/native/libgrape-jni.so") endif() - # add_custom_command( - # OUTPUT "${GAE_JAVA_RUNTIME_JAR}" - # COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} install -DskipTests --quiet - # DEPENDS gs_proto - # WORKING_DIRECTORY ${GAE_JAVA_DIR} - # COMMENT "Building GAE-java..." - # VERBATIM - # ) + add_custom_command( + OUTPUT "${GAE_JAVA_RUNTIME_JAR}" + COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} install -DskipTests --quiet + DEPENDS gs_proto + WORKING_DIRECTORY ${GAE_JAVA_DIR} + COMMENT "Building GAE-java..." 
+ VERBATIM + ) add_custom_target(grape_jni ALL DEPENDS "${GAE_JAVA_RUNTIME_JAR}" ) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java index 883010add2f5..e3bc6decde0c 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java @@ -25,7 +25,7 @@ public class CirclePIE implements DefaultAppBase< Long, CirclePIEContext> { private static final Logger logger = LoggerFactory.getLogger(CirclePIE.class); - private static FFIByteVectorOutputStream msgVector = new FFIByteVectorOutputStream(); + private static FFIByteVectorOutputStream msgVector; /** * Partial Evaluation to implement. @@ -42,6 +42,7 @@ public class CirclePIE implements DefaultAppBase< public void PEval(IFragment graph, DefaultContextBase context, DefaultMessageManager messageManager) { Vertex vertex = FFITypeFactoryhelper.newVertexLong(); CirclePIEContext ctx = (CirclePIEContext) context; + msgVector = new FFIByteVectorOutputStream(); for (long i = 0; i < graph.getInnerVerticesNum(); ++i) { logger.info("vertex {}" ,i); vertex.setValue(i); @@ -117,6 +118,7 @@ else if (ctx.curStep == ctx.maxStep - 1){ // maybe receive message, but not sending message. 
logger.info("Max step reached, " + ctx.curStep); } + ctx.curStep += 1; } void sendMessageThroughOE(IFragment graph, CirclePIEContext ctx, DefaultMessageManager messageManager) { diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java index 1f6fab1652eb..b562eddbc49d 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java @@ -9,6 +9,7 @@ import com.alibaba.graphscope.fragment.IFragment; import com.alibaba.graphscope.parallel.DefaultMessageManager; import com.alibaba.graphscope.stdcxx.StdString; +import com.alibaba.graphscope.ds.StringView; import com.alibaba.graphscope.utils.FFITypeFactoryhelper; import com.alibaba.graphscope.utils.LongIdParser; import org.slf4j.Logger; diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java index 3d8c0da684d3..6507b4981ec4 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/AppContextGetter.java @@ -204,6 +204,9 @@ public static String getVertexDataContextDataType(VertexDataContext ctxObj) { else if (ret.getName() == "com.alibaba.graphscope.ds.StringView") { return "std::string"; } + else if (ret.getName() == "com.alibaba.graphscope.stdcxx.StdString"){ + return "std::string"; + } return null; } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java 
b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java index 1e2020cef981..cc694963caa0 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java @@ -81,7 +81,7 @@ public static String javaType2CppType(Class clz) { return "int32_t"; } else if (clz.getName() == Double.class.getName()) { return "double"; - } else if (clz.getName() == String.class.getName()) { + } else if (clz.getName() == String.class.getName() || clz.getName() == StdString.class.getName()) { return "std::string"; } else if (clz.getName() == StringView.class.getName()) { return "std::string"; diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index 3a7218182840..bf6a9493c7fa 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -96,6 +96,12 @@ void output_nd_array(const grape::CommSpec& comm_spec, oarc >> v; assembled_ostream << v << std::endl; } + } else if (data_type_expected == 8) { + for (int64_t i = 0; i < length1; ++i) { + std::string s; + oarc >> s; + assembled_ostream << s << std::endl; + } } else { LOG(FATAL) << "Unregonizable data type " << data_type_expected; } @@ -158,6 +164,12 @@ void output_data_frame(const grape::CommSpec& comm_spec, oarc >> data; assembled_col2_ostream << data << std::endl; } + } else if (expected_data_type == 8) { + for (int64_t i = 0; i < length; ++i) { + std::string data; + oarc >> data; + assembled_col2_ostream << data << std::endl; + } } else { LOG(FATAL) << "Unregonizable data type " << expected_data_type; } @@ -416,7 +428,7 @@ void QueryProjected(vineyard::Client& client, std::string java_out_prefix = out_prefix + "/java_projected_assembled_ndarray.dat"; output_nd_array(comm_spec, std::move(arc), java_out_prefix, - 4); // 4 for int64_t + 8); // 4 for int64_t } 
VLOG(1) << "[0] java projected finish test ndarray"; @@ -427,7 +439,7 @@ void QueryProjected(vineyard::Client& client, vd_ctx_wrapper->ToDataframe(comm_spec, selectors, range).value()); std::string java_data_frame_out_prefix = out_prefix + "/java_projected"; output_data_frame(comm_spec, std::move(arc), java_data_frame_out_prefix, - 4); + 8); } VLOG(1) << "[1] java projected finish test dataframe"; @@ -438,10 +450,11 @@ void QueryProjected(vineyard::Client& client, CHECK(tmp); vineyard::ObjectID ndarray_object = tmp.value(); std::string java_v6d_tensor_prefix = out_prefix + "/java_projected"; - vineyard::AnyType expected_data_type = vineyard::AnyType::Int64; // 4 - output_vineyard_tensor(client, ndarray_object, comm_spec, - java_v6d_tensor_prefix, - expected_data_type); + // vineyard::AnyType expected_data_type = vineyard::AnyType::Int64; // 4 + vineyard::AnyType expected_data_type = vineyard::AnyType::String; + output_vineyard_tensor(client, ndarray_object, comm_spec, + java_v6d_tensor_prefix, + expected_data_type); } VLOG(1) << "[2] java projected finish test vineyard tensor"; diff --git a/coordinator/gscoordinator/template/CMakeLists.template b/coordinator/gscoordinator/template/CMakeLists.template index 6852bf70958a..cd76e2d5be59 100644 --- a/coordinator/gscoordinator/template/CMakeLists.template +++ b/coordinator/gscoordinator/template/CMakeLists.template @@ -384,14 +384,14 @@ elseif (JAVA_PIE_APP) endif() # 0. 
codegen - if (JAVA_APP_CODEGEN) - execute_process(COMMAND java -cp ${PRE_CP} ${PROCESSOR_MAIN_CLASS} ${JAR_PATH} ${OUTPUT_DIR} $_graph_type $_vd_type - RESULT_VARIABLE ret) - message(STATUS "java -cp ${PRE_CP} ${PROCESSOR_MAIN_CLASS} ${JAR_PATH} ${OUTPUT_DIR} $_graph_type $_vd_type") - if (ret EQUAL "1") - message(FATAL_ERROR "Preprocess failed") - endif() - endif() + #if (JAVA_APP_CODEGEN) + #execute_process(COMMAND java -cp ${PRE_CP} ${PROCESSOR_MAIN_CLASS} ${JAR_PATH} ${OUTPUT_DIR} $_graph_type $_vd_type + # RESULT_VARIABLE ret) + # message(STATUS "java -cp ${PRE_CP} ${PROCESSOR_MAIN_CLASS} ${JAR_PATH} ${OUTPUT_DIR} $_graph_type $_vd_type") + #if (ret EQUAL "1") + # message(FATAL_ERROR "Preprocess failed") + #endif() + #endif() # find jni--------------------------------------------------------------------- find_package(JNI REQUIRED) include_directories(SYSTEM ${JAVA_INCLUDE_PATH}) diff --git a/k8s/dockerfiles/coordinator.Dockerfile b/k8s/dockerfiles/coordinator.Dockerfile index 04f19bd0a968..10dd020c920c 100644 --- a/k8s/dockerfiles/coordinator.Dockerfile +++ b/k8s/dockerfiles/coordinator.Dockerfile @@ -47,8 +47,7 @@ RUN apt-get update -y && \ apt-get clean -y && \ rm -rf /var/lib/apt/lists/* -RUN sudo apt purge -y openjdk* && sudo apt purge -y default-jre* && \ - apt-get update && apt-get install -y openjdk-8-jdk && \ +RUN apt-get update && apt-get install -y openjdk-8-jdk && \ update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && java -version ENV GRAPHSCOPE_HOME=/opt/graphscope diff --git a/python/test_java_app.py b/python/test_java_app.py new file mode 100644 index 000000000000..04d0f2658e47 --- /dev/null +++ b/python/test_java_app.py @@ -0,0 +1,24 @@ +import graphscope +import os +from graphscope.framework.app import load_app +from graphscope.framework.loader import Loader + +graphscope.set_option(show_log=True) +graphscope.set_option(log_level="DEBUG") + +# sess = graphscope.session(cluster_type="k8s", 
enabled_engines="gae-java",k8s_image_tag="0.27.0") # k8s_volumes=k8s_volumes, +# sess.add_lib('/mnt/zhanglei/code/giraph-dev/GraphScope/analytical_engine/java/grape-demo/target/grape-demo-0.27.0-shaded.jar') + +sess = graphscope.session(cluster_type="hosts", num_workers=1, enabled_engines="gae-java") +sess.add_lib('/workspaces/GraphScope/analytical_engine/java/grape-demo/target/grape-demo-0.27.0-shaded.jar') + + +#vloader = Loader(source="hdfs:///test/person.csv",host="host.minikube.internal",port=9000,delimiter='|') +#eloader = Loader(source="hdfs:///test/knows.csv",host="host.minikube.internal",port=9000,delimiter='|') +#graph2 = sess.load_from(vertices=vloader, edges=eloader) +graph2 = sess.load_from(vertices=Loader(source="/workspaces/GraphScope/analytical_engine/test/modern_graph/person.csv",delimiter='|'), + edges=Loader(source="/workspaces/GraphScope/analytical_engine/test/modern_graph/knows.csv", delimiter='|')) +proj_g2 = graph2._project_to_simple(v_prop="age", e_prop="weight") + +user_app = load_app(algo="java_pie:com.alibaba.graphscope.example.circle.CirclePIE") +res = user_app(proj_g2) \ No newline at end of file From 0e1e9833d1c23acc4c29436913cfe9fef68658d2 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 15 Jul 2024 10:38:42 +0800 Subject: [PATCH 41/52] run on k8s Committed-by: xiaolei.zl from Dev container --- k8s/Makefile | 4 ++-- k8s/dockerfiles/analytical.Dockerfile | 3 ++- k8s/dockerfiles/coordinator.Dockerfile | 2 +- python/test_gae_java.py | 6 ------ python/test_java_app.py | 18 +++++++++--------- 5 files changed, 14 insertions(+), 19 deletions(-) diff --git a/k8s/Makefile b/k8s/Makefile index 70e976e237b2..514d5dd8d9cc 100644 --- a/k8s/Makefile +++ b/k8s/Makefile @@ -10,8 +10,8 @@ endif ARCH := $(shell uname -m) VERSION ?= latest -#VINEYARD_VERSION ?= v0.22.0 -VINEYARD_VERSION ?= main +VINEYARD_VERSION ?= v0.22.0 +#VINEYARD_VERSION ?= main # This is the version of builder base image in most cases, except for graphscope-dev 
BUILDER_VERSION ?= $(VINEYARD_VERSION) # This is the version of runtime base image diff --git a/k8s/dockerfiles/analytical.Dockerfile b/k8s/dockerfiles/analytical.Dockerfile index 89bc7b838a07..c6e7a8cb8e98 100644 --- a/k8s/dockerfiles/analytical.Dockerfile +++ b/k8s/dockerfiles/analytical.Dockerfile @@ -87,7 +87,8 @@ RUN cd /home/graphscope/GraphScope/ && \ FROM vineyardcloudnative/manylinux-llvm:2014-11.0.0 AS llvm -FROM graphscope/vineyard-dev:main-x86_64 AS analytical-java +# FROM graphscope/vineyard-dev:main-x86_64 AS analytical-java +FROM $REGISTRY/graphscope/vineyard-dev:$RUNTIME_VERSION AS analytical-java COPY --from=llvm /opt/llvm11.0.0 /opt/llvm11 ENV LLVM11_HOME=/opt/llvm11 ENV LIBCLANG_PATH=$LLVM11_HOME/lib LLVM_CONFIG_PATH=$LLVM11_HOME/bin/llvm-config diff --git a/k8s/dockerfiles/coordinator.Dockerfile b/k8s/dockerfiles/coordinator.Dockerfile index 10dd020c920c..dcc71ed8a7b5 100644 --- a/k8s/dockerfiles/coordinator.Dockerfile +++ b/k8s/dockerfiles/coordinator.Dockerfile @@ -10,7 +10,7 @@ COPY --chown=graphscope:graphscope . 
/home/graphscope/GraphScope # uninstall openjdk-11 and install openjdk-8 RUN sudo apt purge -y openjdk* && sudo apt purge -y default-jre* && \ - sudo apt-get update && sudo apt-get install -y openjdk-8-jdk && \ + sudo apt-get update && sudo apt-get install -y openjdk-8-jdk maven && \ sudo update-alternatives --set java /usr/lib/jvm/java-8-openjdk-amd64/jre/bin/java && java -version RUN cd /home/graphscope/GraphScope/ && \ diff --git a/python/test_gae_java.py b/python/test_gae_java.py index 86bf1111b487..4e54c99ded1c 100644 --- a/python/test_gae_java.py +++ b/python/test_gae_java.py @@ -39,12 +39,6 @@ # edges="/data/gstest/p2p-31.e", # eformat=eformat2, # ) -# graph2 = sess.load_from( -# vertices="hdfs://localhost:9000/test/p2p-31.v", -# vformat=vformat2, -# edges="hdfs://localhost:9000/test/p2p-31.e", -# eformat=eformat2, -# ) graph2 = sess.load_from( vertices="hdfs://host.minikube.internal:9000/test/p2p-31.v", vformat=vformat2, diff --git a/python/test_java_app.py b/python/test_java_app.py index 04d0f2658e47..62496b80c3bf 100644 --- a/python/test_java_app.py +++ b/python/test_java_app.py @@ -6,18 +6,18 @@ graphscope.set_option(show_log=True) graphscope.set_option(log_level="DEBUG") -# sess = graphscope.session(cluster_type="k8s", enabled_engines="gae-java",k8s_image_tag="0.27.0") # k8s_volumes=k8s_volumes, -# sess.add_lib('/mnt/zhanglei/code/giraph-dev/GraphScope/analytical_engine/java/grape-demo/target/grape-demo-0.27.0-shaded.jar') +sess = graphscope.session(cluster_type="k8s", enabled_engines="gae-java",k8s_image_tag="0.27.0") # k8s_volumes=k8s_volumes, +sess.add_lib('/mnt/zhanglei/code/giraph-dev/GraphScope/analytical_engine/java/grape-demo/target/grape-demo-0.27.0-shaded.jar') -sess = graphscope.session(cluster_type="hosts", num_workers=1, enabled_engines="gae-java") -sess.add_lib('/workspaces/GraphScope/analytical_engine/java/grape-demo/target/grape-demo-0.27.0-shaded.jar') +#sess = graphscope.session(cluster_type="hosts", num_workers=1, 
enabled_engines="gae-java") +#sess.add_lib('/workspaces/GraphScope/analytical_engine/java/grape-demo/target/grape-demo-0.27.0-shaded.jar') -#vloader = Loader(source="hdfs:///test/person.csv",host="host.minikube.internal",port=9000,delimiter='|') -#eloader = Loader(source="hdfs:///test/knows.csv",host="host.minikube.internal",port=9000,delimiter='|') -#graph2 = sess.load_from(vertices=vloader, edges=eloader) -graph2 = sess.load_from(vertices=Loader(source="/workspaces/GraphScope/analytical_engine/test/modern_graph/person.csv",delimiter='|'), - edges=Loader(source="/workspaces/GraphScope/analytical_engine/test/modern_graph/knows.csv", delimiter='|')) +vloader = Loader(source="hdfs:///test/person.csv",host="host.minikube.internal",port=9000,delimiter='|') +eloader = Loader(source="hdfs:///test/knows.csv",host="host.minikube.internal",port=9000,delimiter='|') +graph2 = sess.load_from(vertices=vloader, edges=eloader) +#graph2 = sess.load_from(vertices=Loader(source="/workspaces/GraphScope/analytical_engine/test/modern_graph/person.csv",delimiter='|'), +# edges=Loader(source="/workspaces/GraphScope/analytical_engine/test/modern_graph/knows.csv", delimiter='|')) proj_g2 = graph2._project_to_simple(v_prop="age", e_prop="weight") user_app = load_app(algo="java_pie:com.alibaba.graphscope.example.circle.CirclePIE") From 0e20b140cced01d5c8b76d6aab08aa6cd47d04f1 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Tue, 23 Jul 2024 13:25:49 +0800 Subject: [PATCH 42/52] use availabel memory Committed-by: xiaolei.zl from Dev container --- analytical_engine/core/java/javasdk.cc | 13 +++- .../graphscope/example/circle/CirclePIE.java | 21 +++--- .../example/circle/CirclePIEContext.java | 6 +- .../graphscope/example/circle/Path.java | 2 +- analytical_engine/test/app_tests.sh | 5 ++ analytical_engine/test/run_java_app.cc | 71 ++++++++++--------- 6 files changed, 67 insertions(+), 51 deletions(-) diff --git a/analytical_engine/core/java/javasdk.cc b/analytical_engine/core/java/javasdk.cc 
index 64ee7468853c..5efa27d015fa 100644 --- a/analytical_engine/core/java/javasdk.cc +++ b/analytical_engine/core/java/javasdk.cc @@ -121,8 +121,19 @@ inline uint64_t getTotalSystemMemory() { return ret; } +inline uint64_t getCurrentAvailableMemory() { + uint64_t pages = sysconf(_SC_AVPHYS_PAGES); + uint64_t page_size = sysconf(_SC_PAGE_SIZE); + uint64_t ret = pages * page_size; + VLOG(10) << "---> getTotalAvailabelSystemMemory() -> " << ret; + ret = ret / 1024; + ret = ret / 1024; + ret = ret / 1024; + return ret; +} + void SetupEnv(const int local_num) { - int systemMemory = getTotalSystemMemory() / 5; + int systemMemory = (getCurrentAvailableMemory() * 2) / 3; int systemMemoryPerWorker = std::max(systemMemory / local_num, 1); int mnPerWorker = std::max(systemMemoryPerWorker * 9 / 12, 1); diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java index e3bc6decde0c..9d94ba88a55c 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIE.java @@ -44,7 +44,6 @@ public void PEval(IFragment graph, DefaultContextBase graph, DefaultContextBase graph, DefaultContextBase graph, DefaultContextBase< e.printStackTrace(); } - logger.info("In super step {}, cur path {}", ctx.curStep, ctx.curPaths); - logger.info("In super step {}, next path {}", ctx.curStep, ctx.nextPaths); + logger.info("In super step {}, cur path {}", ctx.curStep, ctx.curPaths.size()); + logger.info("In super step {}, next path {}", ctx.curStep, ctx.nextPaths.size()); // For received msg, check if it is already circle, if true, add to the final results. 
ctx.persistCirclePathInCurrent(); @@ -125,7 +123,6 @@ void sendMessageThroughOE(IFragment graph, CirclePIEContext logger.info("Send message through oe"); Vertex vertex = FFITypeFactoryhelper.newVertexLong(); for (long i = 0; i < graph.getInnerVerticesNum(); ++i) { - logger.info("vertex {}" ,i); vertex.setValue(i); Long globalId = graph.getInnerVertexGid(vertex); List paths = ctx.curPaths.get((int) i); @@ -142,14 +139,14 @@ void sendMessageThroughOE(IFragment graph, CirclePIEContext // send path to outer vertex. try { if (!path.isCircle()){ // If circle path already found, skip. - logger.info("send msg to outer vertex: {} , path {}",nbr.neighbor(), path); + // logger.info("send msg to outer vertex: {} , path {}",nbr.neighbor(), path); sendMessageToOuterVertex(graph, messageManager, nbr.neighbor(), path); } } catch (IOException e) { e.printStackTrace(); } } else { - logger.info("send msg to inner vertex: {} , path {}", nbr.neighbor().getValue(), path); + // logger.info("send msg to inner vertex: {} , path {}", nbr.neighbor().getValue(), path); ctx.addToNextPath(nbr.neighbor(), path); } path.pop(); @@ -164,7 +161,7 @@ void sendMessageThroughOE(IFragment graph, CirclePIEContext * @param neighbor the outer vertex vid */ void sendMessageToOuterVertex(IFragment graph, DefaultMessageManager mm, Vertex neighbor, Path path) throws IOException { - logger.info("Send path {} to vertex {}, dst frag {}", path, neighbor.getValue(), graph.fid()); + // logger.info("Send path {} to vertex {}, dst frag {}", path, neighbor.getValue(), graph.fid()); msgVector.reset(); msgVector.writeLong(graph.getOuterVertexGid(neighbor)); path.write(msgVector); @@ -180,14 +177,14 @@ void receiveMessage(IFragment graph, DefaultMessageManag // trigger the refresh tmpVector.touch(); bytesOfReceivedMsg += tmpVector.size(); - logger.info("Frag [{}] digest message of size {}", graph.fid(), tmpVector.size()); + // logger.info("Frag [{}] digest message of size {}", graph.fid(), tmpVector.size()); Path path = 
new Path(); FFIByteVectorInputStream inputStream = new FFIByteVectorInputStream(tmpVector); long gid = inputStream.readLong(); if (!graph.innerVertexGid2Vertex(gid, tmpVertex)){ logger.error("Fail to get lid from gid {}", gid); } - logger.info("Got msg to lid {}", tmpVertex.getValue()); + // logger.info("Got msg to lid {}", tmpVertex.getValue()); path.read(inputStream); // Add the tail node of new path here. // path.add(gid); diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java index b562eddbc49d..ec9363083a5c 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEContext.java @@ -97,8 +97,8 @@ public void swapPaths() { */ @Override public void Output(IFragment frag) { - logger.info("finally cur path {}", curPaths); - logger.info("finally next path {}", nextPaths); + // logger.info("finally cur path {}", curPaths); + // logger.info("finally next path {}", nextPaths); GSVertexArray vertexArray = data(); Vertex cur = FFITypeFactoryhelper.newVertexLong(); @@ -111,7 +111,7 @@ public void Output(IFragment frag) { public void tryToFindCircle(Path path) { if (path.isCircle()){ - logger.info("path is circle {}", path); + // logger.info("path is circle {}", path); long lid = parser.getOffset(path.top()); this.results.get((int)lid).add(path); } diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java index bbff41e5c7e0..4f3b5c22daba 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java +++ 
b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/Path.java @@ -60,7 +60,7 @@ public void read(FFIByteVectorInputStream input) throws IOException { throw new RuntimeException("The Path is not empty"); } int len = input.readInt(); - logger.info("reading {} elements from stream", len); + // logger.info("reading {} elements from stream", len); for (int i = 0; i < len; ++i) { vertexInnerIds.push(input.readLong()); } diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index 724a680bcc99..b611c4908fd5 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -450,11 +450,16 @@ then # com.alibaba.graphscope.example.stringApp.StringApp # run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS + GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" \ 1 "../test/modern_graph/knows.csv#header_row=True#delimiter=|#src_label=v0&dst_label=v0&label=e" \ 1 "../test/modern_graph/person.csv#header_row=True#delimiter=|#label=v0" 1 0 1 \ com.alibaba.graphscope.example.circle.CirclePIE + # GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" \ + # 1 "/workspaces/GraphScope/single/e.csv#header_row=False#delimiter=,#src_label=v0&dst_label=v0&label=e" \ + # 1 "/workspaces/GraphScope/single/v2.csv#header_row=Frue#delimiter=,#label=v0" 1 0 1 \ + # com.alibaba.graphscope.example.circle.CirclePIE echo "Running girpah tests..." 
# GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index bf6a9493c7fa..0f18e12ec637 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -422,40 +422,43 @@ void QueryProjected(vineyard::Client& client, auto vd_ctx_wrapper = std::dynamic_pointer_cast(ctx_wrapper); /// 0. test ndarray - { - std::unique_ptr arc = std::move( - vd_ctx_wrapper->ToNdArray(comm_spec, selector, range).value()); - std::string java_out_prefix = - out_prefix + "/java_projected_assembled_ndarray.dat"; - output_nd_array(comm_spec, std::move(arc), java_out_prefix, - 8); // 4 for int64_t - } - VLOG(1) << "[0] java projected finish test ndarray"; - - // 1. Test data frame - { - // auto selectors = gs::gs::Selector::ParseSelectors(s_selectors).value(); - std::unique_ptr arc = std::move( - vd_ctx_wrapper->ToDataframe(comm_spec, selectors, range).value()); - std::string java_data_frame_out_prefix = out_prefix + "/java_projected"; - output_data_frame(comm_spec, std::move(arc), java_data_frame_out_prefix, - 8); - } - - VLOG(1) << "[1] java projected finish test dataframe"; - // 2. 
test vineyard tensor - { - auto tmp = - vd_ctx_wrapper->ToVineyardTensor(comm_spec, client, selector, range); - CHECK(tmp); - vineyard::ObjectID ndarray_object = tmp.value(); - std::string java_v6d_tensor_prefix = out_prefix + "/java_projected"; - // vineyard::AnyType expected_data_type = vineyard::AnyType::Int64; // 4 - vineyard::AnyType expected_data_type = vineyard::AnyType::String; - output_vineyard_tensor(client, ndarray_object, comm_spec, - java_v6d_tensor_prefix, - expected_data_type); - } + // { + // std::unique_ptr arc = std::move( + // vd_ctx_wrapper->ToNdArray(comm_spec, selector, range).value()); + // std::string java_out_prefix = + // out_prefix + "/java_projected_assembled_ndarray.dat"; + // output_nd_array(comm_spec, std::move(arc), java_out_prefix, + // 8); // 4 for int64_t + // } + // VLOG(1) << "[0] java projected finish test ndarray"; + + // // 1. Test data frame + // { + // // auto selectors = + // gs::gs::Selector::ParseSelectors(s_selectors).value(); + // std::unique_ptr arc = std::move( + // vd_ctx_wrapper->ToDataframe(comm_spec, selectors, range).value()); + // std::string java_data_frame_out_prefix = out_prefix + + // "/java_projected"; output_data_frame(comm_spec, std::move(arc), + // java_data_frame_out_prefix, + // 8); + // } + + // VLOG(1) << "[1] java projected finish test dataframe"; + // // 2. 
test vineyard tensor + // { + // auto tmp = + // vd_ctx_wrapper->ToVineyardTensor(comm_spec, client, selector, + // range); + // CHECK(tmp); + // vineyard::ObjectID ndarray_object = tmp.value(); + // std::string java_v6d_tensor_prefix = out_prefix + "/java_projected"; + // // vineyard::AnyType expected_data_type = vineyard::AnyType::Int64; // + // 4 vineyard::AnyType expected_data_type = vineyard::AnyType::String; + // output_vineyard_tensor(client, ndarray_object, comm_spec, + // java_v6d_tensor_prefix, + // expected_data_type); + // } VLOG(1) << "[2] java projected finish test vineyard tensor"; } else { From 5f36aa5a714d356ed1fc95304bade44f64fa398d Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Tue, 23 Jul 2024 15:31:49 +0800 Subject: [PATCH 43/52] parallel app base Committed-by: xiaolei.zl from Dev container --- .../example/circle/CirclePIEParallel.java | 284 ++++++++++++++++++ .../circle/CirclePIEParallelContext.java | 131 ++++++++ analytical_engine/test/app_tests.sh | 2 +- analytical_engine/test/run_java_app.cc | 3 +- 4 files changed, 418 insertions(+), 2 deletions(-) create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallel.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallelContext.java diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallel.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallel.java new file mode 100644 index 000000000000..9023df941875 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallel.java @@ -0,0 +1,284 @@ +package com.alibaba.graphscope.example.circle; + +import com.alibaba.graphscope.app.ParallelAppBase; +import com.alibaba.graphscope.context.ParallelContextBase; +import com.alibaba.graphscope.ds.Vertex; 
+import com.alibaba.graphscope.ds.adaptor.AdjList; +import com.alibaba.graphscope.ds.adaptor.Nbr; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.alibaba.graphscope.serialization.FFIByteVectorInputStream; +import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; +import com.alibaba.graphscope.stdcxx.FFIByteVector; +import com.alibaba.graphscope.stdcxx.FFIByteVectorFactory; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import com.alibaba.graphscope.parallel.MessageInBuffer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.atomic.AtomicInteger; + +public class CirclePIEParallel implements ParallelAppBase< + Long, + Long, + Long, + Long, + CirclePIEParallelContext> { + private static final Logger logger = LoggerFactory.getLogger(CirclePIEParallel.class); + private static FFIByteVectorOutputStream msgVector; + + /** + * Partial Evaluation to implement. + * + * @param graph fragment. The graph fragment providing accesses to graph data. + * @param context context. User defined context which manages data during the whole + * computations. + * @param messageManager The message manger which manages messages between fragments. 
+ * @see IFragment + * @see ParallelContextBase + * @see ParallelMessageManager + */ + @Override + public void PEval(IFragment graph, ParallelContextBase context, ParallelMessageManager messageManager) { + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + CirclePIEParallelContext ctx = (CirclePIEParallelContext) context; + msgVector = new FFIByteVectorOutputStream(); + for (long i = 0; i < graph.getInnerVerticesNum(); ++i) { + vertex.setValue(i); + Long globalId = graph.getInnerVertexGid(vertex); + Path path = new Path(globalId); + AdjList adjList = graph.getOutgoingAdjList(vertex); + for (Nbr nbr : adjList.iterable()) { + path.add(graph.vertex2Gid(nbr.neighbor())); + if (graph.isOuterVertex(nbr.neighbor())) { + // send path to outer vertex. + try { + sendMessageToOuterVertex(graph, messageManager, nbr.neighbor(), path, 0); + } catch (IOException e) { + e.printStackTrace(); + } + } else { + ctx.addToNextPath(nbr.neighbor(), path); + } + path.pop(); + } + } + //No need to check circels + ctx.swapPaths(); + logger.info("After PEval: cur_path: " + ctx.curPaths.size()); + logger.info("After PEval: next_path: " + ctx.nextPaths.size()); + messageManager.forceContinue(); + ctx.curStep += 1; + } + + /** + * Incremental Evaluation to implement. + * + * @param graph fragment. The graph fragment providing accesses to graph data. + * @param context context. User defined context which manages data during the whole + * computations. + * @param messageManager The message manger which manages messages between fragments. 
+ * @see IFragment + * @see ParallelContextBase + * @see ParallelMessageManager + */ + @Override + public void IncEval(IFragment graph, ParallelContextBase context, ParallelMessageManager messageManager) { + CirclePIEParallelContext ctx = (CirclePIEParallelContext) context; + if (ctx.curStep >= ctx.maxStep){ + return ; + } + //Receive msg and merge + receiveMessage(graph, messageManager, ctx); + + logger.info("In super step {}, cur path {}", ctx.curStep, ctx.curPaths.size()); + logger.info("In super step {}, next path {}", ctx.curStep, ctx.nextPaths.size()); + // For received msg, check if it is already circle, if true, add to the final results. + ctx.persistCirclePathInCurrent(); + + // Implement vertex program + vprog(); + + if (ctx.curStep < ctx.maxStep - 1){ + // send msg + sendMessageThroughOE(graph, ctx, messageManager); + ctx.swapPaths(); + } + else if (ctx.curStep == ctx.maxStep - 1){ + // check whether received paths start with the nbr. + // No work + messageManager.forceContinue(); + } else { + // maybe receive message, but not sending message. + logger.info("Max step reached, " + ctx.curStep); + } + ctx.curStep += 1; + } + + void sendMessageThroughOEImpl(IFragment graph, long startVertex, long endVertex, CirclePIEParallelContext ctx, ParallelMessageManager messageManager, int thread_id){ + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + for (long i = startVertex; i < endVertex; ++i) { + vertex.setValue(i); + Long globalId = graph.getInnerVertexGid(vertex); + List paths = ctx.curPaths.get((int) i); + for (int j = 0; j < paths.size(); ++j) { + Path path = paths.get(j); + //Check whether the last node is exactly current vertex. 
+ if (path.top() != globalId){ + logger.error("Invalid path, ending at {}, but collected by {}", path.top(), globalId); + } + AdjList adjList = graph.getOutgoingAdjList(vertex); + for (Nbr nbr : adjList.iterable()) { + path.add(graph.vertex2Gid(nbr.neighbor())); + if (graph.isOuterVertex(nbr.neighbor())) { + // send path to outer vertex. + try { + if (!path.isCircle()){ // If circle path already found, skip. + // logger.info("send msg to outer vertex: {} , path {}",nbr.neighbor(), path); + sendMessageToOuterVertex(graph, messageManager, nbr.neighbor(), path, thread_id); + } + } catch (IOException e) { + e.printStackTrace(); + } + } else { + // logger.info("send msg to inner vertex: {} , path {}", nbr.neighbor().getValue(), path); + ctx.addToNextPath(nbr.neighbor(), path); + } + path.pop(); + } + } + } + } + + void sendMessageThroughOE(IFragment graph, CirclePIEParallelContext ctx, ParallelMessageManager messageManager) { + logger.info("Send message through oe"); + CountDownLatch countDownLatch = new CountDownLatch(ctx.threadNum); + AtomicInteger atomicInteger = new AtomicInteger(0); + int chunkSize = 256; + + int originEnd = (int) graph.getInnerVerticesNum(); + for (int tid = 0; tid < ctx.threadNum; ++tid) { + final int finalTid = tid; + ctx.executor.execute( + new Runnable() { + @Override + public void run() { + while (true) { + int curBegin = + Math.min(atomicInteger.getAndAdd(chunkSize), originEnd); + int curEnd = Math.min(curBegin + chunkSize, originEnd); + if (curBegin >= originEnd) { + break; + } + for (int i = curBegin; i < curEnd; ++i) { + sendMessageThroughOEImpl(graph, curBegin, curEnd, ctx, messageManager, finalTid); + } + } + countDownLatch.countDown(); + } + }); + } + try { + countDownLatch.await(); + } catch (Exception e) { + e.printStackTrace(); + ctx.executor.shutdown(); + } + ctx.curStep += 1; + } + + /** + * Send a message to the outer vertex (to other fragment) + * @param neighbor the outer vertex vid + */ + void 
sendMessageToOuterVertex(IFragment graph, ParallelMessageManager mm, Vertex neighbor, Path path, int threadId) throws IOException { + // logger.info("Send path {} to vertex {}, dst frag {}", path, neighbor.getValue(), graph.fid()); + msgVector.reset(); + msgVector.writeLong(graph.getOuterVertexGid(neighbor)); + path.write(msgVector); + mm.sendToFragment(graph.getFragId(neighbor), msgVector.getVector(), threadId); + } + + void receiveMessageImpl(IFragment graph, ParallelMessageManager messageManager, CirclePIEParallelContext ctx, MessageInBuffer buffer)throws IOException { + FFIByteVector tmpVector = (FFIByteVector) FFIByteVectorFactory.INSTANCE.create(); + long bytesOfReceivedMsg = 0; + Vertex tmpVertex = FFITypeFactoryhelper.newVertexLong(); + while (buffer.getPureMessage(tmpVector)) { + // The retrieved tmp vector has been resized, so the cached objAddress is not available. + // trigger the refresh + tmpVector.touch(); + bytesOfReceivedMsg += tmpVector.size(); + // logger.info("Frag [{}] digest message of size {}", graph.fid(), tmpVector.size()); + Path path = new Path(); + FFIByteVectorInputStream inputStream = new FFIByteVectorInputStream(tmpVector); + long gid = inputStream.readLong(); + if (!graph.innerVertexGid2Vertex(gid, tmpVertex)){ + logger.error("Fail to get lid from gid {}", gid); + } + // logger.info("Got msg to lid {}", tmpVertex.getValue()); + path.read(inputStream); + // Add the tail node of new path here. +// path.add(gid); + //TODO: make this thread safe. 
+ digestMessage(ctx, tmpVertex, path); + tmpVector.clear(); + } + logger.info("total message received by frag {} bytes {}", graph.fid(), bytesOfReceivedMsg); + tmpVector.delete(); + } + + void receiveMessage(IFragment graph, ParallelMessageManager messageManager, CirclePIEParallelContext ctx) { + CountDownLatch countDownLatch = new CountDownLatch(ctx.threadNum); + MessageInBuffer.Factory bufferFactory = FFITypeFactoryhelper.newMessageInBuffer(); + int chunkSize = 1024; + for (int tid = 0; tid < ctx.threadNum; ++tid) { + final int finalTid = tid; + ctx.executor.execute( + new Runnable() { + @Override + public void run() { + MessageInBuffer messageInBuffer = bufferFactory.create(); + FFIByteVector tmpVector = (FFIByteVector) FFIByteVectorFactory.INSTANCE.create(); + boolean result; + while (true) { + result = messageManager.getMessageInBuffer(messageInBuffer); + if (result) { + try { + receiveMessageImpl(graph, messageManager, ctx, messageInBuffer); + } + catch (Exception e) { + e.printStackTrace(); + logger.error( + "Error when receiving message in fragment {} thread {}", + graph.fid(), + finalTid); + } + } else { + break; + } + } + countDownLatch.countDown(); + } + }); + } + try { + countDownLatch.await(); + } catch (Exception e) { + e.printStackTrace(); + ctx.executor.shutdown(); + } + } + + void digestMessage(CirclePIEParallelContext ctx, Vertex vertex, Path path ) { + ctx.addToCurrentPath(vertex,path); + } + + void vprog() { + + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallelContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallelContext.java new file mode 100644 index 000000000000..627d8c42f44f --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/CirclePIEParallelContext.java @@ -0,0 +1,131 @@ +package com.alibaba.graphscope.example.circle; + +import 
com.alibaba.fastffi.impl.CXXStdString; +import com.alibaba.fastjson.JSONObject; +import com.alibaba.graphscope.context.ParallelContextBase; +import com.alibaba.graphscope.context.VertexDataContext; +import com.alibaba.graphscope.ds.GSVertexArray; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.alibaba.graphscope.stdcxx.StdString; +import com.alibaba.graphscope.ds.StringView; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import com.alibaba.graphscope.utils.LongIdParser; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; + +public class CirclePIEParallelContext extends VertexDataContext, StdString> + implements ParallelContextBase { + private static final Logger logger = LoggerFactory.getLogger(CirclePIEParallelContext.class); + + public int maxStep = 4; + public int curStep = 0; + public int threadNum; + public List> curPaths; // Paths ending at vertex. + public List> nextPaths; // New generated path end at vertex this round, + public List> results; // Paths that are in circle. + public ExecutorService executor; + public LongIdParser parser; + + /** + * Called by grape framework, before any PEval. You can initiating data structures need during + * super steps here. + * + * @param frag The graph fragment providing accesses to graph data. + * @param messageManager The message manger which manages messages between fragments. + * @param jsonObject String args from cmdline. 
+ * @see IFragment + * @see ParallelMessageManager + * @see JSONObject + */ + @Override + public void Init(IFragment frag, ParallelMessageManager messageManager, JSONObject jsonObject) { + long innerVertexNum = frag.getInnerVerticesNum(); + createFFIContext(frag, StdString.class, false); + if (jsonObject.containsKey("maxStep")) { + maxStep = jsonObject.getInteger("maxStep"); + return; + } + if (!jsonObject.containsKey("threadNum")) { + logger.warn("No threadNum in params"); + threadNum = 2; + } else { + threadNum = jsonObject.getInteger("threadNum"); + } + curPaths = new ArrayList>((int) innerVertexNum); + nextPaths = new ArrayList>((int) innerVertexNum); + results = new ArrayList>((int) innerVertexNum); + for (int i = 0; i < innerVertexNum; ++i ){ + curPaths.add(new ArrayList()); + nextPaths.add(new ArrayList()); + results.add(new ArrayList()); + } + parser = new LongIdParser(frag.fnum(), 1); + executor = Executors.newFixedThreadPool(threadNum); + messageManager.initChannels(threadNum); + } + + public void addToCurrentPath(Vertex vertex, Path path) { + curPaths.get(vertex.getValue().intValue()).add(new Path(path)); + } + + public void addToNextPath(Vertex vertex, Path path) { + nextPaths.get(vertex.getValue().intValue()).add(new Path(path)); + } + + public void persistCirclePathInCurrent() { + for (int i = 0; i < nextPaths.size(); ++i) { + for (int j = 0; j < nextPaths.get(i).size(); ++j) { + Path path = nextPaths.get(i).get(j); + tryToFindCircle(path); + //Do we need to remove the path? + } + } + } + + public void swapPaths() { + List> tmp = curPaths; + curPaths = nextPaths; + nextPaths = tmp; + int size = nextPaths.size(); + nextPaths.clear(); + for (int i = 0; i < size; ++i) { + nextPaths.add(new ArrayList()); + } + } + + /** + * Output will be executed when the computations finalizes. Data maintained in this context + * shall be outputted here. + * + * @param frag The graph fragment contains the graph info. 
+ * @see IFragment + */ + @Override + public void Output(IFragment frag) { + // logger.info("finally cur path {}", curPaths); + // logger.info("finally next path {}", nextPaths); + + GSVertexArray vertexArray = data(); + Vertex cur = FFITypeFactoryhelper.newVertexLong(); + for (long vid = 0; vid < frag.getInnerVerticesNum(); ++vid) { + cur.setValue(vid); + StdString value = (StdString) vertexArray.get(cur); + value.fromJavaString(results.get((int) vid).toString()); + } + } + + public void tryToFindCircle(Path path) { + if (path.isCircle()){ + // logger.info("path is circle {}", path); + long lid = parser.getOffset(path.top()); + this.results.get((int)lid).add(path); + } + } +} diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index b611c4908fd5..bed1e536a432 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -454,7 +454,7 @@ then GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" \ 1 "../test/modern_graph/knows.csv#header_row=True#delimiter=|#src_label=v0&dst_label=v0&label=e" \ 1 "../test/modern_graph/person.csv#header_row=True#delimiter=|#label=v0" 1 0 1 \ - com.alibaba.graphscope.example.circle.CirclePIE + com.alibaba.graphscope.example.circle.CirclePIEParallel # GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" \ # 1 "/workspaces/GraphScope/single/e.csv#header_row=False#delimiter=,#src_label=v0&dst_label=v0&label=e" \ diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index 0f18e12ec637..f82312c69577 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -346,7 +346,8 @@ void QueryProjected(vineyard::Client& client, const std::string& selector_string, const std::string& selectors_string) { // using AppType = gs::JavaPIEProjectedParallelAppOE; - using AppType = gs::JavaPIEProjectedDefaultApp; + // using AppType = gs::JavaPIEProjectedDefaultApp; + using AppType = 
gs::JavaPIEProjectedParallelApp; auto app = std::make_shared(); auto worker = AppType::CreateWorker(app, fragment); auto spec = grape::DefaultParallelEngineSpec(); From dcae89297fbaa9ecd7d623c15dff856927954fe4 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Thu, 1 Aug 2024 16:21:30 +0800 Subject: [PATCH 44/52] fix compilation Committed-by: xiaolei.zl from Dev container --- analytical_engine/CMakeLists.txt | 16 ++++++++-------- .../example/giraph/circle/CircleInputFormat.java | 2 +- .../example/giraph/myCircle/Circle.java | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index f428c8d59bd8..f4a5b2c0964e 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -509,14 +509,14 @@ if(ENABLE_JAVA_SDK) set(GAE_JAVA_JNI_LIB "${GAE_JAVA_DIR}/grape-runtime/target/native/libgrape-jni.so") endif() - # add_custom_command( - # OUTPUT "${GAE_JAVA_RUNTIME_JAR}" - # COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} install -DskipTests --quiet - # DEPENDS gs_proto - # WORKING_DIRECTORY ${GAE_JAVA_DIR} - # COMMENT "Building GAE-java..." - # VERBATIM - # ) + add_custom_command( + OUTPUT "${GAE_JAVA_RUNTIME_JAR}" + COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet + DEPENDS gs_proto + WORKING_DIRECTORY ${GAE_JAVA_DIR} + COMMENT "Building GAE-java..." 
+ VERBATIM + ) add_custom_target(grape_jni ALL DEPENDS "${GAE_JAVA_RUNTIME_JAR}" ) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java index 2f3ebebf3df1..37d616d4c7c1 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/circle/CircleInputFormat.java @@ -5,7 +5,7 @@ package com.alibaba.graphscope.example.giraph.circle; -import com.alibaba.graphscope.example.giraph.format.VertexAttrWritable; +import com.alibaba.graphscope.example.giraph.circle.VertexAttrWritable; import com.google.common.collect.Lists; import java.io.IOException; import java.util.List; diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java index 5d5a26bccb4e..f111c7f85779 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/giraph/myCircle/Circle.java @@ -1,6 +1,6 @@ package com.alibaba.graphscope.example.giraph.myCircle; -import com.alibaba.graphscope.example.giraph.format.VertexAttrWritable; +import com.alibaba.graphscope.example.giraph.circle.VertexAttrWritable; import org.apache.giraph.graph.BasicComputation; import org.apache.giraph.graph.Vertex; import org.apache.hadoop.io.LongWritable; From 5a7cbe418373ca9cb585f8994b6468e5dada499d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 26 Aug 2024 10:57:37 +0800 Subject: [PATCH 45/52] add method assign --- .../graphscope/utils/ThreadSafeBitSet.java | 
20 ++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/analytical_engine/java/grape-graphx/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java b/analytical_engine/java/grape-graphx/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java index 37d56fbe6b09..f6fe02705c5e 100644 --- a/analytical_engine/java/grape-graphx/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java +++ b/analytical_engine/java/grape-graphx/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java @@ -40,9 +40,9 @@ public class ThreadSafeBitSet { public static final int DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS = 14; - private final int numLongsPerSegment; - private final int log2SegmentSize; - private final int segmentMask; + private int numLongsPerSegment; + private int log2SegmentSize; + private int segmentMask; private final AtomicReference segments; public ThreadSafeBitSet() { @@ -99,6 +99,16 @@ public void set(int position) { } } + public void assign(ThreadSafeBitSet other) { + //copy the other bitset to this bitset + ThreadSafeBitSetSegments otherSegments = other.segments.get(); + ThreadSafeBitSetSegments newSegments = new ThreadSafeBitSetSegments(otherSegments, otherSegments.numSegments(), otherSegments.segmentLength()); + segments.set(newSegments); + this.log2SegmentSize = other.log2SegmentSize; + this.numLongsPerSegment = other.numLongsPerSegment; + this.segmentMask = other.segmentMask; + } + public void setUntil(int position) { // int segmentPosition = position >>> log2SegmentSize; /// which segment -- div by // num bits per segment @@ -433,6 +443,10 @@ public int numSegments() { return segments.length; } + public int segmentLength() { + return segments[0].length(); + } + public AtomicLongArray getSegment(int index) { return segments[index]; } From 64a32e4d7fe9db8acd471e492c9163c7f8d47f44 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 26 Aug 2024 17:51:01 +0800 Subject: [PATCH 46/52] fix memory leak Committed-by: 
xiaolei.zl from Dev container --- .../core/context/java_context_base.h | 23 +- .../graphscope/parallel/MessageInBuffer.java | 4 +- .../graphscope/utils/ThreadSafeBitSet.java | 531 ++++++++++++++++++ 3 files changed, 554 insertions(+), 4 deletions(-) create mode 100644 analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java diff --git a/analytical_engine/core/context/java_context_base.h b/analytical_engine/core/context/java_context_base.h index cec36bc86619..11f985876919 100644 --- a/analytical_engine/core/context/java_context_base.h +++ b/analytical_engine/core/context/java_context_base.h @@ -86,8 +86,27 @@ class JavaContextBase : public grape::ContextBase { } JNIEnvMark m; if (m.env()) { - m.env()->DeleteGlobalRef(url_class_loader_object_); - VLOG(1) << "Delete URL class loader"; + // Delete the java objects + if (app_object_) { + m.env()->DeleteGlobalRef(app_object_); + VLOG(1) << "Delete app object"; + } + if (context_object_) { + m.env()->DeleteGlobalRef(context_object_); + VLOG(1) << "Delete context object"; + } + if (fragment_object_) { + m.env()->DeleteGlobalRef(fragment_object_); + VLOG(1) << "Delete fragment object"; + } + if (mm_object_) { + m.env()->DeleteGlobalRef(mm_object_); + VLOG(1) << "Delete message manager object"; + } + if (url_class_loader_object_) { + m.env()->DeleteGlobalRef(url_class_loader_object_); + VLOG(1) << "Delete url class loader object"; + } } else { LOG(ERROR) << "JNI env not available."; } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/parallel/MessageInBuffer.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/parallel/MessageInBuffer.java index 59ca7443d884..ce6f9f0192fc 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/parallel/MessageInBuffer.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/parallel/MessageInBuffer.java @@ -25,11 +25,11 @@ import static 
com.alibaba.graphscope.utils.CppHeaderName.GRAPE_PARALLEL_MESSAGE_IN_BUFFER_H; import com.alibaba.fastffi.CXXHead; +import com.alibaba.fastffi.CXXPointer; import com.alibaba.fastffi.CXXReference; import com.alibaba.fastffi.FFIFactory; import com.alibaba.fastffi.FFIGen; import com.alibaba.fastffi.FFINameAlias; -import com.alibaba.fastffi.FFIPointer; import com.alibaba.fastffi.FFISkip; import com.alibaba.fastffi.FFITypeAlias; import com.alibaba.graphscope.ds.Vertex; @@ -50,7 +50,7 @@ CORE_JAVA_TYPE_ALIAS_H, CORE_JAVA_JAVA_MESSAGES_H }) -public interface MessageInBuffer extends FFIPointer { +public interface MessageInBuffer extends CXXPointer { default boolean getMessage( @CXXReference IFragment frag, @CXXReference Vertex vertex, diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java new file mode 100644 index 000000000000..f6fe02705c5e --- /dev/null +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/ThreadSafeBitSet.java @@ -0,0 +1,531 @@ +/* + * Copyright 2016-2019 Netflix, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +package com.alibaba.graphscope.utils; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.DataOutputStream; +import java.io.IOException; +import java.util.Arrays; +import java.util.BitSet; +import java.util.concurrent.atomic.AtomicLongArray; +import java.util.concurrent.atomic.AtomicReference; + +/** + * This is a lock-free, thread-safe version of a {@link java.util.BitSet}.

+ * + * Instead of a long array to hold the bits, this implementation uses an AtomicLongArray, then + * does the appropriate compare-and-swap operations when setting the bits. + * + * @author dkoszewnik + * + */ +public class ThreadSafeBitSet { + private static Logger logger = LoggerFactory.getLogger(ThreadSafeBitSet.class.getName()); + + public static final int DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS = 14; + + private int numLongsPerSegment; + private int log2SegmentSize; + private int segmentMask; + private final AtomicReference segments; + + public ThreadSafeBitSet() { + this(DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS); // / 16384 bits, 2048 bytes, 256 longs per segment + } + + public ThreadSafeBitSet(int log2SegmentSizeInBits) { + this(log2SegmentSizeInBits, 0); + } + + public ThreadSafeBitSet(int log2SegmentSizeInBits, int numBitsToPreallocate) { + if (log2SegmentSizeInBits < 6) + throw new IllegalArgumentException( + "Cannot specify fewer than 64 bits in each segment!"); + + this.log2SegmentSize = log2SegmentSizeInBits; + this.numLongsPerSegment = (1 << (log2SegmentSizeInBits - 6)); + this.segmentMask = numLongsPerSegment - 1; + + long numBitsPerSegment = numLongsPerSegment * 64; + int numSegmentsToPreallocate = + numBitsToPreallocate == 0 + ? 
1 + : (int) (((numBitsToPreallocate - 1) / numBitsPerSegment) + 1); + + segments = new AtomicReference(); + segments.set(new ThreadSafeBitSetSegments(numSegmentsToPreallocate, numLongsPerSegment)); + } + + public void set(int position) { + int segmentPosition = + position >>> log2SegmentSize; // / which segment -- div by num bits per segment + int longPosition = + (position >>> 6) + & segmentMask; /// which long in the segment -- remainder of div by num bits + // per segment + int bitPosition = + position & 0x3F; // / which bit in the long -- remainder of div by num bits in long + // (64) + + AtomicLongArray segment = getSegment(segmentPosition); + + long mask = 1L << bitPosition; + + // Thread safety: we need to loop until we win the race to set the long value. + while (true) { + // determine what the new long value will be after we set the appropriate bit. + long currentLongValue = segment.get(longPosition); + long newLongValue = currentLongValue | mask; + + // if no other thread has modified the value since we read it, we won the race and we + // are done. 
+ if (segment.compareAndSet(longPosition, currentLongValue, newLongValue)) break; + } + } + + public void assign(ThreadSafeBitSet other) { + //copy the other bitset to this bitset + ThreadSafeBitSetSegments otherSegments = other.segments.get(); + ThreadSafeBitSetSegments newSegments = new ThreadSafeBitSetSegments(otherSegments, otherSegments.numSegments(), otherSegments.segmentLength()); + segments.set(newSegments); + this.log2SegmentSize = other.log2SegmentSize; + this.numLongsPerSegment = other.numLongsPerSegment; + this.segmentMask = other.segmentMask; + } + + public void setUntil(int position) { + // int segmentPosition = position >>> log2SegmentSize; /// which segment -- div by + // num bits per segment + // for (int i = 0; i < segmentPosition; ++i){ + // AtomicLongArray segment = getSegment(segmentPosition); + // for (int j = 0; j < segment.length(); ++j){ + // segment.set(j, -1); + // } + // } + // int start = (segmentPosition << log2SegmentSize); + // for (int i = start; i < position; ++i){ + // set(i); + // } + long time0 = System.nanoTime(); + for (int i = 0; i < position; ++i) { + set(i); + } + long time1 = System.nanoTime(); + logger.info("Set until {} cost {} ms", position, (time1 - time0) / 1000000); + } + + public void clear(int position) { + int segmentPosition = + position >>> log2SegmentSize; // / which segment -- div by num bits per segment + int longPosition = + (position >>> 6) + & segmentMask; /// which long in the segment -- remainder of div by num bits + // per segment + int bitPosition = + position & 0x3F; // / which bit in the long -- remainder of div by num bits in long + // (64) + + AtomicLongArray segment = getSegment(segmentPosition); + + long mask = ~(1L << bitPosition); + + // Thread safety: we need to loop until we win the race to set the long value. + while (true) { + // determine what the new long value will be after we set the appropriate bit. 
+ long currentLongValue = segment.get(longPosition); + long newLongValue = currentLongValue & mask; + + // if no other thread has modified the value since we read it, we won the race and we + // are done. + if (segment.compareAndSet(longPosition, currentLongValue, newLongValue)) break; + } + } + + public boolean get(int position) { + int segmentPosition = + position >>> log2SegmentSize; // / which segment -- div by num bits per segment + int longPosition = + (position >>> 6) + & segmentMask; /// which long in the segment -- remainder of div by num bits + // per segment + int bitPosition = + position & 0x3F; // / which bit in the long -- remainder of div by num bits in long + // (64) + + AtomicLongArray segment = getSegment(segmentPosition); + + long mask = 1L << bitPosition; + + return ((segment.get(longPosition) & mask) != 0); + } + + public long maxSetBit() { + ThreadSafeBitSetSegments segments = this.segments.get(); + + int segmentIdx = segments.numSegments() - 1; + + for (; segmentIdx >= 0; segmentIdx--) { + AtomicLongArray segment = segments.getSegment(segmentIdx); + for (int longIdx = segment.length() - 1; longIdx >= 0; longIdx--) { + long l = segment.get(longIdx); + if (l != 0) + return (segmentIdx << log2SegmentSize) + + (longIdx * 64) + + (63 - Long.numberOfLeadingZeros(l)); + } + } + + return -1; + } + + public int nextSetBit(int fromIndex) { + if (fromIndex < 0) throw new IndexOutOfBoundsException("fromIndex < 0: " + fromIndex); + + int segmentPosition = fromIndex >>> log2SegmentSize; + + ThreadSafeBitSetSegments segments = this.segments.get(); + + if (segmentPosition >= segments.numSegments()) return -1; + + int longPosition = (fromIndex >>> 6) & segmentMask; + int bitPosition = fromIndex & 0x3F; + AtomicLongArray segment = segments.getSegment(segmentPosition); + + long word = segment.get(longPosition) & (0xffffffffffffffffL << bitPosition); + + while (true) { + if (word != 0) + return (segmentPosition << (log2SegmentSize)) + + (longPosition << 6) + + 
Long.numberOfTrailingZeros(word); + if (++longPosition > segmentMask) { + segmentPosition++; + if (segmentPosition >= segments.numSegments()) return -1; + segment = segments.getSegment(segmentPosition); + longPosition = 0; + } + + word = segment.get(longPosition); + } + } + + /** + * @return the number of bits which are set in this bit set. + */ + public int cardinality() { + ThreadSafeBitSetSegments segments = this.segments.get(); + + int numSetBits = 0; + + for (int i = 0; i < segments.numSegments(); i++) { + AtomicLongArray segment = segments.getSegment(i); + for (int j = 0; j < segment.length(); j++) { + numSetBits += Long.bitCount(segment.get(j)); + } + } + + return numSetBits; + } + + public long[] getWords() { + int len = 0; + ThreadSafeBitSetSegments segments = this.segments.get(); + for (int i = 0; i < segments.numSegments(); ++i) { + AtomicLongArray segment = segments.getSegment(i); + len += segment.length(); + } + long[] res = new long[len]; + int ind = 0; + for (int i = 0; i < segments.numSegments(); ++i) { + AtomicLongArray segment = segments.getSegment(i); + for (int j = 0; j < segment.length(); ++j) { + res[ind] = segment.get(j); + ind += 1; + } + } + return res; + } + + /** + * @return the number of bits which are current specified by this bit set. This is the maximum value + * to which you might need to iterate, if you were to iterate over all bits in this set. + */ + public int currentCapacity() { + return segments.get().numSegments() * (1 << log2SegmentSize); + } + + /** + * Clear all bits to 0. + */ + public void clearAll() { + ThreadSafeBitSetSegments segments = this.segments.get(); + + for (int i = 0; i < segments.numSegments(); i++) { + AtomicLongArray segment = segments.getSegment(i); + + for (int j = 0; j < segment.length(); j++) { + segment.set(j, 0L); + } + } + } + + /** + * Return a new bit set which contains all bits which are contained in this bit set, and which are NOT contained in the other bit set.

+ * + * In other words, return a new bit set, which is a bitwise and with the bitwise not of the other bit set. + * + * @param other the other bit set + * @return the resulting bit set + */ + public ThreadSafeBitSet andNot(ThreadSafeBitSet other) { + if (other.log2SegmentSize != log2SegmentSize) + throw new IllegalArgumentException("Segment sizes must be the same"); + + ThreadSafeBitSetSegments thisSegments = this.segments.get(); + ThreadSafeBitSetSegments otherSegments = other.segments.get(); + ThreadSafeBitSetSegments newSegments = + new ThreadSafeBitSetSegments(thisSegments.numSegments(), numLongsPerSegment); + + for (int i = 0; i < thisSegments.numSegments(); i++) { + AtomicLongArray thisArray = thisSegments.getSegment(i); + AtomicLongArray otherArray = + (i < otherSegments.numSegments()) ? otherSegments.getSegment(i) : null; + AtomicLongArray newArray = newSegments.getSegment(i); + + for (int j = 0; j < thisArray.length(); j++) { + long thisLong = thisArray.get(j); + long otherLong = (otherArray == null) ? 0 : otherArray.get(j); + + newArray.set(j, thisLong & ~otherLong); + } + } + + ThreadSafeBitSet andNot = new ThreadSafeBitSet(log2SegmentSize); + andNot.segments.set(newSegments); + return andNot; + } + + /** + * Return a new bit set which contains all bits which are contained in *any* of the specified bit sets. + * + * @param bitSets the other bit sets + * @return the resulting bit set + */ + public static ThreadSafeBitSet orAll(ThreadSafeBitSet... 
bitSets) { + if (bitSets.length == 0) return new ThreadSafeBitSet(); + + int log2SegmentSize = bitSets[0].log2SegmentSize; + int numLongsPerSegment = bitSets[0].numLongsPerSegment; + + ThreadSafeBitSetSegments segments[] = new ThreadSafeBitSetSegments[bitSets.length]; + int maxNumSegments = 0; + + for (int i = 0; i < bitSets.length; i++) { + if (bitSets[i].log2SegmentSize != log2SegmentSize) + throw new IllegalArgumentException("Segment sizes must be the same"); + + segments[i] = bitSets[i].segments.get(); + if (segments[i].numSegments() > maxNumSegments) + maxNumSegments = segments[i].numSegments(); + } + + ThreadSafeBitSetSegments newSegments = + new ThreadSafeBitSetSegments(maxNumSegments, numLongsPerSegment); + + AtomicLongArray segment[] = new AtomicLongArray[segments.length]; + + for (int i = 0; i < maxNumSegments; i++) { + for (int j = 0; j < segments.length; j++) { + segment[j] = i < segments[j].numSegments() ? segments[j].getSegment(i) : null; + } + + AtomicLongArray newSegment = newSegments.getSegment(i); + + for (int j = 0; j < numLongsPerSegment; j++) { + long value = 0; + for (int k = 0; k < segments.length; k++) { + if (segment[k] != null) value |= segment[k].get(j); + } + newSegment.set(j, value); + } + } + + ThreadSafeBitSet or = new ThreadSafeBitSet(log2SegmentSize); + or.segments.set(newSegments); + return or; + } + + /** + * Get the segment at segmentIndex. If this segment does not yet exist, create it. + * + * @param segmentIndex the segment index + * @return the segment + */ + private AtomicLongArray getSegment(int segmentIndex) { + ThreadSafeBitSetSegments visibleSegments = segments.get(); + + while (visibleSegments.numSegments() <= segmentIndex) { + /// Thread safety: newVisibleSegments contains all of the segments from the currently + // visible segments, plus extra. + /// all of the segments in the currently visible segments are canonical and will not + // change. 
+ ThreadSafeBitSetSegments newVisibleSegments = + new ThreadSafeBitSetSegments( + visibleSegments, segmentIndex + 1, numLongsPerSegment); + + /// because we are using a compareAndSet, if this thread "wins the race" and + // successfully sets this variable, then the segments + /// which are newly defined in newVisibleSegments become canonical. + if (segments.compareAndSet(visibleSegments, newVisibleSegments)) { + visibleSegments = newVisibleSegments; + } else { + /// If we "lose the race" and are growing the ThreadSafeBitSet segments larger, + /// then we will gather the new canonical sets from the update which we missed on + // the next iteration of this loop. + /// Newly defined segments in newVisibleSegments will be discarded, they do not get + // to become canonical. + visibleSegments = segments.get(); + } + } + + return visibleSegments.getSegment(segmentIndex); + } + + private static class ThreadSafeBitSetSegments { + + private final AtomicLongArray segments[]; + + private ThreadSafeBitSetSegments(int numSegments, int segmentLength) { + AtomicLongArray segments[] = new AtomicLongArray[numSegments]; + + for (int i = 0; i < numSegments; i++) { + segments[i] = new AtomicLongArray(segmentLength); + } + + /// Thread safety: Because this.segments is final, the preceding operations in this + // constructor are guaranteed to be visible to any + /// other thread which accesses this.segments. + this.segments = segments; + } + + private ThreadSafeBitSetSegments( + ThreadSafeBitSetSegments copyFrom, int numSegments, int segmentLength) { + AtomicLongArray segments[] = new AtomicLongArray[numSegments]; + + for (int i = 0; i < numSegments; i++) { + segments[i] = + i < copyFrom.numSegments() + ? 
copyFrom.getSegment(i) + : new AtomicLongArray(segmentLength); + } + + /// see above re: thread-safety of this assignment + this.segments = segments; + } + + public int numSegments() { + return segments.length; + } + + public int segmentLength() { + return segments[0].length(); + } + + public AtomicLongArray getSegment(int index) { + return segments[index]; + } + } + + public void serializeBitsTo(DataOutputStream os) throws IOException { + ThreadSafeBitSetSegments segments = this.segments.get(); + + os.writeInt(segments.numSegments() * numLongsPerSegment); + + for (int i = 0; i < segments.numSegments(); i++) { + AtomicLongArray arr = segments.getSegment(i); + + for (int j = 0; j < arr.length(); j++) { + os.writeLong(arr.get(j)); + } + } + } + + @Override + public boolean equals(Object obj) { + if (!(obj instanceof ThreadSafeBitSet)) return false; + + ThreadSafeBitSet other = (ThreadSafeBitSet) obj; + + if (other.log2SegmentSize != log2SegmentSize) + throw new IllegalArgumentException("Segment sizes must be the same"); + + ThreadSafeBitSetSegments thisSegments = this.segments.get(); + ThreadSafeBitSetSegments otherSegments = other.segments.get(); + + for (int i = 0; i < thisSegments.numSegments(); i++) { + AtomicLongArray thisArray = thisSegments.getSegment(i); + AtomicLongArray otherArray = + (i < otherSegments.numSegments()) ? otherSegments.getSegment(i) : null; + + for (int j = 0; j < thisArray.length(); j++) { + long thisLong = thisArray.get(j); + long otherLong = (otherArray == null) ? 
0 : otherArray.get(j); + + if (thisLong != otherLong) return false; + } + } + + for (int i = thisSegments.numSegments(); i < otherSegments.numSegments(); i++) { + AtomicLongArray otherArray = otherSegments.getSegment(i); + + for (int j = 0; j < otherArray.length(); j++) { + long l = otherArray.get(j); + + if (l != 0) return false; + } + } + + return true; + } + + @Override + public int hashCode() { + int result = log2SegmentSize; + result = 31 * result + Arrays.hashCode(segments.get().segments); + return result; + } + + /** + * @return a new BitSet with same bits set + */ + public BitSet toBitSet() { + BitSet resultSet = new BitSet(); + int ordinal = this.nextSetBit(0); + while (ordinal != -1) { + resultSet.set(ordinal); + ordinal = this.nextSetBit(ordinal + 1); + } + return resultSet; + } + + @Override + public String toString() { + return toBitSet().toString(); + } +} From 70d37b58074746439f500489d524e0180fef93ac Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Thu, 29 Aug 2024 17:01:34 +0800 Subject: [PATCH 47/52] add circle parallel and try to debug memory increasing Committed-by: xiaolei.zl from Dev container --- analytical_engine/CMakeLists.txt | 24 +- .../core/context/java_context_base.h | 2 + analytical_engine/core/java/javasdk.cc | 20 +- analytical_engine/core/java/javasdk.h | 5 +- analytical_engine/java/grape-demo/pom.xml | 6 + .../parallel/formal/CircleAppParallel.java | 98 ++++ .../formal/CircleAppParallelContext.java | 439 ++++++++++++++++++ .../circle/parallel/formal/CircleUtil.java | 150 ++++++ .../parallel/formal/PathSerAndDeser.java | 38 ++ .../parallel/formal/store/ComputeStep.java | 406 ++++++++++++++++ .../store/disk/AppendObjectOutputStream.java | 36 ++ .../store/disk/DiskComputeStepImpl.java | 159 +++++++ .../formal/store/disk/DiskStoreContext.java | 160 +++++++ .../formal/store/disk/FileBatchProcess.java | 98 ++++ .../formal/store/disk/FileObjectStorage.java | 162 +++++++ .../formal/store/disk/IBatchProcess.java | 36 ++ 
.../formal/store/disk/IObjectStorage.java | 25 + .../store/disk/LocalThreadMessageStorage.java | 119 +++++ .../LocalThreadMessageStorageFactory.java | 59 +++ .../formal/store/disk/MessageStorage.java | 184 ++++++++ .../formal/store/disk/PathStorage.java | 184 ++++++++ .../store/memory/MemoryComputeStepImpl.java | 128 +++++ analytical_engine/test/app_tests.sh | 71 +-- analytical_engine/test/run_java_app.cc | 269 ++++++----- 24 files changed, 2705 insertions(+), 173 deletions(-) create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleUtil.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/PathSerAndDeser.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/AppendObjectOutputStream.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskComputeStepImpl.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskStoreContext.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileObjectStorage.java create mode 
100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IBatchProcess.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IObjectStorage.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorage.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorageFactory.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/MessageStorage.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/PathStorage.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/memory/MemoryComputeStepImpl.java diff --git a/analytical_engine/CMakeLists.txt b/analytical_engine/CMakeLists.txt index f4a5b2c0964e..1c03f883abb9 100644 --- a/analytical_engine/CMakeLists.txt +++ b/analytical_engine/CMakeLists.txt @@ -377,7 +377,7 @@ if (BUILD_TESTS) target_include_directories(run_java_app PRIVATE core utils apps) target_compile_definitions(run_java_app PUBLIC ENABLE_JAVA_SDK) target_link_libraries(run_java_app gs_proto ${GFLAGS_LIBRARIES} ${MPI_CXX_LIBRARIES} ${JNI_LIBRARIES} ${Boost_LIBRARIES} - ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES}) + ${CMAKE_DL_LIBS} ${GLOG_LIBRARIES} ${VINEYARD_LIBRARIES} ${JAVA_JVM_LIBRARY}) # # java app benchmark # add_executable(property_graph_java_app_benchmarks benchmarks/property_graph_java_app_benchmarks.cc core/java/javasdk.cc core/object/dynamic.cc) @@ -509,17 +509,17 @@ if(ENABLE_JAVA_SDK) set(GAE_JAVA_JNI_LIB 
"${GAE_JAVA_DIR}/grape-runtime/target/native/libgrape-jni.so") endif() - add_custom_command( - OUTPUT "${GAE_JAVA_RUNTIME_JAR}" - COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet - DEPENDS gs_proto - WORKING_DIRECTORY ${GAE_JAVA_DIR} - COMMENT "Building GAE-java..." - VERBATIM - ) - add_custom_target(grape_jni ALL - DEPENDS "${GAE_JAVA_RUNTIME_JAR}" - ) + # add_custom_command( + # OUTPUT "${GAE_JAVA_RUNTIME_JAR}" + # COMMAND mvn -Drevision=${GRAPHSCOPE_ANALYTICAL_JAR_VERSION} clean install -DskipTests --quiet + # DEPENDS gs_proto + # WORKING_DIRECTORY ${GAE_JAVA_DIR} + # COMMENT "Building GAE-java..." + # VERBATIM + # ) + # add_custom_target(grape_jni ALL + # DEPENDS "${GAE_JAVA_RUNTIME_JAR}" + # ) install(FILES DESTINATION lib) install(FILES "${GAE_JAVA_RUNTIME_JAR}" DESTINATION lib) install(FILES "${GAE_JAVA_GRAPHX_JAR}" DESTINATION lib) diff --git a/analytical_engine/core/context/java_context_base.h b/analytical_engine/core/context/java_context_base.h index 11f985876919..4f83ec228349 100644 --- a/analytical_engine/core/context/java_context_base.h +++ b/analytical_engine/core/context/java_context_base.h @@ -107,6 +107,8 @@ class JavaContextBase : public grape::ContextBase { m.env()->DeleteGlobalRef(url_class_loader_object_); VLOG(1) << "Delete url class loader object"; } + // Invoke a gc, since we have created a lot of objects. 
+ InvokeGC(m.env()); } else { LOG(ERROR) << "JNI env not available."; } diff --git a/analytical_engine/core/java/javasdk.cc b/analytical_engine/core/java/javasdk.cc index 5efa27d015fa..f4df9f915089 100644 --- a/analytical_engine/core/java/javasdk.cc +++ b/analytical_engine/core/java/javasdk.cc @@ -110,7 +110,7 @@ bool InitWellKnownClasses(JNIEnv* env) { return true; } -inline uint64_t getTotalSystemMemory() { +uint64_t getTotalSystemMemory() { uint64_t pages = sysconf(_SC_PHYS_PAGES); uint64_t page_size = sysconf(_SC_PAGE_SIZE); uint64_t ret = pages * page_size; @@ -121,7 +121,7 @@ inline uint64_t getTotalSystemMemory() { return ret; } -inline uint64_t getCurrentAvailableMemory() { +uint64_t getCurrentAvailableMemory() { uint64_t pages = sysconf(_SC_AVPHYS_PAGES); uint64_t page_size = sysconf(_SC_PAGE_SIZE); uint64_t ret = pages * page_size; @@ -132,6 +132,20 @@ inline uint64_t getCurrentAvailableMemory() { return ret; } +uint64_t getProcessMemory() { + std::string pid = std::to_string(getpid()); + std::string cmd = "cat /proc/" + pid + "/status | grep VmRSS"; + std::string res = exec(cmd.c_str()); + std::string::size_type pos = res.find(":"); + if (pos == std::string::npos) { + return 0; + } + std::string mem_str = res.substr(pos + 1); + mem_str.erase(std::remove_if(mem_str.begin(), mem_str.end(), ::isspace), + mem_str.end()); + return std::stoull(mem_str); +} + void SetupEnv(const int local_num) { int systemMemory = (getCurrentAvailableMemory() * 2) / 3; int systemMemoryPerWorker = std::max(systemMemory / local_num, 1); @@ -358,8 +372,10 @@ jobject LoadAndCreate(JNIEnv* env, const jobject& url_class_loader_obj, } void InvokeGC(JNIEnv* env) { + VLOG(1) << "Before invoking GC:" << getCurrentAvailableMemory(); VLOG(1) << "GC ..."; env->CallStaticVoidMethod(system_class, gc_methodID); + VLOG(1) << "After invoking GC:" << getCurrentAvailableMemory(); } std::string GetJobjectClassName(JNIEnv* env, jobject object) { diff --git a/analytical_engine/core/java/javasdk.h 
b/analytical_engine/core/java/javasdk.h index 9f17d4904bcd..a65a0d3d5f2a 100644 --- a/analytical_engine/core/java/javasdk.h +++ b/analytical_engine/core/java/javasdk.h @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -38,7 +39,9 @@ namespace gs { std::string JString2String(JNIEnv* env, jstring jStr); bool InitWellKnownClasses(JNIEnv* env); -inline uint64_t getTotalSystemMemory(); +uint64_t getTotalSystemMemory(); +uint64_t getCurrentAvailableMemory(); +uint64_t getProcessMemory(); void SetupEnv(const int local_num); diff --git a/analytical_engine/java/grape-demo/pom.xml b/analytical_engine/java/grape-demo/pom.xml index 1c3a107ae213..622a5039e3fd 100644 --- a/analytical_engine/java/grape-demo/pom.xml +++ b/analytical_engine/java/grape-demo/pom.xml @@ -66,6 +66,11 @@ org.slf4j slf4j-api --> + + com.carrotsearch + hppc + 0.8.1 + @@ -123,6 +128,7 @@ com.alibaba.fastffi:* it.unimi.dsi:fastutil* org.apache.spark:* + com.carrotsearch:hppc* diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java new file mode 100644 index 000000000000..d7682c9a7127 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java @@ -0,0 +1,98 @@ +package com.alibaba.graphscope.example.circle.parallel.formal; + +import com.alibaba.graphscope.app.ParallelAppBase; +import com.alibaba.graphscope.context.ParallelContextBase; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + *

<ol>
+ * <li>lid:fragment内部从0开始编码,0-innerVerticesNum-1</li>
+ * <li>gid:前n位为frag_id, 中间是label_id, 后面是lid</li>
+ * <li>oid:外部输入文件的vertices id</li>
+ * </ol>
+ * + * @author liumin + * @date 2024-07-12 + */ +public class CircleAppParallel implements ParallelAppBase { + private static final Logger logger = LoggerFactory.getLogger(CircleAppParallel.class); + + /** + * 1个 num worker 包含 1个fragment,该方法只会调用一次 + * + * @param fragment @param graph fragment. The graph fragment providing accesses to graph data. each fragment is a local object (i.e.,payloads live entirely on a local node). + * @param defaultContextBase context. User defined context which manages data during the whole + * computations. + * @param messageManager The message manger which manages messages between fragments. + * @see IFragment + * @see ParallelContextBase + * @see ParallelMessageManager + */ + @Override + public void PEval(IFragment fragment, ParallelContextBase defaultContextBase, ParallelMessageManager messageManager) { + CircleAppParallelContext ctx = (CircleAppParallelContext) defaultContextBase; + logger.info("PEval start.show frag info,inner vertices num is {},outer vertices num is {},frag vertices num is {},graph vertices num is {},active vertices num is {}", fragment.getInnerVerticesNum(), fragment.getOuterVerticesNum(), fragment.getVerticesNum(), fragment.getTotalVerticesNum(), ctx.currentPaths.size()); + + // 获取所有的内部点 + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + for (long i = 0; i < fragment.getInnerVerticesNum(); ++i) { + // 表示获取 到 frag 内部的第 几个 点 + vertex.setValue(i); + try { + // 内存处理 + ctx.storeProcessor.sendToNbr(fragment, messageManager, vertex, 0, 0); + } catch (IOException e) { + logger.error("PEval error", e); + } + } + + ctx.storeProcessor.initial(fragment, messageManager); + + ctx.currModified.assign(ctx.nextModified); + int cardinality = ctx.nextModified.cardinality(); + if (cardinality > 0) { + logger.info("PEval Continue,nextModified is {}", cardinality); + messageManager.forceContinue(); + } + + ctx.removeTheadLocalVariable(); + // 不同 frag 有不同 超步 + ctx.superStep += 1; + } + + /** + * Inc compute, superStep start 
from 1 + * + * @param frag + * @param defaultContextBase + * @param messageManager + */ + @Override + public void IncEval(IFragment frag, ParallelContextBase defaultContextBase, ParallelMessageManager messageManager) { + CircleAppParallelContext ctx = (CircleAppParallelContext) defaultContextBase; + ctx.nextModified.clearAll(); + // start from 1 + if (ctx.superStep > ctx.maxIteration) { + logger.info("superStep exceeds maxIteration,superStep is {}", ctx.superStep); + return; + } + + ctx.storeProcessor.inc(frag, messageManager); + + logger.info("IncEval end.superStep is {},currModified cnt is {},local nextModified cnt is {}", ctx.superStep, ctx.currModified.cardinality(), ctx.nextModified.cardinality()); + // assign nextModified to currModified + ctx.currModified.assign(ctx.nextModified); + if (ctx.nextModified.cardinality() > 0) { + messageManager.forceContinue(); + } + + ctx.superStep += 1; + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java new file mode 100644 index 000000000000..f8ed61552e2e --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java @@ -0,0 +1,439 @@ +package com.alibaba.graphscope.example.circle.parallel.formal; + +import com.alibaba.graphscope.example.circle.parallel.formal.store.ComputeStep; +import com.alibaba.graphscope.example.circle.parallel.formal.store.disk.DiskComputeStepImpl; +import com.alibaba.graphscope.example.circle.parallel.formal.store.disk.DiskStoreContext; +import com.alibaba.graphscope.example.circle.parallel.formal.store.memory.MemoryComputeStepImpl; +import com.alibaba.graphscope.utils.ThreadSafeBitSet; +import com.alibaba.fastjson.JSONObject; +import 
com.alibaba.graphscope.context.ParallelContextBase; +import com.alibaba.graphscope.context.VertexDataContext; +import com.alibaba.graphscope.ds.GSVertexArray; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; +import com.alibaba.graphscope.stdcxx.StdString; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import com.carrotsearch.hppc.LongArrayList; +import com.google.common.collect.Lists; +import org.apache.commons.lang.StringUtils; +import org.apache.commons.lang3.concurrent.BasicThreadFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.stream.Collectors; + +/** + * App Context + * 不支持 用户自定义的 点属性类型 + * VertexDataContext 仅支持String Primitive(boolean、int、long)等简单的数据类型 + * + * @author liumin + * @date 2024-07-12 + */ +public class CircleAppParallelContext extends VertexDataContext, StdString> implements ParallelContextBase { + private static final Logger logger = LoggerFactory.getLogger(CircleAppParallelContext.class); + /** + * superStep超步 + */ + public int superStep; + /** + * 最小迭代次数 + */ + public int minIteration; + /** + * 最大迭代次数 + */ + public int maxIteration; + /** + * 并发接收/发送消息的线程数 + */ + public int threadNum; + public ExecutorService executor; + private static final int NO_BATCH = -2; + /** + * 种子节点执行的批次数 + */ + public int batchNum; + + /** + * 存在多线程 对同一个点 处理的情况,避免并发问题 + * 多个点 同时 发消息给 同一个点 + *

+ * 当前超步所有路径列表 + */ + public Map> currentPaths; + + /** + * 当前超步,frag 内部 inner vertex 之间互相发送消息保存的消息列表 + *

+ * 下一超步使用 + */ + public Map> nextPaths; + + /** + * 存储环形结果 + */ + public Map> circleResult; + + /** + * 当前超步活跃的节点,包含: + * 1)接收到 msg 的 vertex 集合 + * 2)上一个迭代 send to local nbr 对应的 nbr + */ + public ThreadSafeBitSet currModified; + /** + * 当前超步更新的 frag 内部的点集合,下一超步活跃的节点: + * 1)send to local nbr 对应的 nbr + */ + public ThreadSafeBitSet nextModified; + + /** + * 环形是否有向 + */ + public boolean isDirected; + + /** + * 消息Stream,每个线程维护一个 FFIByteVectorOutputStream,避免并发问题 + * Key 为线程id,Value 为消息 Stream + */ + public ThreadLocal msgVectorMap; + + /** + * 反序列化后的消息集合 + * Key 为线程id,Value 为 传播的路径列表 + */ + public ThreadLocal> deserializedMsgMap; + + /** + * 发送消息前,合并多边后使用的集合 + * Key 为线程id,Value 为Map: key 为 nbr id,value 为邻居节点 + */ + public ThreadLocal>> nbrMap; + + /** + * memory or disk 处理器 + */ + public ComputeStep storeProcessor; + + /** + * disk 模式 context + */ + public DiskStoreContext diskStoreContext; + + /** + * 是否所有节点均为种子节点 + */ + public boolean allSeed = true; + /** + * 消息 send 单个点上的路径阈值,超过阈值则压缩 + */ + public int pathThreshold; + + private static final int DEFAULT_MIN_ITERATION = 2; + private static final int UNDIRECTED_DEFAULT_MIN_ITERATION = 3; + + @Override + public void Init(IFragment frag, ParallelMessageManager messageManager, JSONObject jsonObject) { + // Output 设置 + createFFIContext(frag, StdString.class, false); + + // initial variable,初始 size 和 线程数量相等 + msgVectorMap = ThreadLocal.withInitial(FFIByteVectorOutputStream::new); + deserializedMsgMap = ThreadLocal.withInitial(Lists::newArrayList); + nbrMap = ThreadLocal.withInitial(HashMap::new); + + superStep = 0; + + // 是否有向 + if (!jsonObject.containsKey("isDirected")) { + isDirected = true; + } else { + isDirected = jsonObject.getBoolean("isDirected"); + } + // 最小迭代次数 + if (!jsonObject.containsKey("minIteration")) { + minIteration = DEFAULT_MIN_ITERATION; + } else { + minIteration = jsonObject.getInteger("minIteration"); + } + + if (!isDirected && minIteration < UNDIRECTED_DEFAULT_MIN_ITERATION) { + // 无向最小环大小为 3 + 
minIteration = UNDIRECTED_DEFAULT_MIN_ITERATION; + } + + // 最大迭代次数 + if (!jsonObject.containsKey("maxIteration")) { + maxIteration = 3; + } else { + maxIteration = jsonObject.getInteger("maxIteration"); + } + + // 线程数 + if (!jsonObject.containsKey("threadNum")) { + threadNum = 2; + } else { + threadNum = jsonObject.getInteger("threadNum"); + } + // 路径数量阈值 + if (!jsonObject.containsKey("pathThreshold")) { + pathThreshold = 10000; + } else { + pathThreshold = jsonObject.getInteger("pathThreshold"); + } + // 初始化线程池 + ThreadFactory threadFactory = new BasicThreadFactory.Builder().namingPattern("Graphscope" + "-%d").build(); + executor = new ThreadPoolExecutor(threadNum, threadNum, 60L, java.util.concurrent.TimeUnit.SECONDS, new java.util.concurrent.LinkedBlockingQueue<>(100000), threadFactory); + + /** + * 批次数,拆分种子节点分多批次执行 + * start from 0 + */ + if (!jsonObject.containsKey("batchNum")) { + batchNum = NO_BATCH; + } else { + batchNum = jsonObject.getInteger("batchNum"); + } + + long innerVertexNum = frag.getInnerVerticesNum(); + + // 初始化容量大小等于 frag 内部节点数 + currentPaths = new ConcurrentHashMap<>((int) innerVertexNum); + nextPaths = new ConcurrentHashMap<>((int) innerVertexNum); + circleResult = new ConcurrentHashMap<>((int) innerVertexNum); + + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + + // 遍历 frag 内部所有点,初始化种子节点 + for (long i = 0; i < innerVertexNum; ++i) { + vertex.setValue(i); + + long oid = frag.getId(vertex); + // 点属性 表示批次数 + long vertexData = frag.getData(vertex); + // 非种子节点跳过 + if (vertexData == -1) { + allSeed = false; + continue; + } + if (batchNum == NO_BATCH || batchNum == vertexData) { + initSeedVertex((int) i, oid); + } + } + + currModified = new ThreadSafeBitSet(ThreadSafeBitSet.DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS, (int) frag.getInnerVerticesNum()); + nextModified = new ThreadSafeBitSet(ThreadSafeBitSet.DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS, (int) frag.getInnerVerticesNum()); + + /** + * 计算中间数据存储级别 + *

    + *
  1. memory
  2. + *
  3. disk
  4. + *
+ */ + String storageLevel = !jsonObject.containsKey("storageLevel") ? "memory" : jsonObject.getString("storageLevel"); + if (storageLevel.equals("memory")) { + storeProcessor = new MemoryComputeStepImpl(this); + } else if (storageLevel.equals("disk")) { + diskStoreContext = new DiskStoreContext(frag, jsonObject); + storeProcessor = new DiskComputeStepImpl(this); + } else { + throw new IllegalArgumentException("storageLevel only can be [memory | disk]"); + } + + messageManager.initChannels(threadNum); + + logger.info("init end.innerVertexNum is {},minIteration is {},maxIteration is {},batchNum is {},isDirected is {},threadNum is {},active vertexNum is {},allSeed is {}", innerVertexNum, minIteration, maxIteration, batchNum, isDirected, threadNum, currentPaths.size(), allSeed); + } + + /** + * 初始化种子节点 + * + * @param lid 内部自增点id + * @param oid 原始点id + */ + private void initSeedVertex(int lid, long oid) { + // initial vertex condition,init path with new list + LongArrayList path = new LongArrayList(CircleUtil.DEFAULT_INITIAL_LIST_SIZE, CircleUtil.RESIZE_STRATEGY); + path.add(oid); + Set initialAttr = new HashSet<>(); + initialAttr.add(path); + this.currentPaths.put(lid, initialAttr); + } + + public Set getCurrentPaths(int lid) { + return currentPaths.get(lid); + } + + public Set getCurrentPaths(Vertex vertex) { + return getCurrentPaths(vertex.getValue().intValue()); + } + + /** + * 单个点消息发送结束后,该点在当前超步内存中的数据可以被清理掉 + * 以下场景执行该内存清理操作有效: + *
    + *
  1. memory 模式
  2. + *
  3. disk 模式下,superStep=0
  4. + *
+ * + * @param vertex 需要被清理的点 + */ + public void clearVertexInMemory(Vertex vertex) { + this.currentPaths.compute(vertex.getValue().intValue(), (k, path) -> { + if (path != null && !path.isEmpty()) { + path.clear(); + } + return path; + }); + } + + public List getPathListObject() { + return deserializedMsgMap.get(); + } + + public FFIByteVectorOutputStream getMsgVectorObject() { + return msgVectorMap.get(); + } + + public Map> getNbrMapObject() { + return nbrMap.get(); + } + + @Override + public void Output(IFragment frag) { + GSVertexArray vertexArray = data(); + Vertex cur = FFITypeFactoryhelper.newVertexLong(); + for (long vid = 0; vid < frag.getInnerVerticesNum(); ++vid) { + cur.setValue(vid); + + StdString value = (StdString) vertexArray.get(cur); + if (circleResult.get((int) vid) != null) { + value.fromJavaString(outPutCircleResult((int) vid)); + } + } + + clean(); + } + + private String outPutCircleResult(int lid) { + // 输出到 ctx,仅输出成环结果 + List pathList = this.circleResult.get(lid).stream().filter(path -> CircleUtil.isCircle(path) && path.size() > minIteration).map(this::outputToCtx).collect(Collectors.toList()); + if (!pathList.isEmpty()) { + return StringUtils.join(pathList, CircleUtil.PATH_SEPARATOR); + } + return ""; + } + + private String outputToCtx(LongArrayList path) { + StringBuffer sb = new StringBuffer(); + int size = path.size(); + for (int i = 0; i < size; i++) { + long id = path.get(i); + sb.append(id); + if (i < size - 1) { + sb.append(CircleUtil.IDS_SEPARATOR); + } + } + + return sb.toString(); + } + + /** + * 内存模式下,交换当前超步的路径和下一次超步的路径 + */ + public void swapPaths() { + Map> tmp = currentPaths; + currentPaths = nextPaths; + nextPaths = tmp; + + nextPaths.clear(); + } + + /** + * add to current path + * 数据来源: 1. 
received msg 2.next paths + * + * @param lid 点的 local id + * @param addPaths 新增的路径列表 + */ + public void addPathToCurrentPathsAndFindCircle(int lid, List addPaths) { + // lazy initialize + addPathToCtxAndFindCircle(lid, addPaths, this.currentPaths); + } + + /** + * 将 环形 结果添加到结果集合 + * from current Paths + * + * @param lid 点的 local id + * @param path 新增路径 + */ + public void addPathToCircleResult(int lid, LongArrayList path) { + // lazy initialize + this.circleResult.compute(lid, (k, v) -> { + if (v == null) { + v = new ArrayList<>(); + } + v.add(path); + return v; + }); + } + + /** + * local nbr update,add to next paths + * + * @param lid 点id + * @param addPaths 新增的路径列表 + */ + public void addPathToNextPathsAndFindCircle(int lid, List addPaths) { + addPathToCtxAndFindCircle(lid, addPaths, this.nextPaths); + } + + private void addPathToCtxAndFindCircle(int lid, List addPaths, Map> ctxPaths) { + // lazy initialize + ctxPaths.compute(lid, (id, nextPath) -> { + if (nextPath == null) { + nextPath = new HashSet<>(); + } + + for (LongArrayList path : addPaths) { + if (CircleUtil.isCircle(path)) { + if (path.size() > minIteration) { + addPathToCircleResult(id, path); + } + continue; + } + + nextPath.add(path); + } + return nextPath; + }); + } + + private void clean() { + executor.shutdown(); + } + + /** + * 移除 thread local 变量 + * 均在 send msg 的时候使用 + */ + public void removeTheadLocalVariable() { + this.deserializedMsgMap.remove(); + this.msgVectorMap.remove(); + this.nbrMap.remove(); + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleUtil.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleUtil.java new file mode 100644 index 000000000000..42b60cc949a9 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleUtil.java @@ -0,0 +1,150 @@ +package 
com.alibaba.graphscope.example.circle.parallel.formal; + +import com.carrotsearch.hppc.ArraySizingStrategy; +import com.carrotsearch.hppc.BoundedProportionalArraySizingStrategy; +import com.carrotsearch.hppc.LongArrayList; + +import java.util.Collection; +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * 环形 util + * @author liumin + * @date 2024-08-02 + */ +public class CircleUtil { + /** + * 初始化 LongArrayList 使用的 resizeStrategy + */ + public static final ArraySizingStrategy RESIZE_STRATEGY = new BoundedProportionalArraySizingStrategy(1, BoundedProportionalArraySizingStrategy.DEFAULT_MAX_GROW_COUNT, BoundedProportionalArraySizingStrategy.DEFAULT_GROW_RATIO); + public static final String IDS_SEPARATOR = "&"; + public static final String PATH_SEPARATOR = "|"; + public static final int DEFAULT_INITIAL_LIST_SIZE = 4; + public static final int MIN_CIRCLE_VERTEX_SIZE = 3; + + /** + * 根据当前点的属性过滤,保留符合条件的路径: + *
    + *
  1. 点路径长度 >= 当前超步 + 1
  2. + *
  3. 非环
  4. + *
+ * + * @param vertexPath 点path + * @param currIteration 当前超步 + 1 + * @return {@code 符合条件:true + * 不符合条件:false} + */ + public static boolean filterByAttr(LongArrayList vertexPath, long currIteration) { + int vertexPathSize = vertexPath.size(); + + boolean pathSizeCheck = vertexPathSize >= currIteration; + boolean notCircle = !CircleUtil.isCircle(vertexPath); + return pathSizeCheck && notCircle; + } + + /** + * 消息过滤条件,根据邻居,在点属性中筛选符合条件的数据 + *
    + *
  1. 路径以当前点结尾
  2. + *
  3. 不包含子环。判断若加入 to 节点,路径是否包含子环,e.g. [1,2,3 + 2]
  4. + *
  5. 不往回走。针对无向环,出边已走过的情况下,入边不往回走,e.g.[1,2 + 1]
  6. + *
+ * + * @param from 当前点id + * @param to 待发送消息的点id + * @param vertexPath 单条消息 + * @return {{@code true/false}} + */ + public static boolean filterByNbr(long from, long to, LongArrayList vertexPath, boolean isDirected) { + int vertexPathSize = vertexPath.size(); + + boolean pathDataCheck = from == vertexPath.get(vertexPathSize - 1); + boolean notContainInnerCircle = (vertexPathSize <= 1 && !vertexPath.contains(to)) || (vertexPathSize > 1 && !subListContain(vertexPath, 1, vertexPathSize, to)); + + + // 避免无向环 来回发送 1 -e1-> 2<-e1- 1 + // 不支持长度为 2 的无向环,1 -e1-> 2 <-e2- 1 + boolean undirectedCycleMsg = !isDirected && vertexPathSize == 2 && vertexPath.get(0) == to; + + return pathDataCheck + // 避免子环 1 2 3 4 2 + && notContainInnerCircle && !undirectedCycleMsg; + } + + /** + * 子列表中是否包含指定value + * {@code fromIndex}, inclusive, and {@code toIndex}, exclusive + * + * @param list 列表 + * @param fromIndex 起始索引 inclusive + * @param toIndex 结束索引 exclusive + * @param value 指定值 + * @return + */ + private static boolean subListContain(LongArrayList list, int fromIndex, int toIndex, long value) { + for (int i = fromIndex; i < toIndex; i++) { + if (list.get(i) == value) { + return true; + } + } + return false; + } + + /** + * 从 点属性和关联边 中提取下一次迭代要发送的消息 + * 将符合条件的消息添加到 msg 集合 + * + * @param superStep 超步 + * @param vertexPath 当前迭代的路径列表 + * @param from 当前点id + * @param to 邻居点id + * @param msg 消息列表 + * @param isDirected 是否有向,无向环不能往回发送消息 + */ + public static void makeMsgToSendAlongEdge(int superStep, Set vertexPath, long from, long to, List msg, boolean isDirected) { + for (LongArrayList path : vertexPath) { + if (CircleUtil.filterByAttr(path, superStep + 1) && CircleUtil.filterByNbr(from, to, path, isDirected)) { + msg.add(path); + } + } + } + + /** + * 从 点属性和关联边 中提取下一次迭代要发送的消息,然后压缩消息 + *
    + *
  1. 压缩前需要过滤出符合条件的消息,避免压缩后生成无效消息
  2. + *
  3. 消息压缩,避免单条消息过大。导致后续迭代OOM
  4. + *
+ * 简单实现针对环的压缩 + * 1)图中所有点为种子节点,若路径上 点 均为种子节点,将路径首尾相同的,保留一条即可 + * + * @param vertexPath 路径列表 + * @param msg 压缩后的路径列表 + */ + public static void makeAndCompressMsgToSendAlongEdge(int superStep, Set vertexPath, long from, long to, List msg, boolean isDirected) { + // 将路径首尾相同的压缩 + Collection selectedPaths = vertexPath.stream() + .filter(path -> CircleUtil.filterByAttr(path, superStep + 1) + && CircleUtil.filterByNbr(from, to, path, isDirected)) + .collect(Collectors.groupingBy(path -> path.get(0) + "," + path.get(path.size() - 1), + Collectors.collectingAndThen(Collectors.toList(), + grouped -> grouped.get(0)))) + .values(); + msg.addAll(selectedPaths); + } + + /** + * 路径是否为环 + *
    点数量 >=3
+ *
    起始节点 == 结束节点
+ * + * @param path 路径 + * @return 是否成环 + */ + public static boolean isCircle(LongArrayList path) { + int size = path.size(); + return size >= MIN_CIRCLE_VERTEX_SIZE && path.get(0) == path.get(size - 1); + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/PathSerAndDeser.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/PathSerAndDeser.java new file mode 100644 index 000000000000..c67528b2a608 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/PathSerAndDeser.java @@ -0,0 +1,38 @@ +package com.alibaba.graphscope.example.circle.parallel.formal; + +import com.carrotsearch.hppc.LongArrayList; +import com.carrotsearch.hppc.cursors.LongCursor; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +/** + * @author liumin + * @date 2024-07-26 + */ +public class PathSerAndDeser { + public static void serialize(DataOutput dataOutput, LongArrayList path) throws IOException { + int vSize = path.size(); + if (vSize == 0) { + System.out.println("serialize size is 0,path=[" + path + "]"); + } + dataOutput.writeInt(vSize); + for (LongCursor v : path) { + dataOutput.writeLong(v.value); + } + } + + public static LongArrayList deserialize(DataInput dataInput) throws IOException { + int size = dataInput.readInt(); + if (size == 0) { + System.out.println("deserialize size is 0"); + } + + LongArrayList list = new LongArrayList(size, CircleUtil.RESIZE_STRATEGY); + for (int i = 0; i < size; i++) { + list.add(dataInput.readLong()); + } + return list; + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java new file mode 100644 index 
000000000000..05910b3f9a08 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java @@ -0,0 +1,406 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store; + +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.example.circle.parallel.formal.CircleUtil; +import com.alibaba.graphscope.example.circle.parallel.formal.PathSerAndDeser; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.ds.adaptor.AdjList; +import com.alibaba.graphscope.ds.adaptor.Nbr; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.MessageInBuffer; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.alibaba.graphscope.serialization.FFIByteVectorInputStream; +import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; +import com.alibaba.graphscope.stdcxx.FFIByteVector; +import com.alibaba.graphscope.stdcxx.FFIByteVectorFactory; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.CountDownLatch; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +/** + * PIE 环形抽象计算类 + * 区分Disk模式和Memory模式 + * + * @author liumin + * @date 2024-08-11 + */ +public abstract class ComputeStep { + private static final Logger logger = LoggerFactory.getLogger(ComputeStep.class); + protected CircleAppParallelContext ctx; + + public ComputeStep(CircleAppParallelContext ctx) { + this.ctx = ctx; + } + + /** + * 初始化 + * + * @param frag 子图 + * @param messageManager 消息管理器 + */ + public abstract void initial(IFragment frag, ParallelMessageManager messageManager); + + /** + * 迭代计算 + * 
+ * @param frag 子图 + * @param messageManager 消息管理器 + */ + public abstract void inc(IFragment frag, ParallelMessageManager messageManager); + + /** + * frag 内部发送消息后处理步骤 + * 本机发送给 frag 内部的 inner vertex + * + * @param frag 子图 + * @param nbrV 邻居节点 + * @param msg 消息 + */ + public abstract void sendToLocalFrag(IFragment frag, Vertex nbrV, List msg); + + /** + * 接收 frag 之间的消息处理步骤 + * + * @param frag 子图 + * @param lid 内部自增id + * @param oid 原始id + * @param msgs 消息列表 + */ + public abstract void processUponReceiveMsg(IFragment frag, int lid, long oid, List msgs); + + /** + * 获取当前点的属性信息,即保留的路径列表 + * + * @param lid 内部自增id + * @param batchId frag 内部 batchId + * @return 点的属性信息 + */ + public abstract Set getVertexPath(int lid, Integer batchId); + + /** + * 构造消息及发送消息 + * + * @param frag 子图 + * @param messageManager 消息管理器 + * @param vertex 点 + * @param isOutGoing 消息发送方向 + * @param threadId 线程id + * @param batchId 超步内批次ID + * @throws IOException 抛出的异常 + */ + private void sendToAdjList(IFragment frag, ParallelMessageManager messageManager, Vertex vertex, boolean isOutGoing, int threadId, int batchId) throws IOException { + // 获取点的属性信息 + Set vertexPath = getVertexPath(vertex.getValue().intValue(), batchId); + if (vertexPath == null || vertexPath.isEmpty()) { + return; + } + + List msg = ctx.getPathListObject(); + FFIByteVectorOutputStream msgVector = ctx.getMsgVectorObject(); + + long currOid = frag.getId(vertex); + AdjList nbrs = isOutGoing ? frag.getOutgoingAdjList(vertex) : frag.getIncomingAdjList(vertex); + + // 1. 合并多边 + Map> nbrMap = ctx.getNbrMapObject(); + mergeMutiEdgesToOne(nbrMap, frag, nbrs); + + // currentPaths can not be modified in this loop + for (Vertex nbrV : nbrMap.values()) { + long nbrOid = frag.getId(nbrV); + + if (currOid == nbrOid) { + continue; + } + + // 2. 
构造要发送的消息 + if (vertexPath.size() > ctx.pathThreshold && ctx.allSeed) { + CircleUtil.makeAndCompressMsgToSendAlongEdge(ctx.superStep, vertexPath, currOid, nbrOid, msg, ctx.isDirected); + } else { + CircleUtil.makeMsgToSendAlongEdge(ctx.superStep, vertexPath, currOid, nbrOid, msg, ctx.isDirected); + } + + // 3.send msg to nbr + sendMsgToOuterVertexOrUpdateInnerVertex(frag, messageManager, msgVector, msg, nbrV, threadId); + if (frag.getInnerVerticesNum() < 30) { + logger.info("makeMsgs end.superStep={},msgToSend={},src={},srcFrag={},dst={},dstFrag={},vertexCurrentPath={},circleResults={}", ctx.superStep, msg.toString(), frag.getId(vertex), frag.getFragId(vertex), nbrOid, frag.getFragId(nbrV), vertexPath, ctx.circleResult); + } + msg.clear(); + } + nbrMap.clear(); + } + + /** + * 合并两点间多边 + * + * @param map 存储nbrV + * @param frag 子图 + * @param nbrs Graphscope 获取的邻居列表,注意 Nbr 不能被多次使用,需要从 Nbr 中获取需要的数据 + */ + private void mergeMutiEdgesToOne(Map> map, IFragment frag, AdjList nbrs) { + for (Nbr longLongNbr : nbrs.iterable()) { + Vertex nbrV = longLongNbr.neighbor(); + map.putIfAbsent(frag.getId(nbrV), nbrV); + } + } + + /** + * 发送消息到邻居节点 + *
    + *
  1. outerVertex,消息发送给邻居点所在 frag
  2. + *
  3. innerVertex,消息 local处理。Disk模式写入磁盘,Memory模式内存更新。下一超步处理
  4. + *
+ * + * @param frag 图数据 + * @param messageManager 消息管理器 + * @param msgVector 消息序列化OutputStream + * @param msgToSend 消息列表 + * @param nbrV 邻居点 + * @param threadId 线程Id + * @throws IOException + */ + protected void sendMsgToOuterVertexOrUpdateInnerVertex(IFragment frag, ParallelMessageManager messageManager, FFIByteVectorOutputStream msgVector, List msgToSend, Vertex nbrV, int threadId) throws IOException { + if (msgToSend.isEmpty()) { + return; + } + + if (frag.isOuterVertex(nbrV)) { + msgVector.reset(); + + // ------------------ write msg info start ------------------ // + // 消息序列化 + msgVector.writeLong(frag.getOuterVertexGid(nbrV)); + msgVector.writeInt(msgToSend.size()); + for (LongArrayList path : msgToSend) { + PathSerAndDeser.serialize(msgVector, path); + } + + // 将序列化后的消息,远程发消息给邻居节点的 frag + messageManager.sendToFragment(frag.getFragId(nbrV), msgVector.getVector(), threadId); + } else { + // Inner nbr 添加到 nextModified 集合,作为下一个迭代的起始节点 + ctx.nextModified.set(nbrV.getValue().intValue()); + + sendToLocalFrag(frag, nbrV, msgToSend); + } + } + + /** + * 接收消息 + * 并发执行 + * + * @param graph 子图 + * @param messageManager 消息管理器 + */ + protected void receiveMessage(IFragment graph, ParallelMessageManager messageManager) { + long start = System.currentTimeMillis(); + CountDownLatch countDownLatch = new CountDownLatch(ctx.threadNum); + MessageInBuffer.Factory bufferFactory = FFITypeFactoryhelper.newMessageInBuffer(); + for (int tid = 0; tid < ctx.threadNum; ++tid) { + final int finalTid = tid; + ctx.executor.execute(new Runnable() { + @Override + public void run() { + // 每个线程 维护一个 messageInBuffer + MessageInBuffer messageInBuffer = bufferFactory.create(); + boolean result; + while (true) { + result = messageManager.getMessageInBuffer(messageInBuffer); + if (result) { + try { + receiveMessageImpl(graph, messageInBuffer); + } catch (Exception e) { + logger.error("Error when receiving message in fragment {} thread {}", graph.fid(), finalTid, e); + } + } else { + break; + } + } + 
if (ctx.diskStoreContext != null) { + // 将线程内 batch dump 剩余的 msg 刷盘 + ctx.diskStoreContext.threadLocalMessageStorageFactory.dumpAll(); + ctx.diskStoreContext.threadLocalMessageStorageFactory.remove(); + } + messageInBuffer.delete(); + countDownLatch.countDown(); + } + }); + } + try { + countDownLatch.await(); + } catch (Exception e) { + logger.error("receiveMessageAndUpdateVertex error.", e); + ctx.executor.shutdown(); + } + + logger.info("received message end.Frag id is {},superStep is {},currModified vertex cnt is {},times is {}s", graph.fid(), ctx.superStep, ctx.currModified.cardinality(), (System.currentTimeMillis() - start) / 1000); + } + + /** + * 接收消息线程内实现 + * + * @param graph 子图 + * @param buffer 消息缓冲区 + * @throws IOException + */ + protected void receiveMessageImpl(IFragment graph, MessageInBuffer buffer) throws IOException { + FFIByteVector tmpVector = (FFIByteVector) FFIByteVectorFactory.INSTANCE.create(); + Vertex tmpVertex = FFITypeFactoryhelper.newVertexLong(); + + // 接收 发给本 frag 的所有消息 + List receivedMsg = new ArrayList<>(CircleUtil.DEFAULT_INITIAL_LIST_SIZE); + while (buffer.getPureMessage(tmpVector)) { + // The retrieved tmp vector has been resized, so the cached objAddress is not available. 
+ // trigger the refresh + tmpVector.touch(); + + FFIByteVectorInputStream inputStream = new FFIByteVectorInputStream(tmpVector); + // tgt oid in msg + long gid = inputStream.readLong(); + if (!graph.innerVertexGid2Vertex(gid, tmpVertex)) { + logger.error("Fail to get lid from gid {}", gid); + } + + ctx.currModified.set(tmpVertex.getValue().intValue()); + + // ------------------ read msg info start ------------------ // + int size = inputStream.readInt(); + + if (size != 0) { + for (int i = 0; i < size; i++) { + LongArrayList path = PathSerAndDeser.deserialize(inputStream); + // vertex path and edge path in msg must not be null + receivedMsg.add(path); + } + } + // ------------------ read msg info end ------------------ // + long oid = graph.getId(tmpVertex); + processUponReceiveMsg(graph, tmpVertex.getValue().intValue(), oid, receivedMsg); + + if (showLog(graph)) { + logger.info("vprog end.superStep is {},lid is {},oid is {},vdata is {},msg is {}", ctx.superStep, tmpVertex.getValue().intValue(), oid, getVertexPath(tmpVertex.getValue().intValue(), null), receivedMsg); + } + tmpVector.clear(); + receivedMsg.clear(); + } + + tmpVector.delete(); + } + + /** + * 多线程发送消息,可以区分批次 + * + * @param frag 子图 + * @param messageManager 消息管理器 + * @param begin 单个批次内的起始节点 + * @param end 单个批次内的结束节点 + * @param batchId 批次id + */ + public void sendMessageThroughOE(IFragment frag, ParallelMessageManager messageManager, long begin, long end, int batchId) { + CountDownLatch countDownLatch = new CountDownLatch(ctx.threadNum); + Map rangeMap = splitBeginAndEnd(begin, end, ctx.threadNum); + + for (int tid = 0; tid < ctx.threadNum; ++tid) { + long threadBegin = rangeMap.get(tid)[0]; + long threadEnd = rangeMap.get(tid)[1]; + int finalTid = tid; + ctx.executor.execute(() -> { + try { + sendMessageThroughOEImpl(frag, threadBegin, threadEnd, messageManager, finalTid, batchId); + } catch (IOException e) { + throw new RuntimeException(e); + } + + ctx.removeTheadLocalVariable(); + 
countDownLatch.countDown(); + }); + } + try { + countDownLatch.await(); + } catch (Exception e) { + logger.error("sendMessageThroughOE error.", e); + ctx.executor.shutdown(); + } + } + + /** + * 单个线程发送消息实现 + * + * @param frag 子图 + * @param startVertex 起始点 + * @param endVertex 结束点 + * @param messageManager 消息管理器 + * @param threadId 线程id + * @param batchId 批次id + * @throws IOException + */ + private void sendMessageThroughOEImpl(IFragment frag, long startVertex, long endVertex, ParallelMessageManager messageManager, int threadId, int batchId) throws IOException { + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + for (long i = startVertex; i < endVertex; ++i) { + if (ctx.currModified.get((int) i)) { + vertex.setValue(i); + sendToNbr(frag, messageManager, vertex, threadId, batchId); + } + } + } + + /** + * 发送消息给邻居节点。区分有向/无向 + * + * @param frag 子图 + * @param messageManager 消息管理器 + * @param vertex 当前点 + * @param threadId 线程 id,0~threadNum-1 + * 发送给 frag 时需要指定 threadId + * @param batchId 超步内批次 id,0~fragBatchNum-1 + *
    + *
  1. memory 模式:batchId 固定为0
  2. + *
  3. disk 模式: 超步内可以分批次执行,减轻内存压力
  4. + *
+ * @throws IOException + */ + public void sendToNbr(IFragment frag, ParallelMessageManager messageManager, Vertex vertex, int threadId, int batchId) throws IOException { + // src -> dst + sendToAdjList(frag, messageManager, vertex, true, threadId, batchId); + if (!ctx.isDirected) { + // dst -> src + sendToAdjList(frag, messageManager, vertex, false, threadId, batchId); + } + // 清除 点的 current 属性 + ctx.clearVertexInMemory(vertex); + } + + /** + * 将较大范围的 long 区间按批次拆分成小范围,生成不同的 start 和 end + * + * @param begin 起始点id + * @param end 结束点id + * @param batchNum 批次数 + * @return 线程id -> [begin, end] + */ + public Map splitBeginAndEnd(long begin, long end, int batchNum) { + long chunkSize = (end - begin) / batchNum; + return IntStream.range(0, batchNum).boxed().collect(Collectors.toMap(i -> i, i -> { + long batchBegin = begin + i * chunkSize; + long batchEnd = begin + (i + 1) * chunkSize; + if (i == batchNum - 1) { + batchEnd = end; + } + return new long[]{batchBegin, batchEnd}; + })); + } + + protected boolean showLog(IFragment frag) { + return frag.getInnerVerticesNum() < 30; + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/AppendObjectOutputStream.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/AppendObjectOutputStream.java new file mode 100644 index 000000000000..5313e62d618a --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/AppendObjectOutputStream.java @@ -0,0 +1,36 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import java.io.IOException; +import java.io.ObjectOutputStream; +import java.io.OutputStream; + +public class AppendObjectOutputStream extends ObjectOutputStream { + + public AppendObjectOutputStream(OutputStream out) throws IOException { + super(out); + } + + /** + * 
覆盖父类的方法,使其在已有对象信息并追加时,不写header信息 + * 查看源码会发现:writeStreamHeader方法会写入以下两行内容: + *

+ * bout.writeShort(STREAM_MAGIC); + * bout.writeShort(STREAM_VERSION); + *

+ * 这两行对应的值: + * final static short STREAM_MAGIC = (short)0xaced; + * final static short STREAM_VERSION = 5; + *

+ * 在文件头部就会写入:AC ED 00 05 + * 一个文件对象只有在文件头处才应该出现此信息,文件内容中不能出现此信息,否则会导致读取错误 + * 所以在追加时,就需要覆盖父类的writeStreamHeader方法,执行reset()方法 + *

+ * reset()方法写入的是这个:final static byte TC_RESET = (byte)0x79; + * + * @throws IOException + */ + @Override + protected void writeStreamHeader() throws IOException { + super.reset(); + } +} \ No newline at end of file diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskComputeStepImpl.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskComputeStepImpl.java new file mode 100644 index 000000000000..87cffaf01f64 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskComputeStepImpl.java @@ -0,0 +1,159 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.example.circle.parallel.formal.store.ComputeStep; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * 环形磁盘模式计算 + * + * @author liumin + * @date 2024-08-11 + */ +public class DiskComputeStepImpl extends ComputeStep { + private static final Logger logger = LoggerFactory.getLogger(DiskComputeStepImpl.class); + /** + * disk mode 参数管理 + */ + private final DiskStoreContext diskStoreContext; + /** + * app context + */ + private final CircleAppParallelContext ctx; + /** + * 批次处理类 + */ + private final IBatchProcess process; + + public DiskComputeStepImpl(CircleAppParallelContext ctx) { + super(ctx); + this.ctx = ctx; + this.diskStoreContext = ctx.diskStoreContext; + this.process = new FileBatchProcess(); + } + + /** + * 初始化处理 + * + * @param frag 子图 + * @param 
messageManager 消息管理器 + */ + @Override + public void initial(IFragment frag, ParallelMessageManager messageManager) { + // 清理残留的磁盘文件 + diskStoreContext.clearDiskPathStorages(); + diskStoreContext.clearDiskMessageStorages(); + // 首次发送消息后,将未 flush 的 message 统一 flush + diskStoreContext.messageStorages.forEach(FileObjectStorage::dump); + } + + /** + * 迭代计算 + * + * @param frag 子图 + * @param messageManager 消息管理器 + */ + @Override + public void inc(IFragment frag, ParallelMessageManager messageManager) { + // 1.接收消息并发写入 threadLocal MessageStore + receiveMessage(frag, messageManager); + + // 2.从 MessageStore 按 batch 读取数据写入 PathStore + for (int batchId = 0; batchId < diskStoreContext.fragBatchNum; ++batchId) { + // 上一个超步发送过来的 msg 全部写入 messageStore 后,按 batchId 读取 messageStore 文件内容,合并消息 + // 更新完点属性,将该批次的数据增量 append 写入 pathStore 文件 + MessageStorage messageStorage = diskStoreContext.getMessageStorage(batchId); + messageStorage.loadInBatchAndUpdateStorage(ctx, process, frag, batchId); + } + + // 3.清除当前super step的消息 + diskStoreContext.clearMessageStorages(); + diskStoreContext.clearMemoryPathStorages(); + + if (ctx.superStep < ctx.maxIteration) { + // 4.从 PathStore 按 batch 读取数据发送消息 + for (int batchId = 0; batchId < diskStoreContext.fragBatchNum; ++batchId) { + // 分 batch 读取 pathStore 文件,发送消息 + // 我们将在[0, batchNum - 1] 的super step中,分批次发送消息 + // frag 内部 inner vertex 之间发送的消息会被保存到 MessageStorage + // frag 之间 outer vertex 之间发送的消息通过网络发送出去,会暂存到 C++ 的memory + PathStorage pathStorage = diskStoreContext.getPathStorages(batchId); + // load vertex attr from pathStore file and send message + pathStorage.loadObjectsAndSendMessage(ctx, this, frag, process, messageManager); + } + } + // 5. 清理当前super step的磁盘点属性文件 + diskStoreContext.clearDiskPathStorages(); + } + + /** + * 发送消息后处理步骤 + * 本机发送给 frag 内部的 inner vertex,更新到内存 + * + *

磁盘模式下,消息可以切分成小批次处理。 + * 发送消息时,小批次的消息先暂存到内存,待达到阈值时,统一 flush 到 disk

+ * 注意: 该方法会在多线程环境下被调用 + * + * @param frag 子图 + * @param nbrV 邻居节点 + * @param msg 消息 + */ + @Override + public void sendToLocalFrag(IFragment frag, Vertex nbrV, List msg) { + int toLid = nbrV.getValue().intValue(); + int batchId = diskStoreContext.getBatchIdFromVertexId(toLid); + MessageStorage messageStorage = diskStoreContext.getMessageStorage(batchId); + // 将消息暂存到内存,后续统一 flush 到 disk + msg.replaceAll(LongArrayList::clone); + messageStorage.putToStorage(toLid, msg); + } + + /** + * 接收消息后处理步骤 + * 接收消息和发送消息的区别在于:接收的消息来源于网络传输,我们无法提前判断接收哪个点的消息以及一次接收多少消息 + * 所以,为了将消息切分成小批次,需要一边接收消息,一边判断消息数量是否到达阈值,达到阈值后需要flush到disk,最后清理内存 + * 注意:上述整个流程需要在多线程环境调用。要保证:1)更新内存数据结构和清理内存数据结构互相不影响 2)避免串行更新,有较好的性能。因此每个线程单独维护一个 messageStore,互不影响 {{{@link LocalThreadMessageStorage}}} + * + * @param frag 子图 + * @param lid 内部自增id + * @param oid 原始id + * @param msgs 消息列表 + */ + @Override + public void processUponReceiveMsg(IFragment frag, int lid, long oid, List msgs) { + int batchId = diskStoreContext.getBatchIdFromVertexId(lid); + LocalThreadMessageStorage messageStorage = diskStoreContext.getLocalMessageStorage(batchId); + // local frag lid + messageStorage.batchDumpMessages(lid, msgs); + + if (showLog(frag)) { + messageStorage.getMessageStorage().load(); + logger.info("vprog end.superStep is {},batchId is {},fid is {},lid is {},oid is {},msg is {},messageStore is {}", ctx.superStep, batchId, frag.fid(), lid, oid, msgs, diskStoreContext.threadLocalMessageStorageFactory.getAllValue().stream().map(i -> i.getMessageStorage().getBatchMessages().get(lid)).collect(Collectors.toList())); + } + } + + /** + * 获取当前点的属性信息,即保留的路径列表 + * 磁盘模式下,superStep=0时,数据保留在内存 + * + * @param lid 内部自增id + * @param batchId frag 内部 batchId + * @return 点的属性信息 + */ + @Override + public Set getVertexPath(int lid, Integer batchId) { + if (batchId == null) { + batchId = diskStoreContext.getBatchIdFromVertexId(lid); + } + return ctx.superStep == 0 ? 
ctx.currentPaths.get(lid) : diskStoreContext.getPathStorages(batchId).getPath(lid); + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskStoreContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskStoreContext.java new file mode 100644 index 000000000000..215c91ca018b --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/DiskStoreContext.java @@ -0,0 +1,160 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.alibaba.fastjson.JSONObject; +import com.alibaba.graphscope.fragment.IFragment; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * 磁盘模式 context + * + * @author liumin + * @date 2024-08-12 + */ +public class DiskStoreContext { + private static final Logger logger = LoggerFactory.getLogger(DiskStoreContext.class); + /** + * 批次读写文件消息阈值 + * 仅 Disk 模式下 + */ + public final int msgThreshold; + /** + * 单个 frag 执行的batch数量 + */ + public int fragBatchNum; + private final List pathStorages; + public List messageStorages; + /** + * frag 内部节点数 + */ + private final int ivnum; + private final List fragBatchRange; + public LocalThreadMessageStorageFactory threadLocalMessageStorageFactory; + + /** + * @param jsonObject input param from app + */ + public DiskStoreContext(IFragment frag, JSONObject jsonObject) { + ivnum = (int) frag.getInnerVerticesNum(); + // frag batch + if (!jsonObject.containsKey("fragBatchNum")) { + fragBatchNum = 1; + } else { + fragBatchNum = jsonObject.getInteger("fragBatchNum"); + } + + if (fragBatchNum > ivnum) { + fragBatchNum = ivnum; + } + // 单次处理消息的阈值 + if (!jsonObject.containsKey("msgThreshold")) { + msgThreshold = 
200000; + } else { + msgThreshold = jsonObject.getInteger("msgThreshold"); + } + + // 初始化 pathStorage 和 messageStorage + pathStorages = new ArrayList<>(); + messageStorages = new ArrayList<>(); + Map> threadLocalMessageStoreMap = new HashMap<>(); + + this.fragBatchRange = getFragBatchRange(); + + for (int i = 0; i < fragBatchRange.size(); i++) { + long[] startAndEnd = fragBatchRange.get(i); + long beingVertex = startAndEnd[0]; + long endVertex = startAndEnd[1]; + pathStorages.add(new PathStorage(msgThreshold, getPath("vertex_atr", frag.fid(), beingVertex, endVertex), + i, beingVertex, endVertex)); + MessageStorage messageStorage = new MessageStorage(msgThreshold, getPath("received_path", frag.fid(), beingVertex, endVertex), beingVertex, endVertex); + messageStorages.add(messageStorage); + threadLocalMessageStoreMap.put(i, ThreadLocal.withInitial(() -> new LocalThreadMessageStorage(messageStorage))); + } + + this.threadLocalMessageStorageFactory = new LocalThreadMessageStorageFactory(threadLocalMessageStoreMap); + logger.info("disk context init end.msgThreshold is {},fragBatchRange is {}", msgThreshold, fragBatchRange.stream().map(startAndEnd -> startAndEnd[0] + "_" + startAndEnd[1]).collect(Collectors.toList())); + } + + /** + * {@code beingVertex inclusive, + * endVertex exclusive} + * + * @return + */ + private List getFragBatchRange() { + int verticesNumPerBatch = (int) (ivnum / fragBatchNum); + + List rangeList = new ArrayList<>(fragBatchNum); + for (int i = 0; i < fragBatchNum; ++i) { + long beingVertex = (long) i * verticesNumPerBatch; + long endVertex = Math.min((i + 1) * verticesNumPerBatch, ivnum); + + if (i == fragBatchNum - 1) { + endVertex = ivnum; + } + rangeList.add(new long[]{beingVertex, endVertex}); + } + + return rangeList; + } + + private String getPath(String prefix, int fid, long beginVertex, long endVertex) { + return prefix + "_" + fid + "_" + beginVertex + "_" + endVertex; + } + + public MessageStorage getMessageStorage(int batchId) { + 
return messageStorages.get(batchId); + } + + public LocalThreadMessageStorage getLocalMessageStorage(int batchId) { + return threadLocalMessageStorageFactory.get(batchId); + } + + public PathStorage getPathStorages(int batchId) { + return pathStorages.get(batchId); + } + + public int getBatchIdFromVertexId(long vertexId) { + for (int i = 0; i < fragBatchRange.size(); i++) { + long[] longs = fragBatchRange.get(i); + long beginVertex = longs[0]; + long endVertex = longs[1]; + if (vertexId >= beginVertex && vertexId < endVertex) { + return i; + } + } + logger.error("getBatchIdFromVertexId error.vertexId is {}", vertexId); + throw new RuntimeException("getBatchIdFromVertexId error."); + } + + public void clearMemoryPathStorages() { + for (PathStorage ps : pathStorages) { + ps.clearInMemory(); + } + } + + public void clearMessageStorages() { + for (MessageStorage ms : messageStorages) { + ms.clearInMemory(); + ms.clearInDisk(); + } + } + + public void clearDiskPathStorages() { + for (PathStorage ps : pathStorages) { + ps.clearInDisk(); + } + } + + public void clearDiskMessageStorages() { + for (MessageStorage ms : messageStorages) { + ms.clearInDisk(); + } + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java new file mode 100644 index 000000000000..1e99e6f6c592 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java @@ -0,0 +1,98 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.example.circle.parallel.formal.CircleUtil; +import 
com.alibaba.graphscope.example.circle.parallel.formal.store.ComputeStep; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.Set; + +/** + * 磁盘模式批次处理实现类 + * 批次读写文件,处理数据 + * + * @author liumin + * @date 2024-08-14 + */ +public class FileBatchProcess implements IBatchProcess { + private static final Logger logger = LoggerFactory.getLogger(FileBatchProcess.class); + + /** + * 分批次读取 Message Store 更新 Path Storage 中的数据 + * + * @param ctx app context + * @param graph 子图 + * @param messageStorage 消息管理器 + * @param pathStorage 待更新的pathStorage + */ + @Override + public void batchUpdatePathStorage(CircleAppParallelContext ctx, IFragment graph, MessageStorage messageStorage, PathStorage pathStorage) { + if (messageStorage.getBatchMessages().isEmpty()) { + return; + } + long start = System.currentTimeMillis(); + + // 将消息集合中发送给同一个点的 消息 聚合 + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + for (Map.Entry> integerListEntry : messageStorage.getBatchMessages().entrySet()) { + Set receivedMessages = integerListEntry.getValue(); + int toLid = integerListEntry.getKey(); + if (receivedMessages.isEmpty()) { + continue; + } + + vertex.setValue((long) toLid); + long oid = graph.getId(vertex); + for (LongArrayList path : receivedMessages) { + path.add(oid); + if (CircleUtil.isCircle(path) && path.size() > ctx.minIteration) { + ctx.addPathToCircleResult(toLid, path); + continue; + } + + pathStorage.putToStorage(toLid, path); + } + } + + logger.info("Inner batchUpdatePathStorage end.superStep is {},pathStoreFile is {},msg size is {},time is {}s", ctx.superStep, pathStorage.getPath(), pathStorage.getFragmentPaths().values().stream().mapToInt(Set::size).sum(), 
(System.currentTimeMillis() - start) / 1000); + + if (ctx.superStep < ctx.maxIteration) { + if (!pathStorage.getFragmentPaths().isEmpty()) { + pathStorage.dump(); + } + } else { + pathStorage.clearInMemory(); + } + messageStorage.clearInMemory(); + } + + /** + * 分批次发送消息 + * + * @param ctx app context + * @param computeStep 计算步骤 + * @param graph 子图 + * @param ps 从 pathStore 中读取数据 + * @param messageManager 消息管理器 + */ + @Override + public void sendMsg(CircleAppParallelContext ctx, ComputeStep computeStep, IFragment graph, PathStorage ps, ParallelMessageManager messageManager) { + long start = System.currentTimeMillis(); + computeStep.sendMessageThroughOE(graph, messageManager, ps.getBeginVertex(), ps.getEndVertex(), ps.getBatchId()); + + // dump local nbr received msg to messageStorages + ctx.diskStoreContext.messageStorages.forEach(FileObjectStorage::dump); + logger.info("Inner batch sendMsg from file end.superStep is {},pathStoreFile is {},msg size is {},time is {}s", ctx.superStep, ps.getPath(), ps.getFragmentPaths().values().stream().mapToInt(Set::size).sum(), (System.currentTimeMillis() - start) / 1000); + + // clear memory after all messages are sent + ps.clearInMemory(); + } + +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileObjectStorage.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileObjectStorage.java new file mode 100644 index 000000000000..ff2b8361e99b --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileObjectStorage.java @@ -0,0 +1,162 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.alibaba.graphscope.example.circle.parallel.formal.PathSerAndDeser; +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import 
java.io.BufferedInputStream; +import java.io.BufferedOutputStream; +import java.io.EOFException; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; +import java.util.stream.Stream; + +/** + * 磁盘模式读写文件抽象类 + * + * @author liumin + * @date 2024/8/19 + */ +public abstract class FileObjectStorage implements IObjectStorage { + private static final Logger logger = LoggerFactory.getLogger(FileObjectStorage.class); + /** + * 文件路径 + */ + private String path; + /** + * 文件是否追加写入 + */ + private boolean append; + private File file; + /** + * 批量读写文件消息阈值 + */ + private final int msgThreshold; + + + public boolean isAppend() { + return append; + } + + public String getPath() { + return path; + } + + public int getMsgThreshold() { + return msgThreshold; + } + + public File getFile() { + return file; + } + + public FileObjectStorage(String path, boolean append, int msgThreshold) { + this.path = path; + this.append = append; + this.file = new File(path); + this.msgThreshold = msgThreshold; + } + + /** + * 加载文件 + * + * @param in 输入流 + * @throws IOException 抛出异常类型 + */ + public abstract void loadObjects(ObjectInputStream in) throws IOException; + + /** + * 写入文件 + * + * @param out 输出流 + */ + public abstract void dumpObjects(ObjectOutputStream out); + + @Override + public void load() { + long start = System.currentTimeMillis(); + File f = new File(path); + if (!f.exists()) { + return; + } + + try (FileInputStream fileIn = new FileInputStream(f); + BufferedInputStream bufferedInputStream = new BufferedInputStream(fileIn, 1024); + ObjectInputStream in = new ObjectInputStream(bufferedInputStream);) { + + clearInMemory(); + while (true) { + try { + loadObjects(in); + 
} catch (EOFException e) { + break; + } + } + } catch (Exception e) { + logger.error("load error", e); + } + } + + @Override + public void dump() { + try (FileOutputStream fileOutputStream = new FileOutputStream + (path, append); + BufferedOutputStream bufferedOutputStream = new BufferedOutputStream(fileOutputStream, 1024); + ObjectOutputStream out = file.length() == 0 ? new ObjectOutputStream(bufferedOutputStream) : new AppendObjectOutputStream(bufferedOutputStream)) { + dumpObjects(out); + bufferedOutputStream.flush(); + } catch (Exception e) { + logger.error("dump error", e); + } + } + + protected void clearInDisk(String filePrefix) { + try (Stream listedFiles = Files.list(Paths.get("."))) { + // 列出目录中的所有文件 + List files = listedFiles.filter(p -> p.getFileName().toString().startsWith(filePrefix)).collect(Collectors.toList()); + + // 删除具有指定前缀的文件 + for (Path file : files) { + Files.deleteIfExists(file); + logger.info("Deleted: " + file); + } + } catch (IOException e) { + logger.error("clearInDisk error.", e); + } + } + + public static void dumpVertexObjects(ObjectOutputStream out, Map> dumpData) { + try { + if (dumpData.isEmpty()) { + return; + } + + out.writeInt(dumpData.size()); + // from begin to end vertex + for (Map.Entry> integerListEntry : dumpData.entrySet()) { + out.writeInt(integerListEntry.getKey()); + Set paths = integerListEntry.getValue(); + out.writeInt(paths.size()); + for (LongArrayList path : paths) { + PathSerAndDeser.serialize(out, path); + } + } + + out.flush(); + } catch (IOException e) { + logger.error("dumpObjects error", e); + } + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IBatchProcess.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IBatchProcess.java new file mode 100644 index 000000000000..7cd66ecb77cd --- /dev/null +++ 
b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IBatchProcess.java @@ -0,0 +1,36 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.example.circle.parallel.formal.store.ComputeStep; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; + +/** + * 磁盘模式批次处理接口 + * + * @author liumin + * @date 2024-08-14 + */ +public interface IBatchProcess { + /** + * 分批次读取 Message Store 更新 Path Storage 中的数据 + * + * @param ctx app context + * @param graph 子图 + * @param messageStorage 消息管理器 + * @param pathStorage 待更新的pathStorage + */ + void batchUpdatePathStorage(CircleAppParallelContext ctx, IFragment graph, MessageStorage messageStorage, PathStorage pathStorage); + + /** + * 分批次发送消息 + * + * @param ctx app context + * @param computeStep 计算步骤 + * @param graph 子图 + * @param ps 从 pathStore 中读取数据 + * @param messageManager 消息管理器 + */ + void sendMsg(CircleAppParallelContext ctx, ComputeStep computeStep, IFragment graph, PathStorage ps, ParallelMessageManager + messageManager); +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IObjectStorage.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IObjectStorage.java new file mode 100644 index 000000000000..7aaaf43f227a --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/IObjectStorage.java @@ -0,0 +1,25 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +/** + * 文件读写接口 + * + * @author liumin + * @date 2024/8/21 + */ +public interface IObjectStorage { + /** + * 加载文件 + */ + void load(); + + /** + * 文件写入 + */ + void dump(); + 
+ /** + * 清理内存数据 + */ + void clearInMemory(); +} + diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorage.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorage.java new file mode 100644 index 000000000000..be3827a42a5e --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorage.java @@ -0,0 +1,119 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.ReadWriteLock; + +/** + * 线程安全的LocalThreadMessageStorage,通过ThreadLocal 存储 MessageStorage + *

使用 {@link LocalThreadMessageStorageFactory} 初始化 + *

使用场景:多线程更新及清空 batchMessages + *

不同 LocalThreadMessageStorage 写入同一个 MessageStorage对应的磁盘文件 + * + * @author liumin + * @date 2024-08-07 + */ +public class LocalThreadMessageStorage extends FileObjectStorage { + private static final Logger logger = LoggerFactory.getLogger(LocalThreadMessageStorage.class); + private final Map> batchMessages; + private final MessageStorage messageStorage; + + public LocalThreadMessageStorage(MessageStorage messageStorage) { + super(messageStorage.getPath(), messageStorage.isAppend(), messageStorage.getMsgThreshold()); + this.messageStorage = messageStorage; + this.batchMessages = new ConcurrentHashMap<>(); + } + + public MessageStorage getMessageStorage() { + return messageStorage; + } + + public void putToLocalStorage(int lid, List msgs) { + // msgs 是重复使用的集合,不可直接 put 到 batchMessages。为优化对象创建,避免在此频繁 new Arraylist + this.batchMessages.compute(lid, (k, storedMsgs) -> { + if (storedMsgs == null) { + return new HashSet<>(msgs); + } + storedMsgs.addAll(msgs); + return storedMsgs; + }); + } + + /** + * 按批次并发处理消息并写入对应 messageStore 文件 + * @param toLid + * @param msg + */ + public void batchDumpMessages(int toLid, List msg) { + putToLocalStorage(toLid, msg); + if (loadOrFlush()) { + dump(); + } + } + + /** + * 根据收到的消息数量判断是否flush + * + * @return + */ + private boolean loadOrFlush() { + return this.batchMessages.size() >= messageStorage.getMsgThreshold(); + } + + /** + * 顺序写同一个 messageStore 文件 + * 一个 messageStore 对应一个文件 + */ + @Override + public void dump() { + ReadWriteLock lock = messageStorage.getLock(); + try { + lock.writeLock().lock(); + super.dump(); + } finally { + lock.writeLock().unlock(); + } + } + + /** + * 清理 local thread 的 batchMessages + */ + @Override + public void clearInMemory() { + for (Set msg : this.batchMessages.values()) { + msg.clear(); + } + + this.batchMessages.clear(); + } + + @Override + public String toString() { + return "MessageStorage{" + "batchMessages=" + batchMessages + '}'; + } + + @Override + public void loadObjects(ObjectInputStream in) throws 
IOException { + // 加载 messageStore 磁盘文件 + messageStorage.loadObjects(in); + } + + @Override + public void dumpObjects(ObjectOutputStream out) { + try { + FileObjectStorage.dumpVertexObjects(out, this.batchMessages); + } finally { + clearInMemory(); + } + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorageFactory.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorageFactory.java new file mode 100644 index 000000000000..c6970040ccd2 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/LocalThreadMessageStorageFactory.java @@ -0,0 +1,59 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +/** + * 线程内的 MessageStore 工厂 + * 每个batch 对应一个线程内的 MessageStore + * + * @author liumin + * @date 2024-08-07 + */ +public class LocalThreadMessageStorageFactory { + + /** + * MessageStore 按 batch 区分 + * key 为 batchId,value 为线程内存储的 MessageStore + */ + private final Map> threadLocalMessageStoreMap; + + public LocalThreadMessageStorageFactory(Map> threadLocalMessageStoreMap) { + this.threadLocalMessageStoreMap = threadLocalMessageStoreMap; + } + + /** + * 根据 batchId 获取线程内的某个 MessageStore + * + * @param batchId + * @return + */ + public LocalThreadMessageStorage get(int batchId) { + return threadLocalMessageStoreMap.get(batchId) + .get(); + } + + /** + * 获取所有 batch 的线程内的 MessageStore + * + * @return + */ + public List getAllValue() { + return threadLocalMessageStoreMap.values().stream().map(ThreadLocal::get).collect(Collectors.toList()); + } + + /** + * 移除所有 batch 对应的 thread local 变量 + */ + public void remove() { + threadLocalMessageStoreMap.values().forEach(ThreadLocal::remove); + } + + /** + * 将线程内所有 
batch 对应的 messageStore 持久化 + */ + public void dumpAll() { + threadLocalMessageStoreMap.values().forEach(batchMessageStore -> batchMessageStore.get().dump()); + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/MessageStorage.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/MessageStorage.java new file mode 100644 index 000000000000..78a164b6826e --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/MessageStorage.java @@ -0,0 +1,184 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.example.circle.parallel.formal.PathSerAndDeser; +import com.alibaba.graphscope.fragment.IFragment; +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedInputStream; +import java.io.EOFException; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +/** + * 消息文件存储 + * + * @author liumin + * @date 2024-08-07 + */ +public class MessageStorage extends FileObjectStorage { + private static final Logger logger = LoggerFactory.getLogger(MessageStorage.class); + private final Map> batchMessages; + private final long beginVertex; + private final long endVertex; + private final List loadContainer; + private final ReadWriteLock lock; + + public MessageStorage(int msgThreshold, String path, 
long beginVertex, long endVertex) { + super(path, true, msgThreshold); + this.batchMessages = new ConcurrentHashMap<>(); + this.beginVertex = beginVertex; + this.endVertex = endVertex; + this.loadContainer = new ArrayList<>(); + this.lock = new ReentrantReadWriteLock(); + } + + public ReadWriteLock getLock() { + return lock; + } + + public Map> getBatchMessages() { + return batchMessages; + } + + + /** + * 将消息更新到 MessageStore,相同 Key 的消息合并成一条 + * + * @param lid 内部点自增id + * @param msgs 消息集合 + */ + public void putToStorage(int lid, List msgs) { + // msgs 是重复使用的集合,不可直接 put 到 batchMessages。为优化对象创建,避免在此频繁 new Arraylist + this.batchMessages.compute(lid, (k, storedMsgs) -> { + if (storedMsgs == null) { + return new HashSet<>(msgs); + } + storedMsgs.addAll(msgs); + return storedMsgs; + }); + } + + private boolean loadOrFlush() { + return this.batchMessages.size() >= getMsgThreshold(); + } + + /** + * 读取 batch 的所有消息 + * + * @param in + */ + @Override + public void loadObjects(ObjectInputStream in) throws IOException { + + int vertexSize = in.readInt(); + // read from begin to end vertex + for (int i = 0; i < vertexSize; ++i) { + loadContainer.clear(); + + int index = in.readInt(); + int pathNum = in.readInt(); + //List list = new ArrayList<>(pathNum); + for (int j = 0; j < pathNum; ++j) { + LongArrayList l = PathSerAndDeser.deserialize(in); + if (l.isEmpty()) { + logger.info("MessS loadObjects.path size is 0"); + } + loadContainer.add(l); + } + + putToStorage(index, loadContainer); + } + } + + /** + * 写入一条消息 + * + * @param out 输出字节流 + */ + @Override + public void dumpObjects(ObjectOutputStream out) { + try { + FileObjectStorage.dumpVertexObjects(out, this.batchMessages); + } finally { + clearInMemory(); + } + } + + public boolean isEmpty() { + return this.batchMessages.isEmpty(); + } + + public int size() { + return this.batchMessages.size(); + } + + @Override + public void clearInMemory() { + for (Set msg : this.batchMessages.values()) { + msg.clear(); + } + + 
this.batchMessages.clear(); + } + + public void clearInDisk() { + clearInDisk("received_path_"); + } + + @Override + public String toString() { + return "MessageStorage{" + "batchMessages=" + batchMessages + '}'; + } + + /** + * 从 Message Store 加载消息更新到 Path Store + * 串行执行 + * TODO:若消息数量太多,可优化成并行 + * + * @param ctx + * @param process + * @param graph + * @param batchId + */ + public void loadInBatchAndUpdateStorage(CircleAppParallelContext ctx, IBatchProcess process, IFragment graph, int batchId) { + long start = System.currentTimeMillis(); + if (!getFile().exists()) { + return; + } + // 获取 点属性 storage + PathStorage pathStorage = ctx.diskStoreContext.getPathStorages(batchId); + try (FileInputStream fileIn = new FileInputStream(getFile()); + BufferedInputStream bufferedInputStream = new BufferedInputStream(fileIn, 1024); + ObjectInputStream in = new ObjectInputStream(bufferedInputStream);) { + + clearInMemory(); + while (true) { + if (loadOrFlush()) { + process.batchUpdatePathStorage(ctx, graph, this, pathStorage); + } + try { + loadObjects(in); + } catch (EOFException e) { + process.batchUpdatePathStorage(ctx, graph, this, pathStorage); + break; + } + } + } catch (Exception e) { + logger.error("load error", e); + } + logger.info("batchUpdatePathStorage end.superStep is {},batch is {},begin -> end is {},file is {},time is {}s", ctx.superStep, batchId, beginVertex + "->" + endVertex, getPath(), (System.currentTimeMillis() - start) / 1000); + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/PathStorage.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/PathStorage.java new file mode 100644 index 000000000000..ae52bd29ba6d --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/PathStorage.java @@ -0,0 +1,184 @@ +package 
com.alibaba.graphscope.example.circle.parallel.formal.store.disk; + +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.example.circle.parallel.formal.PathSerAndDeser; +import com.alibaba.graphscope.example.circle.parallel.formal.store.ComputeStep; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedInputStream; +import java.io.EOFException; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.ObjectInputStream; +import java.io.ObjectOutputStream; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * 点属性文件存储 + * + * @author liumin + * @date 2024-08-07 + */ +public class PathStorage extends FileObjectStorage { + private static final Logger logger = LoggerFactory.getLogger(PathStorage.class); + /** + * 这里我们使用一个 Map 来存储一个 batch 内点的所有path + */ + private Map> fragmentPaths; + private final int batchId; + private final long beginVertex; + private final long endVertex; + private final List loadContainer; + + public Map> getFragmentPaths() { + return fragmentPaths; + } + + public PathStorage(int msgThreshold, String path, int batchId, long beginVertex, long endVertex) { + super(path, true, msgThreshold); + this.fragmentPaths = new ConcurrentHashMap<>(); + this.batchId = batchId; + this.beginVertex = beginVertex; + this.endVertex = endVertex; + this.loadContainer = new ArrayList<>(); + } + + public long getBeginVertex() { + return beginVertex; + } + + public long getEndVertex() { + return endVertex; + } + + public int getBatchId() { + return batchId; + } + + public Set getPath(long vid) { + return fragmentPaths.computeIfAbsent((int) vid, k -> new HashSet<>()); + } + 
+ public void putToStorage(int vid, List paths) { + // paths 是重复使用的集合,不可直接 put 到 batchMessages。为优化对象创建,避免在此频繁 new Arraylist + this.fragmentPaths.compute(vid, (k, v) -> { + if (v == null) { + return new HashSet<>(paths); + } + + v.addAll(paths); + return v; + }); + } + + public void putToStorage(int vid, LongArrayList msg) { + // paths 是重复使用的集合,不可直接 put 到 batchMessages。为优化对象创建,避免在此频繁 new Arraylist + this.fragmentPaths.compute(vid, (k, paths) -> { + if (paths == null) { + Set result = new HashSet<>(); + result.add(msg); + return result; + } + paths.add(msg); + return paths; + }); + } + + @Override + public synchronized void clearInMemory() { + for (Set l : fragmentPaths.values()) { + for (LongArrayList path : l) { + if (path != null) { + path.clear(); + } + } + l.clear(); + } + fragmentPaths.clear(); + } + + public void clearInDisk() { + clearInDisk("vertex_atr_"); + } + + @Override + public void loadObjects(ObjectInputStream in) throws IOException { + + int vertexSize = in.readInt(); + // read from begin to end vertex + for (int i = 0; i < vertexSize; ++i) { + loadContainer.clear(); + + int index = in.readInt(); + int pathNum = in.readInt(); + + for (int j = 0; j < pathNum; ++j) { + LongArrayList l = PathSerAndDeser.deserialize(in); + if (l.isEmpty()) { + logger.info("PathS loadObjects.path size is 0"); + } + loadContainer.add(l); + } + + putToStorage(index, loadContainer); + } + } + + @Override + public void dumpObjects(ObjectOutputStream out) { + try { + FileObjectStorage.dumpVertexObjects(out, this.fragmentPaths); + } finally { + clearInMemory(); + } + } + + private boolean loadOrFlush() { + return this.fragmentPaths.size() >= getMsgThreshold(); + } + + /** + * 从 Path Store 加载消息,发送出去 + * 发送消息并行执行 + * + * @param ctx 上下文 + * @param step 流程计算类 + * @param graph 子图 + * @param process 批次处理类 + * @param mm 并行消息管理器 + */ + public void loadObjectsAndSendMessage(CircleAppParallelContext ctx, ComputeStep step, IFragment graph, IBatchProcess process, ParallelMessageManager mm) 
{ + long start = System.currentTimeMillis(); + if (!getFile().exists()) { + return; + } + + try (FileInputStream fileIn = new FileInputStream(getFile()); BufferedInputStream bufferedInputStream = new BufferedInputStream(fileIn, 1024); ObjectInputStream in = new ObjectInputStream(bufferedInputStream);) { + + clearInMemory(); + while (true) { + if (loadOrFlush()) { + process.sendMsg(ctx, step, graph, this, mm); + } + try { + loadObjects(in); + } catch (EOFException e) { + process.sendMsg(ctx, step, graph, this, mm); + break; + } + } + } catch (Exception e) { + logger.error("load error", e); + } + logger.info("sendMessage end.superStep is {},batch is {},begin -> end is {},file is {},time is {}s", ctx.superStep, batchId, getBeginVertex() + "->" + getEndVertex(), getPath(), (System.currentTimeMillis() - start) / 1000); + } +} \ No newline at end of file diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/memory/MemoryComputeStepImpl.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/memory/MemoryComputeStepImpl.java new file mode 100644 index 000000000000..a7456a307b3c --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/memory/MemoryComputeStepImpl.java @@ -0,0 +1,128 @@ +package com.alibaba.graphscope.example.circle.parallel.formal.store.memory; + +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.example.circle.parallel.formal.store.ComputeStep; +import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.carrotsearch.hppc.LongArrayList; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Set; + +/** + * 内存模式计算 + * + * @author liumin + * 
@date 2024-08-11 + */ +public class MemoryComputeStepImpl extends ComputeStep { + private static final Logger logger = LoggerFactory.getLogger(MemoryComputeStepImpl.class); + + public MemoryComputeStepImpl(CircleAppParallelContext ctx) { + super(ctx); + } + + /** + * 初始化处理 + * + * @param frag 子图 + * @param messageManager 消息管理器 + */ + @Override + public void initial(IFragment frag, ParallelMessageManager messageManager) { + // 首次处理结束后,交换 currentPaths 和 nextPaths + ctx.swapPaths(); + } + + /** + * 迭代计算 + * + * @param frag 子图 + * @param messageManager 消息管理器 + */ + @Override + public void inc(IFragment frag, ParallelMessageManager messageManager) { + // 接收 消息 + // outer vertex + receiveMessage(frag, messageManager); + + if (ctx.superStep < ctx.maxIteration) { + // 发送消息 + sendMessageThroughOE(frag, messageManager, 0, frag.getInnerVerticesNum(), 0); + + ctx.swapPaths(); + } else { + ctx.currentPaths.clear(); + } + } + + /** + * 发送消息后处理步骤 + * 本机发送给 frag 内部的 inner vertex + * + * @param graph 子图 + * @param nbrV 邻居节点 + * @param msg 消息 + */ + @Override + public void sendToLocalFrag(IFragment graph, Vertex nbrV, List msg) { + // Update local nbr vertex + long neiOid = graph.getId(nbrV); + for (int i = 0; i < msg.size(); i++) { + LongArrayList clonePath = msg.get(i).clone(); + // 将 点 添加到路径 + clonePath.add(neiOid); + msg.set(i, clonePath); + } + ctx.addPathToNextPathsAndFindCircle(nbrV.getValue().intValue(), msg); + + if (graph.getInnerVerticesNum() < 30) { + logger.info("superStep is {},id is {},size is {},next path is {}", ctx.superStep, neiOid, msg.size(), ctx.nextPaths.get(nbrV.getValue().intValue())); + } + } + + /** + * 接收消息后处理步骤 + * + * @param frag 子图 + * @param lid 内部自增id + * @param oid 原始id + * @param msgs 消息列表 + */ + @Override + public void processUponReceiveMsg(IFragment frag, int lid, long oid, List msgs) { + vprog(lid, oid, msgs); + if (frag.getInnerVerticesNum() < 30) { + logger.info("vprog end.superStep is {},lid is {},oid is {},vdata is {},msg is {},currentPaths 
is {}", ctx.superStep, lid, oid, ctx.currentPaths.get(lid), msgs, ctx.currentPaths); + } + } + + @Override + public Set getVertexPath(int lid, Integer batchId) { + return ctx.getCurrentPaths(lid); + } + + /** + * 更新点属性,将符合条件的点 id 添加到路径集合 + * frag 内部的点在当前 superStep 中更新属性 + * frag 外部的点在下一 superStep 中接收消息并更新属性 + * + * @param lid 当前点lid + * @param oid 点原始id + * @param msgs 发送给该点的消息。需保证发过来的消息提前过滤 + */ + private void vprog(int lid, long oid, List msgs) { + // 消息处理 + for (LongArrayList path : msgs) { + // 将 点 添加到路径 + path.add(oid); + } + + // 将单次迭代 消息更新到 current集合 + ctx.addPathToCurrentPathsAndFindCircle(lid, msgs); + } + +} diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index 8e7ecf69086c..94700b6c1218 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -415,7 +415,7 @@ get_test_data # exact_verify "${test_dir}"/p2p-31-"${app}" # done -#start_vineyard +start_vineyard # run_vy ${np} ./run_vy_app "${socket_file}" 2 "${test_dir}"/new_property/v2_e2/twitter_e 2 "${test_dir}"/new_property/v2_e2/twitter_v 0 # run_vy_2 ${np} ./run_vy_app "${socket_file}" 4 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 @@ -443,40 +443,41 @@ then if [[ "${USER_JAR_PATH}"x != ""x ]] then echo "Running Java tests..." 
- run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS - - GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" \ - 1 "../test/modern_graph/knows.csv#header_row=True#delimiter=|#src_label=v0&dst_label=v0&label=e" \ - 1 "../test/modern_graph/person.csv#header_row=True#delimiter=|#label=v0" 1 0 1 \ - com.alibaba.graphscope.example.circle.CirclePIEParallel - - GLOG_v=10 mpirun -n 1 ./run_java_app "${socket_file}" 1 \ - "${test_dir}/property/p2p-31_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&delimiter=," \ - 1 "${test_dir}/property/p2p-31_property_v_0#header_row=True#label=v&included_column=id,age&delimiter=," \ - 1 0 1 com.alibaba.graphscope.example.sssp.SSSP - - GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ - 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ - 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ - com.alibaba.graphscope.example.stringApp.StringApp - - echo "Running girpah tests..." 
- ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ - --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ - --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ - --user_app_class com.alibaba.graphscope.example.giraph.SSSP - - echo "Test Giraph app user Customized Writable" - ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat \ - --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat --vfile "${test_dir}"/p2p-31.v \ - --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ - --user_app_class com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable - - echo "Test Giraph app user Circle App" - GLOG_v=10 ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.circle.CircleVertexInputFormat \ - --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.circle.CircleEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ - --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ - --user_app_class com.alibaba.graphscope.example.giraph.circle.Circle + # run_vy_2 ${np} ./run_java_app "${socket_file}" 1 "${test_dir}"/projected_property/twitter_property_e "${test_dir}"/projected_property/twitter_property_v 1 0 1 com.alibaba.graphscope.example.bfs.BFS + + # GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" \ + # 1 "../test/modern_graph/knows.csv#header_row=True#delimiter=|#src_label=v0&dst_label=v0&label=e" \ + # 1 "../test/modern_graph/person.csv#header_row=True#delimiter=|#label=v0" 1 0 1 \ + # com.alibaba.graphscope.example.circle.CirclePIEParallel + + GLOG_v=10 mpirun -n 2 
./run_java_app "${socket_file}" 1 \ + "${test_dir}/property/p2p-31_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,int64_t" \ + 1 "${test_dir}/property/p2p-31_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,int64_t" \ + 1 0 1 com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallel 10 + # 1 0 1 com.alibaba.graphscope.example.sssp.SSSP + + # GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ + # 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ + # 1 "${test_dir}/projected_property/twitter_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,std::string" \ + # com.alibaba.graphscope.example.stringApp.StringApp + + # echo "Running girpah tests..." + # ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexInputFormat \ + # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ + # --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + # --user_app_class com.alibaba.graphscope.example.giraph.SSSP + + # echo "Test Giraph app user Customized Writable" + # ./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PVertexMultipleLongInputFormat \ + # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.format.P2PEdgeMultipleLongInputFormat --vfile "${test_dir}"/p2p-31.v \ + # --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + # --user_app_class com.alibaba.graphscope.example.giraph.MessageAppWithUserWritable + + # echo "Test Giraph app user Circle App" + # GLOG_v=10 
./giraph_runner --vertex_input_format_class giraph:com.alibaba.graphscope.example.giraph.circle.CircleVertexInputFormat \ + # --edge_input_format_class giraph:com.alibaba.graphscope.example.giraph.circle.CircleEdgeInputFormat --vfile "${test_dir}"/p2p-31.v \ + # --efile "${test_dir}"/p2p-31.e --ipc_socket /tmp/vineyard.sock --lib_path /opt/graphscope/lib/libgrape-jni.so \ + # --user_app_class com.alibaba.graphscope.example.giraph.circle.Circle fi fi diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index f1e3b7bd771e..c73359de5de0 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -351,137 +351,146 @@ void QueryProperty(vineyard::Client& client, } template -void QueryProjected( - vineyard::Client& client, std::shared_ptr fragment, - const grape::CommSpec& comm_spec, const std::string& app_name, - const std::string& out_prefix, const std::string& basic_params, - const std::string& selector_string, const std::string& selectors_string, - int32_t expected_data_type, vineyard::AnyType expected_tensor_type) { +void QueryProjected(vineyard::Client& client, + std::shared_ptr fragment, + const grape::CommSpec& comm_spec, + const std::string& app_name, const std::string& out_prefix, + const std::string& basic_params, + const std::string& selector_string, + const std::string& selectors_string, + int32_t expected_data_type, + vineyard::AnyType expected_tensor_type, int cur_time = 0) { using AppType = gs::JavaPIEProjectedParallelAppOE; auto app = std::make_shared(); auto worker = AppType::CreateWorker(app, fragment); auto spec = grape::DefaultParallelEngineSpec(); worker->Init(comm_spec, spec); std::string lib_path = ""; - worker->Query(basic_params, lib_path); - std::ofstream ostream; - std::string output_path = - grape::GetResultFilename(out_prefix, fragment->fid()); + if (cur_time == 0) { + worker->Query(basic_params, lib_path); + } + // std::ofstream ostream; + // std::string 
output_path = + // grape::GetResultFilename(out_prefix, fragment->fid()); - ostream.open(output_path); - worker->Output(ostream); - ostream.close(); + // ostream.open(output_path); + // worker->Output(ostream); + // ostream.close(); std::shared_ptr> ctx = worker->GetContext(); worker->Finalize(); - gs::rpc::graph::GraphDefPb graph_def; - graph_def.set_graph_type(gs::rpc::graph::ARROW_PROJECTED); - - auto selectors = gs::Selector::ParseSelectors(selectors_string).value(); - auto selector = gs::Selector::parse(selector_string).value(); - auto range = std::make_pair("", ""); - - auto frag_wrapper = - std::make_shared>( - "graph_123", graph_def, fragment); - - auto ctx_wrapper = ctx->CreateInnerCtxWrapper( - "ctx_wrapper_" + vineyard::random_string(8), frag_wrapper); - if (ctx_wrapper->context_type() == "vertex_property") { - auto vp_ctx_wrapper = - std::dynamic_pointer_cast( - ctx_wrapper); - /// 0. test ndarray - { - std::unique_ptr arc = std::move( - vp_ctx_wrapper->ToNdArray(comm_spec, selector, range).value()); - std::string java_out_prefix = - out_prefix + "/java_projected_assembled_ndarray.dat"; - output_nd_array(comm_spec, std::move(arc), java_out_prefix, - expected_data_type); // 4 for int64_t - } - VLOG(1) << "[0] java projected finish test ndarray"; - - // 1. Test data frame - { - // auto selectors = - // gs::gs::Selector::ParseSelectors(s_selectors).value(); - std::unique_ptr arc = std::move( - vp_ctx_wrapper->ToDataframe(comm_spec, selectors, range).value()); - std::string java_data_frame_out_prefix = out_prefix + "/java_projected"; - output_data_frame(comm_spec, std::move(arc), java_data_frame_out_prefix, - expected_data_type); - } - - VLOG(1) << "[1] java projected finish test dataframe"; - // 2. 
test vineyard tensor - { - auto tmp = - vp_ctx_wrapper->ToVineyardTensor(comm_spec, client, selector, range); - CHECK(tmp); - vineyard::ObjectID ndarray_object = tmp.value(); - std::string java_v6d_tensor_prefix = out_prefix + "/java_projected"; - output_vineyard_tensor(client, ndarray_object, comm_spec, - java_v6d_tensor_prefix, - expected_tensor_type); - } - VLOG(1) << "[2] java projected finish test vineyard tensor"; - - } else if (ctx_wrapper->context_type() == "vertex_data") { - auto vd_ctx_wrapper = - std::dynamic_pointer_cast(ctx_wrapper); - /// 0. test ndarray - { - std::unique_ptr arc = std::move( - vd_ctx_wrapper->ToNdArray(comm_spec, selector, range).value()); - std::string java_out_prefix = - out_prefix + "/java_projected_assembled_ndarray.dat"; - output_nd_array(comm_spec, std::move(arc), java_out_prefix, - expected_data_type); // 4 for int64_t - } - VLOG(1) << "[0] java projected finish test ndarray"; - - // 1. Test data frame - { - std::unique_ptr arc = std::move( - vd_ctx_wrapper->ToDataframe(comm_spec, selectors, range).value()); - std::string java_data_frame_out_prefix = out_prefix + "/java_projected"; - output_data_frame(comm_spec, std::move(arc), java_data_frame_out_prefix, - expected_data_type); - } - - VLOG(1) << "[1] java projected finish test dataframe"; - // 2. 
test vineyard tensor - { - auto tmp = - vd_ctx_wrapper->ToVineyardTensor(comm_spec, client, selector, range); - CHECK(tmp); - vineyard::ObjectID ndarray_object = tmp.value(); - std::string java_v6d_tensor_prefix = out_prefix + "/java_projected"; - if (expected_tensor_type == vineyard::AnyType::Double) { - output_vineyard_tensor(client, ndarray_object, comm_spec, - java_v6d_tensor_prefix, - expected_tensor_type); - } else if (expected_tensor_type == vineyard::AnyType::Int64) { - output_vineyard_tensor(client, ndarray_object, comm_spec, - java_v6d_tensor_prefix, - expected_tensor_type); - } else { - LOG(FATAL) << "Unregonizable data type " << expected_tensor_type; - } - } - VLOG(1) << "[2] java projected finish test vineyard tensor"; - } else { - LOG(ERROR) << "Unrecognized ctx type: " << ctx_wrapper->context_type(); - } + // gs::rpc::graph::GraphDefPb graph_def; + // graph_def.set_graph_type(gs::rpc::graph::ARROW_PROJECTED); + + // auto selectors = gs::Selector::ParseSelectors(selectors_string).value(); + // auto selector = gs::Selector::parse(selector_string).value(); + // auto range = std::make_pair("", ""); + + // auto frag_wrapper = + // std::make_shared>( + // "graph_123", graph_def, fragment); + + // auto ctx_wrapper = ctx->CreateInnerCtxWrapper( + // "ctx_wrapper_" + vineyard::random_string(8), frag_wrapper); + // if (ctx_wrapper->context_type() == "vertex_property") { + // auto vp_ctx_wrapper = + // std::dynamic_pointer_cast( + // ctx_wrapper); + // /// 0. test ndarray + // { + // std::unique_ptr arc = std::move( + // vp_ctx_wrapper->ToNdArray(comm_spec, selector, range).value()); + // std::string java_out_prefix = + // out_prefix + "/java_projected_assembled_ndarray.dat"; + // output_nd_array(comm_spec, std::move(arc), java_out_prefix, + // expected_data_type); // 4 for int64_t + // } + // VLOG(1) << "[0] java projected finish test ndarray"; + + // // 1. 
Test data frame + // { + // // auto selectors = + // // gs::gs::Selector::ParseSelectors(s_selectors).value(); + // std::unique_ptr arc = std::move( + // vp_ctx_wrapper->ToDataframe(comm_spec, selectors, range).value()); + // std::string java_data_frame_out_prefix = out_prefix + + // "/java_projected"; output_data_frame(comm_spec, std::move(arc), + // java_data_frame_out_prefix, + // expected_data_type); + // } + + // VLOG(1) << "[1] java projected finish test dataframe"; + // // 2. test vineyard tensor + // { + // auto tmp = + // vp_ctx_wrapper->ToVineyardTensor(comm_spec, client, selector, + // range); + // CHECK(tmp); + // vineyard::ObjectID ndarray_object = tmp.value(); + // std::string java_v6d_tensor_prefix = out_prefix + "/java_projected"; + // output_vineyard_tensor(client, ndarray_object, comm_spec, + // java_v6d_tensor_prefix, + // expected_tensor_type); + // } + // VLOG(1) << "[2] java projected finish test vineyard tensor"; + + // } else if (ctx_wrapper->context_type() == "vertex_data") { + // auto vd_ctx_wrapper = + // std::dynamic_pointer_cast(ctx_wrapper); + // /// 0. test ndarray + // { + // std::unique_ptr arc = std::move( + // vd_ctx_wrapper->ToNdArray(comm_spec, selector, range).value()); + // std::string java_out_prefix = + // out_prefix + "/java_projected_assembled_ndarray.dat"; + // output_nd_array(comm_spec, std::move(arc), java_out_prefix, + // expected_data_type); // 4 for int64_t + // } + // VLOG(1) << "[0] java projected finish test ndarray"; + + // // 1. Test data frame + // { + // std::unique_ptr arc = std::move( + // vd_ctx_wrapper->ToDataframe(comm_spec, selectors, range).value()); + // std::string java_data_frame_out_prefix = out_prefix + + // "/java_projected"; output_data_frame(comm_spec, std::move(arc), + // java_data_frame_out_prefix, + // expected_data_type); + // } + + // VLOG(1) << "[1] java projected finish test dataframe"; + // // 2. 
test vineyard tensor + // { + // auto tmp = + // vd_ctx_wrapper->ToVineyardTensor(comm_spec, client, selector, + // range); + // CHECK(tmp); + // vineyard::ObjectID ndarray_object = tmp.value(); + // std::string java_v6d_tensor_prefix = out_prefix + "/java_projected"; + // if (expected_tensor_type == vineyard::AnyType::Double) { + // output_vineyard_tensor(client, ndarray_object, comm_spec, + // java_v6d_tensor_prefix, + // expected_tensor_type); + // } else if (expected_tensor_type == vineyard::AnyType::Int64) { + // output_vineyard_tensor(client, ndarray_object, comm_spec, + // java_v6d_tensor_prefix, + // expected_tensor_type); + // } else { + // LOG(FATAL) << "Unregonizable data type " << expected_tensor_type; + // } + // } + // VLOG(1) << "[2] java projected finish test vineyard tensor"; + // } else { + // LOG(ERROR) << "Unrecognized ctx type: " << ctx_wrapper->context_type(); + // } } // Running test doesn't require codegen. void Run(vineyard::Client& client, const grape::CommSpec& comm_spec, vineyard::ObjectID id, bool run_projected, bool run_property, - const std::string& app_name) { + const std::string& app_name, int times = 1) { std::shared_ptr fragment = std::dynamic_pointer_cast(client.GetObject(id)); @@ -530,7 +539,8 @@ void Run(vineyard::Client& client, const grape::CommSpec& comm_spec, QueryProperty(client, fragment, comm_spec, app_name, "/tmp", basic_params, selector_string, selectors_string); } else { // 3. 
run projected - if (app_name.find("SSSP") != std::string::npos) { + if (app_name.find("SSSP") != std::string::npos || + app_name.find("CircleAppParallel") != std::string::npos) { pt.put("frag_name", "gs::ArrowProjectedFragment"); } else { @@ -566,20 +576,27 @@ void Run(vineyard::Client& client, const grape::CommSpec& comm_spec, selectors_string = gs::generate_selectors(selector_list); } } - if (app_name.find("SSSP") != std::string::npos) { + if (app_name.find("SSSP") != std::string::npos || + app_name.find("CircleAppParallel") != std::string::npos) { using ProjectedFragmentType = gs::ArrowProjectedFragment; std::shared_ptr projected_fragment = - ProjectedFragmentType::Project(fragment, 0, 0, 0, 0); + ProjectedFragmentType::Project(fragment, 0, 0, 0, 2); // test get data using vertex_t = ProjectedFragmentType::vertex_t; vertex_t vertex; projected_fragment->GetInnerVertex(4, vertex); VLOG(1) << "source vertex" << vertex.GetValue(); - QueryProjected(client, projected_fragment, comm_spec, app_name, "/tmp", - basic_params, selector_string, selectors_string, - vineyard::TypeToInt::value, - vineyard::AnyType::Double); + for (int i = 0; i < times; ++i) { + LOG(INFO) << "Run project for times " << i + << " current memory usage: " << gs::getProcessMemory(); + QueryProjected(client, projected_fragment, comm_spec, app_name, + "/tmp", basic_params, selector_string, + selectors_string, vineyard::TypeToInt::value, + vineyard::AnyType::Double, i); + LOG(INFO) << "Finish project for times " << i + << " current memory usage: " << gs::getProcessMemory(); + } } else { using ProjectedFragmentType = gs::ArrowProjectedFragment; @@ -607,7 +624,7 @@ int main(int argc, char** argv) { "usage: ./run_java_app " " " " " - "[directed] [app_name]\n"); + "[directed] [app_name] [times]\n"); return 1; } int index = 1; @@ -637,6 +654,11 @@ int main(int argc, char** argv) { app_name = argv[index++]; } VLOG(1) << "app name " << app_name; + int times = 1; + + if (argc > index) { + times = 
atoi(argv[index++]); + } grape::InitMPIComm(); { @@ -671,7 +693,8 @@ int main(int argc, char** argv) { MPI_Barrier(comm_spec.comm()); - Run(client, comm_spec, fragment_id, run_projected, run_property, app_name); + Run(client, comm_spec, fragment_id, run_projected, run_property, app_name, + times); MPI_Barrier(comm_spec.comm()); } From 249cb48e6eac4762a1a4c71e7ca9bf309c2ceb38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Thu, 29 Aug 2024 19:55:29 +0800 Subject: [PATCH 48/52] impl message app --- .../parallel/formal/CircleAppParallel.java | 1 + .../formal/CircleAppParallelContext.java | 2 + .../parallel/formal/store/ComputeStep.java | 2 + .../formal/store/disk/FileBatchProcess.java | 1 + .../graphscope/example/message/Message.java | 223 ++++++++++++++++++ .../example/message/MessageContext.java | 66 ++++++ .../graphscope/ds/DenseVertexSetTest.java | 2 + 7 files changed, 297 insertions(+) create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java create mode 100644 analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java index d7682c9a7127..81a2e8bd1593 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallel.java @@ -52,6 +52,7 @@ public void PEval(IFragment fragment, ParallelContextBas logger.error("PEval error", e); } } + vertex.delete(); ctx.storeProcessor.initial(fragment, messageManager); diff --git 
a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java index f8ed61552e2e..46ac96e454bf 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/CircleAppParallelContext.java @@ -232,6 +232,7 @@ public void Init(IFragment frag, ParallelMessageManager initSeedVertex((int) i, oid); } } + vertex.delete(); currModified = new ThreadSafeBitSet(ThreadSafeBitSet.DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS, (int) frag.getInnerVerticesNum()); nextModified = new ThreadSafeBitSet(ThreadSafeBitSet.DEFAULT_LOG2_SEGMENT_SIZE_IN_BITS, (int) frag.getInnerVerticesNum()); @@ -324,6 +325,7 @@ public void Output(IFragment frag) { value.fromJavaString(outPutCircleResult((int) vid)); } } + cur.delete(); clean(); } diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java index 05910b3f9a08..47d84c699f46 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/ComputeStep.java @@ -295,6 +295,7 @@ protected void receiveMessageImpl(IFragment graph, Messa } tmpVector.delete(); + tmpVertex.delete(); } /** @@ -352,6 +353,7 @@ private void sendMessageThroughOEImpl(IFragment frag, lo sendToNbr(frag, messageManager, vertex, threadId, batchId); } } + vertex.delete(); } /** diff --git 
a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java index 1e99e6f6c592..b030251c97ae 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/circle/parallel/formal/store/disk/FileBatchProcess.java @@ -60,6 +60,7 @@ public void batchUpdatePathStorage(CircleAppParallelContext ctx, IFragment { + + private static final Logger logger = LoggerFactory.getLogger(Message.class); + + @Override + public void PEval(IFragment iFragment, + ParallelContextBase parallelContextBase, + ParallelMessageManager parallelMessageManager) { + MessageContext ctx = (MessageContext) parallelContextBase; + logger.info("Frag id: " + iFragment.fid() + " PEval"); + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + for (long i = 0; i < iFragment.getInnerVerticesNum(); ++i) { + // 表示获取 到 frag 内部的第 几个 点 + vertex.setValue(i); + try { + // 内存处理 + sendToNbr(iFragment,ctx, parallelMessageManager, vertex, 0); + } catch (IOException e) { + logger.error("PEval error", e); + } + } + vertex.delete(); + + parallelMessageManager.forceContinue(); + ctx.currentStep += 1; + } + + @Override + public void IncEval(IFragment iFragment, + ParallelContextBase parallelContextBase, + ParallelMessageManager parallelMessageManager) { + MessageContext ctx = (MessageContext) parallelContextBase; + + if (ctx.currentStep >= ctx.maxSteps) { + parallelMessageManager.forceContinue(); + return; + } + + // receive messages + receiveMessages(iFragment, ctx, parallelMessageManager); + + // sendToNbr(iFragment, parallelMessageManager, vertex, 0, 0); + parallelSendToNbr(iFragment, parallelMessageManager, ctx); + } + + void 
receiveMessages(IFragment frag, MessageContext ctx, + ParallelMessageManager messageManager) { + long start = System.currentTimeMillis(); + CountDownLatch countDownLatch = new CountDownLatch(ctx.threadNum); + MessageInBuffer.Factory bufferFactory = FFITypeFactoryhelper.newMessageInBuffer(); + for (int tid = 0; tid < ctx.threadNum; ++tid) { + final int finalTid = tid; + ctx.executor.execute(new Runnable() { + @Override + public void run() { + // 每个线程 维护一个 messageInBuffer + MessageInBuffer messageInBuffer = bufferFactory.create(); + boolean result; + while (true) { + result = messageManager.getMessageInBuffer(messageInBuffer); + if (result) { + try { + receiveMessageImpl(frag, messageInBuffer); + } catch (Exception e) { + logger.error( + "Error when receiving message in fragment {} thread {}", + frag.fid(), finalTid, e); + } + } else { + break; + } + } + messageInBuffer.delete(); + countDownLatch.countDown(); + } + }); + } + try { + countDownLatch.await(); + } catch (Exception e) { + logger.error("receiveMessageAndUpdateVertex error.", e); + ctx.executor.shutdown(); + } + } + + void receiveMessageImpl(IFragment frag, + MessageInBuffer buffer) throws IOException { + FFIByteVector tmpVector = (FFIByteVector) FFIByteVectorFactory.INSTANCE.create(); + Vertex tmpVertex = FFITypeFactoryhelper.newVertexLong(); + + List receivedMsg = new ArrayList<>(); + while (buffer.getPureMessage(tmpVector)) { + tmpVector.touch(); + FFIByteVectorInputStream inputStream = new FFIByteVectorInputStream(tmpVector); + long gid = inputStream.readLong(); + if (!frag.innerVertexGid2Vertex(gid, tmpVertex)) { + logger.error("Fail to get lid from gid {}", gid); + } + int size = inputStream.readInt(); + + if (size != 0) { + for (int i = 0; i < size; i++) { + LongArrayList path = PathSerAndDeser.deserialize(inputStream); + receivedMsg.add(path); + } + } + } + tmpVector.delete(); + tmpVertex.delete(); + logger.info("Received {} messages", receivedMsg.size()); + receivedMsg.clear(); + } + + + void 
parallelSendToNbr(IFragment frag, ParallelMessageManager messageManager, MessageContext ctx) { + logger.info("Send message through oe"); + CountDownLatch countDownLatch = new CountDownLatch(ctx.threadNum); + AtomicInteger atomicInteger = new AtomicInteger(0); + int chunkSize = 256; + + int originEnd = (int) frag.getInnerVerticesNum(); + for (int tid = 0; tid < ctx.threadNum; ++tid) { + final int finalTid = tid; + ctx.executor.execute( + new Runnable() { + @Override + public void run() { + while (true) { + int curBegin = + Math.min(atomicInteger.getAndAdd(chunkSize), originEnd); + int curEnd = Math.min(curBegin + chunkSize, originEnd); + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); + if (curBegin >= originEnd) { + break; + } + for (long i = curBegin; i < curEnd; ++i) { + vertex.setValue(i); + try { + sendToAdjList(frag, ctx, messageManager, vertex, finalTid); + } catch (IOException e) { + e.printStackTrace(); + } + } + } + countDownLatch.countDown(); + } + }); + } + try { + countDownLatch.await(); + } catch (Exception e) { + e.printStackTrace(); + ctx.executor.shutdown(); + } + ctx.currentStep += 1; + } + + void sendToNbr(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId) throws IOException { + sendToAdjList(frag, ctx, messageManager, vertex, threadId); + } + + void sendToAdjList(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId) throws IOException { + List msgs = createMessages(); + FFIByteVectorOutputStream msgVector = ctx.getMsgVectorStream(threadId); + AdjList nbrs = frag.getOutgoingAdjList(vertex); + for (Nbr nbr : nbrs.iterable()) { + Vertex nbrVertex = nbr.neighbor(); + if (frag.isOuterVertex(nbrVertex)) { + msgVector.reset(); + msgVector.writeLong(frag.getOuterVertexGid(nbrVertex)); + msgVector.writeInt(msgs.size()); + for (LongArrayList msg : msgs) { + PathSerAndDeser.serialize(msgVector, msg); + } + msgVector.finishSetting(); + 
messageManager.sendToFragment(frag.getFragId(nbrVertex),msgVector.getVector(), threadId); + } + else { + // skip for send to inner vertex + } + } + } + + List createMessages() { + List list = new ArrayList<>(); + for (int i = 0; i < 5; ++i) { + LongArrayList curList = new LongArrayList(10); + for (int j = 0; j < 10; ++j) { + curList.add(j); + } + } + return list; + } +} diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java new file mode 100644 index 000000000000..83c9deb70b57 --- /dev/null +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java @@ -0,0 +1,66 @@ +package com.alibaba.graphscope.example.message; + +import com.alibaba.fastjson.JSONObject; +import com.alibaba.graphscope.context.ParallelContextBase; +import com.alibaba.graphscope.context.VertexDataContext; +import com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallelContext; +import com.alibaba.graphscope.fragment.IFragment; +import com.alibaba.graphscope.parallel.ParallelMessageManager; +import com.alibaba.graphscope.serialization.FFIByteVectorOutputStream; +import com.alibaba.graphscope.stdcxx.FFIByteVector; +import com.alibaba.graphscope.stdcxx.FFIByteVectorFactory; +import com.alibaba.graphscope.stdcxx.StdString; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import java.util.List; +import java.util.concurrent.ThreadPoolExecutor; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class MessageContext extends + VertexDataContext, StdString> implements + ParallelContextBase { + private static final Logger logger = LoggerFactory.getLogger(MessageContext.class); + + + public int maxSteps = 10; + public int currentStep = 0; + public int threadNum = 1; + public List msgVectorStream; + public ThreadPoolExecutor executor; + + 
@Override + public void Init(IFragment iFragment, + ParallelMessageManager parallelMessageManager, JSONObject jsonObject) { + createFFIContext(iFragment, StdString.class, false); + currentStep = 0; + if (!jsonObject.containsKey("maxSteps")) { + maxSteps = 10; + } + else { + maxSteps = jsonObject.getInteger("maxSteps"); + } + if (jsonObject.containsKey("threadNum")) { + threadNum = jsonObject.getInteger("threadNum"); + } + else { + threadNum = 8; + } + logger.info("Init MessageContext maxSteps: " + maxSteps + " threadNum: " + threadNum); + msgVectorStream = new java.util.ArrayList(); + for (int i = 0; i < threadNum; ++i) { + msgVectorStream.add(new FFIByteVectorOutputStream()); + } + executor = new ThreadPoolExecutor(threadNum, threadNum, 60L, java.util.concurrent.TimeUnit.SECONDS, new java.util.concurrent.LinkedBlockingQueue<>(100000)); + } + + @Override + public void Output(IFragment iFragment) { + for (int i = 0; i < threadNum; ++i) { + msgVectorStream.get(i).close(); + } + } + + public FFIByteVectorOutputStream getMsgVectorStream(int threadId) { + return msgVectorStream.get(threadId); + } +} diff --git a/analytical_engine/java/grape-runtime/src/test/java/com/alibaba/graphscope/ds/DenseVertexSetTest.java b/analytical_engine/java/grape-runtime/src/test/java/com/alibaba/graphscope/ds/DenseVertexSetTest.java index a2417caa4f92..6b36f27799f2 100644 --- a/analytical_engine/java/grape-runtime/src/test/java/com/alibaba/graphscope/ds/DenseVertexSetTest.java +++ b/analytical_engine/java/grape-runtime/src/test/java/com/alibaba/graphscope/ds/DenseVertexSetTest.java @@ -50,5 +50,7 @@ public void test1() { Assert.assertFalse(denseVertexSet.Empty()); Assert.assertFalse(denseVertexSet.PartialEmpty(0L, 100L)); Assert.assertTrue(denseVertexSet.PartialEmpty(51L, 100L)); + + vertex.delete(); } } From 3b52a4ab48de7d02c1c74c009fc41ad381363542 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 2 Sep 2024 14:08:36 +0800 Subject: [PATCH 49/52] reproceduce the memory leak 
Committed-by: xiaolei.zl from Dev container --- .../graphscope/example/message/Message.java | 30 +++++++------ .../example/message/MessageContext.java | 1 + analytical_engine/test/app_tests.sh | 3 +- analytical_engine/test/run_java_app.cc | 42 +++++++++++-------- 4 files changed, 44 insertions(+), 32 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java index 7515168595d1..ed02b2bd1e4b 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java @@ -30,6 +30,7 @@ public class Message implements ParallelAppBase { private static final Logger logger = LoggerFactory.getLogger(Message.class); + private List msgs = createMessages(); @Override public void PEval(IFragment iFragment, @@ -61,7 +62,6 @@ public void IncEval(IFragment iFragment, MessageContext ctx = (MessageContext) parallelContextBase; if (ctx.currentStep >= ctx.maxSteps) { - parallelMessageManager.forceContinue(); return; } @@ -136,7 +136,7 @@ void receiveMessageImpl(IFragment frag, } tmpVector.delete(); tmpVertex.delete(); - logger.info("Received {} messages", receivedMsg.size()); + // logger.info("Received {} messages", receivedMsg.size()); receivedMsg.clear(); } @@ -154,15 +154,17 @@ void parallelSendToNbr(IFragment frag, ParallelMessageMa new Runnable() { @Override public void run() { + int cnt = 0; + Vertex vertex = FFITypeFactoryhelper.newVertexLong(); while (true) { int curBegin = Math.min(atomicInteger.getAndAdd(chunkSize), originEnd); int curEnd = Math.min(curBegin + chunkSize, originEnd); - Vertex vertex = FFITypeFactoryhelper.newVertexLong(); if (curBegin >= originEnd) { break; } for (long i = curBegin; i < curEnd; ++i) { + cnt += 1; vertex.setValue(i); try { 
sendToAdjList(frag, ctx, messageManager, vertex, finalTid); @@ -171,6 +173,8 @@ public void run() { } } } + vertex.delete(); + logger.info("Thread {} send {} vertices", finalTid, cnt); countDownLatch.countDown(); } }); @@ -181,7 +185,6 @@ public void run() { e.printStackTrace(); ctx.executor.shutdown(); } - ctx.currentStep += 1; } void sendToNbr(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId) throws IOException { @@ -189,20 +192,22 @@ void sendToNbr(IFragment frag, MessageContext ctx, Paral } void sendToAdjList(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId) throws IOException { - List msgs = createMessages(); + FFIByteVectorOutputStream msgVector = ctx.getMsgVectorStream(threadId); AdjList nbrs = frag.getOutgoingAdjList(vertex); for (Nbr nbr : nbrs.iterable()) { Vertex nbrVertex = nbr.neighbor(); if (frag.isOuterVertex(nbrVertex)) { - msgVector.reset(); - msgVector.writeLong(frag.getOuterVertexGid(nbrVertex)); - msgVector.writeInt(msgs.size()); - for (LongArrayList msg : msgs) { - PathSerAndDeser.serialize(msgVector, msg); + for (int j = 0; j < 100; ++j){ + msgVector.reset(); + msgVector.writeLong(frag.getOuterVertexGid(nbrVertex)); + msgVector.writeInt(msgs.size()); + for (LongArrayList msg : msgs) { + PathSerAndDeser.serialize(msgVector, msg); + } + msgVector.finishSetting(); + messageManager.sendToFragment(frag.getFragId(nbrVertex),msgVector.getVector(), threadId); } - msgVector.finishSetting(); - messageManager.sendToFragment(frag.getFragId(nbrVertex),msgVector.getVector(), threadId); } else { // skip for send to inner vertex @@ -217,6 +222,7 @@ List createMessages() { for (int j = 0; j < 10; ++j) { curList.add(j); } + list.add(curList); } return list; } diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java 
b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java index 83c9deb70b57..4806f0c3d662 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/MessageContext.java @@ -32,6 +32,7 @@ public class MessageContext extends public void Init(IFragment iFragment, ParallelMessageManager parallelMessageManager, JSONObject jsonObject) { createFFIContext(iFragment, StdString.class, false); + parallelMessageManager.initChannels(threadNum); currentStep = 0; if (!jsonObject.containsKey("maxSteps")) { maxSteps = 10; diff --git a/analytical_engine/test/app_tests.sh b/analytical_engine/test/app_tests.sh index 94700b6c1218..4e5cb13725af 100755 --- a/analytical_engine/test/app_tests.sh +++ b/analytical_engine/test/app_tests.sh @@ -453,8 +453,7 @@ then GLOG_v=10 mpirun -n 2 ./run_java_app "${socket_file}" 1 \ "${test_dir}/property/p2p-31_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,int64_t" \ 1 "${test_dir}/property/p2p-31_property_v_0#header_row=True#label=v&include_all_columns=true&column_types=int64_t,int64_t" \ - 1 0 1 com.alibaba.graphscope.example.circle.parallel.formal.CircleAppParallel 10 - # 1 0 1 com.alibaba.graphscope.example.sssp.SSSP + 1 0 1 com.alibaba.graphscope.example.message.Message 5 # GLOG_v=10 ./run_java_string_app /tmp/vineyard.sock \ # 1 "${test_dir}/projected_property/twitter_property_e_0#header_row=True#src_label=v&dst_label=v&label=e&include_all_columns=true&column_types=int64_t,int64_t,int32_t,int32_t,std::string" \ diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index c73359de5de0..a1a27b49cb18 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -360,40 +360,41 @@ void 
QueryProjected(vineyard::Client& client, const std::string& selectors_string, int32_t expected_data_type, vineyard::AnyType expected_tensor_type, int cur_time = 0) { - using AppType = gs::JavaPIEProjectedParallelAppOE; + using AppType = gs::JavaPIEProjectedParallelAppE; auto app = std::make_shared(); auto worker = AppType::CreateWorker(app, fragment); auto spec = grape::DefaultParallelEngineSpec(); worker->Init(comm_spec, spec); std::string lib_path = ""; - if (cur_time == 0) { - worker->Query(basic_params, lib_path); - } - // std::ofstream ostream; - // std::string output_path = - // grape::GetResultFilename(out_prefix, fragment->fid()); + // if (cur_time == 0) { + worker->Query(basic_params, lib_path); + // } + std::ofstream ostream; + std::string output_path = + grape::GetResultFilename(out_prefix, fragment->fid()); - // ostream.open(output_path); - // worker->Output(ostream); + ostream.open(output_path); + worker->Output(ostream); // ostream.close(); std::shared_ptr> ctx = worker->GetContext(); worker->Finalize(); - // gs::rpc::graph::GraphDefPb graph_def; - // graph_def.set_graph_type(gs::rpc::graph::ARROW_PROJECTED); + gs::rpc::graph::GraphDefPb graph_def; + graph_def.set_graph_type(gs::rpc::graph::ARROW_PROJECTED); // auto selectors = gs::Selector::ParseSelectors(selectors_string).value(); // auto selector = gs::Selector::parse(selector_string).value(); // auto range = std::make_pair("", ""); - // auto frag_wrapper = - // std::make_shared>( - // "graph_123", graph_def, fragment); + auto frag_wrapper = + std::make_shared>( + "graph_123", graph_def, fragment); - // auto ctx_wrapper = ctx->CreateInnerCtxWrapper( - // "ctx_wrapper_" + vineyard::random_string(8), frag_wrapper); + // TO make sure context.output() is called. 
+ auto ctx_wrapper = ctx->CreateInnerCtxWrapper( + "ctx_wrapper_" + vineyard::random_string(8), frag_wrapper); // if (ctx_wrapper->context_type() == "vertex_property") { // auto vp_ctx_wrapper = // std::dynamic_pointer_cast( @@ -540,7 +541,8 @@ void Run(vineyard::Client& client, const grape::CommSpec& comm_spec, selector_string, selectors_string); } else { // 3. run projected if (app_name.find("SSSP") != std::string::npos || - app_name.find("CircleAppParallel") != std::string::npos) { + app_name.find("CircleAppParallel") != std::string::npos || + app_name.find("Message") != std::string::npos) { pt.put("frag_name", "gs::ArrowProjectedFragment"); } else { @@ -577,7 +579,8 @@ void Run(vineyard::Client& client, const grape::CommSpec& comm_spec, } } if (app_name.find("SSSP") != std::string::npos || - app_name.find("CircleAppParallel") != std::string::npos) { + app_name.find("CircleAppParallel") != std::string::npos || + app_name.find("Message") != std::string::npos) { using ProjectedFragmentType = gs::ArrowProjectedFragment; std::shared_ptr projected_fragment = @@ -596,6 +599,9 @@ void Run(vineyard::Client& client, const grape::CommSpec& comm_spec, vineyard::AnyType::Double, i); LOG(INFO) << "Finish project for times " << i << " current memory usage: " << gs::getProcessMemory(); + int dummy; + std::cin >> dummy; + LOG(INFO) << "Continue with dummy: " << dummy; } } else { using ProjectedFragmentType = From 87a50e96c699a92217595691772ace2fa004d938 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 2 Sep 2024 14:23:15 +0800 Subject: [PATCH 50/52] don't try to interpret the received message --- .../graphscope/example/message/Message.java | 43 ++++++++++--------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java index 
ed02b2bd1e4b..0882e21cfaef 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java @@ -44,7 +44,7 @@ public void PEval(IFragment iFragment, vertex.setValue(i); try { // 内存处理 - sendToNbr(iFragment,ctx, parallelMessageManager, vertex, 0); + sendToAdjList(iFragment,ctx, parallelMessageManager, vertex, 0); } catch (IOException e) { logger.error("PEval error", e); } @@ -66,14 +66,14 @@ public void IncEval(IFragment iFragment, } // receive messages - receiveMessages(iFragment, ctx, parallelMessageManager); + receiveMessages(iFragment, ctx, parallelMessageManager, true); // sendToNbr(iFragment, parallelMessageManager, vertex, 0, 0); parallelSendToNbr(iFragment, parallelMessageManager, ctx); } void receiveMessages(IFragment frag, MessageContext ctx, - ParallelMessageManager messageManager) { + ParallelMessageManager messageManager, boolean simple) { long start = System.currentTimeMillis(); CountDownLatch countDownLatch = new CountDownLatch(ctx.threadNum); MessageInBuffer.Factory bufferFactory = FFITypeFactoryhelper.newMessageInBuffer(); @@ -89,7 +89,7 @@ public void run() { result = messageManager.getMessageInBuffer(messageInBuffer); if (result) { try { - receiveMessageImpl(frag, messageInBuffer); + receiveMessageImpl(frag, messageInBuffer, simple); } catch (Exception e) { logger.error( "Error when receiving message in fragment {} thread {}", @@ -113,24 +113,29 @@ public void run() { } void receiveMessageImpl(IFragment frag, - MessageInBuffer buffer) throws IOException { + MessageInBuffer buffer, boolean simple) throws IOException { FFIByteVector tmpVector = (FFIByteVector) FFIByteVectorFactory.INSTANCE.create(); Vertex tmpVertex = FFITypeFactoryhelper.newVertexLong(); List receivedMsg = new ArrayList<>(); - while (buffer.getPureMessage(tmpVector)) { - tmpVector.touch(); - FFIByteVectorInputStream inputStream = 
new FFIByteVectorInputStream(tmpVector); - long gid = inputStream.readLong(); - if (!frag.innerVertexGid2Vertex(gid, tmpVertex)) { - logger.error("Fail to get lid from gid {}", gid); - } - int size = inputStream.readInt(); + if (simple){ + return ; // do nothing + } + else { + while (buffer.getPureMessage(tmpVector)) { + tmpVector.touch(); + FFIByteVectorInputStream inputStream = new FFIByteVectorInputStream(tmpVector); + long gid = inputStream.readLong(); + if (!frag.innerVertexGid2Vertex(gid, tmpVertex)) { + logger.error("Fail to get lid from gid {}", gid); + } + int size = inputStream.readInt(); - if (size != 0) { - for (int i = 0; i < size; i++) { - LongArrayList path = PathSerAndDeser.deserialize(inputStream); - receivedMsg.add(path); + if (size != 0) { + for (int i = 0; i < size; i++) { + LongArrayList path = PathSerAndDeser.deserialize(inputStream); + receivedMsg.add(path); + } } } } @@ -187,10 +192,6 @@ public void run() { } } - void sendToNbr(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId) throws IOException { - sendToAdjList(frag, ctx, messageManager, vertex, threadId); - } - void sendToAdjList(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId) throws IOException { FFIByteVectorOutputStream msgVector = ctx.getMsgVectorStream(threadId); From db3162f66b804ac043dac96cc9b1a0d5803dfb05 Mon Sep 17 00:00:00 2001 From: "xiaolei.zl" Date: Mon, 2 Sep 2024 19:31:10 +0800 Subject: [PATCH 51/52] todo: impl project adjlist Committed-by: xiaolei.zl from Dev container --- .../graphscope/example/message/Message.java | 90 +++++++++++----- .../com/alibaba/graphscope/ds/NbrBase.java | 3 +- .../graphscope/ds/ProjectedAdjList.java | 100 +++++++++++------- .../alibaba/graphscope/ds/ProjectedNbr.java | 6 ++ .../fragment/ArrowProjectedFragment.java | 30 ++++-- .../annotation/AnnotationInvoker.java | 4 +- analytical_engine/test/run_java_app.cc | 2 +- 7 files changed, 161 
insertions(+), 74 deletions(-) diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java index 0882e21cfaef..3583e31ccfd7 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java @@ -16,6 +16,7 @@ import com.alibaba.graphscope.stdcxx.FFIByteVector; import com.alibaba.graphscope.stdcxx.FFIByteVectorFactory; import com.alibaba.graphscope.utils.FFITypeFactoryhelper; +import com.alibaba.fastffi.CXXValueScope; import com.carrotsearch.hppc.LongArrayList; import java.io.IOException; import java.util.ArrayList; @@ -26,11 +27,13 @@ import java.util.concurrent.atomic.AtomicInteger; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import com.alibaba.graphscope.parallel.message.DoubleMsg; public class Message implements ParallelAppBase { private static final Logger logger = LoggerFactory.getLogger(Message.class); private List msgs = createMessages(); + // private DoubleMsg doubleMsg = FFITypeFactoryhelper.newDoubleMsg(1.0); @Override public void PEval(IFragment iFragment, @@ -44,7 +47,7 @@ public void PEval(IFragment iFragment, vertex.setValue(i); try { // 内存处理 - sendToAdjList(iFragment,ctx, parallelMessageManager, vertex, 0); + sendToAdjList(iFragment,ctx, parallelMessageManager, vertex, 0, false); } catch (IOException e) { logger.error("PEval error", e); } @@ -66,10 +69,11 @@ public void IncEval(IFragment iFragment, } // receive messages - receiveMessages(iFragment, ctx, parallelMessageManager, true); + receiveMessages(iFragment, ctx, parallelMessageManager, false); // sendToNbr(iFragment, parallelMessageManager, vertex, 0, 0); parallelSendToNbr(iFragment, parallelMessageManager, ctx); + ctx.currentStep += 1; } void receiveMessages(IFragment frag, 
MessageContext ctx, @@ -88,12 +92,14 @@ public void run() { while (true) { result = messageManager.getMessageInBuffer(messageInBuffer); if (result) { - try { - receiveMessageImpl(frag, messageInBuffer, simple); - } catch (Exception e) { - logger.error( - "Error when receiving message in fragment {} thread {}", - frag.fid(), finalTid, e); + if (!simple){ + try { + receiveMessageImpl(frag, messageInBuffer, simple); + } catch (Exception e) { + logger.error( + "Error when receiving message in fragment {} thread {}", + frag.fid(), finalTid, e); + } } } else { break; @@ -172,7 +178,7 @@ public void run() { cnt += 1; vertex.setValue(i); try { - sendToAdjList(frag, ctx, messageManager, vertex, finalTid); + sendToAdjList(frag, ctx, messageManager, vertex, finalTid, false); } catch (IOException e) { e.printStackTrace(); } @@ -192,27 +198,57 @@ public void run() { } } - void sendToAdjList(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId) throws IOException { - - FFIByteVectorOutputStream msgVector = ctx.getMsgVectorStream(threadId); - AdjList nbrs = frag.getOutgoingAdjList(vertex); - for (Nbr nbr : nbrs.iterable()) { - Vertex nbrVertex = nbr.neighbor(); - if (frag.isOuterVertex(nbrVertex)) { - for (int j = 0; j < 100; ++j){ - msgVector.reset(); - msgVector.writeLong(frag.getOuterVertexGid(nbrVertex)); - msgVector.writeInt(msgs.size()); - for (LongArrayList msg : msgs) { - PathSerAndDeser.serialize(msgVector, msg); + void sendToAdjList(IFragment frag, MessageContext ctx, ParallelMessageManager messageManager, Vertex vertex, int threadId, boolean simple) throws IOException { + if (simple){ + for (int j = 0; j < 100; ++j){ + messageManager.sendMsgThroughOEdges(frag, vertex, 1.0, threadId); + } + } + else { + FFIByteVectorOutputStream msgVector = ctx.getMsgVectorStream(threadId); + AdjList nbrs = frag.getOutgoingAdjList(vertex); + + Nbr begin = nbrs.begin(); + Nbr end = nbrs.end(); + while (!begin.eq(end)){ + Vertex nbrVertex 
= begin.neighbor(); + if (frag.isOuterVertex(nbrVertex)) { + for (int j = 0; j < 100; ++j){ + msgVector.reset(); + msgVector.writeLong(frag.getOuterVertexGid(nbrVertex)); + msgVector.writeInt(msgs.size()); + for (LongArrayList msg : msgs) { + PathSerAndDeser.serialize(msgVector, msg); + } + msgVector.finishSetting(); + messageManager.sendToFragment(frag.getFragId(nbrVertex),msgVector.getVector(), threadId); } - msgVector.finishSetting(); - messageManager.sendToFragment(frag.getFragId(nbrVertex),msgVector.getVector(), threadId); } + else { + // skip for send to inner vertex + } + begin.inc(); } - else { - // skip for send to inner vertex - } + + + // if (vertex.getValue() % 1000 != 0) { + // return ; + // } + // Vertex nbrVertex = FFITypeFactoryhelper.newVertexLong(); + // for (long vid = frag.getInnerVerticesNum(); vid < frag.getVerticesNum(); ++vid) { + // nbrVertex.setValue(vid); + // for (int j = 0; j < 10; ++j){ + // msgVector.reset(); + // msgVector.writeLong(frag.getOuterVertexGid(nbrVertex)); + // msgVector.writeInt(msgs.size()); + // for (LongArrayList msg : msgs) { + // PathSerAndDeser.serialize(msgVector, msg); + // } + // msgVector.finishSetting(); + // messageManager.sendToFragment(frag.getFragId(nbrVertex),msgVector.getVector(), threadId); + // } + // } + // nbrVertex.delete(); } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/NbrBase.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/NbrBase.java index 0ee1258819ed..8468a580f034 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/NbrBase.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/NbrBase.java @@ -17,7 +17,8 @@ package com.alibaba.graphscope.ds; import com.alibaba.fastffi.FFIPointer; +import com.alibaba.fastffi.CXXPointer; -public interface NbrBase extends FFIPointer { +public interface NbrBase extends CXXPointer { Vertex neighbor(); } diff --git 
a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java index 2a2ad3aab597..264e1ea9a3d6 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java @@ -26,6 +26,7 @@ import com.alibaba.fastffi.FFINameAlias; import com.alibaba.fastffi.FFIPointer; import com.alibaba.fastffi.FFITypeAlias; +import com.alibaba.fastffi.CXXPointer; import java.util.Iterator; @@ -37,18 +38,13 @@ * @param vertex id type. * @param edge data type. */ -@FFIGen -@CXXHead(ARROW_PROJECTED_FRAGMENT_H) -@CXXHead(CORE_JAVA_TYPE_ALIAS_H) -@FFITypeAlias(PROJECTED_ADJ_LIST) -public interface ProjectedAdjList extends FFIPointer { +public interface ProjectedAdjList { /** * Get the first Nbr. * * @return first Nbr. */ - @CXXValue ProjectedNbr begin(); /** @@ -56,7 +52,6 @@ public interface ProjectedAdjList extends FFIPointer { * * @return last Nbr. */ - @CXXValue ProjectedNbr end(); /** @@ -64,7 +59,6 @@ public interface ProjectedAdjList extends FFIPointer { * * @return size. */ - @FFINameAlias("Size") long size(); /** @@ -72,7 +66,6 @@ public interface ProjectedAdjList extends FFIPointer { * * @return true if no nbr. */ - @FFINameAlias("Empty") boolean empty(); /** @@ -80,36 +73,69 @@ public interface ProjectedAdjList extends FFIPointer { * * @return false if empty. */ - @FFINameAlias("NotEmpty") boolean notEmpty(); + - /** - * The iterator for ProjectedAdjList. You can use enhanced for loop instead of directly using - * this. - * - * @return the iterator. 
- */ - default Iterable> iterable() { - return () -> - new Iterator>() { - ProjectedNbr cur = begin().dec(); - ProjectedNbr end = end(); - boolean flag = false; - - @Override - public boolean hasNext() { - if (!flag) { - cur = cur.inc(); - flag = !cur.eq(end); + public class ProjectedAdjListImpl implements ProjectedAdjList { + private ProjectedNbr begin; + private ProjectedNbr end; + private int elementSize; + + public ProjectedAdjListImpl(ProjectedNbr begin, ProjectedNbr end) { + this.begin = begin; + this.end = end; + //If VID_T is long, elementSize is 16, otherwise 8 + elementSize = 16; + } + + ProjectedNbr begin() { + return begin; + } + + ProjectedNbr end() { + return end; + } + + long size() { + return (end.getAddress() - begin.getAddress()) / elementSize; + } + + boolean empty() { + return begin.eq(end); + } + + boolean notEmpty() { + return !empty(); + } + + /** + * The iterator for ProjectedAdjList. You can use enhanced for loop instead of directly using + * this. + * + * @return the iterator. 
+ */ + default Iterable> iterable() { + return () -> + new Iterator>() { + ProjectedNbr cur = begin().dec(); + ProjectedNbr end = end(); + boolean flag = false; + + @Override + public boolean hasNext() { + if (!flag) { + cur = cur.inc(); + flag = !cur.eq(end); + } + return flag; + } + + @Override + public ProjectedNbr next() { + flag = false; + return cur; } - return flag; - } - - @Override - public ProjectedNbr next() { - flag = false; - return cur; - } - }; + }; + } } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java index 1fa54747b8bf..09d5e166009a 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java @@ -41,6 +41,7 @@ @CXXHead(CORE_JAVA_TYPE_ALIAS_H) @FFITypeAlias(PROJECTED_NBR) public interface ProjectedNbr extends NbrBase { + Factory factory = FFITypeFactory.getFactory(Factory.class, LongMsg.class); /** * Get the neighbor vertex. 
@@ -85,4 +86,9 @@ public interface ProjectedNbr extends NbrBase { @CXXOperator("--") @CXXReference ProjectedNbr dec(); + + @FFIFactory + interface Factory { + ProjectedNbr create(); + } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java index 6b8e8c68a0c4..add431c7a3bf 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java @@ -29,6 +29,7 @@ import com.alibaba.graphscope.ds.BaseTypedArray; import com.alibaba.graphscope.ds.ProjectedAdjList; import com.alibaba.graphscope.ds.Vertex; +// import com.alibaba.graphscope.utils.LongIdParser; /** * Java wrapper for extends BaseArrowProjectedFragment { - @FFINameAlias("GetIncomingAdjList") - @CXXValue - ProjectedAdjList getIncomingAdjList(@CXXReference Vertex vertex); + // private LongIdParser idParser = new LongIdParser(fragment.fnum(), 1);; - @FFINameAlias("GetOutgoingAdjList") - @CXXValue - ProjectedAdjList getOutgoingAdjList(@CXXReference Vertex vertex); + ProjectedAdjList getIncomingAdjList(@CXXReference Vertex vertex) { + + } + + ProjectedAdjList getOutgoingAdjList(@CXXReference Vertex vertex) { + PropertyNbrUnit nbrUnit = getOutEdgesPtr(); + long nbrUnitInitAddress = nbrUnit.getAddress(); + long offsetEndPtrFirstAddr = this.projectedFragment.getOEOffsetsEndPtr(); + + long offsetBeginPtrFirstAddr = this.projectedFragment.getOEOffsetsBeginPtr(); + // long offset = idParser.getOffset(lid); + long offset = vertex.getValue(); + long oeBeginOffset = JavaRuntime.getLong(offsetBeginPtrFirstAddr + offset * 8); + long oeEndOffset = JavaRuntime.getLong(offsetEndPtrFirstAddr + offset * 8); + long curAddress = nbrUnitInitAddress + nbrUnitEleSize * oeBeginOffset; + long endAddress = 
nbrUnitInitAddress + nbrUnitEleSize * oeEndOffset; + ProjectedNbr begin = FFITypeFactoryhelper.newProjectedNbr(); + begin.setAddress(curAddress); + ProjectedNbr end = FFITypeFactoryhelper.newProjectedNbr(); + end.setAddress(endAddress); + return new ProjectedAdjListImpl(begin, end); + } @FFINameAlias("get_edata_array_accessor") @CXXReference diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index 03734a6e8f28..903a4755260e 100644 --- a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -1371,7 +1371,7 @@ "int64_t", CPP_ARROW_PROJECTED_FRAGMENT + "", - DOUBLE_MSG, + "double", "any" }, java = { @@ -1381,7 +1381,7 @@ "Long", JAVA_ARROW_PROJECTED_FRAGMENT + "", - "com.alibaba.graphscope.parallel.message.DoubleMsg", + "Double", "com.alibaba.graphscope.runtime.UnusedImpl.LongLongDouble" }), @CXXTemplate( diff --git a/analytical_engine/test/run_java_app.cc b/analytical_engine/test/run_java_app.cc index a1a27b49cb18..2e956e52e4f3 100644 --- a/analytical_engine/test/run_java_app.cc +++ b/analytical_engine/test/run_java_app.cc @@ -360,7 +360,7 @@ void QueryProjected(vineyard::Client& client, const std::string& selectors_string, int32_t expected_data_type, vineyard::AnyType expected_tensor_type, int cur_time = 0) { - using AppType = gs::JavaPIEProjectedParallelAppE; + using AppType = gs::JavaPIEProjectedParallelAppOE; auto app = std::make_shared(); auto worker = AppType::CreateWorker(app, fragment); auto spec = grape::DefaultParallelEngineSpec(); From 80bbd188443ed264e9a3f398d28118edf89514e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=9C=84=E7=A3=8A?= Date: Mon, 2 Sep 2024 19:52:41 +0800 Subject: [PATCH 52/52] todo: test new 
ProjectedAdjList --- .../graphscope/example/message/Message.java | 5 ++ .../graphscope/ds/ProjectedAdjList.java | 64 +---------------- .../alibaba/graphscope/ds/ProjectedNbr.java | 8 +-- .../ds/adaptor/GrapeNbrAdaptor.java | 6 ++ .../alibaba/graphscope/ds/adaptor/Nbr.java | 4 ++ .../ds/adaptor/ProjectedNbrAdaptor.java | 10 +++ .../ds/impl/ProjectedAdjListImpl.java | 69 +++++++++++++++++++ .../fragment/ArrowProjectedFragment.java | 40 ++++++++--- .../adaptor/ArrowProjectedAdaptor.java | 8 +-- .../utils/FFITypeFactoryhelper.java | 7 ++ .../annotation/AnnotationInvoker.java | 32 ++++----- 11 files changed, 155 insertions(+), 98 deletions(-) create mode 100644 analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/impl/ProjectedAdjListImpl.java diff --git a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java index 3583e31ccfd7..f35913c0a79e 100644 --- a/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java +++ b/analytical_engine/java/grape-demo/src/main/java/com/alibaba/graphscope/example/message/Message.java @@ -2,6 +2,7 @@ import com.alibaba.graphscope.app.ParallelAppBase; import com.alibaba.graphscope.context.ParallelContextBase; +import com.alibaba.graphscope.ds.ProjectedNbr; import com.alibaba.graphscope.ds.Vertex; import com.alibaba.graphscope.ds.adaptor.AdjList; import com.alibaba.graphscope.ds.adaptor.Nbr; @@ -207,9 +208,11 @@ void sendToAdjList(IFragment frag, MessageContext ctx, P else { FFIByteVectorOutputStream msgVector = ctx.getMsgVectorStream(threadId); AdjList nbrs = frag.getOutgoingAdjList(vertex); + logger.info("nbr size : {}" , nbrs.size()); Nbr begin = nbrs.begin(); Nbr end = nbrs.end(); + logger.info("begin addr {}, end addr {}", begin.getAddress(), end.getAddress()); while (!begin.eq(end)){ Vertex nbrVertex = begin.neighbor(); if 
(frag.isOuterVertex(nbrVertex)) { @@ -229,6 +232,8 @@ void sendToAdjList(IFragment frag, MessageContext ctx, P } begin.inc(); } + begin.delete(); + end.delete(); // if (vertex.getValue() % 1000 != 0) { diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java index 264e1ea9a3d6..001bee44586a 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedAdjList.java @@ -74,68 +74,6 @@ public interface ProjectedAdjList { * @return false if empty. */ boolean notEmpty(); - - public class ProjectedAdjListImpl implements ProjectedAdjList { - private ProjectedNbr begin; - private ProjectedNbr end; - private int elementSize; - - public ProjectedAdjListImpl(ProjectedNbr begin, ProjectedNbr end) { - this.begin = begin; - this.end = end; - //If VID_T is long, elementSize is 16, otherwise 8 - elementSize = 16; - } - - ProjectedNbr begin() { - return begin; - } - - ProjectedNbr end() { - return end; - } - - long size() { - return (end.getAddress() - begin.getAddress()) / elementSize; - } - - boolean empty() { - return begin.eq(end); - } - - boolean notEmpty() { - return !empty(); - } - - /** - * The iterator for ProjectedAdjList. You can use enhanced for loop instead of directly using - * this. - * - * @return the iterator. 
- */ - default Iterable> iterable() { - return () -> - new Iterator>() { - ProjectedNbr cur = begin().dec(); - ProjectedNbr end = end(); - boolean flag = false; - - @Override - public boolean hasNext() { - if (!flag) { - cur = cur.inc(); - flag = !cur.eq(end); - } - return flag; - } - - @Override - public ProjectedNbr next() { - flag = false; - return cur; - } - }; - } - } + Iterable> iterable(); } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java index 09d5e166009a..3167e0eb8d30 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/ProjectedNbr.java @@ -24,8 +24,10 @@ import com.alibaba.fastffi.CXXOperator; import com.alibaba.fastffi.CXXReference; import com.alibaba.fastffi.CXXValue; +import com.alibaba.fastffi.FFIFactory; import com.alibaba.fastffi.FFIGen; import com.alibaba.fastffi.FFINameAlias; +import com.alibaba.fastffi.FFISettablePointer; import com.alibaba.fastffi.FFITypeAlias; /** @@ -40,9 +42,7 @@ @CXXHead(ARROW_PROJECTED_FRAGMENT_H) @CXXHead(CORE_JAVA_TYPE_ALIAS_H) @FFITypeAlias(PROJECTED_NBR) -public interface ProjectedNbr extends NbrBase { - Factory factory = FFITypeFactory.getFactory(Factory.class, LongMsg.class); - +public interface ProjectedNbr extends NbrBase, FFISettablePointer { /** * Get the neighbor vertex. 
* @@ -88,7 +88,7 @@ public interface ProjectedNbr extends NbrBase { ProjectedNbr dec(); @FFIFactory - interface Factory { + interface Factory { ProjectedNbr create(); } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/GrapeNbrAdaptor.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/GrapeNbrAdaptor.java index 2723fca0d8d8..d8ba7aa7d4b3 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/GrapeNbrAdaptor.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/GrapeNbrAdaptor.java @@ -40,6 +40,7 @@ public void setAddress(long address) { nbr.setAddress(address); } + @Override public long getAddress() { return nbr.getAddress(); } @@ -80,4 +81,9 @@ public Nbr dec() { logger.error("No implementation for dec in grapeNbr"); return null; } + + @Override + public void delete() { + nbr.delete(); + } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/Nbr.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/Nbr.java index c58b07f90a94..7faaeeee76bc 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/Nbr.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/Nbr.java @@ -40,4 +40,8 @@ public interface Nbr { boolean eq(Nbr rhs); Nbr dec(); + + void delete(); + + long getAddress(); } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/ProjectedNbrAdaptor.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/ProjectedNbrAdaptor.java index cdc461334dae..9c62b9bce2b0 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/ProjectedNbrAdaptor.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/adaptor/ProjectedNbrAdaptor.java 
@@ -72,4 +72,14 @@ public Nbr dec() { nbr.dec(); return this; } + + @Override + public void delete() { + nbr.delete(); + } + + @Override + public long getAddress() { + return nbr.getAddress(); + } } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/impl/ProjectedAdjListImpl.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/impl/ProjectedAdjListImpl.java new file mode 100644 index 000000000000..a210f761a4a8 --- /dev/null +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/ds/impl/ProjectedAdjListImpl.java @@ -0,0 +1,69 @@ +package com.alibaba.graphscope.ds.impl; + +import com.alibaba.graphscope.ds.ProjectedAdjList; +import com.alibaba.graphscope.ds.ProjectedNbr; +import java.util.Iterator; + +public class ProjectedAdjListImpl implements ProjectedAdjList { + + private ProjectedNbr begin; + private ProjectedNbr end; + private int elementSize; + + public ProjectedAdjListImpl(ProjectedNbr begin, ProjectedNbr end) { + this.begin = begin; + this.end = end; + //If VID_T is long, elementSize is 16, otherwise 8 + elementSize = 16; + } + + public ProjectedNbr begin() { + return begin; + } + + public ProjectedNbr end() { + return end; + } + + public long size() { + return (end.getAddress() - begin.getAddress()) / elementSize; + } + + public boolean empty() { + return begin.eq(end); + } + + public boolean notEmpty() { + return !empty(); + } + + /** + * The iterator for ProjectedAdjList. You can use enhanced for loop instead of directly using + * this. + * + * @return the iterator. 
+ */ + public Iterable> iterable() { + return () -> + new Iterator>() { + ProjectedNbr cur = begin().dec(); + ProjectedNbr end = end(); + boolean flag = false; + + @Override + public boolean hasNext() { + if (!flag) { + cur = cur.inc(); + flag = !cur.eq(end); + } + return flag; + } + + @Override + public ProjectedNbr next() { + flag = false; + return cur; + } + }; + } +} diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java index add431c7a3bf..5c87a0ac5bd4 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/ArrowProjectedFragment.java @@ -26,9 +26,14 @@ import com.alibaba.fastffi.FFIGen; import com.alibaba.fastffi.FFINameAlias; import com.alibaba.fastffi.FFITypeAlias; +import com.alibaba.fastffi.llvm4jni.runtime.JavaRuntime; import com.alibaba.graphscope.ds.BaseTypedArray; import com.alibaba.graphscope.ds.ProjectedAdjList; +import com.alibaba.graphscope.ds.ProjectedNbr; +import com.alibaba.graphscope.ds.PropertyNbrUnit; import com.alibaba.graphscope.ds.Vertex; +import com.alibaba.graphscope.ds.impl.ProjectedAdjListImpl; +import com.alibaba.graphscope.utils.FFITypeFactoryhelper; // import com.alibaba.graphscope.utils.LongIdParser; /** @@ -50,25 +55,38 @@ public interface ArrowProjectedFragment // private LongIdParser idParser = new LongIdParser(fragment.fnum(), 1);; - ProjectedAdjList getIncomingAdjList(@CXXReference Vertex vertex) { - + default ProjectedAdjList getIncomingAdjList(Vertex vertex, Class vidType, Class edataType) { + PropertyNbrUnit nbrUnit = getInEdgesPtr(); + long nbrUnitInitAddress = nbrUnit.getAddress(); + long offsetEndPtrFirstAddr = getIEOffsetsEndPtr(); + long offsetBeginPtrFirstAddr = getIEOffsetsBeginPtr(); + // long 
offset = idParser.getOffset(lid); + long offset = (long) vertex.getValue(); + long oeBeginOffset = JavaRuntime.getLong(offsetBeginPtrFirstAddr + offset * 8); + long oeEndOffset = JavaRuntime.getLong(offsetEndPtrFirstAddr + offset * 8); + long curAddress = nbrUnitInitAddress + 16 * oeBeginOffset; + long endAddress = nbrUnitInitAddress + 16 * oeEndOffset; + ProjectedNbr begin = FFITypeFactoryhelper.newProjectedNbr(vidType, edataType); + begin.setAddress(curAddress); + ProjectedNbr end = FFITypeFactoryhelper.newProjectedNbr(vidType, edataType); + end.setAddress(endAddress); + return new ProjectedAdjListImpl(begin, end); } - ProjectedAdjList getOutgoingAdjList(@CXXReference Vertex vertex) { + default ProjectedAdjList getOutgoingAdjList(@CXXReference Vertex vertex, Class vidType, Class edataType) { PropertyNbrUnit nbrUnit = getOutEdgesPtr(); long nbrUnitInitAddress = nbrUnit.getAddress(); - long offsetEndPtrFirstAddr = this.projectedFragment.getOEOffsetsEndPtr(); - - long offsetBeginPtrFirstAddr = this.projectedFragment.getOEOffsetsBeginPtr(); + long offsetBeginPtrFirstAddr = getOEOffsetsBeginPtr(); + long offsetEndPtrFirstAddr = getOEOffsetsEndPtr(); // long offset = idParser.getOffset(lid); - long offset = vertex.getValue(); + long offset = (long) vertex.getValue(); long oeBeginOffset = JavaRuntime.getLong(offsetBeginPtrFirstAddr + offset * 8); long oeEndOffset = JavaRuntime.getLong(offsetEndPtrFirstAddr + offset * 8); - long curAddress = nbrUnitInitAddress + nbrUnitEleSize * oeBeginOffset; - long endAddress = nbrUnitInitAddress + nbrUnitEleSize * oeEndOffset; - ProjectedNbr begin = FFITypeFactoryhelper.newProjectedNbr(); + long curAddress = nbrUnitInitAddress + 16 * oeBeginOffset; + long endAddress = nbrUnitInitAddress + 16 * oeEndOffset; + ProjectedNbr begin = FFITypeFactoryhelper.newProjectedNbr(vidType, edataType); begin.setAddress(curAddress); - ProjectedNbr end = FFITypeFactoryhelper.newProjectedNbr(); + ProjectedNbr end = 
FFITypeFactoryhelper.newProjectedNbr(vidType, edataType); end.setAddress(endAddress); return new ProjectedAdjListImpl(begin, end); } diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/adaptor/ArrowProjectedAdaptor.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/adaptor/ArrowProjectedAdaptor.java index f43435e69b5b..1905bd684413 100644 --- a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/adaptor/ArrowProjectedAdaptor.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/fragment/adaptor/ArrowProjectedAdaptor.java @@ -86,20 +86,20 @@ public ArrowProjectedFragment getArrowProjectedF public AdjList getIncomingAdjList(Vertex vertex) { if (edataPrimitive) return new ProjectedAdjListAdaptor<>( - fragment.getIncomingAdjList(vertex), primitiveEDataArray); + fragment.getIncomingAdjList(vertex, getVidClass(), getEdataClass()), primitiveEDataArray); else return new ProjectedAdjListAdaptor<>( - fragment.getIncomingAdjList(vertex), complexEDataArray); + fragment.getIncomingAdjList(vertex, getVidClass(), getEdataClass()), complexEDataArray); } @Override public AdjList getOutgoingAdjList(Vertex vertex) { if (edataPrimitive) return new ProjectedAdjListAdaptor<>( - fragment.getOutgoingAdjList(vertex), primitiveEDataArray); + fragment.getOutgoingAdjList(vertex, getVidClass(), getEdataClass()), primitiveEDataArray); else return new ProjectedAdjListAdaptor<>( - fragment.getOutgoingAdjList(vertex), complexEDataArray); + fragment.getOutgoingAdjList(vertex, getVidClass(), getEdataClass()), complexEDataArray); } /** diff --git a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java index 371fb0673530..0a0c378e26a9 100644 --- 
a/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java +++ b/analytical_engine/java/grape-jdk/src/main/java/com/alibaba/graphscope/utils/FFITypeFactoryhelper.java @@ -21,6 +21,7 @@ import static com.alibaba.graphscope.utils.CppClassName.GS_ARROW_PROJECTED_FRAGMENT_IMPL_TYPED_ARRAY; import static com.alibaba.graphscope.utils.CppClassName.GS_PRIMITIVE_MESSAGE; import static com.alibaba.graphscope.utils.CppClassName.GS_VERTEX_ARRAY; +import static com.alibaba.graphscope.utils.CppClassName.PROJECTED_NBR; import com.alibaba.fastffi.FFIForeignType; import com.alibaba.fastffi.FFIPointer; @@ -32,6 +33,7 @@ import com.alibaba.graphscope.ds.EmptyType; import com.alibaba.graphscope.ds.GSVertexArray; import com.alibaba.graphscope.ds.PrimitiveTypedArray; +import com.alibaba.graphscope.ds.ProjectedNbr; import com.alibaba.graphscope.ds.StringTypedArray; import com.alibaba.graphscope.ds.StringView; import com.alibaba.graphscope.ds.Vertex; @@ -442,6 +444,11 @@ public static DoubleMsg newDoubleMsg(double value) { return DoubleMsg.factory.create(value); } + public static ProjectedNbr newProjectedNbr(Class vidClz, Class edataClz) { + ProjectedNbr.Factory factory = FFITypeFactory.getFactory(ProjectedNbr.class, PROJECTED_NBR + "<" + javaType2CppType(vidClz) + "," + javaType2CppType(edataClz) + ">"); + return factory.create(); + } + /** * For Any ffi-generated class, we can get the typealias via annotation * diff --git a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java index 903a4755260e..43c54a4d2eb4 100644 --- a/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java +++ b/analytical_engine/java/grape-runtime/src/main/java/com/alibaba/graphscope/annotation/AnnotationInvoker.java @@ -319,22 +319,22 @@ cxx = {"uint64_t", 
"std::string"}, java = {"Long", STRING_VIEW}), }), - @FFIGen( - type = "com.alibaba.graphscope.ds.ProjectedAdjList", - templates = { - @CXXTemplate( - cxx = {"uint64_t", "double"}, - java = {"Long", "Double"}), - @CXXTemplate( - cxx = {"uint64_t", "int64_t"}, - java = {"Long", "Long"}), - @CXXTemplate( - cxx = {"uint64_t", "int32_t"}, - java = {"Long", "Integer"}), - @CXXTemplate( - cxx = {"uint64_t", "std::string"}, - java = {"Long", STRING_VIEW}), - }), +// @FFIGen( +// type = "com.alibaba.graphscope.ds.ProjectedAdjList", +// templates = { +// @CXXTemplate( +// cxx = {"uint64_t", "double"}, +// java = {"Long", "Double"}), +// @CXXTemplate( +// cxx = {"uint64_t", "int64_t"}, +// java = {"Long", "Long"}), +// @CXXTemplate( +// cxx = {"uint64_t", "int32_t"}, +// java = {"Long", "Integer"}), +// @CXXTemplate( +// cxx = {"uint64_t", "std::string"}, +// java = {"Long", STRING_VIEW}), +// }), @FFIGen( type = JAVA_ARROW_PROJECTED_FRAGMENT, templates = {