From f9a1522b17f189815d1b52c9146f813abf7fce8f Mon Sep 17 00:00:00 2001
From: "xiaolei.zl@alibaba-inc.com"
Date: Tue, 10 Dec 2024 14:24:09 +0800
Subject: [PATCH] add tuanhuo-cro related graph def

Committed-by: xiaolei.zl@alibaba-inc.com from Dev container
---
Review notes (free text between "---" and the diffstat; not part of the
commit message):
  * Adds graph.yaml / import.yaml definitions for three example graphs
    (com_orkut, friendster, wiki) and a script for com_orkut and friendster
    that turns a whitespace-separated edge list into a vertex CSV and an
    edge CSV.
  * flex/tests/leiden/test.cc logs "Time: <t>s", where t is measured from
    just after the "Graph created" log line until after igraph_destroy().
  * Each graph.yaml `name` and import.yaml `graph` is set to the name of its
    own example directory, and the scripts' usage message lists the three
    required arguments. The blob ids on the `index` lines come from the
    originally generated patch and may not match the contents below.
  * NOTE(review): all three schemas use max_vertex_num: 5000000 -- confirm
    this is large enough for each data set.
  * NOTE(review): com_orkut/prepprocess.py looks like a typo of
    preprocess.py (the name its usage message prints) -- confirm.

 .../interactive/examples/com_orkut/graph.yaml | 22 ++++++++++
 .../examples/com_orkut/import.yaml            | 40 +++++++++++++++++++
 .../examples/com_orkut/prepprocess.py         | 39 ++++++++++++++++++
 .../examples/friendster/graph.yaml            | 22 ++++++++++
 .../examples/friendster/import.yaml           | 40 +++++++++++++++++++
 .../examples/friendster/preprocess.py         | 39 ++++++++++++++++++
 flex/interactive/examples/wiki/graph.yaml     | 22 ++++++++++
 flex/interactive/examples/wiki/import.yaml    | 40 +++++++++++++++++++
 flex/tests/leiden/test.cc                     |  5 +++
 9 files changed, 269 insertions(+)
 create mode 100644 flex/interactive/examples/com_orkut/graph.yaml
 create mode 100644 flex/interactive/examples/com_orkut/import.yaml
 create mode 100644 flex/interactive/examples/com_orkut/prepprocess.py
 create mode 100644 flex/interactive/examples/friendster/graph.yaml
 create mode 100644 flex/interactive/examples/friendster/import.yaml
 create mode 100644 flex/interactive/examples/friendster/preprocess.py
 create mode 100644 flex/interactive/examples/wiki/graph.yaml
 create mode 100644 flex/interactive/examples/wiki/import.yaml

diff --git a/flex/interactive/examples/com_orkut/graph.yaml b/flex/interactive/examples/com_orkut/graph.yaml
new file mode 100644
index 000000000000..788b777ec898
--- /dev/null
+++ b/flex/interactive/examples/com_orkut/graph.yaml
@@ -0,0 +1,22 @@
+name: com_orkut # graph name
+store_type: mutable_csr # v6d, groot, gart
+schema:
+  vertex_types:
+    - type_name: user
+      type_id: 0
+      x_csr_params:
+        max_vertex_num: 5000000
+      properties:
+        - property_id: 0
+          property_name: id
+          property_type:
+            primitive_type: DT_SIGNED_INT64
+      primary_keys:
+        - id
+  edge_types:
+    - type_name: friend
+      type_id: 0
+      vertex_type_pair_relations:
+        - source_vertex: user
+          destination_vertex: user
+          relation: MANY_TO_MANY
diff --git a/flex/interactive/examples/com_orkut/import.yaml b/flex/interactive/examples/com_orkut/import.yaml
new file mode 100644
index 000000000000..16e77f2e3904
--- /dev/null
+++ b/flex/interactive/examples/com_orkut/import.yaml
@@ -0,0 +1,40 @@
+graph: com_orkut
+loading_config:
+  data_source:
+    scheme: file # file, oss, s3, hdfs; only file is supported now
+  import_option: init # append, overwrite, only init is supported now
+  format:
+    type: csv
+    metadata:
+      delimiter: "," # other loading configuration places here
+      header_row: false # whether to use the first row as the header
+      quoting: false
+      quote_char: '"'
+      double_quote: true
+      escape_char: '\'
+      escaping: false
+      block_size: 4MB
+      batch_reader: true
+      null_values: [""]
+
+vertex_mappings:
+  - type_name: user # must align with the schema
+    inputs:
+      - vertices.csv
+    column_mappings:
+      - column:
+          index: 0 # can be omitted if the index is the same as the property index
+        property: id
+edge_mappings:
+  - type_triplet:
+      edge: friend
+      source_vertex: user
+      destination_vertex: user
+    inputs:
+      - edges.csv
+    source_vertex_mappings:
+      - column:
+          index: 0
+    destination_vertex_mappings:
+      - column:
+          index: 1
\ No newline at end of file
diff --git a/flex/interactive/examples/com_orkut/prepprocess.py b/flex/interactive/examples/com_orkut/prepprocess.py
new file mode 100644
index 000000000000..40b48cc9bb07
--- /dev/null
+++ b/flex/interactive/examples/com_orkut/prepprocess.py
@@ -0,0 +1,39 @@
+#!/bin/python3
+
+import os
+import sys
+
+if __name__ == "__main__":
+    # Expect exactly three arguments: input edge list, vertex CSV, edge CSV
+    if len(sys.argv) != 4:
+        print("Usage: python3 preprocess.py <input_edge_list> <vertex_csv> <edge_csv>")
+        sys.exit(1)
+    # Input edge list and the two output CSV paths
+    file_path = sys.argv[1]
+    vertex_file_path = sys.argv[2]
+    edge_file_path = sys.argv[3]
+    vertices = set()
+    edges = []
+    # open the file and iterate over the lines
+    with open(file_path, "r") as file:
+        for line in file:
+            # if line starts with #, skip it
+            if line.startswith("#"):
+                continue
+            # split the line on whitespace
+            parts = line.split()
+            # a line with exactly two fields is an edge; other lines are ignored
+            if len(parts) == 2:
+                vertices.add(parts[0])
+                vertices.add(parts[1])
+                edges.append(parts)
+    # write vertices to vertices.csv, and edges to edges.csv
+    with open(vertex_file_path, "w") as file:
+        for vertex in vertices:
+            file.write(vertex + "\n")
+    with open(edge_file_path, "w") as file:
+        for edge in edges:
+            file.write(edge[0] + "," + edge[1] + "\n")
+
+
+    
\ No newline at end of file
diff --git a/flex/interactive/examples/friendster/graph.yaml b/flex/interactive/examples/friendster/graph.yaml
new file mode 100644
index 000000000000..788b777ec898
--- /dev/null
+++ b/flex/interactive/examples/friendster/graph.yaml
@@ -0,0 +1,22 @@
+name: friendster # graph name
+store_type: mutable_csr # v6d, groot, gart
+schema:
+  vertex_types:
+    - type_name: user
+      type_id: 0
+      x_csr_params:
+        max_vertex_num: 5000000
+      properties:
+        - property_id: 0
+          property_name: id
+          property_type:
+            primitive_type: DT_SIGNED_INT64
+      primary_keys:
+        - id
+  edge_types:
+    - type_name: friend
+      type_id: 0
+      vertex_type_pair_relations:
+        - source_vertex: user
+          destination_vertex: user
+          relation: MANY_TO_MANY
diff --git a/flex/interactive/examples/friendster/import.yaml b/flex/interactive/examples/friendster/import.yaml
new file mode 100644
index 000000000000..16e77f2e3904
--- /dev/null
+++ b/flex/interactive/examples/friendster/import.yaml
@@ -0,0 +1,40 @@
+graph: friendster
+loading_config:
+  data_source:
+    scheme: file # file, oss, s3, hdfs; only file is supported now
+  import_option: init # append, overwrite, only init is supported now
+  format:
+    type: csv
+    metadata:
+      delimiter: "," # other loading configuration places here
+      header_row: false # whether to use the first row as the header
+      quoting: false
+      quote_char: '"'
+      double_quote: true
+      escape_char: '\'
+      escaping: false
+      block_size: 4MB
+      batch_reader: true
+      null_values: [""]
+
+vertex_mappings:
+  - type_name: user # must align with the schema
+    inputs:
+      - vertices.csv
+    column_mappings:
+      - column:
+          index: 0 # can be omitted if the index is the same as the property index
+        property: id
+edge_mappings:
+  - type_triplet:
+      edge: friend
+      source_vertex: user
+      destination_vertex: user
+    inputs:
+      - edges.csv
+    source_vertex_mappings:
+      - column:
+          index: 0
+    destination_vertex_mappings:
+      - column:
+          index: 1
\ No newline at end of file
diff --git a/flex/interactive/examples/friendster/preprocess.py b/flex/interactive/examples/friendster/preprocess.py
new file mode 100644
index 000000000000..40b48cc9bb07
--- /dev/null
+++ b/flex/interactive/examples/friendster/preprocess.py
@@ -0,0 +1,39 @@
+#!/bin/python3
+
+import os
+import sys
+
+if __name__ == "__main__":
+    # Expect exactly three arguments: input edge list, vertex CSV, edge CSV
+    if len(sys.argv) != 4:
+        print("Usage: python3 preprocess.py <input_edge_list> <vertex_csv> <edge_csv>")
+        sys.exit(1)
+    # Input edge list and the two output CSV paths
+    file_path = sys.argv[1]
+    vertex_file_path = sys.argv[2]
+    edge_file_path = sys.argv[3]
+    vertices = set()
+    edges = []
+    # open the file and iterate over the lines
+    with open(file_path, "r") as file:
+        for line in file:
+            # if line starts with #, skip it
+            if line.startswith("#"):
+                continue
+            # split the line on whitespace
+            parts = line.split()
+            # a line with exactly two fields is an edge; other lines are ignored
+            if len(parts) == 2:
+                vertices.add(parts[0])
+                vertices.add(parts[1])
+                edges.append(parts)
+    # write vertices to vertices.csv, and edges to edges.csv
+    with open(vertex_file_path, "w") as file:
+        for vertex in vertices:
+            file.write(vertex + "\n")
+    with open(edge_file_path, "w") as file:
+        for edge in edges:
+            file.write(edge[0] + "," + edge[1] + "\n")
+
+
+    
\ No newline at end of file
diff --git a/flex/interactive/examples/wiki/graph.yaml b/flex/interactive/examples/wiki/graph.yaml
new file mode 100644
index 000000000000..bd8e93942a0f
--- /dev/null
+++ b/flex/interactive/examples/wiki/graph.yaml
@@ -0,0 +1,22 @@
+name: wiki # graph name
+store_type: mutable_csr # v6d, groot, gart
+schema:
+  vertex_types:
+    - type_name: article
+      type_id: 0
+      x_csr_params:
+        max_vertex_num: 5000000
+      properties:
+        - property_id: 0
+          property_name: id
+          property_type:
+            primitive_type: DT_SIGNED_INT64
+      primary_keys:
+        - id
+  edge_types:
+    - type_name: link
+      type_id: 0
+      vertex_type_pair_relations:
+        - source_vertex: article
+          destination_vertex: article
+          relation: MANY_TO_MANY
diff --git a/flex/interactive/examples/wiki/import.yaml b/flex/interactive/examples/wiki/import.yaml
new file mode 100644
index 000000000000..17826e1cc2da
--- /dev/null
+++ b/flex/interactive/examples/wiki/import.yaml
@@ -0,0 +1,40 @@
+graph: wiki
+loading_config:
+  data_source:
+    scheme: file # file, oss, s3, hdfs; only file is supported now
+  import_option: init # append, overwrite, only init is supported now
+  format:
+    type: csv
+    metadata:
+      delimiter: " " # other loading configuration places here
+      header_row: false # whether to use the first row as the header
+      quoting: false
+      quote_char: '"'
+      double_quote: true
+      escape_char: '\'
+      escaping: false
+      block_size: 4MB
+      batch_reader: true
+      null_values: [""]
+
+vertex_mappings:
+  - type_name: article # must align with the schema
+    inputs:
+      - article.csv
+    column_mappings:
+      - column:
+          index: 0 # can be omitted if the index is the same as the property index
+        property: id
+edge_mappings:
+  - type_triplet:
+      edge: link
+      source_vertex: article
+      destination_vertex: article
+    inputs:
+      - link.csv
+    source_vertex_mappings:
+      - column:
+          index: 0
+    destination_vertex_mappings:
+      - column:
+          index: 1
\ No newline at end of file
diff --git a/flex/tests/leiden/test.cc b/flex/tests/leiden/test.cc
index d2d07cf2da1d..baa5040c122c 100644
--- a/flex/tests/leiden/test.cc
+++ b/flex/tests/leiden/test.cc
@@ -75,6 +75,8 @@ int main(int argc, char** argv) {
   LOG(INFO) << "Graph created, vcount: " << igraph_vcount(&g)
             << ", ecount: " << igraph_ecount(&g);
 
+  double t = -grape::GetCurrentTime();
+
   Graph graph(&g);
 
   CPMVertexPartition part(&graph, 0.5 /* resolution */);
@@ -91,4 +93,7 @@ int main(int argc, char** argv) {
   }
 
   igraph_destroy(&g);
+
+  t += grape::GetCurrentTime();
+  LOG(INFO) << "Time: " << t << "s";
 }
\ No newline at end of file