From 512c6167f7de61cb6a2de6a4ca6795cbf34c3616 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Wed, 9 Oct 2024 02:23:29 +0000 Subject: [PATCH 1/8] change test_mp_dataloader --- tests/distributed/test_mp_dataloader.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/tests/distributed/test_mp_dataloader.py b/tests/distributed/test_mp_dataloader.py index e031481a8910..6b7463c40405 100644 --- a/tests/distributed/test_mp_dataloader.py +++ b/tests/distributed/test_mp_dataloader.py @@ -118,7 +118,9 @@ def start_dist_dataloader( gpb = None disable_shared_mem = num_server > 1 if disable_shared_mem: - _, _, _, gpb, _, _, _ = load_partition(part_config, rank) + _, _, _, gpb, _, _, _ = load_partition( + part_config, rank, use_graphbolt=use_graphbolt + ) num_nodes_to_sample = 202 batch_size = 32 train_nid = th.arange(num_nodes_to_sample) @@ -463,7 +465,9 @@ def start_node_dataloader( gpb = None disable_shared_mem = num_server > 1 if disable_shared_mem: - _, _, _, gpb, _, _, _ = load_partition(part_config, rank) + _, _, _, gpb, _, _, _ = load_partition( + part_config, rank, use_graphbolt=use_graphbolt + ) num_nodes_to_sample = 202 batch_size = 32 graph_name = os.path.splitext(os.path.basename(part_config))[0] @@ -482,7 +486,9 @@ def start_node_dataloader( } for i in range(num_server): - part, _, _, _, _, _, _ = load_partition(part_config, i) + part, _, _, _, _, _, _ = load_partition( + part_config, i, use_graphbolt=use_graphbolt + ) # Create sampler _prob = None @@ -589,7 +595,9 @@ def start_edge_dataloader( gpb = None disable_shared_mem = num_server > 1 if disable_shared_mem: - _, _, _, gpb, _, _, _ = load_partition(part_config, rank) + _, _, _, gpb, _, _, _ = load_partition( + part_config, rank, use_graphbolt=use_graphbolt + ) num_edges_to_sample = 202 batch_size = 32 graph_name = os.path.splitext(os.path.basename(part_config))[0] @@ -604,7 +612,9 @@ def start_edge_dataloader( } for i in range(num_server): - part, _, _, _, _, _, _ = load_partition(part_config, i) + part, _, _, _, _, _, _ = load_partition( + part_config, i, use_graphbolt=use_graphbolt + ) # Create sampler _prob = None From fc61534e7cdfd1e01f54d7a09a1a964e5d93b74d Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 11 Oct 2024 03:15:55 +0000 Subject: [PATCH 2/8] change load partition --- python/dgl/distributed/partition.py | 17 ++++++++++++++++- tests/distributed/test_mp_dataloader.py | 22 ++++++---------------- 2 files changed, 22 insertions(+), 17 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index de782a5f144d..ca856dcee9f1 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -283,7 +283,7 @@ def _verify_graphbolt_partition(graph, part_id, gpb, ntypes, etypes): print(f"Partition {part_id} looks good!") -def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): +def load_partition(part_config, part_id, load_feats=True, use_graphbolt=None): """Load data of a partition from the data path. A partition data includes a graph structure of the partition, a dict of node tensors, @@ -334,6 +334,21 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): "part-{}".format(part_id) in part_metadata ), "part-{} does not exist".format(part_id) part_files = part_metadata["part-{}".format(part_id)] + + if use_graphbolt is None: + if os.path.exists( + os.path.join(config_path, f"part{part_id}", "graph.dgl") + ): + use_graphbolt = False + elif os.path.exists( + os.path.join( + config_path, f"part{part_id}", "fused_csc_sampling_graph.pt" + ) + ): + use_graphbolt = True + else: + raise ValueError("The graph object doesn't exist.") + if use_graphbolt: part_graph_field = "part_graph_graphbolt" else: diff --git a/tests/distributed/test_mp_dataloader.py b/tests/distributed/test_mp_dataloader.py index 6b7463c40405..bf84544aa603 100644 --- a/tests/distributed/test_mp_dataloader.py +++ b/tests/distributed/test_mp_dataloader.py @@ -118,9 +118,7 @@ def start_dist_dataloader( gpb = None disable_shared_mem = num_server > 1 if disable_shared_mem: - _, _, _, gpb, _, _, _ = load_partition( - part_config, rank, use_graphbolt=use_graphbolt - ) + _, _, _, gpb, _, _, _ = load_partition(part_config, rank) num_nodes_to_sample = 202 batch_size = 32 train_nid = th.arange(num_nodes_to_sample) @@ -465,9 +463,7 @@ def start_node_dataloader( gpb = None disable_shared_mem = num_server > 1 if disable_shared_mem: - _, _, _, gpb, _, _, _ = load_partition( - part_config, rank, use_graphbolt=use_graphbolt - ) + _, _, _, gpb, _, _, _ = load_partition(part_config, rank) num_nodes_to_sample = 202 batch_size = 32 graph_name = os.path.splitext(os.path.basename(part_config))[0] @@ -486,9 +482,7 @@ def start_node_dataloader( } for i in range(num_server): - part, _, _, _, _, _, _ = load_partition( - part_config, i, use_graphbolt=use_graphbolt - ) + part, _, _, _, _, _, _ = load_partition(part_config, i) # Create sampler _prob = None @@ -595,9 +589,7 @@ def start_edge_dataloader( gpb = None disable_shared_mem = num_server > 1 if disable_shared_mem: - _, _, _, gpb, _, _, _ = load_partition( - part_config, rank, use_graphbolt=use_graphbolt - ) + _, _, _, gpb, _, _, _ = load_partition(part_config, rank) num_edges_to_sample = 202 batch_size = 32 graph_name = os.path.splitext(os.path.basename(part_config))[0] @@ -612,9 +604,7 @@ def start_edge_dataloader( } for i in range(num_server): - part, _, _, _, _, _, _ = load_partition( - part_config, i, use_graphbolt=use_graphbolt - ) + part, _, _, _, _, _, _ = load_partition(part_config, i) # Create sampler _prob = None @@ -1187,4 +1177,4 @@ def test_deprecated_dataloader(dataloader_type): 0, dataloader_type, use_deprecated_dataloader=True, - ) + ) \ No newline at end of file From 37e2e0806925fa32a2c1be93b1d500152bbe2265 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Fri, 11 Oct 2024 03:26:59 +0000 Subject: [PATCH 3/8] fix test_mp_dataloader.py format --- tests/distributed/test_mp_dataloader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/distributed/test_mp_dataloader.py b/tests/distributed/test_mp_dataloader.py index bf84544aa603..e031481a8910 100644 --- a/tests/distributed/test_mp_dataloader.py +++ b/tests/distributed/test_mp_dataloader.py @@ -1177,4 +1177,4 @@ def test_deprecated_dataloader(dataloader_type): 0, dataloader_type, use_deprecated_dataloader=True, - ) \ No newline at end of file + ) From 2dc7b779b74b17655904a6d81e3968e6cfe15ff4 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Mon, 14 Oct 2024 05:31:08 +0000 Subject: [PATCH 4/8] change load_partition --- python/dgl/distributed/partition.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index ca856dcee9f1..1573f03c1f1f 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -283,7 +283,7 @@ def _verify_graphbolt_partition(graph, part_id, gpb, ntypes, etypes): print(f"Partition {part_id} looks good!") -def load_partition(part_config, part_id, load_feats=True, use_graphbolt=None): +def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): """Load data of a partition from the data path. A partition data includes a graph structure of the partition, a dict of node tensors, @@ -335,7 +335,7 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=None): ), "part-{} does not exist".format(part_id) part_files = part_metadata["part-{}".format(part_id)] - if use_graphbolt is None: + if use_graphbolt is False: if os.path.exists( os.path.join(config_path, f"part{part_id}", "graph.dgl") ): From 74617d6b65ea09f8bb785557108b2e2fe3749c18 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 15 Oct 2024 01:50:39 +0000 Subject: [PATCH 5/8] change partition --- python/dgl/distributed/partition.py | 34 ++++++++++++++++++----------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 1573f03c1f1f..98c394db83eb 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -335,19 +335,27 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): ), "part-{} does not exist".format(part_id) part_files = part_metadata["part-{}".format(part_id)] - if use_graphbolt is False: - if os.path.exists( - os.path.join(config_path, f"part{part_id}", "graph.dgl") - ): - use_graphbolt = False - elif os.path.exists( - os.path.join( - config_path, f"part{part_id}", "fused_csc_sampling_graph.pt" - ) - ): - use_graphbolt = True - else: - raise ValueError("The graph object doesn't exist.") + exist_dgl_graph = exist_graphbolt_graph = False + if os.path.exists( + os.path.join(config_path, f"part{part_id}", "graph.dgl") + ): + use_graphbolt = False + exist_dgl_graph = True + elif os.path.exists( + os.path.join( + config_path, f"part{part_id}", "fused_csc_sampling_graph.pt" + ) + ): + use_graphbolt = True + exist_graphbolt_graph = True + else: + raise ValueError("The graph object doesn't exist.") + + # Check if both DGL graph and GraphBolt graph exist. Make sure only one exists. + if exist_dgl_graph and exist_graphbolt_graph: + raise ValueError( + "Both DGL graph and GraphBolt graph exist. Please remove one." + ) if use_graphbolt: part_graph_field = "part_graph_graphbolt" From fb7e57ad19819627c2b2164cc3c70c4dad7ff02b Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 15 Oct 2024 01:53:31 +0000 Subject: [PATCH 6/8] change format --- python/dgl/distributed/partition.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 98c394db83eb..12c6f36775e7 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -336,9 +336,7 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): part_files = part_metadata["part-{}".format(part_id)] exist_dgl_graph = exist_graphbolt_graph = False - if os.path.exists( - os.path.join(config_path, f"part{part_id}", "graph.dgl") - ): + if os.path.exists(os.path.join(config_path, f"part{part_id}", "graph.dgl")): use_graphbolt = False exist_dgl_graph = True elif os.path.exists( From 9d53bca0125475c2135fb0e5c080e63ae67eb6f1 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 15 Oct 2024 04:02:00 +0000 Subject: [PATCH 7/8] change load_partition --- python/dgl/distributed/partition.py | 10 +++++----- tests/distributed/test_partition.py | 2 ++ 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index 12c6f36775e7..a84098a0bf9e 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -339,18 +339,18 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): if os.path.exists(os.path.join(config_path, f"part{part_id}", "graph.dgl")): use_graphbolt = False exist_dgl_graph = True - elif os.path.exists( + if os.path.exists( os.path.join( config_path, f"part{part_id}", "fused_csc_sampling_graph.pt" ) ): use_graphbolt = True exist_graphbolt_graph = True - else: - raise ValueError("The graph object doesn't exist.") - # Check if both DGL graph and GraphBolt graph exist. Make sure only one exists. - if exist_dgl_graph and exist_graphbolt_graph: + # Check if both DGL graph and GraphBolt graph exist or not exist. Make sure only one exists. + if not exist_dgl_graph and not exist_graphbolt_graph: + raise ValueError("The graph object doesn't exist.") + elif exist_dgl_graph and exist_graphbolt_graph: raise ValueError( "Both DGL graph and GraphBolt graph exist. Please remove one." ) diff --git a/tests/distributed/test_partition.py b/tests/distributed/test_partition.py index ef075841e25b..a46e0b778367 100644 --- a/tests/distributed/test_partition.py +++ b/tests/distributed/test_partition.py @@ -990,6 +990,7 @@ def test_dgl_partition_to_graphbolt_homo( orig_g = dgl.load_graphs( os.path.join(test_dir, f"part{part_id}/graph.dgl") )[0][0] + os.remove(os.path.join(test_dir, f"part{part_id}/graph.dgl")) new_g = load_partition( part_config, part_id, load_feats=False, use_graphbolt=True )[0] @@ -1067,6 +1068,7 @@ def test_dgl_partition_to_graphbolt_hetero( orig_g = dgl.load_graphs( os.path.join(test_dir, f"part{part_id}/graph.dgl") )[0][0] + os.remove(os.path.join(test_dir, f"part{part_id}/graph.dgl")) new_g = load_partition( part_config, part_id, load_feats=False, use_graphbolt=True )[0] From 7202d44408bb3c70eaf4f3d4c6493d62b56491f5 Mon Sep 17 00:00:00 2001 From: Ubuntu Date: Tue, 15 Oct 2024 07:27:48 +0000 Subject: [PATCH 8/8] change format --- python/dgl/distributed/partition.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/dgl/distributed/partition.py b/python/dgl/distributed/partition.py index a84098a0bf9e..f74da1cf9685 100644 --- a/python/dgl/distributed/partition.py +++ b/python/dgl/distributed/partition.py @@ -350,7 +350,7 @@ def load_partition(part_config, part_id, load_feats=True, use_graphbolt=False): # Check if both DGL graph and GraphBolt graph exist or not exist. Make sure only one exists. if not exist_dgl_graph and not exist_graphbolt_graph: raise ValueError("The graph object doesn't exist.") - elif exist_dgl_graph and exist_graphbolt_graph: + if exist_dgl_graph and exist_graphbolt_graph: raise ValueError( "Both DGL graph and GraphBolt graph exist. Please remove one." )