Skip to content

Commit

Permalink
[Fix] Convert float64 to float32 when creating tensor (dmlc#3751)
Browse files Browse the repository at this point in the history
* [Fix] Convert float64 to float32 when creating tensor

* refine docstring
  • Loading branch information
Rhett-Ying authored Feb 18, 2022
1 parent e424d29 commit 5558ce2
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 14 deletions.
17 changes: 13 additions & 4 deletions python/dgl/data/csv_dataset_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,15 @@ def _validate_data_length(data_dict):
"All data are required to have same length while some of them does not. Length of data={}".format(str(len_dict)))


def _tensor(data, dtype=None):
    """Create a backend tensor from ``data``, downcasting float64 to float32.

    Float32 is the default dtype for float tensors in DGL, so when the
    caller does not request a dtype explicitly, a float64 result is cast
    down to float32 to avoid dtype mismatches downstream.

    Parameters
    ----------
    data : array-like
        Input accepted by ``F.tensor``.
    dtype : backend dtype, optional
        Explicit dtype. When given it is honored as-is — including an
        explicit float64 request, which is NOT downcast.

    Returns
    -------
    Tensor
        Backend tensor; float32 instead of float64 unless float64 was
        explicitly requested via ``dtype``.
    """
    ret = F.tensor(data, dtype)
    # Only downcast when no dtype was requested; silently overriding an
    # explicit dtype argument would contradict the function's contract.
    if dtype is None and F.dtype(ret) == F.float64:
        ret = F.tensor(ret, dtype=F.float32)
    return ret

class BaseData:
""" Class of base data which is inherited by Node/Edge/GraphData. Internal use only. """
@staticmethod
Expand Down Expand Up @@ -137,7 +146,7 @@ def to_dict(node_data: List['NodeData']) -> dict:
node_dict[graph_id] = {}
node_dict[graph_id][n_data.type] = {'mapping': {index: i for i,
index in enumerate(ids[u_indices])},
'data': {k: F.tensor(v[idx][u_indices])
'data': {k: _tensor(v[idx][u_indices])
for k, v in n_data.data.items()}}
return node_dict

Expand Down Expand Up @@ -187,8 +196,8 @@ def to_dict(edge_data: List['EdgeData'], node_dict: dict) -> dict:
dst_ids = [dst_mapping[index] for index in e_data.dst[idx]]
if graph_id not in edge_dict:
edge_dict[graph_id] = {}
edge_dict[graph_id][e_data.type] = {'edges': (F.tensor(src_ids), F.tensor(dst_ids)),
'data': {k: F.tensor(v[idx])
edge_dict[graph_id][e_data.type] = {'edges': (_tensor(src_ids), _tensor(dst_ids)),
'data': {k: _tensor(v[idx])
for k, v in e_data.data.items()}}
return edge_dict

Expand Down Expand Up @@ -226,7 +235,7 @@ def to_dict(graph_data: 'GraphData', graphs_dict: dict) -> dict:
{('_V', '_E', '_V'): ([], [])})
for graph_id in graph_ids:
graphs.append(graphs_dict[graph_id])
data = {k: F.tensor(v) for k, v in graph_data.data.items()}
data = {k: _tensor(v) for k, v in graph_data.data.items()}
return graphs, data


Expand Down
27 changes: 17 additions & 10 deletions tests/compute/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,9 @@ def _test_construct_graphs_homo():
def assert_data(lhs, rhs):
for key, value in lhs.items():
assert key in rhs
assert F.array_equal(F.tensor(value), rhs[key])
assert F.dtype(rhs[key]) != F.float64
assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key])
assert_data(ndata, g.ndata)
assert_data(edata, g.edata)

Expand Down Expand Up @@ -314,7 +316,9 @@ def _test_construct_graphs_hetero():
def assert_data(lhs, rhs):
for key, value in lhs.items():
assert key in rhs
assert F.array_equal(F.tensor(value), rhs[key])
assert F.dtype(rhs[key]) != F.float64
assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key])
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
assert_data(ndata_dict[ntype], g.nodes[ntype].data)
Expand Down Expand Up @@ -364,7 +368,8 @@ def _test_construct_graphs_multiple():
assert len(graphs) == num_graphs
assert len(data_dict) == len(gdata)
for k, v in data_dict.items():
assert F.array_equal(F.tensor(gdata[k]), v)
assert F.dtype(v) != F.float64
assert F.array_equal(F.tensor(gdata[k], dtype=F.dtype(v)), v)
for i, g in enumerate(graphs):
assert g.is_homogeneous
assert g.num_nodes() == num_nodes
Expand All @@ -377,7 +382,9 @@ def assert_data(lhs, rhs, size, node=False):
if node:
indices = u_indices[i*size:(i+1)*size]
value = value[indices]
assert F.array_equal(F.tensor(value), rhs[key])
assert F.dtype(rhs[key]) != F.float64
assert F.array_equal(
F.tensor(value, dtype=F.dtype(rhs[key])), rhs[key])
assert_data(ndata, g.ndata, num_nodes, node=True)
assert_data(edata, g.edata, num_edges)

Expand Down Expand Up @@ -798,13 +805,13 @@ def _test_CSVDataset_single():
assert csv_dataset.has_cache()
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
assert F.array_equal(F.tensor(feat_ndata),
assert F.array_equal(F.tensor(feat_ndata, dtype=F.float32),
g.nodes[ntype].data['feat'])
assert np.array_equal(label_ndata,
F.asnumpy(g.nodes[ntype].data['label']))
for etype in g.etypes:
assert g.num_edges(etype) == num_edges
assert F.array_equal(F.tensor(feat_edata),
assert F.array_equal(F.tensor(feat_edata, dtype=F.float32),
g.edges[etype].data['feat'])
assert np.array_equal(label_edata,
F.asnumpy(g.edges[etype].data['label']))
Expand Down Expand Up @@ -880,21 +887,21 @@ def _test_CSVDataset_multiple():
assert len(csv_dataset.data) == 2
assert 'feat' in csv_dataset.data
assert 'label' in csv_dataset.data
assert F.array_equal(F.tensor(feat_gdata),
assert F.array_equal(F.tensor(feat_gdata, dtype=F.float32),
csv_dataset.data['feat'])
for i, (g, g_data) in enumerate(csv_dataset):
assert not g.is_homogeneous
assert F.asnumpy(g_data['label']) == label_gdata[i]
assert F.array_equal(g_data['feat'], F.tensor(feat_gdata[i]))
assert F.array_equal(g_data['feat'], F.tensor(feat_gdata[i], dtype=F.float32))
for ntype in g.ntypes:
assert g.num_nodes(ntype) == num_nodes
assert F.array_equal(F.tensor(feat_ndata[i*num_nodes:(i+1)*num_nodes]),
assert F.array_equal(F.tensor(feat_ndata[i*num_nodes:(i+1)*num_nodes], dtype=F.float32),
g.nodes[ntype].data['feat'])
assert np.array_equal(label_ndata[i*num_nodes:(i+1)*num_nodes],
F.asnumpy(g.nodes[ntype].data['label']))
for etype in g.etypes:
assert g.num_edges(etype) == num_edges
assert F.array_equal(F.tensor(feat_edata[i*num_edges:(i+1)*num_edges]),
assert F.array_equal(F.tensor(feat_edata[i*num_edges:(i+1)*num_edges], dtype=F.float32),
g.edges[etype].data['feat'])
assert np.array_equal(label_edata[i*num_edges:(i+1)*num_edges],
F.asnumpy(g.edges[etype].data['label']))
Expand Down

0 comments on commit 5558ce2

Please sign in to comment.