# data_benchmark.py
# Forked from sparticlesteve/cosmoflow-benchmark.
"""Data loading benchmark code for cosmoflow-benchmark
This script can be used to test just the data-loading part of the CosmoFlow
application to understand I/O performance.
"""
# System imports
import argparse
import time
import pprint
from types import SimpleNamespace
# External imports
import tensorflow as tf
# Local imports
from data import get_datasets
def parse_args(argv=None):
    """Parse command line arguments for the data-loading benchmark.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process command line, which
            is the original behaviour of this function.

    Returns:
        argparse.Namespace with the attributes ``data_dir``, ``n_samples``,
        ``batch_size``, ``n_epochs``, ``inter_threads`` and
        ``intra_threads``.
    """
    parser = argparse.ArgumentParser(
        description='Benchmark the CosmoFlow data-loading pipeline')
    add_arg = parser.add_argument
    add_arg('--data-dir',
            default='/global/cscratch1/sd/sfarrell/cosmoflow-benchmark/data/cosmoUniverse_2019_05_4parE_tf',
            help='Directory handed to the dataset loader as data_dir')
    add_arg('--n-samples', type=int, default=512,
            help='Number of training samples to request (default: 512)')
    add_arg('--batch-size', type=int, default=4,
            help='Batch size requested from the dataset (default: 4)')
    add_arg('--n-epochs', type=int, default=1,
            help='Number of epochs requested from the dataset (default: 1)')
    add_arg('--inter-threads', type=int, default=2,
            help='TensorFlow inter-op parallelism threads (default: 2)')
    add_arg('--intra-threads', type=int, default=32,
            help='TensorFlow intra-op parallelism threads (default: 32)')
    # argv=None makes argparse fall back to sys.argv[1:], so existing
    # zero-argument callers behave exactly as before.
    return parser.parse_args(argv)
def main():
    """Run the data-loading benchmark and print timing results.

    Enables eager execution with the requested thread counts, builds the
    'cosmo' dataset through ``get_datasets`` with a single-process ``dist``
    description, iterates over ``data['train_dataset']`` once, and prints
    the total wall time and the throughput in samples per second.

    Throughput is based on the number of samples actually iterated, not
    on ``--n-samples``, so it stays meaningful when the pipeline yields a
    different number of samples than requested (for example when more
    than one epoch is requested).
    """
    # Parse command line arguments
    args = parse_args()

    # Session setup
    tf.compat.v1.enable_eager_execution(
        config=tf.compat.v1.ConfigProto(
            inter_op_parallelism_threads=args.inter_threads,
            intra_op_parallelism_threads=args.intra_threads))

    # Not running distributed
    dist = SimpleNamespace(rank=0, size=1, local_rank=0, local_size=1)

    # Load the dataset
    data = get_datasets(name='cosmo',
                        data_dir=args.data_dir,
                        sample_shape=[128, 128, 128, 4],
                        n_train=args.n_samples,
                        n_valid=0,
                        batch_size=args.batch_size,
                        n_epochs=args.n_epochs,
                        apply_log=True,
                        shard=False,
                        dist=dist)
    pprint.pprint(data)

    n_seen = 0
    start_time = time.perf_counter()
    for x, y in data['train_dataset']:
        # Perform a simple operation on each batch; the results are discarded
        tf.math.reduce_sum(x)
        tf.math.reduce_sum(y)
        # NOTE(review): assumes the leading axis of x is the batch
        # dimension -- confirm against get_datasets.
        n_seen += int(x.shape[0])
    duration = time.perf_counter() - start_time

    print('Total time: %.4f s' % duration)
    # Guard the division so an empty dataset on a coarse clock cannot raise
    throughput = n_seen / duration if duration > 0 else 0.0
    print('Throughput: %.4f samples/s' % throughput)
    print('All done!')
# Run the benchmark only when executed as a script, not when imported
if __name__ == "__main__":
    main()