forked from asrivast28/bn-utils
-
Notifications
You must be signed in to change notification settings - Fork 0
/
HDF5DataReader.hpp
154 lines (140 loc) · 5.68 KB
/
HDF5DataReader.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/**
* @file HDF5DataReader.hpp
* @brief Declaration and implementation of the reader for observations stored in HDF5 files.
* @author Ankit Srivastava <[email protected]>
*
* Copyright 2020 Georgia Institute of Technology
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef HDF5_DATAREADER_HPP_
#define HDF5_DATAREADER_HPP_
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <vector>

#include <hdf5.h>

#include "DataReader.hpp"
#include "HDF5Utils.hpp"
/**
 * @brief Reader for observation data stored in an HDF5 file.
 *
 * Extends DataReader; all reading is performed by the constructor.
 *
 * @tparam DataType The type of the data to be read.
 */
template <typename DataType>
class HDF5ObservationReader : public DataReader<DataType> {
public:
    /**
     * @brief Reads the variable names, observation names and data matrix
     *        from the given HDF5 file.
     *
     * @param fileName     The name of the file to be read.
     * @param numRows      Total number of rows (variables) in the file.
     * @param numCols      Total number of columns (observations) in the file.
     * @param path         Path in the HDF5 file within which all data is stored.
     * @param matrixPath   Path in the HDF5 file within which the matrix is stored.
     * @param obsPath      Path in the HDF5 file within which the observation
     *                     names are stored.
     * @param varsPath     Path in the HDF5 file within which the variable names
     *                     are stored.
     * @param parallelRead If the data should be read in parallel.
     */
    HDF5ObservationReader(const std::string& fileName, const uint32_t numRows,
                          const uint32_t numCols, const std::string& path,
                          const std::string& matrixPath,
                          const std::string& obsPath,
                          const std::string& varsPath,
                          const bool parallelRead = false);

private:
    /// Observation names read from the file by the constructor.
    std::vector<std::string> m_obsNames;
}; // class HDF5ObservationReader
/**
 * @brief Constructor that reads data from the file.
 *
 * Opens the file (through the MPI-IO driver when a parallel read is requested
 * and more than one process is available), opens the group at @p path, reads
 * the variable and observation names, and finally reads the matrix into the
 * data buffer of the base class. Every HDF5 handle opened here is released
 * again on all exit paths, including the ones that throw.
 *
 * @param fileName The name of the file to be read.
 * @param numRows Total number of rows (variables) in the file.
 * @param numCols Total number of columns (observations) in the file.
 * @param path Path in HDF5 file within which all data is stored
 * @param matrixPath Path in HDF5 file within which the matrix is stored
 * @param obsPath Path in HDF5 file within which the observation names are
 *                stored
 * @param varsPath Path in HDF5 file within which the variables names are stored
 * @param parallelRead If the data should be read in parallel.
 *
 * @throws std::runtime_error If the file or the group cannot be opened, if the
 *         parallel file access properties cannot be set up, or if the
 *         variable/observation names cannot be read.
 */
template <typename DataType>
HDF5ObservationReader<DataType>::HDF5ObservationReader(
    const std::string& fileName, const uint32_t numRows, const uint32_t numCols,
    const std::string& path, const std::string& matrixPath,
    const std::string& obsPath, const std::string& varsPath,
    const bool parallelRead)
    : DataReader<DataType>(numRows, false, true) {
    using HDF5Ifx = HDF5Utils<DataType>;

    // Scope guard that closes an HDF5 identifier when it goes out of scope, so
    // that exceptions thrown below cannot leak open handles.
    struct ScopedHid {
        using CloseFn = herr_t (*)(hid_t);
        hid_t id;
        CloseFn close;
        ScopedHid(const hid_t handle, const CloseFn closeFn)
            : id(handle), close(closeFn) {}
        ScopedHid(const ScopedHid&) = delete;
        ScopedHid& operator=(const ScopedHid&) = delete;
        ~ScopedHid() {
            // Negative identifiers denote "nothing to close".
            if (id >= 0) {
                close(id);
            }
        }
    };

    mxx::comm comm;
    const bool useParallel = parallelRead && (comm.size() > 1);

    // Size the destination buffer for both read modes; the multiplication is
    // widened first so that large matrices do not wrap around in 32 bits.
    // NOTE(review): the original sized the buffer only for the sequential
    // read; confirm the parallel read_matrix expects a full-size buffer.
    this->m_data.resize(static_cast<std::size_t>(numRows) * numCols);

    // File access properties: the library default for a sequential read, an
    // MPI-IO property list for a parallel one. The guard starts out empty
    // because H5P_DEFAULT is not ours to close.
    hid_t accessList = H5P_DEFAULT;
    ScopedHid accessListGuard(-1, &H5Pclose);
    if (useParallel) {
        accessList = H5Pcreate(H5P_FILE_ACCESS);
        if (accessList < 0) {
            throw std::runtime_error(
                "Failed to create file access property list for : " +
                fileName);
        }
        accessListGuard.id = accessList;
        MPI_Info info = MPI_INFO_NULL;
        if (H5Pset_fapl_mpio(accessList, comm, info) < 0) {
            throw std::runtime_error(
                "Failed to set MPI-IO file access properties for : " +
                fileName);
        }
    }

    // open the file for reading only.
    ScopedHid file(H5Fopen(fileName.c_str(), H5F_ACC_RDONLY, accessList),
                   &H5Fclose);
    if (file.id < 0) {
        if (useParallel) {
            std::cerr << "ERROR: failed to open PHDF5 file " << fileName
                      << std::endl;
        }
        throw std::runtime_error("Failed to Open File : " + fileName);
    }

    // Reports a missing or unopenable group on stderr and throws.
    const auto failGroup = [&path, &fileName]() {
        std::cerr << "WARN: unable to get group " << path << " in file "
                  << fileName << std::endl;
        throw std::runtime_error("WARN: unable to get group " + path +
                                 " in file " + fileName);
    };

    if (H5Lexists(file.id, path.c_str(), H5P_DEFAULT) <= 0) {
        failGroup();
    }
    ScopedHid group(H5Gopen(file.id, path.c_str(), H5P_DEFAULT), &H5Gclose);
    if (group.id < 0) {
        failGroup();
    }

    // read the names.
    try {
        this->m_varNames = HDF5Ifx::read_strings(group.id, varsPath).value();
        m_obsNames = HDF5Ifx::read_strings(group.id, obsPath).value();
    } catch (const std::bad_optional_access&) {
        throw std::runtime_error(
            " Failed to load Variable and observation names from " +
            varsPath + " and " + obsPath);
    }

    // read the data.
    if (useParallel) {
        HDF5Ifx::read_matrix(group.id, matrixPath, numRows, numCols, comm,
                             this->m_data.data());
    } else {
        HDF5Ifx::read_matrix(group.id, matrixPath, numRows, numCols,
                             this->m_data.data());
    }
    // The guards close the group, the file and the property list (if one was
    // created), in that order, when they go out of scope here.
}
#endif