-
Notifications
You must be signed in to change notification settings - Fork 2
/
readDMV.py
431 lines (386 loc) · 21.7 KB
/
readDMV.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
def readDMV(filename):
    """
    Reader for SSEC DMV binary files using "pure Python".

    Capable of reading RNC, RFC, RLC, CXS, CXV and SUM files.  Returns an
    xarray Dataset that contains the data variables and metadata for those
    variables.

    Parameters
    ----------
    filename : str
        Path to a DMV file.  The file type is inferred from the extension
        (RNC, RFC, RLC, CXS, CXV or SUM; upper or lower case) and, for some
        types, from the trailing characters of the base name (channel digit
        and scan-direction letter).

    Returns
    -------
    xarray.Dataset
        Independent variables indexed by time, dependent (spectral)
        variables indexed by (time, wavenumber), plus FileHistory,
        base_time, date and time_offset.

    Raises
    ------
    ValueError
        If the Table of Contents size or the file extension is not
        recognized.

    Examples
    --------
        from readDMV import readDMV
        readDMV('160602C1.RNC')   # likewise .RFC, .RLC, .CXS, .CXV, .SUM

    Written by:
        Von P. Walden
        Washington State University
        7 August 2018
    Updates:
        15 August 2019 - Updated documentation within this function.
        8 January 2020 - Major update (v2.0):
                         1) create new functions,
                         2) added support for CXV and SUM files.
        1 August 2021  - Added support for RFC files.
    """
    import numpy as np
    import pandas as pd
    import xarray as xr
    from collections import OrderedDict
    from ohwhio import getDMVformat
    # Suppress performance warnings from Pandas, vpw 231112.
    # NOTE(review): this silences ALL warnings process-wide; consider
    # narrowing to pd.errors.PerformanceWarning.
    import warnings
    warnings.filterwarnings('ignore')

    def readVariableAttributes():
        """Read the four length-prefixed UTF-8 strings (variable name, short
        name, long name, units) for one dependent variable from f."""
        nbytes = np.fromfile(f, np.int32, 1)[0]
        variableName = f.read(nbytes).decode('utf-8')
        nbytes = np.fromfile(f, np.int32, 1)[0]
        shortname = f.read(nbytes).decode('utf-8')  # read only to advance the file pointer
        nbytes = np.fromfile(f, np.int32, 1)[0]
        longname = f.read(nbytes).decode('utf-8')
        nbytes = np.fromfile(f, np.int32, 1)[0]
        units = f.read(nbytes).decode('utf-8')
        return variableName, shortname, longname, units

    def readTOC(sizeTOC):
        """Read the Table of Contents from the open file f.

        Parameters
        ----------
        sizeTOC : int
            Size of one TOC entry in bytes: 40 for single-variable files
            (RNC, RFC, RLC, ...), 48 for multi-variable files
            (CXS, CXV, SUM, ...).

        Returns
        -------
        (OrderedDict, OrderedDict)
            dependentVariables (per-variable display attributes) and
            dependentVariableRecords (per-variable record bookkeeping:
            sizes, scaling, independent-axis limits, ...).
        """
        dependentVariables = OrderedDict({})
        dependentVariableRecords = OrderedDict({})
        if sizeTOC == 40:  # RNC, RFC, RLC, ...
            # dependent data information for single-variable file.
            sizeDependentRecord = np.fromfile(f, np.int32, 1)[0]
            formatDependentRecord = np.fromfile(f, np.int32, 1)[0]
            scalingFactorLog = np.fromfile(f, np.int32, 1)[0]
            dependentPrecisionLog = np.fromfile(f, np.int32, 1)[0]
            # independent data information
            independentMinimum = np.fromfile(f, np.float64, 1)[0]
            independentMaximum = np.fromfile(f, np.float64, 1)[0]
            independentPrecisionLog = np.fromfile(f, np.int32, 1)[0]  # advances pointer; unused
            # number of attributes for next section.
            numberOfDependentAttributes = np.fromfile(f, np.int32, 1)[0]
            numberOfDependentVariables = 1
            # BUG FIX: 'identifier' was previously undefined in this branch,
            # raising NameError for every single-variable file.  A
            # single-variable file has exactly one variable, so use 1 to
            # mirror the multi-variable branch.
            identifier = 1
            # Now read the attributes for the single variable.
            variableName, shortname, longname, units = readVariableAttributes()
            precision = "{:.0E}".format(10 ** dependentPrecisionLog)
            # Now add this to the data variable dictionary.
            dependentVariables[variableName] = OrderedDict(
                [('longname', longname),
                 ('units', units),
                 ('precision', precision)])
            dependentVariableRecords[variableName] = OrderedDict(
                [('sizeDependentRecord', sizeDependentRecord),
                 ('formatDependentRecord', formatDependentRecord),
                 ('scalingFactorLog', scalingFactorLog),
                 ('dependentPrecisionLog', dependentPrecisionLog),
                 ('identifier', identifier),
                 ('independentMinimum', independentMinimum),
                 ('independentMaximum', independentMaximum),
                 ('numberOfDependentAttributes', numberOfDependentAttributes),
                 ('numberOfDependentVariables', numberOfDependentVariables)])
        elif sizeTOC == 48:  # CXS, CSV, CVS, UVS, SUM, ...
            Continuation = -1  # Non-zero to start loop.
            while Continuation:
                # dependent data information
                sizeDependentRecord = np.fromfile(f, np.int32, 1)[0]
                formatDependentRecord = np.fromfile(f, np.int32, 1)[0]
                scalingFactorLog = np.fromfile(f, np.int32, 1)[0]
                dependentPrecisionLog = np.fromfile(f, np.int32, 1)[0]
                # independent data information
                independentMinimum = np.fromfile(f, np.float64, 1)[0]
                independentMaximum = np.fromfile(f, np.float64, 1)[0]
                independentPrecisionLog = np.fromfile(f, np.int32, 1)[0]  # advances pointer; unused
                # additional data to support multiple variables
                identifier = np.fromfile(f, np.int32, 1)[0]
                Continuation = np.fromfile(f, np.int32, 1)[0]  # 0 on the final TOC entry
                # number of attributes for next section.
                numberOfDependentAttributes = np.fromfile(f, np.int32, 1)[0]
                numberOfDependentVariables = identifier + Continuation
                # Now read the attributes for this variable.
                variableName, shortname, longname, units = readVariableAttributes()
                precision = "{:.0E}".format(10 ** dependentPrecisionLog)
                # Now add this to the data variable dictionary.
                dependentVariables[variableName] = OrderedDict(
                    [('longname', longname),
                     ('units', units),
                     ('precision', precision)])
                dependentVariableRecords[variableName] = OrderedDict(
                    [('sizeDependentRecord', sizeDependentRecord),
                     ('formatDependentRecord', formatDependentRecord),
                     ('scalingFactorLog', scalingFactorLog),
                     ('dependentPrecisionLog', dependentPrecisionLog),
                     ('identifier', identifier),
                     ('independentMinimum', independentMinimum),
                     ('independentMaximum', independentMaximum),
                     ('numberOfDependentAttributes', numberOfDependentAttributes),
                     ('numberOfDependentVariables', numberOfDependentVariables)])
        else:
            # Previously this printed and returned an int, which crashed the
            # caller's tuple unpack; raise a clear error instead.
            raise ValueError('Erroneous size of Table of Contents ({})!! '
                             'Something is strange with your DMV file!!'.format(sizeTOC))
        return dependentVariables, dependentVariableRecords

    def DMVfileStructure(filename):
        '''Determines the structure for DMV files.

        Input:
            filename - DMV file name
        Output:
            dict with keys:
                recordSize        - size of data records in bytes for each measurement in time
                variableOffset    - offset (in floats) to where the variables start
                dataOffset        - offsets (in float values) to where each dependent variable's data starts
                numberOfRecords   - number of time records in the file
                numberOfValues    - number of float32 values per record
                numberOfVariables - number of independent variables per record
        Notes:
            Determine number of data records for each time step.
            factor of 5 is the number of measurements: BB1-BB2-scene-BB2-BB1
            numberOfDependentVariableBytes is the cumulative number of bytes for all dependent variables
            factor of 4 is the number of bytes in each number.
        '''
        ext = filename.split('.')[-1]
        # Determine the cumulative number of bytes in the dependent variables.
        numberOfDependentVariableBytes = np.array(
            [dependentVariableRecords[v]['sizeDependentRecord']
             for v in dependentVariableRecords]).sum()
        # Determine the record size, variable offset and data offset based on file type.
        # ....RNC #####################################################################
        if ext.lower() == 'rnc':
            channel = filename.split('.')[0][-1]
            nvars = 79 if channel == '1' else 71
            nvarsExtra1 = 14
            nvarsExtra2 = 22
            recordSize = ((nvars * 5) + nvarsExtra1 + (nvars * 5) + nvarsExtra2) * 4 \
                + numberOfDependentVariableBytes
            variableOffset = (nvars * 4) + (nvars + nvarsExtra1) + (nvars * 4)
            dataOffset = [variableOffset + (nvars + nvarsExtra2)]
        # ....RFC and RLC #############################################################
        elif ext.lower() in ('rlc', 'rfc'):
            channel = filename.split('.')[0][-1]
            typ = filename.split('.')[0][-2:-1]  # scan-direction letter before the channel digit
            if typ == 'B':
                scanDirection = 'Backward'
            elif typ == 'F':
                scanDirection = 'Forward'
            else:
                scanDirection = 'Both'  # C1 or C2
            nvars = 79 if channel == '1' else 71
            if scanDirection in ('Backward', 'Forward'):
                nvarsExtra = 14
                recordSize = (nvars * 4) * 4 + (nvars + nvarsExtra) * 4 \
                    + numberOfDependentVariableBytes
                variableOffset = nvars * 4
                dataOffset = [(nvars * 5) + nvarsExtra]
            else:  # Both (C1 or C2)
                nvarsExtra1 = 14
                nvarsExtra2 = 15
                recordSize = ((nvars * 4) + (nvars + nvarsExtra1) + (nvars * 4)
                              + (nvars + nvarsExtra2)) * 4 + numberOfDependentVariableBytes
                variableOffset = (nvars * 4) + (nvars + nvarsExtra1) + (nvars * 4)
                dataOffset = [variableOffset + (nvars + nvarsExtra2)]
        # ....CXS and CXV #############################################################
        elif ext.lower() in ('cxs', 'cxv'):
            # The two formats differ only in the number of independent variables.
            nvars = 71 if ext.lower() == 'cxs' else 79
            channel = filename.split('.')[0][-1]
            typ = filename.split('.')[0][-2:-1]
            scanDirection = 'Backward' if typ == 'B' else 'Forward'
            # Special case for Channel 1, Forward direction, which contains
            # 104 extra variables of 28 bytes each.
            if (channel == '1') and (scanDirection == 'Forward'):
                sizes = np.array([dependentVariableRecords[v]['sizeDependentRecord']
                                  for v in dependentVariableRecords])
                extraBytes = sizes[2:].sum()
                # Drop all of the extra dependent variables except the real
                # and imag spectra (the first two TOC entries).
                for v in list(dependentVariables)[2:]:
                    dependentVariables.pop(v)
                    dependentVariableRecords.pop(v)
                numberOfDependentVariableBytes = numberOfDependentVariableBytes - extraBytes
            else:
                extraBytes = 0
            recordSize = (nvars * 4) + numberOfDependentVariableBytes + extraBytes
            variableOffset = 0
            dataOffset = [nvars]
            for v in dependentVariableRecords:
                dataOffset.append(dataOffset[-1]
                                  + int(dependentVariableRecords[v]['sizeDependentRecord'] / 4))
            dataOffset.pop()  # last entry points past the final variable
        # ....SUM #####################################################################
        elif ext.lower() == 'sum':
            # Handles a special case where the format of the SUM files changed,
            # probably because AERI.xml was changed during ICECAPS (2011-07-07).
            yy = filename.split('.')[-2][-6:-4]
            if int(yy) > 96:
                yymmdd = '19' + filename.split('.')[-2][-6:]
            else:
                yymmdd = '20' + filename.split('.')[-2][-6:]
            if pd.to_datetime(yymmdd) < pd.to_datetime('20110707'):
                recordSize = 9776
            else:
                recordSize = 9744
            nvars = 144
            variableOffset = 1479
            dataOffset = [variableOffset + nvars]
            for v in dependentVariableRecords:
                dataOffset.append(dataOffset[-1]
                                  + int(dependentVariableRecords[v]['sizeDependentRecord'] / 4))
            dataOffset.pop()  # last entry points past the final variable
        else:
            # Previously this printed and returned {}, which caused an
            # obscure KeyError downstream; raise a clear error instead.
            raise ValueError('ERROR: Incorrect file type ({}). Try again...'.format(ext))
        numberOfRecords = int((eof - headerSize + 1) / recordSize)
        numberOfValues = int(recordSize / 4)
        return {'recordSize': recordSize,
                'variableOffset': variableOffset,
                'dataOffset': dataOffset,
                'numberOfRecords': numberOfRecords,
                'numberOfValues': numberOfValues,
                'numberOfVariables': nvars
                }

    def determineWavenumberScales(filename):
        """Compute the wavenumber scale(s) implied by the TOC and add them as
        variables to the xarray dataset ds (closure variable)."""
        def addScale(v):
            # The grid spans [independentMinimum, independentMaximum] with
            # one point per 4-byte float in the dependent record.
            bwn = dependentVariableRecords[v]['independentMinimum']
            ewn = dependentVariableRecords[v]['independentMaximum']
            nwn = int(dependentVariableRecords[v]['sizeDependentRecord'] / 4)
            wnum = np.linspace(bwn, ewn, nwn, dtype=np.float64)
            ds[wavenumberScales[v]] = wnum.astype(np.float64)
            ds[wavenumberScales[v]].attrs['longname'] = 'Wavenumber in reciprocal centimeters'
            ds[wavenumberScales[v]].attrs['units'] = 'centimeter^-1'
            ds[wavenumberScales[v]].attrs['precision'] = '1E-4'
            ds[wavenumberScales[v]].attrs['range_of_values'] = '[ ' + str(bwn) + ', ' + str(ewn) + ' ]'

        ext = filename.split('.')[-1]
        vs = [variable for variable in dependentVariableRecords]
        if ext.lower() in ('rnc', 'rfc', 'rlc', 'cxs', 'cxv'):
            # A single wavenumber scale, taken from the first dependent variable.
            addScale(vs[0])
        elif ext.lower() == 'sum':
            for v in ['ResponsivitySpectralAveragesCh1', 'ResponsivitySpectralAveragesCh2',
                      'SkyVariabilityAveragesCh1', 'SkyVariabilityAveragesCh2',
                      'SkyRadianceSpectralAveragesCh1', 'SkyRadianceSpectralAveragesCh2']:
                addScale(v)
        else:
            raise ValueError('ERROR: Incorrect file type ({}). Try again...'.format(ext))
        return

    # ---- Read the raw file ----------------------------------------------------
    # Context manager guarantees the file is closed even if a read fails
    # (the original leaked the handle on any exception).
    with open(filename, 'rb') as f:
        # Determine the file size by seeking to the end-of-file; eof.
        eof = f.seek(-1, 2)  # byte offset of the last byte
        # The header begins with its own size on the first (ASCII) line.
        f.seek(0)
        headerSize = int(f.readline().decode('utf-8'))
        f.seek(0)
        FileHistory = f.read(headerSize).decode('utf-8')
        # Skip past the 12-byte identifier, "SSECRGD ".
        ID = f.read(12).decode('utf-8')
        # Decode dependent variables that are associated with the data.
        sizeTOC = np.fromfile(f, np.int32, 1)[0]
        dependentVariables, dependentVariableRecords = readTOC(sizeTOC)
        # Determine independent variables.
        variables, wavenumberScales = getDMVformat(filename)
        variables.update(dependentVariables)  # Append dependent variables to list of variables
        # Read the next 4 bytes; not sure what these bytes are, but they
        # aren't part of the data records.
        nbytes = np.fromfile(f, np.int32, 1)[0]
        np.fromfile(f, np.int32, nbytes)  # Skip until their meaning is known.
        # Read data in as a float32 array; all record values are float32.
        arr = np.fromfile(f, np.float32)

    # ---- Decode records into an xarray Dataset --------------------------------
    fileStructure = DMVfileStructure(filename)
    # Decode the base_time from the filename (YYMMDD prefix of the base name).
    # NOTE(review): the century is hard-coded to 20xx here, whereas the SUM
    # branch above allows 19xx file names — confirm if pre-2000 files matter.
    stem = filename.split('/')[-1]
    base_time = pd.to_datetime('20' + stem[0:2] + '-' + stem[2:4] + '-' + stem[4:6])
    # Times (decimal hours) are the first value of every record.
    Time = arr[fileStructure['variableOffset']::fileStructure['numberOfValues']]
    # Create a Pandas dataframe for all independent variables.
    df = pd.DataFrame({}, index=base_time + pd.to_timedelta(Time, unit='h'))
    df.index.name = 'time'
    for offset, variable in enumerate(variables):
        if offset >= fileStructure['numberOfVariables']:
            break
        df[variable] = arr[fileStructure['variableOffset'] + offset::fileStructure['numberOfValues']]
    # Creates an xarray dataset from the Pandas dataframe.
    ds = xr.Dataset().from_dataframe(df)
    # Determines the wavenumber scales and adds them to the xarray dataset.
    determineWavenumberScales(filename)
    # Add data for dependent variables: one (time, wavenumber) array each.
    for variable, offset in zip(dependentVariables, fileStructure['dataOffset']):
        nwn = len(ds[wavenumberScales[variable]])
        ds[variable] = xr.DataArray(
            np.array([arr[int((record * fileStructure['recordSize'] / 4) + offset):
                          int((record * fileStructure['recordSize'] / 4) + offset + nwn)]
                      for record in range(fileStructure['numberOfRecords'])]),
            coords=[df.index, ds[wavenumberScales[variable]].data],
            dims=['time', wavenumberScales[variable]])
    # Global attributes.
    ds['FileHistory'] = FileHistory
    # base_time: seconds since the Unix epoch for the first record.
    ds['base_time'] = np.int32(
        (base_time - pd.to_datetime('1970-01-01') + pd.Timedelta(Time[0], unit='h')).total_seconds())
    ds['base_time'].attrs['longname'] = 'Base time in Epoch'
    ds['base_time'].attrs['date'] = df.index[0].strftime('%Y-%m-%d,%H:%M:%S GMT')
    # date as a YYMMDD integer taken from the file name.
    ds['date'] = np.int32(stem[0:6])
    # time_offset: seconds from base_time for every record.
    ds['time_offset'] = np.array(
        [(pd.Timedelta(time, unit='h') - pd.Timedelta(Time[0], unit='h')).total_seconds() for time in Time])
    ds['time_offset'].attrs['longname'] = 'Time offset from base_time'
    # Adds attributes for each independent variable.
    for offset, variable in enumerate(variables):
        if offset >= fileStructure['numberOfVariables']:
            break
        for attribute in variables[variable]:
            ds[variable].attrs[attribute] = variables[variable][attribute]
    # Adds attributes for each dependent variable.
    for variable in dependentVariables:
        for attribute in variables[variable]:
            ds[variable].attrs[attribute] = variables[variable][attribute]
    return ds