This repository has been archived by the owner on Mar 4, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathinterpolate_and_create_new_pytable.py
executable file
·227 lines (177 loc) · 10.1 KB
/
interpolate_and_create_new_pytable.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
from sys import exit
from datetime import datetime
from tables import openFile, IsDescription, Float64Col
from scipy import array
import numpy as np
import matplotlib.pyplot as plt
import question
from query_yes_no import query_yes_no
from variable_limits import low_limit, high_limit
def interpolate(var1, var2):
""" Interpolate to make two variable lists equal length
var = ['variable', 'filename', 'station_ID', 'kind']
"""
print ''
print 'Interpolating and creating new table...'
print ''
# declare a class
class Variable1(IsDescription):
variable1 = Float64Col()
variable2 = Float64Col()
intermediate1 = var1[0][1].replace('data_s' + str(var1[0][2]) + '_', '')
intermediate2 = intermediate1.partition(' -')
start_date = intermediate2[0]
intermediate3 = intermediate2[2][1:]
end_date = intermediate3.replace('.h5','')
filename = 'interpolated_table_' + str(var1[0][0]) + '_station' + str(var1[0][2]) + '_with_' + str(var2[0][0]) + '_station' + str(var2[0][2]) + '_' + start_date + '_' + end_date + '.h5'
"""
intermediate1 = var1[0][1].replace('data_s' + str(var1[0][2]) + '_', '')
station_id_and_date_interval1 = intermediate1.replace('.h5', '')
intermediate2 = var1[-1][1].replace('data_', '')
station_id_and_date_interval2 = intermediate2.replace('.h5', '')
filename = 'cor_' + str(var1[0][0]) + '_' + str(var1[0][2]) + '_' + str(var2[0][0]) + '_' + str(var2[0][2]) + '_' + station_id_and_date_interval1 + '_' + station_id_and_date_interval2 + '.h5'
"""
# make new table
data_cor = openFile(filename, 'w')
group_variable1 = data_cor.createGroup("/", 'correlation')
table_variable1 = data_cor.createTable(group_variable1, 'table', Variable1)
# Insert a new particle record
particle = table_variable1.row
for i in range(len(var1)):
# open data file 1
data_var1 = openFile(str(var1[i][1]), 'r')
# fetch timestamps and variable 1 from station 1
timestamps_station1 = eval("data_var1.root.s%s.%s.col('timestamp')" % (str(var1[i][2]), str(var1[i][3])))
var1_station1 = eval("data_var1.root.s%s.%s.col('%s')" % (str(var1[i][2]), str(var1[i][3]), str(var1[i][0])))
data_var1.close()
if len(var1_station1.shape) != 1:
print 'There are %d plates with an individual %s value.' % (var1_station1.shape[1], str(var1[i][0]))
plate_number1 = int(question.digit_plate("Enter the plate number that you want to you use in your correlation analysis ( e.g. '1' ): ", var1_station1.shape[1]))
var1_station1 = var1_station1[:, plate_number1 - 1]
var1_station1.tolist()
elif len(var1_station1.shape) == 1:
var1_station1 = var1_station1.tolist()
else:
print 'weird'
# zip the hisparc timestamps together with the event_rates,
# sort them on the basis of timestamp value
variable1_sorted = sorted(zip(timestamps_station1, var1_station1))
del timestamps_station1, var1_station1
if var1[i][0] in low_limit:
var_list_without_bad_data = []
for t1, v1 in variable1_sorted:
if low_limit[var1[i][0]] <= v1 <= high_limit[var1[i][0]]:
var_list_without_bad_data.append((t1, v1))
if len(variable1_sorted) != len(var_list_without_bad_data):
print 'Removed %d rows of bad %s data.' % (len(variable1_sorted) - len(var_list_without_bad_data), var1[i][0])
if len(var_list_without_bad_data) == 0:
print 'Exit. In your data file there is no valid %s data' % (var1[i][0])
exit()
variable1_sorted = var_list_without_bad_data
del var_list_without_bad_data
length_var1 = len(variable1_sorted)
# open data file 2
data_var2 = openFile(str(var2[i][1]), 'r')
#fetch timestamps and variable2 from station 2
timestamps_station2 = eval("data_var2.root.s%s.%s.col('timestamp')" % (str(var2[i][2]), str(var2[i][3])))
var2_station2 = eval("data_var2.root.s%s.%s.col('%s')" % (str(var2[i][2]), str(var2[i][3]), str(var2[i][0])))
data_var2.close()
if len(var2_station2.shape) != 1:
print 'There are %d plates with an individual %s value.' % (var2_station2.shape[1], str(var2[i][0]))
plate_number1 = int(question.digit_plate("Enter the plate number that you want to you use in your correlation analysis ( e.g. '1' ): ", var2_station2.shape[1]))
var2_station2 = var2_station2[:, plate_number1 - 1]
var2_station2.tolist()
elif len(var2_station2.shape) == 1:
var2_station2 = var2_station2.tolist()
else:
print 'weird'
# zip the hisparc time stamps together with the event_rates,
# sort them on the basis of timestamp value
variable2_sorted = sorted(zip(timestamps_station2, var2_station2))
del timestamps_station2, var2_station2
if var2[i][0] in low_limit:
var_list_without_bad_data2 = []
for t2,v2 in variable2_sorted:
if v2 >= low_limit[var2[i][0]] and v2 <= high_limit[var2[i][0]]:
var_list_without_bad_data2.append((t2, v2))
if len(variable2_sorted) != len(var_list_without_bad_data2):
print 'Removed %d rows of bad %s data.' % (len(variable2_sorted) - len(var_list_without_bad_data2), var2[i][0])
if len(var_list_without_bad_data2) == 0:
print 'Exit. In your data file there is no valid %s data' % (var2[i][0])
exit()
variable2_sorted = var_list_without_bad_data2
del var_list_without_bad_data2
length_var2 = len(variable2_sorted)
if length_var1 != length_var2:
#print 'variable2_sorted[:10]', variable2_sorted[:10]
#print 'variable1_sorted[:10]', variable1_sorted[:10]
# Apply linear interpolation
if length_var1 > length_var2:
x, variable1 = zip(*variable1_sorted)
xp, fp = zip(*variable2_sorted)
result = np.interp(x, xp, fp)
variable2 = result
del variable1_sorted, variable2_sorted, result, x, xp, fp
for i in range(len(variable1)):
particle['variable1'] = variable1[i]
particle['variable2'] = variable2[i]
particle.append()
del variable1, variable2
table_variable1.flush()
elif length_var1 < length_var2:
xp, fp = zip(*variable1_sorted)
x, variable2 = zip(*variable2_sorted)
result = np.interp(x, xp, fp)
variable1 = result
del variable1_sorted, variable2_sorted, result, x, xp, fp
for i in range(len(variable1)):
particle['variable1'] = variable1[i]
particle['variable2'] = variable2[i]
particle.append()
del variable1, variable2
table_variable1.flush()
else:
print ''
print 'No interpolation necessary'
print ''
timestamps_station1, var1_station1 = zip(*variable1_sorted)
timestamps_station2, var1_station2 = zip(*variable2_sorted)
combo_two_vars = zip(timestamps_station1, var1_station1, timestamps_station2, var1_station2)
combo_new = []
for combo in combo_two_vars:
if combo[0] == combo[2]:
combo_new.append([combo[1], combo[3]])
var1, var2 = zip(*combo_new)
for i in range(len(var1)):
particle['variable1'] = var1[i]
particle['variable2'] = var2[i]
particle.append()
table_variable1.flush()
data_cor.close()
return filename
"""
import tables
data = tables.openFile('data_s501_2011,7,21 - 2011,7,22.h5','r')
colnames_shower = data.root.s501.events.colnames
index = colnames_shower.index('baseline')
colnames_shower = colnames_shower[index+1:]
colnames_weather = data.root.s501.weather.colnames
data.close()
for var_shower in colnames_shower:
for var_weather in colnames_weather:
print ''
print var_shower, var_weather
var1 = [('%s' % (var_shower), 'data_s501_2011,7,21 - 2011,7,22.h5', '501', 'events')]
var2 = [('%s' % (var_weather), 'data_s501_2011,7,21 - 2011,7,22.h5', '501', 'weather')]
interpolate(var1,var2)
#var1 = [('baseline', 'data_s501_2011,7,21 - 2011,7,22.h5', '501', 'events')]
#var2 = [('event_rate', 'data_s501_2011,7,21 - 2011,7,22.h5', '501', 'events')]
#var1 = [('event_rate', 'data_s501_2011,12,1 - 2011,12,23.h5', '501', 'events')]
#var2 = [('humidity_outside', 'data_s501_2011,12,1 - 2011,12,23.h5', '501', 'weather')]
#var1 = [('event_rate', 'data_s501_2011,5,23 - 2011,6,1.h5', '501', 'events'), ('event_rate', 'data_s501_2011,6,1 - 2011,6,30.h5', '501', 'events'), ('event_rate', 'data_s501_2011,7,1 - 2011,7,31.h5', '501', 'events'), ('event_rate', 'data_s501_2011,8,1 - 2011,8,31.h5', '501', 'events'), ('event_rate', 'data_s501_2011,9,1 - 2011,9,30.h5', '501', 'events'), ('event_rate', 'data_s501_2011,10,1 - 2011,10,15.h5', '501', 'events')]
#var2 = [('barometer', 'data_s501_2011,5,23 - 2011,6,1.h5', '501', 'weather'), ('barometer', 'data_s501_2011,6,1 - 2011,6,30.h5', '501', 'weather'), ('barometer', 'data_s501_2011,7,1 - 2011,7,31.h5', '501', 'weather'), ('barometer', 'data_s501_2011,8,1 - 2011,8,31.h5', '501', 'weather'), ('barometer', 'data_s501_2011,9,1 - 2011,9,30.h5', '501', 'weather'), ('barometer', 'data_s501_2011,10,1 - 2011,10,15.h5', '501', 'weather')]
#var1 = [('event_rate', 'data_s501_2011,7,1 - 2011,7,10.h5', '501', 'events'), ('event_rate', 'data_s501_2011,7,11 - 2011,7,20.h5', '501', 'events')]
#var2 = [('barometer', 'data_s501_2011,7,1 - 2011,7,10.h5', '501', 'weather'), ('barometer', 'data_s501_2011,7,11 - 2011,7,20.h5', '501', 'weather')]
#var1 = [('event_rate', 'data_s501_2011,7,1 - 2011,7,10.h5', '501', 'events')]
#var2 = [('barometer', 'data_s501_2011,7,1 - 2011,7,10.h5', '501', 'weather')]
"""