-
Notifications
You must be signed in to change notification settings - Fork 21
/
raw_value_streams.py
253 lines (246 loc) · 13.6 KB
/
raw_value_streams.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
'''
This file can be used to combine a coefficient matrix file (non-pre-solved) and associated marginals
from GAMS gdx solution file to produce value streams for the variables of the model.
'''
import pandas as pd
import gdxpds
import subprocess
from datetime import datetime
import logging
#Module-level logger shared by the functions in this file; an empty name resolves to the root logger
logger = logging.getLogger('')
def get_value_streams(solution_file, problem_file, var_list=None, con_list=None, prob_file_type='jacobian'):
    '''
    Create dataframe of value streams for each variable of interest based on variable coefficients
    in a coefficient matrix file and constraint and variable marginals in the associated GAMS gdx solution file.
    Note that all strings are lowercased because GAMS is case insensitive.
    Args:
        solution_file (string): Full path to GAMS gdx solution file.
        problem_file (string): Full path to the file of the model associated with the solution file.
        var_list (list of strings): List of lowercased variable names that are of interest. If no list is given,
            value streams will be created for all variables. If a list is given, variables not on the list will be
            filtered out.
        con_list (list of strings): List of lowercased constraint names that are of interest. If no list is given,
            value streams will be created for all constraints. If a list is given, constraints not on the list will be
            filtered out.
        prob_file_type (string): Either 'jacobian' or 'mps'.
    Returns:
        df (pandas dataframe): Value streams of variables with these columns:
            var_name (string): Name of variable.
            var_set (string): Period-separated sets of the variable.
            con_name (string): Constraint name, '_obj' for objective coefficients, or 'var' for the extra
                rows that carry a variable's own marginal.
            con_set (string): Period-separated sets of the constraint.
            coeff (float): Coefficient of the variable in the constraint or objective (1 for 'var' rows).
            var_level (float): Level of the variable in the solution.
            var_marginal (float): Marginal of the variable in the solution.
            con_level (float): Level of the constraint in the solution.
            con_marginal (float): Marginal of the constraint in the solution, -1 for the objective
                (costs are negative), or the variable marginal for 'var' rows.
            value_per_unit (float): Value per unit of the variable from that constraint (equal to coeff * con_marginal).
            value (float): Value that is produced by the variable from the constraint (equal to var_level * value_per_unit).
    Raises:
        ValueError: If prob_file_type is neither 'jacobian' nor 'mps'.
    '''
    #Reject unknown file types up front; otherwise df_prob would never be assigned
    #and the failure would surface later as an unrelated unbound-variable error.
    if prob_file_type == 'jacobian':
        df_prob = get_df_jacobian(problem_file, var_list, con_list)
    elif prob_file_type == 'mps':
        df_prob = get_df_mps(problem_file, var_list, con_list)
    else:
        raise ValueError(f"prob_file_type must be 'jacobian' or 'mps', got {prob_file_type!r}")

    #Only symbols that actually appear in the problem file need to be pulled from the solution
    var_list_prob = df_prob['var_name'].unique()
    con_list_prob = df_prob['con_name'].unique()
    dfs_solution = get_df_solution(solution_file, var_list_prob, con_list_prob)

    #Left merges keep every coefficient row even when the solution has no matching record
    df = pd.merge(left=df_prob, right=dfs_solution['vars'], how='left', on=['var_name', 'var_set'], sort=False)
    df = pd.merge(left=df, right=dfs_solution['cons'], how='left', on=['con_name', 'con_set'], sort=False)

    #The objective essentially has a con_marginal of -1
    df.loc[df['con_name'] == '_obj', 'con_marginal'] = -1

    #When variable has a marginal, this marginal must be added as a new row (one per variable/set pair).
    #NOTE(review): the new rows keep the con_set and con_level of whichever constraint row came first,
    #and rows whose var_marginal is missing (NaN) also pass the != 0 test -- confirm this is intended.
    df_var_marg = df[df['var_marginal'] != 0].drop_duplicates(subset=['var_name', 'var_set']).copy()
    df_var_marg['con_name'] = 'var'
    df_var_marg['coeff'] = 1
    df_var_marg['con_marginal'] = df_var_marg['var_marginal']

    #concatenate back into original dataframe
    df = pd.concat([df, df_var_marg], ignore_index=True)

    #Calculate value streams
    df['value_per_unit'] = df['coeff'] * df['con_marginal']
    df['value'] = df['value_per_unit'] * df['var_level']
    return df
def _split_mps_name(token):
    '''Split an mps name like name(elem1.elem2) into (name, elements); elements is '' when there are no parentheses.'''
    name, paren, elements = token.partition('(')
    if not paren:
        return name, ''
    #Drop the closing parenthesis and any quoting around set elements
    return name, elements[:-1].replace('"', '').replace("'", '')


def get_df_mps(mps_file, var_list=None, con_list=None):
    '''
    Create dataframe of coefficients for each variable in each constraint and objective function. Note that all
    strings are lowercased because GAMS is case insensitive.
    Args:
        mps_file (string): Full path to the non-presolved mps file of the model associated with the solution file.
        var_list (list of strings): List of lowercased variable names that are of interest. If no list is given,
            value streams will be created for all variables. If a list is given, variables not on the list will be
            filtered out.
        con_list (list of strings): List of lowercased constraint names that are of interest. If no list is given,
            value streams will be created for all constraints. If a list is given, constraints not on the list will be
            filtered out. The objective ('_obj') is always kept.
    Returns:
        df (pandas dataframe): Coefficients with these columns (empty, but with the same columns, if nothing matches):
            var_name (string): Name of variable.
            var_set (string): Period-separated sets of the variable.
            con_name (string): Constraint name or '_obj' for objective coefficients.
            con_set (string): Period-separated sets of the constraint.
            coeff (float): Coefficient of the variable in the constraint or objective.
    Raises:
        ValueError: If a data line of the COLUMNS section has fewer than three fields, or a coefficient is not numeric.
    '''
    #Gather all rows of mps between 'COLUMNS' and 'RHS' into a dataframe,
    #separating the set elements from the variable and constraint names, and separating
    #the coefficients into their own column
    start = datetime.now()
    #Build the lookup sets once rather than once per line of the file
    var_filter = None if var_list is None else set(var_list)
    con_filter = None if con_list is None else set(con_list) | {'_obj'}
    mps_ls = []
    in_columns = False
    with open(mps_file) as mpsfile:
        for line in mpsfile:
            if not in_columns:
                if line[:7] == 'COLUMNS':
                    in_columns = True
                continue
            if line[:3] == 'RHS':
                break
            #split on whitespace
            ls = line.split()
            #Blank lines and '*' comment lines carry no coefficients
            if not ls or line.startswith('*'):
                continue
            #Integer marker lines have the literal 'MARKER' (quotes included) as their second field
            if len(ls) > 1 and ls[1] == "'MARKER'":
                continue
            if len(ls) > 3:
                #There may be whitespace in one of the set elements. Recombine the pieces of any
                #name whose opening parenthesis has not been closed yet.
                i = 0
                while i < len(ls):
                    if '(' in ls[i] and ')' not in ls[i]:
                        j = i
                        #Stop at the last token so an unbalanced name cannot run off the end
                        while j < len(ls) - 1 and ')' not in ls[j]:
                            j = j + 1
                        ls[i:j+1] = [' '.join(ls[i:j+1])]
                    i = i + 1
            if len(ls) < 3:
                raise ValueError('Malformed COLUMNS line in ' + str(mps_file) + ': ' + line.rstrip())
            var_name, var_set = _split_mps_name(ls[0])
            if var_filter is not None and var_name.lower() not in var_filter:
                continue
            #A line holds one row/value pair and may hold a second one after it
            for k in range(1, len(ls) - 1, 2):
                con_name, con_set = _split_mps_name(ls[k])
                if con_filter is None or con_name.lower() in con_filter:
                    mps_ls.append([var_name, var_set, con_name, con_set, float(ls[k + 1])])
    #Passing columns here keeps the frame well-formed even when no rows were collected
    df_mps = pd.DataFrame(mps_ls, columns=['var_name', 'var_set', 'con_name', 'con_set', 'coeff'])
    for col in ['var_name', 'var_set', 'con_name', 'con_set']:
        df_mps[col] = df_mps[col].str.lower()
    logger.info('mps read: ' + str(datetime.now() - start))
    return df_mps
def get_df_jacobian(jacobian_file, var_list=None, con_list=None):
    '''
    Create dataframe of coefficients for each variable in each constraint and objective function. Note that all
    strings are lowercased because GAMS is case insensitive.
    For each of the 'j' (variable) and 'i' (equation) sets, a csv file named <jacobian file stem>_<set>.csv is
    written next to the jacobian file by gdxdump; these files are not removed afterwards.
    Args:
        jacobian_file (string): Full path to the non-presolved jacobian gdx file of the model associated with the solution file.
        var_list (list of strings): List of lowercased variable names that are of interest. If no list is given,
            value streams will be created for all variables. If a list is given, variables not on the list will be
            filtered out.
        con_list (list of strings): List of lowercased constraint names that are of interest. If no list is given,
            value streams will be created for all constraints. If a list is given, constraints not on the list will be
            filtered out.
    Returns:
        df (pandas dataframe): Coefficients with these columns:
            var_name (string): Name of variable.
            var_set (string): Period-separated sets of the variable ('' when the variable has no sets).
            con_name (string): Name of constraint.
            con_set (string): Period-separated sets of the constraint ('' when the constraint has no sets).
            coeff (float): Coefficient of the variable in the constraint.
    Raises:
        subprocess.CalledProcessError: If gdxdump exits with a non-zero status.
    '''
    start = datetime.now()
    df_A = gdxpds.to_dataframe(jacobian_file, 'A', old_interface=False)
    #Strip only a trailing .gdx extension so the csv path can never equal the gdx path
    #and '.gdx' appearing elsewhere in the path is left alone
    if jacobian_file.lower().endswith('.gdx'):
        file_stem = jacobian_file[:-4]
    else:
        file_stem = jacobian_file
    #'j' means variable, 'i' means equation
    symbol_specs = [
        ('j', 'var_name', 'var_set', var_list),
        ('i', 'con_name', 'con_set', con_list),
    ]
    for x, name_col, set_col, lst in symbol_specs:
        #For i (equation) and j (variable) sets, I need to dump to csv to get the Text column, ugh
        x_file = f'{file_stem}_{x}.csv'
        #An argument list works with or without a shell on every platform, and check=True stops
        #here on failure instead of silently reading a missing or stale csv below
        subprocess.run(
            ['gdxdump', jacobian_file, 'format=csv', 'epsout=0', f'symb={x}', f'output={x_file}', 'CSVSetText'],
            check=True,
        )
        df_x = pd.read_csv(x_file)
        text = df_x['Text'].str.rstrip(')').str.replace(',', '.', regex=False).str.lower()
        #partition always yields three columns, so symbols without sets get '' for their set
        #(matching the other readers in this file) rather than a missing value
        parts = text.str.partition('(')
        df_x[name_col] = parts[0]
        df_x[set_col] = parts[2]
        df_x = df_x.rename(columns={'Dim1': x})
        df_x = df_x.drop(columns=['Text'])
        if lst is not None:
            df_x = df_x[df_x[name_col].isin(lst)].copy()
        #inner merge with df_A (note that map may be much faster)
        df_A = df_A.merge(df_x, on=x, how='inner')
    df_A = df_A.rename(columns={'Value': 'coeff'})
    df_A = df_A[['var_name', 'var_set', 'con_name', 'con_set', 'coeff']].copy()
    logger.info('jacobian read: ' + str(datetime.now() - start))
    return df_A
def get_df_solution(solution_file, var_list_prob, con_list_prob):
    '''
    Read a GAMS gdx solution file and return the levels and marginals of the requested symbols.
    Note that all strings are lowercased because GAMS is case insensitive.
    Args:
        solution_file (string): Full path to GAMS gdx solution file.
        var_list_prob (list of strings): Lowercased names of the variables to extract.
        con_list_prob (list of strings): Lowercased names of the constraints to extract.
    Returns:
        dict with two pandas dataframes, keyed 'vars' (variables) and 'cons' (constraints).
        Columns in the 'vars' dataframe:
            var_name (string): Name of variable.
            var_set (string): Period-separated sets of the variable.
            var_level (float): Level of the variable in the solution.
            var_marginal (float): Marginal of the variable in the solution.
        Columns in the 'cons' dataframe:
            con_name (string): Constraint name.
            con_set (string): Period-separated sets of the constraint.
            con_level (float): Level of the constraint in the solution.
            con_marginal (float): Marginal of the constraint in the solution.
    '''
    read_started = datetime.now()
    raw_frames = gdxpds.to_dataframes(solution_file)
    logger.info('solution read: ' + str(datetime.now() - read_started))

    reformat_started = datetime.now()
    #Key the frames by lowercased symbol name so lookups with lowercased names succeed
    frames_by_name = {}
    for symbol, frame in raw_frames.items():
        frames_by_name[symbol.lower()] = frame

    var_columns = {'Level': 'var_level', 'Marginal': 'var_marginal', 'sym_name': 'var_name', 'sym_set': 'var_set'}
    con_columns = {'Level': 'con_level', 'Marginal': 'con_marginal', 'sym_name': 'con_name', 'sym_set': 'con_set'}
    result = {
        'vars': get_df_symbols(frames_by_name, var_list_prob).rename(columns=var_columns),
        'cons': get_df_symbols(frames_by_name, con_list_prob).rename(columns=con_columns),
    }
    logger.info('solution reformatted: ' + str(datetime.now() - reformat_started))
    return result
def get_df_symbols(dfs, symbols):
    '''
    Create dataframe of marginals and levels of symbols of interest.
    Note that all strings are lowercased because GAMS is case insensitive.
    The dataframes in dfs are not modified.
    Args:
        dfs (dict of pandas dataframes): A result of gdxpds.to_dataframes(), with keys lowercased.
            Each dataframe is expected to have one column per set, followed by a 'Level' column
            and a 'Marginal' column (any further columns are ignored).
        symbols (list of strings): List of lowercased symbol names that are of interest. Names that
            are not keys of dfs are skipped.
    Returns:
        df_syms (pandas dataframe): dataframe of symbol levels and marginals with these columns
        (empty, but with the same columns, if none of the symbols is found):
            sym_name (string): Name of symbol, lowercased
            sym_set (string): Period-separated sets of the symbol, lowercased ('' for symbols without sets)
            Level (float): Level of the symbol in the solution.
            Marginal (float): Marginal of the symbol in the solution.
    '''
    out_cols = ['sym_name', 'sym_set', 'Level', 'Marginal']
    df_syms = []
    for sym_name in symbols:
        if sym_name not in dfs:
            continue
        source = dfs[sym_name]
        #Every column before 'Level' is a set column
        level_col = source.columns.get_loc('Level')
        #concatenate all the set columns into one period-separated column
        sym_set = None
        for s in range(level_col):
            set_col = source.iloc[:, s]
            #The message says double space, and single spaces inside set elements are handled by the
            #mps reader in this file, so the pattern looks for two consecutive spaces
            if set_col.str.contains(r'[.\'"()]|  ').any():
                logger.info('Warning: Invalid character (dot, quote, parens, double space) found in column #' + str(s) + ' of ' + sym_name)
            sym_set = set_col if sym_set is None else sym_set + '.' + set_col
        if sym_set is None:
            #Symbol without sets
            sym_set = ''
        #Work on a copy of just the needed columns so the caller's dataframe is left untouched
        df_sym = source[['Level', 'Marginal']].copy()
        df_sym.insert(0, 'sym_name', sym_name)
        df_sym.insert(1, 'sym_set', sym_set)
        df_syms.append(df_sym)
    if not df_syms:
        #pd.concat raises on an empty list; return a well-formed empty frame instead
        return pd.DataFrame(columns=out_cols)
    df_syms = pd.concat(df_syms).reset_index(drop=True)
    for col in ['sym_name', 'sym_set']:
        df_syms[col] = df_syms[col].str.lower()
    return df_syms