From a55de82f7f274f836c3b7d5fd4f7e440e351e198 Mon Sep 17 00:00:00 2001 From: Darryl Melander Date: Tue, 23 May 2023 17:58:46 -0600 Subject: [PATCH] Remove unused code; standardize line endings --- egret/parsers/rts_gmlc/_reserves.py | 128 +- egret/parsers/rts_gmlc/parsed_cache.py | 534 +++---- egret/parsers/rts_gmlc/parser.py | 1879 ++++++++++++------------ 3 files changed, 1257 insertions(+), 1284 deletions(-) diff --git a/egret/parsers/rts_gmlc/_reserves.py b/egret/parsers/rts_gmlc/_reserves.py index f4b8bb11..16217495 100644 --- a/egret/parsers/rts_gmlc/_reserves.py +++ b/egret/parsers/rts_gmlc/_reserves.py @@ -1,64 +1,64 @@ -# ___________________________________________________________________________ -# -# EGRET: Electrical Grid Research and Engineering Tools -# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC -# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. -# Government retains certain rights in this software. -# This software is distributed under the Revised BSD License. -# ___________________________________________________________________________ - -from __future__ import annotations - -from typing import NamedTuple, Optional, Sequence - -reserve_name_map = { - 'Spin_Up': 'spinning_reserve_requirement', - 'Reg_Up': 'regulation_up_requirement', - 'Reg_Down': 'regulation_down_requirement', - 'Flex_Up': 'flexible_ramp_up_requirement', - 'Flex_Down': 'flexible_ramp_down_requirement' -} - -def is_valid_reserve_name(name:str, model_dict:dict=None): - if name in reserve_name_map: - return True - if name.find('_R') < 0: - return False - res, area = name.split('_R', 1) - return (res in reserve_name_map) and \ - ((model_dict is None) or (area in model_dict['elements']['area'])) - -class ScalarReserveValue(NamedTuple): - ''' A reserve type, scope, and scalar value. - - If area_name is None, this is a global reserve value. - ''' - reserve_type: str - area_name: Optional[str] - value: float - -class ScalarReserveData(): - ''' Scalar reserve values that should only be applied to one type of model. - ''' - def __init__(self, - da_scalars: Sequence[ScalarReserveValue], - rt_scalars: Sequence[ScalarReserveValue]): - self._da_scalars = da_scalars - self._rt_scalars = rt_scalars - - @property - def da_scalars(self) -> Sequence[ScalarReserveValue]: - ''' Scalar reserve values that only apply to DAY_AHEAD models - ''' - return self._da_scalars - - @property - def rt_scalars(self) -> Sequence[ScalarReserveValue]: - ''' Scalar reserve values that only apply to REAL_TIME models - ''' - return self._rt_scalars - - def get_simulation_reserves(self, simulation_type:str) -> Sequence[ScalarReserveValue]: - ''' Get scalar reserve values that only apply the requested simulation type - ''' - return self._rt_scalars if simulation_type == 'REAL_TIME' else self._da_scalars +# ___________________________________________________________________________ +# +# EGRET: Electrical Grid Research and Engineering Tools +# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +# Government retains certain rights in this software. +# This software is distributed under the Revised BSD License. 
+# ___________________________________________________________________________ + +from __future__ import annotations + +from typing import NamedTuple, Optional, Sequence + +reserve_name_map = { + 'Spin_Up': 'spinning_reserve_requirement', + 'Reg_Up': 'regulation_up_requirement', + 'Reg_Down': 'regulation_down_requirement', + 'Flex_Up': 'flexible_ramp_up_requirement', + 'Flex_Down': 'flexible_ramp_down_requirement' +} + +def is_valid_reserve_name(name:str, model_dict:dict=None): + if name in reserve_name_map: + return True + if name.find('_R') < 0: + return False + res, area = name.split('_R', 1) + return (res in reserve_name_map) and \ + ((model_dict is None) or (area in model_dict['elements']['area'])) + +class ScalarReserveValue(NamedTuple): + ''' A reserve type, scope, and scalar value. + + If area_name is None, this is a global reserve value. + ''' + reserve_type: str + area_name: Optional[str] + value: float + +class ScalarReserveData(): + ''' Scalar reserve values that should only be applied to one type of model. + ''' + def __init__(self, + da_scalars: Sequence[ScalarReserveValue], + rt_scalars: Sequence[ScalarReserveValue]): + self._da_scalars = da_scalars + self._rt_scalars = rt_scalars + + @property + def da_scalars(self) -> Sequence[ScalarReserveValue]: + ''' Scalar reserve values that only apply to DAY_AHEAD models + ''' + return self._da_scalars + + @property + def rt_scalars(self) -> Sequence[ScalarReserveValue]: + ''' Scalar reserve values that only apply to REAL_TIME models + ''' + return self._rt_scalars + + def get_simulation_reserves(self, simulation_type:str) -> Sequence[ScalarReserveValue]: + ''' Get scalar reserve values that only apply the requested simulation type + ''' + return self._rt_scalars if simulation_type == 'REAL_TIME' else self._da_scalars diff --git a/egret/parsers/rts_gmlc/parsed_cache.py b/egret/parsers/rts_gmlc/parsed_cache.py index 2275b2ba..01b7a3dd 100644 --- a/egret/parsers/rts_gmlc/parsed_cache.py +++ b/egret/parsers/rts_gmlc/parsed_cache.py @@ -1,267 +1,267 @@ -# ___________________________________________________________________________ -# -# EGRET: Electrical Grid Research and Engineering Tools -# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC -# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. -# Government retains certain rights in this software. -# This software is distributed under the Revised BSD License. 
-# ___________________________________________________________________________ - -from __future__ import annotations - -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from typing import Dict - from pandas import DataFrame - from datetime import datetime - -import copy -from datetime import timedelta - -from egret.data.model_data import ModelData - -from ._reserves import reserve_name_map, ScalarReserveData - -class ParsedCache(): - - def __init__(self, model_skeleton:dict, - begin_time:datetime, end_time:datetime, - minutes_per_day_ahead_period:int, minutes_per_real_time_period:int, - timeseries_data:DataFrame, - load_participation_factors:Dict[str,float], - scalar_reserve_data:ScalarReserveData): - self.skeleton = model_skeleton - self.begin_time = begin_time - self.end_time = end_time - self.timeseries_df = timeseries_data - self.minutes_per_period = { - 'DAY_AHEAD': minutes_per_day_ahead_period, - 'REAL_TIME': minutes_per_real_time_period - } - self.load_participation_factors = load_participation_factors - - # Find and save the index of the first row of each sim type in timeseries_df - cur_sim = self.timeseries_df['Simulation'].iat[0] - self._first_indices = {cur_sim:0} - for i in range(1,len(self.timeseries_df)): - if self.timeseries_df['Simulation'].iat[i] != cur_sim: - cur_sim = self.timeseries_df['Simulation'].iat[i] - self._first_indices[cur_sim] = i - - self.scalar_reserve_data = scalar_reserve_data - - - def generate_model(self, simulation_type:str, begin_time:datetime, end_time:datetime) -> ModelData: - """ Create a new model populated with requested data - - Parameters - ---------- - simulation_type:str - Either 'DAY_AHEAD' or 'REAL_TIME' - begin_time:datetime - The earliest time to include in the returned data - end_time:datetime - The earliest time to NOT include in the returned data - """ - md = self.get_new_skeleton() - self.populate_skeleton_with_data(md, simulation_type, begin_time, end_time) - return ModelData(md) - - def get_new_skeleton(self) -> dict: - """ Get a new model dict with system elements but no time-specific data - """ - return copy.deepcopy(self.skeleton) - - def populate_skeleton_with_data(self, skeleton_dict:dict, simulation_type:str, - begin_time:datetime, end_time:datetime) -> None: - """ Update an existing model dict with requested data - - Parameters - ---------- - skeleton_dict:dict - The skeleton model dict to populate with data - simulation_type:str - Either 'DAY_AHEAD' or 'REAL_TIME' - begin_time:datetime - The earliest time to include in the returned data - end_time:datetime - The earliest time to NOT include in the returned data - """ - - #Because pandas includes the end of a range, reduce our end time by one second - end_time = end_time - timedelta(seconds=1) - self._insert_scalar_reserve_data(skeleton_dict, simulation_type) - self._process_timeseries_data(skeleton_dict, simulation_type, begin_time, end_time) - self._insert_system_data(skeleton_dict, simulation_type, begin_time, end_time) - - def get_timeseries_locations(self, simulation_type:str, md:dict) -> Iterable[Tuple[dict, str]]: - ''' Get all locations in the provided model with a defined time series. - - Returns - ------- - Each location is returned as a dict and the name of a key within the dict. - - Remarks - ------- - This method returns time series locations as specified in the RTS-GMLC input that created - this cache. It only returns locations the rts_gmlc parser knows how to map from the input - to a location in the Egret model; other time series, if any, are skipped. 
- ''' - - df = self.timeseries_df - - system = md.data['system'] - loads = md.data['elements']['load'] - generators = md.data['elements']['generator'] - areas = md.data['elements']['area'] - - sim_col = df.columns.get_loc('Simulation') - cat_col = df.columns.get_loc('Category') - obj_col = df.columns.get_loc('Object') - param_col = df.columns.get_loc('Parameter') - - # Go through each timeseries value for this simulation type - for i in range(self._first_indices[simulation_type], len(df)): - if df.iat[i, sim_col] != simulation_type: - break - - category = df.iat[i, cat_col] - - if category == 'Generator': - gen_name = df.iat[i, obj_col] - param = df.iat[i, param_col] - - if param == 'PMin MW': - yield (generators[gen_name], 'p_min') - elif param == 'PMax MW': - yield (generators[gen_name], 'p_max') - - elif category == 'Area': - area_name = df.iat[i, obj_col] - param = df.iat[i, param_col] - assert(param == "MW Load") - for l_d in loads.values(): - # Skip loads from other areas - if l_d['area'] != area_name: - continue - yield (l_d, 'p_load') - yield (l_d, 'q_load') - - elif category == 'Reserve': - res_name = df.iat[i, obj_col] - if res_name in reserve_name_map: - yield (system, reserve_name_map[res_name]) - else: - # reserve name must be _R, - # split into type and area - res_name, area_name = res_name.split("_R", 1) - yield (areas[area_name], reserve_name_map[res_name]) - - - def _process_timeseries_data(self, md:dict, simulation_type:str, - begin_time:datetime, end_time:datetime) -> None: - df = self.timeseries_df - - # Go through each timeseries value for this simulation type - for i in range(self._first_indices[simulation_type], len(df)): - if df.iat[i, df.columns.get_loc('Simulation')] != simulation_type: - break - - category = df.iat[i, df.columns.get_loc('Category')] - - if category == 'Generator': - self._process_generator_timeseries(md, begin_time, end_time, i) - elif category == 'Area': - self._process_area_timeseries(md, begin_time, end_time, i) - elif category == 'Reserve': - self._process_reserve_timeseries(md, begin_time, end_time, i) - - def _process_generator_timeseries(self, md:dict, begin_time:datetime, - end_time:datetime, df_index:int): - df = self.timeseries_df - i = df_index - gen_name = df.iat[i, df.columns.get_loc('Object')] - gen_dict = md['elements']['generator'][gen_name] - param = df.iat[i, df.columns.get_loc('Parameter')] - data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time].to_list() - - if param == 'PMin MW': - gen_dict['p_min'] = { 'data_type': 'time_series', - 'values' : data } - elif param == 'PMax MW': - gen_dict['p_max'] = { 'data_type': 'time_series', - 'values' : data } - else: - raise ValueError(f"Unexpected generator timeseries data: {param}") - - def _process_area_timeseries(self, md:dict, begin_time:datetime, - end_time:datetime, df_index:int): - df = self.timeseries_df - i = df_index - - area_name = df.iat[i, df.columns.get_loc('Object')] - param = df.iat[i, df.columns.get_loc('Parameter')] - assert(param == "MW Load") - data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time] - - skeleton_loads = self.skeleton['elements']['load'] - for bus, load_dict in md['elements']['load'].items(): - # Skip loads from other areas - if load_dict['area'] != area_name: - continue - - # Replace skeleton's p_load with the timeseries data, scaled by the load's - # portion of the area's total load. 
- # Also replace q_load, if present, with timeseries - p_factor = self.load_participation_factors[bus] - # save skeleton's scalar p_load - p_load = skeleton_loads[bus]['p_load'] if 'p_load' in skeleton_loads[bus] else None - # overwrite p_load with timeseries - load_dict['p_load'] = { 'data_type': 'time_series', - 'values' : [v*p_factor for v in data] } - if p_load is not None and 'q_load' in skeleton_loads[bus]: - q_over_p = skeleton_loads[bus]['q_load'] / p_load - load_dict['q_load'] = { 'data_type': 'time_series', - 'values' : [v*q_over_p for v in load_dict['p_load']['values']] } - - def _process_reserve_timeseries(self, md:dict, begin_time:datetime, - end_time:datetime, df_index:int): - df = self.timeseries_df - i = df_index - - res_name = df.iat[i, df.columns.get_loc('Object')] - - if res_name in reserve_name_map: - target_dict = md['system'] - else: - # reserve name must be _R, - # split into type and area - res_name, area_name = res_name.split("_R", 1) - target_dict = md['elements']['area'][area_name] - - data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time] - target_dict[reserve_name_map[res_name]] = { 'data_type': 'time_series', - 'values' : data.to_list() } - - def _insert_system_data(self, md:dict, simulation_type:str, - begin_time:datetime, end_time:datetime): - md['system']['time_period_length_minutes'] = self.minutes_per_period[simulation_type] - - df = self.timeseries_df - sample_df = df.iat[self._first_indices[simulation_type], df.columns.get_loc('Series')] - dates = sample_df[begin_time:end_time].index - md['system']['time_keys'] = [dt.strftime('%Y-%m-%d %H:%M') for dt in dates] - - def _insert_scalar_reserve_data(self, md:dict, simulation_type:str): - ''' Insert scalar reserve values into the model dict - ''' - system = md['system'] - areas = md['elements']['area'] - - reserve_list = self.scalar_reserve_data.get_simulation_reserves(simulation_type) - for res in reserve_list: - if res.area_name is None: - target_dict = system - else: - target_dict = areas[res.area_name] - target_dict[reserve_name_map[res.reserve_type]] = res.value +# ___________________________________________________________________________ +# +# EGRET: Electrical Grid Research and Engineering Tools +# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +# Government retains certain rights in this software. +# This software is distributed under the Revised BSD License. 
+# ___________________________________________________________________________ + +from __future__ import annotations + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from typing import Dict + from pandas import DataFrame + from datetime import datetime + +import copy +from datetime import timedelta + +from egret.data.model_data import ModelData + +from ._reserves import reserve_name_map, ScalarReserveData + +class ParsedCache(): + + def __init__(self, model_skeleton:dict, + begin_time:datetime, end_time:datetime, + minutes_per_day_ahead_period:int, minutes_per_real_time_period:int, + timeseries_data:DataFrame, + load_participation_factors:Dict[str,float], + scalar_reserve_data:ScalarReserveData): + self.skeleton = model_skeleton + self.begin_time = begin_time + self.end_time = end_time + self.timeseries_df = timeseries_data + self.minutes_per_period = { + 'DAY_AHEAD': minutes_per_day_ahead_period, + 'REAL_TIME': minutes_per_real_time_period + } + self.load_participation_factors = load_participation_factors + + # Find and save the index of the first row of each sim type in timeseries_df + cur_sim = self.timeseries_df['Simulation'].iat[0] + self._first_indices = {cur_sim:0} + for i in range(1,len(self.timeseries_df)): + if self.timeseries_df['Simulation'].iat[i] != cur_sim: + cur_sim = self.timeseries_df['Simulation'].iat[i] + self._first_indices[cur_sim] = i + + self.scalar_reserve_data = scalar_reserve_data + + + def generate_model(self, simulation_type:str, begin_time:datetime, end_time:datetime) -> ModelData: + """ Create a new model populated with requested data + + Parameters + ---------- + simulation_type:str + Either 'DAY_AHEAD' or 'REAL_TIME' + begin_time:datetime + The earliest time to include in the returned data + end_time:datetime + The earliest time to NOT include in the returned data + """ + md = self.get_new_skeleton() + self.populate_skeleton_with_data(md, simulation_type, begin_time, end_time) + return ModelData(md) + + def get_new_skeleton(self) -> dict: + """ Get a new model dict with system elements but no time-specific data + """ + return copy.deepcopy(self.skeleton) + + def populate_skeleton_with_data(self, skeleton_dict:dict, simulation_type:str, + begin_time:datetime, end_time:datetime) -> None: + """ Update an existing model dict with requested data + + Parameters + ---------- + skeleton_dict:dict + The skeleton model dict to populate with data + simulation_type:str + Either 'DAY_AHEAD' or 'REAL_TIME' + begin_time:datetime + The earliest time to include in the returned data + end_time:datetime + The earliest time to NOT include in the returned data + """ + + #Because pandas includes the end of a range, reduce our end time by one second + end_time = end_time - timedelta(seconds=1) + self._insert_scalar_reserve_data(skeleton_dict, simulation_type) + self._process_timeseries_data(skeleton_dict, simulation_type, begin_time, end_time) + self._insert_system_data(skeleton_dict, simulation_type, begin_time, end_time) + + def get_timeseries_locations(self, simulation_type:str, md:dict) -> Iterable[Tuple[dict, str]]: + ''' Get all locations in the provided model with a defined time series. + + Returns + ------- + Each location is returned as a dict and the name of a key within the dict. + + Remarks + ------- + This method returns time series locations as specified in the RTS-GMLC input that created + this cache. It only returns locations the rts_gmlc parser knows how to map from the input + to a location in the Egret model; other time series, if any, are skipped. 
+ ''' + + df = self.timeseries_df + + system = md.data['system'] + loads = md.data['elements']['load'] + generators = md.data['elements']['generator'] + areas = md.data['elements']['area'] + + sim_col = df.columns.get_loc('Simulation') + cat_col = df.columns.get_loc('Category') + obj_col = df.columns.get_loc('Object') + param_col = df.columns.get_loc('Parameter') + + # Go through each timeseries value for this simulation type + for i in range(self._first_indices[simulation_type], len(df)): + if df.iat[i, sim_col] != simulation_type: + break + + category = df.iat[i, cat_col] + + if category == 'Generator': + gen_name = df.iat[i, obj_col] + param = df.iat[i, param_col] + + if param == 'PMin MW': + yield (generators[gen_name], 'p_min') + elif param == 'PMax MW': + yield (generators[gen_name], 'p_max') + + elif category == 'Area': + area_name = df.iat[i, obj_col] + param = df.iat[i, param_col] + assert(param == "MW Load") + for l_d in loads.values(): + # Skip loads from other areas + if l_d['area'] != area_name: + continue + yield (l_d, 'p_load') + yield (l_d, 'q_load') + + elif category == 'Reserve': + res_name = df.iat[i, obj_col] + if res_name in reserve_name_map: + yield (system, reserve_name_map[res_name]) + else: + # reserve name must be _R, + # split into type and area + res_name, area_name = res_name.split("_R", 1) + yield (areas[area_name], reserve_name_map[res_name]) + + + def _process_timeseries_data(self, md:dict, simulation_type:str, + begin_time:datetime, end_time:datetime) -> None: + df = self.timeseries_df + + # Go through each timeseries value for this simulation type + for i in range(self._first_indices[simulation_type], len(df)): + if df.iat[i, df.columns.get_loc('Simulation')] != simulation_type: + break + + category = df.iat[i, df.columns.get_loc('Category')] + + if category == 'Generator': + self._process_generator_timeseries(md, begin_time, end_time, i) + elif category == 'Area': + self._process_area_timeseries(md, begin_time, end_time, i) + elif category == 'Reserve': + self._process_reserve_timeseries(md, begin_time, end_time, i) + + def _process_generator_timeseries(self, md:dict, begin_time:datetime, + end_time:datetime, df_index:int): + df = self.timeseries_df + i = df_index + gen_name = df.iat[i, df.columns.get_loc('Object')] + gen_dict = md['elements']['generator'][gen_name] + param = df.iat[i, df.columns.get_loc('Parameter')] + data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time].to_list() + + if param == 'PMin MW': + gen_dict['p_min'] = { 'data_type': 'time_series', + 'values' : data } + elif param == 'PMax MW': + gen_dict['p_max'] = { 'data_type': 'time_series', + 'values' : data } + else: + raise ValueError(f"Unexpected generator timeseries data: {param}") + + def _process_area_timeseries(self, md:dict, begin_time:datetime, + end_time:datetime, df_index:int): + df = self.timeseries_df + i = df_index + + area_name = df.iat[i, df.columns.get_loc('Object')] + param = df.iat[i, df.columns.get_loc('Parameter')] + assert(param == "MW Load") + data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time] + + skeleton_loads = self.skeleton['elements']['load'] + for bus, load_dict in md['elements']['load'].items(): + # Skip loads from other areas + if load_dict['area'] != area_name: + continue + + # Replace skeleton's p_load with the timeseries data, scaled by the load's + # portion of the area's total load. 
+ # Also replace q_load, if present, with timeseries + p_factor = self.load_participation_factors[bus] + # save skeleton's scalar p_load + p_load = skeleton_loads[bus]['p_load'] if 'p_load' in skeleton_loads[bus] else None + # overwrite p_load with timeseries + load_dict['p_load'] = { 'data_type': 'time_series', + 'values' : [v*p_factor for v in data] } + if p_load is not None and 'q_load' in skeleton_loads[bus]: + q_over_p = skeleton_loads[bus]['q_load'] / p_load + load_dict['q_load'] = { 'data_type': 'time_series', + 'values' : [v*q_over_p for v in load_dict['p_load']['values']] } + + def _process_reserve_timeseries(self, md:dict, begin_time:datetime, + end_time:datetime, df_index:int): + df = self.timeseries_df + i = df_index + + res_name = df.iat[i, df.columns.get_loc('Object')] + + if res_name in reserve_name_map: + target_dict = md['system'] + else: + # reserve name must be _R, + # split into type and area + res_name, area_name = res_name.split("_R", 1) + target_dict = md['elements']['area'][area_name] + + data = df.iat[i, df.columns.get_loc('Series')][begin_time:end_time] + target_dict[reserve_name_map[res_name]] = { 'data_type': 'time_series', + 'values' : data.to_list() } + + def _insert_system_data(self, md:dict, simulation_type:str, + begin_time:datetime, end_time:datetime): + md['system']['time_period_length_minutes'] = self.minutes_per_period[simulation_type] + + df = self.timeseries_df + sample_df = df.iat[self._first_indices[simulation_type], df.columns.get_loc('Series')] + dates = sample_df[begin_time:end_time].index + md['system']['time_keys'] = [dt.strftime('%Y-%m-%d %H:%M') for dt in dates] + + def _insert_scalar_reserve_data(self, md:dict, simulation_type:str): + ''' Insert scalar reserve values into the model dict + ''' + system = md['system'] + areas = md['elements']['area'] + + reserve_list = self.scalar_reserve_data.get_simulation_reserves(simulation_type) + for res in reserve_list: + if res.area_name is None: + target_dict = system + else: + target_dict = areas[res.area_name] + target_dict[reserve_name_map[res.reserve_type]] = res.value diff --git a/egret/parsers/rts_gmlc/parser.py b/egret/parsers/rts_gmlc/parser.py index bd5284a8..19cbbdd0 100644 --- a/egret/parsers/rts_gmlc/parser.py +++ b/egret/parsers/rts_gmlc/parser.py @@ -1,953 +1,926 @@ -# ___________________________________________________________________________ -# -# EGRET: Electrical Grid Research and Engineering Tools -# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC -# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. -# Government retains certain rights in this software. -# This software is distributed under the Revised BSD License. 
-# ___________________________________________________________________________ - -from __future__ import annotations - -from typing import TYPE_CHECKING -if TYPE_CHECKING: - from typing import Dict, Union, Optional, Tuple, Set - -import sys -import os.path -import pandas as pd -from datetime import datetime, timedelta -import dateutil.parser -from collections import namedtuple - -from egret.common.log import logger -import egret.data.model_data as md - -from .parsed_cache import ParsedCache -from ._reserves import is_valid_reserve_name, reserve_name_map, ScalarReserveData, ScalarReserveValue - -def create_ModelData(rts_gmlc_dir:str, - begin_time:Union[datetime,str], end_time:Union[datetime,str], - simulation:str="DAY_AHEAD", t0_state:Optional[dict] = None) -> md.ModelData: - - """ - Create a ModelData object from the RTS-GMLC data. - - Parameters - ---------- - rts_gmlc_dir : str - Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). - begin_time : datetime.datetime or str - Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - end_time : datetime.datetime or str - End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - simulation : str - Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, - default is "DAY_AHEAD". - t0_state : dict or None - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If t0_state is None, values are read from initial_status.csv in the rts_gmlc_dir. - If that file does not exist, no initial state data is set in the model. - - Returns - ------- - egret.model_data.ModelData - Returns a ModelData object with the timeseries data specified - """ - return md.ModelData(create_model_data_dict(rts_gmlc_dir, begin_time, end_time, simulation, t0_state)) - -def create_model_data_dict(rts_gmlc_dir:str, - begin_time:Union[datetime,str], end_time:Union[datetime,str], - simulation:str="DAY_AHEAD", t0_state:Optional[dict]=None) -> dict: - - """ - Create a model_data dictionary from the RTS-GMLC data. - - Parameters - ---------- - rts_gmlc_dir : str - Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). - begin_time : datetime.datetime or str - Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - end_time : datetime.datetime or str - End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, - the later of which assumes a midnight start. - simulation : str - Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, - default is "DAY_AHEAD". - t0_state : dict or None - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, no initial state data is included in the dict. - - Returns - ------- - dict : A dictionary in the format required for the ModelData object. 
- """ - - # Convert date string to datetimes, if necessary - begin_time, end_time = _parse_datetimes_if_strings(begin_time, end_time) - - cache = parse_to_cache(rts_gmlc_dir, begin_time, end_time, t0_state) - model = cache.generate_model(simulation, begin_time, end_time) - return model.data - - -def parse_to_cache(rts_gmlc_dir:str, - begin_time:Union[datetime,str], - end_time:Union[datetime,str], - t0_state:Optional[dict]=None, - honor_lookahead=True) -> ParsedCache: - ''' Parse data in RTS-GMLC format, keeping the portions between a start and end time - - rts_gmlc_dir : str - Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). - begin_time : datetime.datetime or str - Beginning of time horizon. - end_time : datetime.datetime or str - End of time horizon. - simulation : str - Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, - default is "DAY_AHEAD". - t0_state : dict or None - Keys of this dict are thermal generator names, each element of which is another dictionary with - keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the - generator is on at t0, the real power output at t0, and the reactive power output at t0. - If this is None, initial state data is not included in the cache. - ''' - if not os.path.exists(rts_gmlc_dir): - raise ValueError(f'RTS-GMLC directory "{rts_gmlc_dir}" does not exist') - - # Create the skeleton - model_data = _create_rtsgmlc_skeleton(rts_gmlc_dir) - - # Save the data frequencies - metadata_df = _read_metadata(rts_gmlc_dir, honor_lookahead) - minutes_per_period = {'DAY_AHEAD':int(metadata_df.loc['Period_Resolution', 'DAY_AHEAD'])//60, - 'REAL_TIME':int(metadata_df.loc['Period_Resolution', 'REAL_TIME'])//60} - - data_start, data_end = _get_data_date_range(metadata_df) - - constant_reserve_data = _get_scalar_reserve_data(rts_gmlc_dir, metadata_df, model_data) - - begin_time, end_time = _parse_datetimes_if_strings(begin_time, end_time) - # TODO: Validate begin_time and end_time. - # Do we want to enforce that they fall within the data date range? 
- - timeseries_df = _read_timeseries_data(model_data, rts_gmlc_dir, - begin_time, end_time, minutes_per_period) - - load_participation_factors = _compute_bus_load_participation_factors(model_data) - - set_t0_data(model_data, rts_gmlc_dir, t0_state) - - return ParsedCache(model_data, begin_time, end_time, - minutes_per_period['DAY_AHEAD'], minutes_per_period['REAL_TIME'], - timeseries_df, load_participation_factors, constant_reserve_data) - - -def _read_metadata(base_dir:str, honor_lookahead:bool) -> pd.DataFrame: - metadata_path = os.path.join(base_dir, "simulation_objects.csv") - if not os.path.exists(metadata_path): - raise ValueError(f'RTS-GMLC directory "{rts_gmlc_dir}" does not contain expected CSV files.') - - # Read metadata about the data - metadata_df = pd.read_csv(metadata_path, index_col=0) - - if not honor_lookahead: - metadata_df.loc['Look_Ahead_Periods_per_Step']['DAY_AHEAD'] = 0 - metadata_df.loc['Look_Ahead_Periods_per_Step']['REAL_TIME'] = 0 - - return metadata_df - -def _get_data_date_range(metadata_df) -> Tuple[datetime, datetime]: - ''' Get the range of dates for which there is data available - ''' - - # Data start time - row = metadata_df.loc['Date_From'] - data_start = max(dateutil.parser.parse(row['DAY_AHEAD']), - dateutil.parser.parse(row['REAL_TIME'])) - - # Data end time is a little more tricky - row = metadata_df.loc['Date_To'] - def _extract_end_date(which:str): - # The actual end date is the metadata's Date_To plus a number of look ahead periods. - # Each look ahead period is a specified number of seconds - extra_seconds = int(metadata_df.loc['Look_Ahead_Periods_per_Step'][which]) * \ - int(metadata_df.loc['Look_Ahead_Resolution'][which]) - end_date = dateutil.parser.parse(row[which]) - return end_date + timedelta(seconds=extra_seconds) - # Get the end date for each kind of data. Both kinds of data - # are available up through the later of the two simulation categories. - data_end = max(_extract_end_date('DAY_AHEAD'), - _extract_end_date('REAL_TIME')) - - return (data_start, data_end) - -def _read_timeseries_file(file_name:str, minutes_per_period:int, - start_time:datetime, end_time:datetime, - series_name:str) -> pd.DataFrame: - """ - Read data from a timeseries file, returning only the data that falls within the requested time range - - Parameters - ---------- - file_name:str - Path to a CVS file with timeseries data - minutes_per_period:int - The number of minutes between time periods in the data - start_time:datetime - The earliest time to include in the returned data - end_time:datetime - The first time to NOT include in the returned data - series_name:str - The name the column holding the resulting data - - Returns - ------- - df : A DataFrame with data between the specified dates - - Timeseries files can be in one of two formats, columnar or 2D. A columnar file can hold more - than one timeseries in the same file, with one row per time period and one column per series. - A 2D timeseries file holds only one timeseries, with one row per day and one column per time - period within the day. This function reads from either format. - - If the indicated file is columnar, the returned DataFrame will include all columns in the file, - one column per time series, with columns named as they appear in the file. - - If the indicated file is 2D, the returned DataFrame will have a single column whose name - is assigned to be the `series_name` passed into the function. 
- """ - # Determine which layout we're working with by checking column headers - headers = pd.read_csv(file_name, nrows=0).columns.tolist() - if headers[3] == 'Period': - return _read_columnar_timeseries_file(file_name, minutes_per_period, start_time, end_time) - else: - df = _read_2D_timeseries_file(file_name, minutes_per_period, start_time, end_time, series_name) - return df - -def _read_columnar_timeseries_file(file_name:str, minutes_per_period:int, - start_time:datetime, end_time:datetime) -> pd.DataFrame: - """ - Read data from a timeseries file, returning only the data that falls within the requested time range - - Parameters - ---------- - file_name:str - Path to a CVS file with timeseries data - minutes_per_period:int - The number of minutes between time periods in the data - start_time:datetime - The earliest time to include in the returned data - end_time:datetime - The first time to NOT include in the returned data - - Returns - ------- - df : A DataFrame with data between the specified dates - - The returned DataFrame converts the first 4 columns into a datetime which is used as - the DataFrame's index. All other CSV columns are included as columns in the DataFrame. - """ - df = pd.read_csv(file_name, - header=0, - parse_dates=[['Year', 'Month', 'Day']]) - df.index = df['Year_Month_Day'] + pd.to_timedelta((df['Period']-1)*minutes_per_period, 'm') - df.index.names = ['DateTime'] - df.drop(['Year_Month_Day', 'Period'], axis=1, inplace=True) - - df.sort_index(inplace=True) - - # Remove data outside requested time period. - # DataFrame slices include the end of the slice, so we need to reduce it slightly to - # avoid including an extra value at the end. - end_time = end_time - timedelta(seconds=1) - df = df[start_time:end_time] - - # Be sure to return a copy instead of a view into the larger data set - return df.copy() - -def _read_2D_timeseries_file(file_name:str, minutes_per_period:int, - start_time:datetime, end_time:datetime, - column_name:str) -> DataFrame: - """ - Read data from a timeseries file with a 2D layout, returning only the data that falls within the requested time range - - Parameters - ---------- - file_name:str - Path to a CVS file with reserve-formatted timeseries data - start_time:datetime - The earliest time to include in the returned data - end_time:datetime - The first time to NOT include in the returned data - minutes_per_period:int - The number of minutes between time periods in the data - column_name:str - The name that should be given to the DataFrame's column - - Returns - ------- - df : A single-column pandas DataFrame with data between the specified dates - - The returned DataFrame has one row per cell in the original CSV, indexed by the cell's - datetime. The first 3 columns indicate the date for all cells in the row, and other - column headers indicate the time within the day. Only data within the requested time - period is included in the results. Like a typical python range, the returned data includes - the start_time but does not include the end_time. - """ - df = pd.read_csv(file_name, - header=0, - parse_dates=[['Year', 'Month', 'Day']], - index_col=0) - df.sort_index(inplace=True) - - # Remove data outside requested time period. - # DataFrame slices include the end of the slice, so we need to reduce it slightly to - # avoid including an extra value at the end. 
- end_time = end_time - timedelta(seconds=1) - df = df[start_time:end_time] - - # Now divide rows into one row per column, resetting index to appropriate datetime - s = df.stack() - s.index = map(lambda i: i[0] + timedelta(minutes=minutes_per_period*(int(i[1])-1)), s.index) - s.index.names = ['DateTime'] - - # Filter one more time, trimming out times of day that fall before or after the requested range - if s.index[0] < start_time or s.index[-1] >= end_time: - s = s[start_time:end_time] - - # Create and return a new 1-column DataFrame - return pd.DataFrame({column_name: s}) - -def _create_rtsgmlc_skeleton(rts_gmlc_dir:str) -> dict: - """ - Creates a data dictionary from the RTS-GMLC data files, without loading hourly data - - Parameters - ---------- - rts_gmlc_dir : str - Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). - - Returns - ------- - data : dict - Returns a dict loaded from the RTS-GMLC data - """ - - base_dir = rts_gmlc_dir - - model_data = md.ModelData.empty_model_data_dict() - - elements = model_data["elements"] - system = model_data["system"] - - system["name"] = "RTS-GMLC" - - # this is the default used in the MATPOWER writer for RTS-GMLC - system["baseMVA"] = 100. - - bus_id_to_name = _read_buses_and_areas(base_dir, elements, system) - _read_branches(base_dir, elements, bus_id_to_name) - _read_generators(base_dir, elements, bus_id_to_name) - - return model_data - -def _read_buses_and_areas(base_dir:str, elements:dict, system:dict) -> dict: - - elements["bus"] = {} - elements["load"] = {} - elements["shunt"] = {} - - # add the buses - bus_types = {'PQ': 'PQ', - 'PV': 'PV', - 'Ref': 'ref'} - bus_id_to_name = {} - bus_areas = set() - bus_df = pd.read_csv(os.path.join(base_dir,'bus.csv')) - has_shunt_cols = 'MW Shunt G' in bus_df and 'MVAR Shunt B' in bus_df - for idx,row in bus_df.iterrows(): - BUS_TYPE = row['Bus Type'] - if not BUS_TYPE in bus_types: - raise ValueError(f'Encountered an unsupported bus type: "{BUS_TYPE}" when parsing RTS-GMLC input file') - - bus_name = str(row['Bus Name']) - bus_dict = { - "id": str(row['Bus ID']), - "base_kv": float(row['BaseKV']), - "matpower_bustype": bus_types[BUS_TYPE], - "vm": float(row['V Mag']), - "va": float(row['V Angle']), - "v_min": 0.95, - "v_max": 1.05, - "area": str(row['Area']), - "zone": str(row['Zone']), - } - - if bus_dict["base_kv"] <= 0: - raise ValueError(f'BaseKV value for bus "{bus_name}" is <= 0. Not supported.') - - PD = float(row['MW Load']) - QD = float(row['MVAR Load']) - if PD != 0 or QD != 0: - load_dict = { - "bus": bus_name, - "in_service":True, - "p_load": PD, - "q_load": QD, - "area": bus_dict['area'], - "zone": bus_dict['zone'] - } - elements["load"][bus_name] = load_dict - - if has_shunt_cols: - GS = float(row['MW Shunt G']) - BS = float(row['MVAR Shunt B']) - if GS != 0 or BS != 0: - shunt_dict = { - "shunt_type":"fixed", - "bus": bus_name, - "gs": GS, - "bs": BS - } - elements["shunt"][bus_name] = shunt_dict - - if BUS_TYPE == 'Ref': - va = bus_dict['va'] - if va != 0: - if abs(va) >= 1e-16: - raise ValueError('EGRET only supports reference buses with an angle of 0 degrees.') - msg = "\nEgret only supports reference buses with an angle of 0 degrees. \nFound a " \ - "reference bus with an angle close to 0. \n" \ - "Value: {va}\nSetting reference bus angle to 0." 
- warnings.warn(msg) - bus_dict['va'] = 0.0 - system["reference_bus"] = bus_name - system["reference_bus_angle"] = 0 - - bus_id_to_name[bus_dict['id']] = bus_name - bus_areas.add(bus_dict['area']) - elements['bus'][bus_name] = bus_dict - - # add the areas - elements['area'] = {name:dict() for name in bus_areas} - - return bus_id_to_name - -def _read_branches(base_dir:str, elements:dict, bus_id_to_name:dict) -> None: - - # add the branches - elements["branch"] = {} - branch_df = pd.read_csv(os.path.join(base_dir,'branch.csv')) - for idx,row in branch_df.iterrows(): - - branch_dict = { - "from_bus": bus_id_to_name[str(row['From Bus'])], - "to_bus": bus_id_to_name[str(row['To Bus'])], - "in_service": True, - "resistance": float(row['R']), - "reactance": float(row['X']), - "charging_susceptance": float(row['B']), - "rating_long_term": float(row['Cont Rating']) or None, - "rating_short_term": float(row['LTE Rating']) or None, - "rating_emergency": float(row['STE Rating']) or None, - "angle_diff_min": -90, - "angle_diff_max": 90, - "pf": None, - "qf": None, - "pt": None, - "qt": None - } - - TAP = float(row['Tr Ratio']) - if TAP != 0.0: - branch_dict["branch_type"] = "transformer" - branch_dict["transformer_tap_ratio"] = TAP - branch_dict["transformer_phase_shift"] = 0.0 - else: - branch_dict["branch_type"] = "line" - - name = str(row['UID']) - elements["branch"][name] = branch_dict - branch_df = None - - # add the DC branches - if os.path.exists(os.path.join(base_dir,'dc_branch.csv')): - elements["dc_branch"] = {} - branch_df = pd.read_csv(os.path.join(base_dir,'dc_branch.csv')) - for idx,row in branch_df.iterrows(): - branch_dict = { - "from_bus": bus_id_to_name[str(row['From Bus'])], - "to_bus": bus_id_to_name[str(row['To Bus'])], - "rating_short_term": float(row['MW Load']), - "rating_long_term": float(row['MW Load']), - "rating_emergency": float(row['MW Load']) - } - name = str(row['UID']) - elements["dc_branch"][name] = branch_dict - branch_df = None - -def _read_generators(base_dir:str, elements:dict, bus_id_to_name:dict) -> None: - from math import isnan - - # add the generators - elements["generator"] = {} - RENEWABLE_TYPES = {'WIND', 'HYDRO', 'RTPV', 'PV', 'ROR'} - gen_df = pd.read_csv(os.path.join(base_dir,'gen.csv')) - for idx,row in gen_df.iterrows(): - # if this is storage we need to handle it differently - if row['Fuel'] == 'Storage': - continue - - # NOTE: for now, Egret doesn't handle CSP -- not clear how to model - if row['Unit Type'] == 'CSP': - continue - - name = str(row['GEN UID']) - bus_name = bus_id_to_name[str(row['Bus ID'])] - gen_dict = { - "bus": bus_name, - "in_service": True, - "mbase": 100.0, - "pg": float(row['MW Inj']), - "qg": float(row['MVAR Inj']), - "p_min": float(row['PMin MW']), - "p_max": float(row['PMax MW']), - "q_min": float(row['QMin MVAR']), - "q_max": float(row['QMax MVAR']), - "ramp_q": float(row['Ramp Rate MW/Min']), - "fuel": str(row['Fuel']), - "unit_type": str(row['Unit Type']), - "area": elements['bus'][bus_name]['area'], - "zone": elements['bus'][bus_name]['zone'] - } - - # Remove optional values if not present - for key in ('p_min', 'p_max', 'q_min', 'q_max', 'ramp_q'): - if isnan(gen_dict[key]): - del gen_dict[key] - - UNIT_TYPE = str(row['Unit Type']) - if UNIT_TYPE in RENEWABLE_TYPES: - gen_dict["generator_type"] = "renewable" - # ROR is treated as HYDRO by Egret - if UNIT_TYPE == 'ROR': - gen_dict["unit_type"] = "HYDRO" - elif UNIT_TYPE == 'SYNC_COND': - ## TODO: should we have a flag for these? 
- gen_dict["generator_type"] = "thermal" - else: - gen_dict["generator_type"] = "thermal" - - elements["generator"][name] = gen_dict - - # after this is only really needed for thermal units - if UNIT_TYPE in RENEWABLE_TYPES: - continue - - # Gen cost - ## round as in RTS-GMLC Prescient/topysp.py - pmax = float(row['PMax MW']) - # There can be any number of 'Output_pct_' columns. - # Stop at the first one that doesn't exist or doesn't hold a number - def valid_output_pcts(): - for i in range(50): - try: - val = float(row[f'Output_pct_{i}']) - if isnan(val): - return - yield (i, val) - except: - return - x = {i: round(val*pmax, 1) - for i,val in valid_output_pcts() - } - fuel_field_count = len(x) - - if fuel_field_count > 0: - ## /1000. from the RTS-GMLC MATPOWER writer -- - ## heat rates are in BTU/kWh, 10^6 BTU == 1 MMBTU, 10^3 kWh == 1 MWh, so MMBTU/MWh == 10^3/10^6 * BTU/kWh - f = {} - f[0] = (float(row['HR_avg_0'])*1000./ 1000000.)*x[0] - for i in range(1,fuel_field_count): - f[i] = (((x[i]-x[i-1])*(float(row[f'HR_incr_{i}'])*1000. / 1000000.))) + f[i-1] - - F_COEFF = [ (x[i], round(f[i],2)) for i in range(fuel_field_count) if (((i == 0) or (x[i-1],f[i-1]) != (x[i], f[i])) and (x[i], f[i]) != (0.,0.)) ] - if F_COEFF == []: - F_COEFF = [(pmax, 0.0)] - gen_dict["p_fuel"] = {"data_type": "fuel_curve", "values": F_COEFF } - - # UC Data - MIN_DN_TIME = float(row['Min Down Time Hr']) - - # Startup types and costs, from hot to cold - startup_heat = (float(row['Start Heat Hot MBTU']), - float(row['Start Heat Warm MBTU']), - float(row['Start Heat Cold MBTU'])) - startup_time = (float(row['Start Time Hot Hr']), - float(row['Start Time Warm Hr']), - float(row['Start Time Cold Hr'])) - - # Arrange fuel requirements from hottest to coldest, ignoring missing values. - startup_fuel = [] - for i in range(3): - # Skip blank values - if isnan(startup_time[i]) or isnan(startup_heat[i]): - continue - - t = max(startup_time[i], MIN_DN_TIME) - f = startup_heat[i] - - # For entries with matching times, use to the colder data - if len(startup_fuel) > 0 and startup_fuel[-1][0] == t: - startup_fuel[-1] = (t,f) - else: - startup_fuel.append((t,f)) - - # If the warmest fuel requirement has a time longer than the minimum - # down time, extend that warmest requirement down to minimum down time. 
- if len(startup_fuel) > 0 and startup_fuel[0][0] > MIN_DN_TIME: - startup_fuel[0] = (MIN_DN_TIME, startup_fuel[0][1]) - - gen_dict["startup_fuel"] = startup_fuel - fixed_startup_cost = float(row['Non Fuel Start Cost $']) - if not isnan(fixed_startup_cost): - gen_dict["non_fuel_startup_cost"] = fixed_startup_cost - gen_dict["shutdown_cost"] = 0.0 - - gen_dict["agc_capable"] = True - gen_dict["p_min_agc"] = gen_dict["p_min"] - gen_dict["p_max_agc"] = gen_dict["p_max"] - - ramp_q = gen_dict['ramp_q'] - gen_dict["ramp_agc"] = ramp_q - gen_dict["ramp_up_60min"] = 60.*ramp_q - gen_dict["ramp_down_60min"] = 60.*ramp_q - - gen_dict["fuel_cost"] = float(row['Fuel Price $/MMBTU']) - - # these assumptions are the same as prescient-rtsgmlc - gen_dict["startup_capacity"] = gen_dict['p_min'] - gen_dict["shutdown_capacity"] = gen_dict['p_min'] - gen_dict["min_up_time"] = float(row['Min Up Time Hr']) - gen_dict["min_down_time"] = MIN_DN_TIME - - elements["generator"][name] = gen_dict - gen_df = None - -def _get_scalar_reserve_data(base_dir:str, metadata_df:df, model_dict:dict) -> ScalarReserveData: - # Store scalar reserve values as stored in the input - # - # Scalar reserve values that apply to both simulation types are stored in the - # passed in model dict. Scalar values that vary depending on model type are stored - # in the returned ScalarReserveData. - - da_scalar_reserves, rt_scalar_reserves = _identify_allowed_scalar_reserve_types(metadata_df) - shared_reserves = da_scalar_reserves.intersection(rt_scalar_reserves) - - # Collect constant scalar reserves - da_scalars = [] - rt_scalars = [] - - if not os.path.exists(os.path.join(base_dir,'reserves.csv')): - logger.warning(f"Did not find reserves.csv; assuming no reserves") - return ScalarReserveData(da_scalars, rt_scalars) - - reserve_df = pd.read_csv(os.path.join(base_dir,'reserves.csv')) - system = model_dict['system'] - areas = model_dict['elements']['area'] - for idx,row in reserve_df.iterrows(): - res_name = row['Reserve Product'] - req = float(row['Requirement (MW)']) - - if res_name in reserve_name_map: - target_dict = system - area_name = None - else: - # reserve name must be _R. - # split into type and area - res_name, area_name = res_name.split("_R", 1) - if res_name not in reserve_name_map: - logger.warning(f"Skipping reserve for unrecognized reserve type '{res_name}'") - continue - if area_name not in areas: - logger.warning(f"Skipping reserve for unrecognized area '{area_name}'") - continue - target_dict = areas[area_name] - - if res_name in shared_reserves: - # If it applies to both types, save it in the skeleton - target_dict[reserve_name_map[res_name]] = req - elif res_name in da_scalar_reserves: - # If it applies to just day-ahead, save to DA cache - da_scalars.append(ScalarReserveValue(res_name, area_name, req)) - elif res_name in rt_scalar_reserves: - # If it applies to just real-time, save to RT cache - rt_scalars.append(ScalarReserveValue(res_name, area_name, req)) - - return ScalarReserveData(da_scalars, rt_scalars) - -def _identify_allowed_scalar_reserve_types(metadata_df:df) -> Tuple[Set[str], Set[str]]: - ''' Return a list of reserve types that apply to each type of model (DA and RT). - - Arguments - --------- - metadata_df:df - The contents of simulation_objects.csv in a DataFrame - - Returns - ------- - Returns a tuple with two lists of strings, one list for day-ahead models, - and another list for real-time models. 
Each list holds the names of reserve - categories whose scalar reserve values (if specified in reserves.csv) should - be applied to that type of model. - - (day_ahead_reserves, rt_reserves) - day_ahead_reserves: Sequence[str] - The names of reserve categories whose scalar values apply to day-ahead models - rt_reserves: Sequence[str] - The names of reserve categories whose scalar values apply to real-time models - ''' - if not 'Reserve_Products' in metadata_df.index: - # By default, accept all reserve types in both types of model - all_reserves = set(reserve_name_map.keys()) - return (all_reserves, all_reserves) - - row = metadata_df.loc['Reserve_Products'] - def parse_reserves(which:str) -> Sequence[str]: - all = row[which] - if type(all) is not str: - return {} - # strip off parentheses, if present - all = all.strip('()') - return set(s.strip() for s in all.split(',')) - - return (parse_reserves('DAY_AHEAD'), parse_reserves('REAL_TIME')) - - -def _compute_bus_load_participation_factors(model_data): - ''' - compute aggregate load per area, and then compute - load participation factors from each bus from that data. - - Returns - ======= - participation_factors:dict[str,float] - Maps bus name to the fraction of its area load that it carries (0 to 1) - ''' - elements = model_data['elements'] - - # Sum the loads for each area - area_total_load = {area:0 for area in elements['area']} - for name, load in elements["load"].items(): - area = elements["bus"][load["bus"]]["area"] - area_total_load[area] += load["p_load"] - - bus_load_participation_factors = {} - for name, load in elements["load"].items(): - area = elements["bus"][load["bus"]]["area"] - bus_load_participation_factors[name] = load["p_load"] / area_total_load[area] - - return bus_load_participation_factors - -def _read_timeseries_data(model_data:dict, rts_gmlc_dir:str, - start_time:datetime, end_time:datetime, - minutes_per_period:Dict[str,int]): - """ - Parse all relevant timeseries files - - Returns - ======= - all_timeseries: DataFrame - A DataFrame with the following columns: - [Simulation, Category, Object, Parameter, Series] - - The Series column holds the data as a pandas series, indexed by the datetime - of the value. 
- - """ - - # All timeseries data that has already been read (map[filename] -> DataFrame) - timeseries_file_map = {} - - timeseries_pointer_df = pd.read_csv(os.path.join(rts_gmlc_dir, "timeseries_pointers.csv"), - header=0, dtype={'Object':str}) - - elements = model_data['elements'] - params_of_interest = { - 'Generator': { 'PMin MW', 'PMax MW'}, - 'Reserve': {'Requirement'}, - 'Area': {'MW Load'} - } - - # Create an array where we can gather parsed timeseries data - series_data = [None]*timeseries_pointer_df.shape[0] - - # Store the timeseries data in the timeseries DF - for idx,row in timeseries_pointer_df.iterrows(): - # Skip rows we don't ingest - if not row['Category'] in params_of_interest: - continue - if not row['Parameter'] in params_of_interest[row['Category']]: - continue - - # Skip generators not in skeleton - if row['Category'] == 'Generator' and not row['Object'] in elements['generator']: - continue - # Skip areas not in skeleton - if row['Category'] == 'Area' and not row['Object'] in elements['area']: - continue - - is_reserve = (row['Category'] == 'Reserve') - if is_reserve: - # Skip unrecognized reserve names - name = row['Object'] - if not is_valid_reserve_name(name, model_data): - continue - - # Read the timeseries file if we haven't already, using the - # canonical file path as a key into previously read filenames. - fname = os.path.abspath(os.path.join(rts_gmlc_dir, row['Data File'])) - if not fname in timeseries_file_map: - sim = row['Simulation'] - data = _read_timeseries_file(fname, minutes_per_period[sim], - start_time, end_time, row['Object']) - timeseries_file_map[fname] = data - - # Save a reference to the relevant data as a Series - series_data[idx]= timeseries_file_map[fname][row['Object']] - - # Add the 'Series' column - timeseries_pointer_df = timeseries_pointer_df.assign(Series=series_data) - - # Remove columns that we don't want to preserve - keepers= {'Simulation', 'Category', 'Object', 'Parameter', 'Series'} - for c in timeseries_pointer_df.columns: - if not c in keepers: - timeseries_pointer_df.pop(c) - - # Remove irrelevant rows - timeseries_pointer_df.dropna(subset=['Series'], inplace=True) - - # Sort by simulation - timeseries_pointer_df.sort_values(by='Simulation', inplace=True) - - return timeseries_pointer_df - -def _convert_to_datetime(when:Union[datetime,str]): - ''' - Convert an object to a datetime, if it is not already one. - - Parameters - ---------- - when: datetime or str - The date and time to be returned or parsed - - Returns - ------- - datetime - The passed in object as a datetime, parsing it if necessary - - If `when` is a datetime it is simply returned. - If `when` is a string it is parsed, inferring the format - If `when` is any other type, a TypeError is raised - - ''' - if isinstance(when, datetime): - return when - elif isinstance(when, str): - return dateutil.parser.parse(when) - else: - raise TypeError(f'Invalid argument, expected a datetime or str, got a {type(when)}') - -def _parse_datetimes_if_strings(begin_time:Union[datetime,str], end_time:Union[datetime,str]): - ''' - Ensure both dates are datetimes, parsing date strings if necessary. 
- - Returns - ------- - begin_time:datetime - The begin_time as a datetime, parsing it if necessary - end_time:datetime - The end_time as a datetime, parsing it if necessary - ''' - - begin_time = _convert_to_datetime(begin_time) - end_time = _convert_to_datetime(end_time) - - return begin_time, end_time - -def set_t0_data(md:dict, base_dir:str="", t0_state:Optional[dict]=None): - """ Put t0 information into the passed in model dict - - Only t0 data for thermal generators is populated. - - Data comes from: - * t0_state, if provided - * otherwise, a file called initial_status.csv, if present - * otherwise, t0 data is left blank - - If t0_state is provided, it should be organized as t0_state[name][value], - where `name` is the name of a generator, and `value` is 'initial_status', - 'initial_p_output', and 'initial_q_output'. For any generator included in - t0_state, all three values must be present. - - If initial_status.csv is used, it must have a header row and may have - from 1 to 3 data rows. Row 1 is 'initial_status'. Row 2 is - 'initial_p_output'. Row 3 is 'initial_q_output'. Column headers are - the generator names. Default values are used for any missing rows. - - Any generators not mentioned in the data source are left untouched. - """ - if t0_state is not None: - for name, gen in md['elements']['generator'].items(): - if gen['generator_type']=='thermal' and name in t0_state: - gen['initial_status'] = t0_state[name]['initial_status'] - gen['initial_p_output'] = t0_state[name]['initial_p_output'] - gen['initial_q_output'] = t0_state[name]['initial_q_output'] - return - - state_fname = os.path.join(base_dir, 'initial_status.csv') - if os.path.exists(state_fname): - import csv - with open(state_fname, 'r') as f: - reader = csv.DictReader(f) - rows = list(reader) - - # We now have a list of rows, from 1 to 3 rows long. - # Row 1 is 'initial_status', row 2 is 'initial_p_output', and row 3 is 'initial_q_output'. - # Any missing row uses defaults - row_count = len(rows) - for name, gen in md['elements']['generator'].items(): - if gen['generator_type'] != 'thermal': - continue - if name not in reader.fieldnames: - continue - gen['initial_status'] = float(rows[0][name]) - if gen['initial_status'] < 0: - gen['initial_p_output'] = 0.0 - gen['initial_q_output'] = 0.0 - else: - if row_count >= 2: - gen['initial_p_output'] = float(rows[1][name]) - else: - gen["initial_p_output"] = gen["p_min"] - if row_count >= 3: - gen['initial_q_output'] = float(rows[2][name]) - else: - gen["initial_q_output"] = max(0., gen["q_min"]) - else: - logger.warning("Setting default t0 state in RTS-GMLC parser") - for name, gen in md['elements']['generator'].items(): - if gen['generator_type']=='thermal': - gen['initial_status'] = gen['min_up_time']+1 - gen['initial_p_output'] = gen['p_min'] - gen['initial_q_output'] = 0. +# ___________________________________________________________________________ +# +# EGRET: Electrical Grid Research and Engineering Tools +# Copyright 2019 National Technology & Engineering Solutions of Sandia, LLC +# (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the U.S. +# Government retains certain rights in this software. +# This software is distributed under the Revised BSD License. 
+# ___________________________________________________________________________ + +from __future__ import annotations + +from typing import TYPE_CHECKING +if TYPE_CHECKING: + from typing import Dict, Union, Optional, Tuple, Set + +import sys +import os.path +import pandas as pd +from datetime import datetime, timedelta +import dateutil.parser +from collections import namedtuple + +from egret.common.log import logger +import egret.data.model_data as md + +from .parsed_cache import ParsedCache +from ._reserves import is_valid_reserve_name, reserve_name_map, ScalarReserveData, ScalarReserveValue + +def create_ModelData(rts_gmlc_dir:str, + begin_time:Union[datetime,str], end_time:Union[datetime,str], + simulation:str="DAY_AHEAD", t0_state:Optional[dict] = None) -> md.ModelData: + + """ + Create a ModelData object from the RTS-GMLC data. + + Parameters + ---------- + rts_gmlc_dir : str + Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). + begin_time : datetime.datetime or str + Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + end_time : datetime.datetime or str + End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + simulation : str + Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, + default is "DAY_AHEAD". + t0_state : dict or None + Keys of this dict are thermal generator names, each element of which is another dictionary with + keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the + generator is on at t0, the real power output at t0, and the reactive power output at t0. + If t0_state is None, values are read from initial_status.csv in the rts_gmlc_dir. + If that file does not exist, no initial state data is set in the model. + + Returns + ------- + egret.model_data.ModelData + Returns a ModelData object with the timeseries data specified + """ + return md.ModelData(create_model_data_dict(rts_gmlc_dir, begin_time, end_time, simulation, t0_state)) + +def create_model_data_dict(rts_gmlc_dir:str, + begin_time:Union[datetime,str], end_time:Union[datetime,str], + simulation:str="DAY_AHEAD", t0_state:Optional[dict]=None) -> dict: + + """ + Create a model_data dictionary from the RTS-GMLC data. + + Parameters + ---------- + rts_gmlc_dir : str + Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). + begin_time : datetime.datetime or str + Beginning of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + end_time : datetime.datetime or str + End of time horizon. If str, date/time in "YYYY-MM-DD HH:MM:SS" or "YYYY-MM-DD" format, + the later of which assumes a midnight start. + simulation : str + Either "DAY_AHEAD" or "REAL_TIME", which specifies which time series the data is taken from, + default is "DAY_AHEAD". + t0_state : dict or None + Keys of this dict are thermal generator names, each element of which is another dictionary with + keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the + generator is on at t0, the real power output at t0, and the reactive power output at t0. + If this is None, no initial state data is included in the dict. + + Returns + ------- + dict : A dictionary in the format required for the ModelData object. 
+    """
+
+    # Convert date strings to datetimes, if necessary
+    begin_time, end_time = _parse_datetimes_if_strings(begin_time, end_time)
+
+    cache = parse_to_cache(rts_gmlc_dir, begin_time, end_time, t0_state)
+    model = cache.generate_model(simulation, begin_time, end_time)
+    return model.data
+
+
+def parse_to_cache(rts_gmlc_dir:str,
+                   begin_time:Union[datetime,str],
+                   end_time:Union[datetime,str],
+                   t0_state:Optional[dict]=None,
+                   honor_lookahead=True) -> ParsedCache:
+    ''' Parse data in RTS-GMLC format, keeping the portions between a start and end time
+
+    Parameters
+    ----------
+    rts_gmlc_dir : str
+        Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc).
+    begin_time : datetime.datetime or str
+        Beginning of time horizon.
+    end_time : datetime.datetime or str
+        End of time horizon.
+    t0_state : dict or None
+        Keys of this dict are thermal generator names, each element of which is another dictionary with
+        keys "initial_status", "initial_p_output", and "initial_q_output", which specify whether the
+        generator is on at t0, the real power output at t0, and the reactive power output at t0.
+        If this is None, initial state data is not included in the cache.
+    honor_lookahead : bool
+        If False, the look-ahead periods specified in simulation_objects.csv are ignored
+        (treated as zero) when reading the metadata. Defaults to True.
+
+    Returns
+    -------
+    ParsedCache
+        The parsed data, from which either DAY_AHEAD or REAL_TIME models can be generated.
+    '''
+    if not os.path.exists(rts_gmlc_dir):
+        raise ValueError(f'RTS-GMLC directory "{rts_gmlc_dir}" does not exist')
+
+    # Create the skeleton
+    model_data = _create_rtsgmlc_skeleton(rts_gmlc_dir)
+
+    # Save the data frequencies
+    metadata_df = _read_metadata(rts_gmlc_dir, honor_lookahead)
+    minutes_per_period = {'DAY_AHEAD':int(metadata_df.loc['Period_Resolution', 'DAY_AHEAD'])//60,
+                          'REAL_TIME':int(metadata_df.loc['Period_Resolution', 'REAL_TIME'])//60}
+
+    constant_reserve_data = _get_scalar_reserve_data(rts_gmlc_dir, metadata_df, model_data)
+
+    begin_time, end_time = _parse_datetimes_if_strings(begin_time, end_time)
+    # TODO: Validate begin_time and end_time.
+    # Do we want to enforce that they fall within the data date range?
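+    # One possible check (editor's sketch, left as a comment): if simulation_objects.csv
+    # carries 'Date_From' and 'Date_To' rows -- an assumption about the metadata layout --
+    # the requested horizon could be validated roughly like this:
+    #
+    #   data_start = dateutil.parser.parse(str(metadata_df.loc['Date_From', 'DAY_AHEAD']))
+    #   data_end = dateutil.parser.parse(str(metadata_df.loc['Date_To', 'DAY_AHEAD']))
+    #   if begin_time < data_start or end_time > data_end:
+    #       raise ValueError(f'Requested horizon {begin_time}..{end_time} is outside '
+    #                        f'the data range {data_start}..{data_end}')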
+ + timeseries_df = _read_timeseries_data(model_data, rts_gmlc_dir, + begin_time, end_time, minutes_per_period) + + load_participation_factors = _compute_bus_load_participation_factors(model_data) + + set_t0_data(model_data, rts_gmlc_dir, t0_state) + + return ParsedCache(model_data, begin_time, end_time, + minutes_per_period['DAY_AHEAD'], minutes_per_period['REAL_TIME'], + timeseries_df, load_participation_factors, constant_reserve_data) + + +def _read_metadata(base_dir:str, honor_lookahead:bool) -> pd.DataFrame: + metadata_path = os.path.join(base_dir, "simulation_objects.csv") + if not os.path.exists(metadata_path): + raise ValueError(f'RTS-GMLC directory "{rts_gmlc_dir}" does not contain expected CSV files.') + + # Read metadata about the data + metadata_df = pd.read_csv(metadata_path, index_col=0) + + if not honor_lookahead: + metadata_df.loc['Look_Ahead_Periods_per_Step']['DAY_AHEAD'] = 0 + metadata_df.loc['Look_Ahead_Periods_per_Step']['REAL_TIME'] = 0 + + return metadata_df + +def _read_timeseries_file(file_name:str, minutes_per_period:int, + start_time:datetime, end_time:datetime, + series_name:str) -> pd.DataFrame: + """ + Read data from a timeseries file, returning only the data that falls within the requested time range + + Parameters + ---------- + file_name:str + Path to a CVS file with timeseries data + minutes_per_period:int + The number of minutes between time periods in the data + start_time:datetime + The earliest time to include in the returned data + end_time:datetime + The first time to NOT include in the returned data + series_name:str + The name the column holding the resulting data + + Returns + ------- + df : A DataFrame with data between the specified dates + + Timeseries files can be in one of two formats, columnar or 2D. A columnar file can hold more + than one timeseries in the same file, with one row per time period and one column per series. + A 2D timeseries file holds only one timeseries, with one row per day and one column per time + period within the day. This function reads from either format. + + If the indicated file is columnar, the returned DataFrame will include all columns in the file, + one column per time series, with columns named as they appear in the file. + + If the indicated file is 2D, the returned DataFrame will have a single column whose name + is assigned to be the `series_name` passed into the function. + """ + # Determine which layout we're working with by checking column headers + headers = pd.read_csv(file_name, nrows=0).columns.tolist() + if headers[3] == 'Period': + return _read_columnar_timeseries_file(file_name, minutes_per_period, start_time, end_time) + else: + df = _read_2D_timeseries_file(file_name, minutes_per_period, start_time, end_time, series_name) + return df + +def _read_columnar_timeseries_file(file_name:str, minutes_per_period:int, + start_time:datetime, end_time:datetime) -> pd.DataFrame: + """ + Read data from a timeseries file, returning only the data that falls within the requested time range + + Parameters + ---------- + file_name:str + Path to a CVS file with timeseries data + minutes_per_period:int + The number of minutes between time periods in the data + start_time:datetime + The earliest time to include in the returned data + end_time:datetime + The first time to NOT include in the returned data + + Returns + ------- + df : A DataFrame with data between the specified dates + + The returned DataFrame converts the first 4 columns into a datetime which is used as + the DataFrame's index. 
All other CSV columns are included as columns in the DataFrame. + """ + df = pd.read_csv(file_name, + header=0, + parse_dates=[['Year', 'Month', 'Day']]) + df.index = df['Year_Month_Day'] + pd.to_timedelta((df['Period']-1)*minutes_per_period, 'm') + df.index.names = ['DateTime'] + df.drop(['Year_Month_Day', 'Period'], axis=1, inplace=True) + + df.sort_index(inplace=True) + + # Remove data outside requested time period. + # DataFrame slices include the end of the slice, so we need to reduce it slightly to + # avoid including an extra value at the end. + end_time = end_time - timedelta(seconds=1) + df = df[start_time:end_time] + + # Be sure to return a copy instead of a view into the larger data set + return df.copy() + +def _read_2D_timeseries_file(file_name:str, minutes_per_period:int, + start_time:datetime, end_time:datetime, + column_name:str) -> DataFrame: + """ + Read data from a timeseries file with a 2D layout, returning only the data that falls within the requested time range + + Parameters + ---------- + file_name:str + Path to a CVS file with reserve-formatted timeseries data + start_time:datetime + The earliest time to include in the returned data + end_time:datetime + The first time to NOT include in the returned data + minutes_per_period:int + The number of minutes between time periods in the data + column_name:str + The name that should be given to the DataFrame's column + + Returns + ------- + df : A single-column pandas DataFrame with data between the specified dates + + The returned DataFrame has one row per cell in the original CSV, indexed by the cell's + datetime. The first 3 columns indicate the date for all cells in the row, and other + column headers indicate the time within the day. Only data within the requested time + period is included in the results. Like a typical python range, the returned data includes + the start_time but does not include the end_time. + """ + df = pd.read_csv(file_name, + header=0, + parse_dates=[['Year', 'Month', 'Day']], + index_col=0) + df.sort_index(inplace=True) + + # Remove data outside requested time period. + # DataFrame slices include the end of the slice, so we need to reduce it slightly to + # avoid including an extra value at the end. + end_time = end_time - timedelta(seconds=1) + df = df[start_time:end_time] + + # Now divide rows into one row per column, resetting index to appropriate datetime + s = df.stack() + s.index = map(lambda i: i[0] + timedelta(minutes=minutes_per_period*(int(i[1])-1)), s.index) + s.index.names = ['DateTime'] + + # Filter one more time, trimming out times of day that fall before or after the requested range + if s.index[0] < start_time or s.index[-1] >= end_time: + s = s[start_time:end_time] + + # Create and return a new 1-column DataFrame + return pd.DataFrame({column_name: s}) + +def _create_rtsgmlc_skeleton(rts_gmlc_dir:str) -> dict: + """ + Creates a data dictionary from the RTS-GMLC data files, without loading hourly data + + Parameters + ---------- + rts_gmlc_dir : str + Path to directory holding csv files in RTS-GMLC format (bus.csv, gen.csv, etc). + + Returns + ------- + data : dict + Returns a dict loaded from the RTS-GMLC data + """ + + base_dir = rts_gmlc_dir + + model_data = md.ModelData.empty_model_data_dict() + + elements = model_data["elements"] + system = model_data["system"] + + system["name"] = "RTS-GMLC" + + # this is the default used in the MATPOWER writer for RTS-GMLC + system["baseMVA"] = 100. 
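+
+    # The helpers below fill in the rest of the skeleton: _read_buses_and_areas populates
+    # elements['bus'], ['load'], ['shunt'], and ['area'] and returns a map from bus ID to
+    # bus name; _read_branches populates elements['branch'] (and ['dc_branch'] when
+    # dc_branch.csv is present); _read_generators populates elements['generator'] for all
+    # non-storage, non-CSP units.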
+ + bus_id_to_name = _read_buses_and_areas(base_dir, elements, system) + _read_branches(base_dir, elements, bus_id_to_name) + _read_generators(base_dir, elements, bus_id_to_name) + + return model_data + +def _read_buses_and_areas(base_dir:str, elements:dict, system:dict) -> dict: + + elements["bus"] = {} + elements["load"] = {} + elements["shunt"] = {} + + # add the buses + bus_types = {'PQ': 'PQ', + 'PV': 'PV', + 'Ref': 'ref'} + bus_id_to_name = {} + bus_areas = set() + bus_df = pd.read_csv(os.path.join(base_dir,'bus.csv')) + has_shunt_cols = 'MW Shunt G' in bus_df and 'MVAR Shunt B' in bus_df + for idx,row in bus_df.iterrows(): + BUS_TYPE = row['Bus Type'] + if not BUS_TYPE in bus_types: + raise ValueError(f'Encountered an unsupported bus type: "{BUS_TYPE}" when parsing RTS-GMLC input file') + + bus_name = str(row['Bus Name']) + bus_dict = { + "id": str(row['Bus ID']), + "base_kv": float(row['BaseKV']), + "matpower_bustype": bus_types[BUS_TYPE], + "vm": float(row['V Mag']), + "va": float(row['V Angle']), + "v_min": 0.95, + "v_max": 1.05, + "area": str(row['Area']), + "zone": str(row['Zone']), + } + + if bus_dict["base_kv"] <= 0: + raise ValueError(f'BaseKV value for bus "{bus_name}" is <= 0. Not supported.') + + PD = float(row['MW Load']) + QD = float(row['MVAR Load']) + if PD != 0 or QD != 0: + load_dict = { + "bus": bus_name, + "in_service":True, + "p_load": PD, + "q_load": QD, + "area": bus_dict['area'], + "zone": bus_dict['zone'] + } + elements["load"][bus_name] = load_dict + + if has_shunt_cols: + GS = float(row['MW Shunt G']) + BS = float(row['MVAR Shunt B']) + if GS != 0 or BS != 0: + shunt_dict = { + "shunt_type":"fixed", + "bus": bus_name, + "gs": GS, + "bs": BS + } + elements["shunt"][bus_name] = shunt_dict + + if BUS_TYPE == 'Ref': + va = bus_dict['va'] + if va != 0: + if abs(va) >= 1e-16: + raise ValueError('EGRET only supports reference buses with an angle of 0 degrees.') + msg = "\nEgret only supports reference buses with an angle of 0 degrees. \nFound a " \ + "reference bus with an angle close to 0. \n" \ + "Value: {va}\nSetting reference bus angle to 0." 
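+                # Angles with magnitude >= 1e-16 are rejected above; a nonzero angle smaller
+                # than that is warned about and snapped to 0 below.  The import is local
+                # because `warnings` does not appear in this module's import block; if it is
+                # already imported elsewhere, re-importing here is harmless.
+                import warnings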
+ warnings.warn(msg) + bus_dict['va'] = 0.0 + system["reference_bus"] = bus_name + system["reference_bus_angle"] = 0 + + bus_id_to_name[bus_dict['id']] = bus_name + bus_areas.add(bus_dict['area']) + elements['bus'][bus_name] = bus_dict + + # add the areas + elements['area'] = {name:dict() for name in bus_areas} + + return bus_id_to_name + +def _read_branches(base_dir:str, elements:dict, bus_id_to_name:dict) -> None: + + # add the branches + elements["branch"] = {} + branch_df = pd.read_csv(os.path.join(base_dir,'branch.csv')) + for idx,row in branch_df.iterrows(): + + branch_dict = { + "from_bus": bus_id_to_name[str(row['From Bus'])], + "to_bus": bus_id_to_name[str(row['To Bus'])], + "in_service": True, + "resistance": float(row['R']), + "reactance": float(row['X']), + "charging_susceptance": float(row['B']), + "rating_long_term": float(row['Cont Rating']) or None, + "rating_short_term": float(row['LTE Rating']) or None, + "rating_emergency": float(row['STE Rating']) or None, + "angle_diff_min": -90, + "angle_diff_max": 90, + "pf": None, + "qf": None, + "pt": None, + "qt": None + } + + TAP = float(row['Tr Ratio']) + if TAP != 0.0: + branch_dict["branch_type"] = "transformer" + branch_dict["transformer_tap_ratio"] = TAP + branch_dict["transformer_phase_shift"] = 0.0 + else: + branch_dict["branch_type"] = "line" + + name = str(row['UID']) + elements["branch"][name] = branch_dict + branch_df = None + + # add the DC branches + if os.path.exists(os.path.join(base_dir,'dc_branch.csv')): + elements["dc_branch"] = {} + branch_df = pd.read_csv(os.path.join(base_dir,'dc_branch.csv')) + for idx,row in branch_df.iterrows(): + branch_dict = { + "from_bus": bus_id_to_name[str(row['From Bus'])], + "to_bus": bus_id_to_name[str(row['To Bus'])], + "rating_short_term": float(row['MW Load']), + "rating_long_term": float(row['MW Load']), + "rating_emergency": float(row['MW Load']) + } + name = str(row['UID']) + elements["dc_branch"][name] = branch_dict + branch_df = None + +def _read_generators(base_dir:str, elements:dict, bus_id_to_name:dict) -> None: + from math import isnan + + # add the generators + elements["generator"] = {} + RENEWABLE_TYPES = {'WIND', 'HYDRO', 'RTPV', 'PV', 'ROR'} + gen_df = pd.read_csv(os.path.join(base_dir,'gen.csv')) + for idx,row in gen_df.iterrows(): + # if this is storage we need to handle it differently + if row['Fuel'] == 'Storage': + continue + + # NOTE: for now, Egret doesn't handle CSP -- not clear how to model + if row['Unit Type'] == 'CSP': + continue + + name = str(row['GEN UID']) + bus_name = bus_id_to_name[str(row['Bus ID'])] + gen_dict = { + "bus": bus_name, + "in_service": True, + "mbase": 100.0, + "pg": float(row['MW Inj']), + "qg": float(row['MVAR Inj']), + "p_min": float(row['PMin MW']), + "p_max": float(row['PMax MW']), + "q_min": float(row['QMin MVAR']), + "q_max": float(row['QMax MVAR']), + "ramp_q": float(row['Ramp Rate MW/Min']), + "fuel": str(row['Fuel']), + "unit_type": str(row['Unit Type']), + "area": elements['bus'][bus_name]['area'], + "zone": elements['bus'][bus_name]['zone'] + } + + # Remove optional values if not present + for key in ('p_min', 'p_max', 'q_min', 'q_max', 'ramp_q'): + if isnan(gen_dict[key]): + del gen_dict[key] + + UNIT_TYPE = str(row['Unit Type']) + if UNIT_TYPE in RENEWABLE_TYPES: + gen_dict["generator_type"] = "renewable" + # ROR is treated as HYDRO by Egret + if UNIT_TYPE == 'ROR': + gen_dict["unit_type"] = "HYDRO" + elif UNIT_TYPE == 'SYNC_COND': + ## TODO: should we have a flag for these? 
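+            # Note: synchronous condensers are currently recorded as ordinary thermal
+            # generators; nothing on the generator dict marks them as condensers.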
+ gen_dict["generator_type"] = "thermal" + else: + gen_dict["generator_type"] = "thermal" + + elements["generator"][name] = gen_dict + + # after this is only really needed for thermal units + if UNIT_TYPE in RENEWABLE_TYPES: + continue + + # Gen cost + ## round as in RTS-GMLC Prescient/topysp.py + pmax = float(row['PMax MW']) + # There can be any number of 'Output_pct_' columns. + # Stop at the first one that doesn't exist or doesn't hold a number + def valid_output_pcts(): + for i in range(50): + try: + val = float(row[f'Output_pct_{i}']) + if isnan(val): + return + yield (i, val) + except: + return + x = {i: round(val*pmax, 1) + for i,val in valid_output_pcts() + } + fuel_field_count = len(x) + + if fuel_field_count > 0: + ## /1000. from the RTS-GMLC MATPOWER writer -- + ## heat rates are in BTU/kWh, 10^6 BTU == 1 MMBTU, 10^3 kWh == 1 MWh, so MMBTU/MWh == 10^3/10^6 * BTU/kWh + f = {} + f[0] = (float(row['HR_avg_0'])*1000./ 1000000.)*x[0] + for i in range(1,fuel_field_count): + f[i] = (((x[i]-x[i-1])*(float(row[f'HR_incr_{i}'])*1000. / 1000000.))) + f[i-1] + + F_COEFF = [ (x[i], round(f[i],2)) for i in range(fuel_field_count) if (((i == 0) or (x[i-1],f[i-1]) != (x[i], f[i])) and (x[i], f[i]) != (0.,0.)) ] + if F_COEFF == []: + F_COEFF = [(pmax, 0.0)] + gen_dict["p_fuel"] = {"data_type": "fuel_curve", "values": F_COEFF } + + # UC Data + MIN_DN_TIME = float(row['Min Down Time Hr']) + + # Startup types and costs, from hot to cold + startup_heat = (float(row['Start Heat Hot MBTU']), + float(row['Start Heat Warm MBTU']), + float(row['Start Heat Cold MBTU'])) + startup_time = (float(row['Start Time Hot Hr']), + float(row['Start Time Warm Hr']), + float(row['Start Time Cold Hr'])) + + # Arrange fuel requirements from hottest to coldest, ignoring missing values. + startup_fuel = [] + for i in range(3): + # Skip blank values + if isnan(startup_time[i]) or isnan(startup_heat[i]): + continue + + t = max(startup_time[i], MIN_DN_TIME) + f = startup_heat[i] + + # For entries with matching times, use to the colder data + if len(startup_fuel) > 0 and startup_fuel[-1][0] == t: + startup_fuel[-1] = (t,f) + else: + startup_fuel.append((t,f)) + + # If the warmest fuel requirement has a time longer than the minimum + # down time, extend that warmest requirement down to minimum down time. 
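+        # Illustrative example (hypothetical numbers): with Min Down Time = 4 h and only
+        # warm and cold entries present, say (8 h, 60 MBTU) and (16 h, 100 MBTU), the loop
+        # above yields [(8, 60), (16, 100)]; the step below then rewrites the first entry
+        # to (4, 60), so a unit that has been down for exactly its minimum down time still
+        # has a startup fuel value.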
+ if len(startup_fuel) > 0 and startup_fuel[0][0] > MIN_DN_TIME: + startup_fuel[0] = (MIN_DN_TIME, startup_fuel[0][1]) + + gen_dict["startup_fuel"] = startup_fuel + fixed_startup_cost = float(row['Non Fuel Start Cost $']) + if not isnan(fixed_startup_cost): + gen_dict["non_fuel_startup_cost"] = fixed_startup_cost + gen_dict["shutdown_cost"] = 0.0 + + gen_dict["agc_capable"] = True + gen_dict["p_min_agc"] = gen_dict["p_min"] + gen_dict["p_max_agc"] = gen_dict["p_max"] + + ramp_q = gen_dict['ramp_q'] + gen_dict["ramp_agc"] = ramp_q + gen_dict["ramp_up_60min"] = 60.*ramp_q + gen_dict["ramp_down_60min"] = 60.*ramp_q + + gen_dict["fuel_cost"] = float(row['Fuel Price $/MMBTU']) + + # these assumptions are the same as prescient-rtsgmlc + gen_dict["startup_capacity"] = gen_dict['p_min'] + gen_dict["shutdown_capacity"] = gen_dict['p_min'] + gen_dict["min_up_time"] = float(row['Min Up Time Hr']) + gen_dict["min_down_time"] = MIN_DN_TIME + + elements["generator"][name] = gen_dict + gen_df = None + +def _get_scalar_reserve_data(base_dir:str, metadata_df:df, model_dict:dict) -> ScalarReserveData: + # Store scalar reserve values as stored in the input + # + # Scalar reserve values that apply to both simulation types are stored in the + # passed in model dict. Scalar values that vary depending on model type are stored + # in the returned ScalarReserveData. + + da_scalar_reserves, rt_scalar_reserves = _identify_allowed_scalar_reserve_types(metadata_df) + shared_reserves = da_scalar_reserves.intersection(rt_scalar_reserves) + + # Collect constant scalar reserves + da_scalars = [] + rt_scalars = [] + + if not os.path.exists(os.path.join(base_dir,'reserves.csv')): + logger.warning(f"Did not find reserves.csv; assuming no reserves") + return ScalarReserveData(da_scalars, rt_scalars) + + reserve_df = pd.read_csv(os.path.join(base_dir,'reserves.csv')) + system = model_dict['system'] + areas = model_dict['elements']['area'] + for idx,row in reserve_df.iterrows(): + res_name = row['Reserve Product'] + req = float(row['Requirement (MW)']) + + if res_name in reserve_name_map: + target_dict = system + area_name = None + else: + # reserve name must be _R. + # split into type and area + res_name, area_name = res_name.split("_R", 1) + if res_name not in reserve_name_map: + logger.warning(f"Skipping reserve for unrecognized reserve type '{res_name}'") + continue + if area_name not in areas: + logger.warning(f"Skipping reserve for unrecognized area '{area_name}'") + continue + target_dict = areas[area_name] + + if res_name in shared_reserves: + # If it applies to both types, save it in the skeleton + target_dict[reserve_name_map[res_name]] = req + elif res_name in da_scalar_reserves: + # If it applies to just day-ahead, save to DA cache + da_scalars.append(ScalarReserveValue(res_name, area_name, req)) + elif res_name in rt_scalar_reserves: + # If it applies to just real-time, save to RT cache + rt_scalars.append(ScalarReserveValue(res_name, area_name, req)) + + return ScalarReserveData(da_scalars, rt_scalars) + +def _identify_allowed_scalar_reserve_types(metadata_df:df) -> Tuple[Set[str], Set[str]]: + ''' Return a list of reserve types that apply to each type of model (DA and RT). + + Arguments + --------- + metadata_df:df + The contents of simulation_objects.csv in a DataFrame + + Returns + ------- + Returns a tuple with two lists of strings, one list for day-ahead models, + and another list for real-time models. 
Each list holds the names of reserve + categories whose scalar reserve values (if specified in reserves.csv) should + be applied to that type of model. + + (day_ahead_reserves, rt_reserves) + day_ahead_reserves: Sequence[str] + The names of reserve categories whose scalar values apply to day-ahead models + rt_reserves: Sequence[str] + The names of reserve categories whose scalar values apply to real-time models + ''' + if not 'Reserve_Products' in metadata_df.index: + # By default, accept all reserve types in both types of model + all_reserves = set(reserve_name_map.keys()) + return (all_reserves, all_reserves) + + row = metadata_df.loc['Reserve_Products'] + def parse_reserves(which:str) -> Sequence[str]: + all = row[which] + if type(all) is not str: + return {} + # strip off parentheses, if present + all = all.strip('()') + return set(s.strip() for s in all.split(',')) + + return (parse_reserves('DAY_AHEAD'), parse_reserves('REAL_TIME')) + + +def _compute_bus_load_participation_factors(model_data): + ''' + compute aggregate load per area, and then compute + load participation factors from each bus from that data. + + Returns + ======= + participation_factors:dict[str,float] + Maps bus name to the fraction of its area load that it carries (0 to 1) + ''' + elements = model_data['elements'] + + # Sum the loads for each area + area_total_load = {area:0 for area in elements['area']} + for name, load in elements["load"].items(): + area = elements["bus"][load["bus"]]["area"] + area_total_load[area] += load["p_load"] + + bus_load_participation_factors = {} + for name, load in elements["load"].items(): + area = elements["bus"][load["bus"]]["area"] + bus_load_participation_factors[name] = load["p_load"] / area_total_load[area] + + return bus_load_participation_factors + +def _read_timeseries_data(model_data:dict, rts_gmlc_dir:str, + start_time:datetime, end_time:datetime, + minutes_per_period:Dict[str,int]): + """ + Parse all relevant timeseries files + + Returns + ======= + all_timeseries: DataFrame + A DataFrame with the following columns: + [Simulation, Category, Object, Parameter, Series] + + The Series column holds the data as a pandas series, indexed by the datetime + of the value. 
+ + """ + + # All timeseries data that has already been read (map[filename] -> DataFrame) + timeseries_file_map = {} + + timeseries_pointer_df = pd.read_csv(os.path.join(rts_gmlc_dir, "timeseries_pointers.csv"), + header=0, dtype={'Object':str}) + + elements = model_data['elements'] + params_of_interest = { + 'Generator': { 'PMin MW', 'PMax MW'}, + 'Reserve': {'Requirement'}, + 'Area': {'MW Load'} + } + + # Create an array where we can gather parsed timeseries data + series_data = [None]*timeseries_pointer_df.shape[0] + + # Store the timeseries data in the timeseries DF + for idx,row in timeseries_pointer_df.iterrows(): + # Skip rows we don't ingest + if not row['Category'] in params_of_interest: + continue + if not row['Parameter'] in params_of_interest[row['Category']]: + continue + + # Skip generators not in skeleton + if row['Category'] == 'Generator' and not row['Object'] in elements['generator']: + continue + # Skip areas not in skeleton + if row['Category'] == 'Area' and not row['Object'] in elements['area']: + continue + + is_reserve = (row['Category'] == 'Reserve') + if is_reserve: + # Skip unrecognized reserve names + name = row['Object'] + if not is_valid_reserve_name(name, model_data): + continue + + # Read the timeseries file if we haven't already, using the + # canonical file path as a key into previously read filenames. + fname = os.path.abspath(os.path.join(rts_gmlc_dir, row['Data File'])) + if not fname in timeseries_file_map: + sim = row['Simulation'] + data = _read_timeseries_file(fname, minutes_per_period[sim], + start_time, end_time, row['Object']) + timeseries_file_map[fname] = data + + # Save a reference to the relevant data as a Series + series_data[idx]= timeseries_file_map[fname][row['Object']] + + # Add the 'Series' column + timeseries_pointer_df = timeseries_pointer_df.assign(Series=series_data) + + # Remove columns that we don't want to preserve + keepers= {'Simulation', 'Category', 'Object', 'Parameter', 'Series'} + for c in timeseries_pointer_df.columns: + if not c in keepers: + timeseries_pointer_df.pop(c) + + # Remove irrelevant rows + timeseries_pointer_df.dropna(subset=['Series'], inplace=True) + + # Sort by simulation + timeseries_pointer_df.sort_values(by='Simulation', inplace=True) + + return timeseries_pointer_df + +def _convert_to_datetime(when:Union[datetime,str]): + ''' + Convert an object to a datetime, if it is not already one. + + Parameters + ---------- + when: datetime or str + The date and time to be returned or parsed + + Returns + ------- + datetime + The passed in object as a datetime, parsing it if necessary + + If `when` is a datetime it is simply returned. + If `when` is a string it is parsed, inferring the format + If `when` is any other type, a TypeError is raised + + ''' + if isinstance(when, datetime): + return when + elif isinstance(when, str): + return dateutil.parser.parse(when) + else: + raise TypeError(f'Invalid argument, expected a datetime or str, got a {type(when)}') + +def _parse_datetimes_if_strings(begin_time:Union[datetime,str], end_time:Union[datetime,str]): + ''' + Ensure both dates are datetimes, parsing date strings if necessary. 
+ + Returns + ------- + begin_time:datetime + The begin_time as a datetime, parsing it if necessary + end_time:datetime + The end_time as a datetime, parsing it if necessary + ''' + + begin_time = _convert_to_datetime(begin_time) + end_time = _convert_to_datetime(end_time) + + return begin_time, end_time + +def set_t0_data(md:dict, base_dir:str="", t0_state:Optional[dict]=None): + """ Put t0 information into the passed in model dict + + Only t0 data for thermal generators is populated. + + Data comes from: + * t0_state, if provided + * otherwise, a file called initial_status.csv, if present + * otherwise, t0 data is left blank + + If t0_state is provided, it should be organized as t0_state[name][value], + where `name` is the name of a generator, and `value` is 'initial_status', + 'initial_p_output', and 'initial_q_output'. For any generator included in + t0_state, all three values must be present. + + If initial_status.csv is used, it must have a header row and may have + from 1 to 3 data rows. Row 1 is 'initial_status'. Row 2 is + 'initial_p_output'. Row 3 is 'initial_q_output'. Column headers are + the generator names. Default values are used for any missing rows. + + Any generators not mentioned in the data source are left untouched. + """ + if t0_state is not None: + for name, gen in md['elements']['generator'].items(): + if gen['generator_type']=='thermal' and name in t0_state: + gen['initial_status'] = t0_state[name]['initial_status'] + gen['initial_p_output'] = t0_state[name]['initial_p_output'] + gen['initial_q_output'] = t0_state[name]['initial_q_output'] + return + + state_fname = os.path.join(base_dir, 'initial_status.csv') + if os.path.exists(state_fname): + import csv + with open(state_fname, 'r') as f: + reader = csv.DictReader(f) + rows = list(reader) + + # We now have a list of rows, from 1 to 3 rows long. + # Row 1 is 'initial_status', row 2 is 'initial_p_output', and row 3 is 'initial_q_output'. + # Any missing row uses defaults + row_count = len(rows) + for name, gen in md['elements']['generator'].items(): + if gen['generator_type'] != 'thermal': + continue + if name not in reader.fieldnames: + continue + gen['initial_status'] = float(rows[0][name]) + if gen['initial_status'] < 0: + gen['initial_p_output'] = 0.0 + gen['initial_q_output'] = 0.0 + else: + if row_count >= 2: + gen['initial_p_output'] = float(rows[1][name]) + else: + gen["initial_p_output"] = gen["p_min"] + if row_count >= 3: + gen['initial_q_output'] = float(rows[2][name]) + else: + gen["initial_q_output"] = max(0., gen["q_min"]) + else: + logger.warning("Setting default t0 state in RTS-GMLC parser") + for name, gen in md['elements']['generator'].items(): + if gen['generator_type']=='thermal': + gen['initial_status'] = gen['min_up_time']+1 + gen['initial_p_output'] = gen['p_min'] + gen['initial_q_output'] = 0.
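+
+# Example usage (editor's sketch; the data path, dates, and generator name below are
+# hypothetical, not taken from this patch):
+#
+#   from egret.parsers.rts_gmlc.parser import create_ModelData
+#
+#   t0_state = {'101_STEAM_3': {'initial_status': 5.0,     # on, and up for 5 hours
+#                               'initial_p_output': 80.0,  # MW at t0
+#                               'initial_q_output': 0.0}}  # MVAR at t0
+#   md_obj = create_ModelData('RTS_Data/SourceData',
+#                             begin_time='2020-07-01', end_time='2020-07-02',
+#                             simulation='DAY_AHEAD', t0_state=t0_state)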