[Minor] Make LaggedRegressorsConfig a dataclass and Clean up model config (#1640)

* black np

* update default_factory

* fixes

* fix tests
ourownstory authored Aug 30, 2024
1 parent 4459338 commit 565c7d5
Showing 8 changed files with 67 additions and 66 deletions.
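In short: ConfigLaggedRegressors changes from a bare OrderedDict type alias into a dataclass that carries both the shared covariate-net layer sizes and the per-regressor entries. A minimal sketch of the new access pattern (illustrative usage, not code from this diff; it assumes the post-commit API):

```python
from collections import OrderedDict

from neuralprophet import configure

# The shared hidden-layer sizes for the covariate net now live on the config.
cfg = configure.ConfigLaggedRegressors(layers=[32, 16])
assert cfg.regressors is None  # __post_init__ starts with no regressors configured

# Per-regressor entries sit under .regressors; NeuralProphet.add_lagged_regressor()
# fills this OrderedDict, and downstream code now reads cfg.regressors / cfg.layers.
cfg.regressors = OrderedDict()
```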
14 changes: 9 additions & 5 deletions neuralprophet/configure.py
@@ -22,7 +22,6 @@

 @dataclass
 class Model:
-    lagged_reg_layers: Optional[List[int]]
     quantiles: Optional[List[float]] = None

     def setup_quantiles(self):
@@ -499,15 +498,21 @@ class LaggedRegressor:
     as_scalar: bool
     normalize: Union[bool, str]
     n_lags: int
-    lagged_reg_layers: Optional[List[int]]

     def __post_init__(self):
         if self.reg_lambda is not None:
             if self.reg_lambda < 0:
                 raise ValueError("regularization must be >= 0")


-ConfigLaggedRegressors = OrderedDictType[str, LaggedRegressor]
+@dataclass
+class ConfigLaggedRegressors:
+    layers: Optional[List[int]] = field(default_factory=list)
+    # List of hidden layers for shared NN across LaggedReg. The default value is ``[]``, which initializes no hidden layers.
+    regressors: OrderedDict[LaggedRegressor] = field(init=False)
+
+    def __post_init__(self):
+        self.regressors = None

Check failure on line 512 in neuralprophet/configure.py (GitHub Actions / pyright): Too few type arguments provided for "OrderedDict"; expected 2 but received 1 (reportInvalidTypeArguments).
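pyright's complaint is about the single-parameter `OrderedDict[LaggedRegressor]`: the generic takes a key type and a value type, and assigning `None` also calls for an `Optional`. A possible fix, not part of this commit, reusing the `OrderedDictType` alias that the removed line suggests is already imported in configure.py:

```python
# Hedged sketch of a type-clean version of the new dataclass.
from dataclasses import dataclass, field
from typing import List, Optional
from typing import OrderedDict as OrderedDictType


@dataclass
class ConfigLaggedRegressors:
    # Hidden layers for the shared NN across lagged regressors; [] means no hidden layers.
    layers: Optional[List[int]] = field(default_factory=list)
    # Two type arguments (key, value) satisfy pyright; Optional covers the None default.
    # "LaggedRegressor" is forward-referenced only to keep this sketch self-contained.
    regressors: Optional[OrderedDictType[str, "LaggedRegressor"]] = field(init=False, default=None)

    def __post_init__(self):
        self.regressors = None
```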


@@ -521,8 +526,7 @@ class Regressor:
 @dataclass
 class ConfigFutureRegressors:
     model: str
     regressors_layers: Optional[List[int]]
-
-    regressors: OrderedDict = field(init=False)  # contains RegressorConfig objects
+    regressors: OrderedDict = field(init=False)  # contains Regressor objects

     def __post_init__(self):
         self.regressors = None
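Since `__post_init__` leaves `regressors` as `None` until `add_lagged_regressor` populates it, every consumer below swaps its single `is not None` check for a two-level guard. The recurring pattern, sketched once:

```python
# The guard pattern this commit threads through process.py, df_utils.py,
# time_dataset.py, time_net.py, and plot_utils.py:
def iter_lagged_regressors(config_lagged_regressors):
    # Both the config object and its .regressors dict may be None.
    if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
        yield from config_lagged_regressors.regressors.items()
```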
22 changes: 11 additions & 11 deletions neuralprophet/data/process.py
@@ -101,8 +101,8 @@ def _reshape_raw_predictions_to_forecst_df(
     lagged_components = [
         "ar",
     ]
-    if config_lagged_regressors is not None:
-        for name in config_lagged_regressors.keys():
+    if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
+        for name in config_lagged_regressors.regressors.keys():
             lagged_components.append(f"lagged_regressor_{name}")
     for comp in lagged_components:
         if comp in components:
@@ -362,8 +362,8 @@ def _validate_column_name(
     if seasons and config_seasonality is not None:
         if name in config_seasonality.periods:
             raise ValueError(f"Name {name!r} already used for a seasonality.")
-    if covariates and config_lagged_regressors is not None:
-        if name in config_lagged_regressors:
+    if covariates and config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
+        if name in config_lagged_regressors.regressors.keys():
             raise ValueError(f"Name {name!r} already used for an added covariate.")
     if regressors and config_regressors.regressors is not None:
         if name in config_regressors.regressors.keys():
@@ -410,7 +410,7 @@ def _check_dataframe(
     df, regressors_to_remove, lag_regressors_to_remove = df_utils.check_dataframe(
         df=df,
         check_y=check_y,
-        covariates=model.config_lagged_regressors if exogenous else None,
+        covariates=model.config_lagged_regressors.regressors if exogenous else None,
         regressors=model.config_regressors.regressors if exogenous else None,
         events=model.config_events if exogenous else None,
         seasonalities=model.config_seasonality if exogenous else None,
@@ -423,12 +423,12 @@ def _check_dataframe(
             model.config_regressors.regressors.pop(reg)
         if model.config_regressors.regressors is not None and len(model.config_regressors.regressors) == 0:
             model.config_regressors.regressors = None
-    if model.config_lagged_regressors is not None:
+    if model.config_lagged_regressors is not None and model.config_lagged_regressors.regressors is not None:
         for reg in lag_regressors_to_remove:
             log.warning(f"Removing lagged regressor {reg} because it is not present in the data.")
-            model.config_lagged_regressors.pop(reg)
-        if len(model.config_lagged_regressors) == 0:
-            model.config_lagged_regressors = None
+            model.config_lagged_regressors.regressors.pop(reg)
+        if len(model.config_lagged_regressors.regressors) == 0:
+            model.config_lagged_regressors.regressors = None
     return df


@@ -528,8 +528,8 @@ def _handle_missing_data(
     data_columns = []
     if n_lags > 0:
         data_columns.append("y")
-    if config_lagged_regressors is not None:
-        data_columns.extend(config_lagged_regressors.keys())
+    if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
+        data_columns.extend(config_lagged_regressors.regressors.keys())
     if config_regressors is not None and config_regressors.regressors is not None:
         data_columns.extend(config_regressors.regressors.keys())
     if config_events is not None:
12 changes: 6 additions & 6 deletions neuralprophet/df_utils.py
@@ -103,11 +103,11 @@ def get_max_num_lags(n_lags: int, config_lagged_regressors: Optional[ConfigLagge
     int
         Maximum number of lags between the autoregression lags and the covariates lags.
     """
-    if config_lagged_regressors is not None:
+    if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
         # log.debug("config_lagged_regressors exists")
-        return max([n_lags] + [val.n_lags for key, val in config_lagged_regressors.items()])
+        return max([n_lags] + [val.n_lags for key, val in config_lagged_regressors.regressors.items()])
     else:
-        # log.debug("config_lagged_regressors does not exist")
+        # log.debug("config_lagged_regressors.regressors does not exist")
        return n_lags
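As a quick illustration of the new code path (hypothetical regressor names and lag counts, with keyword arguments inferred from the dataclass fields visible in this diff): with autoregression order 3 and two lagged regressors using 5 and 2 lags, the helper returns 5.

```python
from collections import OrderedDict

from neuralprophet import configure
from neuralprophet.df_utils import get_max_num_lags

cfg = configure.ConfigLaggedRegressors(layers=[])
cfg.regressors = OrderedDict(
    temperature=configure.LaggedRegressor(reg_lambda=None, as_scalar=False, normalize="auto", n_lags=5),
    humidity=configure.LaggedRegressor(reg_lambda=None, as_scalar=False, normalize="auto", n_lags=2),
)
print(get_max_num_lags(n_lags=3, config_lagged_regressors=cfg))  # max(3, 5, 2) -> 5
```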


@@ -203,11 +203,11 @@ def data_params_definition(
         norm_type=normalize,
     )

-    if config_lagged_regressors is not None:
-        for covar in config_lagged_regressors.keys():
+    if config_lagged_regressors is not None and config_lagged_regressors.regressors is not None:
+        for covar in config_lagged_regressors.regressors.keys():
             if covar not in df.columns:
                 raise ValueError(f"Lagged regressor {covar} not found in DataFrame.")
-            norm_type_lag = config_lagged_regressors[covar].normalize
+            norm_type_lag = config_lagged_regressors.regressors[covar].normalize
             if local_run_despite_global:
                 if len(df[covar].unique()) < 2:
                     norm_type_lag = "soft"
20 changes: 9 additions & 11 deletions neuralprophet/forecaster.py
@@ -503,7 +503,6 @@ def __init__(

         # Model
         self.config_model = configure.Model(
-            lagged_reg_layers=lagged_reg_layers,
             quantiles=quantiles,
         )
         self.config_model.setup_quantiles()
@@ -554,8 +553,11 @@ def __init__(
         self.config_events: Optional[configure.ConfigEvents] = None
         self.config_country_holidays: Optional[configure.ConfigCountryHolidays] = None

-        # Extra Regressors
-        self.config_lagged_regressors: Optional[configure.ConfigLaggedRegressors] = None
+        # Lagged Regressors
+        self.config_lagged_regressors = configure.ConfigLaggedRegressors(
+            layers=lagged_reg_layers,
+        )
+        # Future Regressors
         self.config_regressors = configure.ConfigFutureRegressors(
             model=future_regressors_model,
             regressors_layers=future_regressors_layers,
@@ -604,8 +606,6 @@ def add_lagged_regressor(
             optional, specify whether this regressor will be normalized prior to fitting.
             if ``auto``, binary regressors will not be normalized.
         """
-        lagged_reg_layers = self.config_model.lagged_reg_layers
-
         if n_lags == 0 or n_lags is None:
             raise ValueError(
                 f"Received n_lags {n_lags} for lagged regressor {names}. Please set n_lags > 0 or use options 'scalar' or 'auto'."
@@ -640,14 +640,13 @@
                 config_lagged_regressors=self.config_lagged_regressors,
                 config_regressors=self.config_regressors,
             )
-            if self.config_lagged_regressors is None:
-                self.config_lagged_regressors = OrderedDict()
-            self.config_lagged_regressors[name] = configure.LaggedRegressor(
+            if self.config_lagged_regressors.regressors is None:
+                self.config_lagged_regressors.regressors = OrderedDict()
+            self.config_lagged_regressors.regressors[name] = configure.LaggedRegressor(
                 reg_lambda=regularization,
                 normalize=normalize,
                 as_scalar=only_last_value,
                 n_lags=n_lags,
-                lagged_reg_layers=lagged_reg_layers,
             )
         return self
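Net effect at the public API, as a hedged usage sketch (the column name is hypothetical): the layer spec is given once at construction, and each added covariate lands under `config_lagged_regressors.regressors`.

```python
from neuralprophet import NeuralProphet

m = NeuralProphet(n_lags=7, lagged_reg_layers=[16])
m = m.add_lagged_regressor(names="temperature", n_lags=3)

print(m.config_lagged_regressors.layers)                   # [16]
print(list(m.config_lagged_regressors.regressors.keys()))  # ['temperature']
print(m.config_lagged_regressors.regressors["temperature"].n_lags)  # 3
```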

@@ -1036,7 +1035,7 @@ def fit(
                     self.config_events,
                     self.config_country_holidays,
                     self.config_trend,
-                    self.config_lagged_regressors,
+                    self.config_lagged_regressors.regressors,
                 ]
             )
             if reg_enabled:
@@ -2675,7 +2674,6 @@ def _init_model(self):
             n_lags=self.n_lags,
             max_lags=self.max_lags,
             ar_layers=self.config_ar.ar_layers,
-            lagged_reg_layers=self.config_model.lagged_reg_layers,
             metrics=self.metrics,
             id_list=self.id_list,
             num_trends_modelled=self.num_trends_modelled,
17 changes: 9 additions & 8 deletions neuralprophet/plot_utils.py
@@ -190,9 +190,10 @@ def check_if_configured(m, components, error_flag=False):  # move to utils
if "autoregression" in components and not m.config_ar.n_lags > 0:
components.remove("autoregression")
invalid_components.append("autoregression")
if "lagged_regressors" in components and m.config_lagged_regressors is None:
components.remove("lagged_regressors")
invalid_components.append("lagged_regressors")
if "lagged_regressors" in components:
if m.config_lagged_regressors is None or m.config_lagged_regressors.regressors is None:
components.remove("lagged_regressors")
invalid_components.append("lagged_regressors")
if "events" in components and (m.config_events is None and m.config_country_holidays is None):
components.remove("events")
invalid_components.append("events")
@@ -209,7 +210,7 @@ def check_if_configured(m, components, error_flag=False):  # move to utils
     return components


-def get_valid_configuration(  # move to utils
+def get_valid_configuration(
     m, components=None, df_name=None, valid_set=None, validator=None, forecast_in_focus=None, quantile=0.5
 ):
     """Validate and adapt the selected components to be plotted.
@@ -382,7 +383,7 @@ def get_valid_configuration(  # move to utils
if "lagged_regressors" in components:
if validator == "plot_components":
if forecast_in_focus is None:
for name in m.config_lagged_regressors.keys():
for name in m.config_lagged_regressors.regressors.keys():
plot_components.append(
{
"plot_name": f'Lagged Regressor "{name}"',
@@ -392,16 +393,16 @@ def get_valid_configuration(  # move to utils
                         }
                     )
             else:
-                for name in m.config_lagged_regressors.keys():
+                for name in m.config_lagged_regressors.regressors.keys():
                     plot_components.append(
                         {
                             "plot_name": f'Lagged Regressor "{name}" ({forecast_in_focus})-ahead',
                             "comp_name": f"lagged_regressor_{name}{forecast_in_focus}",
                         }
                     )
         elif validator == "plot_parameters":
-            for name in m.config_lagged_regressors.keys():
-                if m.config_lagged_regressors[name].as_scalar:
+            for name in m.config_lagged_regressors.regressors.keys():
+                if m.config_lagged_regressors.regressors[name].as_scalar:
                     lagged_scalar_regressors.append((name, m.model.get_covar_weights()[name].detach().numpy()))
                 else:
                     plot_components.append(
12 changes: 8 additions & 4 deletions neuralprophet/time_dataset.py
@@ -389,7 +389,9 @@ def tabularize_univariate_datetime_single_index(
inputs["lags"] = lags

# COVARIATES / LAGGED REGRESSORS: Lagged regressor inputs: analogous to LAGS
if config_lagged_regressors is not None: # and max_lags > 0:
if (
config_lagged_regressors is not None and config_lagged_regressors.regressors is not None
): # and max_lags > 0:
inputs["covariates"] = self.get_sample_lagged_regressors(
df_tensors=df_tensors, origin_index=origin_index, config_lagged_regressors=config_lagged_regressors
)
@@ -651,9 +653,11 @@ def create_nan_mask(
         valid_origins &= y_lags_valid

     # LAGGED REGRESSORS
-    if config_lagged_regressors is not None:  # and max_lags > 0:
+    if (
+        config_lagged_regressors is not None and config_lagged_regressors.regressors is not None
+    ):  # and max_lags > 0:
         reg_lags_valid = torch.ones(tensor_length, dtype=torch.bool)
-        for name, lagged_regressor in config_lagged_regressors.items():
+        for name, lagged_regressor in config_lagged_regressors.regressors.items():
             n_reg_lags = lagged_regressor.n_lags
             if n_reg_lags > 0:
                 # boolean vector, starting at origin_index = n_lags -1
@@ -724,7 +728,7 @@ def get_sample_targets(self, df_tensors, origin_index, n_forecasts, max_lags, pr
     def get_sample_lagged_regressors(self, df_tensors, origin_index, config_lagged_regressors):
         lagged_regressors = OrderedDict({})
         # Future TODO: optimize this computation for many lagged_regressors
-        for name, lagged_regressor in config_lagged_regressors.items():
+        for name, lagged_regressor in config_lagged_regressors.regressors.items():
             covar_lags = lagged_regressor.n_lags
             assert covar_lags > 0
             # Indexing tensors instead of DataFrame
34 changes: 14 additions & 20 deletions neuralprophet/time_net.py
@@ -56,7 +56,6 @@ def __init__(
         n_lags: int = 0,
         max_lags: int = 0,
         ar_layers: Optional[List[int]] = [],
-        lagged_reg_layers: Optional[List[int]] = [],
         compute_components_flag: bool = False,
         metrics: Optional[np_types.CollectMetricsMode] = {},
         id_list: List[str] = ["__df__"],
@@ -99,14 +98,6 @@ def __init__(
             ----
             The default value is ``[]``, which initializes no hidden layers.
-
-        lagged_reg_layers : list
-            List of hidden layers (for covariate-Net).
-
-            Note
-            ----
-            The default value is ``[]``, which initializes no hidden layers.
-
         compute_components_flag : bool
             Flag whether to compute the components of the model or not.
         metrics : dict
@@ -283,12 +274,11 @@ def __init__(
                     nn.init.kaiming_normal_(lay.weight, mode="fan_in")

         # Lagged regressors
-        self.lagged_reg_layers = lagged_reg_layers
         self.config_lagged_regressors = config_lagged_regressors
-        if self.config_lagged_regressors is not None:
+        if self.config_lagged_regressors is not None and self.config_lagged_regressors.regressors is not None:
             covar_net_layers = []
-            d_inputs = sum([covar.n_lags for _, covar in self.config_lagged_regressors.items()])
-            for d_hidden_i in self.lagged_reg_layers:
+            d_inputs = sum([covar.n_lags for _, covar in self.config_lagged_regressors.regressors.items()])
+            for d_hidden_i in self.config_lagged_regressors.layers:
                 covar_net_layers.append(nn.Linear(d_inputs, d_hidden_i, bias=True))
                 covar_net_layers.append(nn.ReLU())
                 d_inputs = d_hidden_i
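To make the wiring concrete, a standalone sketch with hypothetical sizes (the output projection that follows this loop sits outside the shown hunk): two covariates with 5 and 2 lags give `d_inputs = 7`, and `layers=[16]` adds one hidden block.

```python
import torch
import torch.nn as nn

d_inputs = 5 + 2  # total lags across the two hypothetical covariates
covar_net_layers = []
for d_hidden_i in [16]:  # the shared cfg.layers
    covar_net_layers.append(nn.Linear(d_inputs, d_hidden_i, bias=True))
    covar_net_layers.append(nn.ReLU())
    d_inputs = d_hidden_i
covar_net = nn.Sequential(*covar_net_layers)  # the real code also appends an output layer

print(covar_net(torch.randn(8, 7)).shape)  # torch.Size([8, 16])
```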
@@ -325,16 +315,16 @@ def get_covar_weights(self, covar_input=None) -> torch.Tensor:
"""
Get attributions of covariates network w.r.t. the model input.
"""
if self.config_lagged_regressors is not None:
if self.config_lagged_regressors is not None and self.config_lagged_regressors.regressors is not None:
# Accumulate the lags of the covariates
covar_splits = np.add.accumulate(
[covar.n_lags for _, covar in self.config_lagged_regressors.items()][:-1]
[covar.n_lags for _, covar in self.config_lagged_regressors.regressors.items()][:-1]
).tolist()
# If actual covariates are provided, use them to compute the attributions
if covar_input is not None:
covar_input = torch.cat([covar for _, covar in covar_input.items()], axis=1)
# Calculate the attributions w.r.t. the inputs
if self.lagged_reg_layers == []:
if self.config_lagged_regressors.layers == []:
attributions = self.covar_net[0].weight
else:
attributions = interprete_model(self, "covar_net", "forward_covar_net", covar_input)
@@ -345,7 +335,7 @@ def get_covar_weights(self, covar_input=None) -> torch.Tensor:
                 axis=1,
             )
             # Combine attributions and covariate name
-            covar_attributions = dict(zip(self.config_lagged_regressors.keys(), attributions_split))
+            covar_attributions = dict(zip(self.config_lagged_regressors.regressors.keys(), attributions_split))
         else:
             covar_attributions = None
         return covar_attributions
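The `covar_splits` built above are cumulative lag counts that slice the flat attribution matrix back into per-covariate chunks; a hedged numeric illustration of just that mechanism:

```python
import numpy as np
import torch

lags_per_covar = [5, 2, 3]  # hypothetical: three covariates -> 10 attribution columns
covar_splits = np.add.accumulate(lags_per_covar[:-1]).tolist()  # [5, 7]

attributions = torch.arange(10.0).reshape(1, 10)
chunks = torch.tensor_split(attributions, covar_splits, dim=1)
print([c.shape[1] for c in chunks])  # [5, 2, 3]
```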
@@ -692,7 +682,11 @@ def compute_components(self, inputs: Dict, components_raw: Dict, meta: Dict) ->
             )
         if self.n_lags > 0 and "lags" in inputs:
             components["ar"] = components_raw["lags"]
-        if self.config_lagged_regressors is not None and "covariates" in inputs:
+        if (
+            self.config_lagged_regressors is not None
+            and self.config_lagged_regressors.regressors is not None
+            and "covariates" in inputs
+        ):
             # Combined forward pass
             all_covariates = components_raw["covariates"]
             # Calculate the contribution of each covariate on each forecast
@@ -1040,11 +1034,11 @@ class DeepNet(nn.Module):
     A simple, general purpose, fully connected network
     """

-    def __init__(self, d_inputs, d_outputs, lagged_reg_layers=[]):
+    def __init__(self, d_inputs, d_outputs, layers=[]):
         # Perform initialization of the pytorch superclass
         super(DeepNet, self).__init__()
         layers = []
-        for d_hidden_i in lagged_reg_layers:
+        for d_hidden_i in layers:
             layers.append(nn.Linear(d_inputs, d_hidden_i, bias=True))
             layers.append(nn.ReLU())
             d_inputs = d_hidden_i
2 changes: 1 addition & 1 deletion tests/test_regularization.py
@@ -169,7 +169,7 @@ def test_regularization_lagged_regressor():
     lagged_regressors_config = dict(lagged_regressors)

     weights = m.model.get_covar_weights()
-    for name in m.config_lagged_regressors.keys():
+    for name in m.config_lagged_regressors.regressors.keys():
         weight_average = np.average(weights[name].detach().numpy())

         lagged_regressor_weight = lagged_regressors_config[name]
