Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 190 reindexing #192

Merged
merged 13 commits into from
Mar 19, 2022
4 changes: 2 additions & 2 deletions pyaf/TS/Exogenous.py
Original file line number Diff line number Diff line change
@@ -91,9 +91,9 @@ def transformDataset(self, df):
return df1;

def createEncodedExogenous(self):
self.mExogDummiesDataFrame = pd.DataFrame();
self.mExogDummiesDataFrame = pd.DataFrame(index = self.mExogenousDataFrame.index);
self.mEncodedExogenous = [];
self.mEncodedExogenousDataFrame = pd.DataFrame();
self.mEncodedExogenousDataFrame = pd.DataFrame(index = self.mExogenousDataFrame.index);
self.mEncodedExogenousDataFrame[self.mDateVariable] = self.mExogenousDataFrame[self.mDateVariable];
for exog in self.mExogenousVariables:
if(exog not in self.mExcluded):
2 changes: 1 addition & 1 deletion pyaf/TS/Intermittent_Models.py
Original file line number Diff line number Diff line change
@@ -127,7 +127,7 @@ def fit(self):
self.mSignal = self.mTimeInfo.mSignal;
lAREstimFrame = self.mSplit.getEstimPart(self.mARFrame)
self.mOffset = lAREstimFrame[self.mCycleResidueName].min()
print("OFFSET", (self.mCycleResidueName, self.mOffset))
# print("OFFSET", (self.mCycleResidueName, self.mOffset))
self.estimate_alpha(lAREstimFrame)
self.mFeatureSelector = None;
self.mInputNamesAfterSelection = self.mInputNames;
1 change: 0 additions & 1 deletion pyaf/TS/Keras_Models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import numpy as np
import pandas as pd
# from sklearn.preprocessing import StandardScaler, MinMaxScaler
from . import SignalDecomposition_AR as tsar
import sys

2 changes: 1 addition & 1 deletion pyaf/TS/Perf.py
Original file line number Diff line number Diff line change
@@ -79,7 +79,7 @@ def compute_LnQ(self, signal , estimator):

def dump_perf_data(self, signal , estimator):
logger = tsutil.get_pyaf_logger();
df = pd.DataFrame();
df = pd.DataFrame(index = signal.index);
df['sig'] = signal.values;
df['est'] = estimator.values;
logger.debug(str(df.head()));
2 changes: 1 addition & 1 deletion pyaf/TS/PredictionIntervals.py
Original file line number Diff line number Diff line change
@@ -16,7 +16,7 @@ class cPredictionIntervalsEstimator:

def __init__(self):
self.mModel = None;
self.mSignalFrame = pd.DataFrame()
self.mSignalFrame = None
self.mHorizon = -1;
self.mFitPerformances = {}
self.mForecastPerformances = {}
20 changes: 4 additions & 16 deletions pyaf/TS/Scikit_Models.py
Original file line number Diff line number Diff line change
@@ -12,6 +12,7 @@ def __init__(self , cycle_residue_name, P , iExogenousInfo = None):
self.mNbLags = P;
self.mNbExogenousLags = P;
self.mScikitModel = None;
self.mFeatureSelector = None

def dumpCoefficients(self, iMax=10):
# print(self.mScikitModel.__dict__);
@@ -24,11 +25,6 @@ def set_name(self):
assert(0);


def is_used(self, name):
    """Report whether the input column `name` survived feature selection.

    When ``self.mFeatureSelector`` is falsy (e.g. ``None``) every name is
    reported as used; otherwise only the names listed in
    ``self.mInputNamesAfterSelection`` are.
    """
    if not self.mFeatureSelector:
        return True
    return name in self.mInputNamesAfterSelection

def fit(self):
# print("ESTIMATE_SCIKIT_MODEL_START" , self.mCycleResidueName);

@@ -40,12 +36,12 @@ def fit(self):
self.mSignal = self.mTimeInfo.mSignal;
lAREstimFrame = self.mSplit.getEstimPart(self.mARFrame)

# print("mAREstimFrame columns :" , self.mAREstimFrame.columns);
lARInputs = lAREstimFrame[self.mInputNames].values
lARTarget = lAREstimFrame[series].values
# print(len(self.mInputNames), lARInputs.shape , lARTarget.shape)
assert(lARInputs.shape[1] > 0);
assert(lARTarget.shape[0] > 0);
assert(lARInputs.shape[1] == len(self.mInputNames))

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_regression
@@ -69,9 +65,9 @@ def fit(self):

if(self.mFeatureSelector):
lARInputsAfterSelection = self.mFeatureSelector.transform(lARInputs);
# print(self.mInputNames , self.mFeatureSelector.get_support(indices=True));
lSupport = self.mFeatureSelector.get_support(indices=True);
self.mInputNamesAfterSelection = [self.mInputNames[k] for k in lSupport];

else:
lARInputsAfterSelection = lARInputs;
self.mInputNamesAfterSelection = self.mInputNames;
@@ -115,16 +111,8 @@ def transformDataset(self, df, horizon_index = 1):
series = self.mCycleResidueName;
if(self.mExogenousInfo is not None):
df = self.mExogenousInfo.transformDataset(df);
# print(df.columns);
# print(df.info());
# print(df.head());
# print(df.tail());
lag_df = self.generateLagsForForecast(df);
# print(self.mInputNames);
# print(self.mFormula, "\n", lag_df.columns);
# lag_df.to_csv("LAGGED_ " + str(self.mNbLags) + ".csv");
# print(len(list(lag_df.columns)) , len(self.mInputNamesAfterSelection))
inputs_after_feat_selection = lag_df.values[:,1:] # the first column is the signal
inputs_after_feat_selection = lag_df[self.mInputNamesAfterSelection].values
# inputs_after_feat_selection = self.mFeatureSelector.transform(inputs) if self.mFeatureSelector else inputs;
if(self.mScikitModel is not None):
pred = self.mScikitModel.predict(inputs_after_feat_selection)
9 changes: 2 additions & 7 deletions pyaf/TS/SignalDecomposition.py
Original file line number Diff line number Diff line change
@@ -43,18 +43,13 @@ def sample_signal_if_needed(iInputDS, iOptions):
class cSignalDecompositionOneTransform:

def __init__(self):
self.mSignalFrame = pd.DataFrame()
self.mSignalFrame = None
self.mTime = None
self.mSignal = None
self.mTimeInfo = tsti.cTimeInfo();
self.mForecastFrame = pd.DataFrame()
self.mTransformation = tstransf.cSignalTransform_None();


def serialize(self):
    """Dump this object with joblib to a file named ``<time>_<signal>_TS.pkl``.

    The file name is built by concatenating ``self.mTimeInfo.mTime`` and
    ``self.mSignal``.
    """
    # NOTE(review): recent scikit-learn releases no longer ship
    # ``sklearn.externals.joblib`` -- confirm this import still resolves.
    from sklearn.externals import joblib
    lFileName = self.mTimeInfo.mTime + "_" + self.mSignal + "_TS.pkl"
    joblib.dump(self, lFileName)


def setParams(self , iInputDS, iTime, iSignal, iHorizon, iTransformation,
iDecomspositionType, iExogenousData = None):
assert(iInputDS.shape[0] > 0)
142 changes: 69 additions & 73 deletions pyaf/TS/SignalDecomposition_AR.py
Original file line number Diff line number Diff line change
@@ -21,8 +21,8 @@
class cAbstractAR:
def __init__(self , cycle_residue_name, iExogenousInfo = None):
self.mTimeInfo = tsti.cTimeInfo()
self.mCycleFrame = pd.DataFrame()
self.mARFrame = pd.DataFrame()
self.mCycleFrame = None
self.mARFrame = None
self.mCycleResidueName = cycle_residue_name
self.mCycle = None
self.mTrend = None
@@ -57,6 +57,7 @@ def plot(self):
def register_lag(self, series, p):
name = series+'_Lag' + str(p);
# print("register_lag", (series , p , name))
assert(name not in self.mInputNames)
self.mInputNames.append(name);
self.mLagsForSeries[series] = self.mLagsForSeries.get(series , [])
self.mLagsForSeries[series].append(p)
@@ -79,37 +80,29 @@ def shift_series(self, series, p, idefault):
first_values = np.full((p), idefault, dtype=lType)
new_values = np.hstack((first_values, series.values[0:N-p]));
new_values = new_values.astype(lType)
return new_values;

return new_values
def getDefaultValue(self, series):
    """Return the entry stored for `series` in ``self.mDefaultValues``.

    A missing key propagates whatever the container raises
    (``KeyError`` for a plain dict).
    """
    lDefaults = self.mDefaultValues
    return lDefaults[series]

def is_used(self, name):
    """Default policy: every name is reported as used; `name` is not inspected."""
    return True

def addLagForForecast(self, df, lag_df, series, p):
    """Store ``df[series]`` shifted by `p` into `lag_df` as ``<series>_Lag<p>``.

    Nothing is written when ``self.is_used`` rejects the lag name.
    The lag `p` must already be present in ``self.mLagsForSeries[series]``
    (checked with an assert).  `lag_df` is modified in place; the method
    returns None.
    """
    lLagName = series + '_Lag' + str(p)
    if self.is_used(lLagName):
        assert p in self.mLagsForSeries[series]
        lSource = df[series]
        # Leading positions are filled with the default value kept for this series.
        lag_df[lLagName] = self.shift_series(lSource, p, self.mDefaultValues[series])

def generateLagsForForecast(self, df):
    """Build the frame of lagged input columns used at forecast time.

    For every lag ``p`` in ``self.mLagsForSeries`` of the cycle residue, and
    (when ``self.mExogenousInfo`` is not None) of each encoded exogenous
    variable that has lags, a column ``<series>_Lag<p>`` is produced
    with ``self.shift_series`` using the default value kept in
    ``self.mDefaultValues``.

    All columns are collected in a dict and the frame is created in a single
    constructor call, so no intermediate frame is built and then discarded.

    Returns a DataFrame that shares ``df.index``; it does not contain the
    un-lagged residue column itself.
    """
    lResidue = self.mCycleResidueName
    lSeries = df[lResidue]
    lDict = {}
    # signal lags ... plain old AR model
    for p in self.mLagsForSeries[lResidue]:
        lName = lResidue + '_Lag' + str(p)
        lDict[lName] = self.shift_series(lSeries, p, self.mDefaultValues[lResidue])
    # Exogenous variables lags
    if self.mExogenousInfo is not None:
        for ex in self.mExogenousInfo.mEncodedExogenous:
            lExogLags = self.mLagsForSeries.get(ex)
            if not lExogLags:
                # no lag registered for this variable (missing key or empty list)
                continue
            for p in lExogLags:
                lName = ex + '_Lag' + str(p)
                lDict[lName] = self.shift_series(df[ex], p, self.mDefaultValues[ex])
    # Every column is cast to the residue's dtype by the constructor.
    return pd.DataFrame(lDict, index=df.index, dtype=lSeries.dtype)


@@ -151,8 +144,8 @@ def transformDataset(self, df, horizon_index = 1):
class cAutoRegressiveEstimator:
def __init__(self):
self.mTimeInfo = tsti.cTimeInfo()
self.mCycleFrame = pd.DataFrame()
self.mARFrame = pd.DataFrame()
self.mCycleFrame = None
self.mARFrame = None
self.mARList = {}
self.mExogenousInfo = None;

@@ -176,58 +169,61 @@ def shift_series(self, series, p):
first_values = np.full((p), series.values[0], dtype=lType)
new_values = np.hstack((first_values, series.values[0:N-p]));
new_values = new_values.astype(lType)
return new_values;

def addLagForTraining(self, df, lag_df, series, autoreg, p):
name = series+'_Lag' + str(p);
if(name in lag_df.columns):
autoreg.register_lag(series, p);
return lag_df;
return new_values

def generateLagsForTraining(self, df, series, pMinMax):
(pmin, pmax) = pMinMax
lSeries = df[series];
lShiftedSeries = self.shift_series(lSeries, p);
self.mDefaultValues[series] = lSeries.values[0];

lShiftedEstim = self.mSplit.getEstimPart(lShiftedSeries);
lAcceptable = self.is_not_constant(lShiftedEstim);
if(lAcceptable):
autoreg.register_lag(series, p);
lag_df[name] = lShiftedSeries;
self.mLagOrigins[name] = series;
return lag_df;

def addLagsForTraining(self, df, cycle_residue, iNeedExogenous = False):
lDict = {}
lags = []
for p in range(pmin, pmax+1):
name = series+'_Lag' + str(p)
lShiftedSeries = self.shift_series(lSeries, p)
lShiftedEstim = self.mSplit.getEstimPart(lShiftedSeries);
lAcceptable = self.is_not_constant(lShiftedEstim);
if(lAcceptable):
lDict[name] = lShiftedSeries
lags.append((series, p))
lag_df = pd.DataFrame(lDict, index = df.index, dtype = lSeries.dtype)
return (lag_df, lags)

def addLagsForTraining(self, df, cycle_residue):
logger = tsutil.get_pyaf_logger();
add_lag_start_time = time.time()
P = self.get_nb_lags();
lag_df, lags = self.generateLagsForTraining(df, cycle_residue, (1, P));
lag_dfs = [lag_df]
for autoreg in self.mARList[cycle_residue]:
P = autoreg.mNbLags;
for p in range(1,P+1):
# signal lags ... plain old AR model
self.addLagForTraining(df, self.mARFrame, cycle_residue, autoreg, p);
# Avoid dataframe fragmentation.
self.mARFrame = self.mARFrame.copy()

# Exogenous variables lags
if(autoreg.mExogenousInfo is not None):
P1 = P;
lExogCount = len(autoreg.mExogenousInfo.mEncodedExogenous);
lNbVars = P * lExogCount;
if(lNbVars >= self.mOptions.mMaxFeatureForAutoreg):
P1 = self.mOptions.mMaxFeatureForAutoreg // lExogCount;
autoreg.mNbExogenousLags = P1;
for ex in autoreg.mExogenousInfo.mEncodedExogenous:
for p in range(1,P1+1):
# print(autoreg.mExogenousInfo.mEncodedExogenous);
# print(df.columns);
self.addLagForTraining(df, self.mARFrame, ex, autoreg, p);
# Avoid dataframe fragmentation.
self.mARFrame = self.mARFrame.copy()
for lag in lags:
(name , p) = lag
autoreg.register_lag(name, p)

# Exogenous variables lags
if(self.mExogenousInfo is not None):
P1 = P;
lExogCount = len(self.mExogenousInfo.mEncodedExogenous);
lNbVars = P * lExogCount;
if(lNbVars >= self.mOptions.mMaxFeatureForAutoreg):
P1 = self.mOptions.mMaxFeatureForAutoreg // lExogCount;
autoreg.mNbExogenousLags = P1;
for ex in self.mExogenousInfo.mEncodedExogenous:
(lag_df, lags_ex) = self.generateLagsForTraining(df, ex, (1, P1));
lag_dfs = lag_dfs + [lag_df]
for autoreg in self.mARList[cycle_residue]:
if(autoreg.mExogenousInfo is not None): # ARX,XGBX, ... only
for lag in lags_ex:
(name , p) = lag
autoreg.register_lag(name, p)

# print("AUTOREG_DETAIL" , P , len(autoreg.mInputNames));
if(autoreg.mExogenousInfo is not None):
assert((P + P*len(autoreg.mExogenousInfo.mEncodedExogenous)) >= len(autoreg.mInputNames));
else:
assert(P >= len(autoreg.mInputNames));

self.mARFrame = pd.concat([self.mARFrame] + lag_dfs, axis = 1)

if(self.mOptions.mDebugProfile):
logger.info("LAG_TIME_IN_SECONDS " + self.mTimeInfo.mSignal + " " +
str(len(self.mARFrame.columns)) + " " +
@@ -237,13 +233,12 @@ def addLagsForTraining(self, df, cycle_residue, iNeedExogenous = False):
# @profile
def estimate_ar_models_for_cycle(self, cycle_residue):
logger = tsutil.get_pyaf_logger();
self.mARFrame = pd.DataFrame();
self.mARFrame = pd.DataFrame(index = self.mCycleFrame.index);
self.mTimeInfo.addVars(self.mARFrame);
self.mCycleFrame[cycle_residue] = self.mCycleFrame[cycle_residue]
self.mARFrame[cycle_residue] = self.mCycleFrame[cycle_residue]

self.mDefaultValues = {};
self.mLagOrigins = {};

if(self.mOptions.mDebugProfile):
logger.info("AR_MODEL_ADD_LAGS_START '" +
@@ -278,7 +273,6 @@ def estimate_ar_models_for_cycle(self, cycle_residue):
autoreg.mARFrame = self.mARFrame
autoreg.mTimeInfo = self.mTimeInfo;
autoreg.mSplit = self.mSplit;
autoreg.mLagOrigins = self.mLagOrigins;
autoreg.mDefaultValues = self.mDefaultValues;
autoreg.mDecompositionType = self.mDecompositionType
autoreg.fit();
@@ -300,7 +294,11 @@ def check_not_nan(self, sig , name):
pass



def get_nb_lags(self):
    """Return the number of lags to use.

    This is a quarter of the row count of ``self.mCycleFrame`` (integer
    division), capped at ``self.mOptions.mMaxAROrder``.
    """
    lQuarter = self.mCycleFrame.shape[0] // 4
    # On a tie min() yields its first argument, i.e. the configured cap.
    return min(self.mOptions.mMaxAROrder, lQuarter)

# @profile
def estimate(self):
@@ -319,9 +317,7 @@ def estimate(self):
self.mARList[cycle_residue] = [];
if(self.mOptions.mActiveAutoRegressions['NoAR']):
self.mARList[cycle_residue] = [ cZeroAR(cycle_residue)];
lLags = self.mCycleFrame[cycle_residue].shape[0] // 4;
if(lLags >= self.mOptions.mMaxAROrder):
lLags = self.mOptions.mMaxAROrder;
lLags = self.get_nb_lags()
lKeep = (self.mCycleFrame[cycle_residue].shape[0] > 12) and (self.mCycleFrame[cycle_residue].std() > 0.00001)
if(not lKeep):
logger.info("SKIPPING_AR_MODELS_WITH_LOW_VARIANCE_CYCLE_RESIDUE '" + cycle_residue + "'");
@@ -408,5 +404,5 @@ def estimate(self):
for cycle_residue in self.mARList.keys():
self.estimate_ar_models_for_cycle(cycle_residue);
for autoreg in self.mARList[cycle_residue]:
autoreg.mARFrame = pd.DataFrame();
autoreg.mARFrame = pd.DataFrame(index = self.mCycleFrame.index);
del self.mARFrame;
Loading