From 60e220c4312ced63de1354b607a365484c8a74e3 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:43:17 +0100 Subject: [PATCH 01/13] Internal Datframes index refactoring #190 --- pyaf/TS/Exogenous.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyaf/TS/Exogenous.py b/pyaf/TS/Exogenous.py index dd22feb80..89fd0e006 100644 --- a/pyaf/TS/Exogenous.py +++ b/pyaf/TS/Exogenous.py @@ -91,9 +91,9 @@ def transformDataset(self, df): return df1; def createEncodedExogenous(self): - self.mExogDummiesDataFrame = pd.DataFrame(); + self.mExogDummiesDataFrame = pd.DataFrame(index = self.mExogenousDataFrame.index); self.mEncodedExogenous = []; - self.mEncodedExogenousDataFrame = pd.DataFrame(); + self.mEncodedExogenousDataFrame = pd.DataFrame(index = self.mExogenousDataFrame.index); self.mEncodedExogenousDataFrame[self.mDateVariable] = self.mExogenousDataFrame[self.mDateVariable]; for exog in self.mExogenousVariables: if(exog not in self.mExcluded): From 4dcac9954082e97a90cca33bc6edc3333fac127d Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:44:04 +0100 Subject: [PATCH 02/13] Internal Datframes index refactoring #190 --- pyaf/TS/Perf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaf/TS/Perf.py b/pyaf/TS/Perf.py index 666d55cbb..ce9b6183d 100644 --- a/pyaf/TS/Perf.py +++ b/pyaf/TS/Perf.py @@ -79,7 +79,7 @@ def compute_LnQ(self, signal , estimator): def dump_perf_data(self, signal , estimator): logger = tsutil.get_pyaf_logger(); - df = pd.DataFrame(); + df = pd.DataFrame(index = signal.index); df['sig'] = signal.values; df['est'] = estimator.values; logger.debug(str(df.head())); From 6671a60187986ffd6cbc520663bdbdd59fc485b2 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:44:31 +0100 Subject: [PATCH 03/13] Internal Datframes index refactoring #190 --- pyaf/TS/PredictionIntervals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaf/TS/PredictionIntervals.py b/pyaf/TS/PredictionIntervals.py index e222369be..cac6e17cb 100644 --- a/pyaf/TS/PredictionIntervals.py +++ b/pyaf/TS/PredictionIntervals.py @@ -16,7 +16,7 @@ class cPredictionIntervalsEstimator: def __init__(self): self.mModel = None; - self.mSignalFrame = pd.DataFrame() + self.mSignalFrame = None self.mHorizon = -1; self.mFitPerformances = {} self.mForecastPerformances = {} From f1c9670310c3609cc2943fb85561183fce0f7284 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:44:58 +0100 Subject: [PATCH 04/13] Internal Datframes index refactoring #190 --- pyaf/TS/TimeSeriesModel.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaf/TS/TimeSeriesModel.py b/pyaf/TS/TimeSeriesModel.py index 15618840d..b5093b4b3 100644 --- a/pyaf/TS/TimeSeriesModel.py +++ b/pyaf/TS/TimeSeriesModel.py @@ -63,7 +63,7 @@ def getComplexity(self): return lComplexity; def updatePerfs(self, compute_all_indicators = False): - self.mModelFrame = pd.DataFrame(); + self.mModelFrame = pd.DataFrame(index = self.mTrend.mSignalFrame.index); lSignal = self.mTrend.mSignalFrame[self.mSignal] N = lSignal.shape[0]; self.mTrend.mTimeInfo.addVars(self.mModelFrame); From 32e2cfe54d0ca1cab0a659f893917908013e6fe3 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:45:23 +0100 Subject: [PATCH 05/13] Internal Datframes index refactoring #190 --- pyaf/TS/Time.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaf/TS/Time.py b/pyaf/TS/Time.py index 5fce8584e..c92028d33 100644 --- a/pyaf/TS/Time.py +++ b/pyaf/TS/Time.py @@ -16,7 +16,7 @@ class cTimeInfo: # class data def __init__(self): - self.mSignalFrame = pd.DataFrame() + self.mSignalFrame = None self.mTimeMin = None; self.mTimeMax = None; self.mTimeMinMaxDiff = None; From 0d994cddc2f713d0c0949340780742d200553eb0 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:45:51 +0100 Subject: [PATCH 06/13] Internal Datframes index refactoring #190 --- pyaf/TS/SignalDecomposition_Trend.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/pyaf/TS/SignalDecomposition_Trend.py b/pyaf/TS/SignalDecomposition_Trend.py index d924b4a0b..8b428ce76 100644 --- a/pyaf/TS/SignalDecomposition_Trend.py +++ b/pyaf/TS/SignalDecomposition_Trend.py @@ -13,17 +13,13 @@ from . import Plots as tsplot from . import Utils as tsutil -import sklearn as skl -import sklearn.preprocessing as preprocessing import sklearn.linear_model as linear_model -from sklearn.feature_selection import RFE -from sklearn.linear_model import LinearRegression class cAbstractTrend: def __init__(self): - self.mSignalFrame = pd.DataFrame() + self.mSignalFrame = None self.mTimeInfo = tsti.cTimeInfo() - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = None self.mTrendPerf = tsperf.cPerf(); self.mOutName = "" self.mFormula = None; @@ -77,7 +73,7 @@ def addTrendInputVariables(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; self.mOutName = self.mSignal + "_" + self.mOutName; - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = pd.DataFrame(index = self.mTimeInfo.mSignalFrame.index) self.mTimeInfo.addVars(self.mTrendFrame); def transformDataset(self, df): @@ -114,7 +110,7 @@ def addTrendInputVariables(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; self.mOutName = self.mSignal + "_" + self.mOutName; - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = pd.DataFrame(index = self.mTimeInfo.mSignalFrame.index) self.mTimeInfo.addVars(self.mTrendFrame); def replaceFirstMissingValue(self, df, series): @@ -162,7 +158,7 @@ def addTrendInputVariables(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; self.mOutName = self.mSignal + "_" + self.mOutName; - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = pd.DataFrame(index = self.mTimeInfo.mSignalFrame.index) self.mTimeInfo.addVars(self.mTrendFrame); def fit(self): @@ -201,7 +197,7 @@ def addTrendInputVariables(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; self.mOutName = self.mSignal + "_" + self.mOutName; - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = pd.DataFrame(index = self.mTimeInfo.mSignalFrame.index) self.mTimeInfo.addVars(self.mTrendFrame); def fit(self): @@ -240,7 +236,7 @@ def addTrendInputVariables(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; self.mOutName = self.mSignal + "_" + self.mOutName; - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = pd.DataFrame(index = self.mTimeInfo.mSignalFrame.index) self.mTimeInfo.addVars(self.mTrendFrame); def fit(self): @@ -285,7 +281,7 @@ def addTrendInputVariables(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; self.mOutName = self.mSignal + "_" + self.mOutName; - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = pd.DataFrame(index = self.mTimeInfo.mSignalFrame.index) self.mTimeInfo.addVars(self.mTrendFrame); self.mTrendFrame[self.mTimeInfo.mNormalizedTimeColumn + "_^2"] = self.mTrendFrame[self.mTimeInfo.mNormalizedTimeColumn] ** 2; self.mTrendFrame[self.mTimeInfo.mNormalizedTimeColumn + "_^3"] = self.mTrendFrame[self.mTimeInfo.mNormalizedTimeColumn] ** 3; @@ -337,9 +333,9 @@ def dump_values(self): class cTrendEstimator: def __init__(self): - self.mSignalFrame = pd.DataFrame() + self.mSignalFrame = None self.mTimeInfo = tsti.cTimeInfo() - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = None def needMovingTrend(self, df, i): @@ -407,7 +403,7 @@ def check_residue(self , trend, sig, name): def estimateTrends(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; - self.mTrendFrame = pd.DataFrame() + self.mTrendFrame = pd.DataFrame(index = self.mSignalFrame.index) self.mTimeInfo.addVars(self.mTrendFrame); for trend in self.mTrendList: trend.mOptions = self.mOptions From 38db26a5d5c054810fbc3e1706b6ae18d6907fea Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:46:23 +0100 Subject: [PATCH 07/13] Internal Datframes index refactoring #190 --- pyaf/TS/SignalDecomposition_Cycle.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pyaf/TS/SignalDecomposition_Cycle.py b/pyaf/TS/SignalDecomposition_Cycle.py index 850bf3956..a54ffbc83 100644 --- a/pyaf/TS/SignalDecomposition_Cycle.py +++ b/pyaf/TS/SignalDecomposition_Cycle.py @@ -20,8 +20,8 @@ class cAbstractCycle: def __init__(self , trend): self.mTimeInfo = tsti.cTimeInfo() - self.mTrendFrame = pd.DataFrame() - self.mCycleFrame = pd.DataFrame() + self.mTrendFrame = None + self.mCycleFrame = None self.mTrend = trend; self.mTrend_residue_name = self.mTrend.mOutName + '_residue' self.mFormula = None; @@ -241,7 +241,7 @@ def transformDataset(self, df): class cBestCycleForTrend(cAbstractCycle): def __init__(self , trend, criterion): super().__init__(trend); - self.mCycleFrame = pd.DataFrame() + self.mCycleFrame = None self.mCyclePerfByLength = {} self.mBestCycleValueDict = {} self.mBestCycleLength = None @@ -294,7 +294,7 @@ def generate_cycles(self): lMaxRobustCycleLength = self.mTrendFrame.shape[0]//12; # print("MAX_ROBUST_CYCLE_LENGTH", self.mTrendFrame.shape[0], lMaxRobustCycleLength); lCycleLengths = self.mOptions.mCycleLengths or range(2,lMaxRobustCycleLength + 1) - lCycleFrame = pd.DataFrame(); + lCycleFrame = pd.DataFrame(index = self.mTrendFrame.index); lCycleFrame[self.mTrend_residue_name ] = self.mTrendFrame[self.mTrend_residue_name] for lLength in lCycleLengths: if ((lLength > 1) and (lLength <= lMaxRobustCycleLength)): @@ -359,8 +359,8 @@ class cCycleEstimator: def __init__(self): self.mTimeInfo = tsti.cTimeInfo() - self.mTrendFrame = pd.DataFrame() - self.mCycleFrame = pd.DataFrame() + self.mTrendFrame = None + self.mCycleFrame = None self.mCycleList = {} def addSeasonal(self, trend, seas_type, resolution): @@ -414,6 +414,7 @@ def defineCycles(self): self.mCycleList[trend] = [cZeroCycle(trend)]; for cycle in self.mCycleList[trend]: cycle.mTrendFrame = self.mTrendFrame; + cycle.mCycleFrame = pd.DataFrame(index = self.mTrendFrame.index) cycle.mTimeInfo = self.mTimeInfo; cycle.mSplit = self.mSplit; cycle.mOptions = self.mOptions; @@ -444,6 +445,7 @@ def dumpCyclePerf(self, cycle): def estimateCycles(self): self.mTime = self.mTimeInfo.mTime; self.mSignal = self.mTimeInfo.mSignal; + self.mCycleFrame = pd.DataFrame(index = self.mTrendFrame.index); self.mTimeInfo.addVars(self.mCycleFrame); for trend in self.mTrendList: lTrend_residue_name = trend.mOutName + '_residue' From fbfe062379bb0dc8526a4ca8a0c03a777ac682f6 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:46:59 +0100 Subject: [PATCH 08/13] Internal Datframes index refactoring #190 --- pyaf/TS/Signal_Transformation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pyaf/TS/Signal_Transformation.py b/pyaf/TS/Signal_Transformation.py index fad1a5e3a..e4f6cd732 100644 --- a/pyaf/TS/Signal_Transformation.py +++ b/pyaf/TS/Signal_Transformation.py @@ -10,7 +10,7 @@ from . import Utils as tsutil def testTransform_one_seed(tr1 , seed_value): - df = pd.DataFrame(); + df = pd.DataFrame(index = None); np.random.seed(seed_value) df['A'] = np.random.normal(0, 1.0, 10); # df['A'] = range(1, 6000); @@ -135,7 +135,7 @@ def test(self): pass def dump_apply_invert(self, df_before_apply, df_after_apply): - df = pd.DataFrame(); + df = pd.DataFrame(index = None); df['before_apply'] = df_before_apply; df['after_apply'] = df_after_apply; print("dump_apply_invert_head", df.head()); @@ -541,7 +541,7 @@ def create_tranformation(iName , arg): class cTransformationEstimator: def __init__(self): - self.mSignalFrame = pd.DataFrame() + self.mSignalFrame = None self.mTransformList = {} def validateTransformation(self , transf , df, iTime, iSignal): From 9b341c9fe3e49aa8458c31c6e1d3cd0d36e641d4 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:47:26 +0100 Subject: [PATCH 09/13] Internal Datframes index refactoring #190 --- pyaf/TS/SignalDecomposition.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pyaf/TS/SignalDecomposition.py b/pyaf/TS/SignalDecomposition.py index decbf268d..16d7e0416 100644 --- a/pyaf/TS/SignalDecomposition.py +++ b/pyaf/TS/SignalDecomposition.py @@ -43,18 +43,13 @@ def sample_signal_if_needed(iInputDS, iOptions): class cSignalDecompositionOneTransform: def __init__(self): - self.mSignalFrame = pd.DataFrame() + self.mSignalFrame = None self.mTime = None self.mSignal = None self.mTimeInfo = tsti.cTimeInfo(); - self.mForecastFrame = pd.DataFrame() self.mTransformation = tstransf.cSignalTransform_None(); - - - def serialize(self): - from sklearn.externals import joblib - joblib.dump(self, self.mTimeInfo.mTime + "_" + self.mSignal + "_TS.pkl") + def setParams(self , iInputDS, iTime, iSignal, iHorizon, iTransformation, iDecomspositionType, iExogenousData = None): assert(iInputDS.shape[0] > 0) From ef227ac2d2460eabe3bf6d0ea9b642cd715d897b Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:47:49 +0100 Subject: [PATCH 10/13] Internal Datframes index refactoring #190 --- pyaf/TS/Intermittent_Models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyaf/TS/Intermittent_Models.py b/pyaf/TS/Intermittent_Models.py index a5b63de9c..6e71f8a47 100644 --- a/pyaf/TS/Intermittent_Models.py +++ b/pyaf/TS/Intermittent_Models.py @@ -127,7 +127,7 @@ def fit(self): self.mSignal = self.mTimeInfo.mSignal; lAREstimFrame = self.mSplit.getEstimPart(self.mARFrame) self.mOffset = lAREstimFrame[self.mCycleResidueName].min() - print("OFFSET", (self.mCycleResidueName, self.mOffset)) + # print("OFFSET", (self.mCycleResidueName, self.mOffset)) self.estimate_alpha(lAREstimFrame) self.mFeatureSelector = None; self.mInputNamesAfterSelection = self.mInputNames; From 8f2ce2a4706bea09ffbc25810659df01a3cb20f4 Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:48:13 +0100 Subject: [PATCH 11/13] Internal Datframes index refactoring #190 --- pyaf/TS/Keras_Models.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyaf/TS/Keras_Models.py b/pyaf/TS/Keras_Models.py index 718c341bd..55becce14 100644 --- a/pyaf/TS/Keras_Models.py +++ b/pyaf/TS/Keras_Models.py @@ -1,6 +1,5 @@ import numpy as np import pandas as pd -# from sklearn.preprocessing import StandardScaler, MinMaxScaler from . import SignalDecomposition_AR as tsar import sys From d1442bbc7b818e1966e81454215b04145facda2e Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:49:35 +0100 Subject: [PATCH 12/13] Internal Datframes index refactoring #190 Lag generation process optimization #191 --- pyaf/TS/Scikit_Models.py | 20 +---- pyaf/TS/SignalDecomposition_AR.py | 142 +++++++++++++++--------------- 2 files changed, 73 insertions(+), 89 deletions(-) diff --git a/pyaf/TS/Scikit_Models.py b/pyaf/TS/Scikit_Models.py index 941cdc9ce..abc3a86ec 100644 --- a/pyaf/TS/Scikit_Models.py +++ b/pyaf/TS/Scikit_Models.py @@ -12,6 +12,7 @@ def __init__(self , cycle_residue_name, P , iExogenousInfo = None): self.mNbLags = P; self.mNbExogenousLags = P; self.mScikitModel = None; + self.mFeatureSelector = None def dumpCoefficients(self, iMax=10): # print(self.mScikitModel.__dict__); @@ -24,11 +25,6 @@ def set_name(self): assert(0); - def is_used(self, name): - if(self.mFeatureSelector): - return (name in self.mInputNamesAfterSelection) - return True - def fit(self): # print("ESTIMATE_SCIKIT_MODEL_START" , self.mCycleResidueName); @@ -40,12 +36,12 @@ def fit(self): self.mSignal = self.mTimeInfo.mSignal; lAREstimFrame = self.mSplit.getEstimPart(self.mARFrame) - # print("mAREstimFrame columns :" , self.mAREstimFrame.columns); lARInputs = lAREstimFrame[self.mInputNames].values lARTarget = lAREstimFrame[series].values # print(len(self.mInputNames), lARInputs.shape , lARTarget.shape) assert(lARInputs.shape[1] > 0); assert(lARTarget.shape[0] > 0); + assert(lARInputs.shape[1] == len(self.mInputNames)) from sklearn.feature_selection import SelectKBest from sklearn.feature_selection import f_regression @@ -69,9 +65,9 @@ def fit(self): if(self.mFeatureSelector): lARInputsAfterSelection = self.mFeatureSelector.transform(lARInputs); - # print(self.mInputNames , self.mFeatureSelector.get_support(indices=True)); lSupport = self.mFeatureSelector.get_support(indices=True); self.mInputNamesAfterSelection = [self.mInputNames[k] for k in lSupport]; + else: lARInputsAfterSelection = lARInputs; self.mInputNamesAfterSelection = self.mInputNames; @@ -115,16 +111,8 @@ def transformDataset(self, df, horizon_index = 1): series = self.mCycleResidueName; if(self.mExogenousInfo is not None): df = self.mExogenousInfo.transformDataset(df); - # print(df.columns); - # print(df.info()); - # print(df.head()); - # print(df.tail()); lag_df = self.generateLagsForForecast(df); - # print(self.mInputNames); - # print(self.mFormula, "\n", lag_df.columns); - # lag_df.to_csv("LAGGED_ " + str(self.mNbLags) + ".csv"); - # print(len(list(lag_df.columns)) , len(self.mInputNamesAfterSelection)) - inputs_after_feat_selection = lag_df.values[:,1:] # the first column is the signal + inputs_after_feat_selection = lag_df[self.mInputNamesAfterSelection].values # inputs_after_feat_selection = self.mFeatureSelector.transform(inputs) if self.mFeatureSelector else inputs; if(self.mScikitModel is not None): pred = self.mScikitModel.predict(inputs_after_feat_selection) diff --git a/pyaf/TS/SignalDecomposition_AR.py b/pyaf/TS/SignalDecomposition_AR.py index dcd90e17f..f1e89c25b 100644 --- a/pyaf/TS/SignalDecomposition_AR.py +++ b/pyaf/TS/SignalDecomposition_AR.py @@ -21,8 +21,8 @@ class cAbstractAR: def __init__(self , cycle_residue_name, iExogenousInfo = None): self.mTimeInfo = tsti.cTimeInfo() - self.mCycleFrame = pd.DataFrame() - self.mARFrame = pd.DataFrame() + self.mCycleFrame = None + self.mARFrame = None self.mCycleResidueName = cycle_residue_name self.mCycle = None self.mTrend = None @@ -57,6 +57,7 @@ def plot(self): def register_lag(self, series, p): name = series+'_Lag' + str(p); # print("register_lag", (series , p , name)) + assert(name not in self.mInputNames) self.mInputNames.append(name); self.mLagsForSeries[series] = self.mLagsForSeries.get(series , []) self.mLagsForSeries[series].append(p) @@ -79,37 +80,29 @@ def shift_series(self, series, p, idefault): first_values = np.full((p), idefault, dtype=lType) new_values = np.hstack((first_values, series.values[0:N-p])); new_values = new_values.astype(lType) - return new_values; - + return new_values + def getDefaultValue(self, series): return self.mDefaultValues[series]; - def is_used(self, name): - return True - - def addLagForForecast(self, df, lag_df, series, p): - name = series+'_Lag' + str(p); - if(not self.is_used(name)): - return - assert(p in self.mLagsForSeries[series]) - lSeries = df[series]; - lShiftedSeries = self.shift_series(lSeries, p , self.mDefaultValues[series]); - lag_df[name] = lShiftedSeries; - def generateLagsForForecast(self, df): - lag_df = pd.DataFrame() - lag_df[self.mCycleResidueName] = df[self.mCycleResidueName].reset_index(drop=True) + lDict = {} + # lDict[self.mCycleResidueName] = df[self.mCycleResidueName] + series = self.mCycleResidueName + lSeries = df[self.mCycleResidueName] for p in self.mLagsForSeries[self.mCycleResidueName]: - # signal lags ... plain old AR model - self.addLagForForecast(df, lag_df, self.mCycleResidueName, p); + name = series +'_Lag' + str(p); + lShiftedSeries = self.shift_series(lSeries, p , self.mDefaultValues[series]); + lDict[name] = lShiftedSeries # Exogenous variables lags if(self.mExogenousInfo is not None): - # print(self.mExogenousInfo.mEncodedExogenous); - # print(df.columns); for ex in self.mExogenousInfo.mEncodedExogenous: if(self.mLagsForSeries.get(ex)): for p in self.mLagsForSeries[ex]: - self.addLagForForecast(df, lag_df, ex, p); + name = ex +'_Lag' + str(p); + lShiftedSeries = self.shift_series(df[ex], p , self.mDefaultValues[ex]); + lDict[name] = lShiftedSeries + lag_df = pd.DataFrame(lDict, index = df.index, dtype = lSeries.dtype) return lag_df; @@ -151,8 +144,8 @@ def transformDataset(self, df, horizon_index = 1): class cAutoRegressiveEstimator: def __init__(self): self.mTimeInfo = tsti.cTimeInfo() - self.mCycleFrame = pd.DataFrame() - self.mARFrame = pd.DataFrame() + self.mCycleFrame = None + self.mARFrame = None self.mARList = {} self.mExogenousInfo = None; @@ -176,58 +169,61 @@ def shift_series(self, series, p): first_values = np.full((p), series.values[0], dtype=lType) new_values = np.hstack((first_values, series.values[0:N-p])); new_values = new_values.astype(lType) - return new_values; - - def addLagForTraining(self, df, lag_df, series, autoreg, p): - name = series+'_Lag' + str(p); - if(name in lag_df.columns): - autoreg.register_lag(series, p); - return lag_df; + return new_values + def generateLagsForTraining(self, df, series, pMinMax): + (pmin, pmax) = pMinMax lSeries = df[series]; - lShiftedSeries = self.shift_series(lSeries, p); self.mDefaultValues[series] = lSeries.values[0]; - - lShiftedEstim = self.mSplit.getEstimPart(lShiftedSeries); - lAcceptable = self.is_not_constant(lShiftedEstim); - if(lAcceptable): - autoreg.register_lag(series, p); - lag_df[name] = lShiftedSeries; - self.mLagOrigins[name] = series; - return lag_df; - - def addLagsForTraining(self, df, cycle_residue, iNeedExogenous = False): + lDict = {} + lags = [] + for p in range(pmin, pmax+1): + name = series+'_Lag' + str(p) + lShiftedSeries = self.shift_series(lSeries, p) + lShiftedEstim = self.mSplit.getEstimPart(lShiftedSeries); + lAcceptable = self.is_not_constant(lShiftedEstim); + if(lAcceptable): + lDict[name] = lShiftedSeries + lags.append((series, p)) + lag_df = pd.DataFrame(lDict, index = df.index, dtype = lSeries.dtype) + return (lag_df, lags) + + def addLagsForTraining(self, df, cycle_residue): logger = tsutil.get_pyaf_logger(); add_lag_start_time = time.time() + P = self.get_nb_lags(); + lag_df, lags = self.generateLagsForTraining(df, cycle_residue, (1, P)); + lag_dfs = [lag_df] for autoreg in self.mARList[cycle_residue]: - P = autoreg.mNbLags; - for p in range(1,P+1): - # signal lags ... plain old AR model - self.addLagForTraining(df, self.mARFrame, cycle_residue, autoreg, p); - # Avoid dataframe fragemntation. - self.mARFrame = self.mARFrame.copy() - - # Exogenous variables lags - if(autoreg.mExogenousInfo is not None): - P1 = P; - lExogCount = len(autoreg.mExogenousInfo.mEncodedExogenous); - lNbVars = P * lExogCount; - if(lNbVars >= self.mOptions.mMaxFeatureForAutoreg): - P1 = self.mOptions.mMaxFeatureForAutoreg // lExogCount; - autoreg.mNbExogenousLags = P1; - for ex in autoreg.mExogenousInfo.mEncodedExogenous: - for p in range(1,P1+1): - # print(autoreg.mExogenousInfo.mEncodedExogenous); - # print(df.columns); - self.addLagForTraining(df, self.mARFrame, ex, autoreg, p); - # Avoid dataframe fragemntation. - self.mARFrame = self.mARFrame.copy() + for lag in lags: + (name , p) = lag + autoreg.register_lag(name, p) + + # Exogenous variables lags + if(self.mExogenousInfo is not None): + P1 = P; + lExogCount = len(self.mExogenousInfo.mEncodedExogenous); + lNbVars = P * lExogCount; + if(lNbVars >= self.mOptions.mMaxFeatureForAutoreg): + P1 = self.mOptions.mMaxFeatureForAutoreg // lExogCount; + autoreg.mNbExogenousLags = P1; + for ex in self.mExogenousInfo.mEncodedExogenous: + (lag_df, lags_ex) = self.generateLagsForTraining(df, ex, (1, P1)); + lag_dfs = lag_dfs + [lag_df] + for autoreg in self.mARList[cycle_residue]: + if(autoreg.mExogenousInfo is not None): # ARX,XGBX, ... only + for lag in lags_ex: + (name , p) = lag + autoreg.register_lag(name, p) # print("AUTOREG_DETAIL" , P , len(autoreg.mInputNames)); if(autoreg.mExogenousInfo is not None): assert((P + P*len(autoreg.mExogenousInfo.mEncodedExogenous)) >= len(autoreg.mInputNames)); else: assert(P >= len(autoreg.mInputNames)); + + self.mARFrame = pd.concat([self.mARFrame] + lag_dfs, axis = 1) + if(self.mOptions.mDebugProfile): logger.info("LAG_TIME_IN_SECONDS " + self.mTimeInfo.mSignal + " " + str(len(self.mARFrame.columns)) + " " + @@ -237,13 +233,12 @@ def addLagsForTraining(self, df, cycle_residue, iNeedExogenous = False): # @profile def estimate_ar_models_for_cycle(self, cycle_residue): logger = tsutil.get_pyaf_logger(); - self.mARFrame = pd.DataFrame(); + self.mARFrame = pd.DataFrame(index = self.mCycleFrame.index); self.mTimeInfo.addVars(self.mARFrame); self.mCycleFrame[cycle_residue] = self.mCycleFrame[cycle_residue] self.mARFrame[cycle_residue] = self.mCycleFrame[cycle_residue] self.mDefaultValues = {}; - self.mLagOrigins = {}; if(self.mOptions.mDebugProfile): logger.info("AR_MODEL_ADD_LAGS_START '" + @@ -278,7 +273,6 @@ def estimate_ar_models_for_cycle(self, cycle_residue): autoreg.mARFrame = self.mARFrame autoreg.mTimeInfo = self.mTimeInfo; autoreg.mSplit = self.mSplit; - autoreg.mLagOrigins = self.mLagOrigins; autoreg.mDefaultValues = self.mDefaultValues; autoreg.mDecompositionType = self.mDecompositionType autoreg.fit(); @@ -300,7 +294,11 @@ def check_not_nan(self, sig , name): pass - + def get_nb_lags(self): + lLags = self.mCycleFrame.shape[0] // 4; + if(lLags >= self.mOptions.mMaxAROrder): + lLags = self.mOptions.mMaxAROrder; + return lLags # @profile def estimate(self): @@ -319,9 +317,7 @@ def estimate(self): self.mARList[cycle_residue] = []; if(self.mOptions.mActiveAutoRegressions['NoAR']): self.mARList[cycle_residue] = [ cZeroAR(cycle_residue)]; - lLags = self.mCycleFrame[cycle_residue].shape[0] // 4; - if(lLags >= self.mOptions.mMaxAROrder): - lLags = self.mOptions.mMaxAROrder; + lLags = self.get_nb_lags() lKeep = (self.mCycleFrame[cycle_residue].shape[0] > 12) and (self.mCycleFrame[cycle_residue].std() > 0.00001) if(not lKeep): logger.info("SKIPPING_AR_MODELS_WITH_LOW_VARIANCE_CYCLE_RESIDUE '" + cycle_residue + "'"); @@ -408,5 +404,5 @@ def estimate(self): for cycle_residue in self.mARList.keys(): self.estimate_ar_models_for_cycle(cycle_residue); for autoreg in self.mARList[cycle_residue]: - autoreg.mARFrame = pd.DataFrame(); + autoreg.mARFrame = pd.DataFrame(index = self.mCycleFrame.index); del self.mARFrame; From 6bc4281539f413e087e47cd822fddb4b65c02f0f Mon Sep 17 00:00:00 2001 From: Antoine CARME Date: Sat, 19 Mar 2022 16:50:07 +0100 Subject: [PATCH 13/13] Updated this test --- .../transformations/test_ozone_transf_relative_difference_1.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/transformations/test_ozone_transf_relative_difference_1.py b/tests/transformations/test_ozone_transf_relative_difference_1.py index f52774515..b236fcf7f 100644 --- a/tests/transformations/test_ozone_transf_relative_difference_1.py +++ b/tests/transformations/test_ozone_transf_relative_difference_1.py @@ -20,7 +20,7 @@ def create_df(): def test_transformation(itransformation): df = create_df(); - df.to_csv('a.csv') + # df.to_csv('a.csv') lEngine = autof.cForecastEngine() lEngine