From a473673dbf69013534ef69730e124fcb1ab740f9 Mon Sep 17 00:00:00 2001 From: wendycwong Date: Mon, 21 Oct 2024 10:26:11 -0700 Subject: [PATCH] GH-8487: remove standardize from HGLM as the convention does not do standardization. --- h2o-algos/src/main/java/hex/hglm/HGLM.java | 13 +- .../src/main/java/hex/hglm/HGLMModel.java | 53 +-- .../src/main/java/hex/hglm/HGLMScore.java | 2 +- .../src/main/java/hex/hglm/HGLMTask.java | 4 +- .../src/main/java/hex/hglm/HGLMUtils.java | 134 -------- .../main/java/hex/hglm/MetricBuilderHGLM.java | 4 +- .../main/java/hex/schemas/HGLMModelV3.java | 77 +---- .../src/main/java/hex/schemas/HGLMV3.java | 5 - .../test/java/hex/glm/GLMCheckpointTest.java | 2 + .../src/test/java/hex/hglm/HGLMBasicTest.java | 307 +++--------------- h2o-bindings/bin/custom/R/gen_hglm.py | 19 -- h2o-bindings/bin/custom/python/gen_hglm.py | 17 +- .../java/hex/ModelMetricsRegressionHGLM.java | 28 -- h2o-py/h2o/estimators/hglm.py | 34 -- .../pyunit_utils/utils_for_glm_hglm_tests.py | 69 ---- ...7_1p5_noise_var_scoring_history_summary.py | 4 +- ...487_2_noise_var_scoring_history_summary.py | 4 +- ..._noise_variance_scoring_history_summary.py | 4 +- .../hglm/pyunit_GH_8487_coefficients_check.py | 23 +- ..._noise_variance_scoring_history_summary.py | 2 +- ...87_p5_noise_var_scoring_history_summary.py | 2 +- h2o-r/h2o-package/R/hglm.R | 26 -- h2o-r/h2o-package/R/models.R | 4 +- h2o-r/h2o-package/pkgdown/_pkgdown.yml | 2 - h2o-r/tests/testdir_algos/gam/runit_gam.R | 3 +- .../hglm/runit_GH_8487_HGLM_testAgainstR1.R | 6 +- .../hglm/runit_GH_8487_coefs_check.R | 3 +- 27 files changed, 99 insertions(+), 752 deletions(-) diff --git a/h2o-algos/src/main/java/hex/hglm/HGLM.java b/h2o-algos/src/main/java/hex/hglm/HGLM.java index 465982b332f0..7888242bca1b 100644 --- a/h2o-algos/src/main/java/hex/hglm/HGLM.java +++ b/h2o-algos/src/main/java/hex/hglm/HGLM.java @@ -183,12 +183,6 @@ else if (!trainFrame.vec(_parms._group_column).isCategorical()) if (_parms._tau_e_var_init <= 0) error("tau_e_var_init", "If gen_syn_data is true, tau_e_var_init must be > 0."); } - - if (!_parms._random_intercept && _parms._standardize) - warn("random_intercept and standardize", - "If random_intercept is false and standardize is true, model building process can be unstable" + - " due to the denormalization process which can create singular T matrix. If encounter singlar" + - " T matrix problem, set standardize to false in this case to ensure model building can finish."); } } @@ -214,8 +208,8 @@ public void computeImpl() { * 3. Set modelOutput fields. */ // _dinfo._adaptedFrame will contain group_column. Check and make sure clients will pass that along as well. - _dinfo = new DataInfo(_train.clone(), null, 1, _parms._use_all_factor_levels, _parms._standardize ? - DataInfo.TransformType.STANDARDIZE : DataInfo.TransformType.NONE, DataInfo.TransformType.NONE, + _dinfo = new DataInfo(_train.clone(), null, 1, _parms._use_all_factor_levels, + DataInfo.TransformType.NONE, DataInfo.TransformType.NONE, _parms.missingValuesHandling() == Skip, _parms.missingValuesHandling() == MeanImputation || _parms.missingValuesHandling() == PlugValues, @@ -293,7 +287,6 @@ void fitEM(HGLMModel model, Job job, ScoringHistory scTrain, ScoringHistory scVa if (_parms._showFixedMatVecs) model._output.setModelOutputFixMatVec(engineTask); _state = new ComputationStateHGLM(_job, _parms, _dinfo, engineTask, iteration); - generateNonStandardizeZTZArjTArs(_parms, model); // generate not standardized transpose(Z)*Z, transpose(Zj)*Zj try { if (_parms._max_iterations > 0) { // grab current value of fixed beta, tauEVar, tauUVar @@ -360,7 +353,7 @@ public boolean progress(double[] beta, double[][] ubeta, double[][] tmat, double if (_parms.valid() != null) scoreAndUpdateModel(model, false, scValid); } else { - // calculate log likelihood with current parameter settings, standardize if parms._standardize and vice versa + // calculate log likelihood with current parameter settings double logLikelihood = calHGLMllg(_state._nobs, tmat, tauEVarE10, model._output._arjtarj, rLlh._sse_fixed, rLlh._yMinusXTimesZ); scTrain.addIterationScore(_state._iter, logLikelihood, tauEVarE10); diff --git a/h2o-algos/src/main/java/hex/hglm/HGLMModel.java b/h2o-algos/src/main/java/hex/hglm/HGLMModel.java index a827caec626a..d612293a20ee 100644 --- a/h2o-algos/src/main/java/hex/hglm/HGLMModel.java +++ b/h2o-algos/src/main/java/hex/hglm/HGLMModel.java @@ -61,7 +61,7 @@ protected PredictScoreResult predictScoreImpl(Frame fr, Frame adaptFrm, String d if (gs._computeMetrics) { // only calculate log-likelihood, mse and other metrics if _computeMetrics mb = gs._mb; if (forTraining) { - _output._yminusxtimesz_score = gs._yMinusXTimesZ; + _output._yminusxtimesz = gs._yMinusXTimesZ; _output._yMinusfixPredSquare = mb._yMinusfixPredSquare; } else { // store for all frames other than the training frame _output._yminusxtimesz_valid = gs._yMinusXTimesZ; @@ -88,7 +88,6 @@ private HGLMScore makeScoringTask(Frame adaptFrm, boolean makePredictions, Job j public static class HGLMParameters extends Model.Parameters { public long _seed = -1; public GLMModel.GLMParameters.Family _family; - public boolean _standardize = false; public int _max_iterations = -1; public double[] _initial_fixed_effects; // initial values of fixed coefficients public Key _initial_random_effects; // frame key that contains the initial starting values of random coefficient effects @@ -174,13 +173,10 @@ public static class HGLMModelOutput extends Model.Output { final GLMModel.GLMParameters.Family _random_family; public String[] _fixed_coefficient_names; // include intercept only if _parms._intercept is true public String[] _random_coefficient_names; // include intercept only if _parms._random_intercept = true - public String[] _random_coefficient_names_normalized; public String[] _group_column_names; public long _training_time_ms; - public double[] _beta; // fixed coefficients, not normalized - public double[][] _ubeta; // random coefficients, not normalized - public double[] _beta_normalized; - public double[][] _ubeta_normalized; + public double[] _beta; // fixed coefficients + public double[][] _ubeta; // random coefficients public double[][] _tmat; // calculated with non-standardize random effects coefficients double _tauUVar; public double _tau_e_var; @@ -191,14 +187,10 @@ public static class HGLMModelOutput extends Model.Output { public double[][][] _arjtarj; public double[][][] _afjtarj; //public double[][] _zttimesz; // calculate from standardized or non-standardized Zj - public double[][][] _arjtarj_score; // used during scoring for metrics calculation. Not standardized - // public double[][] _zttimesz_score; // used during scoring for metrics calculation. Not standardized public double[][] _yminusxtimesz; // generate during training - public double[][] _yminusxtimesz_score; // generate during scoring public double[][] _yminusxtimesz_valid; // store same value for frames other than training frame public int _num_fixed_coeffs; public int _num_random_coeffs; - public int _num_random_coeffs_normalized; int[] _randomCatIndices; int[] _randomNumIndices; int[] _randomCatArrayStartIndices; @@ -217,7 +209,6 @@ public static class HGLMModelOutput extends Model.Output { public double _yMinusfixPredSquare; public double _yMinusfixPredSquare_valid; public TwoDimTable _scoring_history_valid; - public double _mse_fixed; // mse with fixed effect only /** * For debugging only. Copy over the generated fixed matrices to model._output. @@ -250,13 +241,13 @@ public HGLMModelOutput(HGLM b, DataInfo dinfo) { _family = b._parms._family; _random_family = b._parms._random_family; } - + public void setModelOutputFields(ComputationStateHGLM state) { _fixed_coefficient_names = state.get_fixedCofficientNames(); _random_coefficient_names = state.get_randomCoefficientNames(); _group_column_names = state.get_groupColumnNames(); _tauUVar = state.get_tauUVar(); - // _tau_e_var = state.get_tauEVarE17(); + // _tau_e_var = state.get_tauEVarE17(); _tau_e_var = state.get_tauEVarE10(); _tmat = state.get_T(); _num_fixed_coeffs = state.get_numFixedCoeffs(); @@ -264,41 +255,11 @@ public void setModelOutputFields(ComputationStateHGLM state) { _numLevel2Units = state.get_numLevel2Units(); _level2UnitIndex = state.get_level2UnitIndex(); _nobs = state._nobs; - if (state._parms._standardize) { // for random coefficients, the names of random coefficients names may change - _beta_normalized = state.get_beta(); - _ubeta_normalized = state.get_ubeta(); - _beta = denormalizedOneBeta(_beta_normalized, _fixed_coefficient_names, _dinfo._adaptedFrame.names(), - state._parms.train(), true); - _ubeta = denormalizedUBeta(_ubeta_normalized, _random_coefficient_names, state._parms._random_columns, - state._parms.train(), state._parms._random_intercept); - _random_coefficient_names_normalized = _random_coefficient_names.clone(); - if (_ubeta_normalized[0].length < _ubeta[0].length) // added intercept term, need to add name to random coeff names - _random_coefficient_names = copyCoefAddIntercept(_random_coefficient_names_normalized); - _tmat = generateNewTmat(_ubeta); - } else { - _beta = state.get_beta(); - _beta_normalized = normalizedOneBeta(_beta, _fixed_coefficient_names, _dinfo._adaptedFrame.names(), - state._parms.train(), true); - _ubeta = state.get_ubeta(); - _ubeta_normalized = normalizedUBeta(_ubeta, _random_coefficient_names, state._parms._random_columns, - state._parms.train(), state._parms._random_intercept); - if (_ubeta[0].length == _ubeta_normalized[0].length) - _random_coefficient_names_normalized = _random_coefficient_names; - else - _random_coefficient_names_normalized = copyCoefAddIntercept(_random_coefficient_names); - } - _num_random_coeffs_normalized = _ubeta_normalized[0].length; + _beta = state.get_beta(); + _ubeta = state.get_ubeta(); _num_random_coeffs = _ubeta[0].length; _iterations = state._iter; } - - public static String[] copyCoefAddIntercept(String[] originalNames) { - int nameLen = originalNames.length; - String[] longerNames = new String[nameLen+1]; - System.arraycopy(originalNames, 0, longerNames, 0, nameLen); - longerNames[nameLen] = "intercept"; - return longerNames; - } @Override public int nclasses() { // only support Gaussian now diff --git a/h2o-algos/src/main/java/hex/hglm/HGLMScore.java b/h2o-algos/src/main/java/hex/hglm/HGLMScore.java index a8c4a56394f4..b1dc7faec689 100644 --- a/h2o-algos/src/main/java/hex/hglm/HGLMScore.java +++ b/h2o-algos/src/main/java/hex/hglm/HGLMScore.java @@ -65,7 +65,7 @@ public HGLMScore(final Job j, final HGLMModel model, DataInfo dinfo, final Strin _randomCatArrayStartIndices = model._output._randomCatArrayStartIndices; _predStartIndexRandom = model._output._predStartIndexRandom; _randomSlopeToo = model._output._randomSlopeToo; - _randomIntercept = _parms._random_intercept || (_parms._standardize); + _randomIntercept = _parms._random_intercept; _tmat = model._output._tmat; // generated from non-standardized random coefficients randomObj = new Random(_parms._seed); _noiseStd = Math.sqrt(_parms._tau_e_var_init); // not affected by standardization/normalization diff --git a/h2o-algos/src/main/java/hex/hglm/HGLMTask.java b/h2o-algos/src/main/java/hex/hglm/HGLMTask.java index 4954e766a78d..0c54f24fad33 100644 --- a/h2o-algos/src/main/java/hex/hglm/HGLMTask.java +++ b/h2o-algos/src/main/java/hex/hglm/HGLMTask.java @@ -80,7 +80,7 @@ public void map(Chunk[] chks) { double residualFixed; DataInfo.Row r = _dinfo.newDenseRow(); for (int rowInd = 0; rowInd < chkLen; rowInd++) { - _dinfo.extractDenseRow(chks, rowInd, r); // numerical values are standardized automatically if standardize=true + _dinfo.extractDenseRow(chks, rowInd, r); if (!r.isBad() && !(r.weight == 0)) { y = r.response(0); level2Index = _parms._use_all_factor_levels ? r.binIds[_level2UnitIndex] - _dinfo._catOffsets[_level2UnitIndex] : @@ -266,7 +266,7 @@ public void map(Chunk[] chks) { int chkLen = chks[0].len(); DataInfo.Row r = _dinfo.newDenseRow(); for (int rowInd = 0; rowInd < chkLen; rowInd++) { - _dinfo.extractDenseRow(chks, rowInd, r); // numerical values are standardized automatically if standardize=true + _dinfo.extractDenseRow(chks, rowInd, r); if (!r.isBad() && !(r.weight == 0)) { y = r.response(0); _YjTYjSum += y * y; diff --git a/h2o-algos/src/main/java/hex/hglm/HGLMUtils.java b/h2o-algos/src/main/java/hex/hglm/HGLMUtils.java index 628b6602af49..5de8e398e423 100644 --- a/h2o-algos/src/main/java/hex/hglm/HGLMUtils.java +++ b/h2o-algos/src/main/java/hex/hglm/HGLMUtils.java @@ -1,19 +1,13 @@ package hex.hglm; import Jama.Matrix; -import hex.DataInfo; import water.DKV; import water.Key; import water.fvec.Frame; import water.util.ArrayUtils; import java.util.Arrays; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.stream.Collectors; -import static hex.glm.GLMModel.GLMParameters.MissingValuesHandling.*; import static water.util.ArrayUtils.*; public class HGLMUtils { @@ -137,106 +131,6 @@ public static double calTauEvarEq17(double residualSquare, double tauEVar, doubl double sigmaTrace = tauEVar * trace(cInvArjTArj) ; return (residualSquare + sigmaTrace)*oneOverN; } - - public static double[] denormalizedOneBeta(double[] beta, String[] coeffNames, String[] colNames, - Frame train, boolean interceptPresent) { - int numRandomCoeff = beta.length; - Map coefMean = new HashMap<>(); - Map coefStd = new HashMap<>(); - List randomColList = Arrays.stream(colNames).collect(Collectors.toList()); - genMeanStd(coeffNames, randomColList, train, coefMean, coefStd); - int interceptIndex = interceptPresent ? numRandomCoeff - 1 : numRandomCoeff; - double[] denormalizedUBeta = new double[interceptIndex + 1]; - if (interceptPresent) - denormalizedUBeta[interceptIndex] = beta[interceptIndex]; - - String coefName; - for (int coefInd = 0; coefInd < numRandomCoeff; coefInd++) { - coefName = coeffNames[coefInd]; - if (randomColList.contains(coefName)) { // pick out the numerical columns - denormalizedUBeta[coefInd] = beta[coefInd] / coefStd.get(coefName); - denormalizedUBeta[interceptIndex] -= beta[coefInd] * coefMean.get(coefName) / coefStd.get(coefName); - } else if (coefName != "intercept") { - denormalizedUBeta[coefInd] = beta[coefInd]; - } - } - return denormalizedUBeta; - } - - public static double[][] denormalizedUBeta(double[][] ubeta, String[] randomCoeffNames, String[] randomColNames, - Frame train, boolean randomIntercept) { - int numLevel2 = ubeta.length; - double[][] denormalizedBeta = new double[numLevel2][]; - boolean onlyEnumRandomCols = randomColAllEnum(train, randomColNames); - for (int index=0; index colNamesList = Arrays.stream(columnNames).collect(Collectors.toList()); - Map coefMean = new HashMap<>(); - Map coefStd = new HashMap<>(); - genMeanStd(coeffNames, colNamesList, train, coefMean, coefStd); - - if (interceptPresent) - normalizedBeta[interceptIndex] = beta[interceptIndex]; - - String coefName; - for (int coefInd=0; coefInd < numCoeff; coefInd++) { - coefName = coeffNames[coefInd]; - if (colNamesList.contains(coefName)) { // pick out numerical columns - normalizedBeta[coefInd] = beta[coefInd] * coefStd.get(coefName); - normalizedBeta[interceptIndex] += normalizedBeta[coefInd] * coefMean.get(coefName)/coefStd.get(coefName); - } else if (coefName != "intercept"){ // no change to enum columns - normalizedBeta[coefInd] = beta[coefInd]; - } - } - return normalizedBeta; - } - - /** - * Normalize ubeta, intercept is always the last one - */ - public static double[][] normalizedUBeta(double[][] ubeta, String[] randomCoeffNames, String[] randomColNames, - Frame train, boolean randomIntercept) { - int numLevel2 = ubeta.length; - double[][] normalizedUBeta = new double[numLevel2][]; - boolean onlyEnumRandomCols = randomColAllEnum(train, randomColNames); - for (int index=0; index randomColNames, Frame train, - Map coefMean, Map coefSTD) { - int numCoeff = randomCoeffNames.length; - String coefName; - double colMean; - double colStd; - for (int index=0; index= 0; } - public static void generateNonStandardizeZTZArjTArs(HGLMModel.HGLMParameters parms, HGLMModel model) { - if (parms._standardize) { - boolean orignalRandomIntercept = parms._random_intercept; - parms._random_intercept = parms._random_intercept || !randomColAllEnum(parms.train(), parms._random_columns); - List colNames = Arrays.asList(parms.train().names()); - boolean hasWeights = model._parms._weights_column != null && colNames.contains(model._parms._weights_column); - boolean hasOffsets = model._parms._offset_column != null && colNames.contains(model._parms._offset_column); - DataInfo dinfo = new DataInfo(parms.train().clone(), null, 1, parms._use_all_factor_levels, - DataInfo.TransformType.NONE, DataInfo.TransformType.NONE, - parms.missingValuesHandling() == Skip, parms.missingValuesHandling() == MeanImputation - || parms.missingValuesHandling() == PlugValues, parms.makeImputer(), false, hasWeights, - hasOffsets, false, null); - HGLMTask.ComputationEngineTask engineTask = new HGLMTask.ComputationEngineTask(null, parms, dinfo); - engineTask.doAll(dinfo._adaptedFrame); - model._output._arjtarj_score = engineTask._ArjTArj; - // model._output._zttimesz_score = engineTask._zTTimesZ; - parms._random_intercept = orignalRandomIntercept; - } else { - model._output._arjtarj_score = model._output._arjtarj; - // model._output._zttimesz_score = model._output._zttimesz; - } - } - public static double[][] generateNewTmat(double[][] ubeta) { int numIndex2 = ubeta.length; double oneOverJ = 1.0/numIndex2; @@ -292,9 +163,4 @@ public static double[][] generateNewTmat(double[][] ubeta) { mult(newTmat, oneOverJ); return newTmat; } - - public static boolean randomColAllEnum(Frame train, String[] randomColumns) { - int numRandCols = randomColumns.length; - return Arrays.stream(randomColumns).filter(x -> train.vec(x).isCategorical()).count() == numRandCols; - } } diff --git a/h2o-algos/src/main/java/hex/hglm/MetricBuilderHGLM.java b/h2o-algos/src/main/java/hex/hglm/MetricBuilderHGLM.java index b8341d00b87b..00efc9d7c2c6 100644 --- a/h2o-algos/src/main/java/hex/hglm/MetricBuilderHGLM.java +++ b/h2o-algos/src/main/java/hex/hglm/MetricBuilderHGLM.java @@ -77,8 +77,8 @@ public ModelMetrics makeModelMetrics(Model m, Frame f, Frame adaptedFrame, Frame double[][] tmat = hglmM._output._tmat; // already set with non-standardized random coefficients if (forTraining) { - double loglikelihood = calHGLMllg(metricsRegression._nobs, tmat, hglmM._output._tau_e_var, hglmM._output._arjtarj_score, - this._yMinusfixPredSquare, hglmM._output._yminusxtimesz_score); + double loglikelihood = calHGLMllg(metricsRegression._nobs, tmat, hglmM._output._tau_e_var, hglmM._output._arjtarj, + this._yMinusfixPredSquare, hglmM._output._yminusxtimesz); mm = new ModelMetricsRegressionHGLM(m, f, metricsRegression._nobs, this.weightedSigma(), loglikelihood, this._customMetric, hglmM._output._iterations, hglmM._output._beta, hglmM._output._ubeta, tmat, hglmM._output._tau_e_var, metricsRegression._MSE, this._yMinusfixPredSquare / metricsRegression._nobs, diff --git a/h2o-algos/src/main/java/hex/schemas/HGLMModelV3.java b/h2o-algos/src/main/java/hex/schemas/HGLMModelV3.java index 1e81286b6ebe..ad8c585752fd 100644 --- a/h2o-algos/src/main/java/hex/schemas/HGLMModelV3.java +++ b/h2o-algos/src/main/java/hex/schemas/HGLMModelV3.java @@ -34,23 +34,14 @@ public static final class HGLMModelOutputV3 extends ModelOutputSchemaV3 randomCoeffNamesLen) { // model building, added intercept to coeffs_normalized, extend coeffs - coeffNamesused = coeffNamesNormalized; - coeffsUsed = addInterceptValue(coeffs); - coeffsNormalizedUsed = coeffs_normalized; - } else if (randomCoefNameLenNorm < randomCoeffNamesLen) { // model building with standardization, added intercept to coeffs, extend coeffsNormalized - coeffNamesused = coeffNames; - coeffsUsed = coeffs; - coeffsNormalizedUsed = addInterceptValue(coeffs_normalized); - } else { - coeffNamesused = coeffNames; - coeffsUsed = coeffs; - coeffsNormalizedUsed = coeffs_normalized; - } + coeffNamesused = coeffNames; + coeffsUsed = coeffs; + double[] fCoeffValues = flattenArray(coeffsUsed); - double[] fCoeffValuesNormalized = flattenArray(coeffsNormalizedUsed); String[] fCoeffNames = extendCoeffNames(coeffNamesused, numLevel2Index); String[] fLevel2Vals = extendLevel2Ind(level2Domain, coeffsUsed[0].length); - - String[] colnames = new String[] {"coefficient names", "coefficients", "standardized_coefficients"}; - String[] colFormats = new String[] {"%s", "%.5f", "%.5f"}; - String[] colTypes = new String[] {"string", "double", "double"}; + + String[] colnames = new String[]{"coefficient names", "coefficients"}; + String[] colFormats = new String[]{"%s", "%.5f"}; + String[] colTypes = new String[]{"string", "double"}; TwoDimTable tdt = new TwoDimTable(title1, title2, fLevel2Vals, colnames, colTypes, colFormats, "names"); int tableLen = fCoeffNames.length; - for (int index=0; index rowValues = new ArrayList<>(); int catVal; for (String enumName : enumPredNames) { @@ -535,10 +521,7 @@ public double[] grabRow2Arrays(String[] enumPredNames, String[] numPredNames, bo } for (String numName:numPredNames) { double val = fr.vec(numName).at(rowInd); - if (standardize) - rowValues.add((val - fr.vec(numName).mean())/fr.vec(numName).sigma()); - else - rowValues.add(val); + rowValues.add(val); } if (hasIntercept) @@ -567,11 +550,11 @@ public void testMatVecFormation() { params._random_columns = new String[]{"num2", "num1", "enum3", "enum2"}; HGLMModel model2 = new HGLM(params).trainModel().get(); Scope.track_generic(model2); - checkDoubleArrays(model._output._afjtyj, model2._output._afjtyj, 1e-6); - checkDoubleArrays(model._output._arjtyj, model2._output._arjtyj, 1e-6); - check3DArrays(model._output._afjtafj, model2._output._afjtafj, 1e-6); - check3DArrays(model._output._afjtarj, model2._output._afjtarj, 1e-6); - check3DArrays(model._output._arjtarj, model2._output._arjtarj, 1e-6); + checkDoubleArrays(model._output._afjtyj, model2._output._afjtyj, TOL); + checkDoubleArrays(model._output._arjtyj, model2._output._arjtyj, TOL); + check3DArrays(model._output._afjtafj, model2._output._afjtafj, TOL); + check3DArrays(model._output._afjtarj, model2._output._afjtarj, TOL); + check3DArrays(model._output._arjtarj, model2._output._arjtarj, TOL); } finally { Scope.exit(); } @@ -629,7 +612,6 @@ public void testSetInitBetasTvar() { params._initial_fixed_effects = initBetas; params._initial_t_matrix = tMat._key; params._initial_random_effects = ubetaFrame._key; - params._standardize = false; params._tau_e_var_init = sigmaEpsilon; params._max_iterations = 0; HGLMModel model = new HGLM(params).trainModel().get(); @@ -642,20 +624,20 @@ public void testSetInitBetasTvar() { public void checkCorrectInitValue(HGLMModel model, double[] initBetas, Frame ubetaFrame, Frame tMat, double sigmaEpsilon) { // check fixed coefficient initialization - checkArrays(initBetas, model._output._beta, 1e-6); + checkArrays(initBetas, model._output._beta, TOL); // check random coefficient initialization double[][] ubetaInit = new double[(int) ubetaFrame.numRows()][(int) ubetaFrame.numCols()]; final ArrayUtils.FrameToArray f2a = new ArrayUtils.FrameToArray(0, ubetaInit[0].length-1, ubetaInit.length, ubetaInit); ubetaInit = f2a.doAll(ubetaFrame).getArray(); - checkDoubleArrays(ubetaInit, model._output._ubeta, 1e-6); + checkDoubleArrays(ubetaInit, model._output._ubeta, TOL); // check T matrix initialization double[][] tMatInit = new double[tMat.numCols()][tMat.numCols()]; final ArrayUtils.FrameToArray f2a2 = new ArrayUtils.FrameToArray(0, tMat.numCols()-1, tMatInit.length, tMatInit); tMatInit = f2a2.doAll(tMat).getArray(); - checkDoubleArrays(tMatInit, model._output._tmat, 1e-6); + checkDoubleArrays(tMatInit, model._output._tmat, TOL); // check sigma epsilon initializaiton - assertEquals(sigmaEpsilon, model._output._tau_e_var, 1e-6); + assertEquals(sigmaEpsilon, model._output._tau_e_var, TOL); } /** @@ -678,7 +660,6 @@ public void testSetInitT() { params._group_column = "RACE"; params._use_all_factor_levels = true; params._random_columns = new String[]{"GLEASON", "DPROS", "DCAPS"}; - params._standardize = false; params._tau_u_var_init = sigmaU; params._max_iterations = 0; HGLMModel model = new HGLM(params).trainModel().get(); @@ -693,191 +674,7 @@ public void checkCorrectTMat(HGLMModel model, double sigmaU) { double[][] correctTMat = new double[model._output._tmat.length][model._output._tmat.length]; for (int ind=0; ind=1. A value of 0 is only set when only the model coefficient names and model coefficient dimensions are needed. Defaults to ``-1``. @@ -185,7 +181,6 @@ def __init__(self, self.plug_values = plug_values self.family = family self.rand_family = rand_family - self.standardize = standardize self.max_iterations = max_iterations self.initial_fixed_effects = initial_fixed_effects self.initial_random_effects = initial_random_effects @@ -427,20 +422,6 @@ def rand_family(self, rand_family): assert_is_type(rand_family, None, Enum("gaussian")) self._parms["rand_family"] = rand_family - @property - def standardize(self): - """ - Standardize numeric columns to have zero mean and unit variance. - - Type: ``bool``, defaults to ``False``. - """ - return self._parms.get("standardize") - - @standardize.setter - def standardize(self, standardize): - assert_is_type(standardize, None, bool) - self._parms["standardize"] = standardize - @property def max_iterations(self): """ @@ -640,13 +621,6 @@ def coefs_random_names(self): """ return self._model_json["output"]["random_coefficient_names"] - def coefs_random_names_norm(self): - """ - Get the random effect coefficient names including the intercept if applicable for normalized/standardized - random effect coefficients. - """ - return self._model_json["output"]["random_coefficient_names_normalized"] - def coefs_random(self): """ Get the random coefficients of the model. @@ -655,14 +629,6 @@ def coefs_random(self): random_coefs = self._model_json["output"]["ubeta"] return dict(zip(level_2_names, random_coefs)) - def coefs_random_norm(self): - """ - Get the normalized/standardized random coefficients of the model. - """ - level_2_names = self.level_2_names() - random_coefs = self._model_json["output"]["ubeta_normalized"] - return dict(zip(level_2_names, random_coefs)) - def scoring_history_valid(self, as_data_frame=True): """ Retrieve Model Score History for validation data frame if present diff --git a/h2o-py/tests/pyunit_utils/utils_for_glm_hglm_tests.py b/h2o-py/tests/pyunit_utils/utils_for_glm_hglm_tests.py index d5439952aeeb..d855cfc254d9 100644 --- a/h2o-py/tests/pyunit_utils/utils_for_glm_hglm_tests.py +++ b/h2o-py/tests/pyunit_utils/utils_for_glm_hglm_tests.py @@ -147,30 +147,6 @@ def find_model_iterations(glm_model): iteration_index = glm_model._model_json["output"]["model_summary"].col_header.index("number_of_iterations") return cell_values[lengths-1][iteration_index] -def normalize_denormalize_random_coefs(random_coefs, random_coefs_names, level_2_names, numerical_cols, training_frame, normalize = True): - """ - Given a random effect coefficients dict, this method will standardize/normalize the coefficients - - :param random_coefs: python dict with random column names and a list of random coefficients for each level 2 index - :param random_coefs_names: python list of random coefficient name - :param level_2_names: python string list of level 2 values - :param numerical_cols: numerical columns of the frame - :param training_frame: h2o frame used to build the model - :return: python dict with random columns names and a list of normalized/standardized random coefficients - """ - normalized_coefs = dict() - # extract random coefficients for each level 2 value - for level2_val in level_2_names: - # extract dict for one level 2 value - dictLevel2 = extract_coef_dict(random_coefs, level2_val, random_coefs_names) - if normalize: - transform_one_coef = normalize_coefs(dictLevel2, numerical_cols, training_frame) - else: - transform_one_coef = denormalize_coefs(dictLevel2, numerical_cols, training_frame) - - add_to_random_coef_dict(normalized_coefs, transform_one_coef, level2_val, random_coefs_names) - return normalized_coefs - def add_to_random_coef_dict(normalized_coefs, normalized_one_coefs, level2_val, random_coefs_names): one_list = [] for one_name in random_coefs_names: @@ -184,51 +160,6 @@ def extract_coef_dict(random_coeffs, level2_name, random_coefs_names): random_coef_level2[cname] = random_coeffs[level2_name][index] index = index+1 return random_coef_level2 - - - -def normalize_coefs(coefs, numerical_cols, training_frame): - """ - Given a coefficient as a dict, the method will normalized/standardized the given coefficents and return it in another - dict. - - :param coefs: coefficients as a dict without normalization/standardization - :param numerical_cols: column names of numerical columns - :param training_frame: H2O frame used to train the model - :return: a python dict with normalized/standardized coefficients - """ - intercept_adjust = 0 - all_coefs_names = coefs.keys() - normalized_coefs = coefs.copy() - # only numerical coefficients are changed. - for cname in numerical_cols: - if cname in all_coefs_names: - cmean = training_frame[cname].mean()[0,0] - csigma = training_frame[cname].sd()[0] - normalized_coefs[cname] = coefs[cname] * csigma - intercept_adjust = intercept_adjust + normalized_coefs[cname]*cmean/csigma - if "intercept" in all_coefs_names: - normalized_coefs["intercept"] = coefs["intercept"]+intercept_adjust - else: - normalized_coefs["intercept"] = intercept_adjust - return normalized_coefs - -def denormalize_coefs(coefs_normalized, numerical_cols, training_frame): - intercept_adjust = 0 - all_coefs_names = coefs_normalized.keys() - denormalize_coefs = coefs_normalized.copy() - for cname in numerical_cols: - if cname in all_coefs_names: - cmean = training_frame[cname].mean()[0,0] - csigma = training_frame[cname].sd()[0] - denormalize_coefs[cname] = coefs_normalized[cname] / csigma - intercept_adjust = intercept_adjust - cmean * coefs_normalized[cname] / csigma - - if "intercept" in all_coefs_names: - denormalize_coefs["intercept"] = denormalize_coefs["intercept"] + intercept_adjust - else: - denormalize_coefs["intercept"] = intercept_adjust - return denormalize_coefs def compare_dicts_with_tupple(dict1, dict2, tolerance=1e-6): keys = dict1.keys() diff --git a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_1p5_noise_var_scoring_history_summary.py b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_1p5_noise_var_scoring_history_summary.py index 02279374e526..2e37c3798148 100644 --- a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_1p5_noise_var_scoring_history_summary.py +++ b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_1p5_noise_var_scoring_history_summary.py @@ -5,7 +5,7 @@ from h2o.estimators.hglm import H2OHGLMEstimator as hglm from tests.pyunit_utils import utils_for_glm_hglm_tests -# in this test, want to check the following with standardization and with random intercept: +# in this test, want to check the following with random intercept: # 1.scoring history (both training and valid) # 2. the model summary # 3. Fixed effect coefficients, normal and standardized @@ -20,7 +20,7 @@ def test_scoring_history_model_summary(): x.remove("C1") random_columns = ["C2", "C3", "C4"] hglm_model = hglm(random_columns=random_columns, group_column = "C1", score_each_iteration=True, seed=12345, - max_iterations = 20, random_intercept = False, standardize=False) + max_iterations = 20, random_intercept = False) hglm_model.train(x=x, y=y, training_frame=train, validation_frame=valid) # grab various metrics model_metrics = hglm_model.training_model_metrics() diff --git a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_2_noise_var_scoring_history_summary.py b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_2_noise_var_scoring_history_summary.py index 4ea4954f4697..0ff356a873bc 100644 --- a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_2_noise_var_scoring_history_summary.py +++ b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_2_noise_var_scoring_history_summary.py @@ -5,7 +5,7 @@ from h2o.estimators.hglm import H2OHGLMEstimator as hglm from tests.pyunit_utils import utils_for_glm_hglm_tests -# in this test, want to check the following with standardization and with random intercept: +# in this test, want to check the following with random intercept: # 1.scoring history (both training and valid) # 2. the model summary # 3. Fixed effect coefficients, normal and standardized @@ -20,7 +20,7 @@ def test_scoring_history_model_summary(): x.remove("C1") random_columns = ["C2", "C3", "C4"] hglm_model = hglm(random_columns=random_columns, group_column = "C1", score_each_iteration=True, seed=12345, - max_iterations = 20, random_intercept = False, standardize=False) + max_iterations = 20, random_intercept = False) hglm_model.train(x=x, y=y, training_frame=train, validation_frame=valid) # grab various metrics model_metrics = hglm_model.training_model_metrics() diff --git a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_3_noise_variance_scoring_history_summary.py b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_3_noise_variance_scoring_history_summary.py index 8c9bff043de2..552fc2750103 100644 --- a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_3_noise_variance_scoring_history_summary.py +++ b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_3_noise_variance_scoring_history_summary.py @@ -5,7 +5,7 @@ from h2o.estimators.hglm import H2OHGLMEstimator as hglm from tests.pyunit_utils import utils_for_glm_hglm_tests -# in this test, want to check the following with standardization and with random intercept: +# in this test, want to check the following with random intercept: # 1.scoring history (both training and valid) # 2. the model summary # 3. Fixed effect coefficients, normal and standardized @@ -20,7 +20,7 @@ def test_scoring_history_model_summary(): x.remove("C1") random_columns = ["C2", "C3", "C10", "C20"] hglm_model = hglm(random_columns=random_columns, group_column = "C1", score_each_iteration=True, seed=12345, - random_intercept = True, standardize = False, max_iterations=10) + random_intercept = True, max_iterations=10) hglm_model.train(x=x, y=y, training_frame=train, validation_frame=valid) print(hglm_model) # make sure this one works. # grab various metrics diff --git a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_coefficients_check.py b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_coefficients_check.py index a6589b29fede..f5f68353b02c 100644 --- a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_coefficients_check.py +++ b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_coefficients_check.py @@ -5,7 +5,7 @@ from h2o.estimators.hglm import H2OHGLMEstimator as hglm from tests.pyunit_utils import utils_for_glm_hglm_tests -# in this test, want to check the following with standardization and de-standardization and with random intercept: +# in this test, want to check to make sure we are getting our coefficients # 1. Fixed effect coefficients; # 2. Random effect coefficients. def test_scoring_history_model_summary(): @@ -17,15 +17,12 @@ def test_scoring_history_model_summary(): x.remove("C1") random_columns = ["C2", "C3", "C10", "C20"] hglm_model = hglm(random_columns=random_columns, group_column = "C1", score_each_iteration=True, seed=12345, - max_iterations=10, standardize=True) + max_iterations=10) hglm_model.train(x=x, y=y, training_frame=train, validation_frame=valid) # grab various metrics coef = hglm_model.coef() - coef_norm = hglm_model.coef_norm() coef_random = hglm_model.coefs_random() coef_random_names = hglm_model.coefs_random_names() - coef_random_norm = hglm_model.coefs_random_norm() - coef_random_names_norm = hglm_model.coefs_random_names_norm() residual_var = hglm_model.residual_variance() mse = hglm_model.mse() mse_fixed = hglm_model.mean_residual_fixed() @@ -41,17 +38,11 @@ def test_scoring_history_model_summary(): " not.".format(mse_fixed, mse) assert mse < mse_fixed_valid, "residual error with only fixed effects from validation frames {0} should exceed that" \ " of mse {1} but is not.".format(mse_fixed_valid, mse) - # check coefficients and normalized coefficients are converted correctly. - numerical_columns = ["C10", "C20", "C30", "C40", "C50"] - coef_norm_manually = utils_for_glm_hglm_tests.normalize_coefs(coef, numerical_columns, train) - pyunit_utils.assertCoefDictEqual(coef_norm, coef_norm_manually, 1e-6) - coef_manually = utils_for_glm_hglm_tests.denormalize_coefs(coef_norm, numerical_columns, train) - pyunit_utils.assertCoefDictEqual(coef, coef_manually, 1e-6) - # check random effect coefficients and normalized random effect coefficients are converted correctly. - random_coeffs_norm_manually = utils_for_glm_hglm_tests.normalize_denormalize_random_coefs(coef_random, coef_random_names, level2_names, numerical_columns, train, normalize=True) - random_coeffs_manually = utils_for_glm_hglm_tests.normalize_denormalize_random_coefs(coef_random_norm, coef_random_names_norm, level2_names, numerical_columns, train, normalize=False) - utils_for_glm_hglm_tests.compare_dicts_with_tupple(coef_random, random_coeffs_manually, tolerance=1e-6) - utils_for_glm_hglm_tests.compare_dicts_with_tupple(coef_random_norm, random_coeffs_norm_manually, tolerance=1e-6) + assert len(coef) == len(coef_random_names), "fixed coefficient length {0} should equal to random coefficient names" \ + " length: {1}".format(len(coef), len(coef_random_names)) + assert len(level2_names) == len(coef_random), \ + "expected random coefficient length: {0}, actual random coefficient names length " \ + "{1}".format(len(level2_names),len(coef_random)) if __name__ == "__main__": pyunit_utils.standalone_test(test_scoring_history_model_summary) diff --git a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p05_noise_variance_scoring_history_summary.py b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p05_noise_variance_scoring_history_summary.py index ab741897ac61..c23f2128cc5b 100644 --- a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p05_noise_variance_scoring_history_summary.py +++ b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p05_noise_variance_scoring_history_summary.py @@ -20,7 +20,7 @@ def test_scoring_history_model_summary(): x.remove("C1") random_columns = ["C10", "C20", "C30"] hglm_model = hglm(random_columns=random_columns, group_column="C1", score_each_iteration=True, seed=12345, - random_intercept=False, max_iterations=10, standardize=False, em_epsilon=0.000001) + random_intercept=False, max_iterations=10, em_epsilon=0.000001) hglm_model.train(x=x, y=y, training_frame=train, validation_frame=valid) print(hglm_model) # make sure this one works. # grab various metrics diff --git a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p5_noise_var_scoring_history_summary.py b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p5_noise_var_scoring_history_summary.py index e209b7b452ab..0aa576ac51bc 100644 --- a/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p5_noise_var_scoring_history_summary.py +++ b/h2o-py/tests/testdir_algos/hglm/pyunit_GH_8487_p5_noise_var_scoring_history_summary.py @@ -20,7 +20,7 @@ def test_scoring_history_model_summary(): x.remove("C1") random_columns = ["C2", "C3", "C4"] hglm_model = hglm(random_columns=random_columns, group_column = "C1", score_each_iteration=True, seed=12345, - max_iterations = 10, random_intercept = True, standardize=False) + max_iterations = 10, random_intercept = True) hglm_model.train(x=x, y=y, training_frame=train, validation_frame=valid) # grab various metrics model_metrics = hglm_model.training_model_metrics() diff --git a/h2o-r/h2o-package/R/hglm.R b/h2o-r/h2o-package/R/hglm.R index 2c37e12d0bef..854c57a46ccb 100644 --- a/h2o-r/h2o-package/R/hglm.R +++ b/h2o-r/h2o-package/R/hglm.R @@ -40,7 +40,6 @@ #' @param family Family. Only gaussian is supported now. Must be one of: "gaussian". Defaults to gaussian. #' @param rand_family rand_family. Set distribution of random effects. Only Gaussian is implemented now. Must be one of: #' "gaussian". -#' @param standardize \code{Logical}. Standardize numeric columns to have zero mean and unit variance. Defaults to FALSE. #' @param max_iterations Maximum number of iterations. Value should >=1. A value of 0 is only set when only the model coefficient #' names and model coefficient dimensions are needed. Defaults to -1. #' @param initial_fixed_effects An array that contains initial values of the fixed effects coefficient. @@ -94,7 +93,6 @@ h2o.hglm <- function(x, plug_values = NULL, family = c("gaussian"), rand_family = c("gaussian"), - standardize = FALSE, max_iterations = -1, initial_fixed_effects = NULL, initial_random_effects = NULL, @@ -165,8 +163,6 @@ h2o.hglm <- function(x, parms$family <- family if (!missing(rand_family)) parms$rand_family <- rand_family - if (!missing(standardize)) - parms$standardize <- standardize if (!missing(max_iterations)) parms$max_iterations <- max_iterations if (!missing(initial_fixed_effects)) @@ -214,7 +210,6 @@ h2o.hglm <- function(x, plug_values = NULL, family = c("gaussian"), rand_family = c("gaussian"), - standardize = FALSE, max_iterations = -1, initial_fixed_effects = NULL, initial_random_effects = NULL, @@ -290,8 +285,6 @@ h2o.hglm <- function(x, parms$family <- family if (!missing(rand_family)) parms$rand_family <- rand_family - if (!missing(standardize)) - parms$standardize <- standardize if (!missing(max_iterations)) parms$max_iterations <- max_iterations if (!missing(initial_fixed_effects)) @@ -340,15 +333,6 @@ h2o.coef_random <- function(model) { return(model@model$ubeta) } -#' Extracts the normalized/standardized random effects coefficients of an HGLM model. -#' -#' @param model is a H2O HGLM model. -#' @export -h2o.coef_random_norm <- function(model) { - if (is(model, "H2OModel") && (model@algorithm=="hglm")) - return(model@model$ubeta_normalized) -} - #' Extracts the group_column levels of an HGLM model. The group_column is usually referred to as level 2 predictor. #' #' @param model is a H2O HGLM model. @@ -367,16 +351,6 @@ h2o.coefs_random_names <- function(model) { return(model@model$random_coefficient_names) } -#' Extracts the coefficient names of normalized/standardized random effect coefficients. If no random intercept is -#' set, during the normalization/de-normalization process, an random intercept will be added. -#' -#' @param model is a H2O HGLM model. -#' @export -h2o.coefs_random_names_norm <- function(model) { - if (is(model, "H2OModel") && (model@algorithm=="hglm")) - return(model@model$random_coefficient_names_normalized) -} - #' Extracts scoring history of validation dataframe during training #' #' @param model is a H2O HGLM model. diff --git a/h2o-r/h2o-package/R/models.R b/h2o-r/h2o-package/R/models.R index dcd2429fdf38..764d262b100d 100755 --- a/h2o-r/h2o-package/R/models.R +++ b/h2o-r/h2o-package/R/models.R @@ -3104,7 +3104,7 @@ h2o.coef_names <- function(object) { #' @export h2o.coef_norm <- function(object, predictorSize=-1) { if (is(object, "H2OModel") && - (object@algorithm %in% c("glm", "gam", "coxph", "modelselection", "hglm"))) { + (object@algorithm %in% c("glm", "gam", "coxph", "modelselection"))) { if (object@algorithm == "modelselection") { if (object@allparameters$mode == "maxrsweep" && !object@allparameters$build_glm_model) { @@ -3174,7 +3174,7 @@ h2o.coef_norm <- function(object, predictorSize=-1) { ) } } else { - stop("Can only extract coefficients from GAMs/GLMs/HGLMs/CoxPHs/ModelSelections") + stop("Can only extract coefficients from GAMs/GLMs/CoxPHs/ModelSelections") } } diff --git a/h2o-r/h2o-package/pkgdown/_pkgdown.yml b/h2o-r/h2o-package/pkgdown/_pkgdown.yml index 341749c6db11..b4c42b3fa98b 100644 --- a/h2o-r/h2o-package/pkgdown/_pkgdown.yml +++ b/h2o-r/h2o-package/pkgdown/_pkgdown.yml @@ -76,8 +76,6 @@ reference: - h2o.coef_norm - h2o.coef_random - h2o.coefs_random_names - - h2o.coefs_random_names_norm - - h2o.coef_random_norm - h2o.coef_with_p_values - h2o.colnames - h2o.columns_by_type diff --git a/h2o-r/tests/testdir_algos/gam/runit_gam.R b/h2o-r/tests/testdir_algos/gam/runit_gam.R index 559ee4d81273..f006f9c061c9 100644 --- a/h2o-r/tests/testdir_algos/gam/runit_gam.R +++ b/h2o-r/tests/testdir_algos/gam/runit_gam.R @@ -2,7 +2,7 @@ setwd(normalizePath(dirname(R.utils::commandArgs(asValues=TRUE)$"f"))) source("../../../scripts/h2o-r-test-setup.R") test.model.gam <- function() { - data <- h2o.importFile('/Users/wendycwong/h2o-3/smalldata/testng/airlines_train.csv') + data <- h2o.importFile(path = locate('smalldata/testng/airlines_train.csv')) cols <- c("Distance") original_model <- h2o.gam(x=cols, y = "IsDepDelayed", gam_columns = cols, training_frame = data, family = "binomial") print(original_model) @@ -10,7 +10,6 @@ test.model.gam <- function() { prostate = h2o.importFile(locate("smalldata/prostate/prostate.csv")) prostate$CAPSULE <- as.factor(prostate$CAPSULE) - browser() h2o.gam(y = "CAPSULE", x = c("AGE","RACE","PSA","DCAPS"), gam_columns = c("AGE"), training_frame = prostate, family = "binomial", alpha = 0.5, lambda_search = FALSE) } diff --git a/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_HGLM_testAgainstR1.R b/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_HGLM_testAgainstR1.R index de6253cfa0a9..3bba2d1d5dce 100644 --- a/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_HGLM_testAgainstR1.R +++ b/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_HGLM_testAgainstR1.R @@ -14,9 +14,9 @@ test.HGLMData1 <- function() { z <- c(1) hglm_model <- h2o.hglm(x=xlist, y=yresp, training_frame=h2odata, random_columns=xlist, group_column="Device") - print(m11H2O) - summary(m11H2O) - modelMetrics = h2o.HGLMMetrics(m11H2O) # grab HGLM model metrics + print(hglm_model) + summary(hglm_model) + modelMetrics = h2o.HGLMMetrics(hglm_model) # grab HGLM model metrics # correct R outputs # rModelMetrics <- list(hlik = 363.6833, diff --git a/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_coefs_check.R b/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_coefs_check.R index 7cd424a4257c..7742f29fdf8a 100644 --- a/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_coefs_check.R +++ b/h2o-r/tests/testdir_algos/hglm/runit_GH_8487_coefs_check.R @@ -5,7 +5,6 @@ source("../../../scripts/h2o-r-test-setup.R") ## test.HGLMData1 <- function() { - tol = 1e-4 h2odata <- h2o.importFile(locate("smalldata/hglm_test/gaussian_0GC_allRC_2enum2numeric_p5oise_p08T_wIntercept_standardize.gz")) yresp <- "response" @@ -13,7 +12,7 @@ test.HGLMData1 <- function() { group_column <- "C1" predictor <- c("C2","C3","C4","C5") hglm_model <- h2o.hglm(x=predictor, y=yresp, training_frame=h2odata, group_column=group_column, random_columns=random_columns, - seed=12345, max_iterations=10, em_epsilon=0.0000001, random_intercept=TRUE, standardize=FALSE) + seed=12345, max_iterations=10, em_epsilon=0.0000001, random_intercept=TRUE) coeff <- h2o.coef(hglm_model) coeff_random_effects <- h2o.coef_random(hglm_model) coeff_random_effects_norm <- h2o.coef_random_norm(hglm_model)