Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-8487: implement HGLM gaussian [nocheck] #16403

Merged
merged 2 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 1 addition & 19 deletions h2o-algos/src/main/java/hex/DataInfo.java
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ public DataInfo validDinfo(Frame valid) {

public double[] denormalizeBeta(double [] beta) {
int N = fullN()+1;
assert (beta.length % N) == 0:"beta len = " + beta.length + " expected multiple of" + N;
assert (beta.length % N) == 0:"beta len = " + beta.length + " expected multiple of " + N;
int nclasses = beta.length/N;
beta = MemoryManager.arrayCopyOf(beta,beta.length);
if (_predictor_transform == DataInfo.TransformType.STANDARDIZE) {
Expand Down Expand Up @@ -1078,24 +1078,6 @@ public final double innerProduct(DataInfo.Row row) {
return res;
}

/**
 * Scales this row by {@code w} and writes the result into {@code rowContent} (per the
 * original note: used for HGLM, covering the data part and not the random columns).
 *
 * <p>Slot 0 is set to {@code w} (the intercept term). For each of the first {@code nBins}
 * entries of {@code binIds}, slot {@code binIds[i] + 1} is overwritten with {@code w}.
 * For each entry of {@code numVals}, {@code numVals[i] * w} is added to slot
 * {@code i + catOffsets + 1}.
 *
 * @param w          scalar multiplier
 * @param rowContent array updated in place; the numeric slots are accumulated into, not reset
 * @param catOffsets offset added (plus one) to the index of each numeric value
 * @return the same {@code rowContent} instance that was passed in
 */
public double[] scalarProduct(double w, double[] rowContent, int catOffsets) {
  // intercept term
  rowContent[0] = w;

  // binned entries: the value is stored as-is, not accumulated
  int bin = 0;
  while (bin < nBins) {
    rowContent[binIds[bin] + 1] = w;
    bin++;
  }

  // numeric entries: accumulate the scaled value
  for (int k = 0; k < numVals.length; k++) {
    final int slot = k + catOffsets + 1;
    rowContent[slot] += numVals[k] * w;
  }

  return rowContent;
}
public final double twoNormSq() {
assert !_intercept;
assert numIds == null;
Expand Down
2 changes: 1 addition & 1 deletion h2o-algos/src/main/java/hex/api/RegisterAlgos.java
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import hex.ModelBuilder;
import hex.anovaglm.ANOVAGLM;
import hex.modelselection.ModelSelection;
import hex.psvm.PSVM;
import hex.tree.TreeHandler;
import water.api.AlgoAbstractRegister;
Expand Down Expand Up @@ -39,6 +38,7 @@ public void registerEndPoints(RestApiContext context) {
new hex.modelselection.ModelSelection (true),
new hex.isotonic .IsotonicRegression(true),
new hex.tree.dt .DT (true),
new hex.hglm .HGLM (true),
new hex.adaboost. AdaBoost (true)
};

Expand Down
3 changes: 1 addition & 2 deletions h2o-algos/src/main/java/hex/gam/GAM.java
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@
import static hex.glm.GLMModel.GLMParameters.GLMType.gam;
import static hex.util.LinearAlgebraUtils.generateOrthogonalComplement;
import static hex.util.LinearAlgebraUtils.generateQR;
import static water.util.ArrayUtils.expandArray;
import static water.util.ArrayUtils.subtract;
import static water.util.ArrayUtils.*;


public class GAM extends ModelBuilder<GAMModel, GAMModel.GAMParameters, GAMModel.GAMModelOutput> {
Expand Down
23 changes: 0 additions & 23 deletions h2o-algos/src/main/java/hex/gam/MatrixFrameUtils/GamUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,29 +139,6 @@ else if (!name1ContainsResp && standarNContainsResp) // if name1 does not conta
return equalNames;
}

/**
 * Copies every row of {@code src_array} into the row with the same index in
 * {@code dest_array}. Only the first {@code src_array[i].length} elements of each
 * destination row are overwritten; the destination rows must already be allocated.
 *
 * @param src_array  rows to copy from
 * @param dest_array pre-allocated rows to copy into
 */
public static void copy2DArray(double[][] src_array, double[][] dest_array) {
  int row = 0;
  for (double[] srcRow : src_array) {
    System.arraycopy(srcRow, 0, dest_array[row], 0, srcRow.length);
    row++;
  }
}

/**
 * Returns a newly allocated copy of {@code src_array}.
 *
 * <p>The copy is allocated as {@code src_array.length} rows by {@code src_array[0].length}
 * columns, so the source must contain at least one row.
 *
 * @param src_array array to duplicate
 * @return a new array holding the same values
 */
public static double[][] copy2DArray(double[][] src_array) {
  final int rows = src_array.length;
  final int cols = src_array[0].length;
  double[][] duplicate = MemoryManager.malloc8d(rows, cols);
  copy2DArray(src_array, duplicate);
  return duplicate;
}

/**
 * Copies every row of {@code src_array} into the row with the same index in
 * {@code dest_array}. Only the first {@code src_array[i].length} elements of each
 * destination row are overwritten; the destination rows must already be allocated.
 *
 * @param src_array  rows to copy from
 * @param dest_array pre-allocated rows to copy into
 */
public static void copy2DArray(int[][] src_array, int[][] dest_array) {
  int row = 0;
  for (int[] srcRow : src_array) {
    System.arraycopy(srcRow, 0, dest_array[row], 0, srcRow.length);
    row++;
  }
}

public static void copyCVGLMtoGAMModel(GAMModel model, GLMModel glmModel, GAMParameters parms, String foldColumn) {
// copy over cross-validation metrics
model._output._cross_validation_metrics = glmModel._output._cross_validation_metrics;
Expand Down
102 changes: 0 additions & 102 deletions h2o-algos/src/main/java/hex/glm/ComputationState.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import water.H2ORuntime;
import water.Job;
import water.MemoryManager;
import water.fvec.Frame;
import water.util.ArrayUtils;
import water.util.IcedHashMap;
import water.util.Log;
Expand Down Expand Up @@ -49,7 +48,6 @@ public final class ComputationState {
private boolean _dispersionEstimated;
boolean _allIn;
int _iter;
int _iterHGLM_GLMMME; // keep track of iterations used in estimating fixed/random coefficients
private double _lambda = 0;
private double _lambdaMax = Double.NaN;
private GLMGradientInfo _ginfo; // gradient info excluding l1 penalty
Expand All @@ -73,15 +71,6 @@ public final class ComputationState {
ConstraintsGram[] _gramLess = null;
private final GLM.BetaInfo _modelBetaInfo;
private double[] _beta; // vector of coefficients corresponding to active data
// ---- HGLM-specific state ----
private double[] _ubeta; // HGLM: coefficients of the random effects
private double[] _psi; // HGLM: psi
private double[] _phi; // HGLM: phi, sized by the random columns' levels
private double _tau; // HGLM: tau, for ei
private double _correction_HL; // HGLM: HL correction value
double[] _sumEtaSquareConvergence; // HGLM: stores sumEtaSquare and convergence
double[] _likelihoodInfo; // HGLM: stores 4 elements: hlik, pvh, pbvh, cAIC
public String[] _randCoeffNames; // HGLM: random coefficient names
private Frame _priorw_wpsi; // HGLM: prior weight and the weight calculated for psi
final DataInfo _dinfo;
private GLMGradientSolver _gslvr;
private final Job _job;
Expand All @@ -105,11 +94,6 @@ public ComputationState(Job job, GLMParameters parms, DataInfo dinfo, BetaConstr
_nbetas = bi._nBetas;
_betaLengthPerClass = dinfo.fullN()+1;
_totalBetaLength = _betaLengthPerClass * _nbetas;
if (_parms._HGLM) {
_sumEtaSquareConvergence = new double[2];
if (_parms._calc_like)
_likelihoodInfo = new double[4];
}
_modelBetaInfo = bi;
}

Expand Down Expand Up @@ -224,84 +208,14 @@ void copyCheckModel2State(GLMModel model, int[][] _gamColIndices) {
}
}
}

/**
 * Replaces the stored sumEtaSquare/convergence array with the given reference
 * (the array is not copied).
 *
 * @param sumInfo array to store
 */
public void set_sumEtaSquareConvergence(double[] sumInfo) {
  this._sumEtaSquareConvergence = sumInfo;
}

/**
 * Copies {@code len} GLM coefficients from {@code beta} into this state's {@code _beta},
 * allocating {@code _beta} with length {@code len} if it has not been set yet.
 *
 * <p>When {@code interceptFirst} is true, {@code beta[startIdx]} is treated as the
 * intercept and is moved to the last copied position ({@code len - 1}); the remaining
 * {@code len - 1} values are shifted to the front.
 *
 * @param beta           source coefficients
 * @param startIdx       first index of {@code beta} to copy from
 * @param len            number of coefficients to copy
 * @param interceptFirst true if the intercept is stored first in the copied range
 */
public void set_beta_HGLM(double[] beta, int startIdx, int len, boolean interceptFirst) {
  if (_beta == null) {
    _beta = new double[len];
  }

  if (!interceptFirst) {
    // same layout in source and destination: straight copy
    System.arraycopy(beta, startIdx, _beta, 0, len);
    return;
  }

  // source has the intercept first, destination keeps it last
  final int interceptSlot = len - 1;
  System.arraycopy(beta, startIdx + 1, _beta, 0, interceptSlot);
  _beta[interceptSlot] = beta[startIdx];
}

/**
 * Stores the four likelihood values into the existing {@code _likelihoodInfo} array,
 * in the order hlik, pvh, pbvh, cAIC (indices 0 to 3).
 *
 * @param hlik value written to index 0
 * @param pvh  value written to index 1
 * @param pbvh value written to index 2
 * @param cAIC value written to index 3
 */
public void set_likelihoodInfo(double hlik, double pvh, double pbvh, double cAIC) {
  final double[] info = _likelihoodInfo;
  info[0] = hlik;
  info[1] = pvh;
  info[2] = pbvh;
  info[3] = cAIC;
}

/**
 * Copies {@code len} random-effect coefficients from {@code ubeta}, starting at
 * {@code startIdx}, into the front of {@code _ubeta}. {@code _ubeta} is allocated with
 * length {@code len} if it has not been set yet.
 *
 * @param ubeta    source coefficients
 * @param startIdx first index of {@code ubeta} to copy from
 * @param len      number of coefficients to copy
 */
public void set_ubeta_HGLM(double[] ubeta, int startIdx, int len) {
  if (null == _ubeta) {
    _ubeta = new double[len];
  }
  System.arraycopy(ubeta, startIdx, _ubeta, 0, len);
}

/**
 * Records the z-values together with the flag saying whether dispersion was estimated.
 *
 * @param zValues             array to store (kept by reference)
 * @param dispersionEstimated flag stored alongside the z-values
 */
public void setZValues(double[] zValues, boolean dispersionEstimated) {
  this._dispersionEstimated = dispersionEstimated;
  this._zValues = zValues;
}

/** @return the stored {@code _psi} array itself (not a copy) */
public double[] get_psi() {
  return this._psi;
}

/** @return the stored {@code _correction_HL} value */
public double get_correction_HL() {
  return this._correction_HL;
}

/** @return the stored {@code _phi} array itself (not a copy) */
public double[] get_phi() {
  return this._phi;
}

/** @return the stored {@code _priorw_wpsi} frame */
public Frame get_priorw_wpsi() {
  return this._priorw_wpsi;
}

/** @return the stored {@code _tau} value */
public double get_tau() {
  return this._tau;
}

/** @return the current value of the {@code _lambdaNull} flag */
public boolean getLambdaNull() {
  return this._lambdaNull;
}

/**
 * Stores a new {@code _tau} value.
 *
 * @param tau value to store
 */
public void set_tau(double tau) {
  this._tau = tau;
}

/**
 * Overwrites the contents of the existing {@code _psi} array with the values of
 * {@code psi}. The array reference itself is not replaced; with assertions enabled the
 * two arrays are required to have equal length.
 *
 * @param psi values to copy in
 */
public void set_psi(double[] psi) {
  assert _psi.length==psi.length:"Length of _psi and psi should be the same.";
  final int count = psi.length;
  System.arraycopy(psi, 0, _psi, 0, count);
}

/**
 * Overwrites the contents of the existing {@code _phi} array with the values of
 * {@code phi}. The array reference itself is not replaced; with assertions enabled the
 * two arrays are required to have equal length.
 *
 * @param phi values to copy in
 */
public void set_phi(double[] phi) {
  assert _phi.length==phi.length:"Length of _phi and phi should be the same.";
  final int count = phi.length;
  System.arraycopy(phi, 0, _phi, 0, count);
}

/** @return the stored gradient solver, {@code _gslvr} */
public GLMGradientSolver gslvr() {
  return this._gslvr;
}
/** @return the stored {@code _lambda} value */
public double lambda() {
  return this._lambda;
}
/** @return the stored {@code _alpha} value */
public double alpha() {
  return this._alpha;
}
Expand Down Expand Up @@ -339,9 +253,6 @@ public void setLambda(double lambda) {
return betaMultinomial(_activeClass,_beta);
return _beta;
}
/**
 * @return the stored {@code _ubeta} array itself (not a copy); may be {@code null},
 *         so callers must check before use
 */
public double[] ubeta() {
  return this._ubeta;
}
/**
 * Returns the cached gradient info, computing it on first use via
 * {@code gslvr().getGradient(beta())} and storing the result in {@code _ginfo}.
 *
 * @return the cached (or freshly computed) gradient info
 */
public GLMGradientInfo ginfo() {
  if (_ginfo == null) {
    _ginfo = gslvr().getGradient(beta());
  }
  return _ginfo;
}
/** @return the stored {@code _activeBC} beta constraint */
public BetaConstraint activeBC() {
  return this._activeBC;
}
/** @return the stored {@code _likelihood} value */
public double likelihood() {
  return this._likelihood;
}
Expand Down Expand Up @@ -1060,19 +971,6 @@ protected void setIter(int iteration) {
/** Clears {@code _activeDataMultinomial} by setting it to {@code null}. */
protected void setActiveDataMultinomialNull() {
  this._activeDataMultinomial = null;
}
/** Clears {@code _activeData} by setting it to {@code null}. */
protected void setActiveDataNull() {
  this._activeData = null;
}
/**
 * Assigns {@code _lambda} directly; this method touches no other state.
 *
 * @param lambda value to store
 */
protected void setLambdaSimple(double lambda) {
  this._lambda = lambda;
}

/**
 * Initializes the HGLM-related state in one call.
 *
 * <p>All array arguments are copied, so later changes to the caller's arrays do not
 * affect this state. The {@code wpsi} frame is stored by reference. The HGLM iteration
 * counter {@code _iterHGLM_GLMMME} is reset to 0.
 *
 * @param beta           fixed-effect coefficients (copied into {@code _beta})
 * @param ubeta          random-effect coefficients (copied into {@code _ubeta})
 * @param psi            psi values (copied into {@code _psi})
 * @param phi            phi values (copied into {@code _phi})
 * @param hlcorrection   value stored in {@code _correction_HL}
 * @param tau            value stored in {@code _tau}
 * @param wpsi           frame holding prior_weight and the calculated wpsi values for the
 *                       coefficients of the random columns
 * @param randCoeffNames random coefficient names (copied into {@code _randCoeffNames})
 */
protected void setHGLMComputationState(double [] beta, double[] ubeta, double[] psi, double[] phi,
                                       double hlcorrection, double tau, Frame wpsi, String[] randCoeffNames) {
  // defensive copies of every array argument
  _beta = beta.clone();
  _ubeta = ubeta.clone();
  _randCoeffNames = randCoeffNames.clone();
  _psi = psi.clone();
  _phi = phi.clone();

  // scalars and the frame are stored as given
  _correction_HL = hlcorrection;
  _tau = tau;
  _priorw_wpsi = wpsi;

  // start HGLM iteration counting from scratch
  _iterHGLM_GLMMME = 0;
}

public double [] expandBeta(double [] beta) { // for multinomials
int fullCoefLen = (_dinfo.fullN() + 1) * _nbetas;
Expand Down
Loading
Loading