Skip to content

Commit

Permalink
GH-16420: change default values from -1 to actual defaults. Remove ch…
Browse files Browse the repository at this point in the history
…eck for setting extra parameters.
  • Loading branch information
wendycwong committed Oct 14, 2024
1 parent dfdbcc1 commit fa46b15
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 107 deletions.
11 changes: 5 additions & 6 deletions h2o-admissibleml/src/main/java/hex/Infogram/InfogramModel.java
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,10 @@
import hex.*;
import hex.genmodel.utils.DistributionFamily;
import hex.glm.GLMModel;
import hex.schemas.*;
import hex.schemas.InfogramV3;
import water.*;
import water.fvec.Frame;
import water.udf.CFuncRef;
import water.util.TwoDimTable;

import java.lang.reflect.Field;
import java.util.*;
Expand Down Expand Up @@ -55,10 +54,10 @@ public static class InfogramParameters extends Model.Parameters {
public String[] _protected_columns = null; // store features to be excluded from final model
public double _cmi_threshold = 0.1; // default set by Deep
public double _relevance_threshold = 0.1; // default set by Deep
public double _total_information_threshold = -1; // relevance threshold for core infogram
public double _net_information_threshold = -1; // cmi threshold for core infogram
public double _safety_index_threshold = -1; // cmi threshold for safe infogram
public double _relevance_index_threshold = -1; // relevance threshold for safe infogram
public double _total_information_threshold = 0.1; // relevance threshold for core infogram
public double _net_information_threshold = 0.1; // cmi threshold for core infogram
public double _safety_index_threshold = 0.1; // cmi threshold for safe infogram
public double _relevance_index_threshold = 0.1; // relevance threshold for safe infogram
public double _data_fraction = 1.0; // fraction of data to use to calculate infogram
public Model.Parameters _infogram_algorithm_parameters; // store parameters of chosen algorithm
public int _top_n_features = 50; // if 0 consider all predictors, otherwise, consider topk predictors
Expand Down
29 changes: 13 additions & 16 deletions h2o-admissibleml/src/main/java/hex/schemas/InfogramV3.java
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,10 @@
import water.api.SchemaServer;
import water.api.schemas3.KeyV3;
import water.api.schemas3.ModelParametersSchemaV3;
import static hex.util.DistributionUtils.distributionToFamily;

import java.util.*;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;

import static hex.util.DistributionUtils.distributionToFamily;

public class InfogramV3 extends ModelBuilderSchema<Infogram, InfogramV3, InfogramV3.InfogramParametersV3> {
public static final class InfogramParametersV3 extends ModelParametersSchemaV3<InfogramModel.InfogramParameters, InfogramParametersV3> {
Expand Down Expand Up @@ -134,44 +132,43 @@ public static final class InfogramParametersV3 extends ModelParametersSchemaV3<I
level = API.Level.secondary, gridable=true)
public String[] protected_columns;

@API(help = "A number between 0 and 1 representing a threshold for total information, defaulting to 0.1. " +
@API(help = "A number between 0 and 1 representing a threshold for total information. " +
"For a specific feature, if the total information is higher than this threshold, and the corresponding " +
"net information is also higher than the threshold ``net_information_threshold``, that feature will be " +
"considered admissible. The total information is the x-axis of the Core Infogram. " +
"Default is -1 which gets set to 0.1.",
"considered admissible. The total information is the x-axis of the Core Infogram. ",
level = API.Level.secondary, gridable = true)
public double total_information_threshold;

@API(help = "A number between 0 and 1 representing a threshold for net information, defaulting to 0.1. For a " +
@API(help = "A number between 0 and 1 representing a threshold for net information. For a " +
"specific feature, if the net information is higher than this threshold, and the corresponding total " +
"information is also higher than the total_information_threshold, that feature will be considered admissible. " +
"The net information is the y-axis of the Core Infogram. Default is -1 which gets set to 0.1.",
"The net information is the y-axis of the Core Infogram.",
level = API.Level.secondary, gridable = true)
public double net_information_threshold;

@API(help = "A number between 0 and 1 representing a threshold for the relevance index, defaulting to 0.1. This is " +
@API(help = "A number between 0 and 1 representing a threshold for the relevance index. This is " +
"only used when ``protected_columns`` is set by the user. For a specific feature, if the relevance index " +
"value is higher than this threshold, and the corresponding safety index is also higher than the " +
"safety_index_threshold``, that feature will be considered admissible. The relevance index is the x-axis " +
"of the Fair Infogram. Default is -1 which gets set to 0.1.",
"of the Fair Infogram.",
level = API.Level.secondary, gridable = true)
public double relevance_index_threshold;

@API(help = "A number between 0 and 1 representing a threshold for the safety index, defaulting to 0.1. This is " +
@API(help = "A number between 0 and 1 representing a threshold for the safety index. This is " +
"only used when protected_columns is set by the user. For a specific feature, if the safety index value " +
"is higher than this threshold, and the corresponding relevance index is also higher than the " +
"relevance_index_threshold, that feature will be considered admissible. The safety index is the y-axis of " +
"the Fair Infogram. Default is -1 which gets set to 0.1.",
"the Fair Infogram.",
level = API.Level.secondary, gridable = true)
public double safety_index_threshold;

@API(help = "The fraction of training frame to use to build the infogram model. Defaults to 1.0, and any value greater " +
@API(help = "The fraction of training frame to use to build the infogram model. Any value greater " +
"than 0 and less than or equal to 1.0 is acceptable.",
level = API.Level.secondary, gridable = true)
public double data_fraction;

@API(help = "An integer specifying the number of columns to evaluate in the infogram. The columns are ranked by " +
"variable importance, and the top N are evaluated. Defaults to 50.",
"variable importance, and the top N are evaluated.",
level = API.Level.secondary, gridable = true)
public int top_n_features;

Expand Down
108 changes: 52 additions & 56 deletions h2o-py/h2o/estimators/infogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ def __init__(self,
algorithm="auto", # type: Literal["auto", "deeplearning", "drf", "gbm", "glm", "xgboost"]
algorithm_params=None, # type: Optional[dict]
protected_columns=None, # type: Optional[List[str]]
total_information_threshold=-1.0, # type: float
net_information_threshold=-1.0, # type: float
relevance_index_threshold=-1.0, # type: float
safety_index_threshold=-1.0, # type: float
total_information_threshold=0.1, # type: float
net_information_threshold=0.1, # type: float
relevance_index_threshold=0.1, # type: float
safety_index_threshold=0.1, # type: float
data_fraction=1.0, # type: float
top_n_features=50, # type: int
):
Expand Down Expand Up @@ -194,40 +194,38 @@ def __init__(self,
response.
Defaults to ``None``.
:type protected_columns: List[str], optional
:param total_information_threshold: A number between 0 and 1 representing a threshold for total information,
defaulting to 0.1. For a specific feature, if the total information is higher than this threshold, and
the corresponding net information is also higher than the threshold ``net_information_threshold``, that
feature will be considered admissible. The total information is the x-axis of the Core Infogram. Default
is -1 which gets set to 0.1.
Defaults to ``-1.0``.
:param total_information_threshold: A number between 0 and 1 representing a threshold for total information.
For a specific feature, if the total information is higher than this threshold, and the corresponding net
information is also higher than the threshold ``net_information_threshold``, that feature will be
considered admissible. The total information is the x-axis of the Core Infogram.
Defaults to ``0.1``.
:type total_information_threshold: float
:param net_information_threshold: A number between 0 and 1 representing a threshold for net information,
defaulting to 0.1. For a specific feature, if the net information is higher than this threshold, and the
corresponding total information is also higher than the total_information_threshold, that feature will be
considered admissible. The net information is the y-axis of the Core Infogram. Default is -1 which gets
set to 0.1.
Defaults to ``-1.0``.
:param net_information_threshold: A number between 0 and 1 representing a threshold for net information. For a
specific feature, if the net information is higher than this threshold, and the corresponding total
information is also higher than the total_information_threshold, that feature will be considered
admissible. The net information is the y-axis of the Core Infogram.
Defaults to ``0.1``.
:type net_information_threshold: float
:param relevance_index_threshold: A number between 0 and 1 representing a threshold for the relevance index,
defaulting to 0.1. This is only used when ``protected_columns`` is set by the user. For a specific
feature, if the relevance index value is higher than this threshold, and the corresponding safety index
is also higher than the safety_index_threshold``, that feature will be considered admissible. The
relevance index is the x-axis of the Fair Infogram. Default is -1 which gets set to 0.1.
Defaults to ``-1.0``.
:param relevance_index_threshold: A number between 0 and 1 representing a threshold for the relevance index.
This is only used when ``protected_columns`` is set by the user. For a specific feature, if the
relevance index value is higher than this threshold, and the corresponding safety index is also higher
than the ``safety_index_threshold``, that feature will be considered admissible. The relevance index is
the x-axis of the Fair Infogram.
Defaults to ``0.1``.
:type relevance_index_threshold: float
:param safety_index_threshold: A number between 0 and 1 representing a threshold for the safety index,
defaulting to 0.1. This is only used when protected_columns is set by the user. For a specific feature,
if the safety index value is higher than this threshold, and the corresponding relevance index is also
higher than the relevance_index_threshold, that feature will be considered admissible. The safety index
is the y-axis of the Fair Infogram. Default is -1 which gets set to 0.1.
Defaults to ``-1.0``.
:param safety_index_threshold: A number between 0 and 1 representing a threshold for the safety index. This is
only used when protected_columns is set by the user. For a specific feature, if the safety index value
is higher than this threshold, and the corresponding relevance index is also higher than the
relevance_index_threshold, that feature will be considered admissible. The safety index is the y-axis of
the Fair Infogram.
Defaults to ``0.1``.
:type safety_index_threshold: float
:param data_fraction: The fraction of training frame to use to build the infogram model. Defaults to 1.0, and
any value greater than 0 and less than or equal to 1.0 is acceptable.
:param data_fraction: The fraction of training frame to use to build the infogram model. Any value greater than
0 and less than or equal to 1.0 is acceptable.
Defaults to ``1.0``.
:type data_fraction: float
:param top_n_features: An integer specifying the number of columns to evaluate in the infogram. The columns are
ranked by variable importance, and the top N are evaluated. Defaults to 50.
ranked by variable importance, and the top N are evaluated.
Defaults to ``50``.
:type top_n_features: int
"""
Expand Down Expand Up @@ -739,12 +737,12 @@ def protected_columns(self, protected_columns):
@property
def total_information_threshold(self):
"""
A number between 0 and 1 representing a threshold for total information, defaulting to 0.1. For a specific
feature, if the total information is higher than this threshold, and the corresponding net information is also
higher than the threshold ``net_information_threshold``, that feature will be considered admissible. The total
information is the x-axis of the Core Infogram. Default is -1 which gets set to 0.1.
A number between 0 and 1 representing a threshold for total information. For a specific feature, if the total
information is higher than this threshold, and the corresponding net information is also higher than the
threshold ``net_information_threshold``, that feature will be considered admissible. The total information is
the x-axis of the Core Infogram.
Type: ``float``, defaults to ``-1.0``.
Type: ``float``, defaults to ``0.1``.
"""
return self._parms.get("total_information_threshold")

Expand All @@ -762,12 +760,12 @@ def total_information_threshold(self, total_information_threshold):
@property
def net_information_threshold(self):
"""
A number between 0 and 1 representing a threshold for net information, defaulting to 0.1. For a specific
feature, if the net information is higher than this threshold, and the corresponding total information is also
higher than the total_information_threshold, that feature will be considered admissible. The net information is
the y-axis of the Core Infogram. Default is -1 which gets set to 0.1.
A number between 0 and 1 representing a threshold for net information. For a specific feature, if the net
information is higher than this threshold, and the corresponding total information is also higher than the
total_information_threshold, that feature will be considered admissible. The net information is the y-axis of
the Core Infogram.
Type: ``float``, defaults to ``-1.0``.
Type: ``float``, defaults to ``0.1``.
"""
return self._parms.get("net_information_threshold")

Expand All @@ -785,13 +783,12 @@ def net_information_threshold(self, net_information_threshold):
@property
def relevance_index_threshold(self):
"""
A number between 0 and 1 representing a threshold for the relevance index, defaulting to 0.1. This is only used
when ``protected_columns`` is set by the user. For a specific feature, if the relevance index value is higher
than this threshold, and the corresponding safety index is also higher than the safety_index_threshold``, that
feature will be considered admissible. The relevance index is the x-axis of the Fair Infogram. Default is -1
which gets set to 0.1.
A number between 0 and 1 representing a threshold for the relevance index. This is only used when
``protected_columns`` is set by the user. For a specific feature, if the relevance index value is higher than
this threshold, and the corresponding safety index is also higher than the ``safety_index_threshold``, that
feature will be considered admissible. The relevance index is the x-axis of the Fair Infogram.
Type: ``float``, defaults to ``-1.0``.
Type: ``float``, defaults to ``0.1``.
"""
return self._parms.get("relevance_index_threshold")

Expand All @@ -809,13 +806,12 @@ def relevance_index_threshold(self, relevance_index_threshold):
@property
def safety_index_threshold(self):
"""
A number between 0 and 1 representing a threshold for the safety index, defaulting to 0.1. This is only used
when protected_columns is set by the user. For a specific feature, if the safety index value is higher than
this threshold, and the corresponding relevance index is also higher than the relevance_index_threshold, that
feature will be considered admissible. The safety index is the y-axis of the Fair Infogram. Default is -1 which
gets set to 0.1.
A number between 0 and 1 representing a threshold for the safety index. This is only used when
protected_columns is set by the user. For a specific feature, if the safety index value is higher than this
threshold, and the corresponding relevance index is also higher than the relevance_index_threshold, that feature
will be considered admissible. The safety index is the y-axis of the Fair Infogram.
Type: ``float``, defaults to ``-1.0``.
Type: ``float``, defaults to ``0.1``.
"""
return self._parms.get("safety_index_threshold")

Expand All @@ -833,8 +829,8 @@ def safety_index_threshold(self, safety_index_threshold):
@property
def data_fraction(self):
"""
The fraction of training frame to use to build the infogram model. Defaults to 1.0, and any value greater than 0
and less than or equal to 1.0 is acceptable.
The fraction of training frame to use to build the infogram model. Any value greater than 0 and less than or
equal to 1.0 is acceptable.
Type: ``float``, defaults to ``1.0``.
"""
Expand All @@ -849,7 +845,7 @@ def data_fraction(self, data_fraction):
def top_n_features(self):
"""
An integer specifying the number of columns to evaluate in the infogram. The columns are ranked by variable
importance, and the top N are evaluated. Defaults to 50.
importance, and the top N are evaluated.
Type: ``int``, defaults to ``50``.
"""
Expand Down
Loading

0 comments on commit fa46b15

Please sign in to comment.