GH-16420: change default values from -1 to actual defaults. Remove check for setting extra parameters.
h2oai · Oct 14, 2024 · fa46b15 · fa46b15
1 parent dfdbcc1
commit fa46b15
Show file tree

Hide file tree

Showing 4 changed files with 98 additions and 107 deletions.
diff --git a/h2o-admissibleml/src/main/java/hex/Infogram/InfogramModel.java b/h2o-admissibleml/src/main/java/hex/Infogram/InfogramModel.java
@@ -5,11 +5,10 @@
 import hex.*;
 import hex.genmodel.utils.DistributionFamily;
 import hex.glm.GLMModel;
-import hex.schemas.*;
+import hex.schemas.InfogramV3;
 import water.*;
 import water.fvec.Frame;
 import water.udf.CFuncRef;
-import water.util.TwoDimTable;
 
 import java.lang.reflect.Field;
 import java.util.*;
@@ -55,10 +54,10 @@ public static class InfogramParameters extends Model.Parameters {
     public String[] _protected_columns = null;    // store features to be excluded from final model
     public double _cmi_threshold = 0.1;           // default set by Deep
     public double _relevance_threshold = 0.1;         // default set by Deep
-    public double _total_information_threshold = -1;  // relevance threshold for core infogram
-    public double _net_information_threshold = -1;    // cmi threshold for core infogram
-    public double _safety_index_threshold = -1;       // cmi threshold for safe infogram
-    public double _relevance_index_threshold = -1;    // relevance threshold for safe infogram
+    public double _total_information_threshold = 0.1;  // relevance threshold for core infogram
+    public double _net_information_threshold = 0.1;    // cmi threshold for core infogram
+    public double _safety_index_threshold = 0.1;       // cmi threshold for safe infogram
+    public double _relevance_index_threshold = 0.1;    // relevance threshold for safe infogram
     public double _data_fraction = 1.0;               // fraction of data to use to calculate infogram
     public Model.Parameters _infogram_algorithm_parameters;   // store parameters of chosen algorithm
     public int _top_n_features = 50;                          // if 0 consider all predictors, otherwise, consider topk predictors

diff --git a/h2o-admissibleml/src/main/java/hex/schemas/InfogramV3.java b/h2o-admissibleml/src/main/java/hex/schemas/InfogramV3.java
@@ -15,12 +15,10 @@
 import water.api.SchemaServer;
 import water.api.schemas3.KeyV3;
 import water.api.schemas3.ModelParametersSchemaV3;
-import static hex.util.DistributionUtils.distributionToFamily;
+
 import java.util.*;
-import java.util.Arrays;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Properties;
+
+import static hex.util.DistributionUtils.distributionToFamily;
 
 public class InfogramV3 extends ModelBuilderSchema<Infogram, InfogramV3, InfogramV3.InfogramParametersV3> {
   public static final class InfogramParametersV3 extends ModelParametersSchemaV3<InfogramModel.InfogramParameters, InfogramParametersV3> {
@@ -134,44 +132,43 @@ public static final class InfogramParametersV3 extends ModelParametersSchemaV3<I
             level = API.Level.secondary, gridable=true)
     public String[] protected_columns;    
 
-    @API(help = "A number between 0 and 1 representing a threshold for total information, defaulting to 0.1. " + 
+    @API(help = "A number between 0 and 1 representing a threshold for total information.  " + 
             "For a specific feature, if the total information is higher than this threshold, and the corresponding " + 
             "net information is also higher than the threshold ``net_information_threshold``, that feature will be " + 
-            "considered admissible. The total information is the x-axis of the Core Infogram. " +
-            "Default is -1 which gets set to 0.1.",
+            "considered admissible. The total information is the x-axis of the Core Infogram. ",
             level = API.Level.secondary, gridable = true)
     public double total_information_threshold;       
 
-    @API(help = "A number between 0 and 1 representing a threshold for net information, defaulting to 0.1.  For a " + 
+    @API(help = "A number between 0 and 1 representing a threshold for net information.  For a " + 
             "specific feature, if the net information is higher than this threshold, and the corresponding total " + 
             "information is also higher than the total_information_threshold, that feature will be considered admissible. " + 
-            "The net information is the y-axis of the Core Infogram. Default is -1 which gets set to 0.1.",
+            "The net information is the y-axis of the Core Infogram.",
             level = API.Level.secondary, gridable = true)
     public double net_information_threshold; 
 
-    @API(help = "A number between 0 and 1 representing a threshold for the relevance index, defaulting to 0.1.  This is " + 
+    @API(help = "A number between 0 and 1 representing a threshold for the relevance index.  This is " + 
             "only used when ``protected_columns`` is set by the user.  For a specific feature, if the relevance index " + 
             "value is higher than this threshold, and the corresponding safety index is also higher than the " + 
             "safety_index_threshold``, that feature will be considered admissible.  The relevance index is the x-axis " + 
-            "of the Fair Infogram. Default is -1 which gets set to 0.1.", 
+            "of the Fair Infogram.", 
             level = API.Level.secondary, gridable = true)
     public double relevance_index_threshold;    
 
-    @API(help = "A number between 0 and 1 representing a threshold for the safety index, defaulting to 0.1.  This is " + 
+    @API(help = "A number between 0 and 1 representing a threshold for the safety index.  This is " + 
             "only used when protected_columns is set by the user.  For a specific feature, if the safety index value " + 
             "is higher than this threshold, and the corresponding relevance index is also higher than the " + 
             "relevance_index_threshold, that feature will be considered admissible.  The safety index is the y-axis of " + 
-            "the Fair Infogram. Default is -1 which gets set to 0.1.",
+            "the Fair Infogram.",
             level = API.Level.secondary, gridable = true)
     public double safety_index_threshold;
 
-    @API(help = "The fraction of training frame to use to build the infogram model. Defaults to 1.0, and any value greater " + 
+    @API(help = "The fraction of training frame to use to build the infogram model. Any value greater " + 
             "than 0 and less than or equal to 1.0 is acceptable.",
             level = API.Level.secondary, gridable = true)
     public double data_fraction;
 
     @API(help = "An integer specifying the number of columns to evaluate in the infogram.  The columns are ranked by " + 
-            "variable importance, and the top N are evaluated.  Defaults to 50.",
+            "variable importance, and the top N are evaluated.",
             level = API.Level.secondary, gridable = true)
     public int top_n_features;
 

diff --git a/h2o-py/h2o/estimators/infogram.py b/h2o-py/h2o/estimators/infogram.py
@@ -66,10 +66,10 @@ def __init__(self,
                  algorithm="auto",  # type: Literal["auto", "deeplearning", "drf", "gbm", "glm", "xgboost"]
                  algorithm_params=None,  # type: Optional[dict]
                  protected_columns=None,  # type: Optional[List[str]]
-                 total_information_threshold=-1.0,  # type: float
-                 net_information_threshold=-1.0,  # type: float
-                 relevance_index_threshold=-1.0,  # type: float
-                 safety_index_threshold=-1.0,  # type: float
+                 total_information_threshold=0.1,  # type: float
+                 net_information_threshold=0.1,  # type: float
+                 relevance_index_threshold=0.1,  # type: float
+                 safety_index_threshold=0.1,  # type: float
                  data_fraction=1.0,  # type: float
                  top_n_features=50,  # type: int
                  ):
@@ -194,40 +194,38 @@ def __init__(self,
                response.
                Defaults to ``None``.
         :type protected_columns: List[str], optional
-        :param total_information_threshold: A number between 0 and 1 representing a threshold for total information,
-               defaulting to 0.1. For a specific feature, if the total information is higher than this threshold, and
-               the corresponding net information is also higher than the threshold ``net_information_threshold``, that
-               feature will be considered admissible. The total information is the x-axis of the Core Infogram. Default
-               is -1 which gets set to 0.1.
-               Defaults to ``-1.0``.
+        :param total_information_threshold: A number between 0 and 1 representing a threshold for total information.
+               For a specific feature, if the total information is higher than this threshold, and the corresponding net
+               information is also higher than the threshold ``net_information_threshold``, that feature will be
+               considered admissible. The total information is the x-axis of the Core Infogram.
+               Defaults to ``0.1``.
         :type total_information_threshold: float
-        :param net_information_threshold: A number between 0 and 1 representing a threshold for net information,
-               defaulting to 0.1.  For a specific feature, if the net information is higher than this threshold, and the
-               corresponding total information is also higher than the total_information_threshold, that feature will be
-               considered admissible. The net information is the y-axis of the Core Infogram. Default is -1 which gets
-               set to 0.1.
-               Defaults to ``-1.0``.
+        :param net_information_threshold: A number between 0 and 1 representing a threshold for net information.  For a
+               specific feature, if the net information is higher than this threshold, and the corresponding total
+               information is also higher than the total_information_threshold, that feature will be considered
+               admissible. The net information is the y-axis of the Core Infogram.
+               Defaults to ``0.1``.
         :type net_information_threshold: float
-        :param relevance_index_threshold: A number between 0 and 1 representing a threshold for the relevance index,
-               defaulting to 0.1.  This is only used when ``protected_columns`` is set by the user.  For a specific
-               feature, if the relevance index value is higher than this threshold, and the corresponding safety index
-               is also higher than the safety_index_threshold``, that feature will be considered admissible.  The
-               relevance index is the x-axis of the Fair Infogram. Default is -1 which gets set to 0.1.
-               Defaults to ``-1.0``.
+        :param relevance_index_threshold: A number between 0 and 1 representing a threshold for the relevance index.
+               This is only used when ``protected_columns`` is set by the user.  For a specific feature, if the
+               relevance index value is higher than this threshold, and the corresponding safety index is also higher
+               than the safety_index_threshold``, that feature will be considered admissible.  The relevance index is
+               the x-axis of the Fair Infogram.
+               Defaults to ``0.1``.
         :type relevance_index_threshold: float
-        :param safety_index_threshold: A number between 0 and 1 representing a threshold for the safety index,
-               defaulting to 0.1.  This is only used when protected_columns is set by the user.  For a specific feature,
-               if the safety index value is higher than this threshold, and the corresponding relevance index is also
-               higher than the relevance_index_threshold, that feature will be considered admissible.  The safety index
-               is the y-axis of the Fair Infogram. Default is -1 which gets set to 0.1.
-               Defaults to ``-1.0``.
+        :param safety_index_threshold: A number between 0 and 1 representing a threshold for the safety index.  This is
+               only used when protected_columns is set by the user.  For a specific feature, if the safety index value
+               is higher than this threshold, and the corresponding relevance index is also higher than the
+               relevance_index_threshold, that feature will be considered admissible.  The safety index is the y-axis of
+               the Fair Infogram.
+               Defaults to ``0.1``.
         :type safety_index_threshold: float
-        :param data_fraction: The fraction of training frame to use to build the infogram model. Defaults to 1.0, and
-               any value greater than 0 and less than or equal to 1.0 is acceptable.
+        :param data_fraction: The fraction of training frame to use to build the infogram model. Any value greater than
+               0 and less than or equal to 1.0 is acceptable.
                Defaults to ``1.0``.
         :type data_fraction: float
         :param top_n_features: An integer specifying the number of columns to evaluate in the infogram.  The columns are
-               ranked by variable importance, and the top N are evaluated.  Defaults to 50.
+               ranked by variable importance, and the top N are evaluated.
                Defaults to ``50``.
         :type top_n_features: int
         """
@@ -739,12 +737,12 @@ def protected_columns(self, protected_columns):
     @property
     def total_information_threshold(self):
         """
-        A number between 0 and 1 representing a threshold for total information, defaulting to 0.1. For a specific
-        feature, if the total information is higher than this threshold, and the corresponding net information is also
-        higher than the threshold ``net_information_threshold``, that feature will be considered admissible. The total
-        information is the x-axis of the Core Infogram. Default is -1 which gets set to 0.1.
+        A number between 0 and 1 representing a threshold for total information.  For a specific feature, if the total
+        information is higher than this threshold, and the corresponding net information is also higher than the
+        threshold ``net_information_threshold``, that feature will be considered admissible. The total information is
+        the x-axis of the Core Infogram.
 
-        Type: ``float``, defaults to ``-1.0``.
+        Type: ``float``, defaults to ``0.1``.
         """
         return self._parms.get("total_information_threshold")
 
@@ -762,12 +760,12 @@ def total_information_threshold(self, total_information_threshold):
     @property
     def net_information_threshold(self):
         """
-        A number between 0 and 1 representing a threshold for net information, defaulting to 0.1.  For a specific
-        feature, if the net information is higher than this threshold, and the corresponding total information is also
-        higher than the total_information_threshold, that feature will be considered admissible. The net information is
-        the y-axis of the Core Infogram. Default is -1 which gets set to 0.1.
+        A number between 0 and 1 representing a threshold for net information.  For a specific feature, if the net
+        information is higher than this threshold, and the corresponding total information is also higher than the
+        total_information_threshold, that feature will be considered admissible. The net information is the y-axis of
+        the Core Infogram.
 
-        Type: ``float``, defaults to ``-1.0``.
+        Type: ``float``, defaults to ``0.1``.
         """
         return self._parms.get("net_information_threshold")
 
@@ -785,13 +783,12 @@ def net_information_threshold(self, net_information_threshold):
     @property
     def relevance_index_threshold(self):
         """
-        A number between 0 and 1 representing a threshold for the relevance index, defaulting to 0.1.  This is only used
-        when ``protected_columns`` is set by the user.  For a specific feature, if the relevance index value is higher
-        than this threshold, and the corresponding safety index is also higher than the safety_index_threshold``, that
-        feature will be considered admissible.  The relevance index is the x-axis of the Fair Infogram. Default is -1
-        which gets set to 0.1.
+        A number between 0 and 1 representing a threshold for the relevance index.  This is only used when
+        ``protected_columns`` is set by the user.  For a specific feature, if the relevance index value is higher than
+        this threshold, and the corresponding safety index is also higher than the safety_index_threshold``, that
+        feature will be considered admissible.  The relevance index is the x-axis of the Fair Infogram.
 
-        Type: ``float``, defaults to ``-1.0``.
+        Type: ``float``, defaults to ``0.1``.
         """
         return self._parms.get("relevance_index_threshold")
 
@@ -809,13 +806,12 @@ def relevance_index_threshold(self, relevance_index_threshold):
     @property
     def safety_index_threshold(self):
         """
-        A number between 0 and 1 representing a threshold for the safety index, defaulting to 0.1.  This is only used
-        when protected_columns is set by the user.  For a specific feature, if the safety index value is higher than
-        this threshold, and the corresponding relevance index is also higher than the relevance_index_threshold, that
-        feature will be considered admissible.  The safety index is the y-axis of the Fair Infogram. Default is -1 which
-        gets set to 0.1.
+        A number between 0 and 1 representing a threshold for the safety index.  This is only used when
+        protected_columns is set by the user.  For a specific feature, if the safety index value is higher than this
+        threshold, and the corresponding relevance index is also higher than the relevance_index_threshold, that feature
+        will be considered admissible.  The safety index is the y-axis of the Fair Infogram.
 
-        Type: ``float``, defaults to ``-1.0``.
+        Type: ``float``, defaults to ``0.1``.
         """
         return self._parms.get("safety_index_threshold")
 
@@ -833,8 +829,8 @@ def safety_index_threshold(self, safety_index_threshold):
     @property
     def data_fraction(self):
         """
-        The fraction of training frame to use to build the infogram model. Defaults to 1.0, and any value greater than 0
-        and less than or equal to 1.0 is acceptable.
+        The fraction of training frame to use to build the infogram model. Any value greater than 0 and less than or
+        equal to 1.0 is acceptable.
 
         Type: ``float``, defaults to ``1.0``.
         """
@@ -849,7 +845,7 @@ def data_fraction(self, data_fraction):
     def top_n_features(self):
         """
         An integer specifying the number of columns to evaluate in the infogram.  The columns are ranked by variable
-        importance, and the top N are evaluated.  Defaults to 50.
+        importance, and the top N are evaluated.
 
         Type: ``int``, defaults to ``50``.
         """