Merge pull request #199 from h2oai/sr/fix-install-recipes
Update recipe dependencies for Python 3.11 compatibility
rsujeevan authored May 22, 2024
2 parents 8cb2371 + 1439a0f commit 38e4aa0
Showing 56 changed files with 119 additions and 122 deletions.
4 changes: 2 additions & 2 deletions data/any_env.py
@@ -12,10 +12,10 @@ def create_data(X=None):
@staticmethod
def create_data_popen(X=None):
# Specify the python package dependencies. Will be installed in order of list
- pyversion = "3.8"
+ pyversion = "3.11"
_install_h2oaicore = False
_install_datatable = True
- _modules_needed_by_name = ["pandas==1.1.5"]
+ _modules_needed_by_name = ["pandas==1.5.3"]

import os
from h2oaicore.data import DataContribLoader
8 changes: 4 additions & 4 deletions data/any_env2.py
@@ -4,7 +4,7 @@
import functools


- def wrap_create(pyversion="3.8", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=[],
+ def wrap_create(pyversion="3.11", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=[],
cache_env=False, file=None, id=None,
**kwargs_wrapper):
""" Decorate a function to create_data in popen in isolated env
@@ -23,7 +23,7 @@ def wrapper(*args, **kwargs):
return actual_decorator


- def create_data_popen(func, *args, pyversion="3.8", install_h2oaicore=False, install_datatable=True,
+ def create_data_popen(func, *args, pyversion="3.11", install_h2oaicore=False, install_datatable=True,
modules_needed_by_name=[], cache_env=False, file=None, id=None,
X=None, **kwargs):
""" Run recipe in popen in isolated env
@@ -119,9 +119,9 @@ class FreshEnvData(CustomData):
# NOTE: Keep @wrap_create on a single line
# NOTE: If want to share cache across recipes, can set cache_env=True and set id=<some unique identifier, like myrecipe12345>
# Below caches the env into "id" folder
- # @wrap_create(pyversion="3.8", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.1.5"], cache_env=True, file=__file__, id="myrecipe12345")
+ # @wrap_create(pyversion="3.11", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.5.3"], cache_env=True, file=__file__, id="myrecipe12345")
# Below does not cache the env
- @wrap_create(pyversion="3.8", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.1.5"], file=__file__)
+ @wrap_create(pyversion="3.11", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.5.3"], file=__file__)
def create_data(X=None):
import os
import datatable as dt
4 changes: 2 additions & 2 deletions data/any_env3.py
@@ -10,9 +10,9 @@ class FreshEnvData(CustomData):
# NOTE: Keep @wrap_create on a single line
# NOTE: If want to share cache across recipes, can set cache_env=True and set id=<some unique identifier, like myrecipe12345>
# Below caches the env into "id" folder
- # @wrap_create(pyversion="3.6", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.1.5"], cache_env=True, file=__file__, id="myrecipe12345")
+ # @wrap_create(pyversion="3.11", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.5.3"], cache_env=True, file=__file__, id="myrecipe12345")
# Below does not cache the env
- @wrap_create(pyversion="3.8", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.1.5"], file=__file__)
+ @wrap_create(pyversion="3.11", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.5.3"], file=__file__)
def create_data(X=None):
import os
import datatable as dt
6 changes: 3 additions & 3 deletions data/any_env4.py
@@ -6,10 +6,10 @@
class FreshEnvData(CustomData):
# Specify the python package dependencies. Will be installed in order of list
# Below caches the env into "id" folder
- # isolate_env = dict(pyversion="3.6", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.1.5"], cache_env=True, id="myrecipe12345")
+ # isolate_env = dict(pyversion="3.11", install_h2oaicore=False, install_datatable=True, modules_needed_by_name=["pandas==1.5.3"], cache_env=True, id="myrecipe12345")
# Below does not cache the env
- isolate_env = dict(pyversion="3.8", install_h2oaicore=False, install_datatable=True,
- modules_needed_by_name=["pandas==1.1.5"])
+ isolate_env = dict(pyversion="3.11", install_h2oaicore=False, install_datatable=True,
+ modules_needed_by_name=["pandas==1.5.3"])

@staticmethod
def create_data(X=None):
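Note (illustrative sketch, not part of this commit): conceptually, the isolate_env / _modules_needed_by_name pins above are installed into a fresh environment before the recipe runs. A rough standalone approximation of that idea using only the standard library and pip is shown below; the env path is hypothetical, the package list mirrors the pins above, and unlike DAI's machinery plain venv cannot select a different pyversion such as 3.11 — it reuses the running interpreter.

import subprocess
import venv

env_dir = "/tmp/recipe_env_demo"  # hypothetical location; DAI manages its own cache dirs
venv.EnvBuilder(with_pip=True, clear=True).create(env_dir)
pip = f"{env_dir}/bin/pip"
# Install pinned dependencies in order, mirroring modules_needed_by_name above
subprocess.check_call([pip, "install", "datatable", "pandas==1.5.3"])
subprocess.check_call([f"{env_dir}/bin/python", "-c", "import pandas; print(pandas.__version__)"])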
2 changes: 1 addition & 1 deletion data/audio_to_image.py
@@ -38,7 +38,7 @@ class AudioToMelSpectogram:
For Ubuntu, required to do: sudo apt-get install libsndfile1 libsndfile1-dev
For Centos, required to do: sudo yum install libsndfile libsndfile-dev
"""
- _modules_needed_by_name = ["librosa==0.8.1"]
+ _modules_needed_by_name = ["librosa==0.10.2.post1"]

def __init__(
self, min_seconds=2, sampling_rate=44100, n_mels=128, hop_length=345 * 2
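Note (illustrative sketch, not part of this commit): librosa 0.10.x requires keyword arguments for most feature calls. A minimal mel-spectrogram call consistent with the constructor defaults shown above (sampling_rate=44100, n_mels=128, hop_length=345 * 2) might look like this; the synthetic signal is just a stand-in for real audio.

import numpy as np
import librosa

sr = 44100
signal = np.random.default_rng(0).standard_normal(sr).astype(np.float32)  # 1 s of noise as a stand-in
mel = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=128, hop_length=345 * 2)
mel_db = librosa.power_to_db(mel, ref=np.max)
print(mel_db.shape)  # (n_mels, n_frames)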
2 changes: 1 addition & 1 deletion data/nlp/text_summarization.py
@@ -10,7 +10,7 @@
# output dataset name
output_dataset_name = "data_with_summary"

- _global_modules_needed_by_name = ["gensim==3.8.0"]
+ _global_modules_needed_by_name = ["gensim==4.3.2"]


class TextSummarizationClass(CustomData):
2 changes: 1 addition & 1 deletion data/nlp/topic_modeling.py
@@ -20,7 +20,7 @@
# number of top words to be represented in the column name
n_words_colname = 10

- _global_modules_needed_by_name = ["gensim==3.8.0"]
+ _global_modules_needed_by_name = ["gensim==4.3.2"]

stop_words = ['i', 'me', 'my', 'myself', 'we', 'our', 'ours', 'ourselves', 'you', "you're", "you've", "you'll", "you'd",
'your', 'yours', 'yourself', 'yourselves', 'he', 'him', 'his', 'himself', 'she', "she's", 'her', 'hers',
2 changes: 1 addition & 1 deletion data/video_to_image.py
@@ -35,7 +35,7 @@ class VideoToFrames:
Additionally detects all faces for each frame.
"""

- _modules_needed_by_name = ["torchvision==0.4.1", "facenet-pytorch==2.2.9"]
+ _modules_needed_by_name = ["torchvision==0.18.0+rocm6.0", "facenet-pytorch==2.5.3"]

def __init__(self, num_frames_per_video=3, face_additional_area=0.5):
self.num_frames_per_video = num_frames_per_video
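Note (illustrative sketch, not part of this commit): the class docstring above mentions detecting all faces per frame. A minimal facenet-pytorch 2.5.x detection call, assuming MTCNN is the detector used, could look like this; the blank image is a stand-in for a real extracted video frame.

import numpy as np
from PIL import Image
from facenet_pytorch import MTCNN

mtcnn = MTCNN(keep_all=True)  # keep_all=True returns every detected face
frame = Image.fromarray(np.zeros((480, 640, 3), dtype=np.uint8))  # stand-in frame
boxes, probs = mtcnn.detect(frame)  # boxes is None when no face is found
print(boxes, probs)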
2 changes: 1 addition & 1 deletion data/wav2txt.py
@@ -29,7 +29,7 @@ class AzureWav2Txt(BaseData):
"""

"""Specify the python package dependencies (will be installed via pip install mypackage==1.3.37)"""
- _modules_needed_by_name = ["azure-cognitiveservices-speech==1.16.0"]
+ _modules_needed_by_name = ["azure-cognitiveservices-speech==1.37.0"]

@staticmethod
def create_data(X: dt.Frame = None) -> dt.Frame:
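Note (illustrative sketch, not part of this commit): a minimal transcription call with azure-cognitiveservices-speech 1.37, assuming the standard quickstart API; the key, region, and file name are placeholders, not values from this recipe.

import azure.cognitiveservices.speech as speechsdk

speech_config = speechsdk.SpeechConfig(subscription="YOUR_KEY", region="YOUR_REGION")  # placeholders
audio_config = speechsdk.audio.AudioConfig(filename="sample.wav")  # hypothetical local file
recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
result = recognizer.recognize_once()
print(result.text)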
2 changes: 1 addition & 1 deletion explainers/explainers/morris_sensitivity_explainer.py
@@ -48,7 +48,7 @@ class MorrisSensitivityLeExplainer(CustomExplainer, CustomDaiExplainer):
# declaration of explanation types this explainer creates e.g. feature importance
_explanation_types = [GlobalFeatImpExplanation]
# Python package dependencies (can be installed using pip)
- _modules_needed_by_name = ["interpret==0.3.2"]
+ _modules_needed_by_name = ["interpret==0.6.1"]

# explainer constructor must not have any required parameters
def __init__(self):
2 changes: 1 addition & 1 deletion models/algorithms/autogluon.py
@@ -27,7 +27,7 @@ class AutoGluonModel(CustomModel):

# autogluon depends upon slightly different package versions than DAI has, or could work with xgboost but needs official xgboost <1.5
# so use isolated env
- isolate_env = dict(pyversion="3.8", install_h2oaicore=False, install_datatable=True, cache_env=True,
+ isolate_env = dict(pyversion="3.11", install_h2oaicore=False, install_datatable=True, cache_env=True,
cache_by_full_module_name=False, install_pip="latest",
modules_needed_by_name=['autogluon==0.3.1'])

2 changes: 1 addition & 1 deletion models/algorithms/calibratedClassifier.py
@@ -34,7 +34,7 @@ class CalibratedClassifierModel:

le = LabelEncoder()

- _modules_needed_by_name = ['ml_insights==0.1.4'] # for SplineCalibration
+ _modules_needed_by_name = ['ml_insights==1.0.3'] # for SplineCalibration

@staticmethod
def is_enabled():
2 changes: 1 addition & 1 deletion models/algorithms/catboost.py
@@ -87,7 +87,7 @@ def has_pred_contribs(self):
def has_output_margin(self):
return True

- _modules_needed_by_name = ['catboost==1.0.5']
+ _modules_needed_by_name = ['catboost==1.2.5']

def set_default_params(self,
accuracy=10, time_tolerance=10, interpretability=1,
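Note (illustrative sketch, not part of this commit): has_pred_contribs() above advertises per-row contributions, which CatBoost 1.2.x exposes as SHAP values; a minimal example on synthetic data, not the recipe's own code:

import numpy as np
from catboost import CatBoostClassifier, Pool

rng = np.random.default_rng(0)
X = rng.random((200, 4))
y = (X[:, 0] > 0.5).astype(int)

model = CatBoostClassifier(iterations=50, depth=4, verbose=False)
model.fit(X, y)

# Per-row SHAP-style contributions: one column per feature plus a bias column
contribs = model.get_feature_importance(Pool(X, label=y), type="ShapValues")
print(contribs.shape)  # (200, 5)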
2 changes: 1 addition & 1 deletion models/algorithms/catboost_regression_uncertanity.py
@@ -46,7 +46,7 @@ class CatBoostRegressionUncertanityModel(CustomModel):

_display_name = "CatBoostRegressionUncertanity"
_description = "Yandex CatBoost GBM"
- _modules_needed_by_name = ["catboost==1.0.4"]
+ _modules_needed_by_name = ["catboost==1.2.5"]

@staticmethod
def do_acceptance_test():
2 changes: 1 addition & 1 deletion models/algorithms/h2o-3-gbm-poisson.py
@@ -11,7 +11,7 @@

import numpy as np

- _global_modules_needed_by_name = ['h2o==3.34.0.7']
+ _global_modules_needed_by_name = ['h2o==3.46.0.2']
import h2o
import os

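Note (illustrative sketch, not part of this commit): a minimal Poisson GBM with the h2o 3.46 client on a toy frame, assuming a local H2O backend (Java) is available; this is the upstream API the recipe wraps, not the recipe logic itself.

import h2o
from h2o.estimators import H2OGradientBoostingEstimator

h2o.init()  # starts or connects to a local H2O cluster
frame = h2o.H2OFrame({"x1": [1, 2, 3, 4, 5], "x2": [5, 4, 3, 2, 1], "y": [0, 1, 3, 2, 4]})
gbm = H2OGradientBoostingEstimator(distribution="poisson", ntrees=10)
gbm.train(x=["x1", "x2"], y="y", training_frame=frame)
print(gbm.predict(frame))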
2 changes: 1 addition & 1 deletion models/algorithms/h2o-3-models.py
@@ -12,7 +12,7 @@
import numpy as np
import pandas as pd

- _global_modules_needed_by_name = ['h2o==3.34.0.7']
+ _global_modules_needed_by_name = ['h2o==3.46.0.2']
import h2o
import os

2 changes: 1 addition & 1 deletion models/algorithms/h2o-glm-poisson.py
@@ -9,7 +9,7 @@
from h2oaicore.systemutils import config, user_dir, remove, IgnoreEntirelyError
import numpy as np

- _global_modules_needed_by_name = ['h2o==3.34.0.7']
+ _global_modules_needed_by_name = ['h2o==3.46.0.2']
import h2o
import os

2 changes: 1 addition & 1 deletion models/mli/model_ebm.py
@@ -32,7 +32,7 @@ class EBMModel(CustomModel):
"Unified Framework for Machine Learning Interpretability. "
"URL https://arxiv.org/pdf/1909.09223.pdf"
)
- _modules_needed_by_name = ["pillow==8.3.2", "interpret==0.1.20"]
+ _modules_needed_by_name = ["pillow==10.3.0", "interpret==0.6.1"]

@staticmethod
def do_acceptance_test():
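Note (illustrative sketch, not part of this commit): interpret 0.6.x still ships the glassbox EBM estimator this model wraps; a minimal fit/predict on synthetic data, assuming default settings apart from disabling interactions:

import numpy as np
from interpret.glassbox import ExplainableBoostingClassifier

rng = np.random.default_rng(0)
X = rng.random((300, 3))
y = (X[:, 0] + 0.2 * X[:, 1] > 0.6).astype(int)

ebm = ExplainableBoostingClassifier(interactions=0)  # main effects only
ebm.fit(X, y)
print(ebm.predict_proba(X[:5]))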
2 changes: 1 addition & 1 deletion models/mli/model_gam.py
@@ -17,7 +17,7 @@ class GAM(CustomModel):
_multiclass = False
_display_name = "GAM"
_description = "Generalized Additive Model"
- _modules_needed_by_name = ['pygam==0.8.0']
+ _modules_needed_by_name = ['pygam==0.9.1']
_testing_can_skip_failure = False # ensure tested as if shouldn't fail

@staticmethod
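Note (illustrative sketch, not part of this commit): pygam 0.9.x keeps the LinearGAM + spline-term API; a minimal fit on synthetic data, independent of the recipe's own parameter handling:

import numpy as np
from pygam import LinearGAM, s

rng = np.random.default_rng(0)
X = rng.random((200, 2))
y = np.sin(4 * X[:, 0]) + 0.5 * X[:, 1] + 0.05 * rng.standard_normal(200)

gam = LinearGAM(s(0) + s(1)).fit(X, y)  # one spline term per feature
print(gam.predict(X[:5]))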
22 changes: 9 additions & 13 deletions models/timeseries/autoarima_parallel.py
@@ -79,24 +79,20 @@ def do_acceptance_test():
froms3 = True
if froms3:
_root_path = "https://s3.amazonaws.com/artifacts.h2o.ai/deps/dai/recipes"
- _suffix = "-cp38-cp38-linux_x86_64.whl"
+ _suffix = "-cp311-cp311-linux_x86_64.whl"
_modules_needed_by_name = [
'%s/setuptools_git-1.2%s' % (_root_path, _suffix),
'%s/LunarCalendar-0.0.9%s' % (_root_path, _suffix),
- '%s/ephem-3.7.7.1%s' % (_root_path, _suffix),
- '%s/cmdstanpy-0.9.5%s' % (_root_path, _suffix),
- '%s/pystan-2.19.1.1%s' % (_root_path, _suffix),
- '%s/httpstan-4.5.0%s' % (_root_path, _suffix),
- '%s/fbprophet-0.7.1%s' % (_root_path, _suffix),
- "statsforecast==0.6.0",
- "prophet==1.1",
+ '%s/ephem-4.1.5%s' % (_root_path, _suffix),
+ '%s/cmdstanpy-1.2.2%s' % (_root_path, _suffix),
+ '%s/pystan-3.9.1%s' % (_root_path, _suffix),
+ '%s/httpstan-4.12.0%s' % (_root_path, _suffix),
+ '%s/prophet-1.1.5%s' % (_root_path, _suffix),
+ "statsforecast==1.7.4",
]
else:
- _modules_needed_by_name = ['holidays==0.11.1', 'convertdate', 'lunarcalendar', 'pystan==2.19.1.1',
- 'fbprophet==0.7.1',
- "statsforecast==0.6.0",
- "prophet==1.1",
- ]
+ _modules_needed_by_name = ['holidays==0.47', 'convertdate', 'lunarcalendar', 'pystan==3.9.1',
+ 'prophet==1.1.5', 'statsforecast==1.7.4']

def set_default_params(
self, accuracy=None, time_tolerance=None, interpretability=None, **kwargs
18 changes: 9 additions & 9 deletions models/timeseries/fb_prophet.py
@@ -53,19 +53,19 @@ def do_acceptance_test():
froms3 = True
if froms3:
_root_path = "https://s3.amazonaws.com/artifacts.h2o.ai/deps/dai/recipes"
- _suffix = "-cp38-cp38-linux_x86_64.whl"
+ _suffix = "-cp311-cp311-linux_x86_64.whl"
_modules_needed_by_name = [
'%s/setuptools_git-1.2%s' % (_root_path, _suffix),
'%s/LunarCalendar-0.0.9%s' % (_root_path, _suffix),
- '%s/ephem-3.7.7.1%s' % (_root_path, _suffix),
- '%s/cmdstanpy-0.9.5%s' % (_root_path, _suffix),
- '%s/pystan-2.19.1.1%s' % (_root_path, _suffix),
- '%s/httpstan-4.5.0%s' % (_root_path, _suffix),
- '%s/fbprophet-0.7.1%s' % (_root_path, _suffix),
+ '%s/ephem-4.1.5%s' % (_root_path, _suffix),
+ '%s/cmdstanpy-1.2.2%s' % (_root_path, _suffix),
+ '%s/pystan-3.9.1%s' % (_root_path, _suffix),
+ '%s/httpstan-4.12.0%s' % (_root_path, _suffix),
+ '%s/prophet-1.1.5%s' % (_root_path, _suffix),
]
else:
- _modules_needed_by_name = ['holidays==0.11.1', 'convertdate', 'lunarcalendar', 'pystan==2.19.1.1',
- 'fbprophet==0.7.1']
+ _modules_needed_by_name = ['holidays==0.47', 'convertdate', 'lunarcalendar', 'pystan==3.9.1',
+ 'prophet==1.1.5']

def set_default_params(self,
accuracy=None, time_tolerance=None, interpretability=None,
@@ -192,7 +192,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=No
priors = {}
models = {}

- mod = importlib.import_module('fbprophet')
+ mod = importlib.import_module('prophet')
Prophet = getattr(mod, "Prophet")

# Fit 1 FB Prophet model per time group columns
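Note (illustrative sketch, not part of this commit): Prophet 1.x is published as the "prophet" package, which is why the import above changes from "fbprophet"; the modelling API itself is unchanged. A minimal fit/forecast on toy data:

import importlib
import pandas as pd

mod = importlib.import_module("prophet")  # the package was "fbprophet" up to 0.7.x
Prophet = getattr(mod, "Prophet")

df = pd.DataFrame({"ds": pd.date_range("2024-01-01", periods=30, freq="D"),
                   "y": [float(i % 7) for i in range(30)]})
m = Prophet(n_changepoints=5)
m.fit(df)
future = m.make_future_dataframe(periods=7)
print(m.predict(future)[["ds", "yhat"]].tail())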
22 changes: 11 additions & 11 deletions models/timeseries/fb_prophet_parallel.py
@@ -67,19 +67,19 @@ def do_acceptance_test():
froms3 = True
if froms3:
_root_path = "https://s3.amazonaws.com/artifacts.h2o.ai/deps/dai/recipes"
- _suffix = "-cp38-cp38-linux_x86_64.whl"
+ _suffix = "-cp311-cp311-linux_x86_64.whl"
_modules_needed_by_name = [
'%s/setuptools_git-1.2%s' % (_root_path, _suffix),
'%s/LunarCalendar-0.0.9%s' % (_root_path, _suffix),
- '%s/ephem-3.7.7.1%s' % (_root_path, _suffix),
- '%s/cmdstanpy-0.9.5%s' % (_root_path, _suffix),
- '%s/pystan-2.19.1.1%s' % (_root_path, _suffix),
- '%s/httpstan-4.5.0%s' % (_root_path, _suffix),
- '%s/fbprophet-0.7.1%s' % (_root_path, _suffix),
+ '%s/ephem-4.1.5%s' % (_root_path, _suffix),
+ '%s/cmdstanpy-1.2.2%s' % (_root_path, _suffix),
+ '%s/pystan-3.9.1%s' % (_root_path, _suffix),
+ '%s/httpstan-4.12.0%s' % (_root_path, _suffix),
+ '%s/prophet-1.1.5%s' % (_root_path, _suffix),
]
else:
- _modules_needed_by_name = ['holidays==0.11.1', 'convertdate', 'lunarcalendar', 'pystan==2.19.1.1',
- 'fbprophet==0.7.1']
+ _modules_needed_by_name = ['holidays==0.47', 'convertdate', 'lunarcalendar', 'pystan==3.9.1',
+ 'prophet==1.1.5']

def set_default_params(self,
accuracy=None, time_tolerance=None, interpretability=None,
@@ -178,7 +178,7 @@ def _fit_async(X_path, grp_hash, tmp_folder):
# print("prophet - small data work-around for group: %s" % grp_hash)
return grp_hash, None
# Import FB Prophet package
- mod = importlib.import_module('fbprophet')
+ mod = importlib.import_module('prophet')
Prophet = getattr(mod, "Prophet")
nrows = X[['ds', 'y']].shape[0]
n_changepoints = max(1, int(nrows * (2 / 3)))
@@ -264,7 +264,7 @@ def _fit_async(X_path, grp_hash, tmp_folder, params, cap):
return grp_hash, None

# Import FB Prophet package
- mod = importlib.import_module('fbprophet')
+ mod = importlib.import_module('prophet')
Prophet = getattr(mod, "Prophet")

# Fit current model and prior
@@ -398,7 +398,7 @@ def fit(self, X, y, sample_weight=None, eval_set=None, sample_weight_eval_set=No
X_avg = X[['ds', 'y']].groupby('ds').mean().reset_index()

# Send that to Prophet
- mod = importlib.import_module('fbprophet')
+ mod = importlib.import_module('prophet')
Prophet = getattr(mod, "Prophet")
nrows = X[['ds', 'y']].shape[0]
n_changepoints = max(1, int(nrows * (2 / 3)))
2 changes: 1 addition & 1 deletion models/timeseries/nixtla_arimax.py
@@ -74,7 +74,7 @@ class AutoARIMAParallelModel(CustomTimeSeriesModel):
_parallel_task = True
_testing_can_skip_failure = False # ensure tested as if shouldn't fail

- _modules_needed_by_name = ['statsforecast==1.5.0']
+ _modules_needed_by_name = ['statsforecast==1.7.4']

@staticmethod
def is_enabled():
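Note (illustrative sketch, not part of this commit): a minimal AutoARIMA forecast with statsforecast 1.7.x on toy data, assuming the standard unique_id/ds/y frame layout; the recipe's own data handling is not shown here.

import pandas as pd
from statsforecast import StatsForecast
from statsforecast.models import AutoARIMA

df = pd.DataFrame({
    "unique_id": ["series_1"] * 30,
    "ds": pd.date_range("2024-01-01", periods=30, freq="D"),
    "y": [float(i % 7) for i in range(30)],
})
sf = StatsForecast(models=[AutoARIMA(season_length=7)], freq="D", n_jobs=1)
print(sf.forecast(df=df, h=7))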
2 changes: 1 addition & 1 deletion models/timeseries/nixtla_ces.py
@@ -74,7 +74,7 @@ class AutoCESParallelModel(CustomTimeSeriesModel):
_parallel_task = True
_testing_can_skip_failure = False # ensure tested as if shouldn't fail

- _modules_needed_by_name = ['statsforecast==1.5.0']
+ _modules_needed_by_name = ['statsforecast==1.7.4']

@staticmethod
def is_enabled():
2 changes: 1 addition & 1 deletion models/timeseries/nixtla_ets.py
@@ -74,7 +74,7 @@ class AutoETSParallelModel(CustomTimeSeriesModel):
_parallel_task = True
_testing_can_skip_failure = False # ensure tested as if shouldn't fail

- _modules_needed_by_name = ['statsforecast==1.5.0']
+ _modules_needed_by_name = ['statsforecast==1.7.4']

@staticmethod
def is_enabled():
2 changes: 1 addition & 1 deletion models/timeseries/nixtla_theta.py
@@ -74,7 +74,7 @@ class AutoThetaParallelModel(CustomTimeSeriesModel):
_parallel_task = True
_testing_can_skip_failure = False # ensure tested as if shouldn't fail

- _modules_needed_by_name = ['statsforecast==1.5.0']
+ _modules_needed_by_name = ['statsforecast==1.7.4']

@staticmethod
def is_enabled():
2 changes: 1 addition & 1 deletion models/unsupervised/TextKMeansIsolationForest.py
@@ -214,7 +214,7 @@ class TextLDATopicUnsupervisedTransformer(CustomUnsupervisedTransformer):
"""Transformer to extract topics from text column using LDA"""
_is_reproducible = False
_testing_can_skip_failure = False # ensure tested as if shouldn't fail
- _modules_needed_by_name = ["gensim==3.8.0"]
+ _modules_needed_by_name = ["gensim==4.3.2"]

def __init__(self, n_topics, **kwargs):
super().__init__(**kwargs)
2 changes: 1 addition & 1 deletion models/unsupervised/lda.py
@@ -11,7 +11,7 @@ class TextLDATopicTransformer(CustomUnsupervisedTransformer):
"""Transformer to extract topics from text column using LDA"""
_is_reproducible = False
_testing_can_skip_failure = False # ensure tested as if shouldn't fail
- _modules_needed_by_name = ["gensim==3.8.0"]
+ _modules_needed_by_name = ["gensim==4.3.2"]

def __init__(self, n_topics, **kwargs):
super().__init__(**kwargs)
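Note (illustrative sketch, not part of this commit): gensim 4.x keeps the LdaModel API these LDA transformers rely on; a minimal topic fit on toy token lists, independent of the recipe's text preprocessing:

from gensim import corpora
from gensim.models import LdaModel

docs = [["credit", "risk", "loan"], ["topic", "model", "text"], ["loan", "credit", "score"]]
dictionary = corpora.Dictionary(docs)
corpus = [dictionary.doc2bow(doc) for doc in docs]

lda = LdaModel(corpus=corpus, id2word=dictionary, num_topics=2, passes=5, random_state=42)
for topic_id, words in lda.show_topics(num_topics=2, num_words=3, formatted=False):
    print(topic_id, [w for w, _ in words])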
2 changes: 1 addition & 1 deletion transformers/anomaly /isolation_forest.py
@@ -15,7 +15,7 @@
from h2oaicore.systemutils import config, user_dir, remove, IgnoreEntirelyError, print_debug
from h2oaicore.transformer_utils import CustomTransformer

- _global_modules_needed_by_name = ['h2o==3.34.0.7']
+ _global_modules_needed_by_name = ['h2o==3.46.0.2']
import h2o
from h2o import H2OFrame
from h2o.estimators import H2OEstimator
(Remaining changed files not shown.)