From b403f7155142a0daa38396065430c7b4fba8b863 Mon Sep 17 00:00:00 2001 From: XQ Hu Date: Wed, 27 Nov 2024 01:26:41 +0000 Subject: [PATCH 1/3] fixed ML tests --- .../apache_beam/ml/transforms/base_test.py | 21 +++++++++---------- .../ml/transforms/handlers_test.py | 8 +++---- sdks/python/setup.py | 7 ++++--- sdks/python/tox.ini | 2 ++ 4 files changed, 20 insertions(+), 18 deletions(-) diff --git a/sdks/python/apache_beam/ml/transforms/base_test.py b/sdks/python/apache_beam/ml/transforms/base_test.py index 1a21f6caf7e1..3db5a63b9542 100644 --- a/sdks/python/apache_beam/ml/transforms/base_test.py +++ b/sdks/python/apache_beam/ml/transforms/base_test.py @@ -21,7 +21,6 @@ import shutil import tempfile import time -import typing import unittest from collections.abc import Sequence from typing import Any @@ -140,8 +139,8 @@ def test_ml_transform_on_list_dict(self): 'x': int, 'y': float }, expected_dtype={ - 'x': typing.Sequence[np.float32], - 'y': typing.Sequence[np.float32], + 'x': Sequence[np.float32], + 'y': Sequence[np.float32], }, ), param( @@ -153,8 +152,8 @@ def test_ml_transform_on_list_dict(self): 'x': np.int32, 'y': np.float32 }, expected_dtype={ - 'x': typing.Sequence[np.float32], - 'y': typing.Sequence[np.float32], + 'x': Sequence[np.float32], + 'y': Sequence[np.float32], }, ), param( @@ -165,8 +164,8 @@ def test_ml_transform_on_list_dict(self): 'x': list[int], 'y': list[float] }, expected_dtype={ - 'x': typing.Sequence[np.float32], - 'y': typing.Sequence[np.float32], + 'x': Sequence[np.float32], + 'y': Sequence[np.float32], }, ), param( @@ -174,12 +173,12 @@ def test_ml_transform_on_list_dict(self): 'x': [1, 2, 3], 'y': [2.0, 3.0, 4.0] }], input_types={ - 'x': typing.Sequence[int], - 'y': typing.Sequence[float], + 'x': Sequence[int], + 'y': Sequence[float], }, expected_dtype={ - 'x': typing.Sequence[np.float32], - 'y': typing.Sequence[np.float32], + 'x': Sequence[np.float32], + 'y': Sequence[np.float32], }, ), ]) diff --git a/sdks/python/apache_beam/ml/transforms/handlers_test.py b/sdks/python/apache_beam/ml/transforms/handlers_test.py index 4b53026c36a4..bb5f9b5f0f70 100644 --- a/sdks/python/apache_beam/ml/transforms/handlers_test.py +++ b/sdks/python/apache_beam/ml/transforms/handlers_test.py @@ -20,9 +20,9 @@ import shutil import sys import tempfile -import typing import unittest import uuid +from collections.abc import Sequence from typing import NamedTuple from typing import Union @@ -276,9 +276,9 @@ def test_tft_process_handler_transformed_data_schema(self): schema_utils.schema_from_feature_spec(raw_data_feature_spec)) expected_transformed_data_schema = { - 'x': typing.Sequence[np.float32], - 'y': typing.Sequence[np.float32], - 'z': typing.Sequence[bytes] + 'x': Sequence[np.float32], + 'y': Sequence[np.float32], + 'z': Sequence[bytes] } actual_transformed_data_schema = ( diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 3b45cbf82fc1..73c7fff2e5a3 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -492,10 +492,11 @@ def get_portability_package_data(): 'pillow', # Support TF 2.16.0: https://github.com/apache/beam/issues/31294 # Once TF version is unpinned, also don't restrict Python version. - 'tensorflow<2.16.0;python_version<"3.12"', + # 'tensorflow<2.16.0;python_version<"3.12"', + # limit this to 2.12.x to make tests stable + 'tensorflow>=2.12rc1,<2.13', 'tensorflow-hub', - # https://github.com/tensorflow/transform/issues/313 - 'tensorflow-transform;python_version<"3.11"', + 'tensorflow-transform', 'tf2onnx', 'torch', 'transformers', diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 7a2424325890..2f4ceb28e65d 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -106,6 +106,8 @@ commands = setenv = extras = test,gcp,dataframe,ml_test commands = + # Log tensorflow version for debugging + /bin/sh -c "pip freeze | grep -E tensorflow" bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" [testenv:py{39,310,311,312}-dask] From e2c246ced2b4c21410cfd9cbdf3084080b195730 Mon Sep 17 00:00:00 2001 From: XQ Hu Date: Wed, 27 Nov 2024 03:29:47 +0000 Subject: [PATCH 2/3] try some new setups --- sdks/python/setup.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 73c7fff2e5a3..be42c633acc3 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -490,11 +490,7 @@ def get_portability_package_data(): 'sentence-transformers', 'skl2onnx', 'pillow', - # Support TF 2.16.0: https://github.com/apache/beam/issues/31294 - # Once TF version is unpinned, also don't restrict Python version. - # 'tensorflow<2.16.0;python_version<"3.12"', - # limit this to 2.12.x to make tests stable - 'tensorflow>=2.12rc1,<2.13', + 'tensorflow', 'tensorflow-hub', 'tensorflow-transform', 'tf2onnx', From a88135546dceb48bfb2ab7d58d4a59c73af709c0 Mon Sep 17 00:00:00 2001 From: XQ Hu Date: Wed, 27 Nov 2024 04:51:34 +0000 Subject: [PATCH 3/3] created py312-ml tox section --- sdks/python/setup.py | 13 +++++++++++++ sdks/python/tox.ini | 12 +++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index be42c633acc3..53c7a532e706 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -501,6 +501,19 @@ def get_portability_package_data(): # https://github.com/apache/beam/issues/31285 # 'xgboost<2.0', # https://github.com/apache/beam/issues/31252 ], + 'p312_ml_test': [ + 'datatable', + 'embeddings', + 'onnxruntime', + 'sentence-transformers', + 'skl2onnx', + 'pillow', + 'tensorflow', + 'tensorflow-hub', + 'tf2onnx', + 'torch', + 'transformers', + ], 'aws': ['boto3>=1.9,<2'], 'azure': [ 'azure-storage-blob>=12.3.2,<13', diff --git a/sdks/python/tox.ini b/sdks/python/tox.ini index 2f4ceb28e65d..68ac15ced70d 100644 --- a/sdks/python/tox.ini +++ b/sdks/python/tox.ini @@ -101,7 +101,7 @@ commands = python apache_beam/examples/complete/autocomplete_test.py bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" -[testenv:py{39,310,311,312}-ml] +[testenv:py{39,310,311}-ml] # Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests. setenv = extras = test,gcp,dataframe,ml_test @@ -110,6 +110,16 @@ commands = /bin/sh -c "pip freeze | grep -E tensorflow" bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" +[testenv:py312-ml] +# many packages do not support py3.12 +# Don't set TMPDIR to avoid "AF_UNIX path too long" errors in certain tests. +setenv = +extras = test,gcp,dataframe,p312_ml_test +commands = + # Log tensorflow version for debugging + /bin/sh -c "pip freeze | grep -E tensorflow" + bash {toxinidir}/scripts/run_pytest.sh {envname} "{posargs}" + [testenv:py{39,310,311,312}-dask] extras = test,dask,dataframes commands_pre =