From ed8639e401d30b842fac66b52636f5c1ab0c71b7 Mon Sep 17 00:00:00 2001 From: Harshal Sheth Date: Fri, 27 Dec 2024 13:46:49 -0500 Subject: [PATCH] chore(python): test with python 3.11 (#11280) Co-authored-by: Tamas Nemeth Co-authored-by: Mayuri Nehate <33225191+mayurinehate@users.noreply.github.com> --- .github/workflows/dagster-plugin.yml | 6 +++--- .github/workflows/metadata-ingestion.yml | 4 ++-- .github/workflows/prefect-plugin.yml | 4 ++-- metadata-ingestion-modules/airflow-plugin/setup.py | 4 ---- metadata-ingestion-modules/dagster-plugin/README.md | 3 +-- metadata-ingestion-modules/dagster-plugin/setup.py | 3 --- metadata-ingestion-modules/gx-plugin/README.md | 3 +-- metadata-ingestion-modules/gx-plugin/setup.py | 3 --- metadata-ingestion-modules/prefect-plugin/README.md | 2 +- metadata-ingestion-modules/prefect-plugin/setup.py | 6 +----- metadata-ingestion/setup.py | 10 ++++------ .../src/datahub/ingestion/source/s3/source.py | 2 +- .../tests/integration/feast/test_feast_repository.py | 8 ++++++++ 13 files changed, 24 insertions(+), 34 deletions(-) diff --git a/.github/workflows/dagster-plugin.yml b/.github/workflows/dagster-plugin.yml index d8a9cd7bfd6a35..ae9a0b1605cdf3 100644 --- a/.github/workflows/dagster-plugin.yml +++ b/.github/workflows/dagster-plugin.yml @@ -30,11 +30,11 @@ jobs: DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: - python-version: ["3.9", "3.10"] + python-version: ["3.9", "3.11"] include: - python-version: "3.9" extraPythonRequirement: "dagster>=1.3.3" - - python-version: "3.10" + - python-version: "3.11" extraPythonRequirement: "dagster>=1.3.3" fail-fast: false steps: @@ -57,7 +57,7 @@ jobs: if: always() run: source metadata-ingestion-modules/dagster-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v4 - if: ${{ always() && matrix.python-version == '3.10' && matrix.extraPythonRequirement == 'dagster>=1.3.3' }} + if: ${{ always() && matrix.python-version == '3.11' && matrix.extraPythonRequirement == 
'dagster>=1.3.3' }} with: name: Test Results (dagster Plugin ${{ matrix.python-version}}) path: | diff --git a/.github/workflows/metadata-ingestion.yml b/.github/workflows/metadata-ingestion.yml index ad00c6d1551d1d..106cba1473982e 100644 --- a/.github/workflows/metadata-ingestion.yml +++ b/.github/workflows/metadata-ingestion.yml @@ -33,7 +33,7 @@ jobs: # DATAHUB_LOOKML_GIT_TEST_SSH_KEY: ${{ secrets.DATAHUB_LOOKML_GIT_TEST_SSH_KEY }} strategy: matrix: - python-version: ["3.8", "3.10"] + python-version: ["3.8", "3.11"] command: [ "testQuick", @@ -43,7 +43,7 @@ jobs: ] include: - python-version: "3.8" - - python-version: "3.10" + - python-version: "3.11" fail-fast: false steps: - name: Free up disk space diff --git a/.github/workflows/prefect-plugin.yml b/.github/workflows/prefect-plugin.yml index e4a70426f3a618..d77142a1f00ded 100644 --- a/.github/workflows/prefect-plugin.yml +++ b/.github/workflows/prefect-plugin.yml @@ -30,7 +30,7 @@ jobs: DATAHUB_TELEMETRY_ENABLED: false strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.8", "3.9", "3.10", "3.11"] fail-fast: false steps: - name: Set up JDK 17 @@ -52,7 +52,7 @@ jobs: if: always() run: source metadata-ingestion-modules/prefect-plugin/venv/bin/activate && uv pip freeze - uses: actions/upload-artifact@v4 - if: ${{ always() && matrix.python-version == '3.10'}} + if: ${{ always() && matrix.python-version == '3.11'}} with: name: Test Results (Prefect Plugin ${{ matrix.python-version}}) path: | diff --git a/metadata-ingestion-modules/airflow-plugin/setup.py b/metadata-ingestion-modules/airflow-plugin/setup.py index 3209233184d55a..2693aab0700da3 100644 --- a/metadata-ingestion-modules/airflow-plugin/setup.py +++ b/metadata-ingestion-modules/airflow-plugin/setup.py @@ -148,10 +148,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming 
Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", diff --git a/metadata-ingestion-modules/dagster-plugin/README.md b/metadata-ingestion-modules/dagster-plugin/README.md index 8e1460957ed9ff..5113fc37dcc222 100644 --- a/metadata-ingestion-modules/dagster-plugin/README.md +++ b/metadata-ingestion-modules/dagster-plugin/README.md @@ -1,4 +1,3 @@ # Datahub Dagster Plugin -See the DataHub Dagster docs for details. - +See the [DataHub Dagster docs](https://datahubproject.io/docs/lineage/dagster/) for details. diff --git a/metadata-ingestion-modules/dagster-plugin/setup.py b/metadata-ingestion-modules/dagster-plugin/setup.py index 0e0685cb378c1b..22c15497bd8070 100644 --- a/metadata-ingestion-modules/dagster-plugin/setup.py +++ b/metadata-ingestion-modules/dagster-plugin/setup.py @@ -107,9 +107,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", diff --git a/metadata-ingestion-modules/gx-plugin/README.md b/metadata-ingestion-modules/gx-plugin/README.md index 1ffd87a955432d..9d50235a093d63 100644 --- a/metadata-ingestion-modules/gx-plugin/README.md +++ b/metadata-ingestion-modules/gx-plugin/README.md @@ -1,4 +1,3 @@ # Datahub GX Plugin -See the DataHub GX docs for details. - +See the [DataHub GX docs](https://datahubproject.io/docs/metadata-ingestion/integration_docs/great-expectations) for details. 
diff --git a/metadata-ingestion-modules/gx-plugin/setup.py b/metadata-ingestion-modules/gx-plugin/setup.py index 73d5d1a9a02f18..40afc81a98f9c8 100644 --- a/metadata-ingestion-modules/gx-plugin/setup.py +++ b/metadata-ingestion-modules/gx-plugin/setup.py @@ -118,9 +118,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", diff --git a/metadata-ingestion-modules/prefect-plugin/README.md b/metadata-ingestion-modules/prefect-plugin/README.md index 0896942e78ef61..f21e00b4945135 100644 --- a/metadata-ingestion-modules/prefect-plugin/README.md +++ b/metadata-ingestion-modules/prefect-plugin/README.md @@ -28,7 +28,7 @@ The `prefect-datahub` collection allows you to easily integrate DataHub's metada ## Prerequisites -- Python 3.7+ +- Python 3.8+ - Prefect 2.0.0+ and < 3.0.0+ - A running instance of DataHub diff --git a/metadata-ingestion-modules/prefect-plugin/setup.py b/metadata-ingestion-modules/prefect-plugin/setup.py index 7e56fe8b6ad114..70b0e958195645 100644 --- a/metadata-ingestion-modules/prefect-plugin/setup.py +++ b/metadata-ingestion-modules/prefect-plugin/setup.py @@ -103,10 +103,6 @@ def get_long_description(): "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.7", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", @@ -120,7 +116,7 @@ def get_long_description(): ], # Package info. 
zip_safe=False, - python_requires=">=3.7", + python_requires=">=3.8", package_dir={"": "src"}, packages=setuptools.find_namespace_packages(where="./src"), entry_points=entry_points, diff --git a/metadata-ingestion/setup.py b/metadata-ingestion/setup.py index c6994dd6d5aa65..986dc189cb29ba 100644 --- a/metadata-ingestion/setup.py +++ b/metadata-ingestion/setup.py @@ -298,8 +298,8 @@ } data_lake_profiling = { - "pydeequ~=1.1.0", - "pyspark~=3.3.0", + "pydeequ>=1.1.0", + "pyspark~=3.5.0", } delta_lake = { @@ -318,7 +318,7 @@ # 0.1.11 appears to have authentication issues with azure databricks # 0.22.0 has support for `include_browse` in metadata list apis "databricks-sdk>=0.30.0", - "pyspark~=3.3.0", + "pyspark~=3.5.0", "requests", # Version 2.4.0 includes sqlalchemy dialect, 2.8.0 includes some bug fixes # Version 3.0.0 required SQLAlchemy > 2.0.21 @@ -874,9 +874,6 @@ "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3 :: Only", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "Intended Audience :: System Administrators", @@ -917,6 +914,7 @@ "sync-file-emitter", "sql-parser", "iceberg", + "feast", } else set() ) diff --git a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py index 3ddf47b70cdf80..ceac9e96d1ddd0 100644 --- a/metadata-ingestion/src/datahub/ingestion/source/s3/source.py +++ b/metadata-ingestion/src/datahub/ingestion/source/s3/source.py @@ -225,7 +225,7 @@ def __init__(self, config: DataLakeSourceConfig, ctx: PipelineContext): self.init_spark() def init_spark(self): - os.environ.setdefault("SPARK_VERSION", "3.3") + os.environ.setdefault("SPARK_VERSION", "3.5") spark_version = os.environ["SPARK_VERSION"] # Importing here to avoid Deequ dependency for non profiling 
use cases diff --git a/metadata-ingestion/tests/integration/feast/test_feast_repository.py b/metadata-ingestion/tests/integration/feast/test_feast_repository.py index 7f04337145dc36..80d7c6311a9589 100644 --- a/metadata-ingestion/tests/integration/feast/test_feast_repository.py +++ b/metadata-ingestion/tests/integration/feast/test_feast_repository.py @@ -1,3 +1,6 @@ +import sys + +import pytest from freezegun import freeze_time from datahub.ingestion.run.pipeline import Pipeline @@ -6,6 +9,11 @@ FROZEN_TIME = "2020-04-14 07:00:00" +# The test is skipped on Python 3.11+ due to conflicting dependencies in the installDev +# setup, which requires pydantic < 2 for the majority of plugins. Note that the test works on +# Python 3.11 when run in a standalone virtual env with only the feast plugin installed via +# `pip install acryl-datahub[feast]`, since that allows pydantic > 2. +@pytest.mark.skipif(sys.version_info > (3, 11), reason="Skipped on Python 3.11+") @freeze_time(FROZEN_TIME) def test_feast_repository_ingest(pytestconfig, tmp_path, mock_time): test_resources_dir = pytestconfig.rootpath / "tests/integration/feast"