diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml index 3e6c6b73b75d..baca4e81fc26 100644 --- a/python/.pre-commit-config.yaml +++ b/python/.pre-commit-config.yaml @@ -28,52 +28,28 @@ repos: - id: debug-statements - id: check-yaml - id: check-ast + - repo: https://github.com/astral-sh/ruff-pre-commit + # Ruff version (Used for linting) + rev: v0.0.286 + hooks: + - id: ruff + args: [ --fix, --exit-non-zero-on-fix ] - repo: https://github.com/ambv/black rev: 23.3.0 hooks: - id: black - - repo: https://github.com/pre-commit/mirrors-isort - rev: v5.10.1 - hooks: - - id: isort - args: [--settings-path=python/pyproject.toml] + args: [--skip-string-normalization] - repo: https://github.com/pre-commit/mirrors-mypy rev: v1.3.0 hooks: - id: mypy args: [--install-types, --non-interactive, --config=python/pyproject.toml] - - repo: https://github.com/pre-commit/mirrors-prettier - rev: v3.0.0-alpha.9-for-vscode - hooks: - - id: prettier - args: [--plugin=prettier-plugin-toml] - additional_dependencies: - - prettier@2.7.1 - - prettier-plugin-toml@0.3.1 - repo: https://github.com/hadialqattan/pycln rev: v2.1.5 hooks: - id: pycln args: [--config=python/pyproject.toml] - - repo: https://github.com/asottile/pyupgrade - rev: v3.4.0 - hooks: - - id: pyupgrade - args: [--py38-plus, --keep-runtime-typing] - - repo: https://github.com/pycqa/pylint - rev: v3.0.0a6 - hooks: - - id: pylint - args: [--rcfile=python/pylintrc] - - repo: https://github.com/pycqa/flake8 - rev: "6.0.0" - hooks: - - id: flake8 - args: ["--ignore=E501,W503,E203,B024,B028"] - additional_dependencies: - - flake8-bugbear==23.3.23 - - flake8-comprehensions==3.12.0 - repo: https://github.com/executablebooks/mdformat rev: 0.7.16 hooks: diff --git a/python/pyiceberg/avro/decoder_fast.pyi b/python/pyiceberg/avro/decoder_fast.pyi index 989ad8c5f8ec..cf45ce5066bc 100644 --- a/python/pyiceberg/avro/decoder_fast.pyi +++ b/python/pyiceberg/avro/decoder_fast.pyi @@ -15,8 +15,6 @@ # specific language 
governing permissions and limitations # under the License. -from typing import Tuple, Dict - from pyiceberg.avro.decoder import BinaryDecoder class CythonBinaryDecoder(BinaryDecoder): @@ -30,9 +28,9 @@ class CythonBinaryDecoder(BinaryDecoder): pass def read_int(self) -> int: pass - def read_ints(self, count: int) -> Tuple[int, ...]: + def read_ints(self, count: int) -> tuple[int, ...]: pass - def read_int_bytes_dict(self, count: int, dest: Dict[int, bytes]) -> None: + def read_int_bytes_dict(self, count: int, dest: dict[int, bytes]) -> None: pass def read_bytes(self) -> bytes: pass diff --git a/python/pyiceberg/avro/decoder_fast.pyx b/python/pyiceberg/avro/decoder_fast.pyx index ddc6ff649d45..182fd0e92e1a 100644 --- a/python/pyiceberg/avro/decoder_fast.pyx +++ b/python/pyiceberg/avro/decoder_fast.pyx @@ -23,6 +23,7 @@ from libc.stdint cimport uint64_t, int64_t import array + cdef extern from "decoder_basic.c": void decode_zigzag_ints(const unsigned char **buffer, const uint64_t count, uint64_t *result); void skip_zigzag_int(const unsigned char **buffer); diff --git a/python/pyiceberg/catalog/hive.py b/python/pyiceberg/catalog/hive.py index 7563270d09bb..21f171421eac 100644 --- a/python/pyiceberg/catalog/hive.py +++ b/python/pyiceberg/catalog/hive.py @@ -29,9 +29,8 @@ from urllib.parse import urlparse from hive_metastore.ThriftHiveMetastore import Client -from hive_metastore.ttypes import AlreadyExistsException -from hive_metastore.ttypes import Database as HiveDatabase from hive_metastore.ttypes import ( + AlreadyExistsException, FieldSchema, InvalidOperationException, MetaException, @@ -39,6 +38,7 @@ SerDeInfo, StorageDescriptor, ) +from hive_metastore.ttypes import Database as HiveDatabase from hive_metastore.ttypes import Table as HiveTable from thrift.protocol import TBinaryProtocol from thrift.transport import TSocket, TTransport diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py index 7f6045abeda4..f2d60e7534ff 100644 --- 
a/python/pyiceberg/io/pyarrow.py +++ b/python/pyiceberg/io/pyarrow.py @@ -1161,12 +1161,12 @@ def max_as_bytes(self) -> Optional[bytes]: return None if self.primitive_type == StringType(): - if type(self.current_max) != str: + if not isinstance(self.current_max, str): raise ValueError("Expected the current_max to be a string") s_result = truncate_upper_bound_text_string(self.current_max, self.trunc_length) return self.serialize(s_result) if s_result is not None else None elif self.primitive_type == BinaryType(): - if type(self.current_max) != bytes: + if not isinstance(self.current_max, bytes): raise ValueError("Expected the current_max to be bytes") b_result = truncate_upper_bound_binary_string(self.current_max, self.trunc_length) return self.serialize(b_result) if b_result is not None else None diff --git a/python/pyiceberg/table/metadata.py b/python/pyiceberg/table/metadata.py index b8cd7f19798b..73d76d860628 100644 --- a/python/pyiceberg/table/metadata.py +++ b/python/pyiceberg/table/metadata.py @@ -28,9 +28,8 @@ Union, ) -from pydantic import Field +from pydantic import Field, model_validator from pydantic import ValidationError as PydanticValidationError -from pydantic import model_validator from typing_extensions import Annotated from pyiceberg.exceptions import ValidationError diff --git a/python/pyiceberg/transforms.py b/python/pyiceberg/transforms.py index b00ae6fc9982..9cda2190992f 100644 --- a/python/pyiceberg/transforms.py +++ b/python/pyiceberg/transforms.py @@ -20,9 +20,8 @@ from abc import ABC, abstractmethod from enum import IntEnum from functools import singledispatch -from typing import Any, Callable, Generic +from typing import Any, Callable, Generic, Optional, TypeVar from typing import Literal as LiteralType -from typing import Optional, TypeVar from uuid import UUID import mmh3 diff --git a/python/pyproject.toml b/python/pyproject.toml index ef64f5fb06b8..4b67a4903640 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -126,17 
+126,11 @@ markers = [ line-length = 130 target-version = ['py38'] -[tool.isort] -src_paths = ["pyiceberg/", "tests/"] -multi_line_output = 3 -profile = 'black' -line_length = 130 -force_grid_wrap = 4 - [tool.pycln] all = true [tool.mypy] +mypy_path = "python" no_implicit_optional = true namespace_packages = false warn_redundant_casts = true @@ -295,3 +289,58 @@ ignore_missing_imports = true [tool.coverage.run] source = ['pyiceberg/'] + +[tool.ruff] +src = ['pyiceberg','tests'] +extend-exclude = ["dev/provision.py"] +# Enable pycodestyle errors (`E`) and warnings (`W`), Pyflakes (`F`), +# isort (`I`) and pyupgrade (`UP`). Unlike Flake8, Ruff doesn't enable +# pycodestyle warnings (`W`) or McCabe complexity (`C901`) by default. +select = ["E", "F", "W", "I", "UP"] +ignore = ["E501","E203","B024","B028"] + +# Allow autofix for all enabled rules (when `--fix` is provided). +fixable = ["ALL"] +unfixable = [] + +# Exclude a variety of commonly ignored directories. +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".mypy_cache", + ".nox", + ".pants.d", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "venv", +] +per-file-ignores = {} +# No per-file ignores are configured. +# Same as Black. +line-length = 130 + +# Allow unused variables when underscore-prefixed. +dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" + +[tool.ruff.pyupgrade] +# Preserve types, even if a file imports `from __future__ import annotations`.
+keep-runtime-typing = true + +[tool.ruff.isort] +detect-same-package = true +lines-between-types = 0 +known-first-party = ["pyiceberg", "tests"] +section-order = ["future", "standard-library", "third-party", "first-party", "local-folder"] diff --git a/python/tests/catalog/test_hive.py b/python/tests/catalog/test_hive.py index ef4757cbf4c4..9e3dd1a94521 100644 --- a/python/tests/catalog/test_hive.py +++ b/python/tests/catalog/test_hive.py @@ -19,9 +19,8 @@ from unittest.mock import MagicMock, patch import pytest -from hive_metastore.ttypes import AlreadyExistsException -from hive_metastore.ttypes import Database as HiveDatabase from hive_metastore.ttypes import ( + AlreadyExistsException, FieldSchema, InvalidOperationException, MetaException, @@ -30,6 +29,7 @@ SkewedInfo, StorageDescriptor, ) +from hive_metastore.ttypes import Database as HiveDatabase from hive_metastore.ttypes import Table as HiveTable from pyiceberg.catalog import PropertiesUpdateSummary