Merge remote-tracking branch 'upstream/main' into pr/27

kempenep · Nov 3, 2023 · 744162c · 744162c
2 parents 644f6a2 + f02306d
commit 744162c
Show file tree

Hide file tree

Showing 9 changed files with 183 additions and 30 deletions.
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
@@ -0,0 +1,27 @@
+# This workflow lints the code by running the pre-commit hooks with a single version of Python
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: Tests
+
+on:
+  push:
+    branches: [ main, 0.** ]
+  pull_request:
+    branches: [ main, 0.** ]
+  schedule:
+    - cron: "0 0 * * *"
+
+concurrency: 
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  Linting:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/setup-python@v4
+        with:
+          python-version: "3.11"
+      - uses: pre-commit/[email protected]
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,14 @@
+ci:
+    autofix_prs: false
+    autoupdate_schedule: weekly
+
+repos:
+    - repo: https://github.com/psf/black
+      rev: 23.1.0
+      hooks:
+          - id: black
+            language_version: python3
+    - repo: https://github.com/astral-sh/ruff-pre-commit
+      rev: "v0.0.271"
+      hooks:
+          - id: ruff
diff --git a/benchmarker.py b/benchmarker.py
@@ -1,4 +1,3 @@
-# -*- coding: utf-8 -*-
 """
 Module for benchmarking.
 """
@@ -8,26 +7,15 @@
 import inspect
 import logging
 from pathlib import Path
-import sys
 import tempfile
 from typing import List, Optional
 
 import pandas as pd
 
-# Add path so the benchmark packages are found
-sys.path.insert(0, str(Path(__file__).resolve().parent))
 import reporter
 
-################################################################################
-# Some init
-################################################################################
-
 logger = logging.getLogger(__name__)
 
-################################################################################
-# The real work
-################################################################################
-
 
 class RunResult:
     """The result of a benchmark run."""
@@ -72,7 +60,6 @@ def run_benchmarks(
     modules: Optional[List[str]] = None,
     functions: Optional[List[str]] = None,
 ):
-
     # Init logging
     logging.basicConfig(
         format="%(asctime)s.%(msecs)03d|%(levelname)s|%(name)s|%(message)s",
@@ -92,9 +79,7 @@ def run_benchmarks(
         if (not module_name.startswith("_")) and (module_name not in globals()):
             if modules is not None and module_name not in modules:
                 # Benchmark whitelist specified, and this one isn't in it
-                logger.info(
-                    f"skip module {module_name}: not in modules: {modules}"
-                )
+                logger.info(f"skip module {module_name}: not in modules: {modules}")
                 continue
 
             benchmark_implementation = importlib.import_module(

diff --git a/benchmarks_vector_ops/benchmarks_geopandas_pyogrio.py b/benchmarks_vector_ops/benchmarks_geopandas_pyogrio.py
@@ -208,9 +208,7 @@ def union(tmp_dir: Path) -> RunResult:
     # union
     start_time_op = datetime.now()
     result_gdf = input1_gdf.overlay(input2_gdf, how="union")
-    logger.info(
-        f"time for union: {(datetime.now()-start_time_op).total_seconds()}"
-    )
+    logger.info(f"time for union: {(datetime.now()-start_time_op).total_seconds()}")
 
     # Write to output file
     start_time_write = datetime.now()

diff --git a/benchmarks_zonalstats/benchmarks_rasterstats.py b/benchmarks_zonalstats/benchmarks_rasterstats.py
@@ -50,9 +50,14 @@ def zonalstats_1band(tmp_dir: Path) -> List[RunResult]:
     start_time = datetime.now()
     #  1.000: 10s
     # 10.000: 97s
-    stats = list(rasterstats.gen_zonal_stats(
-        str(vector_tmp_path), raster_path, band=1, stats=["count", "min", "max", "mean"]
-    ))
+    stats = list(
+        rasterstats.gen_zonal_stats(
+            str(vector_tmp_path),
+            raster_path,
+            band=1,
+            stats=["count", "min", "max", "mean"],
+        )
+    )
     # print(stats)
 
     secs_taken = (datetime.now() - start_time).total_seconds()

diff --git a/environment.yml b/environment.yml
@@ -12,5 +12,6 @@ dependencies:
   - pygeoprocessing
   - rasterstats
   # linting
-  - black
-  - flake8
+  - black =23
+  - ruff
+  - pre-commit
diff --git a/project.toml b/project.toml
@@ -0,0 +1,124 @@
+[tool.black]
+line-length = 88
+
+[tool.ruff]
+line-length = 88
+select = [
+    # pyflakes
+    "F",
+    # pycodestyle
+    "E",
+    "W",
+    # flake8-2020
+    "YTT",
+    # flake8-bugbear
+    "B",
+    # flake8-quotes
+    "Q",
+    # flake8-debugger
+    "T10",
+    # flake8-gettext
+    "INT",
+    # pylint
+    "PLC",
+    "PLE",
+    "PLR",
+    "PLW",
+    # misc lints
+    "PIE",
+    # flake8-pyi
+    "PYI",
+    # tidy imports
+    "TID",
+    # implicit string concatenation
+    "ISC",
+    # type-checking imports
+    "TCH",
+    # comprehensions
+    "C4",
+    # pygrep-hooks
+    "PGH",
+    # Ruff-specific rules
+    "RUF",
+]
+target-version = "py38"
+ignore = [ # space before : (needed for how black formats slicing)
+    # "E203",  # not yet implemented
+    # do not assign a lambda expression, use a def
+    "E731",
+    # line break before binary operator
+    # "W503",  # not yet implemented
+    # line break after binary operator
+    # "W504",  # not yet implemented
+    # controversial
+    "B006",
+    # controversial
+    "B007",
+    # controversial
+    "B008",
+    # setattr is used to side-step mypy
+    "B009",
+    # getattr is used to side-step mypy
+    "B010",
+    # tests use assert False
+    "B011",
+    # tests use comparisons but not their returned value
+    "B015",
+    # false positives
+    "B019",
+    # Loop control variable overrides iterable it iterates
+    "B020",
+    # Function definition does not bind loop variable
+    "B023",
+    # Functions defined inside a loop must not use variables redefined in the loop
+    # "B301",  # not yet implemented
+    # Only works with python >=3.10
+    "B905",
+    # Too many arguments to function call
+    "PLR0913",
+    # Too many returns
+    "PLR0911",
+    # Too many branches
+    "PLR0912",
+    # Too many statements
+    "PLR0915",
+    # Redefined loop name
+    "PLW2901",
+    # Global statements are discouraged
+    "PLW0603",
+    # Docstrings should not be included in stubs
+    "PYI021",
+    # No builtin `eval()` allowed
+    "PGH001",
+    # compare-to-empty-string
+    "PLC1901",
+    # Use typing_extensions.TypeAlias for type aliases
+    # "PYI026",  # not yet implemented
+    # Use "collections.abc.*" instead of "typing.*" (PEP 585 syntax)
+    # "PYI027",  # not yet implemented
+    # while int | float can be shortened to float, the former is more explicit
+    # "PYI041",  # not yet implemented
+
+    # Additional checks that don't pass yet
+    # Useless statement
+    "B018",
+    # Within an except clause, raise exceptions with `raise ... from err` or `raise ... from None`
+    "B904",
+    # Magic number
+    "PLR2004",
+    # Consider `elif` instead of `else` then `if` to remove indentation level
+    "PLR5501",
+    # ambiguous-unicode-character-string
+    "RUF001",
+    # ambiguous-unicode-character-docstring
+    "RUF002",
+    # ambiguous-unicode-character-comment
+    "RUF003",
+    # collection-literal-concatenation
+    "RUF005",
+    # pairwise-over-zipped (>=PY310 only)
+    "RUF007",
+    # explicit-f-string-type-conversion
+    "RUF010",
+]
+exclude = ["doc/*", "benchmarks/*", "versioneer.py", "geopandas/_version.py"]
diff --git a/run_benchmarks_IO_all.py b/run_benchmarks_IO_all.py
@@ -6,8 +6,10 @@ def main():
     if not all_benchmarks:
         # Only run specific benchmark function(s)
         benchmarker.run_benchmarks(
-            benchmarks_subdir="benchmarks_IO", results_subdir="results_IO",
-            modules=["benchmarks_pyogrio"], functions=["write_dataframe"]
+            benchmarks_subdir="benchmarks_IO",
+            results_subdir="results_IO",
+            modules=["benchmarks_pyogrio"],
+            functions=["write_dataframe"],
         )
         return
 
@@ -17,4 +19,4 @@ def main():
 
 
 if __name__ == "__main__":
-    main()
+    main()
diff --git a/setup.cfg b/setup.cfg