From 871e270b3ef3fe56c80d7d7e2d51910ccd79280f Mon Sep 17 00:00:00 2001
From: Philip Chmielowiec <67855069+philipc2@users.noreply.github.com>
Date: Mon, 12 Feb 2024 22:21:38 -0600
Subject: [PATCH] Setup ASV Benchmarking (#694)

* initial asv setup

* configure ruff to work to skip benchmarks dir

* fix failing benchmark

* fix failing benchmark

* use underscore instead of dash

* fix file path

* fix file path

* fix file path

* fix file path

* add docstrings to benchmarks

* update asv-benchmarking.yml

* update conda env path

* fix typo

* Delete .github/workflows/asv-benchmarking.yml

* add asv to gitignore

* add workflow for asv

* Update quad_hexagon.py

* Update asv-benchmarking.yml
---
 .github/workflows/asv-benchmarking.yml |  86 +++++++++++
 .gitignore                             |   6 +
 .pre-commit-config.yaml                |   2 -
 benchmarks/__init__.py                 |   0
 benchmarks/asv.conf.json               | 188 +++++++++++++++++++++++++
 benchmarks/import.py                   |   5 +
 benchmarks/quad_hexagon.py             |  36 +++++
 ci/asv.yml                             |  31 ++++
 ruff.toml                              |   3 +
 9 files changed, 355 insertions(+), 2 deletions(-)
 create mode 100644 .github/workflows/asv-benchmarking.yml
 create mode 100644 benchmarks/__init__.py
 create mode 100644 benchmarks/asv.conf.json
 create mode 100644 benchmarks/import.py
 create mode 100644 benchmarks/quad_hexagon.py
 create mode 100644 ci/asv.yml
 create mode 100644 ruff.toml

diff --git a/.github/workflows/asv-benchmarking.yml b/.github/workflows/asv-benchmarking.yml
new file mode 100644
index 000000000..aa06f2441
--- /dev/null
+++ b/.github/workflows/asv-benchmarking.yml
@@ -0,0 +1,86 @@
+name: ASV Benchmarking
+# Runs the asv benchmark suite and stores the results in the uxarray-asv repository.
+on:
+  push:
+    branches:
+      - main
+  workflow_dispatch:
+
+jobs:
+  benchmark:
+    runs-on: ubuntu-latest
+    env:
+      CONDA_ENV_FILE: ./ci/asv.yml
+      ASV_DIR: ./benchmarks
+
+    steps:
+      - name: Checkout uxarray
+        uses: actions/checkout@v4
+        with:
+          repository: UXARRAY/uxarray
+          fetch-depth: 0
+      - name: Checkout uxarray-asv
+        uses: actions/checkout@v4
+        with:
+          repository: UXARRAY/uxarray-asv
+          persist-credentials: false
+          fetch-depth: 0
+          ref: main
+          path: uxarray-asv
+      - name: Set environment variables
+        run: |
+          echo "TODAY=$(date +'%Y-%m-%d')" >> "$GITHUB_ENV"
+      # TODAY is part of the cache key below, so the environment cache key changes each day.
+      - name: Set up conda environment
+        id: env-setup
+        continue-on-error: true
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          environment-file: ${{ env.CONDA_ENV_FILE }}
+          cache-environment: true
+          cache-environment-key: "benchmark-${{runner.os}}-${{runner.arch}}-${{env.TODAY}}"
+
+      - name: retry environment set up if failed
+        if: steps.env-setup.outcome == 'failure'
+        uses: mamba-org/setup-micromamba@v1
+        with:
+          download-micromamba: false
+          environment-file: ${{ env.CONDA_ENV_FILE }}
+          cache-environment: true
+          cache-environment-key: "benchmark-${{runner.os}}-${{runner.arch}}-${{env.TODAY}}"
+      # Bring in previously published results so `asv run --skip-existing` can reuse them.
+      - name: Copy existing results
+        run: |
+          if [ -d "uxarray-asv/results" ]; then
+            cp -r uxarray-asv/results "$ASV_DIR/"
+          fi
+
+      - name: Run benchmarks
+        shell: bash -l {0}
+        id: benchmark
+        run: |
+          cd "$ASV_DIR"
+          asv machine --machine GH-Actions --os ubuntu-latest --arch x64 --cpu "2-core unknown" --ram 7GB
+          asv run v2024.02.0..main --skip-existing --parallel || true  # best-effort: benchmark errors must not fail the job
+
+      - name: Commit benchmark results
+        run: |
+          if [ -d "uxarray-asv/results" ]; then
+            rm -r uxarray-asv/results
+          fi
+          cp -r "$ASV_DIR/results/" uxarray-asv/
+          cd uxarray-asv
+          git config --local user.email "${{ secrets.UXARRAY_ASV_EMAIL }}"
+          git config --local user.name "${{ secrets.UXARRAY_ASV_USER_NAME }}"
+          git add results
+          git diff --cached --quiet || git commit -m "[🤖] Update benchmark results"  # skip the commit when nothing is staged
+
+      - name: Push to uxarray-asv
+        if: github.ref == 'refs/heads/main' && github.repository == 'UXARRAY/uxarray'
+        uses: ad-m/github-push-action@master  # NOTE(review): unpinned ref; consider pinning to a release tag or commit SHA
+        with:
+          github_token: ${{ secrets.UXARRAY_ASV_PAT }}
+          branch: main
+          force: true
+          repository: UXARRAY/uxarray-asv
+          directory: uxarray-asv
diff --git a/.gitignore b/.gitignore
index 2c41b5e38..af6b72791 100644
--- a/.gitignore
+++ b/.gitignore
@@ -152,3 +152,9 @@ test/write_*
 docs/user_api/_autosummary/
 
 docs/notebook-examples.txt
+
+
+# benchmarking
+benchmarks/env
+benchmarks/results
+benchmarks/html
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 77ef1bad1..574548d3d 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -22,8 +22,6 @@ repos:
     # Run the linter.
     - id: ruff
       args: [ --fix ]
-      exclude: ".test"
 
     # Run the formatter.
     - id: ruff-format
-      exclude: ".test"
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/benchmarks/asv.conf.json b/benchmarks/asv.conf.json
new file mode 100644
index 000000000..e1dd0d8a9
--- /dev/null
+++ b/benchmarks/asv.conf.json
@@ -0,0 +1,188 @@
+{
+    // The version of the config file format.  Do not change, unless
+    // you know what you are doing.
+    "version": 1,
+
+    // The name of the project being benchmarked
+    "project": "uxarray",
+
+    // The project's homepage
+    "project_url": "https://github.com/UXARRAY/uxarray",
+
+    // The URL or local path of the source code repository for the
+    // project being benchmarked
+    "repo": "..",
+
+    // The Python project's subdirectory in your repo.  If missing or
+    // the empty string, the project is assumed to be located at the root
+    // of the repository.
+    // "repo_subdir": "",
+
+    // Customizable commands for building the project.
+    // See asv.conf.json documentation.
+    // To build the package using pyproject.toml (PEP518), uncomment the following lines
+    // "build_command": [
+    //     "python -m pip install build",
+    //     "python -m build",
+    //     "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    // ],
+    // To build the package using setuptools and a setup.py file, uncomment the following lines
+    // "build_command": [
+    //     "python setup.py build",
+    //     "PIP_NO_BUILD_ISOLATION=false python -mpip wheel --no-deps --no-index -w {build_cache_dir} {build_dir}"
+    // ],
+
+    // Customizable commands for installing and uninstalling the project.
+    // See asv.conf.json documentation.
+    // "install_command": ["in-dir={env_dir} python -mpip install {wheel_file}"],
+    // "uninstall_command": ["return-code=any python -mpip uninstall -y {project}"],
+
+    // List of branches to benchmark. If not provided, defaults to "master"
+    // (for git) or "default" (for mercurial).
+    "branches": ["main"], // for git
+    // "branches": ["default"],    // for mercurial
+
+    // The DVCS being used.  If not set, it will be automatically
+    // determined from "repo" by looking at the protocol in the URL
+    // (if remote), or by looking for special directories, such as
+    // ".git" (if local).
+    "dvcs": "git",
+
+    // The tool to use to create environments.  May be "conda",
+    // "virtualenv", "mamba" (above 3.8)
+    // or other value depending on the plugins in use.
+    // If missing or the empty string, the tool will be automatically
+    // determined by looking for tools on the PATH environment
+    // variable.
+    "environment_type": "conda",
+
+    // timeout in seconds for installing any dependencies in environment
+    // defaults to 10 min
+    "install_timeout": 600,
+
+    // the base URL to show a commit for the project.
+    "show_commit_url": "https://github.com/UXARRAY/uxarray/commit/",
+
+    // The Pythons you'd like to test against.  If not provided, defaults
+    // to the current version of Python used to run `asv`.
+    "pythons": ["3.11"],
+
+    // The list of conda channel names to be searched for benchmark
+    // dependency packages in the specified order
+     "conda_channels": ["conda-forge"],
+
+    // A conda environment file that is used for environment creation.
+     "conda_environment_file": "../ci/asv.yml",
+
+    // The matrix of dependencies to test.  Each key of the "req"
+    // requirements dictionary is the name of a package (in PyPI) and
+    // the values are version numbers.  An empty list or empty string
+    // indicates to just test against the default (latest)
+    // version. null indicates that the package is to not be
+    // installed. If the package to be tested is only available from
+    // PyPI, and the 'environment_type' is conda, then you can preface
+    // the package name by 'pip+', and the package will be installed
+    // via pip (with all the conda available packages installed first,
+    // followed by the pip installed packages).
+    //
+    // The ``@env`` and ``@env_nobuild`` keys contain the matrix of
+    // environment variables to pass to build and benchmark commands.
+    // An environment will be created for every combination of the
+    // cartesian product of the "@env" variables in this matrix.
+    // Variables in "@env_nobuild" will be passed to every environment
+    // during the benchmark phase, but will not trigger creation of
+    // new environments.  A value of ``null`` means that the variable
+    // will not be set for the current combination.
+    //
+    "matrix": {
+        "python": [""],
+    },
+
+    // Combinations of libraries/python versions can be excluded/included
+    // from the set to test. Each entry is a dictionary containing additional
+    // key-value pairs to include/exclude.
+    //
+    // An exclude entry excludes entries where all values match. The
+    // values are regexps that should match the whole string.
+    //
+    // An include entry adds an environment. Only the packages listed
+    // are installed. The 'python' key is required. The exclude rules
+    // do not apply to includes.
+    //
+    // In addition to package names, the following keys are available:
+    //
+    // - python
+    //     Python version, as in the *pythons* variable above.
+    // - environment_type
+    //     Environment type, as above.
+    // - sys_platform
+    //     Platform, as in sys.platform. Possible values for the common
+    //     cases: 'linux2', 'win32', 'cygwin', 'darwin'.
+    // - req
+    //     Required packages
+    // - env
+    //     Environment variables
+    // - env_nobuild
+    //     Non-build environment variables
+    //
+    // "exclude": [
+    //     {"python": "3.2", "sys_platform": "win32"}, // skip py3.2 on windows
+    //     {"environment_type": "conda", "req": {"six": null}}, // don't run without six on conda
+    //     {"env": {"ENV_VAR_1": "val2"}}, // skip val2 for ENV_VAR_1
+    // ],
+    //
+    // "include": [
+    //     // additional env for python2.7
+    //     {"python": "2.7", "req": {"numpy": "1.8"}, "env_nobuild": {"FOO": "123"}},
+    //     // additional env if run on windows+conda
+    //     {"platform": "win32", "environment_type": "conda", "python": "2.7", "req": {"libpython": ""}},
+    // ],
+
+    // The directory (relative to the current directory) that benchmarks are
+    // stored in.  If not provided, defaults to "benchmarks"
+    "benchmark_dir": ".",
+
+    // The directory (relative to the current directory) to cache the Python
+    // environments in.  If not provided, defaults to "env"
+    // "env_dir": "env",
+
+    // The directory (relative to the current directory) that raw benchmark
+    // results are stored in.  If not provided, defaults to "results".
+    // "results_dir": "results",
+
+    // The directory (relative to the current directory) that the html tree
+    // should be written to.  If not provided, defaults to "html".
+    // "html_dir": "html",
+
+    // The number of characters to retain in the commit hashes.
+    // "hash_length": 8,
+
+    // `asv` will cache results of the recent builds in each
+    // environment, making them faster to install next time.  This is
+    // the number of builds to keep, per environment.
+    // "build_cache_size": 2,
+
+    // The commits after which the regression search in `asv publish`
+    // should start looking for regressions. Dictionary whose keys are
+    // regexps matching to benchmark names, and values corresponding to
+    // the commit (exclusive) after which to start looking for
+    // regressions.  The default is to start from the first commit
+    // with results. If the commit is `null`, regression detection is
+    // skipped for the matching benchmark.
+    //
+    // "regressions_first_commits": {
+    //    "some_benchmark": "352cdf",  // Consider regressions only after this commit
+    //    "another_benchmark": null,   // Skip regression detection altogether
+    // },
+
+    // The thresholds for relative change in results, after which `asv
+    // publish` starts reporting regressions. Dictionary of the same
+    // form as in ``regressions_first_commits``, with values
+    // indicating the thresholds.  If multiple entries match, the
+    // maximum is taken. If no entry matches, the default is 5%.
+    //
+    // "regressions_thresholds": {
+    //    "some_benchmark": 0.01,     // Threshold of 1%
+    //    "another_benchmark": 0.5,   // Threshold of 50%
+    // },
+}
diff --git a/benchmarks/import.py b/benchmarks/import.py
new file mode 100644
index 000000000..e53515f2a
--- /dev/null
+++ b/benchmarks/import.py
@@ -0,0 +1,5 @@
+class Imports:
+    """Benchmark the time needed to import uxarray."""
+
+    def timeraw_import_uxarray(self):
+        return "import uxarray"  # NOTE(review): asv `timeraw_*` benchmarks are expected to return the statement to time as a string - confirm against the asv docs
diff --git a/benchmarks/quad_hexagon.py b/benchmarks/quad_hexagon.py
new file mode 100644
index 000000000..ee740d832
--- /dev/null
+++ b/benchmarks/quad_hexagon.py
@@ -0,0 +1,36 @@
+import os
+from pathlib import Path
+
+import uxarray as ux
+
+current_path = Path(os.path.realpath(__file__)).parents[1]  # repository root (parent of benchmarks/)
+grid_path = current_path / "test" / "meshfiles" / "ugrid" / "quad-hexagon" / "grid.nc"
+data_path = current_path / "test" / "meshfiles" / "ugrid" / "quad-hexagon" / "data.nc"
+
+
+class QuadHexagon:
+    """Benchmarks for opening the quad-hexagon test grid and its data file."""
+
+    def time_open_grid(self):
+        """Time to open a `Grid`."""
+        ux.open_grid(grid_path)
+
+    def mem_open_grid(self):
+        """Memory occupied by a `Grid`."""
+        return ux.open_grid(grid_path)
+
+    def peakmem_open_grid(self):
+        """Peak memory usage while opening a `Grid`."""
+        ux.open_grid(grid_path)
+
+    def time_open_dataset(self):
+        """Time to open a `UxDataset`."""
+        ux.open_dataset(grid_path, data_path)
+
+    def mem_open_dataset(self):
+        """Memory occupied by a `UxDataset`."""
+        return ux.open_dataset(grid_path, data_path)
+
+    def peakmem_open_dataset(self):
+        """Peak memory usage while opening a `UxDataset`."""
+        ux.open_dataset(grid_path, data_path)
diff --git a/ci/asv.yml b/ci/asv.yml
new file mode 100644
index 000000000..761a6ab5c
--- /dev/null
+++ b/ci/asv.yml
@@ -0,0 +1,31 @@
+name: asv
+channels:
+  - conda-forge
+dependencies:
+  - python=3.11
+  - cartopy
+  - dask
+  - datashader
+  - gmpy2
+  - holoviews
+  - matplotlib-base
+  - netcdf4
+  - numba
+  - numpy
+  - pandas<2.1.0
+  - pathlib
+  - pre_commit
+  - pyarrow<13.0.0   # pin due to CI failures on macOS & Ubuntu
+  - pytest
+  - pytest-cov
+  - requests
+  - scikit-learn
+  - scipy
+  - shapely
+  - spatialpandas
+  - xarray
+  - pip:
+    - antimeridian
+    - pyfma
+    - asv
+    - -e ../
diff --git a/ruff.toml b/ruff.toml
new file mode 100644
index 000000000..b13fa12c6
--- /dev/null
+++ b/ruff.toml
@@ -0,0 +1,3 @@
+extend-exclude = [
+"test",
+"benchmarks"]