From 7dc8f4e2c271467b50689e51e86e1561a59a82a6 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 30 Sep 2022 11:16:02 -0700 Subject: [PATCH 01/89] Run CI every 4 hours (#623) Run CI every 4 hours in addition to commit-triggered runs. Co-authored-by: Marcin Zalewski --- .github/workflows/ci.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eda13fbda..9bf976c8a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -6,6 +6,9 @@ on: pull_request: branches-ignore: - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) + schedule: + # * is a special character in YAML so you have to quote this string + - cron: '* */4 * * *' env: COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} PROJECT: github-cunumeric-ci From 298f094acbe3cb84757d12cabfc721f0bf15838c Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Fri, 30 Sep 2022 11:54:18 -0700 Subject: [PATCH 02/89] src/cunumeric/matrix: stop including coll.h in solve_template.inl (#620) (#637) This file was included unnecessarily, and led to build issues on distributed machines. In particular, including coll.h pulls in mpi.h, which is an unresolved header to NVCC. Signed-off-by: Rohan Yadav Co-authored-by: Rohan Yadav --- src/cunumeric/matrix/solve_template.inl | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/cunumeric/matrix/solve_template.inl b/src/cunumeric/matrix/solve_template.inl index a4f0f7894..bff40ad9c 100644 --- a/src/cunumeric/matrix/solve_template.inl +++ b/src/cunumeric/matrix/solve_template.inl @@ -18,8 +18,6 @@ #include -#include "core/comm/coll.h" - // Useful for IDEs #include "cunumeric/matrix/solve.h" From e0601452f855163984e035aefc8e1e367e291279 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 5 Oct 2022 12:16:33 -0700 Subject: [PATCH 03/89] Adjust the schedule of the CI runs (#641) Co-authored-by: Marcin Zalewski --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9bf976c8a..6abe49c3b 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -8,7 +8,7 @@ on: - gh-pages # deployment target branch (this workflow should not exist on that branch anyway) schedule: # * is a special character in YAML so you have to quote this string - - cron: '* */4 * * *' + - cron: '0 */6 * * *' env: COMMIT: ${{ github.event.pull_request.head.sha || github.sha }} PROJECT: github-cunumeric-ci From 52bb4f5b3fce583901e49c43a05b76f76fae4586 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Mon, 10 Oct 2022 11:51:21 -0700 Subject: [PATCH 04/89] Move test driver code to legate.core (#627) --- test.py | 8 +- tests/_utils/__init__.py | 74 ----- tests/_utils/args.py | 286 ------------------ tests/_utils/config.py | 161 ---------- tests/_utils/logger.py | 67 ---- tests/_utils/stages/__init__.py | 41 --- tests/_utils/stages/_linux/__init__.py | 24 -- tests/_utils/stages/_linux/cpu.py | 80 ----- tests/_utils/stages/_linux/eager.py | 71 ----- tests/_utils/stages/_linux/gpu.py | 82 ----- tests/_utils/stages/_linux/omp.py | 84 ----- tests/_utils/stages/_osx/__init__.py | 24 -- tests/_utils/stages/_osx/cpu.py | 64 ---- tests/_utils/stages/_osx/eager.py | 64 ---- tests/_utils/stages/_osx/gpu.py | 51 ---- tests/_utils/stages/_osx/omp.py | 70 ----- tests/_utils/stages/test_stage.py | 265 ---------------- tests/_utils/stages/util.py | 115 ------- tests/_utils/system.py | 170 ----------- 
tests/_utils/test_plan.py | 131 -------- tests/_utils/tests/__init__.py | 15 - tests/_utils/tests/stages/__init__.py | 38 --- tests/_utils/tests/stages/_linux/__init__.py | 22 -- tests/_utils/tests/stages/_linux/test_cpu.py | 131 -------- .../_utils/tests/stages/_linux/test_eager.py | 81 ----- tests/_utils/tests/stages/_linux/test_gpu.py | 100 ------ tests/_utils/tests/stages/_linux/test_omp.py | 163 ---------- tests/_utils/tests/stages/test_test_stage.py | 87 ------ tests/_utils/tests/stages/test_util.py | 48 --- tests/_utils/tests/test___init__.py | 73 ----- tests/_utils/tests/test_args.py | 132 -------- tests/_utils/tests/test_config.py | 177 ----------- tests/_utils/tests/test_logger.py | 74 ----- tests/_utils/tests/test_system.py | 78 ----- tests/_utils/tests/test_types.py | 30 -- tests/_utils/tests/test_ui.py | 103 ------- tests/_utils/types.py | 50 --- tests/_utils/ui.py | 229 -------------- 38 files changed, 4 insertions(+), 3559 deletions(-) delete mode 100644 tests/_utils/__init__.py delete mode 100644 tests/_utils/args.py delete mode 100644 tests/_utils/config.py delete mode 100644 tests/_utils/logger.py delete mode 100644 tests/_utils/stages/__init__.py delete mode 100644 tests/_utils/stages/_linux/__init__.py delete mode 100644 tests/_utils/stages/_linux/cpu.py delete mode 100644 tests/_utils/stages/_linux/eager.py delete mode 100644 tests/_utils/stages/_linux/gpu.py delete mode 100644 tests/_utils/stages/_linux/omp.py delete mode 100644 tests/_utils/stages/_osx/__init__.py delete mode 100644 tests/_utils/stages/_osx/cpu.py delete mode 100644 tests/_utils/stages/_osx/eager.py delete mode 100644 tests/_utils/stages/_osx/gpu.py delete mode 100644 tests/_utils/stages/_osx/omp.py delete mode 100644 tests/_utils/stages/test_stage.py delete mode 100644 tests/_utils/stages/util.py delete mode 100644 tests/_utils/system.py delete mode 100644 tests/_utils/test_plan.py delete mode 100644 tests/_utils/tests/__init__.py delete mode 100644 tests/_utils/tests/stages/__init__.py delete mode 100644 tests/_utils/tests/stages/_linux/__init__.py delete mode 100644 tests/_utils/tests/stages/_linux/test_cpu.py delete mode 100644 tests/_utils/tests/stages/_linux/test_eager.py delete mode 100644 tests/_utils/tests/stages/_linux/test_gpu.py delete mode 100644 tests/_utils/tests/stages/_linux/test_omp.py delete mode 100644 tests/_utils/tests/stages/test_test_stage.py delete mode 100644 tests/_utils/tests/stages/test_util.py delete mode 100644 tests/_utils/tests/test___init__.py delete mode 100644 tests/_utils/tests/test_args.py delete mode 100644 tests/_utils/tests/test_config.py delete mode 100644 tests/_utils/tests/test_logger.py delete mode 100644 tests/_utils/tests/test_system.py delete mode 100644 tests/_utils/tests/test_types.py delete mode 100644 tests/_utils/tests/test_ui.py delete mode 100644 tests/_utils/types.py delete mode 100644 tests/_utils/ui.py diff --git a/test.py b/test.py index edf9d772b..8dcda54be 100755 --- a/test.py +++ b/test.py @@ -18,14 +18,14 @@ import sys -from tests._utils.config import Config -from tests._utils.system import System -from tests._utils.test_plan import TestPlan +from legate.tester.config import Config +from legate.tester.test_plan import TestPlan +from legate.tester.test_system import TestSystem if __name__ == "__main__": config = Config(sys.argv) - system = System(dry_run=config.dry_run) + system = TestSystem(dry_run=config.dry_run) plan = TestPlan(config, system) diff --git a/tests/_utils/__init__.py b/tests/_utils/__init__.py deleted file mode 100644 
index 11b8f1d70..000000000 --- a/tests/_utils/__init__.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Utilities and helpers for implementing the Cunumeric custom test runner. - -""" -from __future__ import annotations - -from typing import Union -from typing_extensions import Literal, TypeAlias - -#: Define the available feature types for tests -FeatureType: TypeAlias = Union[ - Literal["cpus"], Literal["cuda"], Literal["eager"], Literal["openmp"] -] - -#: Value to use if --cpus is not specified. -DEFAULT_CPUS_PER_NODE = 4 - -#: Value to use if --gpus is not specified. -DEFAULT_GPUS_PER_NODE = 1 - -# Delay to introduce between GPU test invocations (ms) -DEFAULT_GPU_DELAY = 2000 - -# Value to use if --fbmem is not specified (MB) -DEFAULT_GPU_MEMORY_BUDGET = 4096 - -#: Value to use if --omps is not specified. -DEFAULT_OMPS_PER_NODE = 1 - -#: Value to use if --ompthreads is not specified. -DEFAULT_OMPTHREADS = 4 - -#: Default values to apply to normalize the testing environment. -DEFAULT_PROCESS_ENV = { - "LEGATE_TEST": "1", -} - -#: Width for terminal ouput headers and footers. -UI_WIDTH = 65 - -#: Feature values that are accepted for --use, in the relative order -#: that the corresponding test stages should always execute in -FEATURES: tuple[FeatureType, ...] = ( - "cpus", - "cuda", - "eager", - "openmp", -) - -#: Paths to example files that should be skipped. -SKIPPED_EXAMPLES = { - "examples/ingest.py", - "examples/kmeans_sort.py", - "examples/lstm_full.py", - "examples/wgrad.py", -} - -#: Extra arguments to supply when specific examples are executed. -PER_FILE_ARGS = { - "examples/lstm_full.py": ["--file", "resources/lstm_input.txt"], -} diff --git a/tests/_utils/args.py b/tests/_utils/args.py deleted file mode 100644 index d97ebf603..000000000 --- a/tests/_utils/args.py +++ /dev/null @@ -1,286 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Provide an argparse ArgumentParser for the test runner. - -""" -from __future__ import annotations - -from argparse import Action, ArgumentParser, Namespace -from typing import ( - Any, - Generic, - Iterable, - Iterator, - Literal, - Sequence, - TypeVar, - Union, -) - -from typing_extensions import TypeAlias - -from . 
import ( - DEFAULT_CPUS_PER_NODE, - DEFAULT_GPU_DELAY, - DEFAULT_GPU_MEMORY_BUDGET, - DEFAULT_GPUS_PER_NODE, - DEFAULT_OMPS_PER_NODE, - DEFAULT_OMPTHREADS, - FEATURES, -) - -T = TypeVar("T") - -PinOptionsType: TypeAlias = Union[ - Literal["partial"], - Literal["none"], - Literal["strict"], -] - -PIN_OPTIONS: tuple[PinOptionsType, ...] = ( - "partial", - "none", - "strict", -) - - -class MultipleChoices(Generic[T]): - """A container that reports True for any item or subset inclusion. - - Parameters - ---------- - choices: Iterable[T] - The values to populate the containter. - - Examples - -------- - - >>> choices = MultipleChoices(["a", "b", "c"]) - - >>> "a" in choices - True - - >>> ("b", "c") in choices - True - - """ - - def __init__(self, choices: Iterable[T]) -> None: - self.choices = set(choices) - - def __contains__(self, x: Union[T, Iterable[T]]) -> bool: - if isinstance(x, (list, tuple)): - return set(x).issubset(self.choices) - return x in self.choices - - def __iter__(self) -> Iterator[T]: - return self.choices.__iter__() - - -class ExtendAction(Action): - """A custom argparse action to collect multiple values into a list.""" - - def __call__( - self, - parser: ArgumentParser, - namespace: Namespace, - values: Union[str, Sequence[Any], None], - option_string: Union[str, None] = None, - ) -> None: - items = getattr(namespace, self.dest, None) or [] - if isinstance(values, list): - items.extend(values) - else: - items.append(values) - setattr(namespace, self.dest, items) - - -#: The argument parser for test.py -parser = ArgumentParser( - description="Run the Cunumeric test suite", - epilog="Any extra arguments will be forwarded to the Legate script", -) - - -stages = parser.add_argument_group("Feature stage selection") - - -stages.add_argument( - "--use", - dest="features", - action=ExtendAction, - choices=MultipleChoices(sorted(FEATURES)), - # argpase evidently only expects string returns from the type converter - # here, but returning a list of strings seems to work in practice - type=lambda s: s.split(","), # type: ignore[return-value, arg-type] - help="Test Legate with features (also via USE_*)", -) - - -selection = parser.add_argument_group("Test file selection") - - -selection.add_argument( - "--files", - nargs="+", - default=None, - help="Explicit list of test files to run", -) - - -selection.add_argument( - "--unit", - dest="unit", - action="store_true", - default=False, - help="Include unit tests", -) - - -feature_opts = parser.add_argument_group("Feature stage configuration options") - - -feature_opts.add_argument( - "--cpus", - dest="cpus", - type=int, - default=DEFAULT_CPUS_PER_NODE, - help="Number of CPUs per node to use", -) - - -feature_opts.add_argument( - "--gpus", - dest="gpus", - type=int, - default=DEFAULT_GPUS_PER_NODE, - help="Number of GPUs per node to use", -) - - -feature_opts.add_argument( - "--omps", - dest="omps", - type=int, - default=DEFAULT_OMPS_PER_NODE, - help="Number OpenMP processors per node to use", -) - - -feature_opts.add_argument( - "--utility", - dest="utility", - type=int, - default=1, - help="Number of of utility CPUs to reserve for runtime services", -) - - -feature_opts.add_argument( - "--cpu-pin", - dest="cpu_pin", - choices=PIN_OPTIONS, - default="partial", - help="CPU pinning behavior on platforms that support CPU pinning", -) - -feature_opts.add_argument( - "--gpu-delay", - dest="gpu_delay", - type=int, - default=DEFAULT_GPU_DELAY, - help="Delay to introduce between GPU tests (ms)", -) - - -feature_opts.add_argument( - "--fbmem", - 
dest="fbmem", - type=int, - default=DEFAULT_GPU_MEMORY_BUDGET, - help="GPU framebuffer memory (MB)", -) - - -feature_opts.add_argument( - "--ompthreads", - dest="ompthreads", - metavar="THREADS", - type=int, - default=DEFAULT_OMPTHREADS, - help="Number of threads per OpenMP processor", -) - - -test_opts = parser.add_argument_group("Test run configuration options") - - -test_opts.add_argument( - "--legate", - dest="legate_dir", - metavar="LEGATE_DIR", - action="store", - default=None, - required=False, - help="Path to Legate installation directory", -) - - -test_opts.add_argument( - "-C", - "--directory", - dest="test_root", - metavar="DIR", - action="store", - default=None, - required=False, - help="Root directory containing the tests subdirectory", -) - - -test_opts.add_argument( - "-j", - "--workers", - dest="workers", - type=int, - default=None, - help="Number of parallel workers for testing", -) - - -test_opts.add_argument( - "-v", - "--verbose", - dest="verbose", - action="count", - default=0, - help="Display verbose output. Use -vv for even more output (test stdout)", -) - - -test_opts.add_argument( - "--dry-run", - dest="dry_run", - action="store_true", - help="Print the test plan but don't run anything", -) - - -test_opts.add_argument( - "--debug", - dest="debug", - action="store_true", - help="Print out the commands that are to be executed", -) diff --git a/tests/_utils/config.py b/tests/_utils/config.py deleted file mode 100644 index 06b61e9de..000000000 --- a/tests/_utils/config.py +++ /dev/null @@ -1,161 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -import os -from argparse import Namespace -from pathlib import Path - -from . import DEFAULT_PROCESS_ENV, FEATURES, SKIPPED_EXAMPLES, FeatureType -from .args import parser -from .types import ArgList, EnvDict - - -class Config: - """A centralized configuration object that provides the information - needed by test stages in order to run. 
- - Parameters - ---------- - argv : ArgList - command-line arguments to use when building the configuration - - """ - - def __init__(self, argv: ArgList) -> None: - args, self._extra_args = parser.parse_known_args(argv[1:]) - - # which tests to run - self.examples = True - self.integration = True - self.unit = args.unit - self.files = args.files - - # feature configuration - self.features = self._compute_features(args) - - # feature options for integration tests - self.cpus = args.cpus - self.gpus = args.gpus - self.omps = args.omps - self.utility = args.utility - self.cpu_pin = args.cpu_pin - self.fbmem = args.fbmem - self.gpu_delay = args.gpu_delay - self.ompthreads = args.ompthreads - - # test run configuration - self.debug = args.debug - self.dry_run = args.dry_run - self.verbose = args.verbose - self.test_root = args.test_root - self.requested_workers = args.workers - self.legate_dir = self._compute_legate_dir(args) - - @property - def env(self) -> EnvDict: - """Custom environment settings used for process exectution.""" - return dict(DEFAULT_PROCESS_ENV) - - @property - def extra_args(self) -> ArgList: - """Extra command-line arguments to pass on to individual test files.""" - return self._extra_args - - @property - def root_dir(self) -> Path: - """Path to the directory containing the tests.""" - if self.test_root: - return Path(self.test_root) - return Path(__file__).parents[2] - - @property - def test_files(self) -> tuple[Path, ...]: - """List of all test files to use for each stage. - - An explicit list of files from the command line will take precedence. - - Otherwise, the files are computed based on command-line options, etc. - - """ - if self.files: - return self.files - - files = [] - - if self.examples: - examples = ( - path.relative_to(self.root_dir) - for path in self.root_dir.joinpath("examples").glob("*.py") - if str(path.relative_to(self.root_dir)) not in SKIPPED_EXAMPLES - ) - files.extend(sorted(examples)) - - if self.integration: - integration_tests = ( - path.relative_to(self.root_dir) - for path in self.root_dir.joinpath("tests/integration").glob( - "*.py" - ) - ) - files.extend(sorted(integration_tests)) - - if self.unit: - unit_tests = ( - path.relative_to(self.root_dir) - for path in self.root_dir.joinpath("tests/unit").glob( - "**/*.py" - ) - ) - files.extend(sorted(unit_tests)) - - return tuple(files) - - @property - def legate_path(self) -> str: - """Computed path to the legate driver script""" - if self.legate_dir is None: - return "legate" - return str(self.legate_dir / "bin" / "legate") - - def _compute_features(self, args: Namespace) -> tuple[FeatureType, ...]: - if args.features is not None: - computed = args.features - else: - computed = [ - feature - for feature in FEATURES - if os.environ.get(f"USE_{feature.upper()}", None) == "1" - ] - - # if nothing is specified any other way, at least run CPU stage - if len(computed) == 0: - computed.append("cpus") - - return tuple(computed) - - def _compute_legate_dir(self, args: Namespace) -> Path: - # self._legate_source below is purely for testing - if args.legate_dir: - self._legate_source = "cmd" - return Path(args.legate_dir) - elif "LEGATE_DIR" in os.environ: - self._legate_source = "env" - return Path(os.environ["LEGATE_DIR"]) - self._legate_source = "install" - return None diff --git a/tests/_utils/logger.py b/tests/_utils/logger.py deleted file mode 100644 index f40904219..000000000 --- a/tests/_utils/logger.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the 
Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Provide a basic logger that can scrub ANSI color codes. - -""" -from __future__ import annotations - -import re - -# ref: https://stackoverflow.com/a/14693789 -_ANSI_ESCAPE = re.compile(r"\x1B(?:[@-Z\\-_]|\[[0-?]*[ -/]*[@-~])") - - -class Log: - def __init__(self) -> None: - self._record: list[str] = [] - - def __call__(self, *lines: str) -> tuple[int, int]: - return self.record(*lines) - - def record(self, *lines: str) -> tuple[int, int]: - if len(lines) == 1 and "\n" in lines[0]: - lines = tuple(lines[0].split("\n")) - - start = len(self._record) - for line in lines: - self._record.append(line) - print(line, flush=True) - return (start, len(self._record)) - - def clear(self) -> None: - self._record = [] - - def dump( - self, - *, - start: int = 0, - end: int | None = None, - filter_ansi: bool = True, - ) -> str: - lines = self._record[start:end] - - if filter_ansi: - full_text = _ANSI_ESCAPE.sub("", "\n".join(lines)) - else: - full_text = "\n".join(lines) - - return full_text - - @property - def lines(self) -> tuple[str, ...]: - return tuple(self._record) - - -LOG = Log() diff --git a/tests/_utils/stages/__init__.py b/tests/_utils/stages/__init__.py deleted file mode 100644 index fa8f916d5..000000000 --- a/tests/_utils/stages/__init__.py +++ /dev/null @@ -1,41 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Provide TestStage subclasses for running configured test files using -specific features. - -""" -from __future__ import annotations - -import sys -from typing import Dict, Type - -from .. import FeatureType -from .test_stage import TestStage -from .util import log_proc - -if sys.platform == "darwin": - from ._osx import CPU, Eager, GPU, OMP -elif sys.platform.startswith("linux"): - from ._linux import CPU, Eager, GPU, OMP -else: - raise RuntimeError(f"unsupported platform: {sys.platform}") - -#: All the available test stages that can be selected -STAGES: Dict[FeatureType, Type[TestStage]] = { - "cpus": CPU, - "cuda": GPU, - "openmp": OMP, - "eager": Eager, -} diff --git a/tests/_utils/stages/_linux/__init__.py b/tests/_utils/stages/_linux/__init__.py deleted file mode 100644 index 032305f9c..000000000 --- a/tests/_utils/stages/_linux/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Provide TestStage subclasses for running configured test files using -specific features on linux platforms. - -""" -from __future__ import annotations - -from .cpu import CPU -from .gpu import GPU -from .eager import Eager -from .omp import OMP diff --git a/tests/_utils/stages/_linux/cpu.py b/tests/_utils/stages/_linux/cpu.py deleted file mode 100644 index 665793081..000000000 --- a/tests/_utils/stages/_linux/cpu.py +++ /dev/null @@ -1,80 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from itertools import chain - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import ( - CUNUMERIC_TEST_ARG, - UNPIN_ENV, - Shard, - StageSpec, - adjust_workers, -) - - -class CPU(TestStage): - """A test stage for exercising CPU features. - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "cpus" - - args = [CUNUMERIC_TEST_ARG] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def env(self, config: Config, system: System) -> EnvDict: - return {} if config.cpu_pin == "strict" else dict(UNPIN_ENV) - - def shard_args(self, shard: Shard, config: Config) -> ArgList: - args = [ - "--cpus", - str(config.cpus), - ] - if config.cpu_pin != "none": - args += [ - "--cpu-bind", - ",".join(str(x) for x in shard), - ] - return args - - def compute_spec(self, config: Config, system: System) -> StageSpec: - cpus = system.cpus - - procs = config.cpus + config.utility + int(config.cpu_pin == "strict") - workers = adjust_workers(len(cpus) // procs, config.requested_workers) - - shards: list[tuple[int, ...]] = [] - for i in range(workers): - shard_cpus = range(i * procs, (i + 1) * procs) - shard = chain.from_iterable(cpus[j].ids for j in shard_cpus) - shards.append(tuple(sorted(shard))) - - return StageSpec(workers, shards) diff --git a/tests/_utils/stages/_linux/eager.py b/tests/_utils/stages/_linux/eager.py deleted file mode 100644 index 8e63fc49b..000000000 --- a/tests/_utils/stages/_linux/eager.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import Shard, StageSpec, adjust_workers - - -class Eager(TestStage): - """A test stage for exercising Eager Numpy execution features. - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "eager" - - args: ArgList = [] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def env(self, config: Config, system: System) -> EnvDict: - # Raise min chunk sizes for deferred codepaths to force eager execution - env = { - "CUNUMERIC_MIN_CPU_CHUNK": "2000000000", - "CUNUMERIC_MIN_OMP_CHUNK": "2000000000", - "CUNUMERIC_MIN_GPU_CHUNK": "2000000000", - } - return env - - def shard_args(self, shard: Shard, config: Config) -> ArgList: - return [ - "--cpus", - "1", - "--cpu-bind", - ",".join(str(x) for x in shard), - ] - - def compute_spec(self, config: Config, system: System) -> StageSpec: - N = len(system.cpus) - - degree = min(N, 60) # ~LEGION_MAX_NUM_PROCS just in case - workers = adjust_workers(degree, config.requested_workers) - - # Just put each worker on its own full CPU for eager tests - shards = [cpu.ids for cpu in system.cpus] - - return StageSpec(workers, shards) diff --git a/tests/_utils/stages/_linux/gpu.py b/tests/_utils/stages/_linux/gpu.py deleted file mode 100644 index 12012a481..000000000 --- a/tests/_utils/stages/_linux/gpu.py +++ /dev/null @@ -1,82 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -import time - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import CUNUMERIC_TEST_ARG, Shard, StageSpec, adjust_workers - -BLOAT_FACTOR = 1.5 # hard coded for now - - -class GPU(TestStage): - """A test stage for exercising GPU features. 
- - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "cuda" - - args = [CUNUMERIC_TEST_ARG] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def env(self, config: Config, system: System) -> EnvDict: - return {} - - def delay(self, shard: Shard, config: Config, system: System) -> None: - time.sleep(config.gpu_delay / 1000) - - def shard_args(self, shard: Shard, config: Config) -> ArgList: - return [ - "--fbmem", - str(config.fbmem), - "--gpus", - str(len(shard)), - "--gpu-bind", - ",".join(str(x) for x in shard), - ] - - def compute_spec(self, config: Config, system: System) -> StageSpec: - N = len(system.gpus) - degree = N // config.gpus - - fbsize = min(gpu.total for gpu in system.gpus) / (2 << 20) # MB - oversub_factor = int(fbsize // (config.fbmem * BLOAT_FACTOR)) - workers = adjust_workers( - degree * oversub_factor, config.requested_workers - ) - - # https://docs.python.org/3/library/itertools.html#itertools-recipes - # grouper('ABCDEF', 3) --> ABC DEF - args = [iter(range(degree * config.gpus))] * config.gpus - per_worker_shards = list(zip(*args)) - - shards = per_worker_shards * workers - - return StageSpec(workers, shards) diff --git a/tests/_utils/stages/_linux/omp.py b/tests/_utils/stages/_linux/omp.py deleted file mode 100644 index 84a954412..000000000 --- a/tests/_utils/stages/_linux/omp.py +++ /dev/null @@ -1,84 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from itertools import chain - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import ( - CUNUMERIC_TEST_ARG, - UNPIN_ENV, - Shard, - StageSpec, - adjust_workers, -) - - -class OMP(TestStage): - """A test stage for exercising OpenMP features. 
- - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "openmp" - - args = [CUNUMERIC_TEST_ARG] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def env(self, config: Config, system: System) -> EnvDict: - return {} if config.cpu_pin == "strict" else dict(UNPIN_ENV) - - def shard_args(self, shard: Shard, config: Config) -> ArgList: - args = [ - "--omps", - str(config.omps), - "--ompthreads", - str(config.ompthreads), - ] - if config.cpu_pin != "none": - args += [ - "--cpu-bind", - ",".join(str(x) for x in shard), - ] - return args - - def compute_spec(self, config: Config, system: System) -> StageSpec: - cpus = system.cpus - omps, threads = config.omps, config.ompthreads - procs = ( - omps * threads + config.utility + int(config.cpu_pin == "strict") - ) - workers = adjust_workers(len(cpus) // procs, config.requested_workers) - - shards: list[tuple[int, ...]] = [] - for i in range(workers): - shard_cpus = range(i * procs, (i + 1) * procs) - shard = chain.from_iterable(cpus[j].ids for j in shard_cpus) - shards.append(tuple(sorted(shard))) - - return StageSpec(workers, shards) diff --git a/tests/_utils/stages/_osx/__init__.py b/tests/_utils/stages/_osx/__init__.py deleted file mode 100644 index 80a7c368d..000000000 --- a/tests/_utils/stages/_osx/__init__.py +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Provide TestStage subclasses for running configured test files using -specific features on OSX. - -""" -from __future__ import annotations - -from .cpu import CPU -from .gpu import GPU -from .eager import Eager -from .omp import OMP diff --git a/tests/_utils/stages/_osx/cpu.py b/tests/_utils/stages/_osx/cpu.py deleted file mode 100644 index ec6d23f20..000000000 --- a/tests/_utils/stages/_osx/cpu.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import ( - CUNUMERIC_TEST_ARG, - UNPIN_ENV, - Shard, - StageSpec, - adjust_workers, -) - - -class CPU(TestStage): - """A test stage for exercising CPU features. 
- - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "cpus" - - args = [CUNUMERIC_TEST_ARG] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def env(self, config: Config, system: System) -> EnvDict: - return UNPIN_ENV - - def shard_args(self, shard: Shard, config: Config) -> ArgList: - return ["--cpus", str(config.cpus)] - - def compute_spec(self, config: Config, system: System) -> StageSpec: - procs = config.cpus + config.utility - workers = adjust_workers( - len(system.cpus) // procs, config.requested_workers - ) - - # return a dummy set of shards just for the runner to iterate over - return StageSpec(workers, [(i,) for i in range(workers)]) diff --git a/tests/_utils/stages/_osx/eager.py b/tests/_utils/stages/_osx/eager.py deleted file mode 100644 index 5cc5d557d..000000000 --- a/tests/_utils/stages/_osx/eager.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import UNPIN_ENV, Shard, StageSpec, adjust_workers - - -class Eager(TestStage): - """A test stage for exercising Eager Numpy execution features. - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "eager" - - args: ArgList = [] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def env(self, config: Config, system: System) -> EnvDict: - # Raise min chunk sizes for deferred codepaths to force eager execution - env = { - "CUNUMERIC_MIN_CPU_CHUNK": "2000000000", - "CUNUMERIC_MIN_OMP_CHUNK": "2000000000", - "CUNUMERIC_MIN_GPU_CHUNK": "2000000000", - } - env.update(UNPIN_ENV) - return env - - def shard_args(self, shard: Shard, config: Config) -> ArgList: - return ["--cpus", "1"] - - def compute_spec(self, config: Config, system: System) -> StageSpec: - N = len(system.cpus) - degree = min(N, 60) # ~LEGION_MAX_NUM_PROCS just in case - workers = adjust_workers(degree, config.requested_workers) - - # return a dummy set of shards just for the runner to iterate over - return StageSpec(workers, [(i,) for i in range(workers)]) diff --git a/tests/_utils/stages/_osx/gpu.py b/tests/_utils/stages/_osx/gpu.py deleted file mode 100644 index f89fe7377..000000000 --- a/tests/_utils/stages/_osx/gpu.py +++ /dev/null @@ -1,51 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -import time - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import CUNUMERIC_TEST_ARG, UNPIN_ENV, Shard - - -class GPU(TestStage): - """A test stage for exercising GPU features. - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "cuda" - - args: ArgList = [CUNUMERIC_TEST_ARG] - - def __init__(self, config: Config, system: System) -> None: - raise RuntimeError("GPU test are not supported on OSX") - - def env(self, config: Config, system: System) -> EnvDict: - return UNPIN_ENV - - def delay(self, shard: Shard, config: Config, system: System) -> None: - time.sleep(config.gpu_delay / 1000) diff --git a/tests/_utils/stages/_osx/omp.py b/tests/_utils/stages/_osx/omp.py deleted file mode 100644 index f5f19194d..000000000 --- a/tests/_utils/stages/_osx/omp.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from ... import FeatureType -from ...config import Config -from ...system import System -from ...types import ArgList, EnvDict -from ..test_stage import TestStage -from ..util import ( - CUNUMERIC_TEST_ARG, - UNPIN_ENV, - Shard, - StageSpec, - adjust_workers, -) - - -class OMP(TestStage): - """A test stage for exercising OpenMP features. 
- - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType = "openmp" - - args = [CUNUMERIC_TEST_ARG] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def env(self, config: Config, system: System) -> EnvDict: - return UNPIN_ENV - - def shard_args(self, shard: Shard, config: Config) -> ArgList: - return [ - "--omps", - str(config.omps), - "--ompthreads", - str(config.ompthreads), - ] - - def compute_spec(self, config: Config, system: System) -> StageSpec: - omps, threads = config.omps, config.ompthreads - procs = omps * threads + config.utility - workers = adjust_workers( - len(system.cpus) // procs, config.requested_workers - ) - - # return a dummy set of shards just for the runner to iterate over - return StageSpec(workers, [(i,) for i in range(workers)]) diff --git a/tests/_utils/stages/test_stage.py b/tests/_utils/stages/test_stage.py deleted file mode 100644 index 0bfbe4f06..000000000 --- a/tests/_utils/stages/test_stage.py +++ /dev/null @@ -1,265 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -import multiprocessing -from datetime import datetime -from pathlib import Path - -from typing_extensions import Protocol - -from .. import PER_FILE_ARGS, FeatureType -from ..config import Config -from ..system import ProcessResult, System -from ..types import ArgList, EnvDict -from ..ui import banner, summary, yellow -from .util import Shard, StageResult, StageSpec, log_proc - - -class TestStage(Protocol): - """Encapsulate running configured test files using specific features. - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - kind: FeatureType - - #: The computed specification for processes to launch to run the - #: configured test files. - spec: StageSpec - - #: The computed sharding id sets to use for job runs - shards: multiprocessing.Queue[Shard] - - #: After the stage completes, results will be stored here - result: StageResult - - #: Any fixed stage-specific command-line args to pass - args: ArgList - - # --- Protocol methods - - def __init__(self, config: Config, system: System) -> None: - ... - - def env(self, config: Config, system: System) -> EnvDict: - """Generate stage-specific customizations to the process env - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - ... - - def delay(self, shard: Shard, config: Config, system: System) -> None: - """Wait any delay that should be applied before running the next - test. - - Parameters - ---------- - shard: Shard - The shard to be used for the next test that is run - - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - ... 
- - def shard_args(self, shard: Shard, config: Config) -> ArgList: - """Generate the command line arguments necessary to launch - the next test process on the given shard. - - Parameters - ---------- - shard: Shard - The shard to be used for the next test that is run - - config: Config - Test runner configuration - - """ - ... - - def compute_spec(self, config: Config, system: System) -> StageSpec: - """Compute the number of worker processes to launch and stage shards - to use for running the configured test files. - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - ... - - # --- Shared implementation methods - - def __call__(self, config: Config, system: System) -> None: - """Execute this test stage. - - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - t0 = datetime.now() - procs = self._launch(config, system) - t1 = datetime.now() - - self.result = StageResult(procs, t1 - t0) - - @property - def name(self) -> str: - """A stage name to display for tests in this stage.""" - return self.__class__.__name__ - - @property - def intro(self) -> str: - """An informative banner to display at stage end.""" - workers = self.spec.workers - workers_text = f"{workers} worker{'s' if workers > 1 else ''}" - return ( - banner(f"Entering stage: {self.name} (with {workers_text})") + "\n" - ) - - @property - def outro(self) -> str: - """An informative banner to display at stage end.""" - total, passed = self.result.total, self.result.passed - - result = summary(self.name, total, passed, self.result.time) - - footer = banner( - f"Exiting stage: {self.name}", - details=( - "* Results : " - + yellow( - f"{passed} / {total} files passed " # noqa E500 - f"({passed/total*100:0.1f}%)" - if total > 0 - else "0 tests are running, Please check " - ), - "* Elapsed time : " + yellow(f"{self.result.time}"), - ), - ) - - return f"{result}\n{footer}" - - def file_args(self, test_file: Path, config: Config) -> ArgList: - """Extra command line arguments based on the test file. - - Parameters - ---------- - test_file : Path - Path to a test file - - config: Config - Test runner configuration - - """ - test_file_string = str(test_file) - args = PER_FILE_ARGS.get(test_file_string, []) - - # These are a bit ugly but necessary in order to make pytest generate - # more verbose output for integration tests when -v, -vv is specified - if "integration" in test_file_string and config.verbose > 0: - args += ["-v"] - if "integration" in test_file_string and config.verbose > 1: - args += ["-s"] - - return args - - def run( - self, test_file: Path, config: Config, system: System - ) -> ProcessResult: - """Execute a single test files with appropriate environment and - command-line options for a feature test stage. 
- - Parameters - ---------- - test_file : Path - Test file to execute - - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - test_path = config.root_dir / test_file - - shard = self.shards.get() - - stage_args = self.args + self.shard_args(shard, config) - file_args = self.file_args(test_file, config) - - cmd = [str(config.legate_path), str(test_path)] - cmd += stage_args + file_args + config.extra_args - - self.delay(shard, config, system) - - result = system.run(cmd, test_file, env=self._env(config, system)) - log_proc(self.name, result, config, verbose=config.verbose) - - self.shards.put(shard) - - return result - - def _env(self, config: Config, system: System) -> EnvDict: - env = dict(config.env) - env.update(self.env(config, system)) - return env - - def _init(self, config: Config, system: System) -> None: - self.spec = self.compute_spec(config, system) - self.shards = system.manager.Queue(len(self.spec.shards)) - for shard in self.spec.shards: - self.shards.put(shard) - - def _launch(self, config: Config, system: System) -> list[ProcessResult]: - - pool = multiprocessing.pool.ThreadPool(self.spec.workers) - - jobs = [ - pool.apply_async(self.run, (path, config, system)) - for path in config.test_files - ] - pool.close() - - return [job.get() for job in jobs] diff --git a/tests/_utils/stages/util.py b/tests/_utils/stages/util.py deleted file mode 100644 index 357474c90..000000000 --- a/tests/_utils/stages/util.py +++ /dev/null @@ -1,115 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from dataclasses import dataclass -from datetime import timedelta -from typing import Tuple, Union - -from typing_extensions import TypeAlias - -from ..config import Config -from ..logger import LOG -from ..system import ProcessResult -from ..ui import failed, passed, shell, skipped - -CUNUMERIC_TEST_ARG = "-cunumeric:test" - -UNPIN_ENV = {"REALM_SYNTHETIC_CORE_MAP": ""} - -Shard: TypeAlias = Tuple[int, ...] - - -@dataclass(frozen=True) -class StageSpec: - """Specify the operation of a test run""" - - #: The number of worker processes to start for running tests - workers: int - - # A list of (cpu or gpu) shards to draw on for each test - shards: list[Shard] - - -@dataclass(frozen=True) -class StageResult: - """Collect results from all tests in a TestStage.""" - - #: Individual test process results including return code and stdout. - procs: list[ProcessResult] - - #: Cumulative execution time for all tests in a stage. - time: timedelta - - @property - def total(self) -> int: - """The total number of tests run in this stage.""" - return len(self.procs) - - @property - def passed(self) -> int: - """The number of tests in this stage that passed.""" - return sum(p.returncode == 0 for p in self.procs) - - -def adjust_workers(workers: int, requested_workers: Union[int, None]) -> int: - """Adjust computed workers according to command line requested workers. 
- - The final number of workers will only be adjusted down by this function. - - Parameters - ---------- - workers: int - The computed number of workers to use - - requested_workers: int | None, optional - Requested number of workers from the user, if supplied (default: None) - - Returns - ------- - int - The number of workers to actually use - - """ - if requested_workers is not None and requested_workers < 0: - raise ValueError("requested workers must be non-negative") - - if requested_workers is not None: - if requested_workers > workers: - raise RuntimeError( - "Requested workers greater than assignable workers" - ) - workers = requested_workers - - if workers == 0: - raise RuntimeError("Current configuration results in zero workers") - - return workers - - -def log_proc( - name: str, proc: ProcessResult, config: Config, *, verbose: bool -) -> None: - """Log a process result according to the current configuration""" - if config.debug or config.dry_run: - LOG(shell(proc.invocation)) - msg = f"({name}) {proc.test_file}" - details = proc.output.split("\n") if verbose else None - if proc.skipped: - LOG(skipped(msg)) - elif proc.returncode == 0: - LOG(passed(msg, details=details)) - else: - LOG(failed(msg, details=details)) diff --git a/tests/_utils/system.py b/tests/_utils/system.py deleted file mode 100644 index 71411b45b..000000000 --- a/tests/_utils/system.py +++ /dev/null @@ -1,170 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Provide a System class to encapsulate process execution and reporting -system information (number of CPUs present, etc). - -""" -from __future__ import annotations - -import multiprocessing -import os -import sys -from dataclasses import dataclass -from functools import cached_property -from pathlib import Path -from subprocess import PIPE, STDOUT, run as stdlib_run -from typing import Sequence - -from .types import CPUInfo, EnvDict, GPUInfo - - -@dataclass -class ProcessResult: - - #: The command invovation, including relevant environment vars - invocation: str - - # User-friendly test file path to use in reported output - test_file: Path - - #: Whether this process was actually invoked - skipped: bool = False - - #: The returncode from the process - returncode: int = 0 - - #: The collected stdout and stderr output from the process - output: str = "" - - -class System: - """A facade class for system-related functions. - - Parameters - ---------- - dry_run : bool, optional - If True, no commands will be executed, but a log of any commands - submitted to ``run`` will be made. (default: False) - - """ - - def __init__( - self, - *, - dry_run: bool = False, - ) -> None: - self.manager = multiprocessing.Manager() - self.dry_run: bool = dry_run - - def run( - self, - cmd: Sequence[str], - test_file: Path, - *, - env: EnvDict | None = None, - cwd: str | None = None, - ) -> ProcessResult: - """Wrapper for subprocess.run that encapsulates logging. 
- - Parameters - ---------- - cmd : sequence of str - The command to run, split on whitespace into a sequence - of strings - - test_file : Path - User-friendly test file path to use in reported output - - env : dict[str, str] or None, optional, default: None - Environment variables to apply when running the command - - cwd: str or None, optional, default: None - A current working directory to pass to stdlib ``run``. - - """ - - env = env or {} - - envstr = ( - " ".join(f"{k}={v}" for k, v in env.items()) - + min(len(env), 1) * " " - ) - - invocation = envstr + " ".join(cmd) - - if self.dry_run: - return ProcessResult(invocation, test_file, skipped=True) - - full_env = dict(os.environ) - full_env.update(env) - - proc = stdlib_run( - cmd, cwd=cwd, env=full_env, stdout=PIPE, stderr=STDOUT, text=True - ) - - return ProcessResult( - invocation, - test_file, - returncode=proc.returncode, - output=proc.stdout, - ) - - @cached_property - def cpus(self) -> tuple[CPUInfo, ...]: - """A list of CPUs on the system.""" - - N = multiprocessing.cpu_count() - - if sys.platform == "darwin": - return tuple(CPUInfo((i,)) for i in range(N)) - - sibling_sets: set[tuple[int, ...]] = set() - for i in range(N): - line = open( - f"/sys/devices/system/cpu/cpu{i}/topology/thread_siblings_list" - ).read() - sibling_sets.add( - tuple(sorted(int(x) for x in line.strip().split(","))) - ) - return tuple(CPUInfo(siblings) for siblings in sorted(sibling_sets)) - - @cached_property - def gpus(self) -> tuple[GPUInfo, ...]: - """A list of GPUs on the system, including total memory information.""" - - try: - # This pynvml import is protected inside this method so that in - # case pynvml is not installed, tests stages that don't need gpu - # info (e.g. cpus, eager) will proceed unaffected. Test stages - # that do require gpu info will fail here with an ImportError. - import pynvml # type: ignore[import] - - # Also a pynvml package is available on some platforms that won't - # have GPUs for some reason. In which case this init call will - # fail. - pynvml.nvmlInit() - except Exception: - return () - - num_gpus = pynvml.nvmlDeviceGetCount() - - results = [] - for i in range(num_gpus): - info = pynvml.nvmlDeviceGetMemoryInfo( - pynvml.nvmlDeviceGetHandleByIndex(i) - ) - results.append(GPUInfo(i, info.total)) - - return tuple(results) diff --git a/tests/_utils/test_plan.py b/tests/_utils/test_plan.py deleted file mode 100644 index 9e2a92532..000000000 --- a/tests/_utils/test_plan.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Provide a TestPlan class to coordinate multiple feature test stages. - -""" -from __future__ import annotations - -from datetime import timedelta -from itertools import chain - -from .config import Config -from .logger import LOG -from .stages import STAGES, log_proc -from .system import System -from .ui import banner, rule, summary, yellow - - -class TestPlan: - """Encapsulate an entire test run with multiple feature test stages. 
- - Parameters - ---------- - config: Config - Test runner configuration - - system: System - Process execution wrapper - - """ - - def __init__(self, config: Config, system: System) -> None: - self._config = config - self._system = system - self._stages = [ - STAGES[feature](config, system) for feature in config.features - ] - - def execute(self) -> int: - """Execute the entire test run with all configured feature stages.""" - LOG.clear() - - LOG(self.intro) - - for stage in self._stages: - LOG(stage.intro) - stage(self._config, self._system) - LOG(stage.outro) - - all_procs = tuple( - chain.from_iterable(s.result.procs for s in self._stages) - ) - total = len(all_procs) - passed = sum(proc.returncode == 0 for proc in all_procs) - - LOG(f"\n{rule()}") - - self._log_failures(total, passed) - - LOG(self.outro(total, passed)) - - return int((total - passed) > 0) - - @property - def intro(self) -> str: - """An informative banner to display at test run start.""" - - cpus = len(self._system.cpus) - try: - gpus = len(self._system.gpus) - except ImportError: - gpus = 0 - - details = ( - f"* Feature stages : {', '.join(yellow(x) for x in self._config.features)}", # noqa E501 - f"* Test files per stage : {yellow(str(len(self._config.test_files)))}", # noqa E501 - f"* System description : {yellow(str(cpus) + ' cpus')} / {yellow(str(gpus) + ' gpus')}", # noqa E501 - ) - return banner("Test Suite Configuration", details=details) - - def outro(self, total: int, passed: int) -> str: - """An informative banner to display at test run end. - - Parameters - ---------- - total: int - Number of total tests that ran in all stages - - passed: int - Number of tests that passed in all stages - - """ - details = [ - f"* {s.name: <6}: " - + yellow( - f"{s.result.passed} / {s.result.total} passed in {s.result.time.total_seconds():0.2f}s" # noqa E501 - ) - for s in self._stages - ] - - time = sum((s.result.time for s in self._stages), timedelta(0, 0)) - details.append("") - details.append( - summary("All tests", total, passed, time, justify=False) - ) - - overall = banner("Overall summary", details=details) - - return f"{overall}\n" - - def _log_failures(self, total: int, passed: int) -> None: - if total == passed: - return - - LOG(f"{banner('FAILURES')}\n") - - for stage in self._stages: - procs = (proc for proc in stage.result.procs if proc.returncode) - for proc in procs: - log_proc(stage.name, proc, self._config, verbose=True) diff --git a/tests/_utils/tests/__init__.py b/tests/_utils/tests/__init__.py deleted file mode 100644 index f0b271624..000000000 --- a/tests/_utils/tests/__init__.py +++ /dev/null @@ -1,15 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
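
The exit code computed at the end of TestPlan.execute above is
all-or-nothing: one failing test process makes the whole run exit nonzero.
A small self-contained illustration (Proc is a hypothetical stand-in for
ProcessResult):

    from dataclasses import dataclass

    @dataclass
    class Proc:
        returncode: int

    def exit_code(procs):
        total = len(procs)
        passed = sum(p.returncode == 0 for p in procs)
        # mirrors int((total - passed) > 0) in TestPlan.execute
        return int((total - passed) > 0)

    assert exit_code([Proc(0), Proc(0)]) == 0  # all passed
    assert exit_code([Proc(0), Proc(3)]) == 1  # any failure fails the run
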
-# -from __future__ import annotations diff --git a/tests/_utils/tests/stages/__init__.py b/tests/_utils/tests/stages/__init__.py deleted file mode 100644 index 6e3992dc1..000000000 --- a/tests/_utils/tests/stages/__init__.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from typing import Any - -from ...system import System -from ...types import CPUInfo, GPUInfo - - -class FakeSystem(System): - def __init__( - self, cpus: int = 6, gpus: int = 6, fbmem: int = 6 << 32, **kwargs: Any - ) -> None: - self._cpus = cpus - self._gpus = gpus - self._fbmem = fbmem - super().__init__(**kwargs) - - @property - def cpus(self) -> tuple[CPUInfo, ...]: - return tuple(CPUInfo((i,)) for i in range(self._cpus)) - - @property - def gpus(self) -> tuple[GPUInfo, ...]: - return tuple(GPUInfo(i, self._fbmem) for i in range(self._gpus)) diff --git a/tests/_utils/tests/stages/_linux/__init__.py b/tests/_utils/tests/stages/_linux/__init__.py deleted file mode 100644 index 345983919..000000000 --- a/tests/_utils/tests/stages/_linux/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -import sys - -import pytest - -if sys.platform != "linux": - pytestmark = pytest.mark.skip() diff --git a/tests/_utils/tests/stages/_linux/test_cpu.py b/tests/_utils/tests/stages/_linux/test_cpu.py deleted file mode 100644 index cc2825c31..000000000 --- a/tests/_utils/tests/stages/_linux/test_cpu.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -import pytest - -from ....config import Config -from ....stages._linux import cpu as m -from ....stages.util import UNPIN_ENV -from .. 
import FakeSystem - - -def test_default() -> None: - c = Config([]) - s = FakeSystem(cpus=12) - stage = m.CPU(c, s) - assert stage.kind == "cpus" - assert stage.args == ["-cunumeric:test"] - assert stage.env(c, s) == UNPIN_ENV - assert stage.spec.workers > 0 - - shard = (1, 2, 3) - assert "--cpu-bind" in stage.shard_args(shard, c) - - -def test_cpu_pin_strict() -> None: - c = Config(["test.py", "--cpu-pin", "strict"]) - s = FakeSystem(cpus=12) - stage = m.CPU(c, s) - assert stage.kind == "cpus" - assert stage.args == ["-cunumeric:test"] - assert stage.env(c, s) == {} - assert stage.spec.workers > 0 - - shard = (1, 2, 3) - assert "--cpu-bind" in stage.shard_args(shard, c) - - -def test_cpu_pin_none() -> None: - c = Config(["test.py", "--cpu-pin", "none"]) - s = FakeSystem(cpus=12) - stage = m.CPU(c, s) - assert stage.kind == "cpus" - assert stage.args == ["-cunumeric:test"] - assert stage.env(c, s) == UNPIN_ENV - assert stage.spec.workers > 0 - - shard = (1, 2, 3) - assert "--cpu-bind" not in stage.shard_args(shard, c) - - -@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) -def test_shard_args(shard: tuple[int, ...], expected: str) -> None: - c = Config([]) - s = FakeSystem() - stage = m.CPU(c, s) - result = stage.shard_args(shard, c) - assert result == ["--cpus", f"{c.cpus}", "--cpu-bind", expected] - - -def test_spec_with_cpus_1() -> None: - c = Config(["test.py", "--cpus", "1"]) - s = FakeSystem() - stage = m.CPU(c, s) - assert stage.spec.workers == 3 - assert stage.spec.shards == [(0, 1), (2, 3), (4, 5)] - - -def test_spec_with_cpus_2() -> None: - c = Config(["test.py", "--cpus", "2"]) - s = FakeSystem() - stage = m.CPU(c, s) - assert stage.spec.workers == 2 - assert stage.spec.shards == [(0, 1, 2), (3, 4, 5)] - - -def test_spec_with_utility() -> None: - c = Config(["test.py", "--cpus", "1", "--utility", "2"]) - s = FakeSystem() - stage = m.CPU(c, s) - assert stage.spec.workers == 2 - assert stage.spec.shards == [(0, 1, 2), (3, 4, 5)] - - -def test_spec_with_requested_workers() -> None: - c = Config(["test.py", "--cpus", "1", "-j", "2"]) - s = FakeSystem() - stage = m.CPU(c, s) - assert stage.spec.workers == 2 - assert stage.spec.shards == [(0, 1), (2, 3)] - - -def test_spec_with_requested_workers_zero() -> None: - s = FakeSystem() - c = Config(["test.py", "-j", "0"]) - assert c.requested_workers == 0 - with pytest.raises(RuntimeError): - m.CPU(c, s) - - -def test_spec_with_requested_workers_bad() -> None: - s = FakeSystem() - c = Config(["test.py", "-j", f"{len(s.cpus)+1}"]) - assert c.requested_workers > len(s.cpus) - with pytest.raises(RuntimeError): - m.CPU(c, s) - - -def test_spec_with_verbose() -> None: - args = ["test.py", "--cpus", "2"] - c = Config(args) - cv = Config(args + ["--verbose"]) - s = FakeSystem() - - spec, vspec = m.CPU(c, s).spec, m.CPU(cv, s).spec - assert vspec == spec diff --git a/tests/_utils/tests/stages/_linux/test_eager.py b/tests/_utils/tests/stages/_linux/test_eager.py deleted file mode 100644 index 8fc21ecb6..000000000 --- a/tests/_utils/tests/stages/_linux/test_eager.py +++ /dev/null @@ -1,81 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -import pytest - -from ....config import Config -from ....stages._linux import eager as m -from .. import FakeSystem - - -def test_default() -> None: - c = Config([]) - s = FakeSystem() - stage = m.Eager(c, s) - assert stage.kind == "eager" - assert stage.args == [] - assert stage.env(c, s) == { - "CUNUMERIC_MIN_CPU_CHUNK": "2000000000", - "CUNUMERIC_MIN_OMP_CHUNK": "2000000000", - "CUNUMERIC_MIN_GPU_CHUNK": "2000000000", - } - assert stage.spec.workers > 0 - - -@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) -def test_shard_args(shard: tuple[int, ...], expected: str) -> None: - c = Config([]) - s = FakeSystem() - stage = m.Eager(c, s) - result = stage.shard_args(shard, c) - assert result == ["--cpus", "1", "--cpu-bind", expected] - - -def test_spec() -> None: - c = Config([]) - s = FakeSystem() - stage = m.Eager(c, s) - assert stage.spec.workers == len(s.cpus) - # [cpu.ids for cpu in system.cpus] - assert stage.spec.shards == [(i,) for i in range(stage.spec.workers)] - - -def test_spec_with_requested_workers_zero() -> None: - s = FakeSystem() - c = Config(["test.py", "-j", "0"]) - assert c.requested_workers == 0 - with pytest.raises(RuntimeError): - m.Eager(c, s) - - -def test_spec_with_requested_workers_bad() -> None: - s = FakeSystem() - c = Config(["test.py", "-j", f"{len(s.cpus)+1}"]) - assert c.requested_workers > len(s.cpus) - with pytest.raises(RuntimeError): - m.Eager(c, s) - - -def test_spec_with_verbose() -> None: - c = Config(["test.py"]) - cv = Config(["test.py", "--verbose"]) - s = FakeSystem() - - spec, vspec = m.Eager(c, s).spec, m.Eager(cv, s).spec - assert vspec == spec diff --git a/tests/_utils/tests/stages/_linux/test_gpu.py b/tests/_utils/tests/stages/_linux/test_gpu.py deleted file mode 100644 index 13c7bb836..000000000 --- a/tests/_utils/tests/stages/_linux/test_gpu.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -import pytest - -from ....config import Config -from ....stages._linux import gpu as m -from .. 
import FakeSystem - - -def test_default() -> None: - c = Config([]) - s = FakeSystem() - stage = m.GPU(c, s) - assert stage.kind == "cuda" - assert stage.args == ["-cunumeric:test"] - assert stage.env(c, s) == {} - assert stage.spec.workers > 0 - - -@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) -def test_shard_args(shard: tuple[int, ...], expected: str) -> None: - c = Config([]) - s = FakeSystem() - stage = m.GPU(c, s) - result = stage.shard_args(shard, c) - assert result == [ - "--fbmem", - "4096", - "--gpus", - f"{len(shard)}", - "--gpu-bind", - expected, - ] - - -def test_spec_with_gpus_1() -> None: - c = Config(["test.py", "--gpus", "1"]) - s = FakeSystem() - stage = m.GPU(c, s) - assert stage.spec.workers == 12 - assert stage.spec.shards == [(0,), (1,), (2,), (3,), (4,), (5,)] * 12 - - -def test_spec_with_gpus_2() -> None: - c = Config(["test.py", "--gpus", "2"]) - s = FakeSystem() - stage = m.GPU(c, s) - assert stage.spec.workers == 6 - assert stage.spec.shards == [(0, 1), (2, 3), (4, 5)] * 6 - - -def test_spec_with_requested_workers() -> None: - c = Config(["test.py", "--gpus", "1", "-j", "2"]) - s = FakeSystem() - stage = m.GPU(c, s) - assert stage.spec.workers == 2 - assert stage.spec.shards == [(0,), (1,), (2,), (3,), (4,), (5,)] * 2 - - -def test_spec_with_requested_workers_zero() -> None: - s = FakeSystem() - c = Config(["test.py", "-j", "0"]) - assert c.requested_workers == 0 - with pytest.raises(RuntimeError): - m.GPU(c, s) - - -def test_spec_with_requested_workers_bad() -> None: - s = FakeSystem() - c = Config(["test.py", "-j", f"{len(s.gpus)+100}"]) - assert c.requested_workers > len(s.gpus) - with pytest.raises(RuntimeError): - m.GPU(c, s) - - -def test_spec_with_verbose() -> None: - args = ["test.py", "--gpus", "2"] - c = Config(args) - cv = Config(args + ["--verbose"]) - s = FakeSystem() - - spec, vspec = m.GPU(c, s).spec, m.GPU(cv, s).spec - assert vspec == spec diff --git a/tests/_utils/tests/stages/_linux/test_omp.py b/tests/_utils/tests/stages/_linux/test_omp.py deleted file mode 100644 index fd836759e..000000000 --- a/tests/_utils/tests/stages/_linux/test_omp.py +++ /dev/null @@ -1,163 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -import pytest - -from ....config import Config -from ....stages._linux import omp as m -from ....stages.util import UNPIN_ENV -from .. 
import FakeSystem - - -def test_default() -> None: - c = Config([]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.kind == "openmp" - assert stage.args == ["-cunumeric:test"] - assert stage.env(c, s) == UNPIN_ENV - assert stage.spec.workers > 0 - - shard = (1, 2, 3) - assert "--cpu-bind" in stage.shard_args(shard, c) - - -def test_cpu_pin_strict() -> None: - c = Config(["test.py", "--cpu-pin", "strict"]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.kind == "openmp" - assert stage.args == ["-cunumeric:test"] - assert stage.env(c, s) == {} - assert stage.spec.workers > 0 - - shard = (1, 2, 3) - assert "--cpu-bind" in stage.shard_args(shard, c) - - -def test_cpu_pin_none() -> None: - c = Config(["test.py", "--cpu-pin", "none"]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.kind == "openmp" - assert stage.args == ["-cunumeric:test"] - assert stage.env(c, s) == UNPIN_ENV - assert stage.spec.workers > 0 - - shard = (1, 2, 3) - assert "--cpu-bind" not in stage.shard_args(shard, c) - - -@pytest.mark.parametrize("shard,expected", [[(2,), "2"], [(1, 2, 3), "1,2,3"]]) -def test_shard_args(shard: tuple[int, ...], expected: str) -> None: - c = Config([]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - result = stage.shard_args(shard, c) - assert result == [ - "--omps", - f"{c.omps}", - "--ompthreads", - f"{c.ompthreads}", - "--cpu-bind", - expected, - ] - - -def test_spec_with_omps_1_threads_1() -> None: - c = Config(["test.py", "--omps", "1", "--ompthreads", "1"]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.spec.workers == 6 - assert stage.spec.shards == [ - (0, 1), - (2, 3), - (4, 5), - (6, 7), - (8, 9), - (10, 11), - ] - - -def test_spec_with_omps_1_threads_2() -> None: - c = Config(["test.py", "--omps", "1", "--ompthreads", "2"]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.spec.workers == 4 - assert stage.spec.shards == [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)] - - -def test_spec_with_omps_2_threads_1() -> None: - c = Config(["test.py", "--omps", "2", "--ompthreads", "1"]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.spec.workers == 4 - assert stage.spec.shards == [(0, 1, 2), (3, 4, 5), (6, 7, 8), (9, 10, 11)] - - -def test_spec_with_omps_2_threads_2() -> None: - c = Config(["test.py", "--omps", "2", "--ompthreads", "2"]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.spec.workers == 2 - assert stage.spec.shards == [(0, 1, 2, 3, 4), (5, 6, 7, 8, 9)] - - -def test_spec_with_utility() -> None: - c = Config( - ["test.py", "--omps", "2", "--ompthreads", "2", "--utility", "3"] - ) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.spec.workers == 1 - assert stage.spec.shards == [(0, 1, 2, 3, 4, 5, 6)] - - -def test_spec_with_requested_workers() -> None: - c = Config(["test.py", "--omps", "1", "--ompthreads", "1", "-j", "2"]) - s = FakeSystem(cpus=12) - stage = m.OMP(c, s) - assert stage.spec.workers == 2 - assert stage.spec.shards == [(0, 1), (2, 3)] - - -def test_spec_with_requested_workers_zero() -> None: - s = FakeSystem(cpus=12) - c = Config(["test.py", "-j", "0"]) - assert c.requested_workers == 0 - with pytest.raises(RuntimeError): - m.OMP(c, s) - - -def test_spec_with_requested_workers_bad() -> None: - s = FakeSystem(cpus=12) - c = Config(["test.py", "-j", f"{len(s.cpus)+1}"]) - assert c.requested_workers > len(s.cpus) - with pytest.raises(RuntimeError): - m.OMP(c, s) - - -def test_spec_with_verbose() -> None: - args = ["test.py", "--cpus", "2"] - c = 
Config(args) - cv = Config(args + ["--verbose"]) - s = FakeSystem(cpus=12) - - spec, vspec = m.OMP(c, s).spec, m.OMP(cv, s).spec - assert vspec == spec diff --git a/tests/_utils/tests/stages/test_test_stage.py b/tests/_utils/tests/stages/test_test_stage.py deleted file mode 100644 index 393ac18bc..000000000 --- a/tests/_utils/tests/stages/test_test_stage.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -from datetime import timedelta -from pathlib import Path - -from ... import FeatureType -from ...config import Config -from ...stages import test_stage as m -from ...stages.util import StageResult, StageSpec -from ...system import ProcessResult, System -from . import FakeSystem - -s = FakeSystem() - - -class MockTestStage(m.TestStage): - - kind: FeatureType = "eager" - - name = "mock" - - args = ["-foo", "-bar"] - - def __init__(self, config: Config, system: System) -> None: - self._init(config, system) - - def compute_spec(self, config: Config, system: System) -> StageSpec: - return StageSpec(2, [(0,), (1,), (2,)]) - - -class TestTestStage: - def test_name(self) -> None: - c = Config([]) - stage = MockTestStage(c, s) - assert stage.name == "mock" - - def test_intro(self) -> None: - c = Config([]) - stage = MockTestStage(c, s) - assert "Entering stage: mock" in stage.intro - - def test_outro(self) -> None: - c = Config([]) - stage = MockTestStage(c, s) - stage.result = StageResult( - [ProcessResult("invoke", Path("test/file"))], - timedelta(seconds=2.123), - ) - outro = stage.outro - assert "Exiting stage: mock" in outro - assert "Passed 1 of 1 tests (100.0%)" in outro - assert "2.123" in outro - - def test_file_args_default(self) -> None: - c = Config([]) - stage = MockTestStage(c, s) - assert stage.file_args(Path("integration/foo"), c) == [] - assert stage.file_args(Path("unit/foo"), c) == [] - - def test_file_args_v(self) -> None: - c = Config(["test.py", "-v"]) - stage = MockTestStage(c, s) - assert stage.file_args(Path("integration/foo"), c) == ["-v"] - assert stage.file_args(Path("unit/foo"), c) == [] - - def test_file_args_vv(self) -> None: - c = Config(["test.py", "-vv"]) - stage = MockTestStage(c, s) - assert stage.file_args(Path("integration/foo"), c) == ["-v", "-s"] - assert stage.file_args(Path("unit/foo"), c) == [] diff --git a/tests/_utils/tests/stages/test_util.py b/tests/_utils/tests/stages/test_util.py deleted file mode 100644 index 7d9dfe143..000000000 --- a/tests/_utils/tests/stages/test_util.py +++ /dev/null @@ -1,48 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -import pytest - -from ...stages import util as m - - -class Test_adjust_workers: - @pytest.mark.parametrize("n", (1, 5, 100)) - def test_None_requested(self, n: int) -> None: - assert m.adjust_workers(n, None) == n - - @pytest.mark.parametrize("n", (1, 2, 9)) - def test_requested(self, n: int) -> None: - assert m.adjust_workers(10, n) == n - - def test_negative_requested(self) -> None: - with pytest.raises(ValueError): - assert m.adjust_workers(10, -1) - - def test_zero_requested(self) -> None: - with pytest.raises(RuntimeError): - assert m.adjust_workers(10, 0) - - def test_zero_computed(self) -> None: - with pytest.raises(RuntimeError): - assert m.adjust_workers(0, None) - - def test_requested_too_large(self) -> None: - with pytest.raises(RuntimeError): - assert m.adjust_workers(10, 11) diff --git a/tests/_utils/tests/test___init__.py b/tests/_utils/tests/test___init__.py deleted file mode 100644 index 393f5d7bc..000000000 --- a/tests/_utils/tests/test___init__.py +++ /dev/null @@ -1,73 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -from .. 
import ( - DEFAULT_CPUS_PER_NODE, - DEFAULT_GPU_DELAY, - DEFAULT_GPU_MEMORY_BUDGET, - DEFAULT_GPUS_PER_NODE, - DEFAULT_OMPS_PER_NODE, - DEFAULT_OMPTHREADS, - DEFAULT_PROCESS_ENV, - FEATURES, - PER_FILE_ARGS, - SKIPPED_EXAMPLES, - UI_WIDTH, -) - - -class TestConsts: - def test_DEFAULT_CPUS_PER_NODE(self) -> None: - assert DEFAULT_CPUS_PER_NODE == 4 - - def test_DEFAULT_GPUS_PER_NODE(self) -> None: - assert DEFAULT_GPUS_PER_NODE == 1 - - def test_DEFAULT_GPU_DELAY(self) -> None: - assert DEFAULT_GPU_DELAY == 2000 - - def test_DEFAULT_GPU_MEMORY_BUDGET(self) -> None: - assert DEFAULT_GPU_MEMORY_BUDGET == 4096 - - def test_DEFAULT_OMPS_PER_NODE(self) -> None: - assert DEFAULT_OMPS_PER_NODE == 1 - - def test_DEFAULT_OMPTHREADS(self) -> None: - assert DEFAULT_OMPTHREADS == 4 - - def test_DEFAULT_PROCESS_ENV(self) -> None: - assert DEFAULT_PROCESS_ENV == { - "LEGATE_TEST": "1", - } - - def test_UI_WIDTH(self) -> None: - assert UI_WIDTH == 65 - - def test_FEATURES(self) -> None: - assert FEATURES == ("cpus", "cuda", "eager", "openmp") - - def test_SKIPPED_EXAMPLES(self) -> None: - assert isinstance(SKIPPED_EXAMPLES, set) - assert all(isinstance(x, str) for x in SKIPPED_EXAMPLES) - assert all(x.startswith("examples") for x in SKIPPED_EXAMPLES) - - def test_PER_FILE_ARGS(self) -> None: - assert isinstance(PER_FILE_ARGS, dict) - assert all(isinstance(x, str) for x in PER_FILE_ARGS.keys()) - assert all(isinstance(x, list) for x in PER_FILE_ARGS.values()) diff --git a/tests/_utils/tests/test_args.py b/tests/_utils/tests/test_args.py deleted file mode 100644 index 1f17a9bdb..000000000 --- a/tests/_utils/tests/test_args.py +++ /dev/null @@ -1,132 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -from itertools import chain, combinations -from typing import Iterable, TypeVar - -import pytest - -from .. 
import (
-    DEFAULT_CPUS_PER_NODE,
-    DEFAULT_GPU_DELAY,
-    DEFAULT_GPU_MEMORY_BUDGET,
-    DEFAULT_GPUS_PER_NODE,
-    DEFAULT_OMPS_PER_NODE,
-    DEFAULT_OMPTHREADS,
-    args as m,
-)
-
-T = TypeVar("T")
-
-
-# https://docs.python.org/3/library/itertools.html#itertools-recipes
-def powerset(iterable: Iterable[T]) -> Iterable[Iterable[T]]:
-    xs = list(iterable)
-    return chain.from_iterable(combinations(xs, n) for n in range(len(xs) + 1))
-
-
-class TestParserDefaults:
-    def test_features(self) -> None:
-        assert m.parser.get_default("features") is None
-
-    def test_files(self) -> None:
-        assert m.parser.get_default("files") is None
-
-    def test_unit(self) -> None:
-        assert m.parser.get_default("unit") is False
-
-    def test_cpus(self) -> None:
-        assert m.parser.get_default("cpus") == DEFAULT_CPUS_PER_NODE
-
-    def test_gpus(self) -> None:
-        assert m.parser.get_default("gpus") == DEFAULT_GPUS_PER_NODE
-
-    def test_cpu_pin(self) -> None:
-        assert m.parser.get_default("cpu_pin") == "partial"
-
-    def test_gpu_delay(self) -> None:
-        assert m.parser.get_default("gpu_delay") == DEFAULT_GPU_DELAY
-
-    def test_fbmem(self) -> None:
-        assert m.parser.get_default("fbmem") == DEFAULT_GPU_MEMORY_BUDGET
-
-    def test_omps(self) -> None:
-        assert m.parser.get_default("omps") == DEFAULT_OMPS_PER_NODE
-
-    def test_ompthreads(self) -> None:
-        assert m.parser.get_default("ompthreads") == DEFAULT_OMPTHREADS
-
-    def test_legate_dir(self) -> None:
-        assert m.parser.get_default("legate_dir") is None
-
-    def test_test_root(self) -> None:
-        assert m.parser.get_default("test_root") is None
-
-    def test_workers(self) -> None:
-        assert m.parser.get_default("workers") is None
-
-    def test_verbose(self) -> None:
-        assert m.parser.get_default("verbose") == 0
-
-    def test_dry_run(self) -> None:
-        assert m.parser.get_default("dry_run") is False
-
-    def test_debug(self) -> None:
-        assert m.parser.get_default("debug") is False
-
-
-class TestParserConfig:
-    def test_parser_epilog(self) -> None:
-        assert (
-            m.parser.epilog
-            == "Any extra arguments will be forwarded to the Legate script"
-        )
-
-    def test_parser_description(self) -> None:
-        assert m.parser.description == "Run the Cunumeric test suite"
-
-
-class TestMultipleChoices:
-    @pytest.mark.parametrize("choices", ([1, 2, 3], range(4), ("a", "b")))
-    def test_init(self, choices: Iterable[T]) -> None:
-        mc = m.MultipleChoices(choices)
-        assert mc.choices == set(choices)
-
-    def test_contains_item(self) -> None:
-        choices = [1, 2, 3]
-        mc = m.MultipleChoices(choices)
-        for item in choices:
-            assert item in mc
-
-    def test_contains_subset(self) -> None:
-        choices = [1, 2, 3]
-        mc = m.MultipleChoices(choices)
-        for subset in powerset(choices):
-            assert subset in mc
-
-    def test_iter(self) -> None:
-        choices = [1, 2, 3]
-        mc = m.MultipleChoices(choices)
-        assert list(mc) == choices
-
-
-# Testing this directly would require getting into argparse
-# internals. See test_config.py for indirect tests with --use
-class TestExtendAction:
-    pass
diff --git a/tests/_utils/tests/test_config.py b/tests/_utils/tests/test_config.py
deleted file mode 100644
index 76f71d7e7..000000000
--- a/tests/_utils/tests/test_config.py
+++ /dev/null
@@ -1,177 +0,0 @@
-# Copyright 2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -from pathlib import Path, PurePath - -import pytest - -from .. import ( - DEFAULT_CPUS_PER_NODE, - DEFAULT_GPU_DELAY, - DEFAULT_GPU_MEMORY_BUDGET, - DEFAULT_GPUS_PER_NODE, - DEFAULT_OMPS_PER_NODE, - DEFAULT_OMPTHREADS, - FEATURES, - config as m, -) -from ..args import PIN_OPTIONS, PinOptionsType - - -class TestConfig: - def test_default_init(self) -> None: - c = m.Config([]) - - assert c.examples is True - assert c.integration is True - assert c.unit is False - assert c.files is None - - assert c.features == ("cpus",) - - assert c.cpus == DEFAULT_CPUS_PER_NODE - assert c.gpus == DEFAULT_GPUS_PER_NODE - assert c.cpu_pin == "partial" - assert c.gpu_delay == DEFAULT_GPU_DELAY - assert c.fbmem == DEFAULT_GPU_MEMORY_BUDGET - assert c.omps == DEFAULT_OMPS_PER_NODE - assert c.ompthreads == DEFAULT_OMPTHREADS - - assert c.debug is False - assert c.dry_run is False - assert c.verbose == 0 - assert c.test_root is None - assert c.requested_workers is None - assert c.legate_dir is None - - assert c.extra_args == [] - assert c.root_dir == PurePath(m.__file__).parents[2] - assert len(c.test_files) > 0 - assert any("examples" in str(x) for x in c.test_files) - assert any("integration" in str(x) for x in c.test_files) - assert all("unit" not in str(x) for x in c.test_files) - assert c.legate_path == "legate" - - @pytest.mark.parametrize("feature", FEATURES) - def test_env_features( - self, monkeypatch: pytest.MonkeyPatch, feature: str - ) -> None: - monkeypatch.setenv(f"USE_{feature.upper()}", "1") - - # test default config - c = m.Config([]) - assert set(c.features) == {feature} - - # also test with a --use value provided - c = m.Config(["test.py", "--use", "cuda"]) - assert set(c.features) == {"cuda"} - - @pytest.mark.parametrize("feature", FEATURES) - def test_cmd_features(self, feature: str) -> None: - - # test a single value - c = m.Config(["test.py", "--use", feature]) - assert set(c.features) == {feature} - - # also test with multiple / duplication - c = m.Config(["test.py", "--use", f"cpus,{feature}"]) - assert set(c.features) == {"cpus", feature} - - def test_unit(self) -> None: - c = m.Config(["test.py", "--unit"]) - assert len(c.test_files) > 0 - assert any("examples" in str(x) for x in c.test_files) - assert any("integration" in str(x) for x in c.test_files) - assert any("unit" in str(x) for x in c.test_files) - - def test_files(self) -> None: - c = m.Config(["test.py", "--files", "a", "b", "c"]) - assert c.files == ["a", "b", "c"] - - @pytest.mark.parametrize( - "opt", ("cpus", "gpus", "gpu-delay", "fbmem", "omps", "ompthreads") - ) - def test_feature_options(self, opt: str) -> None: - c = m.Config(["test.py", f"--{opt}", "1234"]) - assert getattr(c, opt.replace("-", "_")) == 1234 - - @pytest.mark.parametrize("value", PIN_OPTIONS) - def test_cpu_pin(self, value: PinOptionsType) -> None: - c = m.Config(["test.py", "--cpu-pin", value]) - assert c.cpu_pin == value - - def test_workers(self) -> None: - c = m.Config(["test.py", "-j", "1234"]) - assert c.requested_workers 
== 1234 - - def test_debug(self) -> None: - c = m.Config(["test.py", "--debug"]) - assert c.debug is True - - def test_dry_run(self) -> None: - c = m.Config(["test.py", "--dry-run"]) - assert c.dry_run is True - - @pytest.mark.parametrize("arg", ("-v", "--verbose")) - def test_verbose1(self, arg: str) -> None: - c = m.Config(["test.py", arg]) - assert c.verbose == 1 - - def test_verbose2(self) -> None: - c = m.Config(["test.py", "-vv"]) - assert c.verbose == 2 - - @pytest.mark.parametrize("arg", ("-C", "--directory")) - def test_test_root(self, arg: str) -> None: - c = m.Config(["test.py", arg, "some/path"]) - assert c.test_root == "some/path" - - def test_legate_dir(self) -> None: - c = m.Config([]) - assert c.legate_dir is None - assert c.legate_path == "legate" - assert c._legate_source == "install" - - def test_cmd_legate_dir_good(self) -> None: - legate_dir = Path("/usr/local") - c = m.Config(["test.py", "--legate", str(legate_dir)]) - assert c.legate_dir == legate_dir - assert c.legate_path == str(legate_dir / "bin" / "legate") - assert c._legate_source == "cmd" - - def test_env_legate_dir_good( - self, monkeypatch: pytest.MonkeyPatch - ) -> None: - legate_dir = Path("/usr/local") - monkeypatch.setenv("LEGATE_DIR", str(legate_dir)) - c = m.Config([]) - assert c.legate_dir == legate_dir - assert c.legate_path == str(legate_dir / "bin" / "legate") - assert c._legate_source == "env" - - def test_extra_args(self) -> None: - extra = ["-foo", "--bar", "--baz", "10"] - c = m.Config(["test.py"] + extra) - assert c.extra_args == extra - - # also test with --files since that option collects arguments - c = m.Config(["test.py", "--files", "a", "b"] + extra) - assert c.extra_args == extra - c = m.Config(["test.py"] + extra + ["--files", "a", "b"]) - assert c.extra_args == extra diff --git a/tests/_utils/tests/test_logger.py b/tests/_utils/tests/test_logger.py deleted file mode 100644 index 637b4a5c7..000000000 --- a/tests/_utils/tests/test_logger.py +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -from .. 
import logger as m - -TEST_LINES = ( - "line 1", - "\x1b[31mfoo\x1b[0m", # ui.red("foo") - "bar", - "last line", -) - - -class TestLogger: - def test_init(self) -> None: - log = m.Log() - assert log.lines == () - assert log.dump() == "" - - def test_record_lines(self) -> None: - log = m.Log() - log.record(*TEST_LINES) - assert log.lines == TEST_LINES - assert log.dump(filter_ansi=False) == "\n".join(TEST_LINES) - - def test_record_line_with_newlines(self) -> None: - log = m.Log() - log.record("\n".join(TEST_LINES)) - assert log.lines == TEST_LINES - assert log.dump(filter_ansi=False) == "\n".join(TEST_LINES) - - def test_call(self) -> None: - log = m.Log() - log(*TEST_LINES) - assert log.lines == TEST_LINES - assert log.dump() == "line 1\nfoo\nbar\nlast line" - - def test_dump_filter(self) -> None: - log = m.Log() - log.record(*TEST_LINES) - assert log.lines == TEST_LINES - assert log.dump() == "line 1\nfoo\nbar\nlast line" - - def test_dump_index(self) -> None: - log = m.Log() - log.record(*TEST_LINES) - assert log.dump(start=1, end=3) == "foo\nbar" - - def test_clear(self) -> None: - log = m.Log() - log.record(*TEST_LINES) - assert len(log.lines) > 0 - log.clear() - assert len(log.lines) == 0 - - -def test_LOG() -> None: - assert isinstance(m.LOG, m.Log) diff --git a/tests/_utils/tests/test_system.py b/tests/_utils/tests/test_system.py deleted file mode 100644 index d110e260f..000000000 --- a/tests/_utils/tests/test_system.py +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -import sys -from pathlib import Path -from subprocess import CompletedProcess -from unittest.mock import MagicMock - -import pytest -from pytest_mock import MockerFixture - -from .. 
import system as m - - -@pytest.fixture -def mock_subprocess_run(mocker: MockerFixture) -> MagicMock: - return mocker.patch.object(m, "stdlib_run") - - -CMD = "legate script.py --cpus 4" - - -class TestSystem: - def test_init(self) -> None: - s = m.System() - assert s.dry_run is False - - def test_run(self, mock_subprocess_run: MagicMock) -> None: - s = m.System() - - expected = m.ProcessResult( - CMD, Path("test/file"), returncode=10, output="" - ) - mock_subprocess_run.return_value = CompletedProcess( - CMD, 10, stdout="" - ) - - result = s.run(CMD.split(), Path("test/file")) - mock_subprocess_run.assert_called() - - assert result == expected - - def test_dry_run(self, mock_subprocess_run: MagicMock) -> None: - s = m.System(dry_run=True) - - result = s.run(CMD.split(), Path("test/file")) - mock_subprocess_run.assert_not_called() - - assert result.output == "" - assert result.skipped - - def test_cpus(self) -> None: - s = m.System() - cpus = s.cpus - assert len(cpus) > 0 - assert all(len(cpu.ids) > 0 for cpu in cpus) - - @pytest.mark.skipif(sys.platform != "linux", reason="pynvml required") - def test_gpus(self) -> None: - s = m.System() - # can't really assume / test much here - s.gpus diff --git a/tests/_utils/tests/test_types.py b/tests/_utils/tests/test_types.py deleted file mode 100644 index 30fe05a37..000000000 --- a/tests/_utils/tests/test_types.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -from .. import types as m - - -class TestCPUInfo: - def test_fields(self) -> None: - assert set(m.CPUInfo.__dataclass_fields__) == {"ids"} - - -class TestGPUInfo: - def test_fields(self) -> None: - assert set(m.GPUInfo.__dataclass_fields__) == {"id", "total"} diff --git a/tests/_utils/tests/test_ui.py b/tests/_utils/tests/test_ui.py deleted file mode 100644 index 9cc92948a..000000000 --- a/tests/_utils/tests/test_ui.py +++ /dev/null @@ -1,103 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -"""Consolidate test configuration from command-line and environment. - -""" -from __future__ import annotations - -from datetime import timedelta - -import pytest -from pytest_mock import MockerFixture - -from .. 
import UI_WIDTH, ui as m
-
-
-@pytest.fixture(autouse=True)
-def use_plain_text(mocker: MockerFixture) -> None:
-    mocker.patch.object(m, "bright", m._text)
-    mocker.patch.object(m, "dim", m._text)
-    mocker.patch.object(m, "white", m._text)
-    mocker.patch.object(m, "cyan", m._text)
-    mocker.patch.object(m, "red", m._text)
-    mocker.patch.object(m, "green", m._text)
-    mocker.patch.object(m, "yellow", m._text)
-
-
-def test_banner_simple() -> None:
-    assert (
-        m.banner("some text")
-        == "\n" + "#" * UI_WIDTH + "\n### some text\n" + "#" * UI_WIDTH
-    )
-
-
-def test_banner_full() -> None:
-    assert (
-        m.banner("some text", char="*", width=100, details=["a", "b"])
-        == "\n"
-        + "*" * 100
-        + "\n*** \n*** some text\n*** \n*** a\n*** b\n*** \n"
-        + "*" * 100
-    )
-
-
-def test_rule_default() -> None:
-    assert m.rule() == "    " + "~" * (UI_WIDTH - 4)
-
-
-def test_rule_with_args() -> None:
-    assert m.rule(10, "-") == " " * 10 + "-" * (UI_WIDTH - 10)
-
-
-def test_shell() -> None:
-    assert m.shell("cmd --foo") == "+cmd --foo"
-
-
-def test_shell_with_char() -> None:
-    assert m.shell("cmd --foo", char="") == "cmd --foo"
-
-
-def test_passed() -> None:
-    assert m.passed("msg") == "[PASS] msg"
-
-
-def test_passed_with_details() -> None:
-    assert m.passed("msg", details=["a", "b"]) == "[PASS] msg\n   a\n   b"
-
-
-def test_failed() -> None:
-    assert m.failed("msg") == "[FAIL] msg"
-
-
-def test_failed_with_details() -> None:
-    assert m.failed("msg", details=["a", "b"]) == "[FAIL] msg\n   a\n   b"
-
-
-def test_skipped() -> None:
-    assert m.skipped("msg") == "[SKIP] msg"
-
-
-def test_summary() -> None:
-    assert (
-        m.summary("foo", 12, 11, timedelta(seconds=2.123))
-        == f"{'foo: Passed 11 of 12 tests (91.7%) in 2.12s': >{UI_WIDTH}}"
-    )
-
-
-def test_summary_no_justify() -> None:
-    assert (
-        m.summary("foo", 12, 11, timedelta(seconds=2.123), justify=False)
-        == "foo: Passed 11 of 12 tests (91.7%) in 2.12s"
-    )
diff --git a/tests/_utils/types.py b/tests/_utils/types.py
deleted file mode 100644
index 1641bd597..000000000
--- a/tests/_utils/types.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-"""Provide types that are useful throughout the test driver code.
-
-"""
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import Dict, List
-
-from typing_extensions import TypeAlias
-
-
-@dataclass(frozen=True)
-class CPUInfo:
-    """Encapsulate information about a single CPU"""
-
-    #: IDs of hyperthreading sibling cores for a given physical core
-    ids: tuple[int, ...]
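
Because CPUInfo above is declared with dataclass(frozen=True), instances are
immutable and hashable, so they compare by value and deduplicate cleanly in
sets. A standalone sketch mirroring the class (not an import from the deleted
module):

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class CPUInfo:
        ids: tuple  # hyperthread sibling IDs for one physical core

    core = CPUInfo(ids=(0, 6))
    assert core.ids == (0, 6)
    # frozen=True generates __hash__, so set-based deduplication works:
    assert {core, CPUInfo(ids=(0, 6))} == {CPUInfo(ids=(0, 6))}
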
-
-
-@dataclass(frozen=True)
-class GPUInfo:
-    """Encapsulate information about a single GPU"""
-
-    #: ID of the GPU to specify in test shards
-    id: int
-
-    #: The total framebuffer memory of this GPU
-    total: int
-
-
-#: Represent command line arguments
-ArgList = List[str]
-
-
-#: Represent str->str environment variable mappings
-EnvDict: TypeAlias = Dict[str, str]
diff --git a/tests/_utils/ui.py b/tests/_utils/ui.py
deleted file mode 100644
index eaa97d7c0..000000000
--- a/tests/_utils/ui.py
+++ /dev/null
@@ -1,229 +0,0 @@
-# Copyright 2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-"""Helper functions for simple text UI output.
-
-The color functions in this module require ``colorama`` to be installed in
-order to generate color output. If ``colorama`` is not available, plain
-text output (i.e. without ANSI color codes) will be generated.
-
-"""
-from __future__ import annotations
-
-import sys
-from datetime import timedelta
-from typing import Iterable
-
-from typing_extensions import TypeAlias
-
-from . import UI_WIDTH
-
-Details: TypeAlias = Iterable[str]
-
-
-def _text(text: str) -> str:
-    return text
-
-
-try:
-    import colorama  # type: ignore[import]
-
-    def bright(text: str) -> str:
-        return f"{colorama.Style.BRIGHT}{text}{colorama.Style.RESET_ALL}"
-
-    def dim(text: str) -> str:
-        return f"{colorama.Style.DIM}{text}{colorama.Style.RESET_ALL}"
-
-    def white(text: str) -> str:
-        return f"{colorama.Fore.WHITE}{text}{colorama.Style.RESET_ALL}"
-
-    def cyan(text: str) -> str:
-        return f"{colorama.Fore.CYAN}{text}{colorama.Style.RESET_ALL}"
-
-    def red(text: str) -> str:
-        return f"{colorama.Fore.RED}{text}{colorama.Style.RESET_ALL}"
-
-    def green(text: str) -> str:
-        return f"{colorama.Fore.GREEN}{text}{colorama.Style.RESET_ALL}"
-
-    def yellow(text: str) -> str:
-        return f"{colorama.Fore.YELLOW}{text}{colorama.Style.RESET_ALL}"
-
-    if sys.platform == "win32":
-        colorama.init()
-
-except ImportError:
-
-    bright = dim = white = cyan = red = green = yellow = _text
-
-
-def _format_details(
-    details: Iterable[str] | None = None, pre: str = "   "
-) -> str:
-    if details:
-        return f"{pre}" + f"\n{pre}".join(f"{line}" for line in details)
-    return ""
-
-
-def banner(
-    heading: str,
-    *,
-    char: str = "#",
-    width: int = UI_WIDTH,
-    details: Iterable[str] | None = None,
-) -> str:
-    """Generate a title banner, with optional details included.
-
-    Parameters
-    ----------
-    heading : str
-        Text to use for the title
-
-    char : str, optional
-        A character to use to frame the banner. (default: "#")
-
-    width : int, optional
-        How wide to draw the banner. 
(Note: user-supplied heading or
-        details will not be truncated if they exceed this width)
-
-    details : Iterable[str], optional
-        A list of lines to display inside the banner area below the heading
-
-    """
-    pre = f"{char*3} "
-    divider = char * width
-    if not details:
-        return f"\n{divider}\n{pre}{heading}\n{divider}"
-    return f"""
-{divider}
-{pre}
-{pre}{heading}
-{pre}
-{_format_details(details, pre)}
-{pre}
-{divider}"""
-
-
-def failed(msg: str, *, details: Details | None = None) -> str:
-    """Report a failed test result with a bright red [FAIL].
-
-    Parameters
-    ----------
-    msg : str
-        Text to display after [FAIL]
-
-    details : Iterable[str], optional
-        A sequence of text lines to display below the ``msg`` line
-
-    """
-    if details:
-        return f"{bright(red('[FAIL]'))} {msg}\n{_format_details(details)}"
-    return f"{bright(red('[FAIL]'))} {msg}"
-
-
-def passed(msg: str, *, details: Details | None = None) -> str:
-    """Report a passed test result with a bright green [PASS].
-
-    Parameters
-    ----------
-    msg : str
-        Text to display after [PASS]
-
-    details : Iterable[str], optional
-        A sequence of text lines to display below the ``msg`` line
-
-    """
-    if details:
-        return f"{bright(green('[PASS]'))} {msg}\n{_format_details(details)}"
-    return f"{bright(green('[PASS]'))} {msg}"
-
-
-def rule(pad: int = 4, char: str = "~") -> str:
-    """Generate a horizontal rule.
-
-    Parameters
-    ----------
-    pad : int, optional
-        How much whitespace to precede the rule. (default: 4)
-
-    char : str, optional
-        A character to use to "draw" the rule. (default: "~")
-
-    """
-    w = UI_WIDTH - pad
-    return f"{char*w: >{UI_WIDTH}}"
-
-
-def shell(cmd: str, *, char: str = "+") -> str:
-    """Report a shell command in a dim white color.
-
-    Parameters
-    ----------
-    cmd : str
-        The shell command string to display
-
-    char : str, optional
-        A character to prefix the ``cmd`` with. (default: "+")
-
-    """
-    return dim(white(f"{char}{cmd}"))
-
-
-def skipped(msg: str) -> str:
-    """Report a skipped test with a cyan [SKIP]
-
-    Parameters
-    ----------
-    msg : str
-        Text to display after [SKIP]
-
-    """
-    return f"{cyan('[SKIP]')} {msg}"
-
-
-def summary(
-    name: str,
-    total: int,
-    passed: int,
-    time: timedelta,
-    *,
-    justify: bool = True,
-) -> str:
-    """Generate a test result summary line.
-
-    The output is bright green if all tests passed, otherwise bright red.
-
-    Parameters
-    ----------
-    name : str
-        A name to display in this summary line.
-
-    total : int
-        The total number of tests to report.
-
-    passed : int
-        The number of passed tests to report.
- - time : timedelta - The time taken to run the tests - - """ - summary = ( - f"{name}: Passed {passed} of {total} tests ({passed/total*100:0.1f}%) " - f"in {time.total_seconds():0.2f}s" - if total > 0 - else f"{name}: 0 tests are running, Please check" - ) - color = green if passed == total and total > 0 else red - return bright(color(f"{summary: >{UI_WIDTH}}" if justify else summary)) From 31d6bb928ceb1f48d4e36a6497defd5b8c239fda Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 10 Oct 2022 15:05:52 -0700 Subject: [PATCH 05/89] Make the code compile with bounds checks (#648) * Make the code compile with bounds checks * Fix bounds checks issues in the CPU sorting code * Fix "out-of-bounds" accesses in unary reductions on GPUs * Update the comments to make them accurate --- src/cunumeric/sort/sort_cpu.inl | 24 ++++++++++++------- .../unary/scalar_unary_red_template.inl | 2 +- src/cunumeric/unary/unary_red.cu | 7 ++++++ 3 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/cunumeric/sort/sort_cpu.inl b/src/cunumeric/sort/sort_cpu.inl index 3f04ed303..25323801b 100644 --- a/src/cunumeric/sort/sort_cpu.inl +++ b/src/cunumeric/sort/sort_cpu.inl @@ -482,8 +482,9 @@ void sample_sort_nd(SortPiece> local_sorted, comm::coll::collAllgather( worker_counts.ptr(my_rank), worker_counts.ptr(0), 1, comm::coll::CollDataType::CollInt, comm); + auto p_worker_count = worker_counts.ptr(0); int32_t worker_count = - std::accumulate(worker_counts.ptr(0), worker_counts.ptr(num_ranks), 0, std::plus()); + std::accumulate(p_worker_count, p_worker_count + num_ranks, 0, std::plus()); if (worker_count < num_ranks) { const size_t number_sort_groups = num_ranks / num_sort_ranks; @@ -565,7 +566,8 @@ void sample_sort_nd(SortPiece> local_sorted, for (size_t sort_rank = 0; sort_rank < num_sort_ranks; ++sort_rank) { comm_size[sort_ranks[sort_rank]] = num_samples_l * sizeof(SegmentSample); } - thrust::exclusive_scan(exec, comm_size.ptr(0), comm_size.ptr(num_ranks), rdispls.ptr(0), 0); + auto p_comm_size = comm_size.ptr(0); + thrust::exclusive_scan(exec, p_comm_size, p_comm_size + num_ranks, rdispls.ptr(0), 0); comm::coll::collAlltoallv(samples_l.ptr(0), comm_size.ptr(0), // num_samples_l*size for all in sort group @@ -612,8 +614,9 @@ void sample_sort_nd(SortPiece> local_sorted, auto segment_blocks = create_buffer(num_sort_ranks * num_segments_l); // initialize sizes to send [r][segment] - auto size_send = create_buffer(num_sort_ranks * (num_segments_l + 1)); - std::fill(size_send.ptr(0), size_send.ptr(num_sort_ranks * (num_segments_l + 1)), 0); + auto size_send = create_buffer(num_sort_ranks * (num_segments_l + 1)); + auto p_size_send = size_send.ptr(0); + std::fill(p_size_send, p_size_send + num_sort_ranks * (num_segments_l + 1), 0); { for (int32_t segment = 0; segment < num_segments_l; ++segment) { @@ -685,7 +688,8 @@ void sample_sort_nd(SortPiece> local_sorted, for (size_t sort_rank = 0; sort_rank < num_sort_ranks; ++sort_rank) { comm_size[sort_ranks[sort_rank]] = num_segments_l + 1; } - thrust::exclusive_scan(exec, comm_size.ptr(0), comm_size.ptr(num_ranks), displs.ptr(0), 0); + auto p_comm_size = comm_size.ptr(0); + thrust::exclusive_scan(exec, p_comm_size, p_comm_size + num_ranks, displs.ptr(0), 0); comm::coll::collAlltoallv( size_send.ptr(0), @@ -781,10 +785,12 @@ void sample_sort_nd(SortPiece> local_sorted, recv_size_total[sort_ranks[sort_rank]] = sizeof(VAL) * size_recv[sort_rank * (num_segments_l + 1) + num_segments_l]; } + auto p_send_size_total = send_size_total.ptr(0); + auto p_recv_size_total = 
recv_size_total.ptr(0); thrust::exclusive_scan( - exec, send_size_total.ptr(0), send_size_total.ptr(num_ranks), sdispls.ptr(0), 0); + exec, p_send_size_total, p_send_size_total + num_ranks, sdispls.ptr(0), 0); thrust::exclusive_scan( - exec, recv_size_total.ptr(0), recv_size_total.ptr(num_ranks), rdispls.ptr(0), 0); + exec, p_recv_size_total, p_recv_size_total + num_ranks, rdispls.ptr(0), 0); comm::coll::collAlltoallv(val_send_buffer.ptr(0), send_size_total.ptr(0), @@ -804,9 +810,9 @@ void sample_sort_nd(SortPiece> local_sorted, } thrust::exclusive_scan( - exec, send_size_total.ptr(0), send_size_total.ptr(num_ranks), sdispls.ptr(0), 0); + exec, p_send_size_total, p_send_size_total + num_ranks, sdispls.ptr(0), 0); thrust::exclusive_scan( - exec, recv_size_total.ptr(0), recv_size_total.ptr(num_ranks), rdispls.ptr(0), 0); + exec, p_recv_size_total, p_recv_size_total + num_ranks, rdispls.ptr(0), 0); comm::coll::collAlltoallv(idc_send_buffer.ptr(0), send_size_total.ptr(0), sdispls.ptr(0), diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 482d96187..3c097aaf8 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -100,8 +100,8 @@ struct ScalarUnaryRed { void execute() const noexcept { -#ifndef LEGION_BOUNDS_CHECKS auto identity = LG_OP::identity; +#ifndef LEGION_BOUNDS_CHECKS // The constexpr if here prevents the DenseReduction from being instantiated for GPU kernels // which limits compile times and binary sizes. if constexpr (KIND != VariantKind::GPU) { diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index 1cc0d4653..99682fc8b 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -270,6 +270,13 @@ static __device__ __forceinline__ Point local_reduce(LHS& result, } #endif +#ifdef LEGION_BOUNDS_CHECKS + // Note: this isn't necessary because we know that the affine transformation on the output + // accessor will ignore coordinates of the collapsed dimension. However, Legion's bounds checks + // want the accessor to honor the sub-rectangle passed when it was created, so we need to + // put points back in the bounds to appease the checks. 
+  point[collapsed_dim] = domain.lo[collapsed_dim];
+#endif
   return point;
 }

From 9023e5bec3db087cacfa10beb6d5b84c3c577eed Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 10 Oct 2022 16:34:23 -0700
Subject: [PATCH 06/89] [pre-commit.ci] pre-commit autoupdate (#650)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/psf/black: 22.8.0 → 22.10.0](https://github.com/psf/black/compare/22.8.0...22.10.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 878ef81ac..bdc37baff 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
     hooks:
       - id: isort
   - repo: https://github.com/psf/black
-    rev: 22.8.0
+    rev: 22.10.0
    hooks:
       - id: black
   - repo: https://github.com/PyCQA/flake8

From d0231c32b0ea129ec57a3896f2b77dacf9225437 Mon Sep 17 00:00:00 2001
From: Marcin Zalewski
Date: Mon, 10 Oct 2022 22:39:53 -0700
Subject: [PATCH 07/89] Add changelog config (#605)

Co-authored-by: Marcin Zalewski
---
 .github/release.yml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 .github/release.yml

diff --git a/.github/release.yml b/.github/release.yml
new file mode 100644
index 000000000..0a37704fb
--- /dev/null
+++ b/.github/release.yml
@@ -0,0 +1,17 @@
+changelog:
+  exclude:
+    labels:
+      - category:task
+  categories:
+    - title: 🐛 Bug Fixes
+      labels:
+        - category:bug-fix
+    - title: 🚀 New Features
+      labels:
+        - category:new-feature
+    - title: 🛠️ Improvements
+      labels:
+        - category:improvement
+    - title: 📖 Documentation
+      labels:
+        - category:documentation
\ No newline at end of file

From 2d476815bf252af6f4b38dcabec1dcf63e6e789e Mon Sep 17 00:00:00 2001
From: Manolis Papadakis
Date: Wed, 12 Oct 2022 14:18:23 -0700
Subject: [PATCH 08/89] MatVec & MatVecMul use reduction stores, not outputs (#646)

Co-authored-by: Manolis Papadakis
---
 src/cunumeric/mapper.cc | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/cunumeric/mapper.cc b/src/cunumeric/mapper.cc
index 855121cd2..ada6ca268 100644
--- a/src/cunumeric/mapper.cc
+++ b/src/cunumeric/mapper.cc
@@ -119,14 +119,14 @@ std::vector CuNumericMapper::store_mappings(
       // TODO: Our actual requirements are a little less strict than this; we require each array or
       // vector to have a stride of 1 on at least one dimension.
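       // The loop below now requests exact mappings for the task's reduction stores rather
       // than its outputs, since MatVec/MatVecMul accumulate their results through reduction
       // stores instead of writing to output stores.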
       std::vector mappings;
-      auto& inputs = task.inputs();
-      auto& outputs = task.outputs();
+      auto& inputs = task.inputs();
+      auto& reductions = task.reductions();
       for (auto& input : inputs) {
         mappings.push_back(StoreMapping::default_mapping(input, options.front()));
         mappings.back().policy.exact = true;
       }
-      for (auto& output : outputs) {
-        mappings.push_back(StoreMapping::default_mapping(output, options.front()));
+      for (auto& reduction : reductions) {
+        mappings.push_back(StoreMapping::default_mapping(reduction, options.front()));
         mappings.back().policy.exact = true;
       }
       return std::move(mappings);

From 7144efefba8443cde1a644a74eb94ee6a0f2b3df Mon Sep 17 00:00:00 2001
From: Manolis Papadakis
Date: Wed, 12 Oct 2022 16:51:53 -0700
Subject: [PATCH 09/89] Remove leftover files from old build (#615)

---
 src/Makefile                   |  87 -----------------
 src/cunumeric.mk               | 166 ---------------------------------
 src/cunumeric/random/random.mk |  24 -----
 src/cunumeric/sort/sort.mk     |  50 ----------
 4 files changed, 327 deletions(-)
 delete mode 100644 src/Makefile
 delete mode 100644 src/cunumeric.mk
 delete mode 100644 src/cunumeric/random/random.mk
 delete mode 100644 src/cunumeric/sort/sort.mk

diff --git a/src/Makefile b/src/Makefile
deleted file mode 100644
index 76ecd56d8..000000000
--- a/src/Makefile
+++ /dev/null
@@ -1,87 +0,0 @@
-# Copyright 2021-2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-ifndef LEGATE_DIR
-$(error LEGATE_DIR variable is not defined, aborting build)
-endif
-ifndef OPENBLAS_PATH
-$(error OPENBLAS_PATH variable is not defined, aborting build)
-endif
-ifndef OPENBLAS_LIBNAME
-$(error OPENBLAS_PATH variable is not defined, aborting build)
-endif
-ifndef TBLIS_PATH
-$(error TBLIS_PATH variable is not defined, aborting build)
-endif
-ifeq ($(strip $(USE_CUDA)),1)
-ifndef CUTENSOR_PATH
-$(error CUTENSOR_PATH variable is not defined, aborting build)
-endif
-ifndef NCCL_PATH
-$(error NCCL_PATH variable is not defined, aborting build)
-endif
-endif # ifeq ($(strip $(USE_CUDA)),1)
-ifndef THRUST_PATH
-$(error THRUST_PATH variable is not defined, aborting build)
-endif
-
-include $(LEGATE_DIR)/share/legate/config.mk
-
-LIBNAME = libcunumeric
-
-CURAND_PATH ?=
-
-CC_FLAGS ?=
-CC_FLAGS += -I. -I$(OPENBLAS_PATH)/include -I$(TBLIS_PATH)/include -I$(THRUST_PATH)
-CC_FLAGS += -DTHRUST_HOST_SYSTEM=THRUST_HOST_SYSTEM_CPP -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP
-
-ifdef CURAND_PATH
-BUILD_CURAND_TASKS = 1
-CC_FLAGS += -I$(CURAND_PATH)/include -DCUNUMERIC_CURAND_FOR_CPU_BUILD
-else
-ifeq ($(strip $(USE_CUDA)),1)
-BUILD_CURAND_TASKS = 1
-else
-BUILD_CURAND_TASKS = 0
-endif
-endif
-
-LD_FLAGS ?=
-LD_FLAGS += -L$(OPENBLAS_PATH)/lib -l$(OPENBLAS_LIBNAME) -Wl,-rpath,$(OPENBLAS_PATH)/lib
-LD_FLAGS += -L$(TBLIS_PATH)/lib -ltblis -Wl,-rpath,$(TBLIS_PATH)/lib
-ifeq ($(strip $(USE_CUDA)),1)
-LD_FLAGS += -lcublas -lcusolver -lcufft
-LD_FLAGS += -L$(CUTENSOR_PATH)/lib -lcutensor -Wl,-rpath,$(CUTENSOR_PATH)/lib
-LD_FLAGS += -L$(NCCL_PATH)/lib -lnccl -Wl,-rpath,$(NCCL_PATH)/lib
-endif
-NVCC_FLAGS ?=
-NVCC_FLAGS += -I. -I$(THRUST_PATH) -I$(CUTENSOR_PATH)/include -I$(NCCL_PATH)/include -Wno-deprecated-declarations
-
-ifeq ($(strip $(DEBUG)),1)
-CC_FLAGS += -DDEBUG_CUNUMERIC
-NVCC_FLAGS += -DDEBUG_CUNUMERIC
-endif
-
-CHECK_BOUNDS ?= 0
-ifeq ($(strip $(CHECK_BOUNDS)),1)
-CC_FLAGS += -DBOUNDS_CHECKS
-endif
-
-GEN_CPU_SRC =
-GEN_GPU_SRC =
-
-include cunumeric.mk
-
-include $(LEGATE_DIR)/share/legate/legate.mk
diff --git a/src/cunumeric.mk b/src/cunumeric.mk
deleted file mode 100644
index 1b7f17080..000000000
--- a/src/cunumeric.mk
+++ /dev/null
@@ -1,166 +0,0 @@
-# Copyright 2021-2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# List all the application source files that need OpenMP separately
-# since we have to add the -fopenmp flag to CC_FLAGS for them
-GEN_CPU_SRC += cunumeric/ternary/where.cc            \
-               cunumeric/scan/scan_global.cc         \
-               cunumeric/scan/scan_local.cc          \
-               cunumeric/binary/binary_op.cc         \
-               cunumeric/binary/binary_red.cc        \
-               cunumeric/bits/packbits.cc            \
-               cunumeric/bits/unpackbits.cc          \
-               cunumeric/unary/scalar_unary_red.cc   \
-               cunumeric/unary/unary_op.cc           \
-               cunumeric/unary/unary_red.cc          \
-               cunumeric/unary/convert.cc            \
-               cunumeric/nullary/arange.cc           \
-               cunumeric/nullary/eye.cc              \
-               cunumeric/nullary/fill.cc             \
-               cunumeric/nullary/window.cc           \
-               cunumeric/index/advanced_indexing.cc  \
-               cunumeric/index/choose.cc             \
-               cunumeric/index/repeat.cc             \
-               cunumeric/index/wrap.cc               \
-               cunumeric/index/zip.cc                \
-               cunumeric/item/read.cc                \
-               cunumeric/item/write.cc               \
-               cunumeric/matrix/contract.cc          \
-               cunumeric/matrix/diag.cc              \
-               cunumeric/matrix/gemm.cc              \
-               cunumeric/matrix/matmul.cc            \
-               cunumeric/matrix/matvecmul.cc         \
-               cunumeric/matrix/dot.cc               \
-               cunumeric/matrix/potrf.cc             \
-               cunumeric/matrix/solve.cc             \
-               cunumeric/matrix/syrk.cc              \
-               cunumeric/matrix/tile.cc              \
-               cunumeric/matrix/transpose.cc         \
-               cunumeric/matrix/trilu.cc             \
-               cunumeric/matrix/trsm.cc              \
-               cunumeric/matrix/util.cc              \
-               cunumeric/random/rand.cc              \
-               cunumeric/search/argwhere.cc          \
-               cunumeric/search/nonzero.cc           \
-               cunumeric/set/unique.cc               \
-               cunumeric/set/unique_reduce.cc        \
-               cunumeric/stat/bincount.cc            \
-               cunumeric/convolution/convolve.cc     \
-               cunumeric/transform/flip.cc           \
-               cunumeric/arg.cc                      \
-               cunumeric/mapper.cc
-
-GEN_CPU_SRC += cunumeric/cephes/chbevl.cc \
-               cunumeric/cephes/i0.cc
-
-ifeq ($(strip $(USE_OPENMP)),1)
-GEN_CPU_SRC += cunumeric/ternary/where_omp.cc            \
-               cunumeric/scan/scan_global_omp.cc         \
-               cunumeric/scan/scan_local_omp.cc          \
-               cunumeric/binary/binary_op_omp.cc         \
-               cunumeric/binary/binary_red_omp.cc        \
-               cunumeric/bits/packbits_omp.cc            \
-               cunumeric/bits/unpackbits_omp.cc          \
-               cunumeric/unary/unary_op_omp.cc           \
-               cunumeric/unary/scalar_unary_red_omp.cc   \
-               cunumeric/unary/unary_red_omp.cc          \
-               cunumeric/unary/convert_omp.cc            \
-               cunumeric/nullary/arange_omp.cc           \
-               cunumeric/nullary/eye_omp.cc              \
-               cunumeric/nullary/fill_omp.cc             \
-               cunumeric/nullary/window_omp.cc           \
-               cunumeric/index/advanced_indexing_omp.cc  \
-               cunumeric/index/choose_omp.cc             \
-               cunumeric/index/repeat_omp.cc             \
-               cunumeric/index/wrap_omp.cc               \
-               cunumeric/index/zip_omp.cc                \
-               cunumeric/matrix/contract_omp.cc          \
-               cunumeric/matrix/diag_omp.cc              \
-               cunumeric/matrix/gemm_omp.cc              \
-               cunumeric/matrix/matmul_omp.cc            \
-               cunumeric/matrix/matvecmul_omp.cc         \
-               cunumeric/matrix/dot_omp.cc               \
-               cunumeric/matrix/potrf_omp.cc             \
-               cunumeric/matrix/solve_omp.cc             \
-               cunumeric/matrix/syrk_omp.cc              \
-               cunumeric/matrix/tile_omp.cc              \
-               cunumeric/matrix/transpose_omp.cc         \
-               cunumeric/matrix/trilu_omp.cc             \
-               cunumeric/matrix/trsm_omp.cc              \
-               cunumeric/matrix/util_omp.cc              \
-               cunumeric/random/rand_omp.cc              \
-               cunumeric/search/argwhere_omp.cc          \
-               cunumeric/search/nonzero_omp.cc           \
-               cunumeric/set/unique_omp.cc               \
-               cunumeric/stat/bincount_omp.cc            \
-               cunumeric/convolution/convolve_omp.cc     \
-               cunumeric/transform/flip_omp.cc
-endif
-
-GEN_GPU_SRC += cunumeric/ternary/where.cu            \
-               cunumeric/scan/scan_global.cu         \
-               cunumeric/scan/scan_local.cu          \
-               cunumeric/binary/binary_op.cu         \
-               cunumeric/binary/binary_red.cu        \
-               cunumeric/bits/packbits.cu            \
-               cunumeric/bits/unpackbits.cu          \
-               cunumeric/unary/scalar_unary_red.cu   \
-               cunumeric/unary/unary_red.cu          \
-               cunumeric/unary/unary_op.cu           \
-               cunumeric/unary/convert.cu            \
-               cunumeric/nullary/arange.cu           \
-               cunumeric/nullary/eye.cu              \
-               cunumeric/nullary/fill.cu             \
-               cunumeric/nullary/window.cu           \
-               cunumeric/index/advanced_indexing.cu  \
-               cunumeric/index/choose.cu             \
-               cunumeric/index/repeat.cu             \
-               cunumeric/index/wrap.cu               \
-               cunumeric/index/zip.cu                \
-               cunumeric/item/read.cu                \
-               cunumeric/item/write.cu               \
-               cunumeric/matrix/contract.cu          \
-               cunumeric/matrix/diag.cu              \
-               cunumeric/matrix/gemm.cu              \
-               cunumeric/matrix/matmul.cu            \
-               cunumeric/matrix/matvecmul.cu         \
-               cunumeric/matrix/dot.cu               \
-               cunumeric/matrix/potrf.cu             \
-               cunumeric/matrix/solve.cu             \
-               cunumeric/matrix/syrk.cu              \
-               cunumeric/matrix/tile.cu              \
-               cunumeric/matrix/transpose.cu         \
-               cunumeric/matrix/trilu.cu             \
-               cunumeric/matrix/trsm.cu              \
-               cunumeric/random/rand.cu              \
-               cunumeric/search/argwhere.cu          \
-               cunumeric/search/nonzero.cu           \
-               cunumeric/set/unique.cu               \
-               cunumeric/stat/bincount.cu            \
-               cunumeric/convolution/convolve.cu     \
-               cunumeric/fft/fft.cu                  \
-               cunumeric/transform/flip.cu           \
-               cunumeric/cudalibs.cu                 \
-               cunumeric/cunumeric.cu
-
-include cunumeric/sort/sort.mk
-
-ifeq ($(strip $(BUILD_CURAND_TASKS)),1)
-include cunumeric/random/random.mk
-endif
-
-GEN_CPU_SRC += cunumeric/cunumeric.cc # This must always be the last file!
-                                      # It guarantees we do our registration callback
-                                      # only after all task variants are recorded
diff --git a/src/cunumeric/random/random.mk b/src/cunumeric/random/random.mk
deleted file mode 100644
index e2b2f20a6..000000000
--- a/src/cunumeric/random/random.mk
+++ /dev/null
@@ -1,24 +0,0 @@
-# Copyright 2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-GEN_CPU_SRC += cunumeric/random/bitgenerator.cc                              \
-               cunumeric/random/randutil/generator_host.cc                  \
-               cunumeric/random/randutil/generator_host_straightforward.cc  \
-               cunumeric/random/randutil/generator_host_advanced.cc
-
-GEN_GPU_SRC += cunumeric/random/bitgenerator.cu                                \
-               cunumeric/random/randutil/generator_device.cu                  \
-               cunumeric/random/randutil/generator_device_straightforward.cu  \
-               cunumeric/random/randutil/generator_device_advanced.cu
\ No newline at end of file
diff --git a/src/cunumeric/sort/sort.mk b/src/cunumeric/sort/sort.mk
deleted file mode 100644
index f13422c69..000000000
--- a/src/cunumeric/sort/sort.mk
+++ /dev/null
@@ -1,50 +0,0 @@
-# Copyright 2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-GEN_CPU_SRC += cunumeric/sort/sort.cc \
-               cunumeric/sort/searchsorted.cc
-ifeq ($(strip $(USE_OPENMP)),1)
-GEN_CPU_SRC += cunumeric/sort/sort_omp.cc \
-               cunumeric/sort/searchsorted_omp.cc
-endif
-
-GEN_GPU_SRC += cunumeric/sort/sort.cu                   \
-               cunumeric/sort/searchsorted.cu           \
-               cunumeric/sort/cub_sort_bool.cu          \
-               cunumeric/sort/cub_sort_int8.cu          \
-               cunumeric/sort/cub_sort_int16.cu         \
-               cunumeric/sort/cub_sort_int32.cu         \
-               cunumeric/sort/cub_sort_int64.cu         \
-               cunumeric/sort/cub_sort_uint8.cu         \
-               cunumeric/sort/cub_sort_uint16.cu        \
-               cunumeric/sort/cub_sort_uint32.cu        \
-               cunumeric/sort/cub_sort_uint64.cu        \
-               cunumeric/sort/cub_sort_half.cu          \
-               cunumeric/sort/cub_sort_float.cu         \
-               cunumeric/sort/cub_sort_double.cu        \
-               cunumeric/sort/thrust_sort_bool.cu       \
-               cunumeric/sort/thrust_sort_int8.cu       \
-               cunumeric/sort/thrust_sort_int16.cu      \
-               cunumeric/sort/thrust_sort_int32.cu      \
-               cunumeric/sort/thrust_sort_int64.cu      \
-               cunumeric/sort/thrust_sort_uint8.cu      \
-               cunumeric/sort/thrust_sort_uint16.cu     \
-               cunumeric/sort/thrust_sort_uint32.cu     \
-               cunumeric/sort/thrust_sort_uint64.cu     \
-               cunumeric/sort/thrust_sort_half.cu       \
-               cunumeric/sort/thrust_sort_float.cu      \
-               cunumeric/sort/thrust_sort_double.cu     \
-               cunumeric/sort/thrust_sort_complex64.cu  \
-               cunumeric/sort/thrust_sort_complex128.cu

From b01ee12c6e70713cfcbd2c47ed6627aef59bdf28 Mon Sep 17 00:00:00 2001
From: Jeremy
Date: Wed, 12 Oct 2022 16:52:34 -0700
Subject: [PATCH 10/89] Set default generator based on whether ninja is available (#602)

* check for ninja to determine default CMake generator

* Address PR comments, fix typos

Co-authored-by: Manolis Papadakis
Co-authored-by: Manolis Papadakis
---
 install.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/install.py b/install.py
index 11d838b32..febf25b8b 100755
--- a/install.py
+++ b/install.py
@@ -308,7 +308,7 @@ def validate_path(path):

     cmake_flags = []
     if cmake_generator:
-        cmake_flags += [f"-G{cmake_generator}"]
+        cmake_flags += [f"-G'{cmake_generator}'"]
     if debug or verbose:
         cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")]
@@ -520,8 +520,8 @@ def driver():
         "--cmake-generator",
         dest="cmake_generator",
         required=False,
-        default="Ninja",
-        choices=["Ninja", "Unix Makefiles"],
+        default=(None if shutil.which("ninja") is None else "Ninja"),
+        choices=["Ninja", "Unix Makefiles", None],
         help="The CMake makefiles generator",
     )
     parser.add_argument(

From 50d837082c0f8faa6a49fad3467cbe39d58d4877 Mon Sep 17 00:00:00 2001
From: Manolis Papadakis
Date: Wed, 12 Oct 2022 16:53:28 -0700
Subject: [PATCH 11/89] Allow args to be passed by position and name in auto_convert (#640)

* Allow args to be passed by position and name in auto_convert

* Address PR comments

* Address PR comments
---
 cunumeric/array.py    | 11 +++---
 cunumeric/deferred.py | 91 +++++++++++++++++++++++++------------------
 2 files changed, 59 insertions(+), 43 deletions(-)

diff --git a/cunumeric/array.py b/cunumeric/array.py
index dd389f995..64784d59a 100644
--- a/cunumeric/array.py
+++ b/cunumeric/array.py
@@ -94,7 +94,8 @@ def add_boilerplate(
     parameter (if present), to cuNumeric ndarrays.

    * Convert the special "where" parameter (if present) to a valid predicate.

    """
-    keys: Set[str] = set(array_params)
+    keys = set(array_params)
+    assert len(keys) == len(array_params)

     def decorator(func: Callable[P, R]) -> Callable[P, R]:
         assert not hasattr(
@@ -104,18 +105,18 @@ def decorator(func: Callable[P, R]) -> Callable[P, R]:
         # For each parameter specified by name, also consider the case where
         # it's passed as a positional parameter.
         indices: Set[int] = set()
-        all_formals: Set[str] = set()
         where_idx: Optional[int] = None
         out_idx: Optional[int] = None
-        for (idx, param) in enumerate(signature(func).parameters):
-            all_formals.add(param)
+        params = signature(func).parameters
+        extra = keys - set(params)
+        assert len(extra) == 0, f"unknown parameter(s): {extra}"
+        for (idx, param) in enumerate(params):
             if param == "where":
                 where_idx = idx
             elif param == "out":
                 out_idx = idx
             elif param in keys:
                 indices.add(idx)
-        assert len(keys - all_formals) == 0, "unkonwn parameter(s)"

         @wraps(func)
         def wrapper(*args: Any, **kwargs: Any) -> R:
diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py
index 04fe6e829..54f481977 100644
--- a/cunumeric/deferred.py
+++ b/cunumeric/deferred.py
@@ -18,13 +18,13 @@
 from collections import Counter
 from collections.abc import Iterable
 from enum import IntEnum, unique
-from functools import reduce
+from functools import reduce, wraps
+from inspect import signature
 from itertools import product
 from typing import (
     TYPE_CHECKING,
     Any,
     Callable,
-    Collection,
     Dict,
     Optional,
     Sequence,
@@ -95,24 +95,39 @@ def _prod(tpl: Sequence[int]) -> int:

 def auto_convert(
-    indices: Collection[int], keys: Sequence[str] = []
+    *thunk_params: str,
 ) -> Callable[[Callable[P, R]], Callable[P, R]]:
-    indices = set(indices)
+    """
+    Converts all named parameters to DeferredArrays.
+    """
+    keys = set(thunk_params)
+    assert len(keys) == len(thunk_params)

     def decorator(func: Callable[P, R]) -> Callable[P, R]:
-        def wrapper(*args: Any, **kwargs: Any) -> Any:
+        assert not hasattr(
+            func, "__wrapped__"
+        ), "this decorator must be the innermost"
+
+        # For each parameter specified by name, also consider the case where
+        # it's passed as a positional parameter.
+        params = signature(func).parameters
+        extra = keys - set(params)
+        assert len(extra) == 0, f"unknown parameter(s): {extra}"
+        indices = {idx for (idx, param) in enumerate(params) if param in keys}
+
+        @wraps(func)
+        def wrapper(*args: Any, **kwargs: Any) -> R:
+            # Convert relevant arguments to DeferredArrays
             self = args[0]
             args = tuple(
-                self.runtime.to_deferred_array(arg) if idx in indices else arg
+                self.runtime.to_deferred_array(arg)
+                if idx in indices and arg is not None
+                else arg
                 for (idx, arg) in enumerate(args)
             )
-            for key in keys:
-                v = kwargs.get(key, None)
-                if v is None:
-                    continue
-                v = self.runtime.to_deferred_array(v)
-                kwargs[key] = v
+            for (k, v) in kwargs.items():
+                if k in keys and v is not None:
+                    kwargs[k] = self.runtime.to_deferred_array(v)

             return func(*args, **kwargs)
@@ -350,7 +365,7 @@ def conj(self) -> NumPyThunk:
         return result

     # Copy source array to the destination array
-    @auto_convert([1])
+    @auto_convert("rhs")
     def copy(self, rhs: Any, deep: bool = False) -> None:
         if self.scalar and rhs.scalar:
             self.base.set_storage(rhs.base.storage)
@@ -858,7 +873,7 @@ def get_item(self, key: Any) -> NumPyThunk:

         return result

-    @auto_convert([2])
+    @auto_convert("rhs")
     def set_item(self, key: Any, rhs: Any) -> None:
         assert self.dtype == rhs.dtype
         # Check to see if this is advanced indexing or not
@@ -1178,7 +1193,7 @@ def swapaxes(self, axis1: int, axis2: int) -> DeferredArray:
         return result

     # Convert the source array to the destination array
-    @auto_convert([1])
+    @auto_convert("rhs")
     def convert(
         self,
         rhs: Any,
@@ -1214,7 +1229,7 @@
         if temporary:
             lhs.set_linear()

-    @auto_convert([1, 2])
+    @auto_convert("v", "lhs")
     def convolve(self, v: Any, lhs: Any, mode: ConvolveMode) -> None:
         input = self.base
         filter = v.base
@@ -1249,7 +1264,7 @@

         task.execute()

-    @auto_convert([1])
+    @auto_convert("rhs")
     def fft(
         self,
         rhs: Any,
@@ -1327,7 +1342,7 @@ def fill(self, numpy_array: Any) -> None:
         )
         self._fill(store)

-    @auto_convert([2, 4])
+    @auto_convert("rhs1_thunk", "rhs2_thunk")
     def contract(
         self,
         lhs_modes: list[str],
@@ -1595,7 +1610,7 @@ def choose(self, rhs: Any, *args: Any) -> None:
         task.execute()

     # Create or extract a diagonal from a matrix
-    @auto_convert([1])
+    @auto_convert("rhs")
     def _diag_helper(
         self,
         rhs: Any,
@@ -1712,7 +1727,7 @@ def create_scalar(value: Any, dtype: np.dtype[Any]) -> Any:
         task.execute()

     # Tile the src array onto the destination array
-    @auto_convert([1])
+    @auto_convert("rhs")
     def tile(self, rhs: Any, reps: Union[Any, Sequence[int]]) -> None:
         src_array = rhs
         dst_array = self
@@ -1739,7 +1754,7 @@ def transpose(
         result = DeferredArray(self.runtime, result, self.dtype)
         return result

-    @auto_convert([1])
+    @auto_convert("rhs")
     def trilu(self, rhs: Any, k: int, lower: bool) -> None:
         lhs = self.base
         rhs = rhs._broadcast(lhs.shape)
@@ -1780,7 +1795,7 @@
         task.execute()
         return out

-    @auto_convert([1])
+    @auto_convert("rhs")
     def flip(self, rhs: Any, axes: Union[None, int, tuple[int, ...]]) -> None:
         input = rhs.base
         output = self.base
@@ -1801,7 +1816,7 @@ def flip(self, rhs: Any, axes: Union[None, int, tuple[int, ...]]) -> None:
         task.execute()

     # Perform a bin count operation on the array
-    @auto_convert([1], ["weights"])
+    @auto_convert("rhs", "weights")
     def bincount(self, rhs: Any, weights: Optional[NumPyThunk] = None) -> None:
         weight_array = weights
         src_array = rhs
@@ -2872,7 +2887,7 @@ def random_integer(
         self.random(RandGenCode.INTEGER, [low, high])

     # Perform the unary operation and put the result in the array
-    @auto_convert([2])
+    @auto_convert("src")
     def unary_op(
         self,
         op: UnaryOpCode,
@@ -2901,7 +2916,7 @@

     # Perform a unary reduction operation from one set of dimensions down to
     # fewer
-    @auto_convert([2])
+    @auto_convert("src")
     def unary_reduction(
         self,
         op: UnaryRedCode,
@@ -3017,7 +3032,7 @@ def isclose(
         self.binary_op(BinaryOpCode.ISCLOSE, rhs1, rhs2, True, args)

     # Perform the binary operation and put the result in the lhs array
-    @auto_convert([2, 3])
+    @auto_convert("src1", "src2")
     def binary_op(
         self,
         op_code: BinaryOpCode,
@@ -3043,7 +3058,7 @@

         task.execute()

-    @auto_convert([2, 3])
+    @auto_convert("src1", "src2")
     def binary_reduction(
         self,
         op: BinaryOpCode,
@@ -3079,7 +3094,7 @@

         task.execute()

-    @auto_convert([1, 2, 3])
+    @auto_convert("src1", "src2", "src3")
     def where(self, src1: Any, src2: Any, src3: Any) -> None:
         lhs = self.base
         rhs1 = src1._broadcast(lhs.shape)
@@ -3138,15 +3153,15 @@ def compute_strides(shape: NdShape) -> tuple[int, ...]:
             stride *= dim
         return result

-    @auto_convert([1])
+    @auto_convert("src")
     def cholesky(self, src: Any, no_tril: bool = False) -> None:
         cholesky(self, src, no_tril)

-    @auto_convert([1, 2])
+    @auto_convert("a", "b")
     def solve(self, a: Any, b: Any) -> None:
         solve(self, a, b)

-    @auto_convert([2])
+    @auto_convert("rhs")
     def scan(
         self,
         op: int,
@@ -3223,7 +3238,7 @@ def unique(self) -> NumPyThunk:

         return result

-    @auto_convert([1, 2])
+    @auto_convert("rhs", "v")
     def searchsorted(self, rhs: Any, v: Any, side: SortSide = "left") -> None:
         task = self.context.create_task(CuNumericOpCode.SEARCHSORTED)
@@ -3249,7 +3264,7 @@ def searchsorted(self, rhs: Any, v: Any, side: SortSide = "left") -> None:
         task.add_scalar_arg(rhs.size, ty.int64)
         task.execute()

-    @auto_convert([1])
+    @auto_convert("rhs")
     def sort(
         self,
         rhs: Any,
@@ -3274,7 +3289,7 @@

         sort(self, rhs, argsort, axis, stable)

-    @auto_convert([1])
+    @auto_convert("rhs")
     def partition(
         self,
         rhs: Any,
@@ -3305,7 +3320,7 @@ def create_window(self, op_code: WindowOpCode, M: int, *args: Any) -> None:
             task.add_scalar_arg(arg, ty.float64)
         task.execute()

-    @auto_convert([1])
+    @auto_convert("src")
     def packbits(
         self, src: Any, axis: Union[int, None], bitorder: BitOrder
     ) -> None:
@@ -3321,7 +3336,7 @@
         task.add_constraint(p_in <= p_out * scale)  # type: ignore
         task.execute()

-    @auto_convert([1])
+    @auto_convert("src")
     def unpackbits(
         self, src: Any, axis: Union[int, None], bitorder: BitOrder
     ) -> None:
@@ -3337,7 +3352,7 @@
         task.add_constraint(p_out <= p_in * scale)  # type: ignore
         task.execute()

-    @auto_convert([1])
+    @auto_convert("src")
     def _wrap(self, src: Any, new_len: int) -> None:
         if src.base.kind == Future or src.base.transformed:
             src = src._convert_future_to_regionfield()

From 51b027ea958d05e3d2c84bcefded5a7a2a1c6392 Mon Sep 17 00:00:00 2001
From: Jeremy
Date: Thu, 13 Oct 2022 06:57:57 -0700
Subject: [PATCH 12/89] force positive values for log and sqrt tests (#580)

---
 tests/integration/test_unary_ufunc.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/integration/test_unary_ufunc.py b/tests/integration/test_unary_ufunc.py
index ae624850f..5f43fbf37 100644
--- a/tests/integration/test_unary_ufunc.py
+++ b/tests/integration/test_unary_ufunc.py
@@ -215,7 +215,7 @@ def test_log_ops(op):
     check_op_input(op, astype="F", out_dtype="D")

     check_op_input(op, randint=True, a_min=3, a_max=10)
-    check_op_input(op, shape=(1,), offset=3)
+    check_op_input(op, shape=(1,), a_min=0.1, offset=3)


 even_root_ops = ("sqrt",)
@@ -231,7 +231,7 @@ def test_even_root_ops(op):
     # Complex inputs can be negative
     check_op_input(op, astype="F", out_dtype="D")
     check_op_input(op, randint=True, a_min=3, a_max=10)
-    check_op_input(op, shape=(1,), offset=3)
+    check_op_input(op, shape=(1,), a_min=0.1, offset=3)


 odd_root_ops = ("cbrt",)

From 1e16e9c5c3ca5afed17bcdcd4d59de3e8519f9d2 Mon Sep 17 00:00:00 2001
From: Manolis Papadakis
Date: Fri, 14 Oct 2022 09:03:36 -0700
Subject: [PATCH 13/89] Use right type in shmem calculation for kernels using reduce_output (#659)

Co-authored-by: Manolis Papadakis
---
 src/cunumeric/index/advanced_indexing.cu | 2 +-
 src/cunumeric/index/repeat.cu            | 2 +-
 src/cunumeric/search/nonzero.cuh         | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/cunumeric/index/advanced_indexing.cu b/src/cunumeric/index/advanced_indexing.cu
index fde5590fd..a7d3f2f94 100644
--- a/src/cunumeric/index/advanced_indexing.cu
+++ b/src/cunumeric/index/advanced_indexing.cu
@@ -94,7 +94,7 @@ struct AdvancedIndexingImplBody {

     const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
-    size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(int64_t);
+    size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(uint64_t);

     if (blocks >= MAX_REDUCTION_CTAS) {
       const size_t iters = (blocks + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS;
diff --git a/src/cunumeric/index/repeat.cu b/src/cunumeric/index/repeat.cu
index 30f0c2aff..1b658874a 100644
--- a/src/cunumeric/index/repeat.cu
+++ b/src/cunumeric/index/repeat.cu
@@ -139,7 +139,7 @@ struct RepeatImplBody {
     DeviceScalarReductionBuffer> sum(stream);
     const size_t blocks_count = (extent + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
-    const size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(int64_t);
+    const size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(uint64_t);

     if (blocks_count > MAX_REDUCTION_CTAS) {
       const size_t iters = (blocks_count + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS;
diff --git a/src/cunumeric/search/nonzero.cuh b/src/cunumeric/search/nonzero.cuh
index 1b777b34c..e9af92578 100644
--- a/src/cunumeric/search/nonzero.cuh
+++ b/src/cunumeric/search/nonzero.cuh
@@ -63,7 +63,7 @@ int64_t compute_offsets(const AccessorRO& in,
   DeviceScalarReductionBuffer> size(stream);

   const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK;
-  size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(int64_t);
+  size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(uint64_t);

   if (blocks >= MAX_REDUCTION_CTAS) {
     const size_t iters = (blocks + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS;

From 7a781331bba431cfadd3f5151ed253e7a1d3459c Mon Sep 17 00:00:00 2001
From: robinw0928 <104830875+robinw0928@users.noreply.github.com>
Date: Mon, 17 Oct 2022 13:04:23 +0800
Subject: [PATCH 14/89] Enhance test_tri*.py. (#658)

---
 tests/integration/test_tri.py           | 139 +++++++++++--
 tests/integration/test_trilu.py         |  90 ++++++----
 tests/integration/test_trilu_indices.py | 226 ++++++++++++++++++++----
 tests/integration/utils/utils.py        |   2 +-
 4 files changed, 368 insertions(+), 89 deletions(-)

diff --git a/tests/integration/test_tri.py b/tests/integration/test_tri.py
index 13b9db665..127180064 100644
--- a/tests/integration/test_tri.py
+++ b/tests/integration/test_tri.py
@@ -15,36 +15,137 @@
 import numpy as np
 import pytest
+from utils.utils import check_module_function

 import cunumeric as num

-KS = [0, -1, 1, -2, 2]
+KS = (0, -1, 1, -2, 2)
+N = 100

-def _test(func, k):
-    num_f = getattr(num, func)
-    np_f = getattr(np, func)
+@pytest.mark.parametrize("n", (0, 1, N), ids=lambda n: f"(n={n})")
+def test_tri_n(n):
+    print_msg = f"np & cunumeric.tri({n})"
+    check_module_function("tri", [n], {}, print_msg)

-    a = num_f(100, k=k)
-    an = np_f(100, k=k)
-    assert num.array_equal(a, an)

-    a = num_f(100, 50, k=k)
-    an = np_f(100, 50, k=k)
-    assert num.array_equal(a, an)
+@pytest.mark.parametrize("k", KS + (-N, N), ids=lambda k: f"(k={k})")
+@pytest.mark.parametrize("m", (1, 10, N), ids=lambda m: f"(M={m})")
+@pytest.mark.parametrize("n", (1, N), ids=lambda n: f"(n={n})")
+def test_tri_full(n, m, k):
+    print_msg = f"np & cunumeric.tri({n}, k={k}, M={m})"
+    check_module_function("tri", [n], {"k": k, "M": m}, print_msg)

-    a = num_f(100, k=k, dtype=int)
-    an = np_f(100, k=k, dtype=int)
-    assert num.array_equal(a, an)

-    a = num_f(100, k=k, dtype=bool)
-    an = np_f(100, k=k, dtype=bool)
-    assert num.array_equal(a, an)
+@pytest.mark.parametrize("m", (0, None), ids=lambda m: f"(M={m})")
+def test_tri_m(m):
+    print_msg = f"np & cunumeric.tri({N}, M={m})"
+    check_module_function("tri", [N], {"M": m}, print_msg)


-@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})")
-def test_tri(k):
-    _test("tri", k)
+DTYPES = (
+    int,
+    float,
+    bool,
+    pytest.param(None, marks=pytest.mark.xfail),
+)
+
+
+@pytest.mark.parametrize("dtype", DTYPES, ids=str)
+def test_tri_dtype(dtype):
+    # cuNumeric: returns an array with dtype=int
+    # Numpy: returns an array with dtype=float
+    print_msg = f"np & cunumeric.tri({N}, dtype={dtype})"
+    check_module_function("tri", [N], {"dtype": dtype}, print_msg)
+
+
+@pytest.mark.xfail
+@pytest.mark.parametrize("k", (-10.5, 0.0, 10.5), ids=lambda k: f"(k={k})")
+def test_tri_float_k(k):
+    # cuNumeric: struct.error: required argument is not an integer
+    # Numpy: pass
+    print_msg = f"np & cunumeric.tri({N}, k={k})"
+    check_module_function("tri", [N], {"k": k}, print_msg)
+
+
+class TestTriErrors:
+    def test_negative_n(self):
+        with pytest.raises(ValueError):
+            num.tri(-100)
+
+    @pytest.mark.xfail
+    def test_negative_n_DIVERGENCE(self):
+        # np.tri(-100) returns empty array
+        # num.tri(-100) raises ValueError
+        n = -100
+        np_res = np.tri(n)
+        num_res = num.tri(n)
+        assert np.array_equal(np_res, num_res)
+
+    @pytest.mark.parametrize("n", (-10.5, 0.0, 10.5))
+    def test_float_n(self, n):
+        msg = "expected a sequence of integers or a single integer"
+        with pytest.raises(TypeError, match=msg):
+            num.tri(n)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("n", (-10.5, 0.0, 10.5))
+    def test_float_n_DIVERGENCE(self, n):
+        # np.tri(-10.5) returns empty array
+        # np.tri(0.0) returns empty array
+        # np.tri(10.5) returns array
+        # num.tri(-10.5) raises TypeError
+        # num.tri(0.0) raises TypeError
+        # num.tri(10.5) raises TypeError
+        np_res = np.tri(n)
+        num_res = num.tri(n)
+        assert np.array_equal(np_res, num_res)
+
+    def test_negative_m(self):
+        with pytest.raises(ValueError):
+            num.tri(N, M=-10)
+
+    @pytest.mark.xfail
+    def test_negative_m_DIVERGENCE(self):
+        # np.tri(100, M=-10) returns empty array
+        # num.tri(100, M=-10) raises ValueError
+        m = -10
+        np_res = np.tri(N, M=m)
+        num_res = num.tri(N, M=m)
+        assert np.array_equal(np_res, num_res)
+
+    @pytest.mark.parametrize("m", (-10.5, 0.0, 10.5))
+    def test_float_m(self, m):
+        msg = "expected a sequence of integers or a single integer"
+        with pytest.raises(TypeError, match=msg):
+            num.tri(N, M=m)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("m", (-10.5, 0.0, 10.5))
+    def test_float_m_DIVERGENCE(self, m):
+        # np.tri(100, M=-10.5) returns empty array
+        # np.tri(100, M=0.0) returns empty array
+        # np.tri(100, M=10.5) returns array
+        # num.tri(100, M=-10.5) raises TypeError
+        # num.tri(100, M=0.0) raises TypeError
+        # num.tri(100, M=10.5) raises TypeError
+        np_res = np.tri(N, M=m)
+        num_res = num.tri(N, M=m)
+        assert np.array_equal(np_res, num_res)
+
+    def test_n_none(self):
+        msg = "expected a sequence of integers or a single integer"
+        with pytest.raises(TypeError, match=msg):
+            num.tri(None)
+
+    @pytest.mark.xfail
+    def test_k_none(self):
+        # In cuNumeric, it raises struct.error,
+        # msg is required argument is not an integer
+        # In Numpy, it raises TypeError,
+        # msg is bad operand type for unary -: 'NoneType'
+        with pytest.raises(TypeError):
+            num.tri(N, k=None)


 if __name__ == "__main__":
diff --git a/tests/integration/test_trilu.py b/tests/integration/test_trilu.py
index 395d2dd4c..80e9ae7d8 100644
--- a/tests/integration/test_trilu.py
+++ b/tests/integration/test_trilu.py
@@ -18,51 +18,73 @@

 import cunumeric as num

-KS = [0, -1, 1, -2, 2]
-
-a = num.array(
-    [
-        [1, 2, 3, 4],
-        [5, 6, 7, 8],
-        [9, 10, 11, 12],
-        [13, 14, 15, 16],
-        [17, 18, 19, 20],
-    ]
-)
-
-anp = np.array(
-    [
-        [1, 2, 3, 4],
-        [5, 6, 7, 8],
-        [9, 10, 11, 12],
-        [13, 14, 15, 16],
-        [17, 18, 19, 20],
-    ]
-)
+KS = (0, -1, 1, -2, 2)
+FUNCTIONS = ("tril", "triu")


-@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})")
-@pytest.mark.parametrize("func", ("tril", "triu"))
-def test_full(func, k):
+def _test(func, anp, a, k):
     num_f = getattr(num, func)
     np_f = getattr(np, func)

     b = num_f(a, k=k)
-    bn = np_f(anp, k=k)
+    bnp = np_f(anp, k=k)

-    assert num.array_equal(b, bn)
+    assert num.array_equal(b, bnp)


-@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})")
-@pytest.mark.parametrize("func", ("tril", "triu"))
-def test_slice(func, k):
-    num_f = getattr(num, func)
-    np_f = getattr(np, func)
+ARRAY_SHAPE = (
+    (0,),
+    (1,),
+    (10,),
+    (1, 10),
+    (10, 10),
+    (1, 1, 10),
+    (1, 10, 10),
+    (10, 10, 10),
+)

-    b = num_f(a[0, :], k=k)
-    bn = np_f(anp[0, :], k=k)

-    assert num.array_equal(b, bn)
+@pytest.mark.parametrize("k", KS + (-10, 10), ids=lambda k: f"(k={k})")
+@pytest.mark.parametrize("dtype", (int, float), ids=str)
+@pytest.mark.parametrize(
+    "shape", ARRAY_SHAPE, ids=lambda shape: f"(shape={shape})"
+)
+@pytest.mark.parametrize("func", FUNCTIONS)
+def test_trilu(func, shape, dtype, k):
+    anp = np.ones(shape, dtype=dtype)
+    a = num.ones(shape, dtype=dtype)
+
+    _test(func, anp, a, k)
+
+
+@pytest.mark.xfail
+@pytest.mark.parametrize("k", (-2.5, 0.0, 2.5), ids=lambda k: f"(k={k})")
+@pytest.mark.parametrize("func", FUNCTIONS)
+def test_trilu_float_k(func, k):
+    # cuNumeric: struct.error: required argument is not an integer
+    # Numpy: pass
+    shape = (10, 10)
+    anp = np.ones(shape)
+    a = num.ones(shape)
+
+    _test(func, anp, a, k)
+
+
+class TestTriluErrors:
+    def test_arr_none(self):
+        msg = "'NoneType' object has no attribute 'ndim'"
+        with pytest.raises(AttributeError, match=msg):
+            num.tril(None)
+
+    @pytest.mark.xfail
+    def test_k_none(self):
+        # In cuNumeric, it raises struct.error,
+        # msg is required argument is not an integer
+        # In Numpy, it raises TypeError,
+        # msg is bad operand type for unary -: 'NoneType'
+        a = num.ones((3, 3))
+        with pytest.raises(TypeError):
+            num.tril(a, k=None)


 if __name__ == "__main__":
diff --git a/tests/integration/test_trilu_indices.py b/tests/integration/test_trilu_indices.py
index d25e4e718..6962d283a 100644
--- a/tests/integration/test_trilu_indices.py
+++ b/tests/integration/test_trilu_indices.py
@@ -15,54 +15,210 @@
 import numpy as np
 import pytest
+from utils.utils import check_module_function

 import cunumeric as num

-KS = [0, -1, 1, -2, 2]
+KS = (0, -1, 1, -2, 2)
+FUNCTIONS_INDICES = ("tril_indices", "triu_indices")
+FUNCTIONS_INDICES_FROM = ("tril_indices_from", "triu_indices_from")
+N = 100

-def _test(func, k):
+def _test_from(func, shape, k):
     num_f = getattr(num, func)
     np_f = getattr(np, func)
-
-    a = num_f(100, k=k)
-    an = np_f(100, k=k)
-    assert num.array_equal(a, an)
-
-    a = num_f(100, k=k, m=30)
-    an = np_f(100, k=k, m=30)
-    assert num.array_equal(a, an)
-
-
-def _test_from(func, k):
-    num_f = getattr(num, func)
-    np_f = getattr(np, func)
-    a = num.ones((70, 40), dtype=int)
-    an = np.ones((70, 40), dtype=int)
+    a = num.ones(shape, dtype=int)
+    an = np.ones(shape, dtype=int)

     b = num_f(a, k=k)
     bn = np_f(an, k=k)
     assert num.array_equal(b, bn)


-@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})")
-def test_tril_indices_from(k):
-    _test_from("tril_indices_from", k)
-
-
-@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})")
-def test_triu_indices_from(k):
-    _test_from("triu_indices_from", k)
-
-
-@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})")
-def test_tril_indices(k):
-    _test("tril_indices", k)
-
-
-@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})")
-def test_triu_indices(k):
-    _test("triu_indices", k)
+@pytest.mark.parametrize("n", (0, 1, 100), ids=lambda n: f"(n={n})")
+@pytest.mark.parametrize("func", FUNCTIONS_INDICES)
+def test_trilu_indices_default(func, n):
+    print_msg = f"np & cunumeric.{func}({n})"
+    check_module_function(func, [n], {}, print_msg)
+
+
+@pytest.mark.parametrize("k", KS + (-N, N), ids=lambda k: f"(k={k})")
+@pytest.mark.parametrize("m", (1, 10, N), ids=lambda m: f"(m={m})")
+@pytest.mark.parametrize("n", (1, N), ids=lambda n: f"(n={n})")
+@pytest.mark.parametrize("func", FUNCTIONS_INDICES)
+def test_trilu_indices_full(func, n, m, k):
+    print_msg = f"np & cunumeric.{func}({n}, k={k}, m={m})"
+    check_module_function(func, [n], {"k": k, "m": m}, print_msg)
+
+
+@pytest.mark.parametrize("m", (0, None), ids=lambda m: f"(m={m})")
+@pytest.mark.parametrize("func", FUNCTIONS_INDICES)
+def test_trilu_indices_m(func, m):
+    print_msg = f"np & cunumeric.{func}({N}, m={m})"
+    check_module_function(func, [N], {"m": m}, print_msg)
+
+
+@pytest.mark.xfail
+@pytest.mark.parametrize("k", (-10.5, 0.0, 10.5), ids=lambda k: f"(k={k})")
+@pytest.mark.parametrize("func", FUNCTIONS_INDICES)
+def test_trilu_indices_float_k(func, k):
+    # cuNumeric: struct.error: required argument is not an integer
+    # Numpy: pass
+    print_msg = f"np & cunumeric.{func}({N}, k={k})"
+    check_module_function(func, [N], {"k": k}, print_msg)
+
+
+class TestTriluIndicesErrors:
+    def test_negative_n(self):
+        with pytest.raises(ValueError):
+            num.tril_indices(-100)
+
+    @pytest.mark.xfail
+    def test_negative_n_DIVERGENCE(self):
+        # np.tril_indices(-100) returns empty array, dtype=int64
+        # num.tril_indices(-100) raises ValueError
+        n = -100
+        np_res = np.tril_indices(n)
+        num_res = num.tril_indices(n)
+        assert np.array_equal(np_res, num_res)
+
+    @pytest.mark.parametrize("n", (-10.5, 0.0, 10.5))
+    def test_float_n(self, n):
+        msg = "expected a sequence of integers or a single integer"
+        with pytest.raises(TypeError, match=msg):
+            num.tril_indices(n)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("n", (-10.5, 0.0, 10.5))
+    def test_float_n_DIVERGENCE(self, n):
+        # np.tril_indices(-10.5) returns empty array, dtype=int64
+        # np.tril_indices(0.0) returns empty array, dtype=int64
+        # np.tril_indices(10.5) returns array, dtype=int64
+        # num.tril_indices(-10.5) raises TypeError
+        # num.tril_indices(0.0) raises TypeError
+        # num.tril_indices(10.5) raises TypeError
+        np_res = np.tril_indices(n)
+        num_res = num.tril_indices(n)
+        assert np.array_equal(np_res, num_res)
+
+    def test_negative_m(self):
+        with pytest.raises(ValueError):
+            num.tril_indices(N, m=-10)
+
+    @pytest.mark.xfail
+    def test_negative_m_DIVERGENCE(self):
+        # np.tril_indices(100, m=-10) returns empty array, dtype=int64
+        # num.tril_indices(100, m=-10) raises ValueError
+        m = -10
+        np_res = np.tril_indices(N, m=m)
+        num_res = num.tril_indices(N, m=m)
+        assert np.array_equal(np_res, num_res)
+
+    @pytest.mark.parametrize("m", (-10.5, 0.0, 10.5))
+    def test_float_m(self, m):
+        msg = "expected a sequence of integers or a single integer"
+        with pytest.raises(TypeError, match=msg):
+            num.tril_indices(N, m=m)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("m", (-10.5, 0.0, 10.5))
+    def test_float_m_DIVERGENCE(self, m):
+        # np.tril_indices(100, m=-10.5) returns empty array, dtype=int64
+        # np.tril_indices(100, m=0.0) returns empty array, dtype=int64
+        # np.tril_indices(100, m=10.5) returns array, dtype=int64
+        # num.tril_indices(100, m=-10.5) raises TypeError
+        # num.tril_indices(100, m=0.0) raises TypeError
+        # num.tril_indices(100, m=10.5) raises TypeError
+        np_res = np.tril_indices(N, m=m)
+        num_res = num.tril_indices(N, m=m)
+        assert np.array_equal(np_res, num_res)
+
+    def test_n_none(self):
+        msg = "expected a sequence of integers or a single integer"
+        with pytest.raises(TypeError, match=msg):
+            num.tril_indices(None)
+
+    @pytest.mark.xfail
+    def test_k_none(self):
+        # In cuNumeric, it raises struct.error,
+        # msg is required argument is not an integer
+        # In Numpy, it raises TypeError,
+        # msg is bad operand type for unary -: 'NoneType'
+        with pytest.raises(TypeError):
+            num.tril_indices(N, k=None)
+
+
+ARRAY_SHAPE = (
+    (1, 1),
+    (1, N),
+    (10, N),
+    (N, N),
+    (N, 10),
+    (N, 1),
+)
+
+
+@pytest.mark.parametrize("k", KS + (-N, N), ids=lambda k: f"(k={k})")
+@pytest.mark.parametrize(
+    "shape", ARRAY_SHAPE, ids=lambda shape: f"(shape={shape})"
+)
+@pytest.mark.parametrize("func", FUNCTIONS_INDICES_FROM)
+def test_trilu_indices_from(func, shape, k):
+    _test_from(func, shape, k)
+
+
+@pytest.mark.parametrize(
+    "shape", ((10, 0), (0, 10), (0, 0)), ids=lambda shape: f"(shape={shape})"
+)
+@pytest.mark.parametrize("func", FUNCTIONS_INDICES_FROM)
+def test_trilu_indices_from_empty_array(func, shape):
+    k = 0
+    _test_from(func, shape, k)
+
+
+@pytest.mark.xfail
+@pytest.mark.parametrize("k", (-10.5, 0.0, 10.5), ids=lambda k: f"(k={k})")
+@pytest.mark.parametrize("func", FUNCTIONS_INDICES_FROM)
+def test_trilu_indices_from_float_k(func, k):
+    # cuNumeric: struct.error: required argument is not an integer
+    # Numpy: pass
+    shape = (10, 10)
+    _test_from(func, shape, k)
+
+
+class TestTriluIndicesFromErrors:
+    @pytest.mark.parametrize("size", ((5,), (0,)), ids=str)
+    @pytest.mark.parametrize(
+        "dimension", (1, 3), ids=lambda dimension: f"(dim={dimension})"
+    )
+    def test_arr_non_2d(self, dimension, size):
+        shape = size * dimension
+        a = num.ones(shape, dtype=int)
+        msg = "input array must be 2-d"
+        with pytest.raises(ValueError, match=msg):
+            num.tril_indices_from(a)
+
+    def test_arr_0d(self):
+        a = num.array(3)
+        msg = "input array must be 2-d"
+        with pytest.raises(ValueError, match=msg):
+            num.tril_indices_from(a)
+
+    def test_arr_none(self):
+        msg = "'NoneType' object has no attribute 'ndim'"
+        with pytest.raises(AttributeError, match=msg):
+            num.tril_indices_from(None)
+
+    @pytest.mark.xfail
+    def test_k_none(self):
+        # In cuNumeric, it raises struct.error,
+        # msg is required argument is not an integer
+        # In Numpy, it raises TypeError,
+        # msg is bad operand type for unary -: 'NoneType'
+        a = num.ones((3, 3))
+        with pytest.raises(TypeError):
+            num.tril_indices_from(a, k=None)


 if __name__ == "__main__":
diff --git a/tests/integration/utils/utils.py b/tests/integration/utils/utils.py
index 505249321..892154d45 100644
--- a/tests/integration/utils/utils.py
+++ b/tests/integration/utils/utils.py
@@ -39,7 +39,7 @@ def compare_array_and_print_results(a, b, print_msg, check_type=True):
     """
     Compare two arrays and print results.
     """
-    if isinstance(a, list):
+    if isinstance(a, list) or isinstance(a, tuple):
         is_equal, err_arr = compare_array(a, b, check_type=False)
         assert is_equal, (
             f"Failed, {print_msg}\n"

From bf5b7f279c281ebd1c3c0f2ce1509cd959447a60 Mon Sep 17 00:00:00 2001
From: Bryan Van de Ven
Date: Tue, 18 Oct 2022 09:26:08 -0700
Subject: [PATCH 15/89] Remove --install-dir option (#656)

* remove --install-dir option

* remove --with-core as well

* remove legate-url and legate-branch

* suggestions
---
 install.py | 63 +++++++++++------------------------------------------
 1 file changed, 12 insertions(+), 51 deletions(-)

diff --git a/install.py b/install.py
index febf25b8b..c6ee2d80d 100755
--- a/install.py
+++ b/install.py
@@ -139,10 +139,6 @@ def install_cunumeric(
     gasnet_dir,
     networks,
     hdf,
-    install_dir,
-    legate_branch,
-    legate_dir,
-    legate_url,
     llvm,
     march,
     maxdim,
@@ -187,10 +183,6 @@ def install_cunumeric(
         print("gasnet_dir: ", gasnet_dir)
         print("networks: ", networks)
         print("hdf: ", hdf)
-        print("install_dir: ", install_dir)
-        print("legate_branch: ", legate_branch)
-        print("legate_dir: ", legate_dir)
-        print("legate_url: ", legate_url)
         print("llvm: ", llvm)
         print("march: ", march)
         print("maxdim: ", maxdim)
@@ -226,20 +218,21 @@ def validate_path(path):
     cuda_dir = validate_path(cuda_dir)
     nccl_dir = validate_path(nccl_dir)
     tblis_dir = validate_path(tblis_dir)
-    legate_dir = validate_path(legate_dir)
     thrust_dir = validate_path(thrust_dir)
     curand_dir = validate_path(curand_dir)
     gasnet_dir = validate_path(gasnet_dir)
     cutensor_dir = validate_path(cutensor_dir)
     openblas_dir = validate_path(openblas_dir)

-    if legate_dir is None:
-        try:
-            import legate.install_info as lg_install_info
+    try:
+        import legate.install_info as lg_install_info
+    except ImportError:
+        raise RuntimeError(
+            "Cannot determine Legate install directory. Please make sure "
+            "legate.core is installed in the current Python environment."
+        )

-            legate_dir = dirname(lg_install_info.libpath)
-        except Exception:
-            pass
+    legate_dir = dirname(lg_install_info.libpath)

     if verbose:
         print("cuda_dir: ", cuda_dir)
@@ -274,6 +267,8 @@ def validate_path(path):
     pip_install_cmd = [sys.executable, "-m", "pip", "install"]
     cmd_env = dict(os.environ.items())

+    install_dir = None
+
     if unknown is not None:
         try:
             prefix_loc = unknown.index("--prefix")
@@ -350,12 +345,8 @@ def validate_path(path):
     # A custom path to cuRAND is ignored when CUDA support is available
     if cuda and curand_dir is not None:
         cmake_flags += ["-Dcunumeric_cuRAND_INCLUDE_DIR=%s" % curand_dir]
-    if legate_dir:
-        cmake_flags += ["-Dlegate_core_ROOT=%s" % legate_dir]
-    if legate_url:
-        cmake_flags += ["-Dcunumeric_LEGATE_CORE_REPOSITORY=%s" % legate_url]
-    if legate_branch:
-        cmake_flags += ["-Dcunumeric_LEGATE_CORE_BRANCH=%s" % legate_branch]
+
+    cmake_flags += ["-Dlegate_core_ROOT=%s" % legate_dir]

     cmake_flags += extra_flags
     cmd_env.update(
@@ -370,14 +361,6 @@ def validate_path(path):

 def driver():
     parser = argparse.ArgumentParser(description="Install cuNumeric.")
-    parser.add_argument(
-        "--install-dir",
-        dest="install_dir",
-        metavar="DIR",
-        required=False,
-        default=None,
-        help="Path to install cuNumeric software",
-    )
     parser.add_argument(
         "--debug",
         dest="debug",
@@ -434,28 +417,6 @@ def driver():
         default=os.environ.get("GASNET"),
         help="Path to GASNet installation directory.",
    )
-    parser.add_argument(
-        "--with-core",
-        dest="legate_dir",
-        metavar="DIR",
-        required=False,
-        default=os.environ.get("LEGATE_DIR"),
-        help="Path to Legate Core installation directory.",
-    )
-    parser.add_argument(
-        "--legate-url",
-        dest="legate_url",
-        required=False,
-        default="https://github.com/nv-legate/legate.core.git",
-        help="Legate git URL to build cuNumeric with.",
-    )
-    parser.add_argument(
-        "--legate-branch",
-        dest="legate_branch",
-        required=False,
-        default="branch-22.10",
-        help="Legate branch to build cuNumeric with.",
-    )
     parser.add_argument(
         "--with-openblas",
         dest="openblas_dir",

From 81b6ac3c86d6c8c8375bb3d7cbffb9cec47663cf Mon Sep 17 00:00:00 2001
From: Mark Vaz
Date: Wed, 19 Oct 2022 09:09:59 +1100
Subject: [PATCH 16/89] Fix missing legate-core run requirement (#661)

---
 conda/conda-build/meta.yaml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml
index 4adf59927..7a274a1d3 100644
--- a/conda/conda-build/meta.yaml
+++ b/conda/conda-build/meta.yaml
@@ -132,7 +132,10 @@ requirements:
   run:
     - numpy {{ numpy_version }}
     - libopenblas =* =*openmp*
-{% if gpu_enabled_bool %}
+{% if not gpu_enabled_bool %}
+    - legate-core ={{ core_version }} =*_cpu
+{% else %}
+    - legate-core ={{ core_version }}
     - cuda-cudart >={{ cuda_version }}
 #    - libcutensor >=1.3
     - cutensor >=1.3

From 29a56c31bab448c011c5a1c8b2b4c6f5500c74fe Mon Sep 17 00:00:00 2001
From: xialu00 <110973296+xialu00@users.noreply.github.com>
Date: Wed, 19 Oct 2022 10:12:54 +0800
Subject: [PATCH 17/89] add test cases for test_tile.py and test_repeat.py (#657)

* add test cases for test_tile.py and test_repeat.py

* fix bug

* fix bug
---
 cunumeric/module.py              |  34 +++++-
 tests/integration/test_repeat.py | 188 +++++++++++++++++++++++++++----
 tests/integration/test_tile.py   |  81 +++++++------
 3 files changed, 245 insertions(+), 58 deletions(-)

diff --git a/cunumeric/module.py b/cunumeric/module.py
index 7a3024e55..69647b3cb 100644
--- a/cunumeric/module.py
+++ b/cunumeric/module.py
@@ -2327,17 +2327,44 @@ def repeat(a: ndarray, repeats: Any, axis: Optional[int] = None) -> ndarray:
     Multiple GPUs, Multiple CPUs

     """
+    if repeats is None:
+        raise TypeError(
+            "int() argument must be a string, a bytes-like object or a number,"
+            " not 'NoneType'"
+        )
+
+    if np.ndim(repeats) > 1:
+        raise ValueError("`repeats` should be scalar or 1D array")
+
+    # axes should be integer type
+    if axis is not None and not isinstance(axis, int):
+        raise TypeError("Axis should be integer type")
+
     # when array is a scalar
     if np.ndim(a) == 0:
+        if axis is not None and axis != 0:
+            raise np.AxisError("axis is out of bounds for array of dimension")
         if np.ndim(repeats) == 0:
+            if not isinstance(repeats, int):
+                runtime.warn(
+                    "converting repeats to an integer type",
+                    category=UserWarning,
+                )
+            repeats = np.int64(repeats)
             return full((repeats,), cast(Union[int, float], a))
+        elif np.ndim(repeats) == 1 and len(repeats) == 1:
+            if not isinstance(repeats, int):
+                runtime.warn(
+                    "converting repeats to an integer type",
+                    category=UserWarning,
+                )
+            repeats = np.int64(repeats)
+            return full((repeats[0],), cast(Union[int, float], a))
         else:
             raise ValueError(
                 "`repeat` with a scalar parameter `a` is only "
                 "implemented for scalar values of the parameter `repeats`."
             )
-    if np.ndim(repeats) > 1:
-        raise ValueError("`repeats` should be scalar or 1D array")

     # array is an array
     array = convert_to_cunumeric_ndarray(a)
@@ -2349,9 +2376,6 @@ def repeat(a: ndarray, repeats: Any, axis: Optional[int] = None) -> ndarray:
         array = array.ravel()
         axis = 0

-    # axes should be integer type
-    if not isinstance(axis, int):
-        raise TypeError("Axis should be integer type")
     axis_int = np.int32(axis)

     if axis_int >= array.ndim:
diff --git a/tests/integration/test_repeat.py b/tests/integration/test_repeat.py
index a704d884a..1128a34a3 100644
--- a/tests/integration/test_repeat.py
+++ b/tests/integration/test_repeat.py
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-
 import numpy as np
 import pytest
 from legate.core import LEGATE_MAX_DIM

 np.random.seed(12345)

-def test_basic():
-    assert np.array_equal(num.repeat(3, 4), np.repeat(3, 4))
-    assert np.array_equal(num.repeat([3, 1], 4), np.repeat([3, 1], 4))
+@pytest.mark.parametrize(
+    "array", (None, [], 4, [2, 3], mk_seq_array(num, (3, 4, 2)))
+)
+def test_repeats_none(array):
+    with pytest.raises(TypeError):
+        num.repeat(array, None)
+
+
+@pytest.mark.parametrize("repeats", (-3, [], [-3], [2, 3]))
+def test_array_none_invalid(repeats):
+    with pytest.raises(ValueError):
+        num.repeat(None, repeats)
+
+
+@pytest.mark.parametrize("repeats", (3, [0], [3], 4.7, [4.7]))
+def test_array_none_valid(repeats):
+    res_num = num.repeat(None, repeats)
+    res_np = np.repeat(None, repeats)
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.parametrize("repeats", (-3, 0, 3, 4.7, [], [-3], [0], [3], [4.7]))
+def test_array_empty_repeats_valid(repeats):
+    res_np = np.repeat([], repeats)
+    res_num = num.repeat([], repeats)
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.parametrize("repeats", ([3, 4], [1, 2, 3]))
+def test_array_empty_repeats_invalid_negative(repeats):
+    # numpy raises:
+    # ValueError: operands could not be broadcast together with shape (0,) (2,)
+    # while cuNumeric passes and returns []
+    res_num = num.repeat([], repeats)
+    assert np.array_equal(res_num, [])
+
+
+@pytest.mark.xfail
+@pytest.mark.parametrize("repeats", ([3, 4], [1, 2, 3]))
+def test_array_empty_repeats_invalid(repeats):
+    res_np = np.repeat([], repeats)
+    res_num = num.repeat([], repeats)
+    assert np.array_equal(res_num, res_np)
+
+
+@pytest.mark.parametrize("repeats", (-3, 0, 3, 4.7, [], [-3], [0], [3], [4.7]))
+def test_array_empty_axis_valid(repeats):
+    res_np = np.repeat([], repeats, axis=0)
+    res_num = num.repeat([], repeats, axis=0)
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.parametrize("repeats", (-3, 0, 3, 4.7, [], [-3], [0], [3], [4.7]))
+def test_array_empty_axis_invalid(repeats):
+    with pytest.raises(ValueError):
+        num.repeat([], repeats, axis=1)
+
+
+@pytest.mark.parametrize("repeats", (-3, [-3]))
+def test_array_int_repeats_negative(repeats):
+    with pytest.raises(ValueError):
+        num.repeat(3, repeats)
+
+
+@pytest.mark.parametrize("repeats", (0, 3, 4.7, [0], [3], [4.7]))
+def test_array_int_repeats_valid(repeats):
+    res_np = np.repeat(3, repeats)
+    res_num = num.repeat(3, repeats)
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.parametrize("repeats", ([], [1, 2]))
+def test_array_int_repeats_invalid(repeats):
+    msg = r"scalar"
+    with pytest.raises(ValueError, match=msg):
+        num.repeat(3, repeats)
+
+
+@pytest.mark.parametrize("repeats", (0, 3, 4.7, [0], [3], [4.7], [2, 3, 4]))
+def test_array_1d_repeats_valid(repeats):
+    anp = np.array([1, 2, 3])
+    res_np = np.repeat(anp, repeats)
+    res_num = num.repeat(anp, repeats)
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.parametrize("repeats", ([], [2, 3]))
+def test_array_1d_repeats_invalid(repeats):
+    anp = np.array([1, 2, 3])
+    with pytest.raises(ValueError):
+        num.repeat(anp, repeats)
+
+
+@pytest.mark.parametrize("repeats", (0, [0], 3, 4.7, [3], [4.7]))
+def test_array_2d_repeats_valid(repeats):
+    anp = np.array([[1, 3], [2, 4]])
+    res_np = np.repeat(anp, repeats)
+    res_num = num.repeat(anp, repeats)
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.parametrize("repeats", ([], [2, 3]))
+def test_array_2d_repeats_invalid(repeats):
+    anp = np.array([[1, 3], [2, 4]])
+    with pytest.raises(ValueError):
+        num.repeat(anp, repeats)
+
+
+@pytest.mark.skip()
+@pytest.mark.parametrize("arr", ([1, 2, 3], [[1, 3], [2, 4]]))
+@pytest.mark.parametrize("repeats", (-3, [-3]))
+def test_array_1d_repeats_fatal_error(arr, repeats):
+    anp = np.array(arr)
+    # numpy raises "ValueError: negative dimensions are not allowed"
+    # while cuNumeric aborts with "Fatal Python error: Aborted"
+    num.repeat(anp, repeats)
+
+
+@pytest.mark.parametrize("arr", (None, [], 3, [1, 2, 3], [[1, 3], [2, 4]]))
+@pytest.mark.parametrize(
+    "repeats",
+    ([[2, 3], [3, 3]], np.random.randint(low=-10.0, high=10, size=(3, 3, 3))),
+)
+def test_repeats_nd(arr, repeats):
+    anp = np.array(arr)
+    msg = r"should be scalar or 1D array"
+    with pytest.raises(ValueError, match=msg):
+        num.repeat(anp, repeats)
+
+
+@pytest.mark.parametrize(("arr", "repeats"), ((3, 3), ([1, 2, 3], [1, 2, 3])))
+@pytest.mark.parametrize("axis", ("hello", 0.9))
+def test_axis_string(arr, repeats, axis):
+    msg = r"integer"
+    with pytest.raises(TypeError, match=msg):
+        num.repeat(arr, repeats, axis=axis)
+
+
+def test_array_axis_out_bound():
+    anp = np.array([1, 2, 3, 4, 5])
+    # np.repeat(anp, 4, 2)
+    # numpy.AxisError: axis 2 is out of bounds for array of dimension 1
+    msg = r"dimension"
+    with pytest.raises(ValueError, match=msg):
+        num.repeat(anp, 4, 2)
+
+
+@pytest.mark.xfail()
+def test_array_axis_negative_equal():
     anp = np.array([1, 2, 3, 4, 5])
-    a = num.array(anp)
-    repnp = np.array([1, 2, 1, 2, 1])
-    rep = num.array(repnp)
-    print(num.repeat(a, rep, axis=0))
-    print(np.repeat(anp, repnp, axis=0))
-    assert np.array_equal(
-        num.repeat(a, rep, axis=0), np.repeat(anp, repnp, axis=0)
-    )
-    xnp = np.array([[1, 2], [3, 4]])
-    x = num.array([[1, 2], [3, 4]])
-    assert np.array_equal(
-        num.repeat(x, [1, 2], axis=0), np.repeat(xnp, [1, 2], axis=0)
-    )
-    assert np.array_equal(num.repeat(x, 0, axis=0), np.repeat(xnp, 0, axis=0))
+    res_np = np.repeat(anp, 4, -1)  # [1 1 1 1 2 2 2 2 3 3 3 3 4 4 4 4 5 5 5 5]
+    res_num = num.repeat(anp, 4, -1)  # [1 1 1 1 2]
+    # They have different outputs.
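+    # The assert below records the expected (NumPy) result; the xfail marker
+    # above tracks this divergence until negative axes behave the same way.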
+ assert np.array_equal(res_np, res_num) @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) -def test_nd(ndim): +def test_nd_basic(ndim): a_shape = tuple(np.random.randint(1, 9) for _ in range(ndim)) np_array = mk_seq_array(np, a_shape) num_array = mk_seq_array(num, a_shape) @@ -55,10 +187,26 @@ def test_nd(ndim): res_num = num.repeat(num_array, repeats) res_np = np.repeat(np_array, repeats) assert np.array_equal(res_num, res_np) + + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_nd_axis(ndim): for axis in range(0, ndim): + a_shape = tuple(np.random.randint(1, 9) for _ in range(ndim)) + np_array = mk_seq_array(np, a_shape) + num_array = mk_seq_array(num, a_shape) + repeats = np.random.randint(0, 15) res_num2 = num.repeat(num_array, repeats, axis) res_np2 = np.repeat(np_array, repeats, axis) assert np.array_equal(res_num2, res_np2) + + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_nd_repeats(ndim): + a_shape = tuple(np.random.randint(1, 9) for _ in range(ndim)) + np_array = mk_seq_array(np, a_shape) + num_array = mk_seq_array(num, a_shape) + for axis in range(0, ndim): rep_shape = (a_shape[axis],) rep_arr_np = mk_seq_array(np, rep_shape) rep_arr_num = mk_seq_array(num, rep_shape) diff --git a/tests/integration/test_tile.py b/tests/integration/test_tile.py index d9ec3e1c7..1bfc1dcf8 100644 --- a/tests/integration/test_tile.py +++ b/tests/integration/test_tile.py @@ -12,46 +12,61 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import numpy as np import pytest import cunumeric as num -def test_1d(): +def test_negative(): a = num.array([0, 1, 2]) + with pytest.raises(ValueError): + num.tile(a, -4) - b = num.tile(a, 4) - assert num.array_equal(b, [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]) - - c = num.tile(a, (3, 4)) - assert num.array_equal( - c, - [ - [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], - [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], - [0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], - ], - ) - - d = num.tile(a, (3, 1, 4)) - assert num.array_equal( - d, - [ - [[0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]], - [[0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]], - [[0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2]], - ], - ) - - -def test_2d(): - e = num.array([[1, 2], [3, 4]]) - - f = num.tile(e, 2) - assert num.array_equal(f, [[1, 2, 1, 2], [3, 4, 3, 4]]) - - g = num.tile(e, (2, 1)) - assert num.array_equal(g, [[1, 2], [3, 4], [1, 2], [3, 4]]) + +def test_float(): + a = num.array([0, 1, 2]) + msg = r"float" + with pytest.raises(TypeError, match=msg): + num.tile(a, 2.2) + + +def test_list(): + a = num.array([0, 1, 2]) + msg = r"1d sequence" + with pytest.raises(TypeError, match=msg): + num.tile(a, [[1, 2], [3, 4]]) + + +def test_tuple(): + a = num.array([0, 1, 2]) + msg = r"1d sequence" + with pytest.raises(TypeError, match=msg): + num.tile(a, ((1, 2), (3, 4))) + + +DIM = 5 +SIZES = [ + (0,), + (1), + (0, 1), + (1, 0), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 1, 1), + (DIM, DIM, DIM), +] + + +@pytest.mark.parametrize("size", SIZES, ids=str) +@pytest.mark.parametrize("value", (0, DIM, (DIM, DIM), (DIM, DIM, DIM))) +def test_basic(size, value): + a = np.random.randint(low=-10.0, high=10, size=size) + res_np = np.tile(a, value) + res_num = num.tile(a, value) + assert np.array_equal(res_np, res_num) if __name__ == "__main__": From 87c7d450fd69ee883d268d1df14bb4ee206b9a86 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 19 Oct 2022 11:28:03 -0600 Subject: [PATCH 18/89] Implementing PUT routine (#582) Adding 
support for PUT --- cunumeric/array.py | 82 +++++++++++++--- cunumeric/deferred.py | 69 ++++++++++++- cunumeric/eager.py | 7 ++ cunumeric/module.py | 45 +++++++-- cunumeric/thunk.py | 4 + docs/cunumeric/source/api/indexing.rst | 1 + src/cunumeric/index/wrap.cc | 16 +-- src/cunumeric/index/wrap.cu | 83 +++++++++++++--- src/cunumeric/index/wrap.h | 29 ++++++ src/cunumeric/index/wrap_omp.cc | 10 +- src/cunumeric/index/wrap_template.inl | 22 ++++- src/cunumeric/index/zip.cu | 98 ++++++++++++++---- src/cunumeric/index/zip.h | 6 ++ tests/integration/test_put.py | 131 +++++++++++++++++++++++++ 14 files changed, 536 insertions(+), 67 deletions(-) create mode 100644 tests/integration/test_put.py diff --git a/cunumeric/array.py b/cunumeric/array.py index 64784d59a..cd14eda7c 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -920,12 +920,8 @@ def _convert_key(self, key: Any, first: bool = True) -> Any: key = convert_to_cunumeric_ndarray(key) if key.dtype != bool and not np.issubdtype(key.dtype, np.integer): raise TypeError("index arrays should be int or bool type") - if key.dtype != bool and key.dtype != np.int64: - runtime.warn( - "converting index array to int64 type", - category=RuntimeWarning, - ) - key = key.astype(np.int64) + if key.dtype != bool: + key = key._warn_and_convert(np.dtype(np.int64)) return key._thunk @@ -2104,12 +2100,8 @@ def compress( raise ValueError( "Dimension mismatch: condition must be a 1D array" ) - if condition.dtype != bool: - runtime.warn( - "converting condition to bool type", - category=RuntimeWarning, - ) - condition = condition.astype(bool) + + condition = condition._warn_and_convert(np.dtype(bool)) if axis is None: axis = 0 @@ -2476,6 +2468,62 @@ def diagonal( raise ValueError("Either axis1/axis2 or axes must be supplied") return self._diag_helper(offset=offset, axes=axes, extract=extract) + @add_boilerplate("indices", "values") + def put( + self, indices: ndarray, values: ndarray, mode: str = "raise" + ) -> None: + """ + Replaces specified elements of the array with given values. + + Refer to :func:`cunumeric.put` for full documentation. 
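+
+        If ``values`` is shorter than ``indices``, it is repeated as
+        necessary.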
+ + See Also + -------- + cunumeric.put : equivalent function + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + + if values.size == 0 or indices.size == 0 or self.size == 0: + return + + if mode not in ("raise", "wrap", "clip"): + raise ValueError( + "mode must be one of 'clip', 'raise', or 'wrap' " + f"(got {mode})" + ) + + if mode == "wrap": + indices = indices % self.size + elif mode == "clip": + indices = indices.clip(0, self.size - 1) + + indices = indices._warn_and_convert(np.dtype(np.int64)) + values = values._warn_and_convert(self.dtype) + + if indices.ndim > 1: + indices = indices.ravel() + + if self.shape == (): + if mode == "raise": + if indices.min() < -1 or indices.max() > 0: + raise ValueError("Indices out of bounds") + if values.shape == (): + v = values + else: + v = values[0] + self._thunk.copy(v._thunk, deep=False) + return + + # call _wrap on the values if they need to be wrapped + if values.ndim != indices.ndim or values.size != indices.size: + values = values._wrap(indices.size) + + self._thunk.put(indices._thunk, values._thunk) + @add_boilerplate() def trace( self, @@ -3822,6 +3870,16 @@ def _maybe_convert(self, dtype: np.dtype[Any], hints: Any) -> ndarray: copy._thunk.convert(self._thunk) return copy + def _warn_and_convert(self, dtype: np.dtype[Any]) -> ndarray: + if self.dtype != dtype: + runtime.warn( + f"converting array to {dtype} type", + category=RuntimeWarning, + ) + return self.astype(dtype) + else: + return self + # For performing normal/broadcast unary operations @classmethod def _perform_unary_op( diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 54f481977..3bb5c4db7 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -796,10 +796,16 @@ def _broadcast(self, shape: NdShape) -> Any: return result - def _convert_future_to_regionfield(self) -> DeferredArray: + def _convert_future_to_regionfield( + self, change_shape: bool = False + ) -> DeferredArray: + if change_shape and self.shape == (): + shape: NdShape = (1,) + else: + shape = self.shape store = self.context.create_store( self.dtype, - shape=self.shape, + shape=shape, optimize_scalar=False, ) thunk_copy = DeferredArray( @@ -1679,6 +1685,60 @@ def _diag_helper( task.execute() + @auto_convert("indices", "values") + def put(self, indices: Any, values: Any) -> None: + + if indices.base.kind == Future or indices.base.transformed: + change_shape = indices.base.kind == Future + indices = indices._convert_future_to_regionfield(change_shape) + if values.base.kind == Future or values.base.transformed: + change_shape = values.base.kind == Future + values = values._convert_future_to_regionfield(change_shape) + + if self.base.kind == Future or self.base.transformed: + change_shape = self.base.kind == Future + self_tmp = self._convert_future_to_regionfield(change_shape) + else: + self_tmp = self + + assert indices.size == values.size + + # first, we create indirect array with PointN type that + # (indices.size,) shape and is used to copy data from values + # to the target ND array (self) + N = self_tmp.ndim + pointN_dtype = self.runtime.get_point_type(N) + indirect = cast( + DeferredArray, + self.runtime.create_empty_thunk( + shape=indices.shape, + dtype=pointN_dtype, + inputs=[indices], + ), + ) + + shape = self_tmp.shape + task = self.context.create_task(CuNumericOpCode.WRAP) + task.add_output(indirect.base) + task.add_scalar_arg(shape, (ty.int64,)) + task.add_scalar_arg(True, bool) # has_input + task.add_input(indices.base) + task.add_alignment(indices.base, 
indirect.base) + task.throws_exception(IndexError) + task.execute() + if indirect.base.kind == Future: + indirect = indirect._convert_future_to_regionfield() + + copy = self.context.create_copy() + copy.set_target_indirect_out_of_range(False) + copy.add_input(values.base) + copy.add_target_indirect(indirect.base) + copy.add_output(self_tmp.base) + copy.execute() + + if self_tmp is not self: + self.copy(self_tmp, deep=True) + # Create an identity array with the ones offset from the diagonal by k def eye(self, k: int) -> None: assert self.ndim == 2 # Only 2-D arrays should be here @@ -2896,6 +2956,7 @@ def unary_op( args: Any, multiout: Optional[Any] = None, ) -> None: + lhs = self.base rhs = src._broadcast(lhs.shape) @@ -3355,7 +3416,8 @@ def unpackbits( @auto_convert("src") def _wrap(self, src: Any, new_len: int) -> None: if src.base.kind == Future or src.base.transformed: - src = src._convert_future_to_regionfield() + change_shape = src.base.kind == Future + src = src._convert_future_to_regionfield(change_shape) # first, we create indirect array with PointN type that # (len,) shape and is used to copy data from original array @@ -3374,6 +3436,7 @@ def _wrap(self, src: Any, new_len: int) -> None: task = self.context.create_task(CuNumericOpCode.WRAP) task.add_output(indirect.base) task.add_scalar_arg(src.shape, (ty.int64,)) + task.add_scalar_arg(False, bool) # has_input task.execute() copy = self.context.create_copy() diff --git a/cunumeric/eager.py b/cunumeric/eager.py index fdb8f7989..b8cb36ecd 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -620,6 +620,13 @@ def _diag_helper( axes = tuple(range(ndims - naxes, ndims)) self.array = diagonal_reference(rhs.array, axes) + def put(self, indices: Any, values: Any) -> None: + self.check_eager_args(indices, values) + if self.deferred is not None: + self.deferred.put(indices, values) + else: + np.put(self.array, indices.array, values.array) + def eye(self, k: int) -> None: if self.deferred is not None: self.deferred.eye(k) diff --git a/cunumeric/module.py b/cunumeric/module.py index 69647b3cb..0a4e97a5a 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -2410,12 +2410,7 @@ def repeat(a: ndarray, repeats: Any, axis: Optional[int] = None) -> ndarray: # repeats is an array else: # repeats should be integer type - if repeats.dtype != np.int64: - runtime.warn( - "converting repeats to an integer type", - category=RuntimeWarning, - ) - repeats = repeats.astype(np.int64) + repeats = repeats._warn_and_convert(np.int64) if repeats.shape[0] != array.shape[axis]: raise ValueError("incorrect shape of repeats array") result = array._thunk.repeat( @@ -3473,6 +3468,44 @@ def diagonal( ) +@add_boilerplate("a", "indices", "values") +def put( + a: ndarray, indices: ndarray, values: ndarray, mode: str = "raise" +) -> None: + """ + Replaces specified elements of an array with given values. + The indexing works as if the target array is first flattened. + + Parameters + ---------- + a : array_like + Array to put data into + indices : array_like + Target indices, interpreted as integers. + WARNING: In case there are repeated entries in the + indices array, Legate doesn't guarantee the order in + which values are updated. + + values : array_like + Values to place in `a` at target indices. If values array is shorter + than indices, it will be repeated as necessary. + mode : {'raise', 'wrap', 'clip'}, optional + Specifies how out-of-bounds indices will behave. + 'raise' : raise an error. + 'wrap' : wrap around. + 'clip' : clip to the range. 
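+
+    Examples
+    --------
+    A minimal illustrative sketch (not part of this patch); the
+    expected output assumes numpy-compatible behavior, matching the
+    canonical ``numpy.put`` example:
+
+    >>> a = num.arange(5)
+    >>> num.put(a, [0, 2], [-44, -55])
+    >>> a
+    array([-44,   1, -55,   3,   4])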
+ + See Also + -------- + numpy.put + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + a.put(indices=indices, values=values, mode=mode) + + @add_boilerplate("a", "val") def fill_diagonal(a: ndarray, val: ndarray, wrap: bool = False) -> None: """ diff --git a/cunumeric/thunk.py b/cunumeric/thunk.py index bdc773aeb..e1f1dab77 100644 --- a/cunumeric/thunk.py +++ b/cunumeric/thunk.py @@ -197,6 +197,10 @@ def _diag_helper( ) -> None: ... + @abstractmethod + def put(self, indices: Any, values: Any) -> None: + ... + @abstractmethod def eye(self, k: int) -> None: ... diff --git a/docs/cunumeric/source/api/indexing.rst b/docs/cunumeric/source/api/indexing.rst index 1023ed1d4..1ace111d4 100644 --- a/docs/cunumeric/source/api/indexing.rst +++ b/docs/cunumeric/source/api/indexing.rst @@ -43,5 +43,6 @@ Inserting data into arrays :toctree: generated/ fill_diagonal + put put_along_axis place diff --git a/src/cunumeric/index/wrap.cc b/src/cunumeric/index/wrap.cc index 33dfcfe4b..a5483cbdd 100644 --- a/src/cunumeric/index/wrap.cc +++ b/src/cunumeric/index/wrap.cc @@ -24,28 +24,30 @@ using namespace legate; template struct WrapImplBody { + template void operator()(const AccessorWO, 1>& out, const Pitches<0>& pitches_out, const Rect<1>& out_rect, const Pitches& pitches_in, const Rect& in_rect, - const bool dense) const + const bool dense, + const IND& indices) const { const int64_t start = out_rect.lo[0]; const int64_t end = out_rect.hi[0]; const auto in_volume = in_rect.volume(); if (dense) { - int64_t out_idx = 0; - auto outptr = out.ptr(out_rect); + auto outptr = out.ptr(out_rect); for (int64_t i = start; i <= end; i++) { - const int64_t input_idx = i % in_volume; + check_idx(i, in_volume, indices); + const int64_t input_idx = compute_idx(i, in_volume, indices); auto point = pitches_in.unflatten(input_idx, in_rect.lo); - outptr[out_idx] = point; - out_idx++; + outptr[i - start] = point; } } else { for (int64_t i = start; i <= end; i++) { - const int64_t input_idx = i % in_volume; + check_idx(i, in_volume, indices); + const int64_t input_idx = compute_idx(i, in_volume, indices); auto point = pitches_in.unflatten(input_idx, in_rect.lo); out[i] = point; } diff --git a/src/cunumeric/index/wrap.cu b/src/cunumeric/index/wrap.cu index 0f118eadf..af81073d6 100644 --- a/src/cunumeric/index/wrap.cu +++ b/src/cunumeric/index/wrap.cu @@ -23,7 +23,28 @@ namespace cunumeric { using namespace Legion; using namespace legate; -template +template +__global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + check_kernel(Output out, + const AccessorRO indices, + const int64_t start, + const int64_t volume, + const int64_t in_volume, + const int64_t iters) +{ + bool value = false; + for (size_t i = 0; i < iters; i++) { + const auto idx = (i * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= volume) break; + auto index_tmp = indices[idx + start]; + int64_t index = index_tmp < 0 ? 
index_tmp + in_volume : index_tmp; + bool val = (index < 0 || index >= in_volume); + SumReduction::fold(value, val); + } + reduce_output(out, value); +} + +template __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) wrap_kernel(const AccessorWO, 1> out, const int64_t start, @@ -32,53 +53,93 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const Point<1> out_lo, const Pitches pitches_in, const Point in_lo, - const size_t in_volume) + const size_t in_volume, + const IND indices) { const auto idx = global_tid_1d(); if (idx >= volume) return; - const int64_t input_idx = (idx + start) % in_volume; + const int64_t input_idx = compute_idx((idx + start), in_volume, indices); auto out_p = pitches_out.unflatten(idx, out_lo); auto p = pitches_in.unflatten(input_idx, in_lo); out[out_p] = p; } -template +template __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) wrap_kernel_dense(Point* out, const int64_t start, const int64_t volume, const Pitches pitches_in, const Point in_lo, - const size_t in_volume) + const size_t in_volume, + const IND indices) { const auto idx = global_tid_1d(); if (idx >= volume) return; - const int64_t input_idx = (idx + start) % in_volume; + const int64_t input_idx = compute_idx((idx + start), in_volume, indices); auto p = pitches_in.unflatten(input_idx, in_lo); out[idx] = p; } +// don't do anything when indices is a boolean +void check_out_of_bounds(const bool& indices, + const int64_t start, + const int64_t volume, + const int64_t volume_in, + cudaStream_t stream) +{ +} + +void check_out_of_bounds(const AccessorRO& indices, + const int64_t start, + const int64_t volume, + const int64_t volume_in, + cudaStream_t stream) +{ + const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(bool); + DeviceScalarReductionBuffer> out_of_bounds(stream); + + if (blocks >= MAX_REDUCTION_CTAS) { + const size_t iters = (blocks + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS; + check_kernel<<>>( + out_of_bounds, indices, start, volume, volume_in, iters); + } else { + check_kernel<<>>( + out_of_bounds, indices, start, volume, volume_in, 1); + } + CHECK_CUDA_STREAM(stream); + + bool res = out_of_bounds.read(stream); + if (res) throw legate::TaskException("index is out of bounds in index array"); +} + template struct WrapImplBody { + template void operator()(const AccessorWO, 1>& out, const Pitches<0>& pitches_out, const Rect<1>& out_rect, const Pitches& pitches_in, const Rect& in_rect, - const bool dense) const + const bool dense, + const IND& indices) const { auto stream = get_cached_stream(); const int64_t start = out_rect.lo[0]; const int64_t volume = out_rect.volume(); const auto in_volume = in_rect.volume(); const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + + check_out_of_bounds(indices, start, volume, in_volume, stream); + if (dense) { auto outptr = out.ptr(out_rect); - wrap_kernel_dense<<>>( - outptr, start, volume, pitches_in, in_rect.lo, in_volume); + wrap_kernel_dense<<>>( + outptr, start, volume, pitches_in, in_rect.lo, in_volume, indices); } else { - wrap_kernel<<>>( - out, start, volume, pitches_out, out_rect.lo, pitches_in, in_rect.lo, in_volume); + wrap_kernel<<>>( + out, start, volume, pitches_out, out_rect.lo, pitches_in, in_rect.lo, in_volume, indices); } CHECK_CUDA_STREAM(stream); } diff --git a/src/cunumeric/index/wrap.h b/src/cunumeric/index/wrap.h index 91c3f2326..181a9b97c 100644 --- 
a/src/cunumeric/index/wrap.h +++ b/src/cunumeric/index/wrap.h @@ -25,6 +25,8 @@ struct WrapArgs { // copy information from original array to the // `wrapped` one const Legion::DomainPoint shape; // shape of the original array + const bool has_input; + const Array& in = Array(); }; class WrapTask : public CuNumericTask { @@ -41,4 +43,31 @@ class WrapTask : public CuNumericTask { #endif }; +__CUDA_HD__ static int64_t compute_idx(const int64_t i, const int64_t volume, const bool&) +{ + return i % volume; +} + +__CUDA_HD__ static int64_t compute_idx(const int64_t i, + const int64_t volume, + const legate::AccessorRO& indices) +{ + int64_t idx = indices[i]; + int64_t index = idx < 0 ? idx + volume : idx; + return index; +} + +static void check_idx(const int64_t i, + const int64_t volume, + const legate::AccessorRO& indices) +{ + int64_t idx = indices[i]; + int64_t index = idx < 0 ? idx + volume : idx; + if (index < 0 || index >= volume) + throw legate::TaskException("index is out of bounds in index array"); +} +static void check_idx(const int64_t i, const int64_t volume, const bool&) +{ + // don't do anything when wrapping indices +} } // namespace cunumeric diff --git a/src/cunumeric/index/wrap_omp.cc b/src/cunumeric/index/wrap_omp.cc index f95e9123c..531592df9 100644 --- a/src/cunumeric/index/wrap_omp.cc +++ b/src/cunumeric/index/wrap_omp.cc @@ -24,12 +24,14 @@ using namespace legate; template struct WrapImplBody { + template void operator()(const AccessorWO, 1>& out, const Pitches<0>& pitches_out, const Rect<1>& out_rect, const Pitches& pitches_in, const Rect& in_rect, - const bool dense) const + const bool dense, + const IND& indices) const { const int64_t start = out_rect.lo[0]; const int64_t end = out_rect.hi[0]; @@ -38,14 +40,16 @@ struct WrapImplBody { auto outptr = out.ptr(out_rect); #pragma omp parallel for schedule(static) for (int64_t i = start; i <= end; i++) { - const int64_t input_idx = i % in_volume; + check_idx(i, in_volume, indices); + const int64_t input_idx = compute_idx(i, in_volume, indices); auto point = pitches_in.unflatten(input_idx, in_rect.lo); outptr[i - start] = point; } } else { #pragma omp parallel for schedule(static) for (int64_t i = start; i <= end; i++) { - const int64_t input_idx = i % in_volume; + check_idx(i, in_volume, indices); + const int64_t input_idx = compute_idx(i, in_volume, indices); auto point = pitches_in.unflatten(input_idx, in_rect.lo); out[i] = point; } diff --git a/src/cunumeric/index/wrap_template.inl b/src/cunumeric/index/wrap_template.inl index 46885f24e..093f5f5b1 100644 --- a/src/cunumeric/index/wrap_template.inl +++ b/src/cunumeric/index/wrap_template.inl @@ -60,16 +60,30 @@ struct WrapImpl { assert(volume_in != 0); #endif - WrapImplBody()(out, pitches_out, out_rect, pitches_in, input_rect, dense); + if (args.has_input) { + auto in_rect = args.in.shape<1>(); + auto in = args.in.read_accessor(in_rect); // input should be always integer type +#ifdef DEBUG_CUNUMERIC + assert(in_rect == out_rect); +#endif + WrapImplBody()(out, pitches_out, out_rect, pitches_in, input_rect, dense, in); + + } else { + bool tmp = false; + WrapImplBody()(out, pitches_out, out_rect, pitches_in, input_rect, dense, tmp); + } // else } }; template static void wrap_template(TaskContext& context) { - auto shape = context.scalars()[0].value(); - int dim = shape.dim; - WrapArgs args{context.outputs()[0], shape}; + auto shape = context.scalars()[0].value(); + int dim = shape.dim; + bool has_input = context.scalars()[1].value(); + Array tmp_array = Array(); + WrapArgs 
args{ + context.outputs()[0], shape, has_input, has_input ? context.inputs()[0] : tmp_array}; dim_dispatch(dim, WrapImpl{}, args); } diff --git a/src/cunumeric/index/zip.cu b/src/cunumeric/index/zip.cu index 8bdfcd3f0..82d162126 100644 --- a/src/cunumeric/index/zip.cu +++ b/src/cunumeric/index/zip.cu @@ -28,15 +28,15 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const Buffer, 1> index_arrays, const Rect rect, const Pitches pitches, - size_t volume, - DomainPoint shape, + const size_t volume, + const DomainPoint shape, std::index_sequence) { const size_t idx = global_tid_1d(); if (idx >= volume) return; auto p = pitches.unflatten(idx, rect.lo); Legion::Point new_point; - for (size_t i = 0; i < N; i++) { new_point[i] = compute_idx(index_arrays[i][p], shape[i]); } + for (size_t i = 0; i < N; i++) { new_point[i] = compute_idx_cuda(index_arrays[i][p], shape[i]); } out[p] = new_point; } @@ -45,14 +45,16 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) zip_kernel_dense(Point* out, const Buffer index_arrays, const Rect rect, - size_t volume, - DomainPoint shape, + const size_t volume, + const DomainPoint shape, std::index_sequence) { const size_t idx = global_tid_1d(); if (idx >= volume) return; Legion::Point new_point; - for (size_t i = 0; i < N; i++) { new_point[i] = compute_idx(index_arrays[i][idx], shape[i]); } + for (size_t i = 0; i < N; i++) { + new_point[i] = compute_idx_cuda(index_arrays[i][idx], shape[i]); + } out[idx] = new_point; } @@ -62,11 +64,11 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const Buffer, 1> index_arrays, const Rect rect, const Pitches pitches, - int narrays, - size_t volume, - int64_t key_dim, - int64_t start_index, - DomainPoint shape) + const int64_t narrays, + const size_t volume, + const int64_t key_dim, + const int64_t start_index, + const DomainPoint shape) { const size_t idx = global_tid_1d(); if (idx >= volume) return; @@ -74,7 +76,7 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) Legion::Point new_point; for (size_t i = 0; i < start_index; i++) { new_point[i] = p[i]; } for (size_t i = 0; i < narrays; i++) { - new_point[start_index + i] = compute_idx(index_arrays[i][p], shape[start_index + i]); + new_point[start_index + i] = compute_idx_cuda(index_arrays[i][p], shape[start_index + i]); } for (size_t i = (start_index + narrays); i < N; i++) { int64_t j = key_dim + i - narrays; @@ -83,10 +85,63 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) out[p] = new_point; } +template +__global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + check_kernel(Output out, + const Buffer, 1> index_arrays, + const int64_t volume, + const int64_t iters, + const Rect rect, + const Pitches pitches, + const int64_t narrays, + const int64_t start_index, + const DomainPoint shape) +{ + bool value = false; + for (size_t i = 0; i < iters; i++) { + const auto idx = (i * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; + if (idx >= volume) break; + auto p = pitches.unflatten(idx, rect.lo); + for (size_t n = 0; n < narrays; n++) { + const int64_t extent = shape[start_index + n]; + coord_t index = index_arrays[n][p] < 0 ? 
index_arrays[n][p] + extent : index_arrays[n][p]; + bool val = (index < 0 || index >= extent); + SumReduction::fold(value, val); + } // for n + } + reduce_output(out, value); +} + template struct ZipImplBody { using VAL = int64_t; + void check_out_of_bounds(const Buffer, 1>& index_arrays, + const int64_t volume, + const Rect& rect, + const Pitches& pitches, + const int64_t narrays, + const int64_t start_index, + const DomainPoint& shape, + cudaStream_t stream) const + { + const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + size_t shmem_size = THREADS_PER_BLOCK / 32 * sizeof(bool); + DeviceScalarReductionBuffer> out_of_bounds(stream); + if (blocks >= MAX_REDUCTION_CTAS) { + const size_t iters = (blocks + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS; + check_kernel<<>>( + out_of_bounds, index_arrays, volume, iters, rect, pitches, narrays, start_index, shape); + } else { + check_kernel<<>>( + out_of_bounds, index_arrays, volume, 1, rect, pitches, narrays, start_index, shape); + } + CHECK_CUDA_STREAM(stream); + + bool res = out_of_bounds.read(stream); + if (res) throw legate::TaskException("index is out of bounds in index array"); + } + template void operator()(const AccessorWO, DIM>& out, const std::vector>& index_arrays, @@ -101,19 +156,23 @@ struct ZipImplBody { auto stream = get_cached_stream(); const size_t volume = rect.volume(); const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + + auto index_buf = + create_buffer, 1>(index_arrays.size(), Memory::Kind::Z_COPY_MEM); + for (uint32_t idx = 0; idx < index_arrays.size(); ++idx) index_buf[idx] = index_arrays[idx]; + check_out_of_bounds( + index_buf, volume, rect, pitches, index_arrays.size(), start_index, shape, stream); + if (index_arrays.size() == N) { if (dense) { - auto index_buf = + auto index_buf_dense = create_buffer(index_arrays.size(), Memory::Kind::Z_COPY_MEM); for (uint32_t idx = 0; idx < index_arrays.size(); ++idx) { - index_buf[idx] = index_arrays[idx].ptr(rect); + index_buf_dense[idx] = index_arrays[idx].ptr(rect); } zip_kernel_dense<<>>( - out.ptr(rect), index_buf, rect, volume, shape, std::make_index_sequence()); + out.ptr(rect), index_buf_dense, rect, volume, shape, std::make_index_sequence()); } else { - auto index_buf = - create_buffer, 1>(index_arrays.size(), Memory::Kind::Z_COPY_MEM); - for (uint32_t idx = 0; idx < index_arrays.size(); ++idx) index_buf[idx] = index_arrays[idx]; zip_kernel<<>>( out, index_buf, rect, pitches, volume, shape, std::make_index_sequence()); } @@ -121,9 +180,6 @@ struct ZipImplBody { #ifdef DEBUG_CUNUMERIC assert(index_arrays.size() < N); #endif - auto index_buf = - create_buffer, 1>(index_arrays.size(), Memory::Kind::Z_COPY_MEM); - for (uint32_t idx = 0; idx < index_arrays.size(); ++idx) index_buf[idx] = index_arrays[idx]; int num_arrays = index_arrays.size(); zip_kernel<<>>( out, index_buf, rect, pitches, num_arrays, volume, key_dim, start_index, shape); diff --git a/src/cunumeric/index/zip.h b/src/cunumeric/index/zip.h index 61a87104c..ffa5941d5 100644 --- a/src/cunumeric/index/zip.h +++ b/src/cunumeric/index/zip.h @@ -51,4 +51,10 @@ constexpr coord_t compute_idx(coord_t index, coord_t extent) return new_index; } +constexpr coord_t compute_idx_cuda(coord_t index, coord_t extent) +{ + coord_t new_index = index < 0 ? 
index + extent : index; + return new_index; +} + } // namespace cunumeric diff --git a/tests/integration/test_put.py b/tests/integration/test_put.py new file mode 100644 index 000000000..1c69a705b --- /dev/null +++ b/tests/integration/test_put.py @@ -0,0 +1,131 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest +from legate.core import LEGATE_MAX_DIM +from utils.generators import mk_seq_array + +import cunumeric as num + + +@pytest.mark.parametrize("mode", ("wrap", "clip")) +def test_scalar(mode): + # testing the case when indices is a scalar + x = mk_seq_array(np, (3, 4, 5)) + x_num = mk_seq_array(num, (3, 4, 5)) + values = mk_seq_array(np, (6,)) * 10 + values_num = num.array(values) + + np.put(x, 0, values) + num.put(x_num, 0, values_num) + assert np.array_equal(x_num, x) + + np.put(x, 1, -10, mode) + num.put(x_num, 1, -10, mode) + assert np.array_equal(x_num, x) + + # checking transformed array + y = x[:1] + y_num = x_num[:1] + np.put(y, 0, values) + num.put(y_num, 0, values_num) + assert np.array_equal(x_num, x) + + x = np.zeros(1) + x_num = num.zeros(1) + np.put(x, np.arange(4), np.ones(4), mode="clip") + num.put(x_num, num.arange(4), num.ones(4), mode="clip") + assert np.array_equal(x_num, x) + + x = np.arange(5) + x_num = num.array(x) + indices = np.array([1, 4]) + indices_num = num.array(indices) + np.put(x, indices, 10) + num.put(x_num, indices_num, 10) + assert np.array_equal(x_num, x) + + x = np.zeros(()) + x_num = num.zeros(()) + np.put(x, 0, 1) + num.put(x_num, 0, 1) + assert np.array_equal(x_num, x) + + x = np.zeros(()) + x_num = num.zeros(()) + np.put(x, [0], 1) + num.put(x_num, [0], 1) + assert np.array_equal(x_num, x) + + x = np.zeros(()) + x_num = num.zeros(()) + np.put(x, [0], [1]) + num.put(x_num, [0], [1]) + assert np.array_equal(x_num, x) + + +def test_indices_type_convert(): + x = mk_seq_array(np, (3, 4, 5)) + x_num = mk_seq_array(num, (3, 4, 5)) + values = mk_seq_array(np, (6,)) * 10 + values_num = num.array(values) + indices = np.array([-2, 2], dtype=np.int32) + indices_num = num.array(indices) + np.put(x, indices, values) + num.put(x_num, indices_num, values_num) + assert np.array_equal(x_num, x) + + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_ndim(ndim): + shape = (5,) * ndim + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + shape_in = (3,) * ndim + np_indices = mk_seq_array(np, shape_in) + num_indices = mk_seq_array(num, shape_in) + shape_val = (2,) * ndim + np_values = mk_seq_array(np, shape_val) * 10 + num_values = mk_seq_array(num, shape_val) * 10 + + np.put(np_arr, np_indices, np_values) + num.put(num_arr, num_indices, num_values) + assert np.array_equal(np_arr, num_arr) + + +INDICES = ([1, 2, 3, 100], [[2, 1], [3, 100]], [1], [100]) + + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +@pytest.mark.parametrize("mode", ("wrap", "clip")) +@pytest.mark.parametrize("indices", INDICES) +def test_ndim_mode(ndim, mode, 
indices): + shape = (5,) * ndim + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + shape_val = (2,) * ndim + np_values = mk_seq_array(np, shape_val) * 10 + num_values = mk_seq_array(num, shape_val) * 10 + + np.put(np_arr, indices, np_values, mode=mode) + num.put(num_arr, indices, num_values, mode=mode) + assert np.array_equal(np_arr, num_arr) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) From 47d1e8b492df59244218529655de68fa1b36812d Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 19 Oct 2022 11:23:57 -0700 Subject: [PATCH 19/89] Update version number (#663) --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 53bbc0790..ee10d8337 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,7 +47,7 @@ include(rapids-cuda) include(rapids-export) include(rapids-find) -set(cunumeric_version 22.10.00) +set(cunumeric_version 22.12.00) # For now we want the optimization flags to match on both normal make and cmake # builds so we override the cmake defaults here for release, this changes From 8e902287330f990e78071d17da756ed91a89817a Mon Sep 17 00:00:00 2001 From: xialu00 <110973296+xialu00@users.noreply.github.com> Date: Sat, 22 Oct 2022 09:17:23 +0800 Subject: [PATCH 20/89] Testcase add test cases for test_squeeze.py and test_transpose.py (#662) * add test cases for test_tile.py and test_repeat.py * fix bug * fix bug * add test cases for test_squeeze.py and test_transpose.py * fix bug * fix bug --- tests/integration/test_squeeze.py | 166 +++++++++++++---- tests/integration/test_transpose.py | 278 +++++++++++++++++++++++++--- 2 files changed, 385 insertions(+), 59 deletions(-) diff --git a/tests/integration/test_squeeze.py b/tests/integration/test_squeeze.py index 68067422d..84ac8be2e 100644 --- a/tests/integration/test_squeeze.py +++ b/tests/integration/test_squeeze.py @@ -13,39 +13,143 @@ # limitations under the License. 
# +import numpy as np import pytest -import cunumeric as np - -x = np.array([[[1, 2, 3]]]) - - -def test_default(): - y = x.squeeze() - - assert np.array_equal(y, [1, 2, 3]) - - -def test_axis_1d(): - y = x.squeeze(axis=1) - - assert np.array_equal(y, [[1, 2, 3]]) - - -def test_axis_2d(): - x = np.array([[[1], [2], [3]]]) - - y = x.squeeze(axis=(0, 2)) - - assert np.array_equal(y, [1, 2, 3]) - - -def test_idempotent(): - x = np.array([1, 2, 3]) - - y = x.squeeze() - - assert x is y +import cunumeric as num + +DIM = 5 +SIZES = [ + (0,), + (1), + (DIM), + (0, 1), + (1, 0), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 0, 0), + (1, 1, 0), + (1, 0, 1), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] + + +@pytest.mark.xfail +def test_none_array_compare(): + res_num = num.squeeze(None) # AttributeError: 'NoneType' + res_np = np.squeeze(None) # return None + assert np.array_equal(res_num, res_np, equal_nan=True) + + +def test_none_array(): + # numpy returned None + msg = r"NoneType" + with pytest.raises(AttributeError, match=msg): + num.squeeze(None) + + +def test_num_invalid_axis(): + size = (1, 2, 1) + a = num.random.randint(low=-10, high=10, size=size) + msg = r"one" + with pytest.raises(ValueError, match=msg): + num.squeeze(a, axis=1) + + +def test_array_invalid_axis(): + size = (1, 2, 1) + a = num.random.randint(low=-10, high=10, size=size) + msg = r"one" + with pytest.raises(ValueError, match=msg): + a.squeeze(axis=1) + + +def test_num_axis_out_bound(): + size = (1, 2, 1) + a = num.random.randint(low=-10, high=10, size=size) + msg = r"bounds" + with pytest.raises(np.AxisError, match=msg): + num.squeeze(a, axis=3) + + +def test_array_axis_out_bound(): + size = (1, 2, 1) + a = num.random.randint(-10, 10, size=size) + msg = r"bounds" + with pytest.raises(np.AxisError, match=msg): + a.squeeze(axis=3) + + +@pytest.mark.parametrize("axes", (-1, -3)) +def test_num_axis_negative(axes): + size = (1, 2, 1) + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.squeeze(a, axis=axes) + res_num = num.squeeze(b, axis=axes) + assert np.array_equal(res_num, res_np) + + +@pytest.mark.parametrize("axes", (-1, -3)) +def test_array_axis_negative(axes): + size = (1, 2, 1) + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.squeeze(axis=axes) + res_num = b.squeeze(axis=axes) + assert np.array_equal(res_num, res_np) + + +@pytest.mark.parametrize("size", SIZES, ids=str) +def test_num_basic(size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.squeeze(a) + res_num = num.squeeze(b) + assert np.array_equal(res_num, res_np) + + +@pytest.mark.parametrize("size", SIZES, ids=str) +def test_array_basic(size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.squeeze() + res_num = b.squeeze() + assert np.array_equal(res_num, res_np) + + +@pytest.mark.parametrize( + "size", (s for s in SIZES if type(s) == tuple if 1 in s), ids=str +) +def test_num_axis(size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + + for k, axis in enumerate(a.shape): + if axis == 1: + res_np = np.squeeze(a, axis=k) + res_num = num.squeeze(b, axis=k) + assert np.array_equal(res_num, res_np) + + +@pytest.mark.parametrize( + "size", (s for s in SIZES if type(s) == tuple if 1 in s), ids=str +) +def test_array_axis(size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + + for k, axis in enumerate(a.shape): + if axis == 1: + 
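+            # Only axes of extent 1 can be squeezed; squeezing any
+            # other axis raises ValueError (tested separately above).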
res_np = a.squeeze(axis=k) + res_num = b.squeeze(axis=k) + assert np.array_equal(res_num, res_np) if __name__ == "__main__": diff --git a/tests/integration/test_transpose.py b/tests/integration/test_transpose.py index 1bc9c9ffb..4162df713 100644 --- a/tests/integration/test_transpose.py +++ b/tests/integration/test_transpose.py @@ -17,34 +17,256 @@ import cunumeric as num -rect = num.array([[1, 2, 3], [4, 5, 6]]) -square = num.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) - - -@pytest.mark.parametrize("x", (rect, square), ids=("rect", "square")) -class Test_free_function: - def test_forward(self, x): - y = num.transpose(x) - npx = np.array(x) - assert num.array_equal(y, np.transpose(npx)) - - def test_round_trip(self, x): - y = num.transpose(x) - z = num.transpose(y) - assert num.array_equal(x, z) - - -@pytest.mark.parametrize("x", (rect, square), ids=("rect", "square")) -class Test_method: - def test_forward(self, x): - y = x.transpose() - npx = np.array(x) - assert num.array_equal(y, npx.transpose()) - - def test_round_trip(self, x): - y = x.transpose() - z = y.transpose() - assert num.array_equal(x, z) +DIM = 5 +SIZES = [ + 1, + DIM, + (0,), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, DIM - 1, DIM), + (2, DIM - 1, DIM), + (DIM, DIM, DIM), +] + + +class TestModule: + @pytest.mark.xfail + def test_none_array_compare(self): + res_num = num.transpose(None) # AttributeError: 'NoneType' + res_np = np.transpose(None) # return None + assert np.array_equal(res_num, res_np, equal_nan=True) + + def test_none_array(self): + # numpy returned None + msg = r"NoneType" + with pytest.raises(AttributeError, match=msg): + num.transpose(None) + + @pytest.mark.parametrize( + "axes", ((1, 1, 1), (1, 2, 3), (1, 2), (1, 2, 0, 1)) + ) + def test_invalid_axis(self, axes): + size = (2, 3, 4) + a = num.random.randint(low=-10, high=10, size=size) + with pytest.raises(ValueError): + num.transpose(a, axes=axes) + + def test_int_axis(self): + size = (2, 3, 4) + a = num.random.randint(low=-10, high=10, size=size) + # numpy raises "ValueError: axes don't match array". + # cunumeric raises "TypeError". + with pytest.raises(TypeError): + num.transpose(a, axes=2) + + @pytest.mark.xfail + def test_int_axis_compare(self): + size = (2, 3, 4) + a = num.random.randint(low=-10, high=10, size=size) + # numpy raises "ValueError: axes don't match array". + # cunumeric raises "TypeError". + with pytest.raises(ValueError): + num.transpose(a, axes=2) + + @pytest.mark.parametrize("size", SIZES, ids=str) + def test_round(self, size): + a = num.random.randint(low=-10, high=10, size=size) + b = num.transpose(a) + c = num.transpose(b) + assert num.array_equal(c, a) + + @pytest.mark.parametrize("size", SIZES, ids=str) + def test_basic(self, size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.transpose(a) + res_num = num.transpose(b) + assert np.array_equal(res_num, res_np) + + @pytest.mark.parametrize("size", (0, 1, DIM)) + def test_axes_1d(self, size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.transpose(a, axes=0) + res_num = num.transpose(b, axes=0) + assert num.array_equal(res_num, res_np) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", (0, 1, DIM)) + @pytest.mark.parametrize("axes", (-3, 3)) + def test_axes_1d_int(self, size, axes): + # For cunumeric, if array.dim==1, it returns the array itself directly, + # no matter what the axes value is. 
+ # For numpy, it raises + # "numpy.AxisError: axis * is out of bounds for array of dimension 1". + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.transpose(a, axes=axes) + res_num = num.transpose(b, axes=axes) + assert num.array_equal(res_num, res_np) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", (0, 1, DIM)) + @pytest.mark.parametrize("axes", ((1,), (3, 1))) + def test_axes_1d_tuple(self, size, axes): + # For cunumeric, if array.dim==1, it returns the array itself directly, + # no matter what the axes value is. + # For numpy, it raises "ValueError: axes don't match array". + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.transpose(a, axes=axes) + res_num = num.transpose(b, axes=axes) + assert num.array_equal(res_num, res_np) + + @pytest.mark.parametrize( + "size", + ((1, 0), (1, 1), (1, DIM), (DIM, 1), (DIM - 1, DIM - 2), (DIM, DIM)), + ) + @pytest.mark.parametrize("axes", ((0, 1), (1, 0))) + def test_axes_2d(self, size, axes): + a = num.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.transpose(a, axes=axes) + res_num = num.transpose(b, axes=axes) + assert num.array_equal(res_num, res_np) + + @pytest.mark.parametrize( + "size", + ( + (1, 0, 1), + (1, 1, 1), + (DIM, DIM - 1, 1), + (1, 1, DIM), + (2, 3, 4), + (DIM, DIM, DIM), + ), + ) + @pytest.mark.parametrize( + "axes", ((0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)) + ) + def test_axes_3d(self, size, axes): + a = num.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = np.transpose(a, axes=axes) + res_num = num.transpose(b, axes=axes) + assert num.array_equal(res_num, res_np) + + +class TestArrayMethod: + @pytest.mark.parametrize( + "axes", ((1, 1, 1), (1, 2, 3), (1, 2), (1, 2, 0, 1)) + ) + def test_invalid_axis(self, axes): + size = (2, 3, 4) + a = num.random.randint(low=-10, high=10, size=size) + with pytest.raises(ValueError): + a.transpose(axes=axes) + + def test_int_axis(self): + size = (2, 3, 4) + a = num.random.randint(low=-10, high=10, size=size) + # numpy raises "ValueError: axes don't match array". + # cunumeric raises "TypeError". + with pytest.raises(TypeError): + a.transpose(axes=2) + + @pytest.mark.xfail + def test_int_axis_compare(self): + size = (2, 3, 4) + a = num.random.randint(low=-10, high=10, size=size) + # numpy raises "ValueError: axes don't match array". + # cunumeric raises "TypeError". + with pytest.raises(ValueError): + a.transpose(axes=2) + + @pytest.mark.parametrize("size", SIZES, ids=str) + def test_round(self, size): + a = num.random.randint(low=-10, high=10, size=size) + b = a.transpose() + c = b.transpose() + assert num.array_equal(c, a) + + @pytest.mark.parametrize("size", SIZES, ids=str) + def test_basic(self, size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.transpose() + res_num = b.transpose() + assert np.array_equal(res_num, res_np) + + @pytest.mark.parametrize("size", (0, 1, DIM)) + def test_axes_1d(self, size): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.transpose(0) + res_num = b.transpose(0) + assert num.array_equal(res_num, res_np) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", (0, 1, DIM)) + @pytest.mark.parametrize("axes", (-3, 3)) + def test_axes_1d_int(self, size, axes): + # For cunumeric, if array.dim==1, it returns the array itself directly, + # no matter what the axes value is. 
+ # For Numpy, it raises + # "numpy.AxisError: axis * is out of bounds for array of dimension 1". + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.transpose(axes) + res_num = b.transpose(axes) + assert num.array_equal(res_num, res_np) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", (0, 1, DIM)) + @pytest.mark.parametrize("axes", ((1,), (3, 1))) + def test_axes_1d_tuple(self, size, axes): + # For cunumeric, if array.dim==1, it returns the array itself directly, + # no matter what the axes value is. + # For Numpy, it raises "ValueError: axes don't match array". + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.transpose(axes) + res_num = b.transpose(axes) + assert num.array_equal(res_num, res_np) + + @pytest.mark.parametrize( + "size", + ((1, 0), (1, 1), (1, DIM), (DIM, 1), (DIM - 1, DIM - 2), (DIM, DIM)), + ) + @pytest.mark.parametrize("axes", ((0, 1), (1, 0))) + def test_axes_2d(self, size, axes): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.transpose(axes) + res_num = b.transpose(axes) + assert num.array_equal(res_num, res_np) + + @pytest.mark.parametrize( + "size", + ( + (1, 0, 1), + (1, 1, 1), + (DIM, DIM - 1, 1), + (1, 1, DIM), + (2, 3, 4), + (DIM, DIM, DIM), + ), + ) + @pytest.mark.parametrize( + "axes", ((0, 2, 1), (1, 0, 2), (1, 2, 0), (2, 0, 1), (2, 1, 0)) + ) + def test_axes_3d(self, size, axes): + a = np.random.randint(low=-10, high=10, size=size) + b = num.array(a) + res_np = a.transpose(axes) + res_num = b.transpose(axes) + assert num.array_equal(res_num, res_np) if __name__ == "__main__": From 0d5f84e6bb2c331deedfdcf85faa0e687ef146d1 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 25 Oct 2022 10:44:49 -0700 Subject: [PATCH 21/89] Updates for new script-based conda env generation (#651) * Remove old conda env files * Update build documentation * Fix a file link --- BUILD.md | 98 +++++++++------------------------ README.md | 67 +--------------------- conda/environment-test-3.10.yml | 61 -------------------- conda/environment-test-3.8.yml | 61 -------------------- conda/environment-test-3.9.yml | 61 -------------------- 5 files changed, 27 insertions(+), 321 deletions(-) delete mode 100644 conda/environment-test-3.10.yml delete mode 100644 conda/environment-test-3.8.yml delete mode 100644 conda/environment-test-3.9.yml diff --git a/BUILD.md b/BUILD.md index cea0ce4d1..589d7c016 100644 --- a/BUILD.md +++ b/BUILD.md @@ -15,89 +15,42 @@ limitations under the License. --> -# Overview +# Dependencies -The build system is designed to enable two different modes of use: -1. Simple `pip install` for users -2. Highly customizable incremental builds for developers - -We review each of these modes with examples. +Users must have a working installation of the +[Legate Core](https://github.com/nv-legate/legate.core) +library prior to installing cuNumeric. +As for other dependencies, the Dependencies section on the +[Legate Core build instructions](https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md) +also covers cuNumeric. # Building for Users -## Using install.py - -For releases <= 22.07, the main method for building cuNumeric was the `install.py` script. -Although the underlying implementation has significantly changed, `install.py` still supports the -same usage and same set of flags. 
For a full list of flags, users can run: - -``` -$ ./install.py --help -``` - -## Using Conda - -cuNumeric can be installed using Conda by pointing to the required channels (`-c`): - -``` -conda install -c nvidia -c conda-forge -c legate legate-core -``` - -## Using pip - -cuNumeric is not yet registered in a standard pip repository. However, users can still use the -pip installer to build and install cuNumeric. After downloading or cloning the cunumeric source, -users can run the following in the cunumeric folder: - -``` -$ pip install . -``` -or -``` -$ python3 -m pip install . -``` - -This will install cuNumeric in the standard packages directory for the environment Python. -Note: This is currently not sufficient for running cuNumeric programs. cuNumeric relies -on the `legate` launcher from Legate core, which must be installed separately. -For details on installing Legate, consult the [Legate repository](https://github.com/nv-legate/legate.core). - -### Advanced Customization +cuNumeric provides the same source-based installation scripts as Legate Core (a +custom `install.py` script, that is backed by `pip install`). See the +[Legate Core build instructions](https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md) +for help on using these. -If users need to customize details of the underlying CMake build, they can pass -CMake flags through the `SKBUILD_CONFIGURE_OPTIONS` environment variable: - -``` -$ SKBUILD_CONFIGURE_OPTIONS="-D Legion_USE_CUDA:BOOL=ON" \ - pip install . -``` -An alternative syntax using `setup.py` with `scikit-build` is -``` -$ python setup.py install -- -DLegion_USE_CUDA:BOOL=ON -``` +Note: Installing cuNumeric by itself will *not* automatically install Legate Core. # Building for Developers ## Overview -pip uses [scikit-build](https://scikit-build.readthedocs.io/en/latest/) -in `setup.py` to drive the build and installation. A `pip install` will trigger three general actions: +cuNumeric uses the same cmake/scikit-build-based build workflow as Legate Core. +See the +[Legate Core build instructions](https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md) +for an overview. -1. CMake build and installation of C++ libraries -2. CMake generation of configuration files and build-dependent Python files -3. pip installation of Python files +## Example -The CMake build can be configured independently of `pip`, allowing incremental C++ builds directly through CMake. -This simplifies rebuilding `libcunumeric.so` either via command-line or via IDE. -After building the C++ libraries, the `pip install` can be done in "editable" mode using the `-e` flag. -This configures the Python site packages to import the Python source tree directly. -The Python source can then be edited and used directly for testing without requiring a `pip install`. +There are several examples in the `scripts` folder. We walk through the steps in +`build-with-legate-separately-no-install.sh` here. -## Example +We assume a pre-existing Legate Core build. For details on building Legate Core, +consult the [Legate Core repository](https://github.com/nv-legate/legate.core). -There are several examples in the `scripts` folder. We walk through the steps in the `build-with-legate-separately-no-install.sh` here. -We assume a pre-existing Legate CUDA build. For details on building Legate, consult the [Legate repository](https://github.com/nv-legate/legate.core). First, the CMake build needs to be configured: ``` @@ -106,6 +59,7 @@ $ cmake -S . 
-B build -GNinja -D legate_core_ROOT:STRING=path/to/legate/build We point cuNumeric to the Legate *build* tree, not an installation. This generates all build-dependent headers and Python files. + Once configured, we can build the C++ libraries: ``` @@ -118,14 +72,12 @@ Once the C++ libraries are available, we can do an editable (development) pip in ``` $ SKBUILD_BUILD_OPTIONS="-D FIND_CUNUMERIC_CPP=ON -D cunumeric_ROOT=$(pwd)/build" \ python3 -m pip install \ - --root / --no-deps --no-build-isolation + --root / --no-deps --no-build-isolation --editable . ``` -The Python source tree and CMake build tree are now available with the environment Python -for running cuNumeric programs. The diagram below illustrates the +The Python source tree and CMake build tree are now available with the environment Python +for running cuNumeric programs. The diagram below illustrates the complete workflow for building both Legate core and cuNumeric. drawing - - diff --git a/README.md b/README.md index 1d85c650d..93fee01ef 100644 --- a/README.md +++ b/README.md @@ -36,8 +36,6 @@ canonical NumPy implementation. If you have questions, please contact us at legate(at)nvidia.com. 1. [Installation](#installation) -1. [Dependencies](#dependencies) -1. [Building from Source](#building-from-source) 1. [Usage and Execution](#usage-and-execution) 1. [Supported and Planned Features](#supported-and-planned-features) 1. [Supported Types and Dimensions](#supported-types-and-dimensions) @@ -53,6 +51,7 @@ cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric): ``` conda install -c nvidia -c conda-forge -c legate cunumeric ``` + The conda package is compatible with CUDA >= 11.4 (CUDA driver version >= r470), and Volta or later GPU architectures. @@ -60,69 +59,7 @@ Docker image build scripts, as well as specialized install scripts for supported clusters are available on the [quickstart](https://github.com/nv-legate/quickstart) repo. -Read on for general instructions on building cuNumeric from source. - -## Dependencies - -Users must have a working installation of the -[Legate Core](https://github.com/nv-legate/legate.core) -library prior to installing cuNumeric. - -cuNumeric requires the following: - - - Python >= 3.8 - - [CUDA](https://developer.nvidia.com/cuda-downloads) >= 10.2 - - GNU Make - - C++17 compatible compiler (g++, clang, or nvc++) - - Fortran compiler (for building OpenBLAS; not necessary if you provide a pre-built version of OpenBLAS) - - the Python packages listed in any one of the conda environment files: - - `conda/environment-test-3.8.yml` - - `conda/environment-test-3.9.yml` - - `conda/environment-test-3.10.yml` - -See the [corresponding section](https://github.com/nv-legate/legate.core#dependencies) -on the Legate Core instructions for help on installing the required Python packages -using conda. - -cuNumeric is tested and guaranteed to be compatible with Volta and later GPU -architectures. You can use cuNumeric with Pascal GPUs as well, but there could -be issues due to lack of independent thread scheduling. Please report any such -issues on GitHub. - -## Building from Source - -Installation can be done the `install.py` script. -For releases >= 22.10, `pip install` is now available. -The most common installation command is: - -``` -./install.py --with-core -``` - -This will build cuNumeric against the Legate Core installation and then -install cuNumeric into the same location. 
- -If Legate Core has been installed with CUDA support, a working cuTENSOR -installation must also be provided to the installation command with the -`--with-cutensor` option: -``` -./install.py --with-core --with-cutensor -``` - -You can also specify an installation of [OpenBLAS](https://www.openblas.net/) -to use for the build. If you already have an installation of OpenBLAS on your -machine, you can inform the installation script using the `--with-openblas` -option: - -``` -./install.py --with-openblas -``` - -Advanced users can also invoke `install.py --help` to see options for -configuring cuNumeric by invoking the `install.py` script directly. -More information on building - including development workflows - can be found -in the [build instructions](BUILD.md) - +See [BUILD.md]() for instructions on building cuNumeric from source. ## Usage and Execution diff --git a/conda/environment-test-3.10.yml b/conda/environment-test-3.10.yml deleted file mode 100644 index 1066db97e..000000000 --- a/conda/environment-test-3.10.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: cunumeric-test -channels: - - conda-forge -dependencies: - - python=3.10 - - # build - - git - - nccl - - make - - zlib - - cmake>=3.24 - - ninja - - openmpi - - c-compiler - - cxx-compiler - - gcc_linux-64 # [linux64] - - sysroot_linux-64==2.17 # [linux64] - - setuptools>=60 - - cutensor>=1.3.3 - - scikit-build>=0.13.1 - - # runtime - - cffi - - numpy>=1.22 - - opt_einsum - - pyarrow>=5 - - scipy - - typing_extensions - - llvm-openmp - - openblas=*=*openmp* - - # tests - - clang>=8 - - clang-tools>=8 - - colorama - - coverage - - mock - - mypy>=0.961 - - pre-commit - - pynvml - - pytest - - pytest-cov - - pytest-mock - - pytest-lazy-fixture - - types-docutils - - # pip dependencies - - pip - - pip: - # docs - - jinja2 - - pydata-sphinx-theme - - recommonmark - - markdown<3.4.0 - - sphinx>=4.4.0 - - sphinx-copybutton - - sphinx-markdown-tables - - # examples - - tifffile diff --git a/conda/environment-test-3.8.yml b/conda/environment-test-3.8.yml deleted file mode 100644 index 9049ec0b9..000000000 --- a/conda/environment-test-3.8.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: cunumeric-test -channels: - - conda-forge -dependencies: - - python=3.8 - - # build - - git - - nccl - - make - - zlib - - cmake>=3.24 - - ninja - - openmpi - - c-compiler - - cxx-compiler - - gcc_linux-64 # [linux64] - - sysroot_linux-64==2.17 # [linux64] - - setuptools>=60 - - cutensor>=1.3.3 - - scikit-build>=0.13.1 - - # runtime - - cffi - - numpy>=1.22 - - opt_einsum - - pyarrow>=5 - - scipy - - typing_extensions - - llvm-openmp - - openblas=*=*openmp* - - # tests - - clang>=8 - - clang-tools>=8 - - colorama - - coverage - - mock - - mypy>=0.961 - - pre-commit - - pynvml - - pytest - - pytest-cov - - pytest-mock - - pytest-lazy-fixture - - types-docutils - - # pip dependencies - - pip - - pip: - # docs - - jinja2 - - pydata-sphinx-theme - - recommonmark - - markdown<3.4.0 - - sphinx>=4.4.0 - - sphinx-copybutton - - sphinx-markdown-tables - - # examples - - tifffile diff --git a/conda/environment-test-3.9.yml b/conda/environment-test-3.9.yml deleted file mode 100644 index 482277bae..000000000 --- a/conda/environment-test-3.9.yml +++ /dev/null @@ -1,61 +0,0 @@ -name: cunumeric-test -channels: - - conda-forge -dependencies: - - python=3.9 - - # build - - git - - nccl - - make - - zlib - - cmake>=3.24 - - ninja - - openmpi - - c-compiler - - cxx-compiler - - gcc_linux-64 # [linux64] - - sysroot_linux-64==2.17 # [linux64] - - setuptools>=60 - - cutensor>=1.3.3 - - 
scikit-build>=0.13.1 - - # runtime - - cffi - - numpy>=1.22 - - opt_einsum - - pyarrow>=5 - - scipy - - typing_extensions - - llvm-openmp - - openblas=*=*openmp* - - # tests - - clang>=8 - - clang-tools>=8 - - colorama - - coverage - - mock - - mypy>=0.961 - - pre-commit - - pynvml - - pytest - - pytest-cov - - pytest-mock - - pytest-lazy-fixture - - types-docutils - - # pip dependencies - - pip - - pip: - # docs - - jinja2 - - pydata-sphinx-theme - - recommonmark - - markdown<3.4.0 - - sphinx>=4.4.0 - - sphinx-copybutton - - sphinx-markdown-tables - - # examples - - tifffile From f072d660d016286aae70458d755255981b5e46be Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Wed, 26 Oct 2022 06:22:44 -0700 Subject: [PATCH 22/89] Update upload artifact action version (#669) v2 -> v3 to avoid GitHub warnings. Co-authored-by: Marcin Zalewski --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 6abe49c3b..499dce58a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -60,7 +60,7 @@ jobs: if: always() - name: Upload Build Log if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: build-log path: ./**/${{ env.COMMIT }}-build.log.gpg @@ -128,7 +128,7 @@ jobs: cat *artifacts/*/* - name: Upload Log if: always() - uses: actions/upload-artifact@v2 + uses: actions/upload-artifact@v3 with: name: test-${{ matrix.log }}-log path: ./**/${{ env.COMMIT }}-test-${{ matrix.log }}.log.gpg From 5e5105fb396a1be2c43b7ebaf54cdf5c17135bd1 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 26 Oct 2022 14:03:11 -0700 Subject: [PATCH 23/89] Following on comments for PUT (#665) * cheking bounds only for the raise mode * renaming some variables for better readability * fix issue with assertion inside omp loop * Typo in comment Co-authored-by: Manolis Papadakis --- cunumeric/array.py | 2 +- cunumeric/deferred.py | 4 +- cunumeric/eager.py | 4 +- cunumeric/thunk.py | 2 +- src/cunumeric/index/wrap.cc | 27 ++++++----- src/cunumeric/index/wrap.cu | 68 +++++++++++++++------------ src/cunumeric/index/wrap.h | 12 +++++ src/cunumeric/index/wrap_omp.cc | 32 ++++++++----- src/cunumeric/index/wrap_template.inl | 44 +++++++++-------- src/cunumeric/index/zip.h | 7 +++ src/cunumeric/index/zip_omp.cc | 16 +++++-- 11 files changed, 134 insertions(+), 84 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index cd14eda7c..0ac528531 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -2522,7 +2522,7 @@ def put( if values.ndim != indices.ndim or values.size != indices.size: values = values._wrap(indices.size) - self._thunk.put(indices._thunk, values._thunk) + self._thunk.put(indices._thunk, values._thunk, mode == "raise") @add_boilerplate() def trace( diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 3bb5c4db7..8e6d8cacb 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -1686,7 +1686,7 @@ def _diag_helper( task.execute() @auto_convert("indices", "values") - def put(self, indices: Any, values: Any) -> None: + def put(self, indices: Any, values: Any, check_bounds: bool) -> None: if indices.base.kind == Future or indices.base.transformed: change_shape = indices.base.kind == Future @@ -1722,6 +1722,7 @@ def put(self, indices: Any, values: Any) -> None: task.add_output(indirect.base) task.add_scalar_arg(shape, (ty.int64,)) task.add_scalar_arg(True, bool) # has_input + task.add_scalar_arg(check_bounds, bool) 
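The new `check_bounds` scalar threaded through the task launch above mirrors NumPy's `mode` argument to `put`: bounds are validated only for `mode="raise"`, while `mode="wrap"` folds out-of-range indices back into range. A minimal NumPy-level illustration, not part of the patch:

```
import numpy as np

a = np.arange(5)

# mode="wrap" maps out-of-range indices back into bounds, so no bounds
# check is needed (the check_bounds=False path).
np.put(a, [7], 100, mode="wrap")  # 7 wraps to 7 % 5 == 2
print(a)  # [  0   1 100   3   4]

# mode="raise" (the default) must validate every index first
# (the check_bounds=True path).
try:
    np.put(a, [7], 200, mode="raise")
except IndexError as e:
    print(e)  # index 7 is out of bounds for axis 0 with size 5
```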
task.add_input(indices.base) task.add_alignment(indices.base, indirect.base) task.throws_exception(IndexError) @@ -3437,6 +3438,7 @@ def _wrap(self, src: Any, new_len: int) -> None: task.add_output(indirect.base) task.add_scalar_arg(src.shape, (ty.int64,)) task.add_scalar_arg(False, bool) # has_input + task.add_scalar_arg(False, bool) # check bounds task.execute() copy = self.context.create_copy() diff --git a/cunumeric/eager.py b/cunumeric/eager.py index b8cb36ecd..cef2b7b49 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -620,10 +620,10 @@ def _diag_helper( axes = tuple(range(ndims - naxes, ndims)) self.array = diagonal_reference(rhs.array, axes) - def put(self, indices: Any, values: Any) -> None: + def put(self, indices: Any, values: Any, check_bounds: bool) -> None: self.check_eager_args(indices, values) if self.deferred is not None: - self.deferred.put(indices, values) + self.deferred.put(indices, values, check_bounds) else: np.put(self.array, indices.array, values.array) diff --git a/cunumeric/thunk.py b/cunumeric/thunk.py index e1f1dab77..7ade503d0 100644 --- a/cunumeric/thunk.py +++ b/cunumeric/thunk.py @@ -198,7 +198,7 @@ def _diag_helper( ... @abstractmethod - def put(self, indices: Any, values: Any) -> None: + def put(self, indices: Any, values: Any, check_bounds: bool) -> None: ... @abstractmethod diff --git a/src/cunumeric/index/wrap.cc b/src/cunumeric/index/wrap.cc index a5483cbdd..9d8fef331 100644 --- a/src/cunumeric/index/wrap.cc +++ b/src/cunumeric/index/wrap.cc @@ -27,28 +27,29 @@ struct WrapImplBody { template void operator()(const AccessorWO, 1>& out, const Pitches<0>& pitches_out, - const Rect<1>& out_rect, - const Pitches& pitches_in, - const Rect& in_rect, + const Rect<1>& rect_out, + const Pitches& pitches_base, + const Rect& rect_base, const bool dense, + const bool check_bounds, const IND& indices) const { - const int64_t start = out_rect.lo[0]; - const int64_t end = out_rect.hi[0]; - const auto in_volume = in_rect.volume(); + const int64_t start = rect_out.lo[0]; + const int64_t end = rect_out.hi[0]; + const auto volume_base = rect_base.volume(); if (dense) { - auto outptr = out.ptr(out_rect); + auto outptr = out.ptr(rect_out); for (int64_t i = start; i <= end; i++) { - check_idx(i, in_volume, indices); - const int64_t input_idx = compute_idx(i, in_volume, indices); - auto point = pitches_in.unflatten(input_idx, in_rect.lo); + if (check_bounds) check_idx(i, volume_base, indices); + const int64_t input_idx = compute_idx(i, volume_base, indices); + auto point = pitches_base.unflatten(input_idx, rect_base.lo); outptr[i - start] = point; } } else { for (int64_t i = start; i <= end; i++) { - check_idx(i, in_volume, indices); - const int64_t input_idx = compute_idx(i, in_volume, indices); - auto point = pitches_in.unflatten(input_idx, in_rect.lo); + if (check_bounds) check_idx(i, volume_base, indices); + const int64_t input_idx = compute_idx(i, volume_base, indices); + auto point = pitches_base.unflatten(input_idx, rect_base.lo); out[i] = point; } } // else diff --git a/src/cunumeric/index/wrap.cu b/src/cunumeric/index/wrap.cu index af81073d6..cc82418a0 100644 --- a/src/cunumeric/index/wrap.cu +++ b/src/cunumeric/index/wrap.cu @@ -29,7 +29,7 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const AccessorRO indices, const int64_t start, const int64_t volume, - const int64_t in_volume, + const int64_t volume_base, const int64_t iters) { bool value = false; @@ -37,8 +37,8 @@ __global__ static void 
__launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const auto idx = (i * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x; if (idx >= volume) break; auto index_tmp = indices[idx + start]; - int64_t index = index_tmp < 0 ? index_tmp + in_volume : index_tmp; - bool val = (index < 0 || index >= in_volume); + int64_t index = index_tmp < 0 ? index_tmp + volume_base : index_tmp; + bool val = (index < 0 || index >= volume_base); SumReduction::fold(value, val); } reduce_output(out, value); @@ -51,16 +51,16 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const int64_t volume, const Pitches<0> pitches_out, const Point<1> out_lo, - const Pitches pitches_in, - const Point in_lo, - const size_t in_volume, + const Pitches pitches_base, + const Point base_lo, + const size_t volume_base, const IND indices) { const auto idx = global_tid_1d(); if (idx >= volume) return; - const int64_t input_idx = compute_idx((idx + start), in_volume, indices); + const int64_t input_idx = compute_idx((idx + start), volume_base, indices); auto out_p = pitches_out.unflatten(idx, out_lo); - auto p = pitches_in.unflatten(input_idx, in_lo); + auto p = pitches_base.unflatten(input_idx, base_lo); out[out_p] = p; } @@ -69,15 +69,15 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) wrap_kernel_dense(Point* out, const int64_t start, const int64_t volume, - const Pitches pitches_in, - const Point in_lo, - const size_t in_volume, + const Pitches pitches_base, + const Point base_lo, + const size_t volume_base, const IND indices) { const auto idx = global_tid_1d(); if (idx >= volume) return; - const int64_t input_idx = compute_idx((idx + start), in_volume, indices); - auto p = pitches_in.unflatten(input_idx, in_lo); + const int64_t input_idx = compute_idx((idx + start), volume_base, indices); + auto p = pitches_base.unflatten(input_idx, base_lo); out[idx] = p; } @@ -85,7 +85,7 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) void check_out_of_bounds(const bool& indices, const int64_t start, const int64_t volume, - const int64_t volume_in, + const int64_t volume_base, cudaStream_t stream) { } @@ -93,7 +93,7 @@ void check_out_of_bounds(const bool& indices, void check_out_of_bounds(const AccessorRO& indices, const int64_t start, const int64_t volume, - const int64_t volume_in, + const int64_t volume_base, cudaStream_t stream) { const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; @@ -103,10 +103,10 @@ void check_out_of_bounds(const AccessorRO& indices, if (blocks >= MAX_REDUCTION_CTAS) { const size_t iters = (blocks + MAX_REDUCTION_CTAS - 1) / MAX_REDUCTION_CTAS; check_kernel<<>>( - out_of_bounds, indices, start, volume, volume_in, iters); + out_of_bounds, indices, start, volume, volume_base, iters); } else { check_kernel<<>>( - out_of_bounds, indices, start, volume, volume_in, 1); + out_of_bounds, indices, start, volume, volume_base, 1); } CHECK_CUDA_STREAM(stream); @@ -119,27 +119,35 @@ struct WrapImplBody { template void operator()(const AccessorWO, 1>& out, const Pitches<0>& pitches_out, - const Rect<1>& out_rect, - const Pitches& pitches_in, - const Rect& in_rect, + const Rect<1>& rect_out, + const Pitches& pitches_base, + const Rect& rect_base, const bool dense, + const bool check_bounds, const IND& indices) const { - auto stream = get_cached_stream(); - const int64_t start = out_rect.lo[0]; - const int64_t volume = out_rect.volume(); - const auto in_volume = in_rect.volume(); - const size_t blocks = (volume + 
THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + auto stream = get_cached_stream(); + const int64_t start = rect_out.lo[0]; + const int64_t volume = rect_out.volume(); + const auto volume_base = rect_base.volume(); + const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - check_out_of_bounds(indices, start, volume, in_volume, stream); + if (check_bounds) check_out_of_bounds(indices, start, volume, volume_base, stream); if (dense) { - auto outptr = out.ptr(out_rect); + auto outptr = out.ptr(rect_out); wrap_kernel_dense<<>>( - outptr, start, volume, pitches_in, in_rect.lo, in_volume, indices); + outptr, start, volume, pitches_base, rect_base.lo, volume_base, indices); } else { - wrap_kernel<<>>( - out, start, volume, pitches_out, out_rect.lo, pitches_in, in_rect.lo, in_volume, indices); + wrap_kernel<<>>(out, + start, + volume, + pitches_out, + rect_out.lo, + pitches_base, + rect_base.lo, + volume_base, + indices); } CHECK_CUDA_STREAM(stream); } diff --git a/src/cunumeric/index/wrap.h b/src/cunumeric/index/wrap.h index 181a9b97c..8c4167983 100644 --- a/src/cunumeric/index/wrap.h +++ b/src/cunumeric/index/wrap.h @@ -26,6 +26,7 @@ struct WrapArgs { // `wrapped` one const Legion::DomainPoint shape; // shape of the original array const bool has_input; + const bool check_bounds; const Array& in = Array(); }; @@ -70,4 +71,15 @@ static void check_idx(const int64_t i, const int64_t volume, const bool&) { // don't do anything when wrapping indices } + +static bool check_idx_omp(const int64_t i, + const int64_t volume, + const legate::AccessorRO& indices) +{ + int64_t idx = indices[i]; + int64_t index = idx < 0 ? idx + volume : idx; + return (index < 0 || index >= volume); +} +static bool check_idx_omp(const int64_t i, const int64_t volume, const bool&) { return false; } + } // namespace cunumeric diff --git a/src/cunumeric/index/wrap_omp.cc b/src/cunumeric/index/wrap_omp.cc index 531592df9..9387e2e3b 100644 --- a/src/cunumeric/index/wrap_omp.cc +++ b/src/cunumeric/index/wrap_omp.cc @@ -27,33 +27,39 @@ struct WrapImplBody { template void operator()(const AccessorWO, 1>& out, const Pitches<0>& pitches_out, - const Rect<1>& out_rect, - const Pitches& pitches_in, - const Rect& in_rect, + const Rect<1>& rect_out, + const Pitches& pitches_base, + const Rect& rect_base, const bool dense, + const bool check_bounds, const IND& indices) const { - const int64_t start = out_rect.lo[0]; - const int64_t end = out_rect.hi[0]; - const auto in_volume = in_rect.volume(); + const int64_t start = rect_out.lo[0]; + const int64_t end = rect_out.hi[0]; + const auto volume_base = rect_base.volume(); + std::atomic is_out_of_bounds = false; if (dense) { - auto outptr = out.ptr(out_rect); + auto outptr = out.ptr(rect_out); #pragma omp parallel for schedule(static) for (int64_t i = start; i <= end; i++) { - check_idx(i, in_volume, indices); - const int64_t input_idx = compute_idx(i, in_volume, indices); - auto point = pitches_in.unflatten(input_idx, in_rect.lo); + if (check_bounds) + if (check_idx_omp(i, volume_base, indices)) is_out_of_bounds = true; + const int64_t input_idx = compute_idx(i, volume_base, indices); + auto point = pitches_base.unflatten(input_idx, rect_base.lo); outptr[i - start] = point; } } else { #pragma omp parallel for schedule(static) for (int64_t i = start; i <= end; i++) { - check_idx(i, in_volume, indices); - const int64_t input_idx = compute_idx(i, in_volume, indices); - auto point = pitches_in.unflatten(input_idx, in_rect.lo); + if (check_bounds) + if (check_idx_omp(i, volume_base, 
indices)) is_out_of_bounds = true; + const int64_t input_idx = compute_idx(i, volume_base, indices); + auto point = pitches_base.unflatten(input_idx, rect_base.lo); out[i] = point; } } // else + + if (is_out_of_bounds) throw legate::TaskException("index is out of bounds in index array"); } }; diff --git a/src/cunumeric/index/wrap_template.inl b/src/cunumeric/index/wrap_template.inl index 093f5f5b1..9a9fc3b28 100644 --- a/src/cunumeric/index/wrap_template.inl +++ b/src/cunumeric/index/wrap_template.inl @@ -34,15 +34,15 @@ struct WrapImpl { void operator()(WrapArgs& args) const { using VAL = Point; - auto out_rect = args.out.shape<1>(); // output array is always 1D - auto out = args.out.write_accessor, 1>(out_rect); + auto rect_out = args.out.shape<1>(); // output array is always 1D + auto out = args.out.write_accessor, 1>(rect_out); Pitches<0> pitches_out; - size_t volume_out = pitches_out.flatten(out_rect); + size_t volume_out = pitches_out.flatten(rect_out); if (volume_out == 0) return; #ifndef LEGION_BOUNDS_CHECKS - bool dense = out.accessor.is_dense_row_major(out_rect); + bool dense = out.accessor.is_dense_row_major(rect_out); #else bool dense = false; #endif @@ -52,25 +52,27 @@ struct WrapImpl { point_lo[dim] = 0; point_hi[dim] = args.shape[dim] - 1; } - Rect input_rect(point_lo, point_hi); + Rect rect_base(point_lo, point_hi); - Pitches pitches_in; - size_t volume_in = pitches_in.flatten(input_rect); + Pitches pitches_base; + size_t volume_base = pitches_base.flatten(rect_base); #ifdef DEBUG_CUNUMERIC - assert(volume_in != 0); + assert(volume_base != 0); #endif if (args.has_input) { - auto in_rect = args.in.shape<1>(); - auto in = args.in.read_accessor(in_rect); // input should be always integer type + auto rect_in = args.in.shape<1>(); + auto in = args.in.read_accessor(rect_in); // input should be always integer type #ifdef DEBUG_CUNUMERIC - assert(in_rect == out_rect); + assert(rect_in == rect_out); #endif - WrapImplBody()(out, pitches_out, out_rect, pitches_in, input_rect, dense, in); + WrapImplBody()( + out, pitches_out, rect_out, pitches_base, rect_base, dense, args.check_bounds, in); } else { bool tmp = false; - WrapImplBody()(out, pitches_out, out_rect, pitches_in, input_rect, dense, tmp); + WrapImplBody()( + out, pitches_out, rect_out, pitches_base, rect_base, dense, args.check_bounds, tmp); } // else } }; @@ -78,12 +80,16 @@ struct WrapImpl { template static void wrap_template(TaskContext& context) { - auto shape = context.scalars()[0].value(); - int dim = shape.dim; - bool has_input = context.scalars()[1].value(); - Array tmp_array = Array(); - WrapArgs args{ - context.outputs()[0], shape, has_input, has_input ? context.inputs()[0] : tmp_array}; + auto shape = context.scalars()[0].value(); + int dim = shape.dim; + bool has_input = context.scalars()[1].value(); + bool check_bounds = context.scalars()[2].value(); + Array tmp_array = Array(); + WrapArgs args{context.outputs()[0], + shape, + has_input, + check_bounds, + has_input ? context.inputs()[0] : tmp_array}; dim_dispatch(dim, WrapImpl{}, args); } diff --git a/src/cunumeric/index/zip.h b/src/cunumeric/index/zip.h index ffa5941d5..e3c7af8a7 100644 --- a/src/cunumeric/index/zip.h +++ b/src/cunumeric/index/zip.h @@ -51,6 +51,13 @@ constexpr coord_t compute_idx(coord_t index, coord_t extent) return new_index; } +constexpr std::pair compute_idx_omp(coord_t index, coord_t extent) +{ + coord_t new_index = index < 0 ? 
index + extent : index; + bool out_of_bounds = (new_index < 0 || new_index >= extent); + return {new_index, out_of_bounds}; +} + constexpr coord_t compute_idx_cuda(coord_t index, coord_t extent) { coord_t new_index = index < 0 ? index + extent : index; diff --git a/src/cunumeric/index/zip_omp.cc b/src/cunumeric/index/zip_omp.cc index 14a3c4b25..aa014547e 100644 --- a/src/cunumeric/index/zip_omp.cc +++ b/src/cunumeric/index/zip_omp.cc @@ -37,7 +37,8 @@ struct ZipImplBody { const DomainPoint& shape, std::index_sequence) const { - const size_t volume = rect.volume(); + const size_t volume = rect.volume(); + std::atomic is_out_of_bounds = false; if (index_arrays.size() == N) { if (dense) { std::vector indx_ptrs = {index_arrays[Is].ptr(rect)...}; @@ -46,7 +47,9 @@ struct ZipImplBody { for (size_t idx = 0; idx < volume; ++idx) { Legion::Point new_point; for (size_t i = 0; i < N; i++) { - new_point[i] = compute_idx(indx_ptrs[i][idx], shape[i]); + auto pair = compute_idx_omp(indx_ptrs[i][idx], shape[i]); + new_point[i] = pair.first; + if (pair.second) is_out_of_bounds = true; } outptr[idx] = new_point; } @@ -56,7 +59,9 @@ struct ZipImplBody { auto p = pitches.unflatten(idx, rect.lo); Legion::Point new_point; for (size_t i = 0; i < N; i++) { - new_point[i] = compute_idx(index_arrays[i][p], shape[i]); + auto pair = compute_idx_omp(index_arrays[i][p], shape[i]); + new_point[i] = pair.first; + if (pair.second) is_out_of_bounds = true; } out[p] = new_point; } @@ -71,7 +76,9 @@ struct ZipImplBody { Legion::Point new_point; for (size_t i = 0; i < start_index; i++) { new_point[i] = p[i]; } for (size_t i = 0; i < index_arrays.size(); i++) { - new_point[start_index + i] = compute_idx(index_arrays[i][p], shape[start_index + i]); + auto pair = compute_idx_omp(index_arrays[i][p], shape[start_index + i]); + new_point[start_index + i] = pair.first; + if (pair.second) is_out_of_bounds = true; } for (size_t i = (start_index + index_arrays.size()); i < N; i++) { int64_t j = key_dim + i - index_arrays.size(); @@ -80,6 +87,7 @@ struct ZipImplBody { out[p] = new_point; } } + if (is_out_of_bounds) throw legate::TaskException("index is out of bounds in index array"); } }; From bbd0887ffe0f7b8d559b45af872f94013183853d Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 27 Oct 2022 10:10:58 -0700 Subject: [PATCH 24/89] Fix BUILD.md link --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 93fee01ef..62eecb153 100644 --- a/README.md +++ b/README.md @@ -59,7 +59,7 @@ Docker image build scripts, as well as specialized install scripts for supported clusters are available on the [quickstart](https://github.com/nv-legate/quickstart) repo. -See [BUILD.md]() for instructions on building cuNumeric from source. +See [BUILD.md](BUILD.md) for instructions on building cuNumeric from source. ## Usage and Execution From 63e6206ed627396c2376656db9b2864787cd666a Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Tue, 1 Nov 2022 09:09:06 +0800 Subject: [PATCH 25/89] Enhance test_put_along_axis, test_take_along_axis. (#671) * Enhance test_put_along_axis, test_take_along_axis. 
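For context on the behavior the enhanced tests exercise: `take_along_axis` and `put_along_axis` accept index arrays that broadcast against the target array everywhere except along the chosen axis. A small NumPy illustration, not part of the patch (`keepdims` on `argmax` needs NumPy >= 1.22, which the environment files above already require):

```
import numpy as np

a = np.array([[10, 30, 20],
              [60, 40, 50]])

# A (2, 1) index array broadcasts against the (2, 3) array everywhere
# except along axis 1.
idx = np.argmax(a, axis=1, keepdims=True)
print(np.take_along_axis(a, idx, axis=1))  # [[30], [60]]

# put_along_axis writes through the same broadcasting; the scalar 0 is
# broadcast to every selected position.
np.put_along_axis(a, idx, 0, axis=1)
print(a)  # [[10  0 20], [ 0 40 50]]
```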
* Address comments --- tests/integration/test_put_along_axis.py | 203 +++++++++++++++++++++- tests/integration/test_take_along_axis.py | 106 ++++++++++- tests/integration/utils/generators.py | 14 ++ 3 files changed, 320 insertions(+), 3 deletions(-) diff --git a/tests/integration/test_put_along_axis.py b/tests/integration/test_put_along_axis.py index 4df2108c1..a289ffc2a 100644 --- a/tests/integration/test_put_along_axis.py +++ b/tests/integration/test_put_along_axis.py @@ -16,13 +16,26 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM -from utils.generators import mk_seq_array +from utils.generators import ( + broadcasts_to, + broadcasts_to_along_axis, + mk_seq_array, +) import cunumeric as num -def test_None(): +def equivalent_shapes_gen(shape): + yield shape + for i in range(len(shape) - 1): + if shape[i] == 1: + i += 1 + yield shape[i:] + else: + break + +def test_axis_None(): x = mk_seq_array(np, (256,)) x_num = mk_seq_array(num, (256,)) @@ -54,6 +67,192 @@ def test_ndim(ndim): assert np.array_equal(np_a, num_a) +@pytest.mark.parametrize( + "axis", range(-1, 3), ids=lambda axis: f"(axis={axis})" +) +def test_full(axis): + shape = (3, 4, 5) + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + + size = shape[axis] + axis_values = (0, size - 1, size * 2) + + for shape_idx in broadcasts_to_along_axis(shape, axis, axis_values): + np_indices = mk_seq_array(np, shape_idx) % shape[axis] + num_indices = mk_seq_array(num, shape_idx) % shape[axis] + np_a = np_arr.copy() + num_a = num_arr.copy() + np.put_along_axis(np_a, np_indices, 100, axis=axis) + num.put_along_axis(num_a, num_indices, 100, axis=axis) + assert np.array_equal(np_a, num_a) + + +def test_values(): + shape = (3, 4, 5) + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + shape_idx = (3, 4, 5) + axis = 0 + np_indices = mk_seq_array(np, shape_idx) % shape[axis] + num_indices = mk_seq_array(num, shape_idx) % shape[axis] + + for shape_values in broadcasts_to(shape_idx): + for s in equivalent_shapes_gen(shape_values): + np_values = mk_seq_array(np, s) + num_values = mk_seq_array(num, s) + np_a = np_arr.copy() + num_a = num_arr.copy() + np.put_along_axis(np_a, np_indices, np_values, axis=axis) + num.put_along_axis(num_a, num_indices, num_values, axis=axis) + assert np.array_equal(np_a, num_a) + + +def test_empty_indice(): + x = mk_seq_array(np, (10,)) + x_num = mk_seq_array(num, (10,)) + + indices = np.array([], dtype=int) + indices_num = num.array([], dtype=int) + + np.put_along_axis(x, indices, 99, axis=0) + num.put_along_axis(x_num, indices_num, 99, axis=0) + assert np.array_equal(x_num, x) + + +class TestPutAlongAxisErrors: + def setup(self): + self.a = num.ones((3, 3)) + self.ai = num.ones((3, 3), dtype=int) + + @pytest.mark.parametrize("dtype", (bool, float), ids=str) + def test_indices_bad_type(self, dtype): + ai = num.ones((3, 3), dtype=dtype) + msg = "`indices` must be an integer array" + with pytest.raises(TypeError, match=msg): + num.put_along_axis(self.a, ai, 100, axis=0) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "shape", ((3, 2), (3, 0)), ids=lambda shape: f"(shape={shape})" + ) + def test_indices_bad_shape(self, shape): + # In Numpy, it raises IndexError. + # In cuNumeric, it raises ValueError. 
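        # (Context, not from the patch: NumPy's IndexError here comes
        # from broadcasting. With the axis-0 dimension removed, the
        # index shape (3, 2) reduces to (2,) while a's shape (3, 3)
        # reduces to (3,); 2 vs. 3 cannot broadcast, and NumPy reports
        # that as an IndexError rather than a ValueError.)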
+ ai = num.ones(shape, dtype=int) + msg = "shape mismatch: indexing arrays could not be broadcast" + with pytest.raises(IndexError, match=msg): + num.put_along_axis(self.a, ai, 100, axis=0) + + @pytest.mark.parametrize( + "shape", ((1,), (3, 3, 1)), ids=lambda shape: f"(shape={shape})" + ) + def test_indices_bad_dims(self, shape): + ai = num.ones(shape, dtype=int) + msg = "`indices` and `a` must have the same number of dimensions" + with pytest.raises(ValueError, match=msg): + num.put_along_axis(self.a, ai, 100, axis=0) + + @pytest.mark.parametrize( + "value", (-4, 3), ids=lambda value: f"(value={value})" + ) + def test_indices_out_of_bound(self, value): + ai = num.full((3, 3), value, dtype=int) + msg = "out of bounds" + with pytest.raises(IndexError, match=msg): + num.put_along_axis(self.a, ai, 100, axis=0) + + @pytest.mark.parametrize( + "axis", (2, -3), ids=lambda axis: f"(axis={axis})" + ) + def test_axis_out_of_bound(self, axis): + msg = "out of bounds" + # In Numpy, it raises AxisError + with pytest.raises(ValueError, match=msg): + num.put_along_axis(self.a, self.ai, 100, axis=axis) + + def test_axis_float(self): + axis = 0.0 + msg = "integer argument expected" + with pytest.raises(TypeError, match=msg): + num.put_along_axis(self.a, self.ai, 100, axis=axis) + + def test_axis_none_indice_not_1d(self): + axis = None + msg = "indices must be 1D if axis=None" + with pytest.raises(ValueError, match=msg): + num.put_along_axis(self.a, self.ai, 100, axis=axis) + + def test_axis_none_andim_greater_than_one(self): + ai = num.ones((3 * 3), dtype=int) + axis = None + msg = "a.ndim>1 case is not supported when axis=None" + with pytest.raises(ValueError, match=msg): + num.put_along_axis(self.a, ai, 100, axis=axis) + + @pytest.mark.parametrize( + "shape", + ((1, 2), (4, 1), (0,), (2,), (4,), (1, 0)), + ids=lambda shape: f"(shape={shape})", + ) + def test_values_bad_shape(self, shape): + values = num.ones(shape) + with pytest.raises(ValueError): + num.put_along_axis(self.a, self.ai, values, axis=0) + + def test_values_bad_shape2(self): + shape = (3, 3, 1) + values = num.ones(shape) + with pytest.raises(ValueError): + num.put_along_axis(self.a, self.ai, values, axis=0) + + @pytest.mark.parametrize( + "shape", ((0,), (5,), (4, 5)), ids=lambda shape: f"(shape={shape})" + ) + def test_values_axis_none(self, shape): + a = mk_seq_array(num, (10,)) + ai = mk_seq_array(num, (7,)) + values = mk_seq_array(num, shape) + with pytest.raises(ValueError): + num.put_along_axis(a, ai, values, None) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "shape", ((0,), (5,), (4, 5)), ids=lambda shape: f"(shape={shape})" + ) + def test_values_axis_none_DIVERGENC(self, shape): + # In Numpy, all 3 cases pass + # In cuNumeric, all 3 cases raise ValueError "Shape did not match" + np_arr = mk_seq_array(np, (10,)) + num_arr = mk_seq_array(num, (10,)) + + indices = mk_seq_array(np, (7,)) + indices_num = mk_seq_array(num, (7,)) + + values = mk_seq_array(np, shape) + values_num = mk_seq_array(num, shape) + + np.put_along_axis(np_arr, indices, values, None) + num.put_along_axis(num_arr, indices_num, values_num, None) + assert np.array_equal(np_arr, num_arr) + + def test_a_none(self): + ai = num.array([1, 1, 1]) + msg = "object has no attribute 'ndim'" + with pytest.raises(AttributeError, match=msg): + num.put_along_axis(None, ai, 100, axis=0) + + def test_indice_none(self): + msg = "'NoneType' object has no attribute 'dtype'" + with pytest.raises(AttributeError, match=msg): + num.put_along_axis(self.a, None, 100, axis=0) + + def 
test_values_none(self): + msg = "'NoneType' object has no attribute 'dtype'" + with pytest.raises(AttributeError, match=msg): + num.put_along_axis(self.a, self.ai, None, axis=0) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_take_along_axis.py b/tests/integration/test_take_along_axis.py index 4d98680a4..b19638ae9 100644 --- a/tests/integration/test_take_along_axis.py +++ b/tests/integration/test_take_along_axis.py @@ -16,7 +16,7 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM -from utils.generators import mk_seq_array +from utils.generators import broadcasts_to_along_axis, mk_seq_array import cunumeric as num @@ -42,6 +42,110 @@ def test_ndim(ndim): assert np.array_equal(res_num, res_np) +@pytest.mark.parametrize( + "axis", range(-1, 3), ids=lambda axis: f"(axis={axis})" +) +def test_full(axis): + shape = (3, 4, 5) + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + + size = shape[axis] + axis_values = (0, size - 1, size * 2) + + for shape_idx in broadcasts_to_along_axis(shape, axis, axis_values): + np_indices = mk_seq_array(np, shape_idx) % shape[axis] + num_indices = mk_seq_array(num, shape_idx) % shape[axis] + res_np = np.take_along_axis(np_arr, np_indices, axis=axis) + res_num = num.take_along_axis(num_arr, num_indices, axis=axis) + assert np.array_equal(res_num, res_np) + + +def test_empty_indice(): + np_arr = mk_seq_array(np, (10,)) + num_arr = mk_seq_array(num, (10,)) + np_indices = np.array([], dtype=int) + num_indices = num.array([], dtype=int) + res_np = np.take_along_axis(np_arr, np_indices, axis=0) + res_num = num.take_along_axis(num_arr, num_indices, axis=0) + assert np.array_equal(res_num, res_np) + + +class TestTakeAlongAxisErrors: + def setup(self): + self.a = num.ones((3, 3)) + self.ai = num.ones((3, 3), dtype=int) + + @pytest.mark.parametrize("dtype", (bool, float), ids=str) + def test_indices_bad_type(self, dtype): + ai = num.ones((3, 3), dtype=dtype) + msg = "`indices` must be an integer array" + with pytest.raises(TypeError, match=msg): + num.take_along_axis(self.a, ai, axis=0) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "shape", ((3, 2), (3, 0)), ids=lambda shape: f"(shape={shape})" + ) + def test_indices_bad_shape(self, shape): + # In Numpy, it raises IndexError. + # In cuNumeric, it raises ValueError. 
+ ai = num.ones(shape, dtype=int) + msg = "shape mismatch: indexing arrays could not be broadcast" + with pytest.raises(IndexError, match=msg): + num.take_along_axis(self.a, ai, axis=0) + + @pytest.mark.parametrize( + "shape", ((1,), (3, 3, 1)), ids=lambda shape: f"(shape={shape})" + ) + def test_indices_bad_dims(self, shape): + ai = num.ones(shape, dtype=int) + msg = "`indices` and `a` must have the same number of dimensions" + with pytest.raises(ValueError, match=msg): + num.take_along_axis(self.a, ai, axis=0) + + @pytest.mark.parametrize( + "value", (-4, 3), ids=lambda value: f"(value={value})" + ) + def test_indices_out_of_bound(self, value): + ai = num.full((3, 3), value, dtype=int) + msg = "out of bounds" + with pytest.raises(IndexError, match=msg): + num.take_along_axis(self.a, ai, axis=0) + + @pytest.mark.parametrize( + "axis", (2, -3), ids=lambda axis: f"(axis={axis})" + ) + def test_axis_out_of_bound(self, axis): + msg = "out of bounds" + # In Numpy, it raises AxisError + with pytest.raises(ValueError, match=msg): + num.take_along_axis(self.a, self.ai, axis=axis) + + def test_axis_float(self): + axis = 0.0 + msg = "integer argument expected" + with pytest.raises(TypeError, match=msg): + num.take_along_axis(self.a, self.ai, axis=axis) + + def test_axis_none_indice_not_1d(self): + axis = None + msg = "indices must be 1D if axis=None" + with pytest.raises(ValueError, match=msg): + num.take_along_axis(self.a, self.ai, axis=axis) + + def test_a_none(self): + ai = num.array([1, 1, 1]) + msg = "object has no attribute 'ndim'" + with pytest.raises(AttributeError, match=msg): + num.take_along_axis(None, ai, axis=0) + + def test_indice_none(self): + msg = "'NoneType' object has no attribute 'dtype'" + with pytest.raises(AttributeError, match=msg): + num.take_along_axis(self.a, None, axis=0) + + if __name__ == "__main__": import sys diff --git a/tests/integration/utils/generators.py b/tests/integration/utils/generators.py index cc5c521b3..624f34b9d 100644 --- a/tests/integration/utils/generators.py +++ b/tests/integration/utils/generators.py @@ -80,3 +80,17 @@ def permutes_to(tgt_shape): for (i, j) in enumerate(axes): src_shape[j] = tgt_shape[i] yield (axes, tuple(src_shape)) + + +def broadcasts_to_along_axis(tgt_shape, axis, values): + """ + Generates all shapes that broadcast to `tgt_shape` along axis for + each value. 
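    For example (illustrative, not part of the patch): with
    tgt_shape=(3, 5), axis=1 and values=(1, 10), each shape `s` that
    broadcasts to the axis-removed shape (3,) is recombined as
    s[:axis] + (v,) + s[axis:], yielding shapes such as (3, 1),
    (3, 10), (1, 1) and (1, 10).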
+ """ + axis = axis % (len(tgt_shape)) + tgt_shape_axis_removed = tgt_shape[:axis] + tgt_shape[axis + 1 :] + + for s in broadcasts_to(tgt_shape_axis_removed): + for v in values: + shape = s[:axis] + (v,) + s[axis:] + yield shape From 8f0233c3bbcdc54a6ab7d1c839fd73f70ea11522 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 1 Nov 2022 14:50:38 -0700 Subject: [PATCH 26/89] Eliminate empty kernel launch in `cunumeric.unique` (#675) * Handle empty sub-stores correctly unique for GPUs * Add a test case with empty subregions --- src/cunumeric/set/unique.cu | 33 ++++++++++++++++++-------------- tests/integration/test_unique.py | 10 ++++++++++ 2 files changed, 29 insertions(+), 14 deletions(-) diff --git a/src/cunumeric/set/unique.cu b/src/cunumeric/set/unique.cu index 38528b633..11e9e6fc1 100644 --- a/src/cunumeric/set/unique.cu +++ b/src/cunumeric/set/unique.cu @@ -153,27 +153,32 @@ struct UniqueImplBody { // Make a copy of the input as we're going to sort it auto temp = create_buffer(volume); VAL* ptr = temp.ptr(0); - if (in.accessor.is_dense_arbitrary(rect)) { - auto* src = in.ptr(rect.lo); - CHECK_CUDA(cudaMemcpyAsync(ptr, src, sizeof(VAL) * volume, cudaMemcpyDeviceToDevice, stream)); - } else { - const size_t num_blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; - copy_into_buffer<<>>( - ptr, in, rect.lo, pitches, volume); - } - CHECK_CUDA_STREAM(stream); + VAL* end = ptr; + if (volume > 0) { + if (in.accessor.is_dense_arbitrary(rect)) { + auto* src = in.ptr(rect.lo); + CHECK_CUDA( + cudaMemcpyAsync(ptr, src, sizeof(VAL) * volume, cudaMemcpyDeviceToDevice, stream)); + } else { + const size_t num_blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + copy_into_buffer<<>>( + ptr, in, rect.lo, pitches, volume); + } + CHECK_CUDA_STREAM(stream); - // Find unique values - thrust::sort(thrust::cuda::par.on(stream), ptr, ptr + volume); - auto* end = thrust::unique(thrust::cuda::par.on(stream), ptr, ptr + volume); + // Find unique values + thrust::sort(thrust::cuda::par.on(stream), ptr, ptr + volume); + auto* end = thrust::unique(thrust::cuda::par.on(stream), ptr, ptr + volume); + } Piece result; result.second = end - ptr; auto buf_size = (get_aligned_size(result.second * sizeof(VAL)) + sizeof(VAL) - 1) / sizeof(VAL); assert(end - ptr <= buf_size); result.first = create_buffer(buf_size); - CHECK_CUDA(cudaMemcpyAsync( - result.first.ptr(0), ptr, sizeof(VAL) * result.second, cudaMemcpyDeviceToDevice, stream)); + if (result.second > 0) + CHECK_CUDA(cudaMemcpyAsync( + result.first.ptr(0), ptr, sizeof(VAL) * result.second, cudaMemcpyDeviceToDevice, stream)); if (comms.size() > 0) { // The launch domain is 1D because of the output region diff --git a/tests/integration/test_unique.py b/tests/integration/test_unique.py index 54eb17092..4f3d84274 100644 --- a/tests/integration/test_unique.py +++ b/tests/integration/test_unique.py @@ -20,6 +20,16 @@ import cunumeric as num +def test_with_nonzero(): + (a,) = num.nonzero(num.array([1, 1, 0, 0])) + a_np = a.__array__() + + b = num.unique(a) + b_np = num.unique(a_np) + + assert np.array_equal(b, b_np) + + @pytest.mark.parametrize("ndim", range(LEGATE_MAX_DIM + 1)) def test_ndim(ndim): shape = (4,) * ndim From 329339b7f6bd4717e34a5640772bcd236b5d5b3b Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 1 Nov 2022 15:44:20 -0700 Subject: [PATCH 27/89] Some quality-of-life changes (#674) * Catch up changes in the type traits * Allow ThreadLocalStorage to be used for values bigger than 64B --- src/cunumeric/binary/binary_op_util.h | 18 
++++---- src/cunumeric/omp_help.h | 7 +++- src/cunumeric/scan/scan_local_template.inl | 15 +++---- src/cunumeric/unary/convert_template.inl | 4 +- src/cunumeric/unary/convert_util.h | 48 +++++++++++----------- src/cunumeric/unary/unary_op_util.h | 46 ++++++++++----------- src/cunumeric/unary/unary_red_util.h | 8 ++-- 7 files changed, 73 insertions(+), 73 deletions(-) diff --git a/src/cunumeric/binary/binary_op_util.h b/src/cunumeric/binary/binary_op_util.h index 6d1375e13..a4c1538ec 100644 --- a/src/cunumeric/binary/binary_op_util.h +++ b/src/cunumeric/binary/binary_op_util.h @@ -311,7 +311,7 @@ template struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = - not(CODE == legate::LegateTypeCode::BOOL_LT or legate::is_complex::value); + not(CODE == legate::LegateTypeCode::BOOL_LT or legate::is_complex::value); BinaryOp(const std::vector& args) {} template ::value>* = nullptr> @@ -459,7 +459,7 @@ struct BinaryOp { atol_ = args[1].scalar(); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const T& a, const T& b) const { using std::fabs; @@ -469,7 +469,7 @@ struct BinaryOp { atol_ + rtol_ * static_cast(fabs(b)); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const T& a, const T& b) const { return static_cast(abs(a - b)) <= atol_ + rtol_ * static_cast(abs(b)); @@ -606,13 +606,13 @@ struct BinaryOp { static constexpr bool valid = true; BinaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const _T& a, const _T& b) const { return static_cast(a.real()) && static_cast(b.real()); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const _T& a, const _T& b) const { return static_cast(a) && static_cast(b); @@ -626,13 +626,13 @@ struct BinaryOp { BinaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const _T& a, const _T& b) const { return static_cast(a.real()) || static_cast(b.real()); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const _T& a, const _T& b) const { return static_cast(a) || static_cast(b); @@ -645,13 +645,13 @@ struct BinaryOp { static constexpr bool valid = true; BinaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const _T& a, const _T& b) const { return static_cast(a.real()) != static_cast(b.real()); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const _T& a, const _T& b) const { return static_cast(a) != static_cast(b); diff --git a/src/cunumeric/omp_help.h b/src/cunumeric/omp_help.h index 0093d265c..e4b4cb49f 100644 --- a/src/cunumeric/omp_help.h +++ b/src/cunumeric/omp_help.h @@ -25,10 +25,13 @@ template struct ThreadLocalStorage { private: static constexpr size_t CACHE_LINE_SIZE = 64; + // Round the element size to the nearest multiple of cache line size + static constexpr size_t PER_THREAD_SIZE = + (sizeof(VAL) + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE * CACHE_LINE_SIZE; public: ThreadLocalStorage(size_t num_threads) - : storage_(CACHE_LINE_SIZE * num_threads), num_threads_(num_threads) + : storage_(PER_THREAD_SIZE * num_threads), num_threads_(num_threads) { } ~ThreadLocalStorage() {} @@ -36,7 +39,7 @@ struct ThreadLocalStorage { public: VAL& operator[](size_t idx) { - return *reinterpret_cast(storage_.data() + 
CACHE_LINE_SIZE * idx); + return *reinterpret_cast(storage_.data() + PER_THREAD_SIZE * idx); } private: diff --git a/src/cunumeric/scan/scan_local_template.inl b/src/cunumeric/scan/scan_local_template.inl index 0680ee34a..56c038fcc 100644 --- a/src/cunumeric/scan/scan_local_template.inl +++ b/src/cunumeric/scan/scan_local_template.inl @@ -33,9 +33,8 @@ struct ScanLocalImpl { // Case where NANs are transformed template ::value || - legate::is_complex>::value)>* = nullptr> + std::enable_if_t::value || + legate::is_complex::value)>* = nullptr> void operator()(ScanLocalArgs& args) const { using OP = ScanOp; @@ -58,12 +57,10 @@ struct ScanLocalImpl { ScanLocalNanImplBody()(func, out, in, args.sum_vals, pitches, rect); } // Case where NANs are as is - template < - LegateTypeCode CODE, - int DIM, - std::enable_if_t::value || - legate::is_complex>::value))>* = nullptr> + template ::value || + legate::is_complex::value))>* = nullptr> void operator()(ScanLocalArgs& args) const { using OP = ScanOp; diff --git a/src/cunumeric/unary/convert_template.inl b/src/cunumeric/unary/convert_template.inl index 41265892a..fe35005d2 100644 --- a/src/cunumeric/unary/convert_template.inl +++ b/src/cunumeric/unary/convert_template.inl @@ -75,7 +75,7 @@ template struct ConvertDispatch { template ::value || - legate::is_complex>::value) || + legate::is_complex::value) || NAN_OP == ConvertCode::NOOP>* = nullptr> void operator()(ConvertArgs& args) const { @@ -85,7 +85,7 @@ struct ConvertDispatch { template ::value || - legate::is_complex>::value) || + legate::is_complex::value) || (NAN_OP == ConvertCode::NOOP))>* = nullptr> void operator()(ConvertArgs& args) const { diff --git a/src/cunumeric/unary/convert_util.h b/src/cunumeric/unary/convert_util.h index 3d4a10d48..03e3692c8 100644 --- a/src/cunumeric/unary/convert_util.h +++ b/src/cunumeric/unary/convert_util.h @@ -52,17 +52,17 @@ struct ConvertOp { using SRC = legate::legate_type_of; using DST = legate::legate_type_of; - template < - typename _SRC = SRC, - std::enable_if_t::value or legate::is_complex::value>* = nullptr> + template ::value or + legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { return static_cast(src); } - template ::value and !legate::is_complex::value>* = - nullptr> + template ::value and + !legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { if constexpr (DST_TYPE == legate::LegateTypeCode::BOOL_LT) @@ -79,13 +79,13 @@ template struct ConvertOp { using SRC = legate::legate_type_of; - template ::value>* = nullptr> + template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const { return static_cast<__half>(static_cast(src)); } - template ::value>* = nullptr> + template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const { return static_cast<__half>(static_cast(src.real())); @@ -107,17 +107,17 @@ struct ConvertOp { using SRC = legate::legate_type_of; using DST = legate::legate_type_of; - template < - typename _SRC = SRC, - std::enable_if_t::value or legate::is_complex::value>* = nullptr> + template ::value or + legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? static_cast(1) : static_cast(src); } - template ::value and !legate::is_complex::value>* = - nullptr> + template ::value and + !legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? 
static_cast(1) : static_cast(src.real()); @@ -128,14 +128,14 @@ template struct ConvertOp { using SRC = legate::legate_type_of; - template ::value>* = nullptr> + template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? static_cast<__half>(1) : static_cast<__half>(static_cast(src)); } - template ::value>* = nullptr> + template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? static_cast<__half>(1) @@ -159,17 +159,17 @@ struct ConvertOp { using SRC = legate::legate_type_of; using DST = legate::legate_type_of; - template < - typename _SRC = SRC, - std::enable_if_t::value or legate::is_complex::value>* = nullptr> + template ::value or + legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? static_cast(0) : static_cast(src); } - template ::value and !legate::is_complex::value>* = - nullptr> + template ::value and + !legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? static_cast(0) : static_cast(src.real()); @@ -180,14 +180,14 @@ template struct ConvertOp { using SRC = legate::legate_type_of; - template ::value>* = nullptr> + template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? static_cast<__half>(0) : static_cast<__half>(static_cast(src)); } - template ::value>* = nullptr> + template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const { return cunumeric::is_nan(src) ? static_cast<__half>(0) diff --git a/src/cunumeric/unary/unary_op_util.h b/src/cunumeric/unary/unary_op_util.h index 61d11da17..f5012df0d 100644 --- a/src/cunumeric/unary/unary_op_util.h +++ b/src/cunumeric/unary/unary_op_util.h @@ -189,7 +189,7 @@ static constexpr bool is_floating_point = template static constexpr bool is_floating_or_complex = - is_floating_point || legate::is_complex>::value; + is_floating_point || legate::is_complex::value; template struct UnaryOp { @@ -203,7 +203,7 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const { return abs(x); @@ -225,9 +225,9 @@ struct UnaryOp { return x; } - template < - typename _T = T, - std::enable_if_t::value and !std::is_integral<_T>::value>* = nullptr> + template ::value and + !std::is_integral<_T>::value>* = nullptr> constexpr _T operator()(const _T& x) const { using std::fabs; @@ -428,13 +428,13 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr T operator()(const T& x) const { return T{x.real(), -x.imag()}; } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr T operator()(const T& x) const { return x; @@ -537,13 +537,13 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr T operator()(const T& x) const { return std::exp2(x); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr T operator()(const T& x) const { using std::exp; @@ -578,14 +578,14 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const { using std::expm1; return expm1(x); } - template ::value>* = nullptr> + template ::value>* = 
nullptr> constexpr decltype(auto) operator()(const T& x) const { using std::exp; @@ -634,7 +634,7 @@ struct UnaryOp { template struct UnaryOp { using T = legate::legate_type_of; - static constexpr bool valid = legate::is_complex::value; + static constexpr bool valid = legate::is_complex_type::value; UnaryOp(const std::vector& args) {} @@ -789,14 +789,14 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const { using std::log1p; return log1p(x); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const { using std::log; @@ -826,14 +826,14 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const { using std::log2; return log2(x); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const { using std::log; @@ -862,13 +862,13 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const T& x) const { return !static_cast(x); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr bool operator()(const T& x) const { return !static_cast(x.real()); @@ -911,7 +911,7 @@ struct UnaryOp { template struct UnaryOp { using T = legate::legate_type_of; - static constexpr bool valid = legate::is_complex::value; + static constexpr bool valid = legate::is_complex_type::value; UnaryOp(const std::vector& args) {} @@ -952,13 +952,13 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const { return _T(std::rint(x.real()), std::rint(x.imag())); } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const { return std::rint(x); @@ -1002,7 +1002,7 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const { if (x.real() != 0) { @@ -1012,7 +1012,7 @@ struct UnaryOp { } } - template ::value>* = nullptr> + template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const { return detail::sign(x); diff --git a/src/cunumeric/unary/unary_red_util.h b/src/cunumeric/unary/unary_red_util.h index 04edd774e..ab193a7df 100644 --- a/src/cunumeric/unary/unary_red_util.h +++ b/src/cunumeric/unary/unary_red_util.h @@ -151,7 +151,7 @@ struct UnaryRedOp { template struct UnaryRedOp { - static constexpr bool valid = !legate::is_complex>::value; + static constexpr bool valid = !legate::is_complex::value; using RHS = legate::legate_type_of; using VAL = RHS; @@ -174,7 +174,7 @@ struct UnaryRedOp { template struct UnaryRedOp { - static constexpr bool valid = !legate::is_complex>::value; + static constexpr bool valid = !legate::is_complex::value; using RHS = legate::legate_type_of; using VAL = RHS; @@ -243,7 +243,7 @@ struct UnaryRedOp { template struct UnaryRedOp { - static constexpr bool valid = !legate::is_complex>::value; + static constexpr bool valid = !legate::is_complex::value; using RHS = legate::legate_type_of; using VAL = Argval; @@ -276,7 +276,7 @@ struct UnaryRedOp { template struct UnaryRedOp { - static constexpr bool valid = !legate::is_complex>::value; + 
static constexpr bool valid = !legate::is_complex::value; using RHS = legate::legate_type_of; using VAL = Argval; From 03b1d3c4e9de770442f168156046555430e70850 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Tue, 1 Nov 2022 17:34:32 -0700 Subject: [PATCH 28/89] Mark sort and unique variants as concurrent (#676) --- src/cunumeric/set/unique.cc | 6 +++++- src/cunumeric/sort/sort.cc | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/cunumeric/set/unique.cc b/src/cunumeric/set/unique.cc index ad07ec718..997d99cd6 100644 --- a/src/cunumeric/set/unique.cc +++ b/src/cunumeric/set/unique.cc @@ -58,7 +58,11 @@ struct UniqueImplBody { namespace // unnamed { -static void __attribute__((constructor)) register_tasks(void) { UniqueTask::register_variants(); } +static void __attribute__((constructor)) register_tasks(void) +{ + UniqueTask::register_variants( + {{LEGATE_GPU_VARIANT, legate::VariantOptions{}.with_concurrent(true)}}); +} } // namespace } // namespace cunumeric diff --git a/src/cunumeric/sort/sort.cc b/src/cunumeric/sort/sort.cc index bf273b59f..3f3a192f3 100644 --- a/src/cunumeric/sort/sort.cc +++ b/src/cunumeric/sort/sort.cc @@ -73,7 +73,12 @@ struct SortImplBody { namespace // unnamed { -static void __attribute__((constructor)) register_tasks(void) { SortTask::register_variants(); } +static void __attribute__((constructor)) register_tasks(void) +{ + auto options = legate::VariantOptions{}.with_concurrent(true); + SortTask::register_variants( + {{LEGATE_CPU_VARIANT, options}, {LEGATE_GPU_VARIANT, options}, {LEGATE_OMP_VARIANT, options}}); +} } // namespace } // namespace cunumeric From 5d3f743ba21e6458d7ee611935ec952314edad77 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 4 Nov 2022 13:18:35 -0700 Subject: [PATCH 29/89] Make `install.py` reconfigure editable installs when build type changes (#670) * pass -mindepth 1 so we don't accidentally delete the search root if it matches one of the `-d` names * pass unknown flags to `pip install` command * use CMAKE_ARGS instead of SKBUILD_CONFIGURE_OPTIONS to work around scikit-build bug * replace SKBUILD_CONFIGURE_OPTIONS with CMAKE_ARGS everywhere --- install.py | 13 ++++++++++--- scripts/build-install.sh | 4 ++-- scripts/build-no-install.sh | 4 ++-- scripts/build-separately-no-install.sh | 4 ++-- scripts/build-with-legate-no-install.sh | 4 ++-- scripts/build-with-legate-separately-no-install.sh | 4 ++-- ...tall-global-legion-legate-core-and-cunumeric.sh | 14 +++++++------- 7 files changed, 27 insertions(+), 20 deletions(-) diff --git a/install.py b/install.py index c6ee2d80d..8bed64992 100755 --- a/install.py +++ b/install.py @@ -296,14 +296,21 @@ def validate_path(path): pip_install_cmd += ["--no-deps", "--no-build-isolation"] pip_install_cmd += ["--upgrade"] + if unknown is not None: + pip_install_cmd += unknown + pip_install_cmd += ["."] if verbose: pip_install_cmd += ["-vv"] - cmake_flags = [] + # Also use preexisting CMAKE_ARGS from conda if set + cmake_flags = cmd_env.get("CMAKE_ARGS", "").split(" ") if cmake_generator: - cmake_flags += [f"-G'{cmake_generator}'"] + if " " not in cmake_generator: + cmake_flags += [f"-G{cmake_generator}"] + else: + cmake_flags += [f"-G'{cmake_generator}'"] if debug or verbose: cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")] @@ -352,7 +359,7 @@ def validate_path(path): cmd_env.update( { "SKBUILD_BUILD_OPTIONS": f"-j{str(thread_count)}", - "SKBUILD_CONFIGURE_OPTIONS": "\n".join(cmake_flags), + "CMAKE_ARGS": " ".join(cmake_flags), } ) diff --git 
a/scripts/build-install.sh b/scripts/build-install.sh index 4d9bdbfc8..8adb472d2 100755 --- a/scripts/build-install.sh +++ b/scripts/build-install.sh @@ -13,7 +13,7 @@ source ./scripts/util/uninstall-global-legion-legate-core-and-cunumeric.sh rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -29,7 +29,7 @@ ninja_args="-j$(nproc --ignore=2)" # Build cunumeric + cunumeric_python and install into the current Python environment SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ --no-deps --no-build-isolation \ diff --git a/scripts/build-no-install.sh b/scripts/build-no-install.sh index 623ca788d..c398eda58 100755 --- a/scripts/build-no-install.sh +++ b/scripts/build-no-install.sh @@ -11,7 +11,7 @@ source ./scripts/util/compiler-flags.sh rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -27,7 +27,7 @@ ninja_args="-j$(nproc --ignore=2)" # Build legion_core + legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/build-separately-no-install.sh b/scripts/build-separately-no-install.sh index b9de045b4..8d8078723 100644 --- a/scripts/build-separately-no-install.sh +++ b/scripts/build-separately-no-install.sh @@ -11,7 +11,7 @@ source ./scripts/util/compiler-flags.sh rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -44,7 +44,7 @@ cmake_args+=" # Build legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/build-with-legate-no-install.sh b/scripts/build-with-legate-no-install.sh index ad1da812a..498745e31 100644 --- a/scripts/build-with-legate-no-install.sh +++ b/scripts/build-with-legate-no-install.sh @@ -13,7 +13,7 @@ source ./scripts/util/read-legate-core-root.sh "$0" rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -28,7 +28,7 @@ ninja_args="-j$(nproc --ignore=2)" # Build legion_core + legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/build-with-legate-separately-no-install.sh b/scripts/build-with-legate-separately-no-install.sh index c04e7f9ed..fa9e97d05 100755 --- a/scripts/build-with-legate-separately-no-install.sh +++ b/scripts/build-with-legate-separately-no-install.sh @@ -13,7 +13,7 
@@ source ./scripts/util/read-legate-core-root.sh "$0" rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} # Define CMake configuration arguments -cmake_args= +cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi @@ -45,7 +45,7 @@ cmake_args+=" # Build legion_core_python and perform an "editable" install SKBUILD_BUILD_OPTIONS="$ninja_args" \ -SKBUILD_CONFIGURE_OPTIONS="$cmake_args" \ +CMAKE_ARGS="$cmake_args" \ SETUPTOOLS_ENABLE_FEATURES="legacy-editable" \ python -m pip install \ --root / --prefix "$CONDA_PREFIX" \ diff --git a/scripts/util/uninstall-global-legion-legate-core-and-cunumeric.sh b/scripts/util/uninstall-global-legion-legate-core-and-cunumeric.sh index a759dd37f..4f37467bb 100755 --- a/scripts/util/uninstall-global-legion-legate-core-and-cunumeric.sh +++ b/scripts/util/uninstall-global-legion-legate-core-and-cunumeric.sh @@ -1,10 +1,10 @@ #! /usr/bin/env bash -rm -rf $(find "$CONDA_PREFIX/lib" -type d -name '*cunumeric*') \ - $(find "$CONDA_PREFIX/lib" -type f -name 'libcunumeric*') \ - $(find "$CONDA_PREFIX/lib" -type f -name 'cunumeric.egg-link') \ - $(find "$CONDA_PREFIX/include" -type f -name 'tci.h') \ - $(find "$CONDA_PREFIX/include" -type d -name 'tci') \ - $(find "$CONDA_PREFIX/include" -type d -name 'tblis') \ - $(find "$CONDA_PREFIX/include" -type d -name 'cunumeric') \ +rm -rf $(find "$CONDA_PREFIX/lib" -mindepth 1 -type d -name '*cunumeric*') \ + $(find "$CONDA_PREFIX/lib" -mindepth 1 -type f -name 'libcunumeric*') \ + $(find "$CONDA_PREFIX/lib" -mindepth 1 -type f -name 'cunumeric.egg-link') \ + $(find "$CONDA_PREFIX/include" -mindepth 1 -type f -name 'tci.h') \ + $(find "$CONDA_PREFIX/include" -mindepth 1 -type d -name 'tci') \ + $(find "$CONDA_PREFIX/include" -mindepth 1 -type d -name 'tblis') \ + $(find "$CONDA_PREFIX/include" -mindepth 1 -type d -name 'cunumeric') \ ; From 1aba3a01f76b99997340bfae63f267c896e3a431 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Mon, 7 Nov 2022 13:50:11 -0800 Subject: [PATCH 30/89] configure test overrides in the project test.py (#678) --- test.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/test.py b/test.py index 8dcda54be..50e22ee88 100755 --- a/test.py +++ b/test.py @@ -18,10 +18,27 @@ import sys +from legate.tester import PER_FILE_ARGS, SKIPPED_EXAMPLES from legate.tester.config import Config from legate.tester.test_plan import TestPlan from legate.tester.test_system import TestSystem +SKIPPED_EXAMPLES.update( + { + "examples/ingest.py", + "examples/kmeans_sort.py", + "examples/lstm_full.py", + "examples/wgrad.py", + } +) + +PER_FILE_ARGS.update( + { + "examples/lstm_full.py": ["--file", "resources/lstm_input.txt"], + } +) + + if __name__ == "__main__": config = Config(sys.argv) From 22b3f172f5ea631bfd4591e7839ea6a883efb56b Mon Sep 17 00:00:00 2001 From: xialu00 <110973296+xialu00@users.noreply.github.com> Date: Tue, 8 Nov 2022 10:25:20 +0800 Subject: [PATCH 31/89] add test case for test_compress.py and test_extract.py (#672) * add test case for test_compress.py * add test case for test_extract.py * fix comments * fix comments --- cunumeric/array.py | 8 +- tests/integration/test_compress.py | 173 ++++++++++++++++++-------- tests/integration/test_extract.py | 187 +++++++++++++++++++---------- 3 files changed, 255 insertions(+), 113 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 0ac528531..6dc818112 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -2096,11 +2096,15 @@ def compress( """ a = self 
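+        # condition objects without an ``ndim`` attribute (e.g. None or a
+        # plain sequence) hit the AttributeError branch below, so they get
+        # the same dimension-mismatch ValueError as a mis-shaped array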
- if condition.ndim != 1: + try: + if condition.ndim != 1: + raise ValueError( + "Dimension mismatch: condition must be a 1D array" + ) + except AttributeError: raise ValueError( "Dimension mismatch: condition must be a 1D array" ) - condition = condition._warn_and_convert(np.dtype(bool)) if axis is None: diff --git a/tests/integration/test_compress.py b/tests/integration/test_compress.py index 2523dbd18..7247685e6 100644 --- a/tests/integration/test_compress.py +++ b/tests/integration/test_compress.py @@ -21,25 +21,93 @@ import cunumeric as num -def test_1d(): - a = mk_seq_array(np, (10,)) - a_num = num.array(a) - - res = np.compress([True, False, True], a, axis=0) - res_num = num.compress([True, False, True], a_num, axis=0) - - assert np.array_equal(res_num, res) - - -@pytest.mark.parametrize("axis", (0, 1)) -def test_2d_axis(axis): - a = np.array([[1, 2], [3, 4], [5, 6]]) - num_a = num.array(a) - - res_np = np.compress([0, 1], a, axis=axis) - res_num = num.compress([0, 1], num_a, axis=axis) - - assert np.array_equal(res_num, res_np) +@pytest.mark.xfail +def test_none_array(): + res_np = np.compress([0], None) # numpy return [] + # cuNumeric raises: + # AttributeError: 'NoneType' object has no attribute 'compress' + res_num = num.compress([0], None) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.xfail +def test_empty_array(): + res_np = np.compress([0], []) # numpy return [] + # cuNumeric raises: ValueError: + # Shape mismatch: condition contains entries that are out of bounds + res_num = num.compress([0], []) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("con", (-3, 0, 3, None, False, True)) +def test_negative_condition(con): + a = num.array([1, 2, 3, 4]) + with pytest.raises(ValueError): + num.compress(con, a) + + +def test_condition_out_bound(): + a = num.array([1, 2, 3, 4]) + msg = r"bounds" + with pytest.raises(ValueError, match=msg): + num.compress([1, 2, 3, 4, 5], a) + + +def test_axis_out_bound(): + a = num.array([1, 2, 3, 4]) + msg = r"bounds" + with pytest.raises(ValueError, match=msg): + num.compress([1, 2, 3, 4], a, axis=1) + + +@pytest.mark.parametrize( + "con", ([True, True], [True, True, True, True, True, True]) +) +def test_out_bounds(con): + a = num.array([1, 2, 3, 4]) + b = num.array([-1, -2, -3, -4]) + with pytest.raises(ValueError): + num.compress(con, a, out=b) + + +@pytest.mark.xfail +def test_dtype_out1(): + a = mk_seq_array(np, (4,)) + b = mk_seq_array(num, (4,)) + out_np = np.random.random((4,)) + out_num = num.random.random((4,)) + # for Numpy, it will raise TypeError: + # "Cannot cast array data from dtype('float64') to dtype('int64') + # according to the rule 'safe'". + # cuNumeric passed. 
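+    # ('safe' casting only allows value-preserving conversions, so NumPy
+    # rejects the float64 -> int64 write into out_np below, while cuNumeric
+    # performs the cast.)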
+ np.compress([True, True, True, True], a, out=out_np) + num.compress([True, True, True, True], b, out=out_num) + assert np.array_equal(out_np, out_num) + + +def test_dtype_out2(): + # both Numpy and cuNumeric turn float into int + a = np.random.random((4,)) * 10 + b = num.array(a) + out_np = np.random.randint(1, 10, (4,)) + out_num = num.random.randint(-10, -1, (4,)) + np.compress([True, True, True, True], a, out=out_np) + num.compress([True, True, True, True], b, out=out_num) + assert np.array_equal(out_np, out_num) + + +@pytest.mark.xfail +def test_out_parameter(): + a = mk_seq_array(np, (4,)) + b = mk_seq_array(num, (4,)) + out_np = np.random.randint(1, 5, (4,)) + out_num = np.random.randint(1, 5, (4,)) + np.compress([True, True, True, True], a, 0, out_np) + num.compress([True, True, True, True], b, 0, out_num) + # for cuNumeric, the last parameter 'out', + # it should be written as 'out=out_num' + # otherwise it raises error + assert np.array_equal(out_num, out_np) def test_bool_condition(): @@ -52,51 +120,58 @@ def test_bool_condition(): assert np.array_equal(res_num, res_np) -def test_out(): - a = np.array([[1, 2], [3, 4], [5, 6]]) - num_a = num.array(a) - out_np = np.array([[1], [1], [1]]) - out_num = num.array(out_np) - - res_np = np.compress([0, 1], a, axis=1, out=out_np) - res_num = num.compress([0, 1], num_a, axis=1, out=out_num) - - assert np.array_equal(res_num, res_np) - assert np.array_equal(out_num, out_np) - - -def test_different_types(): - a = np.array([[1, 2], [3, 4], [5, 6]], dtype=float) - num_a = num.array(a) - out_np = np.array([[1], [1], [1]]) - out_num = num.array(out_np) - - res_np = np.compress([0, 1], a, axis=1, out=out_np) - res_num = num.compress([0, 1], num_a, axis=1, out=out_num) - - assert np.array_equal(res_num, res_np) - assert np.array_equal(out_num, out_np) - - @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) -def test_ndim(ndim): +def test_ndim_basic(ndim): shape = (5,) * ndim np_arr = mk_seq_array(np, shape) num_arr = mk_seq_array(num, shape) - # make sure condition is between 1 and 2 - np_condition = mk_seq_array(np, (5,)) % 2 - num_condition = mk_seq_array(num, (5,)) % 2 + # make sure condition is between 0 and 1 + np_condition = np.array((mk_seq_array(np, (5,)) % 2).astype(bool)) + num_condition = num.array((mk_seq_array(num, (5,)) % 2).astype(bool)) res_np = np.compress(np_condition, np_arr) res_num = num.compress(num_condition, num_arr) assert np.array_equal(res_num, res_np) + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_ndim_axis(ndim): + shape = (5,) * ndim + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + # make sure condition is between 0 and 1 + np_condition = np.array((mk_seq_array(np, (5,)) % 2).astype(bool)) + num_condition = num.array((mk_seq_array(num, (5,)) % 2).astype(bool)) + for axis in range(ndim): res_np = np.compress(np_condition, np_arr, axis) res_num = num.compress(num_condition, num_arr, axis) assert np.array_equal(res_num, res_np) +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_ndim_out(ndim): + shape = (5,) * ndim + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + # make sure condition is between 0 and 1 + np_condition = np.array((mk_seq_array(np, (5,)) % 2).astype(bool)) + num_condition = num.array((mk_seq_array(num, (5,)) % 2).astype(bool)) + + for axis in range(ndim): + shape_list = list(shape) + shape_list[axis] = 3 + shape_new = tuple(shape_list) + + out_np = np.random.randint(1, 10, shape_new) + out_num 
= np.random.randint(-10, -1, shape_new) + + np.compress(np_condition, np_arr, axis, out_np) + num.compress(num_condition, num_arr, axis, out=out_num) + + assert np.array_equal(out_num, out_np) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_extract.py b/tests/integration/test_extract.py index d6d369c8b..105609f05 100644 --- a/tests/integration/test_extract.py +++ b/tests/integration/test_extract.py @@ -15,103 +15,166 @@ import numpy as np import pytest +from utils.generators import mk_seq_array import cunumeric as num -np.random.seed(42) +DIM = 5 +SIZES = [ + (0,), + 1, + 5, + (0, 1), + (1, 0), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 0, 0), + (1, 1, 0), + (1, 0, 1), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] + +VALUES = [ + [0], + [42], + [42 + 3j], + [11, 12, 13], + [True, False, False, True], + [42.3, 42.3, 42.3, 42.3, 42.3], + [np.inf, np.Inf], +] + +@pytest.mark.xfail +def test_none_array(): + res_np = np.extract([0], None) # return [] + res_num = num.extract( + [0], None + ) # AttributeError: 'NoneType' object has no attribute 'size' + assert np.array_equal(res_np, res_num) -def test_extract(): - cnp = np.array( - [1, 54, 4, 4, 0, 45, 5, 58, 0, 9, 0, 4, 0, 0, 0, 5, 0, 1] - ).reshape( - (6, 3) - ) # noqa E501 - c = num.array(cnp) - bnp = np.random.randn(6, 3) - b = num.array(bnp) - assert num.array_equal(num.extract(c, b), np.extract(cnp, bnp)) + +@pytest.mark.xfail +def test_empty_array(): + res_np = np.extract([0], []) # return [] + res_num = num.extract( + [0], [] + ) # ValueError: arr array and condition array must be of same size + assert np.array_equal(res_np, res_num) + + +@pytest.mark.xfail +def test_none_condition(): + a = num.array([1, 2, 3, 4]) + res_np = np.extract(None, a) # all return [] + res_num = num.extract( + None, a + ) # AttributeError: 'NoneType' object has no attribute 'size' + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize( + "con", (-3, 0, 3, False, True, [2], [2, 3], [2, -3, 4], [1, 2, 3, 4, 5]) +) +def test_negative_condition(con): + a = num.array([1, 2, 3, 4]) + with pytest.raises(ValueError): + num.extract(con, a) + + +@pytest.mark.xfail +def test_complex_condition(): + # when condition is complex type a+bj, + # if a==0, cuNumeric take it as 0, while Numpy take it as 1 + a = np.array([1, 2, 3, 4]) + b = num.array([1, 2, 3, 4]) + condition = [1 + 2j, 2, 2, 5j] + res_np = np.extract(condition, a) # array([1, 2, 3, 4]) + res_num = num.extract(condition, b) # array([1, 2, 3]) + assert np.array_equal(res_np, res_num) ARR = [ - [1, 54, 4, 4, 0, 45, 5, 58, 0, 9, 0, 4, 0, 0, 0, 5, 0, 1], - [[1, 54, 4], [4, 0, 45], [5, 58, 0], [9, 0, 4], [0, 0, 0], [5, 0, 1]], [ - [[1, 54, 4], [4, 0, 45]], - [[5, 58, 0], [9, 0, 4]], - [[0, 0, 0], [5, 0, 1]], + [[1 + 2j, 54, 4], [4, 3 + 1j, 45]], + [[5.5, 58.3, 0.6], [9, 0, 4]], + [[0, 0, 0], [-9, 0, -4]], ], - [[[1 + 2j, 54, 4], [4, 0 + 1j, 45]], [[5, 58, 0], [9, 0, 4]]], [[True, False], [True, True], [True, False]], [[]], - [], + [[], []], [ [[0, 0, 0], [0, 0, 0]], [[0, 0, 0], [0, 0, 1]], ], - [False, False, False], - [ - [[0, 0, 0], [0, 0, 0]], - [[0, 0, 0], [0, 0, 0]], - ], ] +def array_condition(): + arr_list = [] + for arr in ARR: + arr_np = np.array(arr) + condition_np = arr_np.copy() + arr_list.append((condition_np, arr_np)) + arr_list.append((condition_np.flatten(), arr_np)) + arr_list.append((condition_np, arr_np.flatten())) + arr_list.append( + (condition_np.swapaxes(0, condition_np.ndim - 1), arr_np) + ) + 
arr_list.append( + (condition_np, arr_np.swapaxes(0, condition_np.ndim - 1)) + ) + return arr_list + + def check_extract(condition_np, arr_np): arr_num = num.array(arr_np) condition_num = num.array(condition_np) - result_np = np.extract(condition_np, arr_np) result_np2 = arr_np[condition_np.reshape(arr_np.shape).astype(bool)] - assert np.array_equal(result_np, result_np2) result_num = num.extract(condition_num, arr_num) - assert np.array_equal(result_np, result_num) + assert np.array_equal(result_np2, result_num) -@pytest.mark.parametrize("arr", ARR, ids=str) -def test_extract_bool(arr): - arr_np = np.array(arr) - condition_np = arr_np != 0 - check_extract(condition_np, arr_np) - check_extract(condition_np.flatten(), arr_np) - check_extract(condition_np, arr_np.flatten()) - check_extract(condition_np.swapaxes(0, condition_np.ndim - 1), arr_np) - check_extract(condition_np, arr_np.swapaxes(0, condition_np.ndim - 1)) +@pytest.mark.parametrize( + "con, arr", (data for data in array_condition()), ids=str +) +def test_extract_nonzero1(con, arr): + check_extract(con, arr) -@pytest.mark.parametrize("arr", ARR, ids=str) -def test_extract_nonzero(arr): - arr_np = np.array(arr) - condition_np = arr_np.copy() - check_extract(condition_np, arr_np) - check_extract(condition_np.flatten(), arr_np) - check_extract(condition_np, arr_np.flatten()) - check_extract(condition_np.swapaxes(0, condition_np.ndim - 1), arr_np) - check_extract(condition_np, arr_np.swapaxes(0, condition_np.ndim - 1)) +@pytest.mark.parametrize("shape", SIZES, ids=str) +def test_extract_basic(shape): + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + # make sure condition is between 0 and 1 + np_condition = np.array((mk_seq_array(np, shape) % 2).astype(bool)) + num_condition = num.array((mk_seq_array(num, shape) % 2).astype(bool)) + res_np = np.extract(np_condition, np_arr) + res_num = num.extract(num_condition, num_arr) + assert np.array_equal(res_num, res_np) -VALUES = [ - [11, 12, 13], - [99, 93, 76, 65, 76, 87, 43, 23, 12, 54, 756, 2345, 232, 2323, 12145], - [42], - [True, False, False, True], - [42.3, 42.3, 42.3, 42.3, 42.3, 42.3, 42.3, 42.3], - [42 + 3j], -] - -@pytest.mark.parametrize("arr", ARR, ids=str) +@pytest.mark.parametrize("shape", SIZES, ids=str) @pytest.mark.parametrize("vals", VALUES, ids=str) -def test_place(arr, vals): - arr_np = np.array(arr) - vals_np = np.array(vals).astype(arr_np.dtype) - condition_np = arr_np != 0 +def test_place_basic(shape, vals): + arr_np = mk_seq_array(np, shape) + arr_num = num.array(mk_seq_array(num, shape)) - arr_num = num.array(arr_np) - condition_num = num.array(condition_np) + mask_np = np.array((mk_seq_array(np, shape) % 2).astype(bool)) + mask_num = num.array((mk_seq_array(np, shape) % 2).astype(bool)) + + vals_np = np.array(vals).astype(arr_np.dtype) vals_num = num.array(vals_np) - np.place(arr_np, condition_np, vals_np) - num.place(arr_num, condition_num, vals_num) + np.place(arr_np, mask_np, vals_np) + num.place(arr_num, mask_num, vals_num) assert np.array_equal(arr_np, arr_num) From d7ca2782f75dbb533e060dab75889592a3b0afcf Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 9 Nov 2022 20:03:42 -0800 Subject: [PATCH 32/89] Mypy fix (#688) * Fix mypy errors * Add mypy to pre-commit hooks * Update .pre-commit-config.yaml Co-authored-by: Bryan Van de Ven * Remove 'tests' from the mypy args * Remove the mypy hook for now Co-authored-by: Bryan Van de Ven --- cunumeric/config.py | 2 ++ cunumeric/random/bitgenerator.py | 3 ++- 2 files changed, 4 insertions(+), 1 
deletion(-) diff --git a/cunumeric/config.py b/cunumeric/config.py index 88802b911..c45ae6313 100644 --- a/cunumeric/config.py +++ b/cunumeric/config.py @@ -15,6 +15,7 @@ from __future__ import annotations import os +from abc import abstractmethod from enum import IntEnum, unique from typing import TYPE_CHECKING, Any, List, Union, cast @@ -269,6 +270,7 @@ class _CunumericSharedLib: CUNUMERIC_WRITE: int CUNUMERIC_ZIP: int + @abstractmethod def cunumeric_has_curand(self) -> int: ... diff --git a/cunumeric/random/bitgenerator.py b/cunumeric/random/bitgenerator.py index 2c5dfc577..1bd0aaa03 100644 --- a/cunumeric/random/bitgenerator.py +++ b/cunumeric/random/bitgenerator.py @@ -15,6 +15,7 @@ from __future__ import annotations import time +from abc import abstractproperty from typing import TYPE_CHECKING, Union import numpy as np @@ -66,7 +67,7 @@ def __init__( self.generatorType, seed, self.flags, forceBuild ) - @property + @abstractproperty def generatorType(self) -> BitGeneratorType: ... From a93ea498815a26ca8f4b13b61f9bba2997555363 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 9 Nov 2022 23:01:53 -0800 Subject: [PATCH 33/89] Add a test case for 0D region-backed stores (#666) --- tests/integration/test_0d_store.py | 38 ++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/integration/test_0d_store.py diff --git a/tests/integration/test_0d_store.py b/tests/integration/test_0d_store.py new file mode 100644 index 000000000..1701983f4 --- /dev/null +++ b/tests/integration/test_0d_store.py @@ -0,0 +1,38 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from itertools import product + +import pytest + +import cunumeric as num + +SIZE = 3 + + +def test_0d_region_backed_stores(): + arr = num.arange(9).reshape(3, 3) + + for i, j in product(range(SIZE), range(SIZE)): + i_ind = num.array(i) + j_ind = num.array(j) + v = arr[i_ind, j_ind] + assert int(v) == i * SIZE + j + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) From 3162f5eb5c362c2a0b21df6187d22ba573ebb65f Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 9 Nov 2022 23:03:18 -0800 Subject: [PATCH 34/89] Fix a silly mistake. Fixes #684. 
(#686) --- src/cunumeric/set/unique.cu | 2 +- tests/integration/test_unique.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cunumeric/set/unique.cu b/src/cunumeric/set/unique.cu index 11e9e6fc1..908a87664 100644 --- a/src/cunumeric/set/unique.cu +++ b/src/cunumeric/set/unique.cu @@ -168,7 +168,7 @@ struct UniqueImplBody { // Find unique values thrust::sort(thrust::cuda::par.on(stream), ptr, ptr + volume); - auto* end = thrust::unique(thrust::cuda::par.on(stream), ptr, ptr + volume); + end = thrust::unique(thrust::cuda::par.on(stream), ptr, ptr + volume); } Piece result; diff --git a/tests/integration/test_unique.py b/tests/integration/test_unique.py index 4f3d84274..c657c2c57 100644 --- a/tests/integration/test_unique.py +++ b/tests/integration/test_unique.py @@ -25,7 +25,7 @@ def test_with_nonzero(): a_np = a.__array__() b = num.unique(a) - b_np = num.unique(a_np) + b_np = np.unique(a_np) assert np.array_equal(b, b_np) From 9c28d6738f63441879bb06d3843670641a61f5fe Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 10 Nov 2022 12:52:43 -0800 Subject: [PATCH 35/89] Add missing put method to docs (#689) --- docs/cunumeric/source/api/_ndarray.rst | 1 + docs/cunumeric/source/api/ndarray.rst | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/cunumeric/source/api/_ndarray.rst b/docs/cunumeric/source/api/_ndarray.rst index 317772104..3320f0857 100644 --- a/docs/cunumeric/source/api/_ndarray.rst +++ b/docs/cunumeric/source/api/_ndarray.rst @@ -43,6 +43,7 @@ cunumeric.ndarray ~ndarray.nonzero ~ndarray.partition ~ndarray.prod + ~ndarray.put ~ndarray.ravel ~ndarray.reshape ~ndarray.searchsorted diff --git a/docs/cunumeric/source/api/ndarray.rst b/docs/cunumeric/source/api/ndarray.rst index 1b2c2107c..afdd1406f 100644 --- a/docs/cunumeric/source/api/ndarray.rst +++ b/docs/cunumeric/source/api/ndarray.rst @@ -124,7 +124,7 @@ Item selection and manipulation :toctree: generated/ ndarray.take - .. ndarray.put + ndarray.put .. ndarray.repeat ndarray.choose ndarray.sort From 6b835d6df4d693937344688fb33cecb46ef41e5d Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Fri, 11 Nov 2022 11:05:11 +0800 Subject: [PATCH 36/89] Enhance test_linspace.py and test_swapaxes.py (#680) * Enhance test_linspace.py and test_swapaxes.py * Address comments. * Address comments part-2. --- tests/integration/test_linspace.py | 305 +++++++++++++++++++++++++++-- tests/integration/test_swapaxes.py | 212 +++++++++++++++++--- 2 files changed, 468 insertions(+), 49 deletions(-) diff --git a/tests/integration/test_linspace.py b/tests/integration/test_linspace.py index 170bec59c..4f05a843c 100644 --- a/tests/integration/test_linspace.py +++ b/tests/integration/test_linspace.py @@ -13,48 +13,315 @@ # limitations under the License. # +from itertools import chain + import numpy as np import pytest +from utils.generators import broadcasts_to, mk_seq_array import cunumeric as num -def test_basic(): - x = np.linspace(2.0, 3.0, num=5) - y = num.linspace(2.0, 3.0, num=5) +def equivalent_shapes_gen(shape): + """ + Generate more equivalent shapes by removing + leading singleton dimensions from `shape`. 
+ e.g., shape=(1, 4, 1), yield (1, 4, 1), (4, 1) + shape=(1, 1, 5), yield (1, 1, 5), (1, 5), (5,) + """ + yield shape + for i in range(len(shape) - 1): + if shape[i] == 1: + i += 1 + yield shape[i:] + else: + break + + +@pytest.mark.parametrize( + "endpoint", (True, False), ids=lambda endpoint: f"(endpoint={endpoint})" +) +@pytest.mark.parametrize( + "number", (0, 1, 10), ids=lambda number: f"(num={number})" +) +@pytest.mark.parametrize( + "values", + ((10, -5.5), (2.0, 3.0), (0, 0), (1 + 2.5j, 10 + 5j), (0j, 10)), + ids=lambda values: f"(values={values})", +) +def test_scalar_basic(values, number, endpoint): + start, stop = values + x = np.linspace(start, stop, num=number, endpoint=endpoint) + y = num.linspace(start, stop, num=number, endpoint=endpoint) assert np.array_equal(x, y) -def test_endpoint(): - x = np.linspace(2.0, 3.0, num=5, endpoint=False) - y = num.linspace(2.0, 3.0, num=5, endpoint=False) +@pytest.mark.parametrize( + "endpoint", (True, False), ids=lambda endpoint: f"(endpoint={endpoint})" +) +@pytest.mark.parametrize( + "number", (0, 1, 10), ids=lambda number: f"(num={number})" +) +@pytest.mark.parametrize( + "values", + ((10, -5.5), (2.0, 3.0), (0, 0), (1 + 2.5j, 10 + 5j), (0j, 10)), + ids=lambda values: f"(values={values})", +) +def test_scalar_basic_retstep(values, number, endpoint): + start, stop = values + x = np.linspace(start, stop, num=number, endpoint=endpoint, retstep=True) + y = num.linspace(start, stop, num=number, endpoint=endpoint, retstep=True) + + assert np.array_equal(x[0], y[0]) + if not (np.isnan(x[1]) and np.isnan(y[1])): + assert x[1] == y[1] + + +@pytest.mark.parametrize( + "endpoint", (True, False), ids=lambda endpoint: f"(endpoint={endpoint})" +) +def test_arrays_basic(endpoint): + shape = (2, 2, 3) + np_start = mk_seq_array(np, shape) + num_start = mk_seq_array(num, shape) + np_stop = mk_seq_array(np, shape) + 10 + num_stop = mk_seq_array(num, shape) + 10 + x = np.linspace(np_start, np_stop, num=5, endpoint=endpoint) + y = np.linspace(num_start, num_stop, num=5, endpoint=endpoint) assert np.array_equal(x, y) -def test_retstep(): - x = np.linspace(2.0, 3.0, num=5, retstep=True) - y = np.linspace(2.0, 3.0, num=5, retstep=True) +@pytest.mark.parametrize( + "endpoint", (True, False), ids=lambda endpoint: f"(endpoint={endpoint})" +) +def test_arrays_basic_retstep(endpoint): + shape = (2, 2, 3) + np_start = mk_seq_array(np, shape) + num_start = mk_seq_array(num, shape) + np_stop = mk_seq_array(np, shape) + 10 + num_stop = mk_seq_array(num, shape) + 10 + x = np.linspace(np_start, np_stop, num=5, endpoint=endpoint, retstep=True) + y = np.linspace( + num_start, num_stop, num=5, endpoint=endpoint, retstep=True + ) assert np.array_equal(x[0], y[0]) - assert x[1] == y[1] + assert np.array_equal(x[1], y[1]) + + +shape_start = (2, 2, 3) +shape_stops = (equivalent_shapes_gen(s) for s in broadcasts_to(shape_start)) + + +@pytest.mark.parametrize( + "shape_stop", + chain.from_iterable(shape_stops), + ids=lambda shape_stop: f"(shape_stop={shape_stop})", +) +def test_array_broadcast_stops(shape_stop): + np_start = mk_seq_array(np, shape_start) + num_start = mk_seq_array(num, shape_start) + + np_stop = mk_seq_array(np, shape_stop) + 5 + num_stop = mk_seq_array(num, shape_stop) + 5 + x = np.linspace(np_start, np_stop, num=5) + y = num.linspace(num_start, num_stop, num=5) + assert np.array_equal(x, y) + + +def test_arrays_both_start_and_stop_broadcast(): + shape_start = (1, 3) + np_start = mk_seq_array(np, shape_start) + num_start = mk_seq_array(num, shape_start) + 
shape_stop = (2, 1) + np_stop = mk_seq_array(np, shape_stop) + 5 + num_stop = mk_seq_array(num, shape_stop) + 5 + + x = np.linspace(np_start, np_stop, num=5) + y = num.linspace(num_start, num_stop, num=5) + assert np.array_equal(x, y) + + +@pytest.mark.parametrize( + "shape", ((0,), (3,), (2, 1)), ids=lambda shape: f"(shape={shape})" +) +def test_array_with_scalar(shape): + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + scalar = 10 + + x1 = np.linspace(np_arr, scalar, num=5) + y1 = num.linspace(num_arr, scalar, num=5) + assert np.array_equal(x1, y1) + + x2 = np.linspace(scalar, np_arr, num=5) + y2 = num.linspace(scalar, num_arr, num=5) + assert np.array_equal(x2, y2) + + +@pytest.mark.parametrize( + "endpoint", (True, False), ids=lambda endpoint: f"(endpoint={endpoint})" +) +@pytest.mark.parametrize( + "shape", ((0,), (2, 1)), ids=lambda shape: f"(shape={shape})" +) +def test_empty_array(shape, endpoint): + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + + x1 = np.linspace(np_arr, [], num=5, endpoint=endpoint) + y1 = num.linspace(num_arr, [], num=5, endpoint=endpoint) + assert np.array_equal(x1, y1) + x2 = np.linspace([], np_arr, num=5, endpoint=endpoint) + y2 = num.linspace([], num_arr, num=5, endpoint=endpoint) + assert np.array_equal(x2, y2) -def test_axis(): + +@pytest.mark.parametrize( + "endpoint", (True, False), ids=lambda endpoint: f"(endpoint={endpoint})" +) +@pytest.mark.parametrize( + "shape", ((0,), (2, 1)), ids=lambda shape: f"(shape={shape})" +) +def test_empty_array_retstep(shape, endpoint): + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + + x1 = np.linspace(np_arr, [], num=5, endpoint=endpoint, retstep=True) + y1 = num.linspace(num_arr, [], num=5, endpoint=endpoint, retstep=True) + assert np.array_equal(x1[0], y1[0]) + assert np.array_equal(x1[1], y1[1]) + + x2 = np.linspace([], np_arr, num=5, endpoint=endpoint, retstep=True) + y2 = num.linspace([], num_arr, num=5, endpoint=endpoint, retstep=True) + assert np.array_equal(x2[0], y2[0]) + assert np.array_equal(x2[1], y2[1]) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "number", (0, 1, 10), ids=lambda number: f"(num={number})" +) +@pytest.mark.parametrize( + "axis", range(-3, 3), ids=lambda axis: f"(axis={axis})" +) +def test_arrays_axis(axis, number): + # In cuNumeric, if axis < -1, raise ValueError + # 'Point cannot exceed 4 dimensions set from LEGATE_MAX_DIM' + # In Numpy, if axis is -2 or -3, also pass + # In cuNumeric, for axis >= -1, if num=0, raise IndexError: + # tuple index out of range + # In Numpy, if num=0, pass and returns empty array x = np.array([[0, 1], [2, 3]]) y = np.array([[4, 5], [6, 7]]) xp = num.array(x) yp = num.array(y) - z = np.linspace(x, y, num=5, axis=0) - w = num.linspace(xp, yp, num=5, axis=0) + z = np.linspace(x, y, num=number, axis=axis) + w = num.linspace(xp, yp, num=number, axis=axis) assert np.array_equal(z, w) - z = np.linspace(x, y, num=5, axis=1) - w = num.linspace(xp, yp, num=5, axis=1) - assert np.array_equal(z, w) - z = np.linspace(x, y, num=5, axis=2) - w = num.linspace(xp, yp, num=5, axis=2) - assert np.array_equal(z, w) +@pytest.mark.parametrize( + "axis", range(-1, 1), ids=lambda axis: f"(axis={axis})" +) +def test_scalar_axis(axis): + start = 2.0 + stop = 3.0 + x = np.linspace(start, stop, num=5, axis=axis) + y = num.linspace(start, stop, num=5, axis=axis) + assert np.array_equal(x, y) + + +@pytest.mark.parametrize( + "dtype", (None, int, float, bool), ids=lambda dtype: f"(dtype={dtype})" +) +def 
test_dtype(dtype): + start = 2.0 + stop = 3.0 + x = np.linspace(start, stop, num=5, dtype=dtype) + y = num.linspace(start, stop, num=5, dtype=dtype) + assert np.array_equal(x, y) + + +class TestLinspaceErrors: + def setup_method(self): + self.start = mk_seq_array(num, (2, 3)) + self.stop = mk_seq_array(num, (2, 3)) + 10 + self.num = 5 + + @pytest.mark.xfail + def test_num_float(self): + # In Numpy, raise TypeError + # In cuNumeric, pass + msg = "cannot be interpreted as an integer" + with pytest.raises(TypeError, match=msg): + num.linspace(0, 10, num=4.5) + + def test_num_negative(self): + msg = "must be non-negative" + with pytest.raises(ValueError, match=msg): + num.linspace(0, 10, num=-1) + + def test_num_none(self): + msg = "not supported between instances of 'NoneType' and 'int'" + with pytest.raises(TypeError, match=msg): + num.linspace(0, 10, num=None) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "axis", (-4, 3), ids=lambda axis: f"(axis={axis})" + ) + def test_axis_out_of_bound_array(self, axis): + # In cuNumeric, if axis < -1, raise ValueError + # 'Point cannot exceed 4 dimensions set from LEGATE_MAX_DIM' + msg = "out of bounds" + # In Numpy, it raises AxisError + with pytest.raises(ValueError, match=msg): + num.linspace(self.start, self.stop, axis=axis) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "axis", (-2, 1), ids=lambda axis: f"(axis={axis})" + ) + def test_axis_out_of_bound_scalar(self, axis): + # In cuNumeric, it pass and the result equals when axis=0 + # In Numpy, it raises AxisError + msg = "out of bounds" + with pytest.raises(ValueError, match=msg): + num.linspace(2.0, 3.0, axis=axis) + + def test_axis_float(self): + axis = 1.0 + msg = "can't multiply sequence by non-int of type 'float'" + with pytest.raises(TypeError, match=msg): + num.linspace(self.start, self.stop, axis=axis) + + @pytest.mark.xfail + def test_axis_none(self): + # In cuNumeric, pass and treat it as axis=0 + # In Numpy, raises TypeError + axis = None + msg = "'NoneType' object is not iterable" + with pytest.raises(TypeError, match=msg): + num.linspace(self.start, self.stop, axis=axis) + + @pytest.mark.parametrize( + "shape", ((0,), (2,), (3, 3)), ids=lambda shape: f"(shape={shape})" + ) + def test_array_bad_shape(self, shape): + msg = "shape mismatch" + stop = mk_seq_array(num, shape) + with pytest.raises(ValueError, match=msg): + num.linspace(self.start, stop) + + def test_start_none(self): + with pytest.raises(TypeError): + num.linspace(None, 10, num=5) + + def test_stop_none(self): + with pytest.raises(TypeError): + num.linspace(0, None, num=5) if __name__ == "__main__": diff --git a/tests/integration/test_swapaxes.py b/tests/integration/test_swapaxes.py index 92dc83e4e..0217019c9 100644 --- a/tests/integration/test_swapaxes.py +++ b/tests/integration/test_swapaxes.py @@ -15,42 +15,194 @@ import numpy as np import pytest +from utils.generators import mk_seq_array import cunumeric as num a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) -def test_small(): - a_num = num.array(a) - b_num = a_num.swapaxes(0, 1) - - assert num.array_equal(a_num.sum(axis=0), b_num.sum(axis=1)) - - -def test_tall(): - a_tall = np.concatenate((a,) * 100) - a_tall_num = num.array(a_tall) - b_tall_num = a_tall_num.swapaxes(0, 1) - - assert num.array_equal(a_tall_num.sum(axis=0), b_tall_num.sum(axis=1)) - - -def test_wide(): - a_wide = np.concatenate((a,) * 100, axis=1) - a_wide_num = num.array(a_wide) - b_wide_num = a_wide_num.swapaxes(0, 1) - - assert num.array_equal(a_wide_num.sum(axis=0), 
b_wide_num.sum(axis=1)) - - -def test_big(): - a_tall = np.concatenate((a,) * 100) - a_big = np.concatenate((a_tall,) * 100, axis=1) - a_big_num = num.array(a_big) - b_big_num = a_big_num.swapaxes(0, 1) - - assert num.array_equal(a_big_num.sum(axis=0), b_big_num.sum(axis=1)) +class TestSwapAxesModule: + def test_small(self): + a_num = num.array(a) + b = np.swapaxes(a, 0, 1) + b_num = num.swapaxes(a_num, 0, 1) + assert np.array_equal(b, b_num) + + def test_tall(self): + a_tall = np.concatenate((a,) * 100) + a_tall_num = num.array(a_tall) + + b_tall = np.swapaxes(a_tall, 0, 1) + b_tall_num = num.swapaxes(a_tall_num, 0, 1) + assert np.array_equal(b_tall, b_tall_num) + + def test_wide(self): + a_wide = np.concatenate((a,) * 100, axis=1) + a_wide_num = num.array(a_wide) + + b_wide = np.swapaxes(a_wide, 0, 1) + b_wide_num = num.swapaxes(a_wide_num, 0, 1) + assert np.array_equal(b_wide, b_wide_num) + + def test_big(self): + a_tall = np.concatenate((a,) * 100) + a_big = np.concatenate((a_tall,) * 100, axis=1) + a_big_num = num.array(a_big) + + b_big = np.swapaxes(a_big, 0, 1) + b_big_num = num.swapaxes(a_big_num, 0, 1) + assert np.array_equal(b_big, b_big_num) + + @pytest.mark.parametrize( + "axes", + ((0, 0), (-3, 1), (0, 2), (-3, -2)), + ids=lambda axes: f"(axes={axes})", + ) + def test_axes(self, axes): + shape = (3, 4, 5) + np_arr = mk_seq_array(np, shape) + num_arr = num.array(np_arr) + axis1, axis2 = axes + + res_np = np.swapaxes(np_arr, axis1, axis2) + res_num = num.swapaxes(num_arr, axis1, axis2) + assert np.array_equal(res_num, res_np) + + def test_emtpy_array(self): + np_arr = np.array([]) + num_arr = num.array([]) + axis1 = 0 + axis2 = 0 + + res_np = np.swapaxes(np_arr, axis1, axis2) + res_num = num.swapaxes(num_arr, axis1, axis2) + assert np.array_equal(res_num, res_np) + + +class TestSwapAxesModuleErrors: + def setup_method(self): + self.a = mk_seq_array(num, (3, 3)) + + def test_a_none(self): + msg = "has no attribute 'swapaxes'" + with pytest.raises(AttributeError, match=msg): + num.swapaxes(None, 0, 0) + + @pytest.mark.parametrize( + "axes", ((3, 0), (0, 3)), ids=lambda axes: f"(axes={axes})" + ) + def test_axes_out_of_bound1(self, axes): + axis1, axis2 = axes + msg = "too large for swapaxes" + with pytest.raises(ValueError, match=msg): + num.swapaxes(self.a, axis1, axis2) + + @pytest.mark.parametrize( + "axes", ((-4, 0), (0, -4)), ids=lambda axes: f"(axes={axes})" + ) + def test_axes_out_of_bound2(self, axes): + axis1, axis2 = axes + with pytest.raises(IndexError): + num.swapaxes(self.a, axis1, axis2) + + @pytest.mark.parametrize( + "axes", ((None, 0), (0, None)), ids=lambda axes: f"(axes={axes})" + ) + def test_axes_none(self, axes): + axis1, axis2 = axes + msg = "not supported between instances of 'NoneType' and 'int'" + with pytest.raises(TypeError, match=msg): + num.swapaxes(self.a, axis1, axis2) + + +class TestSwapAxesArrayMethod: + def test_small(self): + a_num = num.array(a) + b = a.swapaxes(0, 1) + b_num = a_num.swapaxes(0, 1) + assert np.array_equal(b, b_num) + + def test_tall(self): + a_tall = np.concatenate((a,) * 100) + a_tall_num = num.array(a_tall) + + b_tall = a_tall.swapaxes(0, 1) + b_tall_num = a_tall_num.swapaxes(0, 1) + assert np.array_equal(b_tall, b_tall_num) + + def test_wide(self): + a_wide = np.concatenate((a,) * 100, axis=1) + a_wide_num = num.array(a_wide) + + b_wide = a_wide.swapaxes(0, 1) + b_wide_num = a_wide_num.swapaxes(0, 1) + assert np.array_equal(b_wide, b_wide_num) + + def test_big(self): + a_tall = np.concatenate((a,) * 100) + a_big = 
np.concatenate((a_tall,) * 100, axis=1) + a_big_num = num.array(a_big) + + b_big = a_big.swapaxes(0, 1) + b_big_num = a_big_num.swapaxes(0, 1) + assert np.array_equal(b_big, b_big_num) + + @pytest.mark.parametrize( + "axes", + ((0, 0), (-3, 1), (0, 2), (-3, -2)), + ids=lambda axes: f"(axes={axes})", + ) + def test_axes(self, axes): + shape = (3, 4, 5) + np_arr = mk_seq_array(np, shape) + num_arr = num.array(np_arr) + axis1, axis2 = axes + + res_np_arr = np_arr.swapaxes(axis1, axis2) + res_num_arr = num_arr.swapaxes(axis1, axis2) + assert np.array_equal(res_num_arr, res_np_arr) + + def test_emtpy_array(self): + np_arr = np.array([]) + num_arr = num.array([]) + axis1 = 0 + axis2 = 0 + + res_np_arr = np_arr.swapaxes(axis1, axis2) + res_num_arr = num_arr.swapaxes(axis1, axis2) + assert np.array_equal(res_num_arr, res_np_arr) + + +class TestSwapAxesArrayMethodErrors: + def setup_method(self): + self.a = mk_seq_array(num, (3, 3)) + + @pytest.mark.parametrize( + "axes", ((3, 0), (0, 3)), ids=lambda axes: f"(axes={axes})" + ) + def test_axes_out_of_bound1(self, axes): + axis1, axis2 = axes + msg = "too large for swapaxes" + with pytest.raises(ValueError, match=msg): + self.a.swapaxes(axis1, axis2) + + @pytest.mark.parametrize( + "axes", ((-4, 0), (0, -4)), ids=lambda axes: f"(axes={axes})" + ) + def test_axes_out_of_bound2(self, axes): + axis1, axis2 = axes + with pytest.raises(IndexError): + self.a.swapaxes(axis1, axis2) + + @pytest.mark.parametrize( + "axes", ((None, 0), (0, None)), ids=lambda axes: f"(axes={axes})" + ) + def test_axes_none(self, axes): + axis1, axis2 = axes + msg = "not supported between instances of 'NoneType' and 'int'" + with pytest.raises(TypeError, match=msg): + self.a.swapaxes(axis1, axis2) if __name__ == "__main__": From dfbb053deed140e81f0961cd55d3ba6fe5e3b076 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 14 Nov 2022 14:41:23 -0800 Subject: [PATCH 37/89] Handle complex dtypes in __legate_data_interface__ (#690) * Handle complex dtypes in __legate_data_interface__ * Fix a mypy error --- cunumeric/array.py | 18 +++++++++++++++++- tests/integration/test_data_interface.py | 8 ++++++-- 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index 6dc818112..2c7b7f770 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -31,6 +31,7 @@ cast, ) +import legate.core.types as ty import numpy as np import pyarrow # type: ignore from legate.core import Array @@ -187,6 +188,21 @@ def _convert_all_to_numpy(obj: Any) -> Any: return obj +# FIXME: we can't give an accurate return type as mypy thinks +# the pyarrow import can be ignored, and can't override the check +# either, because no-any-unimported needs Python >= 3.10. 
We can +# fix it once we bump up the Python version +def convert_numpy_dtype_to_pyarrow(dtype: np.dtype[Any]) -> Any: + if dtype.kind != "c": + return pyarrow.from_numpy_dtype(dtype) + elif dtype == np.complex64: + return ty.complex64 + elif dtype == np.complex128: + return ty.complex128 + else: + raise ValueError(f"Unsupported NumPy dtype: {dtype}") + + @clone_np_ndarray class ndarray: def __init__( @@ -269,7 +285,7 @@ def __legate_data_interface__(self) -> dict[str, Any]: # All of our thunks implement the Legate Store interface # so we just need to convert our type and stick it in # a Legate Array - arrow_type = pyarrow.from_numpy_dtype(self.dtype) + arrow_type = convert_numpy_dtype_to_pyarrow(self.dtype) # If the thunk is an eager array, we need to convert it to a # deferred array so we can extract a legate store deferred_thunk = runtime.to_deferred_array(self._thunk) diff --git a/tests/integration/test_data_interface.py b/tests/integration/test_data_interface.py index 1421437ea..6c617db43 100644 --- a/tests/integration/test_data_interface.py +++ b/tests/integration/test_data_interface.py @@ -16,6 +16,9 @@ import pytest import cunumeric as num +from cunumeric.runtime import _supported_dtypes + +DTYPES = _supported_dtypes.keys() # A simple wrapper with a legate data interface implementation for testing @@ -28,8 +31,9 @@ def __legate_data_interface__(self): return self.wrapped -def test_roundtrip(): - arr1 = num.array([1, 2, 3, 4], dtype=num.float64) +@pytest.mark.parametrize("dtype", DTYPES) +def test_roundtrip(dtype): + arr1 = num.array([1, 2, 3, 4], dtype=dtype) data = Wrapper(arr1.__legate_data_interface__) arr2 = num.asarray(data) assert num.array_equal(arr1, arr2) From 4bd9762e1e61e2e29212f52b0861c0fcab7112f4 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 14 Nov 2022 23:41:07 -0800 Subject: [PATCH 38/89] Log operator names of unary and binary operations using annotations (#679) --- cunumeric/deferred.py | 88 +++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 40 deletions(-) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 8e6d8cacb..b04b3088c 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -35,7 +35,7 @@ import legate.core.types as ty import numpy as np -from legate.core import Future, ReductionOp, Store +from legate.core import Annotation, Future, ReductionOp, Store from numpy.core.numeric import normalize_axis_tuple # type: ignore from typing_extensions import ParamSpec @@ -2961,20 +2961,21 @@ def unary_op( lhs = self.base rhs = src._broadcast(lhs.shape) - task = self.context.create_auto_task(CuNumericOpCode.UNARY_OP) - task.add_output(lhs) - task.add_input(rhs) - task.add_scalar_arg(op.value, ty.int32) - self.add_arguments(task, args) + with Annotation(self.context, {"OpCode": op.name}): + task = self.context.create_auto_task(CuNumericOpCode.UNARY_OP) + task.add_output(lhs) + task.add_input(rhs) + task.add_scalar_arg(op.value, ty.int32) + self.add_arguments(task, args) - task.add_alignment(lhs, rhs) + task.add_alignment(lhs, rhs) - if multiout is not None: - for out in multiout: - task.add_output(out.base) - task.add_alignment(out.base, rhs) + if multiout is not None: + for out in multiout: + task.add_output(out.base) + task.add_alignment(out.base, rhs) - task.execute() + task.execute() # Perform a unary reduction operation from one set of dimensions down to # fewer @@ -3010,10 +3011,6 @@ def unary_reduction( 0 if keepdims else lhs_array.ndim ) - task = self.context.create_auto_task( - 
CuNumericOpCode.SCALAR_UNARY_RED - ) - if initial is not None: assert not argred fill_value = initial @@ -3026,14 +3023,21 @@ def unary_reduction( while lhs.ndim > 1: lhs = lhs.project(0, 0) - task.add_reduction(lhs, _UNARY_RED_TO_REDUCTION_OPS[op]) - task.add_input(rhs_array.base) - task.add_scalar_arg(op, ty.int32) - task.add_scalar_arg(rhs_array.shape, (ty.int64,)) + with Annotation( + self.context, {"OpCode": op.name, "ArgRed?": str(argred)} + ): + task = self.context.create_auto_task( + CuNumericOpCode.SCALAR_UNARY_RED + ) - self.add_arguments(task, args) + task.add_reduction(lhs, _UNARY_RED_TO_REDUCTION_OPS[op]) + task.add_input(rhs_array.base) + task.add_scalar_arg(op, ty.int32) + task.add_scalar_arg(rhs_array.shape, (ty.int64,)) - task.execute() + self.add_arguments(task, args) + + task.execute() else: # Before we perform region reduction, make sure to have the lhs @@ -3062,18 +3066,21 @@ def unary_reduction( "Need support for reducing multiple dimensions" ) - task = self.context.create_auto_task(CuNumericOpCode.UNARY_RED) + with Annotation( + self.context, {"OpCode": op.name, "ArgRed?": str(argred)} + ): + task = self.context.create_auto_task(CuNumericOpCode.UNARY_RED) - task.add_input(rhs_array.base) - task.add_reduction(result, _UNARY_RED_TO_REDUCTION_OPS[op]) - task.add_scalar_arg(axis, ty.int32) - task.add_scalar_arg(op, ty.int32) + task.add_input(rhs_array.base) + task.add_reduction(result, _UNARY_RED_TO_REDUCTION_OPS[op]) + task.add_scalar_arg(axis, ty.int32) + task.add_scalar_arg(op, ty.int32) - self.add_arguments(task, args) + self.add_arguments(task, args) - task.add_alignment(result, rhs_array.base) + task.add_alignment(result, rhs_array.base) - task.execute() + task.execute() if argred: self.unary_op( @@ -3107,18 +3114,19 @@ def binary_op( rhs1 = src1._broadcast(lhs.shape) rhs2 = src2._broadcast(lhs.shape) - # Populate the Legate launcher - task = self.context.create_auto_task(CuNumericOpCode.BINARY_OP) - task.add_output(lhs) - task.add_input(rhs1) - task.add_input(rhs2) - task.add_scalar_arg(op_code.value, ty.int32) - self.add_arguments(task, args) + with Annotation(self.context, {"OpCode": op_code.name}): + # Populate the Legate launcher + task = self.context.create_auto_task(CuNumericOpCode.BINARY_OP) + task.add_output(lhs) + task.add_input(rhs1) + task.add_input(rhs2) + task.add_scalar_arg(op_code.value, ty.int32) + self.add_arguments(task, args) - task.add_alignment(lhs, rhs1) - task.add_alignment(lhs, rhs2) + task.add_alignment(lhs, rhs1) + task.add_alignment(lhs, rhs2) - task.execute() + task.execute() @auto_convert("src1", "src2") def binary_reduction( From 4293e6b562c32c9208d1a006a0d4d7ec27ba3f43 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 15 Nov 2022 08:42:26 -0800 Subject: [PATCH 39/89] mypy pre-commit hook for local but not CI (#695) --- .pre-commit-config.yaml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bdc37baff..798efa23d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,11 @@ repos: + - repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v0.982' + hooks: + - id: mypy + language: system + pass_filenames: false + args: ['cunumeric'] - repo: https://github.com/PyCQA/isort rev: 5.10.1 hooks: @@ -25,5 +32,9 @@ repos: entry: python scripts/hooks/enforce_pytest_main.py language: python pass_filenames: false + +ci: + skip: [mypy] + default_language_version: python: python3 From ba2f73f7842924cb0588dbb9a71bb1e4f4201631 Mon Sep 17 00:00:00 
2001 From: Bryan Van de Ven Date: Tue, 15 Nov 2022 15:00:49 -0800 Subject: [PATCH 40/89] Add CI job to build docs (#697) --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 499dce58a..be0db706c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -80,6 +80,7 @@ jobs: - {name: 2 OpenMPs test, options: --use openmp --omps 2 --ompthreads 2 --debug, log: omps} - {name: Eager execution test, options: --use eager --debug, log: eager} - {name: mypy, options: mypy, log: mypy} + - {name: documentation, options: docs, log: docs} name: ${{ matrix.name }} steps: - name: Dump GitHub context From 7a0e7f784175e8e73c5845759d7ecd84a4b908b4 Mon Sep 17 00:00:00 2001 From: xialu00 <110973296+xialu00@users.noreply.github.com> Date: Wed, 16 Nov 2022 08:58:01 +0800 Subject: [PATCH 41/89] add negative test case for test_convolve.py and test_astype.py (#694) * add negative test case for test_convolve.py * add test case for test_astype.py * add test case for test_astype.py * fix bug --- tests/integration/test_astype.py | 80 +++++++++++++++++++++++++++++- tests/integration/test_convolve.py | 23 +++++++++ 2 files changed, 102 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_astype.py b/tests/integration/test_astype.py index 534496950..5a54a7789 100644 --- a/tests/integration/test_astype.py +++ b/tests/integration/test_astype.py @@ -21,12 +21,51 @@ TEST_VECTOR = [0, 0, 1, 2, 3, 0, 1, 2, 3] ALL_BUT_COMPLEX = ["?", "b", "h", "i", "l", "B", "H", "I", "L", "e", "f", "d"] ALL_TYPES = ALL_BUT_COMPLEX + ["F", "D"] +ORDER = ("C", "F", "A", "K") +CASTING = ("no", "equiv", "safe", "same_kind") +UNSIGNED_TYPE = ["b", "h", "i", "l"] +SIGNED_TYPE = ["B", "H", "I", "L"] +ALL_TYPES_BUT_BOOL = [ + "b", + "h", + "i", + "l", + "B", + "H", + "I", + "L", + "e", + "f", + "d", + "F", + "D", +] def to_dtype(s): return str(np.dtype(s)) +def test_none(): + arr = None + in_np = num.array(arr) + msg = r"NoneType" + with pytest.raises(TypeError, match=msg): + in_np.astype("b") + + +@pytest.mark.parametrize("src_dtype", ALL_TYPES, ids=to_dtype) +def test_empty(src_dtype): + arr = [] + in_np = np.array(arr) + out_np = in_np.astype(src_dtype) + + in_num = np.array(arr) + out_num = in_num.astype(src_dtype) + + assert np.array_equal(out_np, out_num) + + @pytest.mark.parametrize("src_dtype", ALL_BUT_COMPLEX, ids=to_dtype) @pytest.mark.parametrize("dst_dtype", ALL_TYPES, ids=to_dtype) def test_basic(src_dtype, dst_dtype): @@ -39,8 +78,30 @@ def test_basic(src_dtype, dst_dtype): assert np.array_equal(out_num, out_np) -@pytest.mark.parametrize("src_dtype", ("F", "D"), ids=to_dtype) +@pytest.mark.parametrize("src_dtype", ALL_BUT_COMPLEX, ids=to_dtype) @pytest.mark.parametrize("dst_dtype", ALL_TYPES, ids=to_dtype) +@pytest.mark.parametrize("order", ORDER, ids=str) +def test_order(src_dtype, dst_dtype, order): + in_np = np.array(TEST_VECTOR, dtype=src_dtype) + in_num = num.array(in_np) + + out_np = in_np.astype(dst_dtype, order=order) + out_num = in_num.astype(dst_dtype, order=order) + + assert np.array_equal(out_num, out_np) + + +@pytest.mark.parametrize("src_dtype", UNSIGNED_TYPE, ids=to_dtype) +@pytest.mark.parametrize("dst_dtype", SIGNED_TYPE, ids=to_dtype) +@pytest.mark.parametrize("cast", CASTING, ids=str) +def test_casting_negative(src_dtype, dst_dtype, cast): + in_num = num.array(TEST_VECTOR, dtype=src_dtype) + with pytest.raises(TypeError): + in_num.astype(dst_dtype, casting=cast) + + +@pytest.mark.parametrize("src_dtype", ("F", 
"D"), ids=to_dtype) +@pytest.mark.parametrize("dst_dtype", ALL_TYPES_BUT_BOOL, ids=to_dtype) def test_complex(src_dtype, dst_dtype): complex_input = [ complex(v1, v2) for v1, v2 in zip(TEST_VECTOR[:-1], TEST_VECTOR[1:]) @@ -54,6 +115,23 @@ def test_complex(src_dtype, dst_dtype): assert np.array_equal(out_num, out_np) +@pytest.mark.xfail +@pytest.mark.parametrize("src_dtype", ("F", "D"), ids=to_dtype) +def test_complex_negative(src_dtype): + complex_input = [ + complex(v1, v2) for v1, v2 in zip(TEST_VECTOR[:-1], TEST_VECTOR[1:]) + ] + in_np = np.array(complex_input, dtype=src_dtype) + in_num = num.array(in_np) + + out_np = in_np.astype(to_dtype("?")) + out_num = in_num.astype(to_dtype("?")) + + # Numpy and cuNumeric have different performance. + # For complex data 0.+1.j, Numpy set as True, cuNumeric set as False. + assert np.array_equal(out_num, out_np) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_convolve.py b/tests/integration/test_convolve.py index a0beada23..08bbb3af4 100644 --- a/tests/integration/test_convolve.py +++ b/tests/integration/test_convolve.py @@ -25,6 +25,29 @@ FILTER_SHAPES = [(5,), (3, 5), (3, 5, 3)] +@pytest.mark.xfail +def test_none(): + # Numpy raises: + # TypeError: unsupported operand type(s) for *: 'NoneType' and 'NoneType' + with pytest.raises(AttributeError): + num.convolve(None, None, mode="same") + + +def test_empty(): + msg = r"empty" + with pytest.raises(ValueError, match=msg): + num.convolve([], [], mode="same") + + +def test_diff_dims(): + shape1 = (5,) * 3 + shape2 = (5,) * 2 + arr1 = num.random.random(shape1) + arr2 = num.random.random(shape2) + with pytest.raises(RuntimeError): + num.convolve(arr1, arr2, mode="same") + + def check_convolve(a, v): anp = a.__array__() vnp = v.__array__() From b4656bca1a32535b3ec24e8d9b3460d4229753dd Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 15 Nov 2022 21:58:29 -0800 Subject: [PATCH 42/89] Print build start and end times (#687) Co-authored-by: Marcin Zalewski --- conda/conda-build/build.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index b1d79b52b..d0df68008 100644 --- a/conda/conda-build/build.sh +++ b/conda/conda-build/build.sh @@ -1,7 +1,5 @@ #!/bin/bash - -set -x; - + # Rewrite conda's -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY to # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH CMAKE_ARGS="$(echo "$CMAKE_ARGS" | sed -r "s@_INCLUDE=ONLY@_INCLUDE=BOTH@g")" @@ -32,6 +30,8 @@ export CUDAFLAGS="-UNDEBUG" export CMAKE_GENERATOR=Ninja export CUDAHOSTCXX=${CXX} +echo "Build starting on $(date)" + cmake -S . -B build ${CMAKE_ARGS} cmake --build build -j$CPU_COUNT cmake --install build @@ -51,6 +51,8 @@ $PYTHON -m pip install \ --disable-pip-version-check \ . 
-vv +echo "Build ending on $(date)" + # Legion leaves an egg-info file which will confuse conda trying to pick up the information # Remove it so the legate-core is the only egg-info file added rm -rf $SP_DIR/legion*egg-info From d1e95bc27c35292e0fa51e523697eaab0714f338 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 16 Nov 2022 12:11:00 -0700 Subject: [PATCH 43/89] Fixing logic for putmask with axis = None * put_along_axis: fixing logic for the case when axis=None * putmask: removing test that produces different error message from numpy --- cunumeric/module.py | 10 +++++- tests/integration/test_put_along_axis.py | 41 ------------------------ 2 files changed, 9 insertions(+), 42 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index 0a4e97a5a..d8a8f2a2e 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -3211,7 +3211,7 @@ def put_along_axis( Parameters ---------- - arr : ndarray (Ni..., M, Nk...) + a : ndarray (Ni..., M, Nk...) Destination array. indices : ndarray (Ni..., J, Nk...) Indices to change along each 1d slice of `arr`. This must match the @@ -3239,6 +3239,10 @@ def put_along_axis( Multiple GPUs, Multiple CPUs """ + + if a.size == 0: + return + if not np.issubdtype(indices.dtype, np.integer): raise TypeError("`indices` must be an integer array") @@ -3249,6 +3253,10 @@ def put_along_axis( if a.ndim > 1: # TODO call a=a.flat when flat is implemented raise ValueError("a.ndim>1 case is not supported when axis=None") + if (indices.size == 0) or (values.size == 0): + return + if values.shape != indices.shape: + values = values._wrap(indices.size) else: computed_axis = normalize_axis_index(axis, a.ndim) diff --git a/tests/integration/test_put_along_axis.py b/tests/integration/test_put_along_axis.py index a289ffc2a..8b46ff0e3 100644 --- a/tests/integration/test_put_along_axis.py +++ b/tests/integration/test_put_along_axis.py @@ -132,18 +132,6 @@ def test_indices_bad_type(self, dtype): with pytest.raises(TypeError, match=msg): num.put_along_axis(self.a, ai, 100, axis=0) - @pytest.mark.xfail - @pytest.mark.parametrize( - "shape", ((3, 2), (3, 0)), ids=lambda shape: f"(shape={shape})" - ) - def test_indices_bad_shape(self, shape): - # In Numpy, it raises IndexError. - # In cuNumeric, it raises ValueError.
- ai = num.ones(shape, dtype=int) - msg = "shape mismatch: indexing arrays could not be broadcast" - with pytest.raises(IndexError, match=msg): - num.put_along_axis(self.a, ai, 100, axis=0) - @pytest.mark.parametrize( "shape", ((1,), (3, 3, 1)), ids=lambda shape: f"(shape={shape})" ) @@ -210,19 +198,6 @@ def test_values_bad_shape2(self): "shape", ((0,), (5,), (4, 5)), ids=lambda shape: f"(shape={shape})" ) def test_values_axis_none(self, shape): - a = mk_seq_array(num, (10,)) - ai = mk_seq_array(num, (7,)) - values = mk_seq_array(num, shape) - with pytest.raises(ValueError): - num.put_along_axis(a, ai, values, None) - - @pytest.mark.xfail - @pytest.mark.parametrize( - "shape", ((0,), (5,), (4, 5)), ids=lambda shape: f"(shape={shape})" - ) - def test_values_axis_none_DIVERGENC(self, shape): - # In Numpy, all 3 cases pass - # In cuNumeric, all 3 cases raise ValueError "Shape did not match" np_arr = mk_seq_array(np, (10,)) num_arr = mk_seq_array(num, (10,)) @@ -236,22 +211,6 @@ def test_values_axis_none_DIVERGENC(self, shape): num.put_along_axis(num_arr, indices_num, values_num, None) assert np.array_equal(np_arr, num_arr) - def test_a_none(self): - ai = num.array([1, 1, 1]) - msg = "object has no attribute 'ndim'" - with pytest.raises(AttributeError, match=msg): - num.put_along_axis(None, ai, 100, axis=0) - - def test_indice_none(self): - msg = "'NoneType' object has no attribute 'dtype'" - with pytest.raises(AttributeError, match=msg): - num.put_along_axis(self.a, None, 100, axis=0) - - def test_values_none(self): - msg = "'NoneType' object has no attribute 'dtype'" - with pytest.raises(AttributeError, match=msg): - num.put_along_axis(self.a, self.ai, None, axis=0) - if __name__ == "__main__": import sys From db2a4f80994a7a2e311798088a752dcb5ff9b768 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Wed, 16 Nov 2022 12:27:04 -0700 Subject: [PATCH 44/89] Implementing Putmask (#667) implementing putmask --- cunumeric/config.py | 2 + cunumeric/deferred.py | 147 +++++++++++------ cunumeric/eager.py | 7 + cunumeric/module.py | 44 +++++ cunumeric/thunk.py | 4 + cunumeric_cpp.cmake | 3 + docs/cunumeric/source/api/indexing.rst | 1 + src/cunumeric/cunumeric_c.h | 1 + .../indexing/parallel_loop.cuh | 51 ++++++ .../execution_policy/indexing/parallel_loop.h | 37 +++++ .../indexing/parallel_loop_omp.h | 38 +++++ src/cunumeric/index/putmask.cc | 32 ++++ src/cunumeric/index/putmask.cu | 28 ++++ src/cunumeric/index/putmask.h | 43 +++++ src/cunumeric/index/putmask_omp.cc | 28 ++++ src/cunumeric/index/putmask_template.inl | 116 +++++++++++++ tests/integration/test_putmask.py | 154 ++++++++++++++++++ tests/unit/cunumeric/test_config.py | 1 + 18 files changed, 684 insertions(+), 53 deletions(-) create mode 100644 src/cunumeric/execution_policy/indexing/parallel_loop.cuh create mode 100644 src/cunumeric/execution_policy/indexing/parallel_loop.h create mode 100644 src/cunumeric/execution_policy/indexing/parallel_loop_omp.h create mode 100644 src/cunumeric/index/putmask.cc create mode 100644 src/cunumeric/index/putmask.cu create mode 100644 src/cunumeric/index/putmask.h create mode 100644 src/cunumeric/index/putmask_omp.cc create mode 100644 src/cunumeric/index/putmask_template.inl create mode 100644 tests/integration/test_putmask.py diff --git a/cunumeric/config.py b/cunumeric/config.py index c45ae6313..9195022d6 100644 --- a/cunumeric/config.py +++ b/cunumeric/config.py @@ -167,6 +167,7 @@ class _CunumericSharedLib: CUNUMERIC_NONZERO: int CUNUMERIC_PACKBITS: int CUNUMERIC_POTRF: int + CUNUMERIC_PUTMASK: int 
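+    # (CUNUMERIC_PUTMASK mirrors the enum entry this patch adds to
+    # src/cunumeric/cunumeric_c.h; see the file list above)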
CUNUMERIC_RAND: int CUNUMERIC_READ: int CUNUMERIC_RED_ALL: int @@ -358,6 +359,7 @@ class CuNumericOpCode(IntEnum): NONZERO = _cunumeric.CUNUMERIC_NONZERO PACKBITS = _cunumeric.CUNUMERIC_PACKBITS POTRF = _cunumeric.CUNUMERIC_POTRF + PUTMASK = _cunumeric.CUNUMERIC_PUTMASK RAND = _cunumeric.CUNUMERIC_RAND READ = _cunumeric.CUNUMERIC_READ REPEAT = _cunumeric.CUNUMERIC_REPEAT diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index b04b3088c..5c9330ee4 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -533,7 +533,10 @@ def _slice_store(k: slice, store: Store, dim: int) -> tuple[slice, Store]: return k, store def _create_indexing_array( - self, key: Any, is_set: bool = False + self, + key: Any, + is_set: bool = False, + set_value: Optional[Any] = None, ) -> tuple[bool, Any, Any, Any]: store = self.base rhs = self @@ -583,61 +586,79 @@ def _create_indexing_array( for i in range(key_store.ndim, rhs.ndim): key_store = key_store.promote(i, rhs.shape[i]) - out_dtype = rhs.dtype - # in the case this operation is called for the set_item, we - # return Point type field that is later used for - # indirect copy operation - if is_set: - N = rhs.ndim - out_dtype = rhs.runtime.get_point_type(N) - - # TODO : current implementation of the ND output regions - # requires out.ndim == rhs.ndim. This will be fixed in the - # future - out = rhs.runtime.create_unbound_thunk(out_dtype, ndim=rhs.ndim) - key_dims = key.ndim # dimension of the original key + # has_set_value && set_value.size==1 corresponds to the case + # when a[bool_indices]=scalar + # then we can call "putmask" to modify input array + # and avoid calling Copy + has_set_value = set_value is not None and set_value.size == 1 + if has_set_value: + mask = DeferredArray( + self.runtime, + base=key_store, + dtype=self.dtype, + ) + rhs.putmask(mask, set_value) + return False, rhs, rhs, self + else: + out_dtype = rhs.dtype + # in the case this operation is called for the set_item, we + # return Point type field that is later used for + # indirect copy operation + if is_set: + N = rhs.ndim + out_dtype = rhs.runtime.get_point_type(N) + + # TODO : current implementation of the ND output regions + # requires out.ndim == rhs.ndim. This will be fixed in the + # future + out = rhs.runtime.create_unbound_thunk( + out_dtype, ndim=rhs.ndim + ) + key_dims = key.ndim # dimension of the original key - task = rhs.context.create_auto_task( - CuNumericOpCode.ADVANCED_INDEXING - ) - task.add_output(out.base) - task.add_input(rhs.base) - task.add_input(key_store) - task.add_scalar_arg(is_set, bool) - task.add_scalar_arg(key_dims, ty.int64) - task.add_alignment(rhs.base, key_store) - task.add_broadcast( - rhs.base, axes=tuple(range(1, len(rhs.base.shape))) - ) - task.execute() + task = rhs.context.create_auto_task( + CuNumericOpCode.ADVANCED_INDEXING + ) + task.add_output(out.base) + task.add_input(rhs.base) + task.add_input(key_store) + task.add_scalar_arg(is_set, bool) + task.add_scalar_arg(key_dims, ty.int64) + task.add_alignment(rhs.base, key_store) + task.add_broadcast( + rhs.base, axes=tuple(range(1, len(rhs.base.shape))) + ) + task.execute() - # TODO : current implementation of the ND output regions - # requires out.ndim == rhs.ndim. 
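# ---------------------------------------------------------------------------
# A hedged aside on the has_set_value fast path above: when a boolean-mask
# assignment carries a single (size-1) value, the deferred array now calls
# putmask directly instead of building an indirect copy. A minimal sketch of
# the user-visible behavior, with made-up values:
import cunumeric as num

a = num.arange(6)
mask = a % 2 == 0               # boolean key
a[mask] = -1                    # scalar RHS -> lowered to the putmask task
# expected: a == [-1, 1, -1, 3, -1, 5]
# ---------------------------------------------------------------------------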
- # The logic below will be removed in the future - out_dim = rhs.ndim - key_dims + 1 - - if out_dim != rhs.ndim: - out_tmp = out.base - - if out.size == 0: - out_shape = tuple(out.shape[i] for i in range(0, out_dim)) - out = cast( - DeferredArray, - self.runtime.create_empty_thunk( - out_shape, - out_dtype, - inputs=[out], - ), - ) - if not is_set: - out.fill(np.array(0, dtype=out_dtype)) - else: - for dim in range(rhs.ndim - out_dim): - out_tmp = out_tmp.project(rhs.ndim - dim - 1, 0) + # TODO : current implementation of the ND output regions + # requires out.ndim == rhs.ndim. + # The logic below will be removed in the future + out_dim = rhs.ndim - key_dims + 1 + + if out_dim != rhs.ndim: + out_tmp = out.base + + if out.size == 0: + out_shape = tuple( + out.shape[i] for i in range(0, out_dim) + ) + out = cast( + DeferredArray, + self.runtime.create_empty_thunk( + out_shape, + out_dtype, + inputs=[out], + ), + ) + if not is_set: + out.fill(np.array(0, dtype=out_dtype)) + else: + for dim in range(rhs.ndim - out_dim): + out_tmp = out_tmp.project(rhs.ndim - dim - 1, 0) - out = out._copy_store(out_tmp) + out = out._copy_store(out_tmp) - return False, rhs, out, self + return is_set, rhs, out, self if isinstance(key, NumPyThunk): key = (key,) @@ -890,7 +911,10 @@ def set_item(self, key: Any, rhs: Any) -> None: lhs, index_array, self, - ) = self._create_indexing_array(key, True) + ) = self._create_indexing_array(key, True, rhs) + + if not copy_needed: + return if rhs.shape != index_array.shape: rhs_tmp = rhs._broadcast(index_array.base.shape) @@ -917,6 +941,8 @@ def set_item(self, key: Any, rhs: Any) -> None: index_array = index_array._convert_future_to_regionfield() if lhs.base.kind == Future: lhs = lhs._convert_future_to_regionfield() + if lhs.base.transformed: + lhs = lhs._copy_store(lhs.base) if index_array.size != 0: copy = self.context.create_copy() @@ -1740,6 +1766,21 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: if self_tmp is not self: self.copy(self_tmp, deep=True) + @auto_convert("mask", "values") + def putmask(self, mask: Any, values: Any) -> None: + if values.shape != self.shape: + values_new = values._broadcast(self.shape) + else: + values_new = values.base + task = self.context.create_task(CuNumericOpCode.PUTMASK) + task.add_input(self.base) + task.add_input(mask.base) + task.add_input(values_new) + task.add_output(self.base) + task.add_alignment(self.base, mask.base) + task.add_alignment(self.base, values_new) + task.execute() + # Create an identity array with the ones offset from the diagonal by k def eye(self, k: int) -> None: assert self.ndim == 2 # Only 2-D arrays should be here diff --git a/cunumeric/eager.py b/cunumeric/eager.py index cef2b7b49..530b805c5 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -627,6 +627,13 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: else: np.put(self.array, indices.array, values.array) + def putmask(self, mask: Any, values: Any) -> None: + self.check_eager_args(mask, values) + if self.deferred is not None: + self.deferred.putmask(mask, values) + else: + np.putmask(self.array, mask.array, values.array) + def eye(self, k: int) -> None: if self.deferred is not None: self.deferred.eye(k) diff --git a/cunumeric/module.py b/cunumeric/module.py index d8a8f2a2e..390f8d755 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -3514,6 +3514,50 @@ def put( a.put(indices=indices, values=values, mode=mode) +@add_boilerplate("a", "mask", "values") +def putmask(a: ndarray, mask: 
ndarray, values: ndarray) -> None: + """ + putmask(a, mask, values) + Changes elements of an array based on conditional and input values. + Sets ``a.flat[n] = values[n]`` for each n where ``mask.flat[n]==True``. + If `values` is not the same size as `a` and `mask` then it will repeat. + This gives behavior different from ``a[mask] = values``. + + Parameters + ---------- + a : ndarray + Target array. + mask : array_like + Boolean mask array. It has to be the same shape as `a`. + values : array_like + Values to put into `a` where `mask` is True. If `values` is smaller + than `a` it will be repeated. + + See Also + -------- + numpy.putmask + + Availability + ------------ + Multiple GPUs, Multiple CPUs + """ + if not a.shape == mask.shape: + raise ValueError("mask and data must be the same size") + + mask = mask._warn_and_convert(np.dtype(bool)) + + if a.dtype != values.dtype: + values = values._warn_and_convert(a.dtype) + + try: + np.broadcast_shapes(values.shape, a.shape) + except ValueError: + values = values._wrap(a.size) + values = values.reshape(a.shape) + + a._thunk.putmask(mask._thunk, values._thunk) + + @add_boilerplate("a", "val") def fill_diagonal(a: ndarray, val: ndarray, wrap: bool = False) -> None: """ diff --git a/cunumeric/thunk.py b/cunumeric/thunk.py index 7ade503d0..e80941d4e 100644 --- a/cunumeric/thunk.py +++ b/cunumeric/thunk.py @@ -201,6 +201,10 @@ def _diag_helper( def put(self, indices: Any, values: Any, check_bounds: bool) -> None: ... + @abstractmethod + def putmask(self, mask: Any, values: Any) -> None: + ... + @abstractmethod def eye(self, k: int) -> None: ... diff --git a/cunumeric_cpp.cmake b/cunumeric_cpp.cmake index a47038a3b..9ab2741b3 100644 --- a/cunumeric_cpp.cmake +++ b/cunumeric_cpp.cmake @@ -131,6 +131,7 @@ list(APPEND cunumeric_SOURCES src/cunumeric/index/repeat.cc src/cunumeric/index/wrap.cc src/cunumeric/index/zip.cc + src/cunumeric/index/putmask.cc src/cunumeric/item/read.cc src/cunumeric/item/write.cc src/cunumeric/matrix/contract.cc @@ -180,6 +181,7 @@ if(Legion_USE_OpenMP) src/cunumeric/nullary/window_omp.cc src/cunumeric/index/advanced_indexing_omp.cc src/cunumeric/index/choose_omp.cc + src/cunumeric/index/putmask_omp.cc src/cunumeric/index/repeat_omp.cc src/cunumeric/index/wrap_omp.cc src/cunumeric/index/zip_omp.cc @@ -229,6 +231,7 @@ if(Legion_USE_CUDA) src/cunumeric/index/repeat.cu src/cunumeric/index/wrap.cu src/cunumeric/index/zip.cu + src/cunumeric/index/putmask.cu src/cunumeric/item/read.cu src/cunumeric/item/write.cu src/cunumeric/matrix/contract.cu diff --git a/docs/cunumeric/source/api/indexing.rst b/docs/cunumeric/source/api/indexing.rst index 1ace111d4..ab02bbcc4 100644 --- a/docs/cunumeric/source/api/indexing.rst +++ b/docs/cunumeric/source/api/indexing.rst @@ -44,5 +44,6 @@ Inserting data into arrays fill_diagonal put + putmask put_along_axis place diff --git a/src/cunumeric/cunumeric_c.h b/src/cunumeric/cunumeric_c.h index 60d6e108d..462214782 100644 --- a/src/cunumeric/cunumeric_c.h +++ b/src/cunumeric/cunumeric_c.h @@ -52,6 +52,7 @@ enum CuNumericOpCode { CUNUMERIC_NONZERO, CUNUMERIC_PACKBITS, CUNUMERIC_POTRF, + CUNUMERIC_PUTMASK, CUNUMERIC_RAND, CUNUMERIC_READ, CUNUMERIC_REPEAT, diff --git a/src/cunumeric/execution_policy/indexing/parallel_loop.cuh b/src/cunumeric/execution_policy/indexing/parallel_loop.cuh new file mode 100644 index 000000000..81788908f --- /dev/null +++ b/src/cunumeric/execution_policy/indexing/parallel_loop.cuh @@ -0,0 +1,51 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "cunumeric/cunumeric.h" +#include "cunumeric/execution_policy/indexing/parallel_loop.h" +#include "cunumeric/cuda_help.h" + +namespace cunumeric { + +template +static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + parallel_loop_kernel(const size_t volume, KERNEL kernel, Tag tag) +{ + const size_t idx = global_tid_1d(); + if (idx >= volume) return; + kernel(idx, tag); +} + +template +struct ParallelLoopPolicy { + template + void operator()(const RECT& rect, KERNEL&& kernel) + { + const size_t volume = rect.volume(); + if (0 == volume) return; + auto stream = get_cached_stream(); + const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + + parallel_loop_kernel<<>>( + volume, std::forward(kernel), Tag{}); + + CHECK_CUDA_STREAM(stream); + } +}; + +} // namespace cunumeric diff --git a/src/cunumeric/execution_policy/indexing/parallel_loop.h b/src/cunumeric/execution_policy/indexing/parallel_loop.h new file mode 100644 index 000000000..31adf811f --- /dev/null +++ b/src/cunumeric/execution_policy/indexing/parallel_loop.h @@ -0,0 +1,37 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "cunumeric/cunumeric.h" + +namespace cunumeric { + +template +struct ParallelLoopPolicy { +}; + +template +struct ParallelLoopPolicy { + template + void operator()(const RECT& rect, KERNEL&& kernel) + { + const size_t volume = rect.volume(); + for (size_t idx = 0; idx < volume; ++idx) { kernel(idx, Tag{}); } + } +}; + +} // namespace cunumeric diff --git a/src/cunumeric/execution_policy/indexing/parallel_loop_omp.h b/src/cunumeric/execution_policy/indexing/parallel_loop_omp.h new file mode 100644 index 000000000..a89702fe3 --- /dev/null +++ b/src/cunumeric/execution_policy/indexing/parallel_loop_omp.h @@ -0,0 +1,38 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#pragma once + +#include "cunumeric/cunumeric.h" +#include "cunumeric/execution_policy/indexing/parallel_loop.h" +#include "cunumeric/omp_help.h" + +#include + +namespace cunumeric { + +template +struct ParallelLoopPolicy { + template + void operator()(const RECT& rect, KERNEL&& kernel) + { + const size_t volume = rect.volume(); +#pragma omp for schedule(static) + for (size_t idx = 0; idx < volume; ++idx) { kernel(idx, Tag{}); } + } +}; + +} // namespace cunumeric diff --git a/src/cunumeric/index/putmask.cc b/src/cunumeric/index/putmask.cc new file mode 100644 index 000000000..595329f13 --- /dev/null +++ b/src/cunumeric/index/putmask.cc @@ -0,0 +1,32 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "cunumeric/index/putmask.h" +#include "cunumeric/index/putmask_template.inl" + +namespace cunumeric { + +/*static*/ void PutmaskTask::cpu_variant(TaskContext& context) +{ + putmask_template(context); +} + +namespace // unnamed +{ +static void __attribute__((constructor)) register_tasks(void) { PutmaskTask::register_variants(); } +} // namespace + +} // namespace cunumeric diff --git a/src/cunumeric/index/putmask.cu b/src/cunumeric/index/putmask.cu new file mode 100644 index 000000000..abe94d82f --- /dev/null +++ b/src/cunumeric/index/putmask.cu @@ -0,0 +1,28 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "cunumeric/execution_policy/indexing/parallel_loop.cuh" +#include "cunumeric/index/putmask.h" +#include "cunumeric/index/putmask_template.inl" + +namespace cunumeric { + +/*static*/ void PutmaskTask::gpu_variant(TaskContext& context) +{ + putmask_template(context); +} + +} // namespace cunumeric diff --git a/src/cunumeric/index/putmask.h b/src/cunumeric/index/putmask.h new file mode 100644 index 000000000..07a418d19 --- /dev/null +++ b/src/cunumeric/index/putmask.h @@ -0,0 +1,43 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "cunumeric/cunumeric.h" + +namespace cunumeric { + +struct PutmaskArgs { + const Array& input; + const Array& mask; + const Array& values; +}; + +class PutmaskTask : public CuNumericTask { + public: + static const int TASK_ID = CUNUMERIC_PUTMASK; + + public: + static void cpu_variant(legate::TaskContext& context); +#ifdef LEGATE_USE_OPENMP + static void omp_variant(legate::TaskContext& context); +#endif +#ifdef LEGATE_USE_CUDA + static void gpu_variant(legate::TaskContext& context); +#endif +}; + +} // namespace cunumeric diff --git a/src/cunumeric/index/putmask_omp.cc b/src/cunumeric/index/putmask_omp.cc new file mode 100644 index 000000000..8550b41cd --- /dev/null +++ b/src/cunumeric/index/putmask_omp.cc @@ -0,0 +1,28 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "cunumeric/execution_policy/indexing/parallel_loop_omp.h" +#include "cunumeric/index/putmask.h" +#include "cunumeric/index/putmask_template.inl" + +namespace cunumeric { + +/*static*/ void PutmaskTask::omp_variant(TaskContext& context) +{ + putmask_template(context); +} + +} // namespace cunumeric diff --git a/src/cunumeric/index/putmask_template.inl b/src/cunumeric/index/putmask_template.inl new file mode 100644 index 000000000..6f55c34e0 --- /dev/null +++ b/src/cunumeric/index/putmask_template.inl @@ -0,0 +1,116 @@ +/* Copyright 2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#pragma once + +// Useful for IDEs +#include +#include "cunumeric/index/putmask.h" +#include "cunumeric/pitches.h" +#include "cunumeric/execution_policy/indexing/parallel_loop.h" + +namespace cunumeric { + +using namespace Legion; +using namespace legate; + +template +struct Putmask { + using T = legate_type_of; + using IN = AccessorRW; + using MASK = AccessorRO; + using VALUES = AccessorRO; + + IN input; + T* inputptr; + MASK mask; + const bool* maskptr; + VALUES values; + const T* valptr; + Pitches pitches; + Rect rect; + bool dense; + size_t volume; + + struct DenseTag {}; + struct SparseTag {}; + + // constructor: + Putmask(PutmaskArgs& args) : dense(false) + { + rect = args.input.shape(); +#ifdef DEBUG_CUNUMERIC + assert(rect == args.mask.shape()); +#endif + + input = args.input.read_write_accessor(rect); + mask = args.mask.read_accessor(rect); + values = args.values.read_accessor(rect); + volume = pitches.flatten(rect); + if (volume == 0) return; +#ifndef LEGION_BOUNDS_CHECKS + dense = input.accessor.is_dense_row_major(rect) && mask.accessor.is_dense_row_major(rect); + dense = dense && values.accessor.is_dense_row_major(rect); + if (dense) { + inputptr = input.ptr(rect); + maskptr = mask.ptr(rect); + valptr = values.ptr(rect); + } +#endif + } // constructor + + __CUDA_HD__ void operator()(const size_t idx, DenseTag) const noexcept + { + if (maskptr[idx]) inputptr[idx] = valptr[idx]; + } + + __CUDA_HD__ void operator()(const size_t idx, SparseTag) const noexcept + { + auto p = pitches.unflatten(idx, rect.lo); + if (mask[p]) input[p] = values[p]; + } + + void execute() const noexcept + { +#ifndef LEGION_BOUNDS_CHECKS + if (dense) { return ParallelLoopPolicy()(rect, *this); } +#endif + return ParallelLoopPolicy()(rect, *this); + } +}; + +using namespace Legion; +using namespace legate; + +template +struct PutmaskImpl { + template + void operator()(PutmaskArgs& args) const + { + Putmask putmask(args); + putmask.execute(); + } +}; + +template +static void putmask_template(TaskContext& context) +{ + auto& inputs = context.inputs(); + PutmaskArgs args{context.outputs()[0], inputs[1], inputs[2]}; + double_dispatch(args.input.dim(), args.input.code(), PutmaskImpl{}, args); +} + +} // namespace cunumeric diff --git a/tests/integration/test_putmask.py b/tests/integration/test_putmask.py new file mode 100644 index 000000000..b484cbad7 --- /dev/null +++ b/tests/integration/test_putmask.py @@ -0,0 +1,154 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from legate.core import LEGATE_MAX_DIM +from utils.generators import mk_seq_array + +import cunumeric as num + + +def test_scalar(): + x = mk_seq_array(np, (3,)) + x_num = mk_seq_array(num, (3,)) + values = np.zeros((), dtype=np.int32) + values_num = num.zeros((), dtype=np.int32) + mask = (x % 2).astype(bool) + mask_num = num.array(mask) + np.putmask(x[:1], mask[2:], values) + num.putmask(x_num[:1], mask_num[2:], values_num) + assert np.array_equal(x_num, x) + + x = mk_seq_array(np, (3, 4, 5)) + x_num = mk_seq_array(num, (3, 4, 5)) + mask = (x % 2).astype(bool) + mask_num = num.array(mask) + np.putmask(x, mask, 100) + num.putmask(x_num, mask_num, 100) + assert np.array_equal(x_num, x) + + x = np.zeros((), dtype=np.int32) + x_num = num.zeros((), dtype=np.int32) + mask = False + mask_num = False + np.putmask(x, mask, -1) + num.putmask(x_num, mask_num, -1) + assert np.array_equal(x_num, x) + + x = np.zeros((), dtype=np.int32) + x_num = num.zeros((), dtype=np.int32) + mask = True + mask_num = True + np.putmask(x, mask, -1) + num.putmask(x_num, mask_num, -1) + assert np.array_equal(x_num, x) + + # testing the case when indices is a scalar + x = mk_seq_array(np, (3, 4, 5)) + x_num = mk_seq_array(num, (3, 4, 5)) + values = mk_seq_array(np, (6,)) * 10 + values_num = num.array(values) + mask = (x % 2).astype(bool) + mask_num = num.array(mask) + np.putmask(x, mask, values[:1]) + num.putmask(x_num, mask_num, values_num[:1]) + assert np.array_equal(x_num, x) + + +def test_type_convert(): + x = mk_seq_array(np, (3, 4, 5)) + x_num = mk_seq_array(num, (3, 4, 5)) + values = mk_seq_array(np, (6,)) * 10 + values_num = num.array(values) + mask = x % 2 + mask_num = x_num % 2 + np.putmask(x, mask, values) + num.putmask(x_num, mask_num, values_num) + assert np.array_equal(x_num, x) + + x = mk_seq_array(np, (3, 4, 5)) + x_num = mk_seq_array(num, (3, 4, 5)) + values = mk_seq_array(np, (6,)) * 10 + values_num = num.array(values) + mask = np.zeros( + ( + 3, + 4, + 5, + ) + ) + mask_num = num.zeros((3, 4, 5)) + np.putmask(x, mask, values) + num.putmask(x_num, mask_num, values_num) + assert np.array_equal(x_num, x) + + x = mk_seq_array(np, (3, 4, 5)) + x_num = mk_seq_array(num, (3, 4, 5)) + x = x.astype(np.int32) + x_num = x_num.astype(np.int32) + mask = np.zeros( + ( + 3, + 4, + 5, + ) + ) + mask_num = num.zeros((3, 4, 5)) + np.putmask(x, mask, 11) + num.putmask(x_num, mask_num, 11) + assert np.array_equal(x_num, x) + + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_ndim(ndim): + shape = (5,) * ndim + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + np_mask = (np_arr % 2).astype(bool) + num_mask = (num_arr % 2).astype(bool) + # scalar_val + np.putmask(np_arr, np_mask, -10) + num.putmask(num_arr, num_mask, -10) + assert np.array_equal(np_arr, num_arr) + + # val is the same shape: + np_val = np_arr * 10 + num_val = num_arr * 10 + np.putmask(np_arr, np_mask, np_val) + num.putmask(num_arr, num_mask, num_val) + assert np.array_equal(np_arr, num_arr) + + # val is different shape, but the same size + shape_val = (np_arr.size,) + np_values = mk_seq_array(np, shape_val) * 10 + num_values = mk_seq_array(num, shape_val) * 10 + np.putmask(np_arr, np_mask, np_values) + num.putmask(num_arr, num_mask, num_values) + assert np.array_equal(np_arr, num_arr) + + # val is different shape and different size for vals and array + shape_val = (2,) * ndim + np_values = mk_seq_array(np, shape_val) * 10 + num_values = mk_seq_array(num, shape_val) * 10 + 
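# ---------------------------------------------------------------------------
# A hedged usage sketch for the new num.putmask, mirroring numpy.putmask; the
# values are illustrative assumptions, not taken from this test file:
import cunumeric as num

x = num.array([1, 2, 3, 4])
num.putmask(x, x > 2, num.array([-3, -4, -5, -6]))
# expected: x == [1, 2, -5, -6]; where the mask is True, elements are taken
# from the corresponding positions of the values array
# ---------------------------------------------------------------------------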
np.putmask(np_arr, np_mask, np_values) + num.putmask(num_arr, num_mask, num_values) + assert np.array_equal(np_arr, num_arr) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/unit/cunumeric/test_config.py b/tests/unit/cunumeric/test_config.py index db486df5d..ddede6241 100644 --- a/tests/unit/cunumeric/test_config.py +++ b/tests/unit/cunumeric/test_config.py @@ -154,6 +154,7 @@ def test_CuNumericOpCode() -> None: "NONZERO", "PACKBITS", "POTRF", + "PUTMASK", "RAND", "READ", "REPEAT", From 0297bf6eb7b62430612193e6981dd2d2a088d03f Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Fri, 18 Nov 2022 06:52:02 +0800 Subject: [PATCH 45/89] Enhance test_index_routines.py and test_reshape.py (#696) * Enhance test_index_routines.py and test_reshape.py * Address comments. --- tests/integration/test_index_routines.py | 444 +++++++++++++++++++---- tests/integration/test_reshape.py | 117 +++++- 2 files changed, 487 insertions(+), 74 deletions(-) diff --git a/tests/integration/test_index_routines.py b/tests/integration/test_index_routines.py index c5ff7869c..f01c3fa7b 100644 --- a/tests/integration/test_index_routines.py +++ b/tests/integration/test_index_routines.py @@ -25,7 +25,8 @@ from cunumeric.eager import diagonal_reference -def test_choose_1d(): +class TestChoose1d: + choices1 = [ [0, 1, 2, 3], [10, 11, 12, 13], @@ -35,26 +36,36 @@ def test_choose_1d(): a1 = [2, 3, 1, 0] num_a1 = num.array(a1) num_choices1 = num.array(choices1) + b = [2, 4, 1, 0] + num_b = num.array(b) - aout = np.array([2.3, 3.0, 1.2, 0.3]) - num_aout = num.array(aout) + def test_basic(self): + assert np.array_equal( + num.choose(self.num_a1, self.num_choices1), + np.choose(self.a1, self.choices1), + ) - assert np.array_equal( - np.choose(a1, choices1, out=aout), - num.choose(num_a1, num_choices1, out=num_aout), - ) - assert np.array_equal(aout, num_aout) + def test_out_none(self): + assert np.array_equal( + num.choose(self.num_a1, self.num_choices1, out=None), + np.choose(self.a1, self.choices1, out=None), + ) - b = [2, 4, 1, 0] - num_b = num.array(b) - assert np.array_equal( - np.choose(b, choices1, mode="clip"), - num.choose(num_b, num_choices1, mode="clip"), - ) - assert np.array_equal( - np.choose(b, choices1, mode="wrap"), - num.choose(num_b, num_choices1, mode="wrap"), - ) + def test_out(self): + aout = np.array([2.3, 3.0, 1.2, 0.3]) + num_aout = num.array(aout) + assert np.array_equal( + np.choose(self.a1, self.choices1, out=aout), + num.choose(self.num_a1, self.num_choices1, out=num_aout), + ) + assert np.array_equal(aout, num_aout) + + @pytest.mark.parametrize("mode", ("wrap", "clip"), ids=str) + def test_mode(self, mode): + assert np.array_equal( + np.choose(self.b, self.choices1, mode=mode), + num.choose(self.num_b, self.num_choices1, mode=mode), + ) def test_choose_2d(): @@ -102,6 +113,163 @@ def test_choose_target_ndim(ndim): assert np.array_equal(np_res, num_res) +SHAPES_A = ( + (2, 4), + (2, 1), + (1, 4), + (1, 1), + (4,), + (1,), + (3, 2, 4), + (2, 3, 2, 4), + (1, 3, 1, 1), +) + + +@pytest.mark.parametrize( + "shape_a", SHAPES_A, ids=lambda shape_a: f"(shape_a={shape_a})" +) +def test_choose_a_array(shape_a): + shape_choices = (3, 2, 4) + np_a = mk_seq_array(np, shape_a) % shape_choices[0] + num_a = mk_seq_array(num, shape_a) % shape_choices[0] + np_choices = mk_seq_array(np, shape_choices) + num_choices = mk_seq_array(num, shape_choices) + + np_res = np.choose(np_a, np_choices) + num_res = num.choose(num_a, 
num_choices) + assert np.array_equal(np_res, num_res) + + +def test_choose_a_scalar(): + shape_choices = (3, 2, 4) + a = 1 + np_choices = mk_seq_array(np, shape_choices) + num_choices = mk_seq_array(num, shape_choices) + + np_res = np.choose(a, np_choices) + num_res = num.choose(a, num_choices) + assert np.array_equal(np_res, num_res) + + +@pytest.mark.parametrize("mode", ("wrap", "clip"), ids=str) +@pytest.mark.parametrize( + "shape_a", ((3, 2, 4), (4,)), ids=lambda shape_a: f"(shape_a={shape_a})" +) +def test_choose_mode(shape_a, mode): + shape_choices = (3, 2, 4) + np_a = mk_seq_array(np, shape_a) - 10 + num_a = mk_seq_array(num, shape_a) - 10 + np_choices = mk_seq_array(np, shape_choices) + num_choices = mk_seq_array(num, shape_choices) + + np_res = np.choose(np_a, np_choices, mode=mode) + num_res = num.choose(num_a, num_choices, mode=mode) + assert np.array_equal(np_res, num_res) + + +def test_choose_out(): + shape_choices = (3, 2, 4) + shape_a = (2, 4) + shape_a_out = (2, 4) + np_a = mk_seq_array(np, shape_a) % shape_choices[0] + np_a = np_a.astype(np.int32) + num_a = mk_seq_array(num, shape_a) % shape_choices[0] + num_a = num_a.astype( + np.int32 + ) # cuNumeric would convert np.int32 to default type np.int64 + np_choices = mk_seq_array(np, shape_choices) + num_choices = mk_seq_array(num, shape_choices) + np_aout = mk_seq_array(np, shape_a_out) - 10 + num_aout = mk_seq_array(num, shape_a_out) - 10 + + np_res = np.choose(np_a, np_choices, out=np_aout) + num_res = num.choose(num_a, num_choices, out=num_aout) + assert np.array_equal(np_res, num_res) + assert np.array_equal(np_aout, num_aout) + + +@pytest.mark.xfail +def test_choose_mode_none(): + # In Numpy, pass and returns array equals default mode + # In cuNumeric, raises ValueError: mode=None not understood. 
+ # Must be 'raise', 'wrap', or 'clip' + shape_choices = (3, 2, 4) + shape_a = (2, 4) + np_a = mk_seq_array(np, shape_a) % shape_choices[0] + num_a = mk_seq_array(num, shape_a) % shape_choices[0] + np_choices = mk_seq_array(np, shape_choices) + num_choices = mk_seq_array(num, shape_choices) + + np_res = np.choose(np_a, np_choices, mode=None) + num_res = num.choose(num_a, num_choices, mode=None) + assert np.array_equal(np_res, num_res) + + +class TestChooseErrors: + def setup_method(self): + self.shape_choices = (3, 2, 4) + self.choices = mk_seq_array(num, self.shape_choices) + self.shape_a = (2, 4) + self.a = mk_seq_array(num, self.shape_a) % self.shape_choices[0] + + @pytest.mark.parametrize( + "value", (-1, 3), ids=lambda value: f"(value={value})" + ) + def test_a_value_out_of_bound(self, value): + shape_a = (2, 4) + a = num.full(shape_a, value) + msg = "invalid entry in choice array" + with pytest.raises(ValueError, match=msg): + num.choose(a, self.choices) + + def test_a_value_float(self): + shape_a = (2, 4) + a = num.full(shape_a, 1.0) + with pytest.raises(TypeError): + num.choose(a, self.choices) + + @pytest.mark.parametrize( + "shape_a", + ((3, 4), (2, 2), (2,), (0,)), + ids=lambda shape_a: f"(shape_a={shape_a})", + ) + def test_a_invalid_shape(self, shape_a): + a = mk_seq_array(num, shape_a) % self.shape_choices[0] + msg = "shape mismatch" + with pytest.raises(ValueError, match=msg): + num.choose(a, self.choices) + + @pytest.mark.xfail + def test_a_none(self): + # In Numpy, it raises TypeError + # In cuNumeric, it raises AttributeError: + # 'NoneType' object has no attribute 'choose' + with pytest.raises(TypeError): + num.choose(None, self.choices) + + def test_empty_choices(self): + msg = "invalid entry in choice array" + with pytest.raises(ValueError, match=msg): + num.choose(self.a, []) + + @pytest.mark.xfail + def test_choices_none(self): + # In Numpy, it raises TypeError + # In cuNumeric, it raises IndexError: tuple index out of range + with pytest.raises(TypeError): + num.choose(self.a, None) + + def test_invalid_mode(self): + with pytest.raises(ValueError): + num.choose(self.a, self.choices, mode="InvalidValue") + + def test_out_invalid_shape(self): + aout = mk_seq_array(num, (1, 4)) + with pytest.raises(ValueError): + num.choose(self.a, self.choices, out=aout) + + def test_diagonal(): ad = np.arange(24).reshape(4, 3, 2) num_ad = num.array(ad) @@ -129,7 +297,6 @@ def test_diagonal(): for ndim in range(3, LEGATE_MAX_DIM + 1): a_shape = tuple(random.randint(1, 9) for i in range(ndim)) np_array = mk_seq_array(np, a_shape) - np_array = mk_seq_array(np, a_shape) num_array = mk_seq_array(num, a_shape) for num_axes in range(3, ndim + 1): for axes in permutations(range(ndim), num_axes): @@ -140,65 +307,212 @@ def test_diagonal(): assert np.array_equal(res_num, res_ref) -KS = [0, -1, 1, -2, 2] +KS = (0, -1, 1, -2, 2) +@pytest.mark.xfail @pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})") -def test_diag(k): - print(f"diag(k={k})") - a = num.array( - [ - [1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16], - [17, 18, 19, 20], - ] +@pytest.mark.parametrize( + "shape", ((5, 1), (1, 5)), ids=lambda shape: f"(shape={shape})" +) +def test_diagonal_offset(shape, k): + # for shape=(5, 1) and k=1, 2, + # for shape=(1, 5) and k=-1, -2, + # In cuNumeric, raise ValueError: 'offset' + # for diag or diagonal must be in range + # In Numpy, pass and returns empty array + a = mk_seq_array(num, shape) + an = mk_seq_array(np, shape) + + b = num.diagonal(a, k) + bn = 
np.diagonal(an, k) + assert np.array_equal(b, bn) + + +@pytest.mark.parametrize( + "shape", + (pytest.param((3, 0), marks=pytest.mark.xfail), (0, 3)), + ids=lambda shape: f"(shape={shape})", +) +def test_diagonal_empty_array(shape): + # for shape=(3, 0) and k=0, + # In cuNumeric, raise ValueError: 'offset' + # for diag or diagonal must be in range + # In Numpy, pass and returns empty array + a = mk_seq_array(num, shape) + an = mk_seq_array(np, shape) + + b = num.diagonal(a) + bn = np.diagonal(an) + assert np.array_equal(b, bn) + + +class TestDiagonalErrors: + def setup_method(self): + shape = (3, 4, 5) + self.a = mk_seq_array(num, shape) + + def test_0d_array(self): + a = num.array(3) + with pytest.raises(ValueError): + num.diagonal(a) + + def test_1d_array(self): + shape = (3,) + a = mk_seq_array(num, shape) + with pytest.raises(ValueError): + num.diagonal(a) + + @pytest.mark.xfail + def test_array_none(self): + # In cuNumeric, it raises AttributeError: + # 'NoneType' object has no attribute 'diagonal' + # In Numpy, it raises ValueError: + # diag requires an array of at least two dimensions. + with pytest.raises(ValueError): + num.diagonal(None) + + @pytest.mark.parametrize( + "axes", + ((0, 0), pytest.param((0, -3), marks=pytest.mark.xfail)), + ids=lambda axes: f"(axes={axes})", ) - an = np.array( - [ - [1, 2, 3, 4], - [5, 6, 7, 8], - [9, 10, 11, 12], - [13, 14, 15, 16], - [17, 18, 19, 20], - ] + def test_axes_same(self, axes): + # For axes = (0, -3), + # In cuNumeric, it raises ValueError: + # axes must be the same size as ndim for transpose + # In Numpy, it raises ValueError: axis1 and axis2 cannot be the same + axis1, axis2 = axes + msg = "axes passed to _diag_helper should be all different" + with pytest.raises(ValueError, match=msg): + num.diagonal(self.a, 0, axis1, axis2) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "axes", ((0, -4), (3, 0)), ids=lambda axes: f"(axes={axes})" + ) + def test_axes_out_of_bound(self, axes): + # In Numpy, it raises numpy.AxisError: is out of bounds + # In cuNumeric, it raises ValueError: + # axes must be the same size as ndim for transpose + axis1, axis2 = axes + with pytest.raises(np.AxisError): + num.diagonal(self.a, 0, axis1, axis2) + + @pytest.mark.xfail + def test_axes_float(self): + # In Numpy, it raise TypeError + # In cuNumeric, it raises AssertionError + with pytest.raises(TypeError): + num.diagonal(self.a, 0, 0.0, 1) + + @pytest.mark.xfail + def test_axes_none(self): + # In Numpy, it raise TypeError + # In cuNumeric, it raises AssertionError + with pytest.raises(TypeError): + num.diagonal(self.a, 0, None, 0) + + @pytest.mark.parametrize( + "k", + (pytest.param(0.0, marks=pytest.mark.xfail), -1.5, 1.5), + ids=lambda k: f"(k={k})", ) + def test_k_float(self, k): + # for k=0.0, + # In cuNumeric, pass + # In Numpy, raises TypeError: integer argument expected, got float + with pytest.raises(TypeError): + num.diagonal(self.a, k) + + def test_k_none(self): + with pytest.raises(TypeError): + num.diagonal(self.a, None) + + +@pytest.mark.parametrize("k", KS, ids=lambda k: f"(k={k})") +@pytest.mark.parametrize( + "shape", + ( + (5,), + (3, 3), + pytest.param((5, 1), marks=pytest.mark.xfail), + pytest.param((1, 5), marks=pytest.mark.xfail), + ), + ids=lambda shape: f"(shape={shape})", +) +def test_diag(shape, k): + # for shape=(5, 1) and k=1, 2, + # for shape=(1, 5) and k=-1, -2, + # In cuNumeric, raise ValueError: + # 'offset' for diag or diagonal must be in range + # In Numpy, pass and returns empty array + a = mk_seq_array(num, shape) + an = 
mk_seq_array(np, shape) b = num.diag(a, k=k) bn = np.diag(an, k=k) assert np.array_equal(b, bn) - c = num.diag(b, k=k) - cn = np.diag(bn, k=k) - assert np.array_equal(c, cn) - - d = num.array( - [ - [1, 2, 3, 4, 5], - [6, 7, 8, 9, 10], - [11, 12, 13, 14, 15], - [16, 17, 18, 19, 20], - ] - ) - dn = np.array( - [ - [1, 2, 3, 4, 5], - [6, 7, 8, 9, 10], - [11, 12, 13, 14, 15], - [16, 17, 18, 19, 20], - ] - ) - e = num.diag(d, k=k) - en = np.diag(dn, k=k) - assert np.array_equal(e, en) +@pytest.mark.parametrize( + "shape", + ((0,), pytest.param((3, 0), marks=pytest.mark.xfail), (0, 3)), + ids=lambda shape: f"(shape={shape})", +) +def test_diag_empty_array(shape): + # for shape=(3, 0) and k=0, + # In cuNumeric, raise ValueError: + # 'offset' for diag or diagonal must be in range + # In Numpy, pass and returns empty array + a = mk_seq_array(num, shape) + an = mk_seq_array(np, shape) + + b = num.diag(a) + bn = np.diag(an) + assert np.array_equal(b, bn) + - f = num.diag(e, k=k) - fn = np.diag(en, k=k) - assert np.array_equal(f, fn) +class TestDiagErrors: + def test_0d_array(self): + a = num.array(3) + msg = "Input must be 1- or 2-d" + with pytest.raises(ValueError, match=msg): + num.diag(a) + + def test_3d_array(self): + shape = (3, 4, 5) + a = mk_seq_array(num, shape) + with pytest.raises(ValueError): + num.diag(a) + + @pytest.mark.xfail + def test_array_none(self): + # In cuNumeric, it raises AttributeError, + # 'NoneType' object has no attribute 'ndim' + # In Numpy, it raises ValueError, Input must be 1- or 2-d. + with pytest.raises(ValueError): + num.diag(None) + + @pytest.mark.parametrize( + "k", + (pytest.param(0.0, marks=pytest.mark.xfail), -1.5, 1.5), + ids=lambda k: f"(k={k})", + ) + def test_k_float(self, k): + # for k=0.0, + # In cuNumeric, pass + # In Numpy, raises TypeError: integer argument expected, got float + shape = (3, 3) + a = mk_seq_array(num, shape) + with pytest.raises(TypeError): + num.diag(a, k=k) - return + def test_k_none(self): + shape = (3, 3) + a = mk_seq_array(num, shape) + with pytest.raises(TypeError): + num.diag(a, k=None) if __name__ == "__main__": diff --git a/tests/integration/test_reshape.py b/tests/integration/test_reshape.py index 1e4c7f8c5..4fe6814cb 100644 --- a/tests/integration/test_reshape.py +++ b/tests/integration/test_reshape.py @@ -37,12 +37,13 @@ def test_basic(self): a = num.arange(100).reshape(10, 10) assert np.array_equal(self.anp, a) + @pytest.mark.parametrize("order", ("C", "F", "A", None), ids=str) @pytest.mark.parametrize("shape", SQUARE_CASES, ids=str) - def test_shape(self, shape): + def test_shape(self, shape, order): a = num.arange(100).reshape(10, 10) assert np.array_equal( - num.reshape(a, shape), - np.reshape(self.anp, shape), + num.reshape(a, shape, order=order), + np.reshape(self.anp, shape, order=order), ) def test_1d(self): @@ -102,7 +103,7 @@ def test_ravel(self): assert np.array_equal(a.ravel(), anp.ravel()) -RECT_CASES = [ +RECT_CASES = ( (10, 2, 10), (20, 10), (20, -5), @@ -110,15 +111,31 @@ def test_ravel(self): (200, 1), (1, 200), (10, 20), -] +) class TestRect: anp = np.random.rand(5, 4, 10) + @pytest.mark.parametrize("order", ("C", "F", "A", None), ids=str) @pytest.mark.parametrize("shape", RECT_CASES, ids=str) - def test_shape(self, shape): + def test_shape(self, shape, order): + a = num.array(self.anp) + assert np.array_equal( + num.reshape(a, shape, order=order), + np.reshape(self.anp, shape, order=order), + ) + + @pytest.mark.parametrize( + "shape", + (200, -1, -2, pytest.param(None, marks=pytest.mark.xfail)), + 
ids=str, + ) + def test_0d(self, shape): + # for shape=None, + # In Numpy, pass, returns the flattened 1-D array + # In cuNumeric, raises TypeError: 'NoneType' object is not iterable a = num.array(self.anp) assert np.array_equal( num.reshape(a, shape), @@ -132,13 +149,95 @@ def test_1d(self): np.reshape(self.anp, (200,)), ) - def test_ravel(self): + @pytest.mark.parametrize( + "order", + ("C", "F", "A", pytest.param("K", marks=pytest.mark.xfail), None), + ids=str, + ) + def test_ravel(self, order): + # In Numpy, pass with 'K' + # In cuNumeric, when order is 'K', raise ValueError: + # order 'K' is not permitted for reshaping a = num.array(self.anp) assert np.array_equal( - num.ravel(a), - np.ravel(self.anp), + num.ravel(a, order=order), + np.ravel(self.anp, order=order), ) + @pytest.mark.xfail + def test_ravel_a_none(self): + # In Numpy, pass and returns [None] + # In cuNumeric, raises AttributeError: + # 'NoneType' object has no attribute 'ravel' + assert np.array_equal( + num.ravel(None), + np.ravel(None), + ) + + +@pytest.mark.parametrize("shape", (0, (0,), (1, 0), (0, 1, 1)), ids=str) +def test_reshape_empty_array(shape): + a = num.arange(0).reshape(0, 1) + anp = np.arange(0).reshape(0, 1) + assert np.array_equal( + num.reshape(a, shape), + np.reshape(anp, shape), + ) + + +class TestReshapeErrors: + def setup_method(self): + self.a = num.arange(24) + self.shape = (4, 3, 2) + + @pytest.mark.xfail + def test_a_none(self): + # In Numpy, it raises ValueError: cannot reshape array + # In cuNumeric, it raises AttributeError: + # 'NoneType' object has no attribute + with pytest.raises(ValueError): + num.reshape(None, self.shape) + + def test_empty_array_shape_invalid_size(self): + a = num.arange(0).reshape(0, 1, 1) + shape = (1, 1) + with pytest.raises(ValueError): + num.reshape(a, shape) + + @pytest.mark.parametrize( + "shape", + ((-1, 0, 2), (4, 3, 4), (4, 3, 0), (4, 3), (4,), (0,), 4), + ids=str, + ) + def test_shape_invalid_size(self, shape): + msg = "cannot reshape array" + with pytest.raises(ValueError, match=msg): + num.reshape(self.a, shape) + + def test_shape_unknown_dimensions(self): + shape = (-5, -1, 2) + msg = "can only specify one unknown dimension" + with pytest.raises(ValueError, match=msg): + num.reshape(self.a, shape) + + @pytest.mark.parametrize("shape", ((4, 3, 2.0), 24.0), ids=str) + def test_shape_float(self, shape): + with pytest.raises(TypeError): + num.reshape(self.a, shape) + + def test_invalid_order(self): + with pytest.raises(ValueError): + num.reshape(self.a, self.shape, order="Z") + + +class TestRavelErrors: + def setup_method(self): + self.a = num.arange(24).reshape(4, 3, 2) + + def test_invalid_order(self): + with pytest.raises(ValueError): + num.ravel(self.a, order="Z") + if __name__ == "__main__": import sys From 560372098500c4c6e8265d1593f05dfd3595feda Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 17 Nov 2022 15:44:05 -0800 Subject: [PATCH 46/89] Minor test QOL improvements: make cunumeric imports consistent (#701) * make cunumeric test imports consistent * more prefix/suffix renames and vertical space to emphasize AAA --- tests/integration/test_array_dunders.py | 95 +++++++++++-------- tests/integration/test_diag_indices.py | 52 +++++----- tests/integration/test_einsum.py | 28 +++--- tests/integration/test_einsum_path.py | 14 +-- tests/integration/test_indices.py | 40 ++++---- tests/integration/test_ingest.py | 12 +-- tests/integration/test_lstm_simple_forward.py | 30 +++--- tests/integration/test_matrix_power.py | 12 +-- 
tests/integration/test_min_on_gpu.py | 6 +- tests/integration/test_moveaxis.py | 56 +++++------ tests/integration/test_multi_dot.py | 22 ++--- tests/integration/test_norm.py | 30 +++--- tests/integration/test_outer.py | 6 +- tests/integration/test_random_creation.py | 12 ++- tests/integration/test_reduction_axis.py | 6 +- tests/integration/test_vdot.py | 4 +- tests/integration/test_window.py | 10 +- tests/integration/utils/contractions.py | 26 ++--- 18 files changed, 237 insertions(+), 224 deletions(-) diff --git a/tests/integration/test_array_dunders.py b/tests/integration/test_array_dunders.py index 42b2a6ec2..626df5dee 100644 --- a/tests/integration/test_array_dunders.py +++ b/tests/integration/test_array_dunders.py @@ -16,77 +16,88 @@ import numpy as np import pytest -import cunumeric as cn +import cunumeric as num + +arr_np = np.eye(4) +vec_np = np.arange(4).astype(np.float64) + +arr_num = num.array(arr_np) +vec_num = num.array(vec_np) -np_arr = np.eye(4) -np_vec = np.arange(4).astype(np.float64) -cn_arr = cn.array(np_arr) -cn_vec = cn.array(np_vec) indices = [0, 3, 1, 2] def test_array_function_implemented(): - np_res = np.dot(np_arr, np_vec) - cn_res = np.dot(cn_arr, cn_vec) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, cn.ndarray) # implemented + res_np = np.dot(arr_np, vec_np) + res_num = np.dot(arr_num, vec_num) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, num.ndarray) # implemented def test_array_function_unimplemented(): - np_res = np.linalg.tensorsolve(np_arr, np_vec) - cn_res = np.linalg.tensorsolve(cn_arr, cn_vec) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, np.ndarray) # unimplemented + res_np = np.linalg.tensorsolve(arr_np, vec_np) + res_num = np.linalg.tensorsolve(arr_num, vec_num) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, np.ndarray) # unimplemented def test_array_ufunc_through_array_op(): - assert np.array_equal(cn_vec + cn_vec, np_vec + np_vec) - assert isinstance(cn_vec + np_vec, cn.ndarray) - assert isinstance(np_vec + cn_vec, cn.ndarray) + assert np.array_equal(vec_num + vec_num, vec_np + vec_np) + assert isinstance(vec_num + vec_np, num.ndarray) + assert isinstance(vec_np + vec_num, num.ndarray) def test_array_ufunc_call(): - np_res = np.add(np_vec, np_vec) - cn_res = np.add(cn_vec, cn_vec) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, cn.ndarray) # implemented + res_np = np.add(vec_np, vec_np) + res_num = np.add(vec_num, vec_num) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, num.ndarray) # implemented def test_array_ufunc_reduce(): - np_res = np.add.reduce(np_vec) - cn_res = np.add.reduce(cn_vec) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, cn.ndarray) # implemented + res_np = np.add.reduce(vec_np) + res_num = np.add.reduce(vec_num) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, num.ndarray) # implemented def test_array_ufunc_accumulate(): - np_res = np.add.accumulate(np_vec) - cn_res = np.add.accumulate(cn_vec) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, np.ndarray) # unimplemented + res_np = np.add.accumulate(vec_np) + res_num = np.add.accumulate(vec_num) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, np.ndarray) # unimplemented def test_array_ufunc_reduceat(): - np_res = np.add.reduceat(np_vec, indices) - cn_res = np.add.reduceat(cn_vec, indices) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, 
np.ndarray) # unimplemented + res_np = np.add.reduceat(vec_np, indices) + res_num = np.add.reduceat(vec_num, indices) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, np.ndarray) # unimplemented def test_array_ufunc_outer(): - np_res = np.add.outer(np_vec, np_vec) - cn_res = np.add.outer(cn_vec, cn_vec) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, np.ndarray) # unimplemented + res_np = np.add.outer(vec_np, vec_np) + res_num = np.add.outer(vec_num, vec_num) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, np.ndarray) # unimplemented def test_array_ufunc_at(): - np_res = np.full((4,), 42) - cn_res = cn.full((4,), 42) - np.add.at(np_res, indices, np_vec) - np.add.at(cn_res, indices, cn_vec) - assert np.array_equal(np_res, cn_res) - assert isinstance(cn_res, cn.ndarray) + res_np = np.full((4,), 42) + res_num = num.full((4,), 42) + + np.add.at(res_np, indices, vec_np) + np.add.at(res_num, indices, vec_num) + + assert np.array_equal(res_np, res_num) + assert isinstance(res_num, num.ndarray) if __name__ == "__main__": diff --git a/tests/integration/test_diag_indices.py b/tests/integration/test_diag_indices.py index e848b2dbe..38c421477 100644 --- a/tests/integration/test_diag_indices.py +++ b/tests/integration/test_diag_indices.py @@ -17,62 +17,62 @@ import pytest from legate.core import LEGATE_MAX_DIM -import cunumeric as cn +import cunumeric as num def test_diag_indices_default_ndim(): - np_res = np.diag_indices(10) - cn_res = cn.diag_indices(10) - assert np.array_equal(np_res, cn_res) + a_np = np.diag_indices(10) + a_num = num.diag_indices(10) + assert np.array_equal(a_np, a_num) @pytest.mark.parametrize("ndim", range(0, LEGATE_MAX_DIM + 1)) def test_diag_indices_basic(ndim): - np_res = np.diag_indices(10, ndim) - cn_res = cn.diag_indices(10, ndim) - assert np.array_equal(np_res, cn_res) + a_np = np.diag_indices(10, ndim) + a_num = num.diag_indices(10, ndim) + assert np.array_equal(a_np, a_num) @pytest.mark.parametrize("n", [0, 0.0, 1, 10.5]) @pytest.mark.parametrize("ndim", [-4, 0, 1]) def test_diag_indices(n, ndim): - np_res = np.diag_indices(n, ndim) - cn_res = cn.diag_indices(n, ndim) - assert np.array_equal(np_res, cn_res) + a_np = np.diag_indices(n, ndim) + a_num = num.diag_indices(n, ndim) + assert np.array_equal(a_np, a_num) class TestDiagIndicesErrors: @pytest.mark.parametrize("n", [-10.5, -1]) def test_negative_n(self, n): with pytest.raises(ValueError): - cn.diag_indices(n) + num.diag_indices(n) @pytest.mark.xfail @pytest.mark.parametrize("n", [-10.5, -1]) def test_negative_n_DIVERGENCE(self, n): # np.diag_indices(-10.5) returns empty 2-D array, dtype=float64 # np.diag_indices(-1) returns empty 2-D array, dtype=int32 - # cn.diag_indices(-10.5) raises ValueError - # cn.diag_indices(-1) raises ValueError - np_res = np.diag_indices(n) - cn_res = cn.diag_indices(n) - assert np.array_equal(np_res, cn_res) + # num.diag_indices(-10.5) raises ValueError + # num.diag_indices(-1) raises ValueError + a_np = np.diag_indices(n) + a_num = num.diag_indices(n) + assert np.array_equal(a_np, a_num) def test_none_n(self): msg = "unsupported operand type" with pytest.raises(TypeError, match=msg): - cn.diag_indices(None) + num.diag_indices(None) @pytest.mark.parametrize("ndim", [-1.5, 0.0, 1.5]) def test_float_ndim(self, ndim): msg = "can't multiply sequence by non-int of type 'float'" with pytest.raises(TypeError, match=msg): - cn.diag_indices(10, ndim) + num.diag_indices(10, ndim) def test_none_ndim(self): msg = "can't multiply 
sequence by non-int of type 'NoneType'" with pytest.raises(TypeError, match=msg): - cn.diag_indices(10, None) + num.diag_indices(10, None) @pytest.mark.parametrize("size", [(5,), (0,)], ids=str) @@ -80,10 +80,10 @@ def test_none_ndim(self): def test_diag_indices_from_basic(size, ndim): shape = size * ndim a = np.ones(shape, dtype=int) - a_cn = cn.array(a) - np_res = np.diag_indices_from(a) - cn_res = cn.diag_indices_from(a_cn) - assert np.array_equal(np_res, cn_res) + a_num = num.array(a) + a_np = np.diag_indices_from(a) + a_num = num.diag_indices_from(a_num) + assert np.array_equal(a_np, a_num) class TestDiagIndicesFromErrors: @@ -92,13 +92,13 @@ def test_1d(self, size): a = np.ones(size, dtype=int) msg = "input array must be at least 2-d" with pytest.raises(ValueError, match=msg): - cn.diag_indices_from(a) + num.diag_indices_from(a) def test_0d(self): a = np.array(3) msg = "input array must be at least 2-d" with pytest.raises(ValueError, match=msg): - cn.diag_indices_from(a) + num.diag_indices_from(a) @pytest.mark.parametrize( "size", @@ -115,7 +115,7 @@ def test_unequal_length(self, size): a = np.ones(size, dtype=int) msg = "All dimensions of input must be of equal length" with pytest.raises(ValueError, match=msg): - cn.diag_indices_from(a) + num.diag_indices_from(a) if __name__ == "__main__": diff --git a/tests/integration/test_einsum.py b/tests/integration/test_einsum.py index 68ca4b949..fd34ef088 100644 --- a/tests/integration/test_einsum.py +++ b/tests/integration/test_einsum.py @@ -22,7 +22,7 @@ from utils.comparisons import allclose from utils.generators import mk_0to1_array, permutes_to -import cunumeric as cn +import cunumeric as num # Limits for exhaustive expression generation routines MAX_MODES = 3 @@ -213,48 +213,48 @@ def mk_typed_output(lib, shape): ] -def check_np_vs_cn(expr, mk_input, mk_output=None, **kwargs): +def check_np_vs_num(expr, mk_input, mk_output=None, **kwargs): lhs, rhs = expr.split("->") opers = lhs.split(",") in_shapes = [ tuple(BASE_DIM_LEN + ord(m) - ord("a") for m in op) for op in opers ] out_shape = tuple(BASE_DIM_LEN + ord(m) - ord("a") for m in rhs) - for (np_inputs, cn_inputs) in zip( + for (np_inputs, num_inputs) in zip( product(*(mk_input(np, sh) for sh in in_shapes)), - product(*(mk_input(cn, sh) for sh in in_shapes)), + product(*(mk_input(num, sh) for sh in in_shapes)), ): np_res = np.einsum(expr, *np_inputs, **kwargs) - cn_res = cn.einsum(expr, *cn_inputs, **kwargs) + num_res = num.einsum(expr, *num_inputs, **kwargs) rtol = ( 1e-02 if any(x.dtype == np.float16 for x in np_inputs) or kwargs.get("dtype") == np.float16 else 1e-05 ) - assert allclose(np_res, cn_res, rtol=rtol) + assert allclose(np_res, num_res, rtol=rtol) if mk_output is not None: - for cn_out in mk_output(cn, out_shape): - cn.einsum(expr, *cn_inputs, out=cn_out, **kwargs) - rtol_out = 1e-02 if cn_out.dtype == np.float16 else rtol - assert allclose(cn_out, cn_res, rtol=rtol_out) + for num_out in mk_output(num, out_shape): + num.einsum(expr, *num_inputs, out=num_out, **kwargs) + rtol_out = 1e-02 if num_out.dtype == np.float16 else rtol + assert allclose(num_out, num_res, rtol=rtol_out) @pytest.mark.parametrize("expr", gen_expr()) def test_small(expr): - check_np_vs_cn(expr, mk_input_that_permutes_to) - check_np_vs_cn(expr, mk_input_that_broadcasts_to) + check_np_vs_num(expr, mk_input_that_permutes_to) + check_np_vs_num(expr, mk_input_that_broadcasts_to) @pytest.mark.parametrize("expr", LARGE_EXPRS) def test_large(expr): - check_np_vs_cn(expr, mk_input_default) + 
check_np_vs_num(expr, mk_input_default) @pytest.mark.parametrize("expr", SMALL_EXPRS) @pytest.mark.parametrize("dtype", [None, np.float32]) def test_cast(expr, dtype): - check_np_vs_cn( + check_np_vs_num( expr, mk_typed_input, mk_typed_output, dtype=dtype, casting="unsafe" ) diff --git a/tests/integration/test_einsum_path.py b/tests/integration/test_einsum_path.py index e54fcd1d7..db6370257 100644 --- a/tests/integration/test_einsum_path.py +++ b/tests/integration/test_einsum_path.py @@ -16,15 +16,15 @@ import numpy as np import pytest -import cunumeric as cn +import cunumeric as num expr = "ij,jk,kl->il" np_a = np.empty((2, 2)) np_b = np.empty((2, 5)) np_c = np.empty((5, 2)) -cn_a = cn.empty((2, 2)) -cn_b = cn.empty((2, 5)) -cn_c = cn.empty((5, 2)) +num_a = num.empty((2, 2)) +num_b = num.empty((2, 5)) +num_c = num.empty((5, 2)) OPTIMIZE = [ True, @@ -38,9 +38,9 @@ @pytest.mark.parametrize("optimize", OPTIMIZE) def test_einsum_path(optimize): - np_path, _ = np.einsum_path(expr, np_a, np_b, np_c, optimize=optimize) - cn_path, _ = cn.einsum_path(expr, cn_a, cn_b, cn_c, optimize=optimize) - assert np_path == cn_path + path_np, _ = np.einsum_path(expr, np_a, np_b, np_c, optimize=optimize) + path_num, _ = num.einsum_path(expr, num_a, num_b, num_c, optimize=optimize) + assert path_np == path_num if __name__ == "__main__": diff --git a/tests/integration/test_indices.py b/tests/integration/test_indices.py index 2ebbe9c2a..08b6042a0 100644 --- a/tests/integration/test_indices.py +++ b/tests/integration/test_indices.py @@ -19,7 +19,7 @@ import pytest from legate.core import LEGATE_MAX_DIM -import cunumeric as cn +import cunumeric as num class TestIndicesErrors: @@ -32,19 +32,19 @@ def test_int_dimensions(self): dimensions = 3 msg = r"'int' object is not iterable" with pytest.raises(TypeError, match=msg): - cn.indices(dimensions) + num.indices(dimensions) def test_negative_dimensions(self): dimensions = -3 msg = r"'int' object is not iterable" with pytest.raises(TypeError, match=msg): - cn.indices(dimensions) + num.indices(dimensions) def test_float_dimensions(self): dimensions = 3.2 msg = r"'float' object is not iterable" with pytest.raises(TypeError, match=msg): - cn.indices(dimensions) + num.indices(dimensions) def test_negative_tuple_dimensions(self): dimensions = (1, -1) @@ -54,7 +54,7 @@ def test_negative_tuple_dimensions(self): # in other conditions, it raises # "ValueError: Invalid shape: Shape((2, 1, -1))" with pytest.raises(ValueError): - cn.indices(dimensions) + num.indices(dimensions) def test_float_tuple_dimensions(self): dimensions = (3.5, 2.5) @@ -62,7 +62,7 @@ def test_float_tuple_dimensions(self): # "TypeError: 'float' object cannot be interpreted as an integer" msg = r"expected a sequence of integers or a single integer" with pytest.raises(TypeError, match=msg): - cn.indices(dimensions) + num.indices(dimensions) class TestIndices: @@ -73,40 +73,40 @@ class TestIndices: @pytest.mark.parametrize("dimensions", [(0,), (0, 0), (0, 1), (1, 1)]) def test_indices_zero(self, dimensions): np_res = np.indices(dimensions) - cn_res = cn.indices(dimensions) + num_res = num.indices(dimensions) - assert np.array_equal(np_res, cn_res) + assert np.array_equal(np_res, num_res) @pytest.mark.parametrize("ndim", range(0, LEGATE_MAX_DIM)) def test_indices_basic(self, ndim): - dimensions = tuple(random.randint(1, 5) for i in range(ndim)) + dimensions = tuple(random.randint(1, 5) for _ in range(ndim)) np_res = np.indices(dimensions) - cn_res = cn.indices(dimensions) - assert np.array_equal(np_res, cn_res) + 
num_res = num.indices(dimensions) + assert np.array_equal(np_res, num_res) @pytest.mark.parametrize("ndim", range(0, LEGATE_MAX_DIM)) def test_indices_dtype_none(self, ndim): - dimensions = tuple(random.randint(1, 5) for i in range(ndim)) + dimensions = tuple(random.randint(1, 5) for _ in range(ndim)) np_res = np.indices(dimensions, dtype=None) - cn_res = cn.indices(dimensions, dtype=None) - assert np.array_equal(np_res, cn_res) + num_res = num.indices(dimensions, dtype=None) + assert np.array_equal(np_res, num_res) @pytest.mark.parametrize("ndim", range(0, LEGATE_MAX_DIM)) def test_indices_dtype_float(self, ndim): - dimensions = tuple(random.randint(1, 5) for i in range(ndim)) + dimensions = tuple(random.randint(1, 5) for _ in range(ndim)) np_res = np.indices(dimensions, dtype=float) - cn_res = cn.indices(dimensions, dtype=float) - assert np.array_equal(np_res, cn_res) + num_res = num.indices(dimensions, dtype=float) + assert np.array_equal(np_res, num_res) @pytest.mark.parametrize("ndim", range(0, LEGATE_MAX_DIM)) def test_indices_sparse(self, ndim): - dimensions = tuple(random.randint(1, 5) for i in range(ndim)) + dimensions = tuple(random.randint(1, 5) for _ in range(ndim)) np_res = np.indices(dimensions, sparse=True) - cn_res = cn.indices(dimensions, sparse=True) + num_res = num.indices(dimensions, sparse=True) for i in range(len(np_res)): - assert np.array_equal(np_res[i], cn_res[i]) + assert np.array_equal(np_res[i], num_res[i]) if __name__ == "__main__": diff --git a/tests/integration/test_ingest.py b/tests/integration/test_ingest.py index 0860e7e03..0db699b1c 100644 --- a/tests/integration/test_ingest.py +++ b/tests/integration/test_ingest.py @@ -25,7 +25,7 @@ legion, ) -import cunumeric as lg +import cunumeric as num tile_shape = (4, 7) colors = (5, 3) @@ -80,7 +80,7 @@ def _ingest(custom_partitioning, custom_sharding): get_buffer, get_local_colors if custom_sharding else None, ) - return lg.array(tab) + return num.array(tab) @pytest.mark.parametrize("custom_sharding", [True, False]) @@ -89,10 +89,10 @@ def test(custom_partitioning, custom_sharding): size = 1 for d in shape: size *= d - np_arr = np.arange(size).reshape(shape) - lg_arr = _ingest(custom_partitioning, custom_sharding) - assert np.array_equal(np_arr, lg_arr) - assert np.array_equal(np_arr, lg_arr * 1.0) # force a copy + a_np = np.arange(size).reshape(shape) + a_num = _ingest(custom_partitioning, custom_sharding) + assert np.array_equal(a_np, a_num) + assert np.array_equal(a_np, a_num * 1.0) # force a copy if __name__ == "__main__": diff --git a/tests/integration/test_lstm_simple_forward.py b/tests/integration/test_lstm_simple_forward.py index 07da11ced..56f4a4a44 100644 --- a/tests/integration/test_lstm_simple_forward.py +++ b/tests/integration/test_lstm_simple_forward.py @@ -14,7 +14,7 @@ # import pytest -import cunumeric as np +import cunumeric as num def test_basic(): @@ -22,27 +22,27 @@ def test_basic(): hidden_size = 10 sentence_length = 2 batch_size = 3 - X = np.random.randn(sentence_length, batch_size, hidden_size) - h0 = np.random.randn(1, hidden_size) - WLSTM = np.random.randn( + X = num.random.randn(sentence_length, batch_size, hidden_size) + h0 = num.random.randn(1, hidden_size) + WLSTM = num.random.randn( word_size + hidden_size, 4 * hidden_size - ) / np.sqrt(word_size + hidden_size) + ) / num.sqrt(word_size + hidden_size) xphpb = WLSTM.shape[0] d = hidden_size n = sentence_length b = batch_size - Hin = np.zeros((n, b, xphpb)) - Hout = np.zeros((n, b, d)) - IFOG = np.zeros((n, b, d * 4)) - IFOGf = 
np.zeros((n, b, d * 4)) - C = np.zeros((n, b, d)) - Ct = np.zeros((n, b, d)) + Hin = num.zeros((n, b, xphpb)) + Hout = num.zeros((n, b, d)) + IFOG = num.zeros((n, b, d * 4)) + IFOGf = num.zeros((n, b, d * 4)) + C = num.zeros((n, b, d)) + Ct = num.zeros((n, b, d)) for t in range(0, n): if t == 0: - prev = np.tile(h0, (b, 1)) + prev = num.tile(h0, (b, 1)) else: prev = Hout[t - 1] @@ -52,14 +52,14 @@ def test_basic(): IFOG[t] = Hin[t].dot(WLSTM) # non-linearities IFOGf[t, :, : 3 * d] = 1.0 / ( - 1.0 + np.exp(-IFOG[t, :, : 3 * d]) + 1.0 + num.exp(-IFOG[t, :, : 3 * d]) ) # sigmoids these are the gates - IFOGf[t, :, 3 * d :] = np.tanh(IFOG[t, :, 3 * d :]) # tanh + IFOGf[t, :, 3 * d :] = num.tanh(IFOG[t, :, 3 * d :]) # tanh # compute the cell activation C[t] = IFOGf[t, :, :d] * IFOGf[t, :, 3 * d :] if t > 0: C[t] += IFOGf[t, :, d : 2 * d] * C[t - 1] - Ct[t] = np.tanh(C[t]) + Ct[t] = num.tanh(C[t]) Hout[t] = IFOGf[t, :, 2 * d : 3 * d] * Ct[t] diff --git a/tests/integration/test_matrix_power.py b/tests/integration/test_matrix_power.py index d9a0dfca8..d4cfe4b23 100644 --- a/tests/integration/test_matrix_power.py +++ b/tests/integration/test_matrix_power.py @@ -19,7 +19,7 @@ from utils.comparisons import allclose from utils.generators import mk_0to1_array -import cunumeric as cn +import cunumeric as num # TODO: add negative exponents here, once they become supported EXPONENTS = [0, 1, 3, 5] @@ -29,11 +29,11 @@ @pytest.mark.parametrize("exp", EXPONENTS) def test_matrix_power(ndim, exp): shape = (3,) * ndim + (2, 2) - np_a = mk_0to1_array(np, shape) - cn_a = mk_0to1_array(cn, shape) - np_res = np.linalg.matrix_power(np_a, exp) - cn_res = cn.linalg.matrix_power(cn_a, exp) - assert allclose(np_res, cn_res) + a_np = mk_0to1_array(np, shape) + a_num = mk_0to1_array(num, shape) + res_np = np.linalg.matrix_power(a_np, exp) + res_num = num.linalg.matrix_power(a_num, exp) + assert allclose(res_np, res_num) if __name__ == "__main__": diff --git a/tests/integration/test_min_on_gpu.py b/tests/integration/test_min_on_gpu.py index 7a5fb2bb2..2a5345c4f 100644 --- a/tests/integration/test_min_on_gpu.py +++ b/tests/integration/test_min_on_gpu.py @@ -15,12 +15,12 @@ import pytest -import cunumeric as cn +import cunumeric as num def test_min(): - x = cn.array([1, 2, 3]) - assert cn.min(x) == 1 + x = num.array([1, 2, 3]) + assert num.min(x) == 1 if __name__ == "__main__": diff --git a/tests/integration/test_moveaxis.py b/tests/integration/test_moveaxis.py index 9f12ad6d0..6c5682160 100644 --- a/tests/integration/test_moveaxis.py +++ b/tests/integration/test_moveaxis.py @@ -18,7 +18,7 @@ from legate.core import LEGATE_MAX_DIM from utils.generators import mk_0to1_array -import cunumeric as cn +import cunumeric as num AXES = ( (0, 0), @@ -34,26 +34,26 @@ @pytest.mark.parametrize("axes", AXES) def test_moveaxis(ndim, axes): source, destination = axes - np_a = mk_0to1_array(np, (3,) * ndim) - cn_a = mk_0to1_array(cn, (3,) * ndim) - np_res = np.moveaxis(np_a, source, destination) - cn_res = cn.moveaxis(cn_a, source, destination) - assert np.array_equal(np_res, cn_res) + a_np = mk_0to1_array(np, (3,) * ndim) + a_num = mk_0to1_array(num, (3,) * ndim) + res_np = np.moveaxis(a_np, source, destination) + res_num = num.moveaxis(a_num, source, destination) + assert np.array_equal(res_np, res_num) # Check that the returned array is a view - cn_res[:] = 0 - assert cn_a.sum() == 0 + res_num[:] = 0 + assert a_num.sum() == 0 def test_moveaxis_with_empty_axis(): - np_a = np.ones((3, 4, 5)) - cn_a = cn.ones((3, 4, 5)) + a_np = np.ones((3, 
4, 5)) + a_num = num.ones((3, 4, 5)) axes = ([], []) source, destination = axes - np_res = np.moveaxis(np_a, source, destination) - cn_res = cn.moveaxis(cn_a, source, destination) - assert np.array_equal(np_res, cn_res) + res_np = np.moveaxis(a_np, source, destination) + res_num = num.moveaxis(a_num, source, destination) + assert np.array_equal(res_np, res_num) EMPTY_ARRAYS = ( @@ -68,57 +68,57 @@ def test_moveaxis_with_empty_array(a): axes = (0, -1) source, destination = axes - np_res = np.moveaxis(a, source, destination) - cn_res = cn.moveaxis(a, source, destination) - assert np.array_equal(np_res, cn_res) + res_np = np.moveaxis(a, source, destination) + res_num = num.moveaxis(a, source, destination) + assert np.array_equal(res_np, res_num) class TestMoveAxisErrors: def setup(self): - self.x = cn.ones((3, 4, 5)) + self.x = num.ones((3, 4, 5)) def test_repeated_axis(self): msg = "repeated axis" with pytest.raises(ValueError, match=msg): - cn.moveaxis(self.x, [0, 0], [1, 0]) + num.moveaxis(self.x, [0, 0], [1, 0]) with pytest.raises(ValueError, match=msg): - cn.moveaxis(self.x, [0, 1], [0, -3]) + num.moveaxis(self.x, [0, 1], [0, -3]) def test_axis_out_of_bound(self): msg = "out of bound" with pytest.raises(np.AxisError, match=msg): - cn.moveaxis(self.x, [0, 3], [0, 1]) + num.moveaxis(self.x, [0, 3], [0, 1]) with pytest.raises(np.AxisError, match=msg): - cn.moveaxis(self.x, [0, 1], [0, -4]) + num.moveaxis(self.x, [0, 1], [0, -4]) with pytest.raises(np.AxisError, match=msg): - cn.moveaxis(self.x, 4, 0) + num.moveaxis(self.x, 4, 0) with pytest.raises(np.AxisError, match=msg): - cn.moveaxis(self.x, 0, -4) + num.moveaxis(self.x, 0, -4) def test_axis_with_different_length(self): msg = "arguments must have the same number of elements" with pytest.raises(ValueError, match=msg): - cn.moveaxis(self.x, [0], [1, 0]) + num.moveaxis(self.x, [0], [1, 0]) def test_axis_float(self): msg = "integer argument expected, got float" with pytest.raises(TypeError, match=msg): - cn.moveaxis(self.x, [0.0, 1], [1, 0]) + num.moveaxis(self.x, [0.0, 1], [1, 0]) with pytest.raises(TypeError, match=msg): - cn.moveaxis(self.x, [0, 1], [1, 0.0]) + num.moveaxis(self.x, [0, 1], [1, 0.0]) def test_axis_none(self): msg = "'NoneType' object is not iterable" with pytest.raises(TypeError, match=msg): - cn.moveaxis(self.x, None, 0) + num.moveaxis(self.x, None, 0) with pytest.raises(TypeError, match=msg): - cn.moveaxis(self.x, 0, None) + num.moveaxis(self.x, 0, None) if __name__ == "__main__": diff --git a/tests/integration/test_multi_dot.py b/tests/integration/test_multi_dot.py index 7fc054bc9..ecba326ef 100644 --- a/tests/integration/test_multi_dot.py +++ b/tests/integration/test_multi_dot.py @@ -18,7 +18,7 @@ from utils.comparisons import allclose from utils.generators import mk_0to1_array -import cunumeric as cn +import cunumeric as num SHAPES = [ # 2 arrays @@ -42,28 +42,28 @@ @pytest.mark.parametrize("shapes", SHAPES) def test_multi_dot(shapes): np_arrays = [mk_0to1_array(np, shape) for shape in shapes] - cn_arrays = [mk_0to1_array(cn, shape) for shape in shapes] - np_res = np.linalg.multi_dot(np_arrays) - cn_res = cn.linalg.multi_dot(cn_arrays) - assert allclose(np_res, cn_res) + num_arrays = [mk_0to1_array(num, shape) for shape in shapes] + res_np = np.linalg.multi_dot(np_arrays) + res_num = num.linalg.multi_dot(num_arrays) + assert allclose(res_np, res_num) if len(shapes[0]) == 1: if len(shapes[-1]) == 1: - out = cn.zeros(()) + out = num.zeros(()) else: - out = cn.zeros((shapes[-1][1],)) + out = num.zeros((shapes[-1][1],)) 
else: if len(shapes[-1]) == 1: - out = cn.zeros((shapes[0][0],)) + out = num.zeros((shapes[0][0],)) else: - out = cn.zeros( + out = num.zeros( ( shapes[0][0], shapes[-1][1], ) ) - cn_res = cn.linalg.multi_dot(cn_arrays, out=out) - assert allclose(np_res, out) + res_num = num.linalg.multi_dot(num_arrays, out=out) + assert allclose(res_np, out) if __name__ == "__main__": diff --git a/tests/integration/test_norm.py b/tests/integration/test_norm.py index 7fd6b7461..ca20b8e04 100644 --- a/tests/integration/test_norm.py +++ b/tests/integration/test_norm.py @@ -19,7 +19,7 @@ from utils.comparisons import allclose from utils.generators import mk_0to1_array -import cunumeric as cn +import cunumeric as num VECTOR_ORDS = [None, np.inf, -np.inf, 0, 1, -1, 2, -2] @@ -30,8 +30,8 @@ mk_0to1_array(np, (3,) * ndim) - 0.5 for ndim in range(0, LEGATE_MAX_DIM + 1) ] -cn_arrays = [ - mk_0to1_array(cn, (3,) * ndim) - 0.5 +num_arrays = [ + mk_0to1_array(num, (3,) * ndim) - 0.5 for ndim in range(0, LEGATE_MAX_DIM + 1) ] @@ -40,24 +40,24 @@ @pytest.mark.parametrize("keepdims", [False, True]) def test_noaxis_1d(ord, keepdims): np_res = np.linalg.norm(np_arrays[1], ord=ord, keepdims=keepdims) - cn_res = cn.linalg.norm(cn_arrays[1], ord=ord, keepdims=keepdims) - assert allclose(np_res, cn_res) + num_res = num.linalg.norm(num_arrays[1], ord=ord, keepdims=keepdims) + assert allclose(np_res, num_res) @pytest.mark.parametrize("ord", MATRIX_ORDS) @pytest.mark.parametrize("keepdims", [False, True]) def test_noaxis_2d(ord, keepdims): np_res = np.linalg.norm(np_arrays[2], ord=ord, keepdims=keepdims) - cn_res = cn.linalg.norm(cn_arrays[2], ord=ord, keepdims=keepdims) - assert allclose(np_res, cn_res) + num_res = num.linalg.norm(num_arrays[2], ord=ord, keepdims=keepdims) + assert allclose(np_res, num_res) @pytest.mark.parametrize("ndim", [0] + list(range(3, LEGATE_MAX_DIM + 1))) @pytest.mark.parametrize("keepdims", [False, True]) def test_noaxis_other(ndim, keepdims): np_res = np.linalg.norm(np_arrays[ndim], keepdims=keepdims) - cn_res = cn.linalg.norm(cn_arrays[ndim], keepdims=keepdims) - assert allclose(np_res, cn_res) + num_res = num.linalg.norm(num_arrays[ndim], keepdims=keepdims) + assert allclose(np_res, num_res) @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) @@ -67,10 +67,10 @@ def test_axis_1d(ndim, ord, keepdims): np_res = np.linalg.norm( np_arrays[ndim], ord=ord, axis=0, keepdims=keepdims ) - cn_res = cn.linalg.norm( - cn_arrays[ndim], ord=ord, axis=0, keepdims=keepdims + num_res = num.linalg.norm( + num_arrays[ndim], ord=ord, axis=0, keepdims=keepdims ) - assert allclose(np_res, cn_res) + assert allclose(np_res, num_res) @pytest.mark.parametrize("ndim", range(2, LEGATE_MAX_DIM + 1)) @@ -80,10 +80,10 @@ def test_axis_2d(ndim, ord, keepdims): np_res = np.linalg.norm( np_arrays[ndim], ord=ord, axis=(0, 1), keepdims=keepdims ) - cn_res = cn.linalg.norm( - cn_arrays[ndim], ord=ord, axis=(0, 1), keepdims=keepdims + num_res = num.linalg.norm( + num_arrays[ndim], ord=ord, axis=(0, 1), keepdims=keepdims ) - assert allclose(np_res, cn_res) + assert allclose(np_res, num_res) if __name__ == "__main__": diff --git a/tests/integration/test_outer.py b/tests/integration/test_outer.py index bbae647db..67b444348 100644 --- a/tests/integration/test_outer.py +++ b/tests/integration/test_outer.py @@ -18,7 +18,7 @@ from legate.core import LEGATE_MAX_DIM from utils.generators import mk_0to1_array -import cunumeric as cn +import cunumeric as num def _outer(a_ndim, b_ndim, lib): @@ -31,12 +31,12 @@ def _outer(a_ndim, b_ndim, 
lib): @pytest.mark.parametrize("b_ndim", range(1, LEGATE_MAX_DIM + 1)) def test_basic(a_ndim, b_ndim): assert np.array_equal( - _outer(a_ndim, b_ndim, np), _outer(a_ndim, b_ndim, cn) + _outer(a_ndim, b_ndim, np), _outer(a_ndim, b_ndim, num) ) def test_empty(): - assert np.array_equal(_outer(0, 0, np), _outer(0, 0, cn)) + assert np.array_equal(_outer(0, 0, np), _outer(0, 0, num)) if __name__ == "__main__": diff --git a/tests/integration/test_random_creation.py b/tests/integration/test_random_creation.py index 3f3c6cd12..122b77baa 100644 --- a/tests/integration/test_random_creation.py +++ b/tests/integration/test_random_creation.py @@ -17,16 +17,18 @@ import pytest from utils.comparisons import allclose -import cunumeric as cn +import cunumeric as num @pytest.mark.xfail def test_randn(): - cn.random.seed(42) - x = cn.random.randn(10) np.random.seed(42) - xn = np.random.randn(10) - assert allclose(x, xn) + num.random.seed(42) + + a_np = np.random.randn(10) + a_num = num.random.randn(10) + + assert allclose(a_num, a_np) if __name__ == "__main__": diff --git a/tests/integration/test_reduction_axis.py b/tests/integration/test_reduction_axis.py index 7b6ff0555..6ae89f07c 100644 --- a/tests/integration/test_reduction_axis.py +++ b/tests/integration/test_reduction_axis.py @@ -18,7 +18,7 @@ import numpy as np import pytest -import cunumeric as cn +import cunumeric as num def _sum(shape, axis, lib, dtype=None): @@ -30,9 +30,9 @@ def _sum(shape, axis, lib, dtype=None): @pytest.mark.parametrize("axis", range(3), ids=str) @pytest.mark.parametrize("shape", permutations((3, 4, 5)), ids=str) def test_3d(shape, axis): - assert np.array_equal(_sum(shape, axis, np), _sum(shape, axis, cn)) + assert np.array_equal(_sum(shape, axis, np), _sum(shape, axis, num)) assert np.array_equal( - _sum(shape, axis, np, dtype="D"), _sum(shape, axis, cn, dtype="D") + _sum(shape, axis, np, dtype="D"), _sum(shape, axis, num, dtype="D") ) diff --git a/tests/integration/test_vdot.py b/tests/integration/test_vdot.py index 06d2e25f4..52497f0db 100644 --- a/tests/integration/test_vdot.py +++ b/tests/integration/test_vdot.py @@ -18,7 +18,7 @@ from utils.comparisons import allclose from utils.generators import mk_0to1_array -import cunumeric as cn +import cunumeric as num DTYPES = [np.float32, np.complex64] @@ -33,7 +33,7 @@ def _vdot(a_dtype, b_dtype, lib): @pytest.mark.parametrize("a_dtype", DTYPES) @pytest.mark.parametrize("b_dtype", DTYPES) def test(a_dtype, b_dtype): - assert allclose(_vdot(a_dtype, b_dtype, np), _vdot(a_dtype, b_dtype, cn)) + assert allclose(_vdot(a_dtype, b_dtype, np), _vdot(a_dtype, b_dtype, num)) if __name__ == "__main__": diff --git a/tests/integration/test_window.py b/tests/integration/test_window.py index a10f02319..2a2d9790c 100644 --- a/tests/integration/test_window.py +++ b/tests/integration/test_window.py @@ -18,7 +18,7 @@ import pytest from utils.comparisons import allclose -import cunumeric as cn +import cunumeric as num window_functions = ("bartlett", "blackman", "hamming", "hanning") @@ -27,18 +27,18 @@ @pytest.mark.parametrize("fn", window_functions) def test_basic_window(fn, M): out_np = getattr(np, fn)(M) - out_cn = getattr(cn, fn)(M) + out_num = getattr(num, fn)(M) - assert allclose(out_np, out_cn) + assert allclose(out_np, out_num) @pytest.mark.parametrize("beta", (0, 6)) @pytest.mark.parametrize("M", (0, 1, 10, 100)) def test_kaiser_window(M, beta): out_np = np.kaiser(M, beta) - out_cn = cn.kaiser(M, beta) + out_num = num.kaiser(M, beta) - assert allclose(out_np, out_cn) + assert 
allclose(out_np, out_num) if __name__ == "__main__": diff --git a/tests/integration/utils/contractions.py b/tests/integration/utils/contractions.py index 487cbfac6..e5530b982 100644 --- a/tests/integration/utils/contractions.py +++ b/tests/integration/utils/contractions.py @@ -16,7 +16,7 @@ import numpy as np from legate.core import LEGATE_MAX_DIM -import cunumeric as cn +import cunumeric as num from .comparisons import allclose from .generators import mk_0to1_array @@ -49,7 +49,7 @@ def gen_inputs_of_various_shapes(lib, modes): # making sure common modes appear with the same extent on both arrays (a_modes, b_modes, out_modes) = modes for (a_shape, b_shape) in gen_shapes(a_modes, b_modes): - if lib == cn: + if lib == num: print(f" {a_shape} x {b_shape}") yield (mk_0to1_array(lib, a_shape), mk_0to1_array(lib, b_shape)) @@ -70,7 +70,7 @@ def gen_permuted_inputs(lib, modes): b = mk_0to1_array(lib, (5,) * len(b_modes)) for a_axes in gen_permutations(len(a_modes)): for b_axes in gen_permutations(len(b_modes)): - if lib == cn: + if lib == num: print(f" transpose{a_axes} x transpose{b_axes}") yield (a.transpose(a_axes), b.transpose(b_axes)) @@ -85,7 +85,7 @@ def gen_inputs_of_various_types(lib, modes): (np.float32, np.float32), (np.complex64, np.complex64), ]: - if lib == cn: + if lib == num: print(f" {a_dtype} x {b_dtype}") yield ( mk_0to1_array(lib, a_shape, a_dtype), @@ -97,7 +97,7 @@ def gen_output_of_various_types(lib, modes, a, b): (a_modes, b_modes, out_modes) = modes out_shape = (5,) * len(out_modes) for out_dtype in [np.float16, np.complex64]: - if lib == cn: + if lib == num: print(f" -> {out_dtype}") yield lib.zeros(out_shape, out_dtype) @@ -109,23 +109,23 @@ def _test(name, modes, operation, gen_inputs, gen_output=None, **kwargs): # because we may need to promote arrays so that one includes all modes. 
return print(name) - for (np_inputs, cn_inputs) in zip( - gen_inputs(np, modes), gen_inputs(cn, modes) + for (np_inputs, num_inputs) in zip( + gen_inputs(np, modes), gen_inputs(num, modes) ): np_res = operation(np, *np_inputs, **kwargs) - cn_res = operation(cn, *cn_inputs, **kwargs) + num_res = operation(num, *num_inputs, **kwargs) rtol = ( 1e-02 if any(x.dtype == np.float16 for x in np_inputs) or kwargs.get("dtype") == np.float16 else 1e-05 ) - assert allclose(np_res, cn_res, rtol=rtol) + assert allclose(np_res, num_res, rtol=rtol) if gen_output is not None: - for cn_out in gen_output(cn, modes, *cn_inputs): - operation(cn, *cn_inputs, out=cn_out, **kwargs) - rtol_out = 1e-02 if cn_out.dtype == np.float16 else rtol - assert allclose(cn_out, cn_res, rtol=rtol_out) + for num_out in gen_output(num, modes, *num_inputs): + operation(num, *num_inputs, out=num_out, **kwargs) + rtol_out = 1e-02 if num_out.dtype == np.float16 else rtol + assert allclose(num_out, num_res, rtol=rtol_out) def check_default(name, modes, operation): From ae7610261001015a81c6149ec05472fd1e97bb90 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 17 Nov 2022 15:59:26 -0800 Subject: [PATCH 47/89] Sync supported-dtype check between utils.py and runtime.py (#698) * Sync supported-dtype check between utils.py and runtime.py * Make supported datatypes dict public * Fix tests Co-authored-by: Manolis Papadakis --- cunumeric/runtime.py | 29 ++++------------ cunumeric/utils.py | 37 ++++++++++++--------- tests/integration/test_data_interface.py | 4 +-- tests/unit/cunumeric/test_utils.py | 42 +++++++++++++----------- 4 files changed, 53 insertions(+), 59 deletions(-) diff --git a/cunumeric/runtime.py b/cunumeric/runtime.py index 49e36abb8..2fdb97822 100644 --- a/cunumeric/runtime.py +++ b/cunumeric/runtime.py @@ -39,33 +39,18 @@ from .eager import EagerArray from .thunk import NumPyThunk from .types import NdShape -from .utils import calculate_volume, find_last_user_stacklevel, get_arg_dtype +from .utils import ( + SUPPORTED_DTYPES, + calculate_volume, + find_last_user_stacklevel, + get_arg_dtype, +) if TYPE_CHECKING: import numpy.typing as npt from legate.core._legion.future import Future from legate.core.operation import AutoTask, ManualTask -_supported_dtypes = { - np.bool_: ty.bool_, - np.int8: ty.int8, - np.int16: ty.int16, - np.int32: ty.int32, - int: ty.int64, - np.int64: ty.int64, - np.uint8: ty.uint8, - np.uint16: ty.uint16, - np.uint32: ty.uint32, - np.uint: ty.uint64, - np.uint64: ty.uint64, - np.float16: ty.float16, - np.float32: ty.float32, - float: ty.float64, - np.float64: ty.float64, - np.complex64: ty.complex64, - np.complex128: ty.complex128, -} - ARGS = [ Argument( "test", @@ -170,7 +155,7 @@ def __init__(self, legate_context: LegateContext) -> None: def _register_dtypes(self) -> None: type_system = self.legate_context.type_system - for numpy_type, core_type in _supported_dtypes.items(): + for numpy_type, core_type in SUPPORTED_DTYPES.items(): type_system.make_alias(np.dtype(numpy_type), core_type) for dtype in _CUNUMERIC_DTYPES: diff --git a/cunumeric/utils.py b/cunumeric/utils.py index 5bfd0b54e..fa5b4462d 100644 --- a/cunumeric/utils.py +++ b/cunumeric/utils.py @@ -20,25 +20,30 @@ from types import FrameType from typing import Any, List, Sequence, Tuple, Union, cast +import legate.core.types as ty import numpy as np from .types import NdShape -_SUPPORTED_DTYPES = [ - np.float16, - np.float32, - np.float64, - float, - np.int16, - np.int32, - np.int64, - int, - np.uint16, - np.uint32, - np.uint64, - 
np.bool_, - bool, -] +SUPPORTED_DTYPES = { + bool: ty.bool_, + np.bool_: ty.bool_, + np.int8: ty.int8, + np.int16: ty.int16, + np.int32: ty.int32, + int: ty.int64, # np.int is int + np.int64: ty.int64, + np.uint8: ty.uint8, + np.uint16: ty.uint16, + np.uint32: ty.uint32, + np.uint64: ty.uint64, # np.uint is np.uint64 + np.float16: ty.float16, + np.float32: ty.float32, + float: ty.float64, + np.float64: ty.float64, + np.complex64: ty.complex64, + np.complex128: ty.complex128, +} def is_advanced_indexing(key: Any) -> bool: @@ -91,7 +96,7 @@ def find_last_user_frames(top_only: bool = True) -> str: def is_supported_dtype(dtype: Any) -> bool: if not isinstance(dtype, np.dtype): raise TypeError("expected a NumPy dtype") - return dtype.type in _SUPPORTED_DTYPES + return dtype.type in SUPPORTED_DTYPES def calculate_volume(shape: NdShape) -> int: diff --git a/tests/integration/test_data_interface.py b/tests/integration/test_data_interface.py index 6c617db43..a3329a1b6 100644 --- a/tests/integration/test_data_interface.py +++ b/tests/integration/test_data_interface.py @@ -16,9 +16,9 @@ import pytest import cunumeric as num -from cunumeric.runtime import _supported_dtypes +from cunumeric.utils import SUPPORTED_DTYPES -DTYPES = _supported_dtypes.keys() +DTYPES = SUPPORTED_DTYPES.keys() # A simple wrapper with a legate data interface implementation for testing diff --git a/tests/unit/cunumeric/test_utils.py b/tests/unit/cunumeric/test_utils.py index 01a12961a..fa2880bed 100644 --- a/tests/unit/cunumeric/test_utils.py +++ b/tests/unit/cunumeric/test_utils.py @@ -21,21 +21,27 @@ import cunumeric.utils as m # module under test -EXPECTED_SUPPORTED_DTYPES = [ - np.float16, - np.float32, - np.float64, - float, - np.int16, - np.int32, - np.int64, - int, - np.uint16, - np.uint32, - np.uint64, - np.bool_, - bool, -] +EXPECTED_SUPPORTED_DTYPES = set( + [ + bool, + np.bool_, + np.int8, + np.int16, + np.int32, + int, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.float16, + np.float32, + float, + np.float64, + np.complex64, + np.complex128, + ] +) class Test_is_advanced_indexing: @@ -110,7 +116,7 @@ def test_top_only_False(self) -> None: def test__SUPPORTED_DTYPES(): - assert m._SUPPORTED_DTYPES == EXPECTED_SUPPORTED_DTYPES + assert set(m.SUPPORTED_DTYPES.keys()) == EXPECTED_SUPPORTED_DTYPES class Test_is_supported_dtype: @@ -126,9 +132,7 @@ def test_supported(self, value) -> None: assert m.is_supported_dtype(np.dtype(value)) # This is just a representative sample, not exhasutive - @pytest.mark.parametrize( - "value", [np.float128, np.complex64, np.datetime64] - ) + @pytest.mark.parametrize("value", [np.float128, np.datetime64]) def test_unsupported(self, value) -> None: assert not m.is_supported_dtype(np.dtype(value)) From 90dbecf9dab5b5cd23efe5ecedf4d01b0289dce4 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Fri, 18 Nov 2022 11:06:06 -0800 Subject: [PATCH 48/89] Also check shape and dtype in allclose (#699) * Also check shape and dtype in allclose * Fix test failures * Don't check types in solver tests Co-authored-by: Manolis Papadakis --- cunumeric/linalg/linalg.py | 2 +- cunumeric/module.py | 9 +++++++-- tests/integration/test_einsum.py | 4 +++- tests/integration/test_solve.py | 8 ++++++-- tests/integration/utils/comparisons.py | 9 +++++++++ tests/integration/utils/contractions.py | 4 +++- 6 files changed, 29 insertions(+), 7 deletions(-) diff --git a/cunumeric/linalg/linalg.py b/cunumeric/linalg/linalg.py index 88e457194..1eb8454c6 100644 --- a/cunumeric/linalg/linalg.py +++ 
b/cunumeric/linalg/linalg.py @@ -521,7 +521,7 @@ def norm( # Zero norm return ( (x != 0) - .astype(np.int64) + .astype(x.dtype) .sum(axis=computed_axis, keepdims=keepdims) ) elif ord == 1: diff --git a/cunumeric/module.py b/cunumeric/module.py index 390f8d755..f60021ae8 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -4065,7 +4065,8 @@ def _contract( raise ValueError("Unknown mode labels on output") # Handle types - if dtype is not None: + makes_view = b is None and len(a_modes) == len(out_modes) + if dtype is not None and not makes_view: c_dtype = dtype elif out is not None: c_dtype = out.dtype @@ -5867,8 +5868,12 @@ def sort_complex(a: ndarray) -> ndarray: # force complex result upon return if np.issubdtype(result.dtype, np.complexfloating): return result - else: + elif ( + np.issubdtype(result.dtype, np.integer) and result.dtype.itemsize <= 2 + ): return result.astype(np.complex64, copy=True) + else: + return result.astype(np.complex128, copy=True) # partition diff --git a/tests/integration/test_einsum.py b/tests/integration/test_einsum.py index fd34ef088..c4014b6fa 100644 --- a/tests/integration/test_einsum.py +++ b/tests/integration/test_einsum.py @@ -237,7 +237,9 @@ def check_np_vs_num(expr, mk_input, mk_output=None, **kwargs): for num_out in mk_output(num, out_shape): num.einsum(expr, *num_inputs, out=num_out, **kwargs) rtol_out = 1e-02 if num_out.dtype == np.float16 else rtol - assert allclose(num_out, num_res, rtol=rtol_out) + assert allclose( + num_out, num_res, rtol=rtol_out, check_dtype=False + ) @pytest.mark.parametrize("expr", gen_expr()) diff --git a/tests/integration/test_solve.py b/tests/integration/test_solve.py index 7a8bc3770..30b569401 100644 --- a/tests/integration/test_solve.py +++ b/tests/integration/test_solve.py @@ -47,7 +47,9 @@ def test_solve_1d(n, a_dtype, b_dtype): rtol = RTOL[out.dtype] atol = ATOL[out.dtype] - assert allclose(b, num.matmul(a, out), rtol=rtol, atol=atol) + assert allclose( + b, num.matmul(a, out), rtol=rtol, atol=atol, check_dtype=False + ) @pytest.mark.parametrize("n", SIZES) @@ -61,7 +63,9 @@ def test_solve_2d(n, a_dtype, b_dtype): rtol = RTOL[out.dtype] atol = ATOL[out.dtype] - assert allclose(b, num.matmul(a, out), rtol=rtol, atol=atol) + assert allclose( + b, num.matmul(a, out), rtol=rtol, atol=atol, check_dtype=False + ) def test_solve_corner_cases(): diff --git a/tests/integration/utils/comparisons.py b/tests/integration/utils/comparisons.py index dde1011b6..65571b38c 100644 --- a/tests/integration/utils/comparisons.py +++ b/tests/integration/utils/comparisons.py @@ -27,10 +27,19 @@ def allclose( equal_nan: bool = False, *, diff_limit: Union[int, None] = 5, # None means no limit at all + check_dtype: bool = True, ) -> bool: + if np.shape(a) != np.shape(b): + print(f"allclose: different shape: {np.shape(a)} vs {np.shape(b)}") + return False + # simplify handling of scalar values a, b = np.atleast_1d(a), np.atleast_1d(b) + if check_dtype and a.dtype != b.dtype: + print(f"allclose: different dtype: {a.dtype} vs {b.dtype}") + return False + close = np.isclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan) all_close = np.all(close) diff --git a/tests/integration/utils/contractions.py b/tests/integration/utils/contractions.py index e5530b982..cc719f696 100644 --- a/tests/integration/utils/contractions.py +++ b/tests/integration/utils/contractions.py @@ -125,7 +125,9 @@ def _test(name, modes, operation, gen_inputs, gen_output=None, **kwargs): for num_out in gen_output(num, modes, *num_inputs): operation(num, *num_inputs, 
out=num_out, **kwargs) rtol_out = 1e-02 if num_out.dtype == np.float16 else rtol - assert allclose(num_out, num_res, rtol=rtol_out) + assert allclose( + num_out, num_res, rtol=rtol_out, check_dtype=False + ) def check_default(name, modes, operation): From b30e08a77ca17c6f16d6fa18b95791cbe16f922b Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 21 Nov 2022 12:01:13 -0800 Subject: [PATCH 49/89] Regenerate `install_info.py` on every build (#705) * regenerate install_info.py on every build * specify custom target dependencies correctly * fix typo --- CMakeLists.txt | 13 ++++---- cmake/generate_install_info_py.cmake | 31 +++++++++++++++++++ cunumeric_python.cmake | 20 ++++-------- scripts/build-install.sh | 2 +- scripts/build-no-install.sh | 2 +- scripts/build-separately-no-install.sh | 2 +- scripts/build-with-legate-no-install.sh | 2 +- ...build-with-legate-separately-no-install.sh | 2 +- 8 files changed, 48 insertions(+), 26 deletions(-) create mode 100644 cmake/generate_install_info_py.cmake diff --git a/CMakeLists.txt b/CMakeLists.txt index ee10d8337..417bb9aa4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -79,24 +79,23 @@ endif() if(CMAKE_GENERATOR STREQUAL "Ninja") function(add_touch_cunumeric_ninja_build_target) set(_suf ) - set(_depends ) if(SKBUILD) set(_suf "_python") endif() + add_custom_target("touch_cunumeric${_suf}_ninja_build" ALL + COMMAND ${CMAKE_COMMAND} -E touch_nocreate "${CMAKE_CURRENT_BINARY_DIR}/build.ninja" + COMMENT "touch build.ninja so ninja doesn't re-run CMake on rebuild" + VERBATIM + ) foreach(_dep IN ITEMS cunumeric cunumeric_python legion_core legion_core_python Legion LegionRuntime Realm RealmRuntime Regent) if(TARGET ${_dep}) - list(APPEND _depends ${_dep}) + add_dependencies("touch_cunumeric${_suf}_ninja_build" ${_dep}) endif() endforeach() - add_custom_target("touch_cunumeric${_suf}_ninja_build" ALL - COMMAND ${CMAKE_COMMAND} -E touch_nocreate "${CMAKE_CURRENT_BINARY_DIR}/build.ninja" - COMMENT "touch build.ninja so ninja doesn't re-run CMake on rebuild" - VERBATIM DEPENDS ${_depends} - ) endfunction() add_touch_cunumeric_ninja_build_target() endif() diff --git a/cmake/generate_install_info_py.cmake b/cmake/generate_install_info_py.cmake new file mode 100644 index 000000000..2fb14cbcb --- /dev/null +++ b/cmake/generate_install_info_py.cmake @@ -0,0 +1,31 @@ +#============================================================================= +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +execute_process( + COMMAND ${CMAKE_C_COMPILER} + -E -DLEGATE_USE_PYTHON_CFFI + -I "${CMAKE_CURRENT_LIST_DIR}/../src/cunumeric" + -P "${CMAKE_CURRENT_LIST_DIR}/../src/cunumeric/cunumeric_c.h" + ECHO_ERROR_VARIABLE + OUTPUT_VARIABLE header + COMMAND_ERROR_IS_FATAL ANY +) + +set(libpath "") +configure_file( + "${CMAKE_CURRENT_LIST_DIR}/../cunumeric/install_info.py.in" + "${CMAKE_CURRENT_LIST_DIR}/../cunumeric/install_info.py" +@ONLY) diff --git a/cunumeric_python.cmake b/cunumeric_python.cmake index 3430b5828..c1ca06015 100644 --- a/cunumeric_python.cmake +++ b/cunumeric_python.cmake @@ -43,22 +43,14 @@ if(NOT cunumeric_FOUND) set(SKBUILD ON) endif() -execute_process( - COMMAND ${CMAKE_C_COMPILER} - -E -DLEGATE_USE_PYTHON_CFFI - -I "${CMAKE_CURRENT_SOURCE_DIR}/src/cunumeric" - -P "${CMAKE_CURRENT_SOURCE_DIR}/src/cunumeric/cunumeric_c.h" - ECHO_ERROR_VARIABLE - OUTPUT_VARIABLE header - COMMAND_ERROR_IS_FATAL ANY +add_custom_target("generate_install_info_py" ALL + COMMAND ${CMAKE_COMMAND} + -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/generate_install_info_py.cmake" + COMMENT "Generate install_info.py" + VERBATIM ) -set(libpath "") -configure_file( - "${CMAKE_CURRENT_SOURCE_DIR}/cunumeric/install_info.py.in" - "${CMAKE_CURRENT_SOURCE_DIR}/cunumeric/install_info.py" -@ONLY) - add_library(cunumeric_python INTERFACE) add_library(cunumeric::cunumeric_python ALIAS cunumeric_python) target_link_libraries(cunumeric_python INTERFACE legate::core) diff --git a/scripts/build-install.sh b/scripts/build-install.sh index 8adb472d2..af0f8429d 100755 --- a/scripts/build-install.sh +++ b/scripts/build-install.sh @@ -16,7 +16,7 @@ rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" diff --git a/scripts/build-no-install.sh b/scripts/build-no-install.sh index c398eda58..1237d1a5a 100755 --- a/scripts/build-no-install.sh +++ b/scripts/build-no-install.sh @@ -14,7 +14,7 @@ rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" diff --git a/scripts/build-separately-no-install.sh b/scripts/build-separately-no-install.sh index 8d8078723..be31507ee 100644 --- a/scripts/build-separately-no-install.sh +++ b/scripts/build-separately-no-install.sh @@ -14,7 +14,7 @@ rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" diff --git a/scripts/build-with-legate-no-install.sh b/scripts/build-with-legate-no-install.sh index 498745e31..9d83010b7 100644 --- a/scripts/build-with-legate-no-install.sh +++ b/scripts/build-with-legate-no-install.sh @@ -16,7 +16,7 @@ rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options 
here as desired cmake_args+=" diff --git a/scripts/build-with-legate-separately-no-install.sh b/scripts/build-with-legate-separately-no-install.sh index fa9e97d05..74cc277a0 100755 --- a/scripts/build-with-legate-separately-no-install.sh +++ b/scripts/build-with-legate-separately-no-install.sh @@ -16,7 +16,7 @@ rm -rf ./{build,_skbuild,dist,cunumeric.egg-info} cmake_args="${CMAKE_ARGS:-}" # Use ninja-build if installed -if [[ -n "$(which ninja)" ]]; then cmake_args+="-GNinja"; fi +if [[ -n "$(which ninja)" ]]; then cmake_args+=" -GNinja"; fi # Add other build options here as desired cmake_args+=" From aeeb82bbeac2d92a901378b0ed903d66005260fd Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 21 Nov 2022 14:19:33 -0800 Subject: [PATCH 50/89] More argument checks for `bincount` (#711) --- cunumeric/module.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index f60021ae8..95b5350cf 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6246,6 +6246,8 @@ def bincount( -------- Multiple GPUs, Multiple CPUs """ + if x.ndim != 1: + raise ValueError("the input array must be 1-dimensional") if weights is not None: if weights.shape != x.shape: raise ValueError("weights array must be same shape for bincount") @@ -6253,11 +6255,16 @@ def bincount( raise ValueError("weights must be convertible to float64") # Make sure the weights are float64 weights = weights.astype(np.float64) - if x.dtype.kind != "i" and x.dtype.kind != "u": + if x.dtype.kind != "i": raise TypeError("input array for bincount must be integer type") if minlength < 0: raise ValueError("'minlength' must not be negative") - minlength = _builtin_max(minlength, int(amax(x)) + 1) + # Note that the following are non-blocking operations, + # though passing their results to `int` is blocking + max_val, min_val = amax(x), amin(x) + if int(min_val) < 0: + raise ValueError("the input array must have no negative elements") + minlength = _builtin_max(minlength, int(max_val) + 1) if x.size == 1: # Handle the special case of 0-D array if weights is None: From c8f0e750f8905b87ef4e78bfa0780876994c532c Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 21 Nov 2022 17:59:22 -0800 Subject: [PATCH 51/89] Fix a typo in unique.cu indexing (#713) Co-authored-by: Manolis Papadakis --- src/cunumeric/set/unique.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cunumeric/set/unique.cu b/src/cunumeric/set/unique.cu index 908a87664..2cc4e6363 100644 --- a/src/cunumeric/set/unique.cu +++ b/src/cunumeric/set/unique.cu @@ -40,7 +40,7 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) size_t offset = blockIdx.x * blockDim.x + threadIdx.x; if (offset >= volume) return; auto point = pitches.unflatten(offset, lo); - out[offset] = accessor[lo + point]; + out[offset] = accessor[point]; } template From b8ad06f5e4b0ed0492f0066ec324762dd9958397 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 21 Nov 2022 21:10:19 -0800 Subject: [PATCH 52/89] Fixes for buffer allocations (#706) * Two updates to buffer allocations: * Remove the obsolete has_numamem flag, as create_buffer now uses socket memories. 
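
  As an illustrative sketch of that change (with VAL and size standing in
  for the concrete element types and extents at each call site), code that
  used to pick a memory kind by hand:

      auto kind = CuNumeric::has_numamem ? Memory::Kind::SOCKET_MEM
                                         : Memory::Kind::SYSTEM_MEM;
      auto buf = legate::create_buffer<VAL>(size, kind);

  can now simply be:

      auto buf = legate::create_buffer<VAL>(size);
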
* Use layout-aware create_output_buffer to create output buffers for unbound output stores * Start running OpenMP tests with socket memories * Unify temporary buffer allocations for matrix tasks on CPUs * Remove references to an obsolete tunable name * Another place where the obsolete tunable was being used --- .github/workflows/ci.yml | 2 +- cunumeric/config.py | 2 -- src/cunumeric/cunumeric.cc | 10 ------ src/cunumeric/cunumeric.h | 4 --- src/cunumeric/cunumeric_c.h | 1 - src/cunumeric/index/repeat_omp.cc | 3 +- src/cunumeric/index/repeat_template.inl | 4 +-- src/cunumeric/mapper.cc | 9 ------ src/cunumeric/matrix/contract_omp.cc | 6 ++-- src/cunumeric/matrix/matmul_omp.cc | 5 +-- src/cunumeric/matrix/matvecmul_omp.cc | 5 +-- src/cunumeric/matrix/solve_cpu.inl | 17 +++------- src/cunumeric/matrix/util.cc | 3 +- src/cunumeric/matrix/util_omp.cc | 8 ----- src/cunumeric/matrix/util_omp.h | 2 -- src/cunumeric/search/argwhere_template.inl | 5 +-- src/cunumeric/search/nonzero.cc | 16 +++++----- src/cunumeric/search/nonzero.cu | 16 +++++----- src/cunumeric/search/nonzero_omp.cc | 17 +++++----- src/cunumeric/search/nonzero_template.inl | 9 ++---- src/cunumeric/set/unique.cc | 21 ++++++------- src/cunumeric/set/unique.cu | 33 +++++++++++--------- src/cunumeric/set/unique_omp.cc | 22 ++++++------- src/cunumeric/set/unique_reduce.cc | 7 ++--- src/cunumeric/set/unique_reduce_template.inl | 6 +--- src/cunumeric/set/unique_template.inl | 8 ++--- src/cunumeric/sort/sort.cu | 1 - tests/unit/cunumeric/test_config.py | 1 - 28 files changed, 86 insertions(+), 157 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index be0db706c..b9b3bc526 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -77,7 +77,7 @@ jobs: - {name: GPU test, options: --use cuda --gpus 1 --debug, log: gpu} - {name: 2 GPUs test, options: --use cuda --gpus 2 --debug, log: gpus} - {name: OpenMP test, options: --use openmp --omps 1 --ompthreads 2 --debug, log: omp} - - {name: 2 OpenMPs test, options: --use openmp --omps 2 --ompthreads 2 --debug, log: omps} + - {name: 2 NUMA OpenMPs test, options: --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug, log: omps} - {name: Eager execution test, options: --use eager --debug, log: eager} - {name: mypy, options: mypy, log: mypy} - {name: documentation, options: docs, log: docs} diff --git a/cunumeric/config.py b/cunumeric/config.py index 9195022d6..cad52e77f 100644 --- a/cunumeric/config.py +++ b/cunumeric/config.py @@ -194,7 +194,6 @@ class _CunumericSharedLib: CUNUMERIC_TRANSPOSE_COPY_2D: int CUNUMERIC_TRILU: int CUNUMERIC_TRSM: int - CUNUMERIC_TUNABLE_HAS_NUMAMEM: int CUNUMERIC_TUNABLE_MAX_EAGER_VOLUME: int CUNUMERIC_TUNABLE_NUM_GPUS: int CUNUMERIC_TUNABLE_NUM_PROCS: int @@ -524,7 +523,6 @@ class CuNumericTunable(IntEnum): NUM_GPUS = _cunumeric.CUNUMERIC_TUNABLE_NUM_GPUS NUM_PROCS = _cunumeric.CUNUMERIC_TUNABLE_NUM_PROCS MAX_EAGER_VOLUME = _cunumeric.CUNUMERIC_TUNABLE_MAX_EAGER_VOLUME - HAS_NUMAMEM = _cunumeric.CUNUMERIC_TUNABLE_HAS_NUMAMEM # Match these to CuNumericScanCode in cunumeric_c.h diff --git a/src/cunumeric/cunumeric.cc b/src/cunumeric/cunumeric.cc index bf1ef7657..e8f87bbf6 100644 --- a/src/cunumeric/cunumeric.cc +++ b/src/cunumeric/cunumeric.cc @@ -25,9 +25,6 @@ namespace cunumeric { static const char* const cunumeric_library_name = "cunumeric"; -/*static*/ bool CuNumeric::has_numamem = false; -/*static*/ MapperID CuNumeric::mapper_id = -1; - /*static*/ LegateTaskRegistrar& CuNumeric::get_registrar() { static 
LegateTaskRegistrar registrar;
@@ -60,7 +57,6 @@ void registration_callback(Machine machine,
 #endif
 
   // Now we can register our mapper with the runtime
-  CuNumeric::mapper_id = context.get_mapper_id(0);
   context.register_mapper(new CuNumericMapper(runtime, machine, context), 0);
 }
 
@@ -74,12 +70,6 @@ void cunumeric_perform_registration(void)
   // in before the runtime starts and make it global so that we know
   // that this call back is invoked everywhere across all nodes
   Runtime::perform_registration_callback(cunumeric::registration_callback, true /*global*/);
-
-  Runtime* runtime = Runtime::get_runtime();
-  Context ctx = Runtime::get_context();
-  Future fut = runtime->select_tunable_value(
-    ctx, CUNUMERIC_TUNABLE_HAS_NUMAMEM, cunumeric::CuNumeric::mapper_id);
-  if (fut.get_result<int32_t>() != 0) cunumeric::CuNumeric::has_numamem = true;
 }
 
 bool cunumeric_has_curand()
diff --git a/src/cunumeric/cunumeric.h b/src/cunumeric/cunumeric.h
index 32af7e6b7..11c4cd990 100644
--- a/src/cunumeric/cunumeric.h
+++ b/src/cunumeric/cunumeric.h
@@ -37,10 +37,6 @@ struct CuNumeric {
     get_registrar().record_variant(std::forward<Args>(args)...);
   }
   static legate::LegateTaskRegistrar& get_registrar();
-
- public:
-  static bool has_numamem;
-  static Legion::MapperID mapper_id;
 };
 
 template <typename T>
diff --git a/src/cunumeric/cunumeric_c.h b/src/cunumeric/cunumeric_c.h
index 462214782..724db0013 100644
--- a/src/cunumeric/cunumeric_c.h
+++ b/src/cunumeric/cunumeric_c.h
@@ -206,7 +206,6 @@ enum CuNumericTunable {
   CUNUMERIC_TUNABLE_NUM_GPUS = 1,
   CUNUMERIC_TUNABLE_NUM_PROCS = 2,
   CUNUMERIC_TUNABLE_MAX_EAGER_VOLUME = 3,
-  CUNUMERIC_TUNABLE_HAS_NUMAMEM = 4,
 };
 
 enum CuNumericBounds {
diff --git a/src/cunumeric/index/repeat_omp.cc b/src/cunumeric/index/repeat_omp.cc
index 823a1a16a..9344452d1 100644
--- a/src/cunumeric/index/repeat_omp.cc
+++ b/src/cunumeric/index/repeat_omp.cc
@@ -62,9 +62,8 @@ struct RepeatImplBody<VariantKind::OMP, CODE, DIM> {
                   const int32_t axis,
                   const Rect<DIM>& in_rect) const
   {
-    auto kind = CuNumeric::has_numamem ? Memory::Kind::SOCKET_MEM : Memory::Kind::SYSTEM_MEM;
     int64_t axis_extent = in_rect.hi[axis] - in_rect.lo[axis] + 1;
-    auto offsets = create_buffer<int64_t>(axis_extent, kind);
+    auto offsets = create_buffer<int64_t>(axis_extent);
 
     const auto max_threads = omp_get_max_threads();
     ThreadLocalStorage<int64_t> local_sums(max_threads);
diff --git a/src/cunumeric/index/repeat_template.inl b/src/cunumeric/index/repeat_template.inl
index c47603916..30b3249cf 100644
--- a/src/cunumeric/index/repeat_template.inl
+++ b/src/cunumeric/index/repeat_template.inl
@@ -38,9 +38,7 @@ struct RepeatImpl {
     auto input_arr = args.input.read_accessor<VAL, DIM>(input_rect);
 
     if (input_rect.empty()) {
-      auto extents = Point<DIM>::ZEROES();
-      auto buffer = create_buffer<VAL>(extents);
-      args.output.return_data(buffer, extents);
+      args.output.make_empty();
       return;
     }
diff --git a/src/cunumeric/mapper.cc b/src/cunumeric/mapper.cc
index ada6ca268..51797acfe 100644
--- a/src/cunumeric/mapper.cc
+++ b/src/cunumeric/mapper.cc
@@ -65,15 +65,6 @@ Scalar CuNumericMapper::tunable_value(TunableID tunable_id)
     }
     return Scalar(eager_volume);
   }
-    case CUNUMERIC_TUNABLE_HAS_NUMAMEM: {
-      // TODO: This assumes that either all OpenMP processors across the machine have a NUMA
-      // memory or none does.
- Legion::Machine::MemoryQuery query(machine); - query.local_address_space(); - query.only_kind(Legion::Memory::SOCKET_MEM); - int32_t has_numamem = query.count() > 0; - return Scalar(has_numamem); - } default: break; } LEGATE_ABORT; // unknown tunable value diff --git a/src/cunumeric/matrix/contract_omp.cc b/src/cunumeric/matrix/contract_omp.cc index 4a1dd27b2..659db3f0a 100644 --- a/src/cunumeric/matrix/contract_omp.cc +++ b/src/cunumeric/matrix/contract_omp.cc @@ -112,17 +112,17 @@ struct ContractImplBody { std::vector lhs_copy_strides(lhs_ndim); int64_t lhs_size = calculate_volume(lhs_ndim, lhs_shape, lhs_copy_strides.data()); - float* lhs_copy_data = allocate_buffer_omp(lhs_size); + float* lhs_copy_data = allocate_buffer(lhs_size); half_tensor_to_float_omp(lhs_copy_data, lhs_data, lhs_ndim, lhs_shape, lhs_strides); std::vector rhs1_copy_strides(rhs1_ndim); int64_t rhs1_size = calculate_volume(rhs1_ndim, rhs1_shape, rhs1_copy_strides.data()); - float* rhs1_copy_data = allocate_buffer_omp(rhs1_size); + float* rhs1_copy_data = allocate_buffer(rhs1_size); half_tensor_to_float_omp(rhs1_copy_data, rhs1_data, rhs1_ndim, rhs1_shape, rhs1_strides); std::vector rhs2_copy_strides(rhs2_ndim); int64_t rhs2_size = calculate_volume(rhs2_ndim, rhs2_shape, rhs2_copy_strides.data()); - float* rhs2_copy_data = allocate_buffer_omp(rhs2_size); + float* rhs2_copy_data = allocate_buffer(rhs2_size); half_tensor_to_float_omp(rhs2_copy_data, rhs2_data, rhs2_ndim, rhs2_shape, rhs2_strides); ContractImplBody{}(lhs_copy_data, diff --git a/src/cunumeric/matrix/matmul_omp.cc b/src/cunumeric/matrix/matmul_omp.cc index 72b7add85..dd8ea9910 100644 --- a/src/cunumeric/matrix/matmul_omp.cc +++ b/src/cunumeric/matrix/matmul_omp.cc @@ -16,6 +16,7 @@ #include "cunumeric/matrix/matmul.h" #include "cunumeric/matrix/matmul_template.inl" +#include "cunumeric/matrix/util.h" #include "cunumeric/matrix/util_omp.h" #include @@ -102,8 +103,8 @@ struct MatMulImplBody { bool rhs1_transposed, bool rhs2_transposed) { - auto rhs1_copy = allocate_buffer_omp(m * k); - auto rhs2_copy = allocate_buffer_omp(k * n); + auto rhs1_copy = allocate_buffer(m * k); + auto rhs2_copy = allocate_buffer(k * n); if (rhs1_transposed) half_matrix_to_float_omp(rhs1_copy, rhs1, k, m, rhs1_stride); diff --git a/src/cunumeric/matrix/matvecmul_omp.cc b/src/cunumeric/matrix/matvecmul_omp.cc index 33a59052c..4166098be 100644 --- a/src/cunumeric/matrix/matvecmul_omp.cc +++ b/src/cunumeric/matrix/matvecmul_omp.cc @@ -16,6 +16,7 @@ #include "cunumeric/matrix/matvecmul.h" #include "cunumeric/matrix/matvecmul_template.inl" +#include "cunumeric/matrix/util.h" #include "cunumeric/matrix/util_omp.h" #include @@ -68,8 +69,8 @@ struct MatVecMulImplBody { { auto vec_size = transpose_mat ? m : n; - auto mat_copy = allocate_buffer_omp(m * n); - auto vec_copy = allocate_buffer_omp(vec_size); + auto mat_copy = allocate_buffer(m * n); + auto vec_copy = allocate_buffer(vec_size); half_matrix_to_float_omp(mat_copy, mat, m, n, mat_stride); half_vector_to_float_omp(vec_copy, vec, vec_size); diff --git a/src/cunumeric/matrix/solve_cpu.inl b/src/cunumeric/matrix/solve_cpu.inl index 98cba89aa..83dae8063 100644 --- a/src/cunumeric/matrix/solve_cpu.inl +++ b/src/cunumeric/matrix/solve_cpu.inl @@ -24,20 +24,11 @@ namespace cunumeric { using namespace Legion; using namespace legate; -template -Memory::Kind get_memory_kind() -{ - if constexpr (KIND == VariantKind::OMP) - return CuNumeric::has_numamem ? 
Memory::Kind::SOCKET_MEM : Memory::Kind::SYSTEM_MEM;
-  else
-    return Memory::Kind::SYSTEM_MEM;
-}
-
 template <VariantKind KIND>
 struct SolveImplBody<KIND, LegateTypeCode::FLOAT_LT> {
   void operator()(int32_t m, int32_t n, int32_t nrhs, float* a, float* b)
   {
-    auto ipiv = create_buffer<int32_t>(std::min(m, n), get_memory_kind<KIND>());
+    auto ipiv = create_buffer<int32_t>(std::min(m, n));
 
     int32_t info = 0;
     LAPACK_sgesv(&n, &nrhs, a, &m, ipiv.ptr(0), b, &n, &info);
@@ -50,7 +41,7 @@ template <VariantKind KIND>
 struct SolveImplBody<KIND, LegateTypeCode::DOUBLE_LT> {
   void operator()(int32_t m, int32_t n, int32_t nrhs, double* a, double* b)
   {
-    auto ipiv = create_buffer<int32_t>(std::min(m, n), get_memory_kind<KIND>());
+    auto ipiv = create_buffer<int32_t>(std::min(m, n));
 
     int32_t info = 0;
     LAPACK_dgesv(&n, &nrhs, a, &m, ipiv.ptr(0), b, &n, &info);
@@ -63,7 +54,7 @@ template <VariantKind KIND>
 struct SolveImplBody<KIND, LegateTypeCode::COMPLEX64_LT> {
   void operator()(int32_t m, int32_t n, int32_t nrhs, complex<float>* a_, complex<float>* b_)
   {
-    auto ipiv = create_buffer<int32_t>(std::min(m, n), get_memory_kind<KIND>());
+    auto ipiv = create_buffer<int32_t>(std::min(m, n));
 
     auto a = reinterpret_cast<__complex__ float*>(a_);
     auto b = reinterpret_cast<__complex__ float*>(b_);
@@ -79,7 +70,7 @@ template <VariantKind KIND>
 struct SolveImplBody<KIND, LegateTypeCode::COMPLEX128_LT> {
   void operator()(int32_t m, int32_t n, int32_t nrhs, complex<double>* a_, complex<double>* b_)
   {
-    auto ipiv = create_buffer<int32_t>(std::min(m, n), get_memory_kind<KIND>());
+    auto ipiv = create_buffer<int32_t>(std::min(m, n));
 
     auto a = reinterpret_cast<__complex__ double*>(a_);
     auto b = reinterpret_cast<__complex__ double*>(b_);
diff --git a/src/cunumeric/matrix/util.cc b/src/cunumeric/matrix/util.cc
index f2bbb88ee..67010f062 100644
--- a/src/cunumeric/matrix/util.cc
+++ b/src/cunumeric/matrix/util.cc
@@ -74,8 +74,7 @@ int64_t calculate_volume(size_t ndim, const int64_t* shape, int64_t* strides)
 
 float* allocate_buffer(size_t size)
 {
-  // We will not call this function on GPUs
-  auto buffer = legate::create_buffer<float>(size, Memory::Kind::SYSTEM_MEM);
+  auto buffer = legate::create_buffer<float>(size);
   return buffer.ptr(0);
 }
diff --git a/src/cunumeric/matrix/util_omp.cc b/src/cunumeric/matrix/util_omp.cc
index c847ce6cf..af157e285 100644
--- a/src/cunumeric/matrix/util_omp.cc
+++ b/src/cunumeric/matrix/util_omp.cc
@@ -24,14 +24,6 @@ namespace cunumeric {
 
 using namespace Legion;
 
-float* allocate_buffer_omp(size_t size)
-{
-  Memory::Kind kind = CuNumeric::has_numamem ?
Memory::Kind::SOCKET_MEM : Memory::Kind::SYSTEM_MEM; - // We will not call this function on GPUs - auto buffer = legate::create_buffer(size, kind); - return buffer.ptr(0); -} - void half_vector_to_float_omp(float* out, const __half* ptr, size_t n) { #pragma omp parallel for schedule(static) diff --git a/src/cunumeric/matrix/util_omp.h b/src/cunumeric/matrix/util_omp.h index b17072b3c..805c622e7 100644 --- a/src/cunumeric/matrix/util_omp.h +++ b/src/cunumeric/matrix/util_omp.h @@ -20,8 +20,6 @@ namespace cunumeric { -float* allocate_buffer_omp(size_t size); - // The following assume that the float array was created using allocate_buffer void half_vector_to_float_omp(float* out, const __half* ptr, size_t n); diff --git a/src/cunumeric/search/argwhere_template.inl b/src/cunumeric/search/argwhere_template.inl index f609eaef1..da9224bea 100644 --- a/src/cunumeric/search/argwhere_template.inl +++ b/src/cunumeric/search/argwhere_template.inl @@ -41,10 +41,7 @@ struct ArgWhereImpl { size_t volume = pitches.flatten(rect_in); if (volume == 0) { - auto extents = Point<2>::ZEROES(); - // auto extents = Point<2>(0,DIM); - auto buffer = create_buffer(extents); - args.out.return_data(buffer, extents); + args.out.make_empty(); return; } diff --git a/src/cunumeric/search/nonzero.cc b/src/cunumeric/search/nonzero.cc index 76a0dd8ea..0ccea91f6 100644 --- a/src/cunumeric/search/nonzero.cc +++ b/src/cunumeric/search/nonzero.cc @@ -26,11 +26,11 @@ template struct NonzeroImplBody { using VAL = legate_type_of; - size_t operator()(const AccessorRO& in, - const Pitches& pitches, - const Rect& rect, - const size_t volume, - std::vector>& results) + void operator()(std::vector& outputs, + const AccessorRO& in, + const Pitches& pitches, + const Rect& rect, + const size_t volume) { int64_t size = 0; @@ -39,7 +39,9 @@ struct NonzeroImplBody { size += in[point] != VAL(0); } - for (auto& result : results) result = create_buffer(size, Memory::Kind::SYSTEM_MEM); + std::vector> results; + for (auto& output : outputs) + results.push_back(output.create_output_buffer(Point<1>(size), true)); int64_t out_idx = 0; for (size_t idx = 0; idx < volume; ++idx) { @@ -49,8 +51,6 @@ struct NonzeroImplBody { ++out_idx; } assert(size == out_idx); - - return size; } }; diff --git a/src/cunumeric/search/nonzero.cu b/src/cunumeric/search/nonzero.cu index a542b1bac..6356d1076 100644 --- a/src/cunumeric/search/nonzero.cu +++ b/src/cunumeric/search/nonzero.cu @@ -62,23 +62,23 @@ struct NonzeroImplBody { volume, in, pitches, rect.lo, offsets, p_results); } - size_t operator()(const AccessorRO& in, - const Pitches& pitches, - const Rect& rect, - const size_t volume, - std::vector>& results) + void operator()(std::vector& outputs, + const AccessorRO& in, + const Pitches& pitches, + const Rect& rect, + const size_t volume) { auto stream = get_cached_stream(); auto offsets = create_buffer(volume, Memory::Kind::GPU_FB_MEM); auto size = compute_offsets(in, pitches, rect, volume, offsets, stream); - for (auto& result : results) result = create_buffer(size, Memory::Kind::GPU_FB_MEM); + std::vector> results; + for (auto& output : outputs) + results.push_back(output.create_output_buffer(Point<1>(size), true)); if (size > 0) populate_nonzeros(in, pitches, rect, volume, results, offsets, stream); CHECK_CUDA_STREAM(stream); - - return size; } }; diff --git a/src/cunumeric/search/nonzero_omp.cc b/src/cunumeric/search/nonzero_omp.cc index 178956bd6..b294567c7 100644 --- a/src/cunumeric/search/nonzero_omp.cc +++ b/src/cunumeric/search/nonzero_omp.cc @@ -29,11 
+29,11 @@ template struct NonzeroImplBody { using VAL = legate_type_of; - size_t operator()(const AccessorRO& in, - const Pitches& pitches, - const Rect& rect, - const size_t volume, - std::vector>& results) + void operator()(std::vector& outputs, + const AccessorRO& in, + const Pitches& pitches, + const Rect& rect, + const size_t volume) { const auto max_threads = omp_get_max_threads(); @@ -59,8 +59,9 @@ struct NonzeroImplBody { for (auto idx = 1; idx < max_threads; ++idx) offsets[idx] = offsets[idx - 1] + sizes[idx - 1]; } - auto kind = CuNumeric::has_numamem ? Memory::Kind::SOCKET_MEM : Memory::Kind::SYSTEM_MEM; - for (auto& result : results) result = create_buffer(size, kind); + std::vector> results; + for (auto& output : outputs) + results.push_back(output.create_output_buffer(Point<1>(size), true)); #pragma omp parallel { @@ -74,8 +75,6 @@ struct NonzeroImplBody { ++out_idx; } } - - return size; } }; diff --git a/src/cunumeric/search/nonzero_template.inl b/src/cunumeric/search/nonzero_template.inl index cfeaaefaf..0d5227a87 100644 --- a/src/cunumeric/search/nonzero_template.inl +++ b/src/cunumeric/search/nonzero_template.inl @@ -41,17 +41,12 @@ struct NonzeroImpl { size_t volume = pitches.flatten(rect); if (volume == 0) { - auto empty = create_buffer(0); - for (auto& store : args.results) store.return_data(empty, Point<1>(0)); + for (auto& store : args.results) store.make_empty(); return; } auto in = args.input.read_accessor(rect); - std::vector> results(DIM); - auto size = NonzeroImplBody()(in, pitches, rect, volume, results); - - for (int32_t idx = 0; idx < DIM; ++idx) - args.results[idx].return_data(results[idx], Point<1>(size)); + NonzeroImplBody()(args.results, in, pitches, rect, volume); } }; diff --git a/src/cunumeric/set/unique.cc b/src/cunumeric/set/unique.cc index 997d99cd6..b7ff2f25f 100644 --- a/src/cunumeric/set/unique.cc +++ b/src/cunumeric/set/unique.cc @@ -26,13 +26,14 @@ template struct UniqueImplBody { using VAL = legate_type_of; - std::pair, size_t> operator()(const AccessorRO& in, - const Pitches& pitches, - const Rect& rect, - const size_t volume, - const std::vector& comms, - const DomainPoint& point, - const Domain& launch_domain) + void operator()(Array& output, + const AccessorRO& in, + const Pitches& pitches, + const Rect& rect, + const size_t volume, + const std::vector& comms, + const DomainPoint& point, + const Domain& launch_domain) { std::set dedup_set; @@ -41,13 +42,9 @@ struct UniqueImplBody { dedup_set.insert(in[p]); } - size_t size = dedup_set.size(); + auto result = output.create_output_buffer(dedup_set.size(), true); size_t pos = 0; - auto result = create_buffer(size); - for (auto e : dedup_set) result[pos++] = e; - - return std::make_pair(result, size); } }; diff --git a/src/cunumeric/set/unique.cu b/src/cunumeric/set/unique.cu index 2cc4e6363..9104474ef 100644 --- a/src/cunumeric/set/unique.cu +++ b/src/cunumeric/set/unique.cu @@ -49,8 +49,12 @@ using Piece = std::pair, size_t>; auto get_aligned_size = [](auto size) { return std::max(16, (size + 15) / 16 * 16); }; template -static Piece tree_reduce( - Piece my_piece, size_t my_id, size_t num_ranks, cudaStream_t stream, ncclComm_t* comm) +static Piece tree_reduce(Array& output, + Piece my_piece, + size_t my_id, + size_t num_ranks, + cudaStream_t stream, + ncclComm_t* comm) { size_t remaining = num_ranks; size_t radix = 2; @@ -114,7 +118,7 @@ static Piece tree_reduce( auto buf_size = (get_aligned_size(my_piece.second * sizeof(VAL)) + sizeof(VAL) - 1) / sizeof(VAL); assert(my_piece.second <= 
buf_size); - my_piece.first = create_buffer(buf_size); + my_piece.first = output.create_output_buffer(buf_size); CHECK_CUDA(cudaMemcpyAsync(my_piece.first.ptr(0), p_merged, @@ -130,7 +134,7 @@ static Piece tree_reduce( if (my_id != 0) { my_piece.second = 0; - my_piece.first = create_buffer(0); + my_piece.first = output.create_output_buffer(0); } return my_piece; @@ -140,13 +144,14 @@ template struct UniqueImplBody { using VAL = legate_type_of; - Piece operator()(const AccessorRO& in, - const Pitches& pitches, - const Rect& rect, - const size_t volume, - const std::vector& comms, - const DomainPoint& point, - const Domain& launch_domain) + void operator()(Array& output, + const AccessorRO& in, + const Pitches& pitches, + const Rect& rect, + const size_t volume, + const std::vector& comms, + const DomainPoint& point, + const Domain& launch_domain) { auto stream = get_cached_stream(); @@ -175,7 +180,7 @@ struct UniqueImplBody { result.second = end - ptr; auto buf_size = (get_aligned_size(result.second * sizeof(VAL)) + sizeof(VAL) - 1) / sizeof(VAL); assert(end - ptr <= buf_size); - result.first = create_buffer(buf_size); + result.first = output.create_output_buffer(buf_size); if (result.second > 0) CHECK_CUDA(cudaMemcpyAsync( result.first.ptr(0), ptr, sizeof(VAL) * result.second, cudaMemcpyDeviceToDevice, stream)); @@ -184,12 +189,12 @@ struct UniqueImplBody { // The launch domain is 1D because of the output region assert(point.dim == 1); auto comm = comms[0].get(); - result = tree_reduce(result, point[0], launch_domain.get_volume(), stream, comm); + result = tree_reduce(output, result, point[0], launch_domain.get_volume(), stream, comm); } CHECK_CUDA_STREAM(stream); // Finally we pack the result - return result; + output.return_data(result.first, Point<1>(result.second)); } }; diff --git a/src/cunumeric/set/unique_omp.cc b/src/cunumeric/set/unique_omp.cc index 42914d853..656da03cc 100644 --- a/src/cunumeric/set/unique_omp.cc +++ b/src/cunumeric/set/unique_omp.cc @@ -28,13 +28,14 @@ template struct UniqueImplBody { using VAL = legate_type_of; - std::pair, size_t> operator()(const AccessorRO& in, - const Pitches& pitches, - const Rect& rect, - const size_t volume, - const std::vector& comms, - const DomainPoint& point, - const Domain& launch_domain) + void operator()(Array& output, + const AccessorRO& in, + const Pitches& pitches, + const Rect& rect, + const size_t volume, + const std::vector& comms, + const DomainPoint& point, + const Domain& launch_domain) { const auto max_threads = omp_get_max_threads(); std::vector> dedup_set(max_threads); @@ -66,14 +67,9 @@ struct UniqueImplBody { } auto& final_dedup_set = dedup_set[0]; - size_t size = final_dedup_set.size(); + auto result = output.create_output_buffer(final_dedup_set.size(), true); size_t pos = 0; - auto kind = CuNumeric::has_numamem ? 
Memory::Kind::SOCKET_MEM : Memory::Kind::SYSTEM_MEM; - auto result = create_buffer(size, kind); - for (auto e : final_dedup_set) result[pos++] = e; - - return std::make_pair(result, size); } }; diff --git a/src/cunumeric/set/unique_reduce.cc b/src/cunumeric/set/unique_reduce.cc index 4f8a4d29e..129d9903f 100644 --- a/src/cunumeric/set/unique_reduce.cc +++ b/src/cunumeric/set/unique_reduce.cc @@ -26,8 +26,7 @@ template struct UniqueReduceImplBody { using VAL = legate_type_of; - std::pair, size_t> operator()( - const std::vector, Rect<1>>>& inputs) + void operator()(Array& output, const std::vector, Rect<1>>>& inputs) { std::set dedup_set; @@ -39,11 +38,9 @@ struct UniqueReduceImplBody { size_t size = dedup_set.size(); size_t pos = 0; - auto result = create_buffer(size); + auto result = output.create_output_buffer(Point<1>(size), true); for (auto e : dedup_set) result[pos++] = e; - - return std::make_pair(result, size); } }; diff --git a/src/cunumeric/set/unique_reduce_template.inl b/src/cunumeric/set/unique_reduce_template.inl index 58d810405..c51976e0a 100644 --- a/src/cunumeric/set/unique_reduce_template.inl +++ b/src/cunumeric/set/unique_reduce_template.inl @@ -43,11 +43,7 @@ struct UniqueReduceImpl { inputs.push_back(std::make_pair(acc, shape)); } - size_t size; - Buffer result; - std::tie(result, size) = UniqueReduceImplBody()(inputs); - - output.return_data(result, Point<1>(size)); + UniqueReduceImplBody()(output, inputs); } }; diff --git a/src/cunumeric/set/unique_template.inl b/src/cunumeric/set/unique_template.inl index e1fbf076c..04f458331 100644 --- a/src/cunumeric/set/unique_template.inl +++ b/src/cunumeric/set/unique_template.inl @@ -44,12 +44,8 @@ struct UniqueImpl { size_t volume = pitches.flatten(rect); auto in = input.read_accessor(rect); - size_t size; - Buffer result; - std::tie(result, size) = - UniqueImplBody()(in, pitches, rect, volume, comms, point, launch_domain); - - output.return_data(result, Point<1>(size)); + UniqueImplBody()( + output, in, pitches, rect, volume, comms, point, launch_domain); } }; diff --git a/src/cunumeric/sort/sort.cu b/src/cunumeric/sort/sort.cu index 3d76f37e7..0297056d1 100644 --- a/src/cunumeric/sort/sort.cu +++ b/src/cunumeric/sort/sort.cu @@ -1426,7 +1426,6 @@ void sample_sort_nccl_nd(SortPiece> local_sorted, CHECK_NCCL(ncclGroupEnd()); // we need the amount of data to transfer on the host --> get it - // FIXME auto kind = CuNumeric::has_numamem ? 
Memory::Kind::SOCKET_MEM : Memory::Kind::SYSTEM_MEM; Buffer size_send_total = create_buffer(num_sort_ranks, Memory::Z_COPY_MEM); Buffer size_recv_total = create_buffer(num_sort_ranks, Memory::Z_COPY_MEM); { diff --git a/tests/unit/cunumeric/test_config.py b/tests/unit/cunumeric/test_config.py index ddede6241..a3cbd1529 100644 --- a/tests/unit/cunumeric/test_config.py +++ b/tests/unit/cunumeric/test_config.py @@ -248,7 +248,6 @@ def test_CuNumericTunable() -> None: "NUM_GPUS", "NUM_PROCS", "MAX_EAGER_VOLUME", - "HAS_NUMAMEM", } From d870e9a8ecfebf8f50de7583a27a34ea90d6c10c Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 22 Nov 2022 14:36:23 -0800 Subject: [PATCH 53/89] Don't use cmake 3.25.0 in build-isolation mode (#714) Co-authored-by: Manolis Papadakis --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 22727eb03..5ac994ab9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ requires = [ "ninja", "setuptools", "scikit-build>=0.13.1", - "cmake>=3.22.1,!=3.23.0", + "cmake>=3.22.1,!=3.23.0,!=3.25.0", ] [tool.pytest.ini_options] From eb46e15eb2c39001c52e50d6f3d4c2904658c3ef Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 23 Nov 2022 09:02:47 -0800 Subject: [PATCH 54/89] Minor type improvements (#716) * remove obsolete show_none_errors * tighten up runtime.py * tighten up coverage.py * tighten up linalg.py * tighten up __init__.py * tighten up sort.py * tighten up array.py * tighten up module.py * tighten up eager.py * tighten up deferred.py * tighten up utils.py * fix initial param bug * remove spurious assert --- cunumeric/__init__.py | 2 +- cunumeric/array.py | 10 +++++++--- cunumeric/coverage.py | 2 +- cunumeric/deferred.py | 16 +++++++++------- cunumeric/eager.py | 26 ++++++++++++++------------ cunumeric/linalg/linalg.py | 8 ++++++-- cunumeric/module.py | 14 ++++++++++---- cunumeric/runtime.py | 13 ++++++++----- cunumeric/sort.py | 4 +++- cunumeric/utils.py | 2 +- pyproject.toml | 1 - 11 files changed, 60 insertions(+), 38 deletions(-) diff --git a/cunumeric/__init__.py b/cunumeric/__init__.py index b8b028f9f..7c9e122aa 100644 --- a/cunumeric/__init__.py +++ b/cunumeric/__init__.py @@ -42,4 +42,4 @@ from . 
import _version -__version__ = _version.get_versions()["version"] # type: ignore +__version__ = _version.get_versions()["version"] # type: ignore [no-untyped-call] diff --git a/cunumeric/array.py b/cunumeric/array.py index 2c7b7f770..f40444d58 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -33,10 +33,14 @@ import legate.core.types as ty import numpy as np -import pyarrow # type: ignore +import pyarrow # type: ignore [import] from legate.core import Array -from numpy.core.multiarray import normalize_axis_index # type: ignore -from numpy.core.numeric import normalize_axis_tuple # type: ignore +from numpy.core.multiarray import ( # type: ignore [attr-defined] + normalize_axis_index, +) +from numpy.core.numeric import ( # type: ignore [attr-defined] + normalize_axis_tuple, +) from typing_extensions import ParamSpec from .config import ( diff --git a/cunumeric/coverage.py b/cunumeric/coverage.py index f4d2e0128..f8f4446ae 100644 --- a/cunumeric/coverage.py +++ b/cunumeric/coverage.py @@ -78,7 +78,7 @@ class CuWrapperMetadata: class CuWrapped(AnyCallable, Protocol): _cunumeric: CuWrapperMetadata - __wrapped__: Any + __wrapped__: AnyCallable __name__: str __qualname__: str diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 5c9330ee4..a7ba5d6c0 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -36,7 +36,9 @@ import legate.core.types as ty import numpy as np from legate.core import Annotation, Future, ReductionOp, Store -from numpy.core.numeric import normalize_axis_tuple # type: ignore +from numpy.core.numeric import ( # type: ignore [attr-defined] + normalize_axis_tuple, +) from typing_extensions import ParamSpec from .config import ( @@ -710,7 +712,7 @@ def _create_indexing_array( shift = 0 for dim, k in enumerate(key): if np.isscalar(k): - if k < 0: # type: ignore + if k < 0: # type: ignore [operator] k += store.shape[dim + shift] store = store.project(dim + shift, k) shift -= 1 @@ -787,7 +789,7 @@ def _get_view(self, key: Any) -> DeferredArray: elif isinstance(k, slice): k, store = self._slice_store(k, store, dim + shift) elif np.isscalar(k): - if k < 0: # type: ignore + if k < 0: # type: ignore [operator] k += store.shape[dim + shift] store = store.project(dim + shift, k) shift -= 1 @@ -3032,7 +3034,7 @@ def unary_reduction( args: Any, initial: Any, ) -> None: - lhs_array = self + lhs_array: Union[NumPyThunk, DeferredArray] = self rhs_array = src assert lhs_array.ndim <= rhs_array.ndim @@ -3040,7 +3042,7 @@ def unary_reduction( if argred: argred_dtype = self.runtime.get_arg_dtype(rhs_array.dtype) - lhs_array = self.runtime.create_empty_thunk( # type: ignore + lhs_array = self.runtime.create_empty_thunk( lhs_array.shape, dtype=argred_dtype, inputs=[self], @@ -3060,7 +3062,7 @@ def unary_reduction( lhs_array.fill(np.array(fill_value, dtype=lhs_array.dtype)) - lhs = lhs_array.base + lhs = lhs_array.base # type: ignore while lhs.ndim > 1: lhs = lhs.project(0, 0) @@ -3094,7 +3096,7 @@ def unary_reduction( # If output dims is not 0, then we must have axes assert axes is not None # Reduction to a smaller array - result = lhs_array.base + result = lhs_array.base # type: ignore if keepdims: for axis in axes: result = result.project(axis, 0) diff --git a/cunumeric/eager.py b/cunumeric/eager.py index 530b805c5..0c792fbae 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -215,14 +215,17 @@ def __init__( self.key: Optional[tuple[Any, ...]] = key #: if this ever becomes set (to a DeferredArray), we forward all #: operations to it - self.deferred: 
Optional[DeferredArray] = None + self.deferred: Optional[Union[DeferredArray, NumPyThunk]] = None self.escaped = False @property def storage(self) -> Union[Future, tuple[Region, FieldID]]: if self.deferred is None: self.to_deferred_array() - return self.deferred.storage # type: ignore + + assert self.deferred is not None + + return self.deferred.storage @property def shape(self) -> NdShape: @@ -265,10 +268,9 @@ def _convert_children(self) -> None: assert self.runtime.is_deferred_array(self.deferred) for child in self.children: if child.deferred is None: - # mypy can't deduce that children nodes will always have - # their .key attribute set. - func = getattr(self.deferred, child.key[0]) # type: ignore - args = child.key[1:] # type: ignore + assert child.key is not None + func = getattr(self.deferred, child.key[0]) + args = child.key[1:] child.deferred = func(*args) # After we've made all the deferred views for each child then # we can traverse down. Do it this way so we can get partition @@ -298,7 +300,7 @@ def to_deferred_array(self) -> DeferredArray: shape=self.shape, ) else: - self.deferred = self.runtime.find_or_create_array_thunk( # type: ignore # noqa E501 + self.deferred = self.runtime.find_or_create_array_thunk( self.array, share=self.escaped, defer=True, @@ -334,7 +336,7 @@ def convolve(self, v: Any, out: Any, mode: ConvolveMode) -> None: if self.ndim == 1: out.array = np.convolve(self.array, v.array, mode) else: - from scipy.signal import convolve # type: ignore + from scipy.signal import convolve # type: ignore [import] out.array = convolve(self.array, v.array, mode) @@ -1468,10 +1470,9 @@ def unary_reduction( return if op in _UNARY_RED_OPS: fn = _UNARY_RED_OPS[op] - if initial is None: - # NumPy starts using this predefined constant, instead of None, - # to mean no value was given by the caller - initial = np._NoValue # type: ignore + # Need to be more careful here, Numpy does not use None to mean + # "was not passed in" in this instance + kws = {"initial": initial} if initial is not None else {} fn( rhs.array, out=self.array, @@ -1480,6 +1481,7 @@ def unary_reduction( where=where if not isinstance(where, EagerArray) else where.array, + **kws, ) elif op == UnaryRedCode.ARGMAX: np.argmax( diff --git a/cunumeric/linalg/linalg.py b/cunumeric/linalg/linalg.py index 1eb8454c6..6474f56f3 100644 --- a/cunumeric/linalg/linalg.py +++ b/cunumeric/linalg/linalg.py @@ -17,8 +17,12 @@ from typing import TYPE_CHECKING, Sequence, Union import numpy as np -from numpy.core.multiarray import normalize_axis_index # type: ignore -from numpy.core.numeric import normalize_axis_tuple # type: ignore +from numpy.core.multiarray import ( # type: ignore [attr-defined] + normalize_axis_index, +) +from numpy.core.numeric import ( # type: ignore [attr-defined] + normalize_axis_tuple, +) from cunumeric._ufunc.math import add, sqrt as _sqrt from cunumeric.array import add_boilerplate, convert_to_cunumeric_ndarray diff --git a/cunumeric/module.py b/cunumeric/module.py index 95b5350cf..a2a972087 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -23,7 +23,9 @@ import numpy as np import opt_einsum as oe # type: ignore [import] -from numpy.core.multiarray import normalize_axis_index # type: ignore +from numpy.core.multiarray import ( # type: ignore [attr-defined] + normalize_axis_index, +) from numpy.core.numeric import ( # type: ignore [attr-defined] normalize_axis_tuple, ) @@ -4012,9 +4014,13 @@ def tensordot( # Trivial multi-tensor contraction strategy: contract in input order -class 
NullOptimizer(oe.paths.PathOptimizer): # type: ignore - def __call__( # type: ignore [no-untyped-def] - self, inputs, output, size_dict, memory_limit=None +class NullOptimizer(oe.paths.PathOptimizer): # type: ignore [misc,no-any-unimported] # noqa + def __call__( + self, + inputs: list[set[str]], + outputs: set[str], + size_dict: dict[str, int], + memory_limit: Union[int, None] = None, ) -> list[tuple[int, int]]: return [(0, 1)] + [(0, -1)] * (len(inputs) - 2) diff --git a/cunumeric/runtime.py b/cunumeric/runtime.py index 2fdb97822..26d8ab207 100644 --- a/cunumeric/runtime.py +++ b/cunumeric/runtime.py @@ -51,6 +51,8 @@ from legate.core._legion.future import Future from legate.core.operation import AutoTask, ManualTask + from .array import ndarray + ARGS = [ Argument( "test", @@ -351,7 +353,7 @@ def is_supported_type(self, dtype: Union[str, np.dtype[Any]]) -> bool: def get_numpy_thunk( self, - obj: Any, + obj: Union[ndarray, npt.NDArray[Any]], share: bool = False, dtype: Optional[np.dtype[Any]] = None, ) -> NumPyThunk: @@ -403,11 +405,12 @@ def compute_parent_child_mapping( # slice object that was used to generate a child array from # a parent array so we can build the same mapping from a # logical region to a subregion - parent_ptr = int(array.base.ctypes.data) # type: ignore + assert array.base is not None + parent_ptr = int(array.base.ctypes.data) child_ptr = int(array.ctypes.data) assert child_ptr >= parent_ptr ptr_diff = child_ptr - parent_ptr - parent_shape = array.base.shape # type: ignore + parent_shape = array.base.shape div = ( reduce(lambda x, y: x * y, parent_shape) if len(parent_shape) > 1 @@ -425,8 +428,8 @@ def compute_parent_child_mapping( key: tuple[Union[slice, None], ...] = () child_idx = 0 child_strides = tuple(array.strides) - parent_strides = tuple(array.base.strides) # type: ignore - for idx in range(array.base.ndim): # type: ignore + parent_strides = tuple(array.base.strides) + for idx in range(array.base.ndim): # Handle the adding and removing dimension cases if parent_strides[idx] == 0: # This was an added dimension in the parent diff --git a/cunumeric/sort.py b/cunumeric/sort.py index 86fa1177e..fbca9146a 100644 --- a/cunumeric/sort.py +++ b/cunumeric/sort.py @@ -17,7 +17,9 @@ from typing import TYPE_CHECKING, Union, cast from legate.core import types as ty -from numpy.core.multiarray import normalize_axis_index # type: ignore +from numpy.core.multiarray import ( # type: ignore [attr-defined] + normalize_axis_index, +) from .config import CuNumericOpCode diff --git a/cunumeric/utils.py b/cunumeric/utils.py index fa5b4462d..25f0f19f1 100644 --- a/cunumeric/utils.py +++ b/cunumeric/utils.py @@ -114,7 +114,7 @@ def get_arg_dtype(dtype: np.dtype[Any]) -> np.dtype[Any]: def get_arg_value_dtype(dtype: np.dtype[Any]) -> np.dtype[Any]: dt = dtype.fields["arg_value"][0].type # type: ignore [index] - return cast(Any, dt) + return cast(np.dtype[Any], dt) Modes = Tuple[List[str], List[str], List[str]] diff --git a/pyproject.toml b/pyproject.toml index 5ac994ab9..73ebc13c8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -75,7 +75,6 @@ warn_no_return = true warn_return_any = false warn_unreachable = true -show_none_errors = true ignore_errors = false allow_untyped_globals = false From 3077504ca122a1ff8ab520c94b1d852680435f7a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 23 Nov 2022 13:25:42 -0800 Subject: [PATCH 55/89] [pre-commit.ci] pre-commit autoupdate (#712) MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/pre-commit/mirrors-mypy: v0.982 → v0.991](https://github.com/pre-commit/mirrors-mypy/compare/v0.982...v0.991) - [github.com/pre-commit/mirrors-clang-format: v14.0.6 → v15.0.4](https://github.com/pre-commit/mirrors-clang-format/compare/v14.0.6...v15.0.4) * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- .../execution_policy/indexing/parallel_loop.h | 3 +-- src/cunumeric/matrix/contract.cu | 3 +-- src/cunumeric/matrix/contract_template.inl | 18 ++++++------------ src/cunumeric/matrix/gemm_template.inl | 15 +++++---------- src/cunumeric/matrix/matmul_template.inl | 3 +-- src/cunumeric/matrix/matvecmul_template.inl | 3 +-- src/cunumeric/matrix/potrf_template.inl | 15 +++++---------- src/cunumeric/matrix/solve_template.inl | 15 +++++---------- src/cunumeric/matrix/syrk_template.inl | 15 +++++---------- src/cunumeric/matrix/trsm_template.inl | 15 +++++---------- src/cunumeric/scan/scan_global_util.h | 3 +-- src/cunumeric/scan/scan_local_util.h | 3 +-- src/cunumeric/sort/sort.cu | 9 +++------ src/cunumeric/unary/convert_util.h | 3 +-- src/cunumeric/unary/unary_red_util.h | 9 +++------ 16 files changed, 46 insertions(+), 90 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 798efa23d..dc8cecafe 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v0.982' + rev: 'v0.991' hooks: - id: mypy language: system @@ -19,7 +19,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v14.0.6' # Use the sha / tag you want to point at + rev: 'v15.0.4' # Use the sha / tag you want to point at hooks: - id: clang-format files: \.(cu|cuh|h|cc|inl)$ diff --git a/src/cunumeric/execution_policy/indexing/parallel_loop.h b/src/cunumeric/execution_policy/indexing/parallel_loop.h index 31adf811f..609ed04ca 100644 --- a/src/cunumeric/execution_policy/indexing/parallel_loop.h +++ b/src/cunumeric/execution_policy/indexing/parallel_loop.h @@ -21,8 +21,7 @@ namespace cunumeric { template -struct ParallelLoopPolicy { -}; +struct ParallelLoopPolicy {}; template struct ParallelLoopPolicy { diff --git a/src/cunumeric/matrix/contract.cu b/src/cunumeric/matrix/contract.cu index 722916043..7a66e9ba8 100644 --- a/src/cunumeric/matrix/contract.cu +++ b/src/cunumeric/matrix/contract.cu @@ -26,8 +26,7 @@ using namespace Legion; namespace { // anonymous template -struct contract_helper { -}; +struct contract_helper {}; template <> struct contract_helper<__half> { diff --git a/src/cunumeric/matrix/contract_template.inl b/src/cunumeric/matrix/contract_template.inl index 6bd375e5e..d067cafd3 100644 --- a/src/cunumeric/matrix/contract_template.inl +++ b/src/cunumeric/matrix/contract_template.inl @@ -33,23 +33,17 @@ template struct ContractImplBody; template -struct support_contract : std::false_type { -}; +struct support_contract : std::false_type {}; template <> -struct support_contract : std::true_type { -}; +struct support_contract : std::true_type {}; template <> -struct support_contract : std::true_type { -}; +struct support_contract : std::true_type {}; template <> -struct support_contract : std::true_type { -}; +struct support_contract : std::true_type {}; template <> 
-struct support_contract : std::true_type { -}; +struct support_contract : std::true_type {}; template <> -struct support_contract : std::true_type { -}; +struct support_contract : std::true_type {}; #if 0 // debugging output diff --git a/src/cunumeric/matrix/gemm_template.inl b/src/cunumeric/matrix/gemm_template.inl index 15d2c8f27..4ccf089b7 100644 --- a/src/cunumeric/matrix/gemm_template.inl +++ b/src/cunumeric/matrix/gemm_template.inl @@ -28,20 +28,15 @@ template struct GemmImplBody; template -struct support_gemm : std::false_type { -}; +struct support_gemm : std::false_type {}; template <> -struct support_gemm : std::true_type { -}; +struct support_gemm : std::true_type {}; template <> -struct support_gemm : std::true_type { -}; +struct support_gemm : std::true_type {}; template <> -struct support_gemm : std::true_type { -}; +struct support_gemm : std::true_type {}; template <> -struct support_gemm : std::true_type { -}; +struct support_gemm : std::true_type {}; template struct GemmImpl { diff --git a/src/cunumeric/matrix/matmul_template.inl b/src/cunumeric/matrix/matmul_template.inl index 285c6ceec..5ee01a23c 100644 --- a/src/cunumeric/matrix/matmul_template.inl +++ b/src/cunumeric/matrix/matmul_template.inl @@ -29,8 +29,7 @@ template struct MatMulImplBody; template -struct support_matmul : std::false_type { -}; +struct support_matmul : std::false_type {}; template <> struct support_matmul : std::true_type { using ACC_TYPE = double; diff --git a/src/cunumeric/matrix/matvecmul_template.inl b/src/cunumeric/matrix/matvecmul_template.inl index 57e3970e4..7ccb73b6d 100644 --- a/src/cunumeric/matrix/matvecmul_template.inl +++ b/src/cunumeric/matrix/matvecmul_template.inl @@ -29,8 +29,7 @@ template struct MatVecMulImplBody; template -struct support_matvecmul : std::false_type { -}; +struct support_matvecmul : std::false_type {}; template <> struct support_matvecmul : std::true_type { using ACC_TYPE = double; diff --git a/src/cunumeric/matrix/potrf_template.inl b/src/cunumeric/matrix/potrf_template.inl index 05ff60b46..eea21d8cc 100644 --- a/src/cunumeric/matrix/potrf_template.inl +++ b/src/cunumeric/matrix/potrf_template.inl @@ -28,20 +28,15 @@ template struct PotrfImplBody; template -struct support_potrf : std::false_type { -}; +struct support_potrf : std::false_type {}; template <> -struct support_potrf : std::true_type { -}; +struct support_potrf : std::true_type {}; template <> -struct support_potrf : std::true_type { -}; +struct support_potrf : std::true_type {}; template <> -struct support_potrf : std::true_type { -}; +struct support_potrf : std::true_type {}; template <> -struct support_potrf : std::true_type { -}; +struct support_potrf : std::true_type {}; template struct PotrfImpl { diff --git a/src/cunumeric/matrix/solve_template.inl b/src/cunumeric/matrix/solve_template.inl index bff40ad9c..12fdf3f10 100644 --- a/src/cunumeric/matrix/solve_template.inl +++ b/src/cunumeric/matrix/solve_template.inl @@ -30,20 +30,15 @@ template struct SolveImplBody; template -struct support_solve : std::false_type { -}; +struct support_solve : std::false_type {}; template <> -struct support_solve : std::true_type { -}; +struct support_solve : std::true_type {}; template <> -struct support_solve : std::true_type { -}; +struct support_solve : std::true_type {}; template <> -struct support_solve : std::true_type { -}; +struct support_solve : std::true_type {}; template <> -struct support_solve : std::true_type { -}; +struct support_solve : std::true_type {}; template struct SolveImpl { diff --git 
a/src/cunumeric/matrix/syrk_template.inl b/src/cunumeric/matrix/syrk_template.inl index 739581bcb..9b1184eef 100644 --- a/src/cunumeric/matrix/syrk_template.inl +++ b/src/cunumeric/matrix/syrk_template.inl @@ -28,20 +28,15 @@ template struct SyrkImplBody; template -struct support_syrk : std::false_type { -}; +struct support_syrk : std::false_type {}; template <> -struct support_syrk : std::true_type { -}; +struct support_syrk : std::true_type {}; template <> -struct support_syrk : std::true_type { -}; +struct support_syrk : std::true_type {}; template <> -struct support_syrk : std::true_type { -}; +struct support_syrk : std::true_type {}; template <> -struct support_syrk : std::true_type { -}; +struct support_syrk : std::true_type {}; template struct SyrkImpl { diff --git a/src/cunumeric/matrix/trsm_template.inl b/src/cunumeric/matrix/trsm_template.inl index 40dd2ca17..ae2b7b840 100644 --- a/src/cunumeric/matrix/trsm_template.inl +++ b/src/cunumeric/matrix/trsm_template.inl @@ -28,20 +28,15 @@ template struct TrsmImplBody; template -struct support_trsm : std::false_type { -}; +struct support_trsm : std::false_type {}; template <> -struct support_trsm : std::true_type { -}; +struct support_trsm : std::true_type {}; template <> -struct support_trsm : std::true_type { -}; +struct support_trsm : std::true_type {}; template <> -struct support_trsm : std::true_type { -}; +struct support_trsm : std::true_type {}; template <> -struct support_trsm : std::true_type { -}; +struct support_trsm : std::true_type {}; template struct TrsmImpl { diff --git a/src/cunumeric/scan/scan_global_util.h b/src/cunumeric/scan/scan_global_util.h index b53ada288..502b9720c 100644 --- a/src/cunumeric/scan/scan_global_util.h +++ b/src/cunumeric/scan/scan_global_util.h @@ -41,8 +41,7 @@ constexpr decltype(auto) op_dispatch(ScanCode op_code, Functor f, Fnargs&&... ar } template -struct ScanOp { -}; +struct ScanOp {}; template struct ScanOp : thrust::plus> { diff --git a/src/cunumeric/scan/scan_local_util.h b/src/cunumeric/scan/scan_local_util.h index 7f1eefc7d..0cfbacb00 100644 --- a/src/cunumeric/scan/scan_local_util.h +++ b/src/cunumeric/scan/scan_local_util.h @@ -53,8 +53,7 @@ constexpr decltype(auto) op_dispatch(ScanCode op_code, } template -struct ScanOp { -}; +struct ScanOp {}; template struct ScanOp : thrust::plus> { diff --git a/src/cunumeric/sort/sort.cu b/src/cunumeric/sort/sort.cu index 0297056d1..af931c807 100644 --- a/src/cunumeric/sort/sort.cu +++ b/src/cunumeric/sort/sort.cu @@ -42,14 +42,11 @@ namespace cunumeric { template -struct support_cub : std::true_type { -}; +struct support_cub : std::true_type {}; template <> -struct support_cub : std::false_type { -}; +struct support_cub : std::false_type {}; template <> -struct support_cub : std::false_type { -}; +struct support_cub : std::false_type {}; template ::value>* = nullptr> void local_sort(const legate_type_of* values_in, diff --git a/src/cunumeric/unary/convert_util.h b/src/cunumeric/unary/convert_util.h index 03e3692c8..f58c0265c 100644 --- a/src/cunumeric/unary/convert_util.h +++ b/src/cunumeric/unary/convert_util.h @@ -44,8 +44,7 @@ constexpr decltype(auto) op_dispatch(ConvertCode nan_op, Functor f, Fnargs&&... 
} template -struct ConvertOp { -}; +struct ConvertOp {}; template struct ConvertOp { diff --git a/src/cunumeric/unary/unary_red_util.h b/src/cunumeric/unary/unary_red_util.h index ab193a7df..94536cd04 100644 --- a/src/cunumeric/unary/unary_red_util.h +++ b/src/cunumeric/unary/unary_red_util.h @@ -36,14 +36,11 @@ enum class UnaryRedCode : int { }; template -struct is_arg_reduce : std::false_type { -}; +struct is_arg_reduce : std::false_type {}; template <> -struct is_arg_reduce : std::true_type { -}; +struct is_arg_reduce : std::true_type {}; template <> -struct is_arg_reduce : std::true_type { -}; +struct is_arg_reduce : std::true_type {}; template constexpr decltype(auto) op_dispatch(UnaryRedCode op_code, Functor f, Fnargs&&... args) From e07c26d4be0a73a54f2889045c2867a98bf22680 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 28 Nov 2022 12:04:26 -0800 Subject: [PATCH 56/89] Update Availability annotations (#715) * Update Availability annotations according to proposed policy * Missing documentation and case for ndarray.view --- cunumeric/array.py | 44 +++++++++++++++++++++++++++++++++++--- cunumeric/logic.py | 6 +++--- cunumeric/random/random.py | 4 ++++ 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index f40444d58..d163e4ca4 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -541,6 +541,10 @@ def flat(self) -> np.flatiter[npt.NDArray[Any]]: -------- flatten : Return a copy of the array collapsed into one dimension. + Availability + -------- + Single CPU + """ return self.__array__().flat @@ -2645,7 +2649,7 @@ def dump(self, file: Union[str, Path]) -> None: Availability -------- - Multiple GPUs, Multiple CPUs + Single CPU """ self.__array__().dump(file=file) @@ -3645,7 +3649,7 @@ def tofile(self, fid: Any, sep: str = "", format: str = "%s") -> None: Availability -------- - Multiple GPUs, Multiple CPUs + Single CPU """ return self.__array__().tofile(fid=fid, sep=sep, format=format) @@ -3817,12 +3821,46 @@ def flip(self, axis: Any = None) -> ndarray: def view( self, dtype: Union[npt.DTypeLike, None] = None, - type: Union[Any, None] = None, + type: Union[type, None] = None, ) -> ndarray: + """ + New view of array with the same data. + + Parameters + ---------- + dtype : data-type or ndarray sub-class, optional + Data-type descriptor of the returned view, e.g., float32 or int16. + Omitting it results in the view having the same data-type as the + input array. This argument can also be specified as an ndarray + sub-class, which then specifies the type of the returned object + (this is equivalent to setting the ``type`` parameter). + type : ndarray sub-class, optional + Type of the returned view, e.g., ndarray or matrix. Again, omission + of the parameter results in type preservation. + + Notes + ----- + cuNumeric does not currently support type reinterpretation, or + conversion to ndarray sub-classes; use :func:`ndarray.__array__()` to + convert to `numpy.ndarray`. 
+ + See Also + -------- + numpy.ndarray.view + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ if dtype is not None and dtype != self.dtype: raise NotImplementedError( "cuNumeric does not currently support type reinterpretation" ) + if type is not None: + raise NotImplementedError( + "cuNumeric does not currently support conversion to ndarray " + "sub-classes; use __array__() to convert to numpy.ndarray" + ) return ndarray(shape=self.shape, dtype=self.dtype, thunk=self._thunk) def unique(self) -> ndarray: diff --git a/cunumeric/logic.py b/cunumeric/logic.py index 5cafffdc5..667ae1d13 100644 --- a/cunumeric/logic.py +++ b/cunumeric/logic.py @@ -176,7 +176,7 @@ def iscomplexobj(x: Union[ndarray, npt.NDArray[Any]]) -> bool: Availability -------- - Single CPU + Multiple GPUs, Multiple CPUs """ if isinstance(x, ndarray): return x.dtype.kind == "c" @@ -244,7 +244,7 @@ def isrealobj(x: ndarray) -> bool: Availability -------- - Single CPU + Multiple GPUs, Multiple CPUs """ return not iscomplexobj(x) @@ -275,7 +275,7 @@ def isscalar(x: Union[ndarray, npt.NDArray[Any]]) -> bool: Availability -------- - Single CPU + Multiple GPUs, Multiple CPUs """ # Since the input can be any value, we can't just convert it to cunumeric diff --git a/cunumeric/random/random.py b/cunumeric/random/random.py index 7a036a86e..7f37e5651 100644 --- a/cunumeric/random/random.py +++ b/cunumeric/random/random.py @@ -37,6 +37,10 @@ def seed(init: Union[int, None] = None) -> None: This function is effective only when cuRAND is NOT used in the build and is a no-op otherwise. + + Availability + -------- + Multiple GPUs, Multiple CPUs """ if init is None: init = 0 From ae1faf4d913dd088add4396711fa53c64cd47c7d Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Tue, 29 Nov 2022 09:05:15 +0800 Subject: [PATCH 57/89] Enhance test_where.py and test_atleast_nd.py (#717) * Enhance test_where.py and test_atleast_nd.py * Fix naming. 
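The new tests route all NumPy-vs-cuNumeric comparisons through the shared `check_module_function` helper (imported from the integration-test `utils` package) instead of the local `_check` helper removed below. As a minimal sketch, the helper is assumed to behave roughly like the `_check` it replaces; the body below is illustrative only, modeled on the removed code, and not the exact implementation from the test utilities:

    import numpy as np
    import cunumeric as num

    def check_module_function(fn_name, args, kwargs, print_msg):
        # Run the same module-level routine under NumPy and cuNumeric
        # and assert the results match elementwise.
        res_np = getattr(np, fn_name)(*args, **kwargs)
        res_num = getattr(num, fn_name)(*args, **kwargs)
        is_equal = len(res_np) == len(res_num) and all(
            np.array_equal(x, y) for x, y in zip(res_np, res_num)
        )
        assert is_equal, (
            f"Failed, {print_msg}\n"
            f"numpy result: {res_np}\n"
            f"cunumeric result: {res_num}"
        )
        print(f"Passed, {print_msg}")
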
--- tests/integration/test_atleast_nd.py | 71 +++++++----- tests/integration/test_where.py | 157 ++++++++++++++++++++++----- 2 files changed, 170 insertions(+), 58 deletions(-) diff --git a/tests/integration/test_atleast_nd.py b/tests/integration/test_atleast_nd.py index 62bc80d5e..3946cb92f 100644 --- a/tests/integration/test_atleast_nd.py +++ b/tests/integration/test_atleast_nd.py @@ -16,35 +16,10 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM +from utils.utils import check_module_function import cunumeric as num - -def _check(a, routine, sizes): - b = getattr(np, routine)(*a) - c = getattr(num, routine)(*a) - is_equal = True - err_arr = [b, c] - - if len(b) != len(c): - is_equal = False - err_arr = [b, c] - else: - for each in zip(b, c): - if not np.array_equal(*each): - err_arr = each - is_equal = False - break - print_msg = f"np.{routine}({sizes})" - assert is_equal, ( - f"Failed, {print_msg}\n" - f"numpy result: {err_arr[0]}\n" - f"cunumeric_result: {err_arr[1]}\n" - f"cunumeric and numpy shows different result\n" - ) - print(f"Passed, {print_msg}, np: {b}, cunumeric: {c}") - - DIM = 10 SIZE_CASES = list((DIM,) * ndim for ndim in range(LEGATE_MAX_DIM + 1)) @@ -59,26 +34,62 @@ def _check(a, routine, sizes): @pytest.mark.parametrize("size", SIZE_CASES, ids=str) def test_atleast_1d(size): a = [np.arange(np.prod(size)).reshape(size)] - _check(a, "atleast_1d", size) + print_msg = f"np & cunumeric.atleast_1d(size={size})" + check_module_function("atleast_1d", a, {}, print_msg) + + +def test_atleast_1d_scalar(): + a = 1.0 + assert np.array_equal(np.atleast_1d(a), num.atleast_1d(a)) + + +def test_atleast_1d_none(): + a = None + assert np.array_equal(np.atleast_1d(a), num.atleast_1d(a)) @pytest.mark.parametrize("size", SIZE_CASES, ids=str) def test_atleast_2d(size): a = [np.arange(np.prod(size)).reshape(size)] - _check(a, "atleast_2d", size) + print_msg = f"np & cunumeric.atleast_2d(size={size})" + check_module_function("atleast_2d", a, {}, print_msg) + + +def test_atleast_2d_scalar(): + a = 1.0 + assert np.array_equal(np.atleast_2d(a), num.atleast_2d(a)) + + +def test_atleast_2d_none(): + a = None + assert np.array_equal(np.atleast_2d(a), num.atleast_2d(a)) @pytest.mark.parametrize("size", SIZE_CASES, ids=str) def test_atleast_3d(size): a = [np.arange(np.prod(size)).reshape(size)] - _check(a, "atleast_3d", size) + print_msg = f"np & cunumeric.atleast_3d(size={size})" + check_module_function("atleast_3d", a, {}, print_msg) + + +def test_atleast_3d_scalar(): + a = 1.0 + assert np.array_equal(np.atleast_3d(a), num.atleast_3d(a)) + + +def test_atleast_3d_none(): + a = None + assert np.array_equal(np.atleast_3d(a), num.atleast_3d(a)) # test to run atleast_nd w/ list of arrays @pytest.mark.parametrize("dim", range(1, 4)) def test_atleast_nd(dim): a = list(np.arange(np.prod(size)).reshape(size) for size in SIZE_CASES) - _check(a, f"atleast_{dim}d", SIZE_CASES) + scalar = 10.0 + a.append(scalar) + print_msg = f"np & cunumeric.atleast_{dim}d(size={SIZE_CASES})" + check_module_function(f"atleast_{dim}d", a, {}, print_msg) if __name__ == "__main__": diff --git a/tests/integration/test_where.py b/tests/integration/test_where.py index 20d813bd9..cd66c0ce7 100644 --- a/tests/integration/test_where.py +++ b/tests/integration/test_where.py @@ -15,34 +15,130 @@ import numpy as np import pytest +from utils.generators import mk_seq_array import cunumeric as num -np.random.seed(42) - CONDITIONS = [ [[True, False], [True, True]], [[True, False]], [True, False], False, + [[0.0, 1.0], 
[0, -2]], ] def test_basic(): - anp = np.array([1, 54, 4, 4, 0, 45, 5, 58, 0, 9, 0, 4, 0, 0, 0, 5, 0]) - a = num.array(anp) - assert num.array_equal(np.where(anp), num.where(a)) + a_np = np.array([1, 54, 4, 4, 0, 45, 5, 58, 0, 9, 0, 4, 0, 0, 0, 5, 0]) + a_num = num.array(a_np) + assert num.array_equal(np.where(a_np), num.where(a_num)) @pytest.mark.parametrize("cond", CONDITIONS, ids=str) def test_condition(cond): - anp = np.array(cond) - xnp = np.array([[1, 2], [3, 4]]) - ynp = np.array([[9, 8], [7, 6]]) - a = num.array(anp) - x = num.array(xnp) - y = num.array(ynp) - assert np.array_equal(np.where(anp, xnp, ynp), num.where(a, x, y)) + a_np = np.array(cond) + x_np = np.array([[1, 2], [3, 4]]) + y_np = np.array([[9, 8], [7, 6]]) + a_num = num.array(a_np) + x_num = num.array(x_np) + y_num = num.array(y_np) + assert np.array_equal( + np.where(a_np, x_np, y_np), num.where(a_num, x_num, y_num) + ) + + +@pytest.mark.parametrize( + "shape_a", + ((1,), (3,), (1, 3), (3, 3), (2, 3, 3)), + ids=lambda shape_a: f"(shape_a={shape_a})", +) +def test_broadcast(shape_a): + a_num = mk_seq_array(num, shape_a) + a_np = mk_seq_array(np, shape_a) + cond_num = a_num > 5 + cond_np = a_np > 5 + + shape_x = (3, 3) + x_num = mk_seq_array(num, shape_x) + x_np = mk_seq_array(np, shape_x) + shape_y = (1, 3) + y_num = mk_seq_array(num, shape_y) * 10 + y_np = mk_seq_array(np, shape_y) * 10 + + assert np.array_equal( + np.where(cond_np, x_np, y_np), num.where(cond_num, x_num, y_num) + ) + + +@pytest.mark.xfail +def test_condition_none(): + # In Numpy, pass and returns [1, 2] + # In cuNumeric, raises AttributeError: + # 'NoneType' object has no attribute '_maybe_convert' + x = 0 + y_np = np.array([1, 2]) + y_num = num.array(y_np) + assert np.array_equal(np.where(None, x, y_np), num.where(None, x, y_num)) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "values", + ((None, None), (None, 1), (1, None)), + ids=lambda values: f"(values={values})", +) +def test_x_y_none(values): + # For x=None and y=None, + # In Numpy, pass and returns [None, None] + # In cuNumeric, pass and returns (array([0]),) + # For x=None and y=1 + # In Numpy, pass and returns [None, 1] + # In cuNumeric, raises ValueError: both 'x' and 'y' parameters + # must be specified together for where + cond = [True, False] + a_np = np.array(cond) + a_num = num.array(a_np) + x, y = values + assert np.array_equal(np.where(a_np, x, y), num.where(a_num, x, y)) + + +def test_x_y_type(): + x_np = np.arange(4, dtype=np.int32) + y_np = np.arange(4, dtype=np.float32) * 2.2 + x_num = num.array(x_np) + y_num = num.array(y_np) + + res_np = np.where(x_np > 2.0, x_np, y_np) + res_num = num.where(x_num > 2.0, x_num, y_num) + + assert np.array_equal(res_np, res_num) + assert res_np.dtype == res_num.dtype + + +def test_condition_empty(): + cond_num = num.array([]) + cond_np = np.array([]) + x = 0 + y = 1 + assert np.array_equal(np.where(cond_np, x, y), num.where(cond_num, x, y)) + + +class TestWhereErrors: + @pytest.mark.parametrize( + "shape_y", + ((0,), (2,), (1, 2), (4, 1)), + ids=lambda shape_y: f"(shape_y={shape_y})", + ) + def test_x_y_bad_shape(self, shape_y): + shape_a = (3, 3) + a = mk_seq_array(num, shape_a) + cond = a > 5 + x = 1 + y = mk_seq_array(num, shape_y) + + msg = "shape mismatch" + with pytest.raises(ValueError, match=msg): + num.where(cond, x, y) INPUT = [ @@ -60,22 +156,27 @@ def test_condition(cond): @pytest.mark.parametrize("input", INPUT, ids=str) def test_argwhere(input): - anp = np.array(input) - a = num.array(anp) - assert 
np.array_equal(np.argwhere(anp), num.argwhere(a)) - - -@pytest.mark.skip -def test_extract(): - cnp = np.array( - [1, 54, 4, 4, 0, 45, 5, 58, 0, 9, 0, 4, 0, 0, 0, 5, 0, 1] - ).reshape( - (6, 3) - ) # noqa E501 - c = num.array(cnp) - bnp = np.random.randn(6, 3) - b = num.array(bnp) - assert num.array_equal(num.extract(c, b), np.extract(cnp, bnp)) + a_np = np.array(input) + a_num = num.array(a_np) + assert np.array_equal(np.argwhere(a_np), num.argwhere(a_num)) + + +@pytest.mark.xfail +def test_argwhere_none(): + # In Numpy, it pass and returns [] + # In cuNumeric, it raises AttributeError: + # 'NoneType' object has no attribute '_thunk' + assert np.array_equal(np.argwhere(None), num.argwhere(None)) + + +def test_argwhere_empty(): + a_np = np.array([]) + a_num = num.array(a_np) + assert np.array_equal(np.argwhere(a_np), num.argwhere(a_num)) + + +def test_argwhere_scalar(): + assert np.array_equal(np.argwhere(1), num.argwhere(1)) if __name__ == "__main__": From 4b4b924c13e71c49646c6a321468398d72c0fa43 Mon Sep 17 00:00:00 2001 From: xialu00 <110973296+xialu00@users.noreply.github.com> Date: Tue, 29 Nov 2022 12:28:03 +0800 Subject: [PATCH 58/89] Testcase enhance for test_bincount.py (#708) * add negative test case for test_convolve.py * add test case for test_astype.py * add test case for test_astype.py * fix bug * enhance test_bincount.py * enhance test_bincount.py * enhance test_cholesky.py --- tests/integration/test_bincount.py | 30 ++++++++++++++++++++++++++++++ tests/integration/test_cholesky.py | 19 +++++++++++++++++++ 2 files changed, 49 insertions(+) diff --git a/tests/integration/test_bincount.py b/tests/integration/test_bincount.py index b1d9fd4c5..f137e9151 100644 --- a/tests/integration/test_bincount.py +++ b/tests/integration/test_bincount.py @@ -27,6 +27,36 @@ MINLENGTHS = [0, 5, 15] +def test_dtype_negative(): + arr = num.arange(5, dtype=float) + msg = r"integer type" + with pytest.raises(TypeError, match=msg): + num.bincount(arr) + + +def test_weight_mismatch(): + v_num = num.random.randint(0, 9, size=N) + w_num = num.random.randn(N + 1) + msg = r"same shape" + with pytest.raises(ValueError, match=msg): + num.bincount(v_num, weights=w_num) + + +def test_out_size(): + arr = num.array([0, 1, 1, 3, 2, 1, 7, 23]) + assert num.bincount(arr).size == num.amax(arr) + 1 + + +@pytest.mark.skip() +def test_array_ndim(): + size = (2,) * 3 + arr = num.random.randint(0, high=9, size=size) + # Numpy raises : ValueError: object too deep for desired array + # cuNumeric run aborted + with pytest.raises(ValueError): + num.bincount(arr) + + @pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("minlength", MINLENGTHS) def test_bincount_basic(dtype, minlength): diff --git a/tests/integration/test_cholesky.py b/tests/integration/test_cholesky.py index b630aba70..6ed8e35f8 100644 --- a/tests/integration/test_cholesky.py +++ b/tests/integration/test_cholesky.py @@ -22,6 +22,25 @@ SIZES = [8, 9, 255, 512] +def test_matrix(): + arr = [[1, -2j], [2j, 5]] + np_out = np.linalg.cholesky(arr) + num_out = num.linalg.cholesky(arr) + assert np.array_equal(np_out, num_out) + + +def test_array_negative_1dim(): + arr = num.random.randint(0, 9, size=(3,)) + with pytest.raises(ValueError): + num.linalg.cholesky(arr) + + +def test_array_negative_3dim(): + arr = num.random.randint(0, 9, size=(3, 3, 3)) + with pytest.raises(NotImplementedError): + num.linalg.cholesky(arr) + + def test_diagonal(): a = num.eye(10) * 10.0 b = num.linalg.cholesky(a) From 5b043932e940e9927fdebf8ae89a6b62fc5b8fff Mon Sep 17 
00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 29 Nov 2022 17:50:43 -0800 Subject: [PATCH 59/89] [pre-commit.ci] pre-commit autoupdate (#718) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/flake8: 5.0.4 → 6.0.0](https://github.com/PyCQA/flake8/compare/5.0.4...6.0.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index dc8cecafe..e929c0833 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -15,7 +15,7 @@ repos: hooks: - id: black - repo: https://github.com/PyCQA/flake8 - rev: 5.0.4 + rev: 6.0.0 hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format From 2067484fb568a64e457faa137efd4c997bd07abf Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 1 Dec 2022 15:57:43 -0800 Subject: [PATCH 60/89] Fix build under CUDA 11.8 (#723) --- src/cunumeric/random/randutil/generator.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/cunumeric/random/randutil/generator.cuh b/src/cunumeric/random/randutil/generator.cuh index 1b2bb33df..023de9496 100644 --- a/src/cunumeric/random/randutil/generator.cuh +++ b/src/cunumeric/random/randutil/generator.cuh @@ -18,6 +18,8 @@ #include "generator.h" +#include + namespace randutilimpl { static constexpr int blocksPerMultiProcessor = 2; // TODO: refine => number of blocks per mp static constexpr int blockDimX = 256; // TODO: refine ? From d9dd7b33c900cef14c863580cf17930a0498886d Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Tue, 6 Dec 2022 09:15:15 +0800 Subject: [PATCH 61/89] Enhance test_solve.py and test_matrix_power.py (#719) * Enhance test_solve.py and test_matrix_power.py * Address comments * Replace one-letter dtype with full name. 
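One clarifying note on the dtype cleanup: the one-letter character codes used previously in the RTOL/ATOL tables are NumPy shorthand for the explicit scalar types the tests now name, so that part of the change is purely cosmetic. A quick illustration of the mapping seen in the diff:

    import numpy as np

    # The character codes on the left are aliases for the explicit
    # types on the right; both sides construct the same dtype.
    assert np.dtype("f") == np.dtype(np.float32)
    assert np.dtype("d") == np.dtype(np.float64)
    assert np.dtype("F") == np.dtype(np.complex64)
    assert np.dtype("D") == np.dtype(np.complex128)
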
--- cunumeric/linalg/linalg.py | 4 +- tests/integration/test_matrix_power.py | 74 +++++++++++- tests/integration/test_solve.py | 154 +++++++++++++++++++++++-- 3 files changed, 215 insertions(+), 17 deletions(-) diff --git a/cunumeric/linalg/linalg.py b/cunumeric/linalg/linalg.py index 6474f56f3..18ecfa140 100644 --- a/cunumeric/linalg/linalg.py +++ b/cunumeric/linalg/linalg.py @@ -196,9 +196,9 @@ def matrix_power(a: ndarray, n: int) -> ndarray: """ # Process inputs if a.ndim < 2: - raise ValueError(f"Expected at least 2d array, but got {a.ndim}d") + raise LinAlgError(f"Expected at least 2d array, but got {a.ndim}d") if a.shape[-2] != a.shape[-1]: - raise ValueError("Last 2 dimensions of the array must be square") + raise LinAlgError("Last 2 dimensions of the array must be square") if not isinstance(n, int): raise TypeError("exponent must be an integer") diff --git a/tests/integration/test_matrix_power.py b/tests/integration/test_matrix_power.py index d4cfe4b23..de4838798 100644 --- a/tests/integration/test_matrix_power.py +++ b/tests/integration/test_matrix_power.py @@ -22,18 +22,86 @@ import cunumeric as num # TODO: add negative exponents here, once they become supported -EXPONENTS = [0, 1, 3, 5] +EXPONENTS = (0, 1, 2, 3, 5) +@pytest.mark.parametrize( + "dtype", + ( + np.float64, + np.complex128, + pytest.param(np.int32, marks=pytest.mark.xfail), + ), +) @pytest.mark.parametrize("ndim", range(0, LEGATE_MAX_DIM - 2)) @pytest.mark.parametrize("exp", EXPONENTS) -def test_matrix_power(ndim, exp): +def test_matrix_power(ndim, exp, dtype): + # If dtype=np.int32 and exp greater than 1, + # In Numpy, pass + # In cuNumeric, raises TypeError: Unsupported type: int32 shape = (3,) * ndim + (2, 2) + a_np = mk_0to1_array(np, shape, dtype=dtype) + a_num = mk_0to1_array(num, shape, dtype=dtype) + res_np = np.linalg.matrix_power(a_np, exp) + res_num = num.linalg.matrix_power(a_num, exp) + assert allclose(res_np, res_num) + + +@pytest.mark.parametrize( + "exp", + ( + 0, + 1, + pytest.param(2, marks=pytest.mark.xfail), + pytest.param(3, marks=pytest.mark.xfail), + ), +) +def test_matrix_power_empty_matrix(exp): + # If exp =2 or 3, + # In Numpy, pass and returns empty array + # In cuNumeric, raise AssertionError in _contract + shape = (0, 0) a_np = mk_0to1_array(np, shape) a_num = mk_0to1_array(num, shape) res_np = np.linalg.matrix_power(a_np, exp) res_num = num.linalg.matrix_power(a_num, exp) - assert allclose(res_np, res_num) + assert np.array_equal(res_np, res_num) + + +class TestMatrixPowerErrors: + @pytest.mark.parametrize("ndim", (0, 1), ids=lambda ndim: f"(ndim={ndim})") + def test_matrix_ndim_smaller_than_two(self, ndim): + shape = (3,) * ndim + a_num = mk_0to1_array(num, shape) + msg = "Expected at least 2d array" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.matrix_power(a_num, 1) + + @pytest.mark.parametrize( + "shape", ((2, 1), (2, 2, 1)), ids=lambda shape: f"(shape={shape})" + ) + def test_matrix_not_square(self, shape): + a_num = mk_0to1_array(num, shape) + msg = "Last 2 dimensions of the array must be square" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.matrix_power(a_num, 1) + + @pytest.mark.parametrize( + "n", (-1.0, 1.0, [1], None), ids=lambda n: f"(n={n})" + ) + def test_n_not_int(self, n): + shape = (2, 2) + a_num = mk_0to1_array(num, shape) + msg = "exponent must be an integer" + with pytest.raises(TypeError, match=msg): + num.linalg.matrix_power(a_num, n) + + def test_n_negative_int(self): + shape = (2, 2) + n = -1 + a_num = 
mk_0to1_array(num, shape) + with pytest.raises(NotImplementedError): + num.linalg.matrix_power(a_num, n) if __name__ == "__main__": diff --git a/tests/integration/test_solve.py b/tests/integration/test_solve.py index 30b569401..82a204889 100644 --- a/tests/integration/test_solve.py +++ b/tests/integration/test_solve.py @@ -22,23 +22,27 @@ SIZES = (8, 9, 255) RTOL = { - np.dtype("f"): 1e-1, - np.dtype("F"): 1e-1, - np.dtype("d"): 1e-5, - np.dtype("D"): 1e-5, + np.dtype(np.float32): 1e-1, + np.dtype(np.complex64): 1e-1, + np.dtype(np.float64): 1e-5, + np.dtype(np.complex128): 1e-5, } ATOL = { - np.dtype("f"): 1e-3, - np.dtype("F"): 1e-3, - np.dtype("d"): 1e-8, - np.dtype("D"): 1e-8, + np.dtype(np.float32): 1e-3, + np.dtype(np.complex64): 1e-3, + np.dtype(np.float64): 1e-8, + np.dtype(np.complex128): 1e-8, } @pytest.mark.parametrize("n", SIZES) -@pytest.mark.parametrize("a_dtype", ("f", "d", "F", "D")) -@pytest.mark.parametrize("b_dtype", ("f", "d", "F", "D")) +@pytest.mark.parametrize( + "a_dtype", (np.float32, np.float64, np.complex64, np.complex128) +) +@pytest.mark.parametrize( + "b_dtype", (np.float32, np.float64, np.complex64, np.complex128) +) def test_solve_1d(n, a_dtype, b_dtype): a = np.random.rand(n, n).astype(a_dtype) b = np.random.rand(n).astype(b_dtype) @@ -53,8 +57,12 @@ def test_solve_1d(n, a_dtype, b_dtype): @pytest.mark.parametrize("n", SIZES) -@pytest.mark.parametrize("a_dtype", ("f", "d", "F", "D")) -@pytest.mark.parametrize("b_dtype", ("f", "d", "F", "D")) +@pytest.mark.parametrize( + "a_dtype", (np.float32, np.float64, np.complex64, np.complex128) +) +@pytest.mark.parametrize( + "b_dtype", (np.float32, np.float64, np.complex64, np.complex128) +) def test_solve_2d(n, a_dtype, b_dtype): a = np.random.rand(n, n).astype(a_dtype) b = np.random.rand(n, n + 2).astype(b_dtype) @@ -80,6 +88,128 @@ def test_solve_corner_cases(): assert allclose(b, num.matmul(a, out)) +def test_solve_b_is_empty(): + a = num.random.rand(1, 1) + b = num.atleast_2d([]) + + out = num.linalg.solve(a, b) + assert np.array_equal(b, out) + + +@pytest.mark.parametrize("dtype", (np.int32, np.int64)) +def test_solve_dtype_int(dtype): + a = [[1, 4, 5], [2, 3, 1], [9, 5, 2]] + b = [1, 2, 3] + a_num = num.array(a).astype(dtype) + b_num = num.array(b).astype(dtype) + out = num.linalg.solve(a_num, b_num) + + rtol = RTOL[out.dtype] + atol = ATOL[out.dtype] + assert allclose( + b_num, num.matmul(a_num, out), rtol=rtol, atol=atol, check_dtype=False + ) + + +def test_solve_with_output(): + n = 8 + a = np.random.rand(n, n).astype(np.float32) + b = np.random.rand(n).astype(np.float32) + output = np.zeros((n,)).astype(np.float32) + + out = num.linalg.solve(a, b, out=output) + + rtol = RTOL[out.dtype] + atol = ATOL[out.dtype] + assert allclose( + b, num.matmul(a, out), rtol=rtol, atol=atol, check_dtype=False + ) + assert allclose( + b, num.matmul(a, output), rtol=rtol, atol=atol, check_dtype=False + ) + + +class TestSolveErrors: + def setup_method(self): + self.n = 3 + self.a = num.random.rand(self.n, self.n).astype(np.float64) + self.b = num.random.rand(self.n).astype(np.float64) + + def test_a_bad_dim(self): + a = num.random.rand(self.n).astype(np.float64) + msg = "Array must be at least two-dimensional" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.solve(a, self.b) + + a = 10 + msg = "Array must be at least two-dimensional" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.solve(a, self.b) + + def test_b_bad_dim(self): + b = 10 + msg = "Array must be at least one-dimensional" 
+ with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.solve(self.a, b) + + def test_a_dim_greater_than_two(self): + a = num.random.rand(self.n, self.n, self.n).astype(np.float64) + b = num.random.rand(self.n, self.n).astype(np.float64) + with pytest.raises(NotImplementedError): + num.linalg.solve(a, b) + + def test_b_dim_greater_than_two(self): + a = num.random.rand(self.n, self.n).astype(np.float64) + b = num.random.rand(self.n, self.n, self.n).astype(np.float64) + with pytest.raises(NotImplementedError): + num.linalg.solve(a, b) + + def test_a_bad_dtype_float16(self): + a = self.a.astype(np.float16) + msg = "array type float16 is unsupported in linalg" + with pytest.raises(TypeError, match=msg): + num.linalg.solve(a, self.b) + + def test_b_bad_dtype_float16(self): + b = self.b.astype(np.float16) + msg = "array type float16 is unsupported in linalg" + with pytest.raises(TypeError, match=msg): + num.linalg.solve(self.a, b) + + def test_a_last_2_dims_not_square(self): + a = num.random.rand(self.n, self.n + 1).astype(np.float64) + msg = "Last 2 dimensions of the array must be square" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.solve(a, self.b) + + def test_a_b_mismatched_shape(self): + b = num.random.rand(self.n + 1).astype(np.float64) + with pytest.raises(ValueError): + num.linalg.solve(self.a, b) + + b = num.random.rand(self.n + 1, self.n).astype(np.float64) + with pytest.raises(ValueError): + num.linalg.solve(self.a, b) + + def test_output_mismatched_shape(self): + output = num.zeros((self.n + 1,)).astype(np.float64) + msg = "Output shape mismatch" + with pytest.raises(ValueError, match=msg): + num.linalg.solve(self.a, self.b, out=output) + + def test_output_mismatched_dtype(self): + output = num.zeros((self.n,)).astype(np.float32) + msg = "Output type mismatch" + with pytest.raises(TypeError, match=msg): + num.linalg.solve(self.a, self.b, out=output) + + def test_a_singular_matrix(self): + a = num.zeros((self.n, self.n)).astype(np.float64) + msg = "Singular matrix" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.solve(a, self.b) + + if __name__ == "__main__": import sys From 6cf5032faa39a463dd2f15b698403fcb9152b345 Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Wed, 7 Dec 2022 10:05:44 +0800 Subject: [PATCH 62/89] Enhance test_dot.py and test_multi_dot.py (#730) --- tests/integration/test_dot.py | 41 +++++++++++++++ tests/integration/test_multi_dot.py | 77 +++++++++++++++++++++++++++++ 2 files changed, 118 insertions(+) diff --git a/tests/integration/test_dot.py b/tests/integration/test_dot.py index 912cd9ed4..d0157cbf1 100644 --- a/tests/integration/test_dot.py +++ b/tests/integration/test_dot.py @@ -12,10 +12,13 @@ # See the License for the specific language governing permissions and # limitations under the License. 
 #
+import numpy as np
 import pytest
 
 from legate.core import LEGATE_MAX_DIM
 from utils.contractions import check_default
+from utils.generators import mk_0to1_array
 
+import cunumeric as num
 from cunumeric.utils import dot_modes
 
 
@@ -31,6 +34,44 @@ def operation(lib, *args, **kwargs):
     check_default(name, modes, operation)
 
 
+class TestDotErrors:
+    def setup_method(self):
+        self.A = mk_0to1_array(num, (5, 3))
+        self.B = mk_0to1_array(num, (3, 2))
+
+    @pytest.mark.parametrize(
+        "shapeA",
+        ((3,), (4, 3), (5, 4, 3)),
+        ids=lambda shapeA: f"(shapeA={shapeA})",
+    )
+    def test_a_b_invalid_shape(self, shapeA):
+        A = mk_0to1_array(num, shapeA)
+        B = mk_0to1_array(num, (2, 2))
+        with pytest.raises(ValueError):
+            num.dot(A, B)
+
+    @pytest.mark.parametrize(
+        "shape", ((5,), (2,), (5, 3)), ids=lambda shape: f"(shape={shape})"
+    )
+    def test_out_invalid_shape(self, shape):
+        out = num.zeros(shape)
+        with pytest.raises(ValueError):
+            num.dot(self.A, self.B, out=out)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize(
+        "dtype", (np.float32, np.int64), ids=lambda dtype: f"(dtype={dtype})"
+    )
+    def test_out_invalid_dtype(self, dtype):
+        # In NumPy, both np.float32 and np.int64 raise ValueError.
+        # In cuNumeric,
+        # np.float32 passes, and
+        # np.int64 raises TypeError: Unsupported type: int64
+        out = np.zeros((5, 2), dtype=dtype)
+        with pytest.raises(ValueError):
+            np.dot(self.A, self.B, out=out)
+
+
 if __name__ == "__main__":
     import sys
 
diff --git a/tests/integration/test_multi_dot.py b/tests/integration/test_multi_dot.py
index ecba326ef..1c4ca3d05 100644
--- a/tests/integration/test_multi_dot.py
+++ b/tests/integration/test_multi_dot.py
@@ -66,6 +66,83 @@ def test_multi_dot(shapes):
     assert allclose(res_np, out)
 
 
+class TestMultiDotErrors:
+    def setup_method(self):
+        A = mk_0to1_array(num, (2, 2))
+        B = mk_0to1_array(num, (2, 2))
+        C = mk_0to1_array(num, (2, 2))
+        self.arrays = [A, B, C]
+
+    def test_zero_array(self):
+        arrays = []
+        msg = "at least two arrays"
+        with pytest.raises(ValueError, match=msg):
+            num.linalg.multi_dot(arrays)
+
+    def test_one_array(self):
+        arrays = [num.array([[1, 2], [3, 4]])]
+        msg = "at least two arrays"
+        with pytest.raises(ValueError, match=msg):
+            num.linalg.multi_dot(arrays)
+
+    def test_invalid_array_dim_zero(self):
+        A = num.array(3)
+        B = mk_0to1_array(num, (2, 2))
+        C = mk_0to1_array(num, (2, 2))
+        arrays = [A, B, C]
+        with pytest.raises(ValueError):
+            num.linalg.multi_dot(arrays)
+
+    def test_invalid_array_dim_one(self):
+        A = mk_0to1_array(num, (2, 2))
+        B = mk_0to1_array(num, (2,))
+        C = mk_0to1_array(num, (2, 2))
+        arrays = [A, B, C]
+        with pytest.raises(ValueError):
+            num.linalg.multi_dot(arrays)
+
+    def test_invalid_array_dim_three(self):
+        A = mk_0to1_array(num, (2, 2, 2))
+        B = mk_0to1_array(num, (2, 2, 2))
+        C = mk_0to1_array(num, (2, 2, 2))
+        arrays = [A, B, C]
+        with pytest.raises(ValueError):
+            num.linalg.multi_dot(arrays)
+
+    def test_invalid_array_shape(self):
+        A = mk_0to1_array(num, (2, 2))
+        B = mk_0to1_array(num, (3, 2))
+        C = mk_0to1_array(num, (2, 2))
+        arrays = [A, B, C]
+        with pytest.raises(ValueError):
+            num.linalg.multi_dot(arrays)
+
+    def test_out_invalid_dim(self):
+        out = num.zeros((2,))
+        with pytest.raises(ValueError):
+            num.linalg.multi_dot(self.arrays, out=out)
+
+    @pytest.mark.xfail
+    def test_out_invalid_shape(self):
+        # In cuNumeric, it raises AssertionError
+        out = num.zeros((2, 1))
+        with pytest.raises(ValueError):
+            num.linalg.multi_dot(self.arrays, out=out)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize(
+        "dtype", (np.float32, np.int64), ids=lambda dtype: f"(dtype={dtype})"
+    )
+    def test_out_invalid_dtype(self, dtype):
+        # In NumPy, both np.float32 and np.int64 raise ValueError.
+        # In cuNumeric,
+        # np.float32 passes, and
+        # np.int64 raises TypeError: Unsupported type: int64
+        out = num.zeros((2, 2), dtype=dtype)
+        with pytest.raises(ValueError):
+            num.linalg.multi_dot(self.arrays, out=out)
+
+
 if __name__ == "__main__":
     import sys
 
From f1fe90aa351f140bf2fdd6b38f0be1bebcc8c2a9 Mon Sep 17 00:00:00 2001
From: xialu00 <110973296+xialu00@users.noreply.github.com>
Date: Tue, 13 Dec 2022 09:26:43 +0800
Subject: [PATCH 63/89] Enhance test cases in test_reduction.py and
 test_prod.py (#726)

* add negative test case for test_convolve.py
* add test cases for test_astype.py
* enhance test_bincount.py
* enhance test_cholesky.py
* enhance test_reduction.py
* enhance test_prod.py
* fix bug in tests/integration/test_prod.py

---
 tests/integration/test_2d_reduction.py      |  49 ---
 tests/integration/test_3d_reduction.py      |  38 ---
 tests/integration/test_prod.py              | 332 ++++++++++++++++++++
 tests/integration/test_reduction.py         | 312 ++++++++++++++----
 tests/integration/test_reduction_axis.py    |  42 ---
 tests/integration/test_reduction_complex.py |  37 ---
 6 files changed, 586 insertions(+), 224 deletions(-)
 delete mode 100644 tests/integration/test_2d_reduction.py
 delete mode 100644 tests/integration/test_3d_reduction.py
 create mode 100644 tests/integration/test_prod.py
 delete mode 100644 tests/integration/test_reduction_axis.py
 delete mode 100644 tests/integration/test_reduction_complex.py

diff --git a/tests/integration/test_2d_reduction.py b/tests/integration/test_2d_reduction.py
deleted file mode 100644
index 86a88d2f7..000000000
--- a/tests/integration/test_2d_reduction.py
+++ /dev/null
@@ -1,49 +0,0 @@
-# Copyright 2021-2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# - -import numpy as np -import pytest -from utils.comparisons import allclose - -import cunumeric as num - - -def test_sum(): - anp = np.array([[1, 2, 3], [4, 5, 6]]) - a = num.array(anp) - r = a.sum(0) - assert np.array_equal(r, [5, 7, 9]) - - r = a.sum(1) - assert np.array_equal(r, [6, 15]) - - -def test_random(): - bnp = np.random.random((2, 3)) - b = num.array(bnp) - assert allclose(num.sum(b), np.sum(bnp)) - - -def test_randn(): - af = np.random.randn(4, 5) - bf = num.array(af) - assert allclose(af.mean(0), bf.mean(0)) - assert allclose(af.mean(), bf.mean()) - - -if __name__ == "__main__": - import sys - - sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_3d_reduction.py b/tests/integration/test_3d_reduction.py deleted file mode 100644 index 79b04d614..000000000 --- a/tests/integration/test_3d_reduction.py +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import numpy as np -import pytest -from utils.comparisons import allclose - -import cunumeric as num - -np.random.seed(42) - - -def test_sum(): - b = np.random.random((10, 12, 13)) - a = num.array(b) - assert allclose(a, b) - - lg_sum = num.sum(a) - np_sum = np.sum(b) - assert allclose(np_sum, lg_sum) - - -if __name__ == "__main__": - import sys - - sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_prod.py b/tests/integration/test_prod.py new file mode 100644 index 000000000..01b95cadc --- /dev/null +++ b/tests/integration/test_prod.py @@ -0,0 +1,332 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+import random
+
+import numpy as np
+import pytest
+from utils.comparisons import allclose
+
+import cunumeric as num
+
+# numpy.prod(a, axis=None, dtype=None, out=None, keepdims=<no value>,
+# initial=<no value>, where=<no value>)
+
+DIM = 5
+SIZES = [
+    (0,),
+    (1),
+    (DIM),
+    (0, 1),
+    (1, 0),
+    (1, 1),
+    (1, DIM),
+    (DIM, 1),
+    (DIM, DIM),
+    (1, 0, 0),
+    (1, 1, 0),
+    (1, 0, 1),
+    (1, 1, 1),
+    (DIM, 1, 1),
+    (1, DIM, 1),
+    (1, 1, DIM),
+    (DIM, DIM, DIM),
+]
+SIZES_E2 = [
+    (DIM),
+    (1, DIM),
+    (DIM, 1),
+    (DIM, DIM),
+    (DIM, 1, 1),
+    (1, DIM, 1),
+    (1, 1, DIM),
+    (DIM, DIM, DIM),
+]
+SIZE_E = [
+    (1, 1),
+    (1, DIM),
+    (DIM, 1),
+    (1, 1, 1),
+    (DIM, 1, 1),
+    (1, DIM, 1),
+    (1, 1, DIM),
+]
+NO_EMPTY_SIZE = [
+    (1),
+    (DIM),
+    (1, 1),
+    (1, DIM),
+    (DIM, 1),
+    (DIM, DIM),
+    (1, 1, 1),
+    (DIM, 1, 1),
+    (1, DIM, 1),
+    (1, 1, DIM),
+    (DIM, DIM, DIM),
+]
+
+ARR = ([], [[]], [[], []], np.inf, np.Inf, -10.3, 0, 200, 5 + 8j)
+
+DTYPE = ["l", "L", "f", "e", "d"]
+COMPLEX_TYPE = ["F"]
+NEGATIVE_COMPLEX_TYPE = ["D"]
+NEGATIVE_DTYPE = ["h", "i", "H", "I", "?", "b", "B"]
+
+
+def to_dtype(s):
+    return str(np.dtype(s))
+
+
+class TestProdNegative(object):
+    """
+    Negative cases: errors and behaviors that currently diverge from NumPy.
+    """
+
+    @pytest.mark.parametrize("arr", ARR)
+    def test_array(self, arr):
+        assert np.array_equal(np.prod(arr), num.prod(arr))
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("dtype", NEGATIVE_DTYPE, ids=to_dtype)
+    def test_dtype_negative(self, dtype):
+        size = (5, 5, 5)
+        arr = np.random.random(size) * 10 + 2
+        arr_np = np.array(arr, dtype=dtype)
+        arr_num = num.array(arr_np)
+        out_np = np.prod(arr_np)  # NumPy returns the product of all elements
+        out_num = num.prod(arr_num)
+        # cuNumeric returns an array with different data
+        assert allclose(out_np, out_num)
+
+    @pytest.mark.skip
+    @pytest.mark.parametrize("dtype", NEGATIVE_COMPLEX_TYPE, ids=to_dtype)
+    def test_dtype_complex_negative(self, dtype):
+        arr = (num.random.rand(5, 5) * 10 + 2) + (
+            num.random.rand(5, 5) * 10 * 1.0j + 0.2j
+        )
+        arr_np = np.array(arr, dtype=dtype)
+        arr_num = num.array(arr_np)
+        out_np = np.prod(arr_np)
+        out_num = num.prod(arr_num)
+        assert allclose(out_np, out_num)
+
+    def test_axis_out_bound(self):
+        arr = [-1, 0, 1, 2, 10]
+        msg = r"bounds"
+        with pytest.raises(np.AxisError, match=msg):
+            num.prod(arr, axis=2)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("axis", ((-1, 1), (0, 1), (1, 2), (0, 2)))
+    def test_axis_tuple(self, axis):
+        size = (5, 5, 5)
+        arr_np = np.random.random(size) * 10
+        arr_num = num.array(arr_np)
+        out_np = np.prod(arr_np, axis=axis)
+        # cuNumeric raises NotImplementedError:
+        # "Need support for reducing multiple dimensions".
+        # NumPy computes a result.
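+        # (An illustrative note, assuming NumPy semantics: for this
+        # (5, 5, 5) input, np.prod(arr_np, axis=(0, 2)) reduces over both
+        # axes at once and returns a shape-(5,) array, which is the
+        # behavior cuNumeric has to match for this xfail to be lifted.)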
+ out_num = num.prod(arr_num, axis=axis) + assert allclose(out_np, out_num) + + def test_out_negative(self): + in_shape = (2, 3, 4) + out_shape = (2, 3, 3) + arr_num = num.random.random(in_shape) * 10 + arr_out = num.random.random(out_shape) * 10 + msg = r"shapes do not match" + with pytest.raises(ValueError, match=msg): + num.prod(arr_num, out=arr_out, axis=2) + + def test_keepdims(self): + in_shape = (2, 3, 4) + arr_num = num.random.random(in_shape) * 10 + arr_np = np.array(arr_num) + out_np = np.prod(arr_np, axis=2, keepdims=True) + out_num = num.prod(arr_num, axis=2, keepdims=True) + assert np.array_equal(out_np, out_num) + + @pytest.mark.xfail + def test_initial_scalar_list(self): + arr = [[1, 2], [3, 4]] + initial_value = [3] + + out_num = num.prod(arr, initial=initial_value) # array(72) + # Numpy raises ValueError: + # Input object to FillWithScalar is not a scalar + out_np = np.prod(arr, initial=initial_value) + + assert np.array_equal(out_np, out_num) + + def test_initial_list(self): + arr = [[1, 2], [3, 4]] + initial_value = [2, 3] + with pytest.raises(ValueError): + num.prod(arr, initial=initial_value) + + def test_initial_empty_array(self): + size = (1, 0) + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + initial_value = random.uniform(-20.0, 20.0) + out_num = num.prod(arr_num, initial=initial_value) + out_np = np.prod(arr_np, initial=initial_value) + assert allclose(out_np, out_num) + + @pytest.mark.xfail + def test_where(self): + arr = [[1, 2], [3, 4]] + out_np = np.prod(arr, where=[False, True]) # return 8 + # cuNumeric raises NotImplementedError: + # the `where` parameter is currently not supported + out_num = num.prod(arr, where=[False, True]) + assert np.array_equal(out_np, out_num) + + +class TestProdPositive(object): + """ + this class is to test positive cases + """ + + @pytest.mark.parametrize("size", SIZES) + def test_basic(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + out_np = np.prod(arr_np) + out_num = np.prod(arr_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("dtype", DTYPE, ids=to_dtype) + def test_dtype(self, dtype): + size = (5, 5, 5) + arr = np.random.random(size) * 10 + arr_np = np.array(arr, dtype=dtype) + arr_num = num.array(arr_np) + out_np = np.prod(arr_np) + out_num = num.prod(arr_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("dtype", COMPLEX_TYPE, ids=to_dtype) + def test_dtype_complex(self, dtype): + arr = (num.random.rand(5, 5) * 10 + 2) + ( + num.random.rand(5, 5) * 10 * 1.0j + 0.2j + ) + arr_np = np.array(arr, dtype=dtype) + arr_num = num.array(arr_np) + out_np = np.prod(arr_np) + out_num = num.prod(arr_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("axis", (_ for _ in range(-2, 3, 1))) + def test_axis_basic(self, axis): + size = (5, 5, 5) + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + out_num = num.prod(arr_num, axis=axis) + out_np = np.prod(arr_np, axis=axis) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("size", SIZES) + def test_out_basic(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + out_np = np.random.random(()) + out_num = num.random.random(()) + np.prod(arr_np, out=out_np) + num.prod(arr_num, out=out_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("size", SIZES) + def test_out_axis(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in range(-ndim + 1, ndim, 1): + out_shape 
= () + if type(size) == tuple: + out_shape_list = list(size) + del out_shape_list[axis] + out_shape = tuple(out_shape_list) + out_np = np.random.random(out_shape) + out_num = num.random.random(out_shape) + np.prod(arr_np, out=out_np, axis=axis) + num.prod(arr_num, out=out_num, axis=axis) + assert allclose(out_np, out_num) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", SIZES_E2) + def test_out_axis_dtype(self, size): + arr = np.random.random(size) * 10 + arr_np = np.array(arr, dtype=to_dtype("f")) + arr_num = num.array(arr, dtype=to_dtype("f")) + + ndim = arr_np.ndim + for axis in range(-ndim + 1, ndim, 1): + out_shape = () + if type(size) == tuple: + out_shape_list = list(size) + del out_shape_list[axis] + out_shape = tuple(out_shape_list) + out = np.random.random(out_shape) + + out_np = np.array(out, dtype=to_dtype("i")) + out_num = num.array(out, dtype=to_dtype("i")) + + np.prod(arr_np, out=out_np, axis=axis) + num.prod(arr_num, out=out_num, axis=axis) + + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("size", SIZES) + def test_axis_keepdims_false(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in range(-ndim + 1, ndim, 1): + out_np = np.prod(arr_np, axis=axis, keepdims=False) + out_num = num.prod(arr_num, axis=axis, keepdims=False) + assert allclose(out_np, out_num) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", SIZE_E) + def test_axis_keepdims_true(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in range(-ndim + 1, ndim, 1): + out_np = np.prod(arr_np, axis=axis, keepdims=True) + out_num = num.prod(arr_num, axis=axis, keepdims=True) + # in cunumeric/deferred/unary_reduction: + # if lhs_array.size == 1: + # > assert axes is None or len(axes) == rhs_array.ndim - ( + # 0 if keepdims else lhs_array.ndim + # ) + # E AssertionError + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("size", NO_EMPTY_SIZE) + def test_initial(self, size): + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + initial_value = random.uniform(-20.0, 20.0) + out_num = num.prod(arr_num, initial=initial_value) + out_np = np.prod(arr_np, initial=initial_value) + + assert allclose(out_np, out_num) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_reduction.py b/tests/integration/test_reduction.py index a5c0ddad3..b406a10d7 100644 --- a/tests/integration/test_reduction.py +++ b/tests/integration/test_reduction.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# +import random import numpy as np import pytest @@ -19,85 +20,280 @@ import cunumeric as num +# numpy.sum(a, axis=None, dtype=None, out=None, keepdims=, +# initial=, where=) -def test_basic(): - x = num.array([]) - r = num.sum(x) - assert r == 0 +DIM = 5 +SIZES = [ + (0,), + (1), + (DIM), + (0, 1), + (1, 0), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 0, 0), + (1, 1, 0), + (1, 0, 1), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] - x = num.array([1]) - r = num.sum(x) +NO_EMPTY_SIZE = [ + (1), + (DIM), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] - assert r == 1 +ARR = ([], [[]], [[], []], np.inf, np.Inf, -10.3, 0, 200, 5 + 8j) - x = num.eye(3) - r = num.sum(x) +DTYPE = ["l", "L", "f", "d"] +COMPLEX_TYPE = ["F", "D"] +NEGATIVE_DTYPE = ["h", "i", "H", "I", "e", "?", "b", "B"] - assert r == 3 - x = num.array([1, 2, 3, 4.0]) - r = num.sum(x) +def to_dtype(s): + return str(np.dtype(s)) - assert r == 10 - x = num.array([1, 2, 3, 4.0, 5.0]) - r = num.prod(x) - assert r == 120 +class TestSumNegative(object): + """ + this class is to test negative cases + """ + @pytest.mark.parametrize("arr", ARR) + def test_array(self, arr): + assert np.array_equal(np.sum(arr), num.sum(arr)) -def test_empty(): - assert np.array_equal(num.sum([]), np.sum([])) - assert np.array_equal(num.sum([[], []]), np.sum([[], []])) + @pytest.mark.xfail + @pytest.mark.parametrize("dtype", NEGATIVE_DTYPE, ids=to_dtype) + def test_dtype_negative(self, dtype): + size = (5, 5, 5) + arr = np.random.random(size) * 10 + arr_np = np.array(arr, dtype=dtype) + arr_num = num.array(arr_np) + out_np = np.sum(arr_np) # Numpy return sum of all datas + out_num = num.sum( + arr_num + ) # cuNumeric return an array with different data + assert allclose(out_np, out_num) + def test_axis_out_bound(self): + arr = [-1, 0, 1, 2, 10] + msg = r"bounds" + with pytest.raises(np.AxisError, match=msg): + num.sum(arr, axis=2) -def test_scalar(): - assert np.array_equal(num.sum(0), np.sum(0)) - assert np.array_equal(num.sum(1), np.sum(1)) + @pytest.mark.xfail + @pytest.mark.parametrize("axis", ((-1, 1), (0, 1), (1, 2), (0, 2))) + def test_axis_tuple(self, axis): + size = (5, 5, 5) + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + out_np = np.sum(arr_np, axis=axis) + # cuNumeric raises NotImplementedError: + # 'Need support for reducing multiple dimensions' + # Numpy get results + out_num = num.sum(arr_num, axis=axis) + assert allclose(out_np, out_num) + def test_out_negative(self): + in_shape = (2, 3, 4) + out_shape = (2, 3, 3) + arr_num = num.random.random(in_shape) * 10 + arr_out = num.random.random(out_shape) * 10 + msg = r"shapes do not match" + with pytest.raises(ValueError, match=msg): + num.sum(arr_num, out=arr_out, axis=2) -def test_1d(): - assert np.array_equal(num.sum(num.array([0])), np.sum(np.array([0]))) - assert np.array_equal(num.sum([1]), np.sum([1])) + def test_keepdims(self): + in_shape = (2, 3, 4) + arr_num = num.random.random(in_shape) * 10 + arr_np = np.array(arr_num) + out_np = np.sum(arr_np, axis=2, keepdims=True) + out_num = num.sum(arr_num, axis=2, keepdims=True) + assert np.array_equal(out_np, out_num) - x = num.array([1, 0, 2, -1, 0, 0, 8]) - x_np = np.array([1, 0, 2, -1, 0, 0, 8]) - assert np.array_equal(num.sum(x), np.sum(x_np)) + @pytest.mark.xfail + def test_initial_scalar_list(self): + arr = [[1, 2], [3, 4]] + initial_value = [3] + out_num = num.sum(arr, initial=initial_value) # 
array(13) + out_np = np.sum( + arr, initial=initial_value + ) # ValueError: Input object to FillWithScalar is not a scalar + assert np.array_equal(out_np, out_num) + def test_initial_list(self): + arr = [[1, 2], [3, 4]] + initial_value = [2, 3] + with pytest.raises(ValueError): + num.sum(arr, initial=initial_value) -def test_2d(): - x = num.array([[0, 1, 0], [2, 0, 3]]) - x_np = np.array([[0, 1, 0], [2, 0, 3]]) - assert np.array_equal(num.sum(x), np.sum(x_np)) + @pytest.mark.xfail + def test_initial_empty_array(self): + size = (1, 0) + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + initial_value = random.uniform(-20.0, 20.0) + out_num = num.sum(arr_num, initial=initial_value) # return 0.0 + out_np = np.sum(arr_np, initial=initial_value) # return initial_value + assert allclose(out_np, out_num) - x = num.eye(3) - x_np = np.eye(3) - assert np.array_equal(num.sum(x), np.sum(x_np)) + @pytest.mark.xfail + def test_where(self): + arr = [[1, 2], [3, 4]] + out_np = np.sum(arr, where=[False, True]) # return 6 + # cuNumeric raises NotImplementedError: + # "the `where` parameter is currently not supported" + out_num = num.sum(arr, where=[False, True]) + assert np.array_equal(out_np, out_num) -def test_3d(): - x = num.array( - [ - [[0, 1], [1, 1], [7, 0], [1, 0], [0, 1]], - [[3, 0], [0, 3], [0, 0], [2, 2], [0, 19]], - ] - ) - x_np = np.array( - [ - [[0, 1], [1, 1], [7, 0], [1, 0], [0, 1]], - [[3, 0], [0, 3], [0, 0], [2, 2], [0, 19]], - ] - ) - assert np.array_equal(num.sum(x, axis=0), np.sum(x_np, axis=0)) - assert np.array_equal(num.sum(x, axis=1), np.sum(x_np, axis=1)) - assert np.array_equal(num.sum(x, axis=2), np.sum(x_np, axis=2)) - assert np.array_equal(num.sum(x), np.sum(x_np)) +class TestSumPositive(object): + """ + this class is to test positive cases + """ - x_np = np.concatenate((x_np,) * 2000, axis=1) - x = num.array(x_np) - assert np.array_equal(num.sum(x, axis=0), np.sum(x_np, axis=0)) - assert np.array_equal(num.sum(x, axis=1), np.sum(x_np, axis=1)) - assert np.array_equal(num.sum(x, axis=2), np.sum(x_np, axis=2)) - assert np.array_equal(num.sum(x), np.sum(x_np)) + @pytest.mark.parametrize("size", SIZES) + def test_basic(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + out_np = np.sum(arr_np) + out_num = np.sum(arr_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("dtype", DTYPE, ids=to_dtype) + def test_dtype(self, dtype): + size = (5, 5, 5) + arr = np.random.random(size) * 10 + arr_np = np.array(arr, dtype=dtype) + arr_num = num.array(arr_np) + out_np = np.sum(arr_np) + out_num = num.sum(arr_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("dtype", COMPLEX_TYPE, ids=to_dtype) + def test_dtype_complex(self, dtype): + arr = num.random.rand(5, 5) * 10 + num.random.rand(5, 5) * 10 * 1.0j + arr_np = np.array(arr, dtype=dtype) + arr_num = num.array(arr_np) + out_np = np.sum(arr_np) + out_num = num.sum(arr_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("axis", (_ for _ in range(-2, 3, 1))) + def test_axis_basic(self, axis): + size = (5, 5, 5) + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + out_num = num.sum(arr_num, axis=axis) + out_np = np.sum(arr_np, axis=axis) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("size", SIZES) + def test_out_basic(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + out_np = np.random.random(()) + out_num = num.random.random(()) + np.sum(arr_np, out=out_np) + num.sum(arr_num, 
out=out_num) + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("size", SIZES) + def test_out_axis(self, size): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in range(-ndim + 1, ndim, 1): + out_shape = () + if type(size) == tuple: + out_shape_list = list(size) + del out_shape_list[axis] + out_shape = tuple(out_shape_list) + out_np = np.random.random(out_shape) + out_num = num.random.random(out_shape) + np.sum(arr_np, out=out_np, axis=axis) + num.sum(arr_num, out=out_num, axis=axis) + assert allclose(out_np, out_num) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", SIZES) + def test_out_axis_dtype(self, size): + arr = np.random.random(size) * 10 + arr_np = np.array(arr, dtype=to_dtype("f")) + arr_num = num.array(arr, dtype=to_dtype("f")) + + ndim = arr_np.ndim + for axis in range(-ndim + 1, ndim, 1): + out_shape = () + if type(size) == tuple: + out_shape_list = list(size) + del out_shape_list[axis] + out_shape = tuple(out_shape_list) + out = np.random.random(out_shape) + + out_np = np.array(out, dtype=to_dtype("i")) + out_num = num.array(out, dtype=to_dtype("i")) + + np.sum(arr_np, out=out_np, axis=axis) + num.sum(arr_num, out=out_num, axis=axis) + + # some data in the out_result are different + # out_np = array([[39, 23, 22, 37, 19], + # [21, 28, 29, 38, 24], + # [29, 25, 30, 27, 23], + # [24, 30, 22, 29, 22], + # [16, 15, 29, 22, 13]], dtype=int32) + # out_num = array([[38, 21, 20, 35, 17], + # [19, 25, 27, 37, 22], + # [27, 24, 29, 24, 22], + # [21, 27, 20, 26, 19], + # [13, 14, 26, 20, 12]], dtype=int32) + + assert allclose(out_np, out_num) + + @pytest.mark.xfail + @pytest.mark.parametrize("size", SIZES) + @pytest.mark.parametrize("keepdims", [False, True]) + def test_axis_keepdims(self, size, keepdims): + arr_np = np.random.random(size) + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in range(-ndim + 1, ndim, 1): + out_np = np.sum(arr_np, axis=axis, keepdims=keepdims) + out_num = num.sum(arr_num, axis=axis, keepdims=keepdims) + # in cunumeric/deferred/unary_reduction: + # if lhs_array.size == 1: + # > assert axes is None or len(axes) == rhs_array.ndim - ( + # 0 if keepdims else lhs_array.ndim + # ) + # E AssertionError + assert allclose(out_np, out_num) + + @pytest.mark.parametrize("size", NO_EMPTY_SIZE) + def test_initial(self, size): + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + initial_value = random.uniform(-20.0, 20.0) + out_num = num.sum(arr_num, initial=initial_value) + out_np = np.sum(arr_np, initial=initial_value) + + assert allclose(out_np, out_num) def test_indexed(): diff --git a/tests/integration/test_reduction_axis.py b/tests/integration/test_reduction_axis.py deleted file mode 100644 index 6ae89f07c..000000000 --- a/tests/integration/test_reduction_axis.py +++ /dev/null @@ -1,42 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#
-
-from itertools import permutations
-
-import numpy as np
-import pytest
-
-import cunumeric as num
-
-
-def _sum(shape, axis, lib, dtype=None):
-    return lib.ones(shape).sum(axis=axis, dtype=dtype)
-
-
-# Try various non-square shapes, to nudge the core towards trying many
-# different partitionings.
-@pytest.mark.parametrize("axis", range(3), ids=str)
-@pytest.mark.parametrize("shape", permutations((3, 4, 5)), ids=str)
-def test_3d(shape, axis):
-    assert np.array_equal(_sum(shape, axis, np), _sum(shape, axis, num))
-    assert np.array_equal(
-        _sum(shape, axis, np, dtype="D"), _sum(shape, axis, num, dtype="D")
-    )
-
-
-if __name__ == "__main__":
-    import sys
-
-    sys.exit(pytest.main(sys.argv))
diff --git a/tests/integration/test_reduction_complex.py b/tests/integration/test_reduction_complex.py
deleted file mode 100644
index 0800246d0..000000000
--- a/tests/integration/test_reduction_complex.py
+++ /dev/null
@@ -1,37 +0,0 @@
-# Copyright 2021-2022 NVIDIA Corporation
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-import numpy as np
-import pytest
-
-import cunumeric as num
-
-x = np.array([1 + 4j, 2 + 5j, 3 + 6j], np.complex64)
-
-
-def test_sum():
-    cx = num.array(x)
-    assert num.all(num.abs(num.sum(cx) - np.sum(x)) < 1e-5)
-
-
-def test_prod():
-    cx = num.array(x)
-    assert num.all(num.abs(num.prod(cx) - np.prod(x)) < 1e-5)
-
-
-if __name__ == "__main__":
-    import sys
-
-    sys.exit(pytest.main(sys.argv))

From a927aa9e55a68b5a01e0112fc52ead45950c4595 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 12 Dec 2022 22:04:23 -0800
Subject: [PATCH 64/89] [pre-commit.ci] pre-commit autoupdate (#738)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/PyCQA/isort: 5.10.1 → 5.11.1](https://github.com/PyCQA/isort/compare/5.10.1...5.11.1)
- [github.com/psf/black: 22.10.0 → 22.12.0](https://github.com/psf/black/compare/22.10.0...22.12.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index e929c0833..71a89944f 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -7,11 +7,11 @@ repos:
         pass_filenames: false
         args: ['cunumeric']
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.11.1
     hooks:
       - id: isort
   - repo: https://github.com/psf/black
-    rev: 22.10.0
+    rev: 22.12.0
     hooks:
       - id: black
   - repo: https://github.com/PyCQA/flake8

From 69dfb0fb315b0277f13bab677c2bb0bd98c69d65 Mon Sep 17 00:00:00 2001
From: Marcin Zalewski
Date: Tue, 13 Dec 2022 00:17:45 -0800
Subject: [PATCH 65/89] Pin conda packages to older versions as a
 workaround (#737)

* Pin curand
* Pin cusolver

Co-authored-by: Marcin Zalewski
---
 conda/conda-build/meta.yaml | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml
index 7a274a1d3..b1f2de956 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -104,7 +104,10 @@ requirements: # the nvcc requirement is necessary because it contains crt/host_config.h used by cuda runtime. This is a packaging bug that has been reported. - cuda-nvcc ={{ cuda_version }} # libcurand is used both in CPU and GPU builds - - libcurand-dev + # temporarily pin curand until problems are resolved + - libcurand-dev =10.3.0.86 + # the following line is only necessary for pinning curand + - libcurand =10.3.0.86 # cudart needed for CPU and GPU builds because of curand - cuda-cudart-dev ={{ cuda_version }} - python @@ -140,7 +143,7 @@ requirements: # - libcutensor >=1.3 - cutensor >=1.3 - libcublas - - libcusolver + - libcusolver =11.4.1.48-0 - libcufft {% endif %} - opt_einsum >=3.3 From 53778f348b89dae571e98e44345a61cf4e26b21f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Malte=20F=C3=B6rster?= <97973773+mfoerste4@users.noreply.github.com> Date: Wed, 14 Dec 2022 19:38:04 +0100 Subject: [PATCH 66/89] guard all2all from empty transfer (#727) --- src/cunumeric/sort/sort.cu | 44 +++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/src/cunumeric/sort/sort.cu b/src/cunumeric/sort/sort.cu index af931c807..c303eb1ab 100644 --- a/src/cunumeric/sort/sort.cu +++ b/src/cunumeric/sort/sort.cu @@ -1557,32 +1557,36 @@ void sample_sort_nccl_nd(SortPiece> local_sorted, // communicate all2all (in sort dimension) CHECK_NCCL(ncclGroupStart()); for (size_t r = 0; r < num_sort_ranks; r++) { - CHECK_NCCL(ncclSend(val_send_buffers[r].ptr(0), - size_send_total[r] * sizeof(VAL), - ncclInt8, - sort_ranks[r], - *comm, - stream)); - CHECK_NCCL(ncclRecv(merge_buffers[r].values.ptr(0), - merge_buffers[r].size * sizeof(VAL), - ncclInt8, - sort_ranks[r], - *comm, - stream)); + if (size_send_total[r] > 0) + CHECK_NCCL(ncclSend(val_send_buffers[r].ptr(0), + size_send_total[r] * sizeof(VAL), + ncclInt8, + sort_ranks[r], + *comm, + stream)); + if (merge_buffers[r].size > 0) + CHECK_NCCL(ncclRecv(merge_buffers[r].values.ptr(0), + merge_buffers[r].size * sizeof(VAL), + ncclInt8, + sort_ranks[r], + *comm, + stream)); } CHECK_NCCL(ncclGroupEnd()); if (argsort) { CHECK_NCCL(ncclGroupStart()); for (size_t r = 0; r < num_sort_ranks; r++) { - CHECK_NCCL(ncclSend( - idc_send_buffers[r].ptr(0), size_send_total[r], ncclInt64, sort_ranks[r], *comm, stream)); - CHECK_NCCL(ncclRecv(merge_buffers[r].indices.ptr(0), - merge_buffers[r].size, - ncclInt64, - sort_ranks[r], - *comm, - stream)); + if (size_send_total[r] > 0) + CHECK_NCCL(ncclSend( + idc_send_buffers[r].ptr(0), size_send_total[r], ncclInt64, sort_ranks[r], *comm, stream)); + if (merge_buffers[r].size > 0) + CHECK_NCCL(ncclRecv(merge_buffers[r].indices.ptr(0), + merge_buffers[r].size, + ncclInt64, + sort_ranks[r], + *comm, + stream)); } CHECK_NCCL(ncclGroupEnd()); } From 3177e5aecacba15f032b5500cf0bc369cdb0100c Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Sat, 17 Dec 2022 02:26:04 +0200 Subject: [PATCH 67/89] Clean up the basic build instructions (#741) --- BUILD.md | 24 +++++++++--------------- README.md | 4 ---- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/BUILD.md b/BUILD.md index 589d7c016..e7183437c 100644 --- a/BUILD.md +++ b/BUILD.md @@ -15,36 +15,30 @@ limitations under the License. --> -# Dependencies +# Basic build Users must have a working installation of the [Legate Core](https://github.com/nv-legate/legate.core) -library prior to installing cuNumeric. 
+library prior to installing cuNumeric. **Installing cuNumeric by itself will not +automatically install Legate Core.** As for other dependencies, the Dependencies section on the [Legate Core build instructions](https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md) -also covers cuNumeric. +also covers cuNumeric, so no additional packages are required. -# Building for Users +Once Legate Core is installed, you can simply invoke `./install.py` from the +cuNumeric top-level directory. The build will automatically pick up the +configuration used when building Legate Core (e.g. the CUDA Toolkit directory). -cuNumeric provides the same source-based installation scripts as Legate Core (a -custom `install.py` script, that is backed by `pip install`). See the -[Legate Core build instructions](https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md) -for help on using these. - -Note: Installing cuNumeric by itself will *not* automatically install Legate Core. +# Advanced topics -# Building for Developers - -## Overview +## Building through pip & cmake cuNumeric uses the same cmake/scikit-build-based build workflow as Legate Core. See the [Legate Core build instructions](https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md) for an overview. -## Example - There are several examples in the `scripts` folder. We walk through the steps in `build-with-legate-separately-no-install.sh` here. diff --git a/README.md b/README.md index 62eecb153..d3973a60c 100644 --- a/README.md +++ b/README.md @@ -55,10 +55,6 @@ conda install -c nvidia -c conda-forge -c legate cunumeric The conda package is compatible with CUDA >= 11.4 (CUDA driver version >= r470), and Volta or later GPU architectures. -Docker image build scripts, as well as specialized install scripts for -supported clusters are available on the -[quickstart](https://github.com/nv-legate/quickstart) repo. - See [BUILD.md](BUILD.md) for instructions on building cuNumeric from source. 
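As a quick smoke test of an installation built either way (a minimal sketch;
it assumes nothing beyond a working `cunumeric` install), cuNumeric is used
through the same API surface as NumPy:

```python
# Smoke test: the customary alias lets existing NumPy code run unchanged.
import cunumeric as np

x = np.arange(10)
assert x.sum() == 45  # 0 + 1 + ... + 9
print("cuNumeric is working")
```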
## Usage and Execution From a79079b43ca18e9a1f32b46f597e3ef0948852ac Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 19 Dec 2022 15:14:15 +0200 Subject: [PATCH 68/89] Refactor benchmarks (#567) * Centralize time() function in benchmark.py * Remove python2-isms * Centralize some argument parsing in benchmark.py * Use legate.timing in all benchmarks * Add warmup iterations to some more benchmarks * Missing argument * Put back samples argument in run_benchmark * Fix #iterations in einsum * Port added except from branch-22.12 * Cover new solve example * Avoid mixup of --centers with -cunumeric:test Co-authored-by: Manolis Papadakis --- examples/benchmark.py | 54 +++++++++++ examples/black_scholes.py | 32 ++---- examples/cg.py | 178 +++++++++++++++------------------- examples/einsum.py | 72 ++++---------- examples/gemm.py | 64 ++++++------ examples/indexing_routines.py | 21 +--- examples/jacobi.py | 101 ++++++------------- examples/kmeans.py | 26 ++--- examples/kmeans_slow.py | 26 ++--- examples/kmeans_sort.py | 32 ++---- examples/linreg.py | 74 ++++++-------- examples/logreg.py | 107 ++++++-------------- examples/lstm_backward.py | 31 ++---- examples/lstm_forward.py | 31 ++---- examples/lstm_full.py | 25 ++--- examples/richardson_lucy.py | 18 +--- examples/scan.py | 49 +--------- examples/solve.py | 16 ++- examples/sort.py | 53 +--------- examples/stencil.py | 89 +++++------------ examples/wgrad.py | 21 ++-- 21 files changed, 386 insertions(+), 734 deletions(-) diff --git a/examples/benchmark.py b/examples/benchmark.py index ec107f24f..1d0a6f73e 100644 --- a/examples/benchmark.py +++ b/examples/benchmark.py @@ -18,6 +18,60 @@ import math from functools import reduce +try: + from legate.timing import time +except (ImportError, RuntimeError): + from time import perf_counter_ns + + def time(): + return perf_counter_ns() / 1000.0 + + +# Add common arguments and parse +def parse_args(parser): + parser.add_argument( + "-b", + "--benchmark", + type=int, + default=1, + dest="benchmark", + help="number of times to benchmark this application (default 1 - " + "normal execution)", + ) + parser.add_argument( + "--package", + dest="package", + choices=["legate", "numpy", "cupy"], + type=str, + default="legate", + help="NumPy package to use", + ) + parser.add_argument( + "--cupy-allocator", + dest="cupy_allocator", + choices=["default", "off", "managed"], + type=str, + default="default", + help="cupy allocator to use", + ) + args, _ = parser.parse_known_args() + if args.package == "legate": + import cunumeric as np + elif args.package == "cupy": + import cupy as np + + if args.cupy_allocator == "off": + np.cuda.set_allocator(None) + print("Turning off memory pool") + elif args.cupy_allocator == "managed": + np.cuda.set_allocator( + np.cuda.MemoryPool(np.cuda.malloc_managed).malloc + ) + print("Using managed memory pool") + elif args.package == "numpy": + import numpy as np + return args, np + # A helper method for benchmarking applications def run_benchmark(f, samples, name, args): diff --git a/examples/black_scholes.py b/examples/black_scholes.py index aadcef456..d64e032d5 100644 --- a/examples/black_scholes.py +++ b/examples/black_scholes.py @@ -16,12 +16,8 @@ # import argparse -import datetime -import math -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def generate_random(N, min, max, D): @@ -75,16 +71,11 @@ def black_scholes(S, X, T, R, V): def run_black_scholes(N, D): print("Running black scholes on %dK options..." 
% N) N *= 1000 - start = datetime.datetime.now() + start = time() S, X, T, R, V = initialize(N, D) - call, put = black_scholes(S, X, T, R, V) - # Check the result for NaNs to synchronize before stopping timing - call_sum = np.sum(call) - put_sum = np.sum(put) - assert not math.isnan(call_sum) and not math.isnan(put_sum) - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + _, _ = black_scholes(S, X, T, R, V) + stop = time() + total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") return total @@ -107,16 +98,9 @@ def run_black_scholes(N, D): dest="P", help="precision of the computation in bits", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + if args.P == 16: run_benchmark( run_black_scholes, diff --git a/examples/cg.py b/examples/cg.py index 50d7d1964..79721f7b0 100644 --- a/examples/cg.py +++ b/examples/cg.py @@ -17,15 +17,7 @@ import argparse -from benchmark import run_benchmark - -try: - from legate.timing import time -except (ImportError, RuntimeError): - from time import perf_counter_ns - - def time(): - return perf_counter_ns() / 1000.0 +from benchmark import parse_args, run_benchmark, time # This is technically dead code right now, but we'll keep it around in @@ -75,7 +67,28 @@ def generate_2D(N, corners): return A, b -def solve(A, b, conv_iters, max_iters, conv_threshold, verbose): +def check(A, x, b): + print("Checking result...") + if np.allclose(A.dot(x), b): + print("PASS!") + else: + print("FAIL!") + + +def run_cg( + N, + corners, + conv_iters, + max_iters, + warmup, + conv_threshold, + perform_check, + timing, + verbose, +): + # A, b = generate_random(N) + A, b = generate_2D(N, corners) + print("Solving system...") x = np.zeros(A.shape[1]) r = b - A.dot(x) @@ -86,7 +99,11 @@ def solve(A, b, conv_iters, max_iters, conv_threshold, verbose): max_iters = ( min(max_iters, b.shape[0]) if max_iters is not None else b.shape[0] ) - for i in range(max_iters): + + start = time() + for i in range(-warmup, max_iters): + if i == 0: + start = time() Ap = A.dot(p) alpha = rsold / (p.dot(Ap)) x = x + alpha * p @@ -94,9 +111,11 @@ def solve(A, b, conv_iters, max_iters, conv_threshold, verbose): rsnew = r.dot(r) # We only do the convergence test every conv_iters or on the last # iteration - if (i % conv_iters == 0 or i == (max_iters - 1)) and np.sqrt( - rsnew - ) < conv_threshold: + if ( + i >= 0 + and (i % conv_iters == 0 or i == (max_iters - 1)) + and np.sqrt(rsnew) < conv_threshold + ): converged = i break if verbose: @@ -104,11 +123,19 @@ def solve(A, b, conv_iters, max_iters, conv_threshold, verbose): beta = rsnew / rsold p = r + beta * p rsold = rsnew + stop = time() + if converged < 0: print("Convergence FAILURE!") else: print("Converged in %d iterations" % (converged)) - return x + if perform_check: + check(A, x, b) + + total = (stop - start) / 1000.0 + if timing: + print(f"Elapsed Time: {total} ms") + return total def precondition(A, N, corners): @@ -120,10 +147,22 @@ def precondition(A, N, corners): return M -def preconditioned_solve( - A, M, b, conv_iters, max_iters, conv_threshold, verbose +def run_preconditioned_cg( + N, + corners, + conv_iters, + max_iters, + warmup, + conv_threshold, + perform_check, + timing, + verbose, ): print("Solving system with preconditioner...") + # A, b = generate_random(N) + A, b = 
generate_2D(N, corners) + M = precondition(A, N, corners) + x = np.zeros(A.shape[1]) r = b - A.dot(x) z = M.dot(r) @@ -134,7 +173,11 @@ def preconditioned_solve( max_iters = ( min(max_iters, b.shape[0]) if max_iters is not None else b.shape[0] ) - for i in range(max_iters): + + start = time() + for i in range(-warmup, max_iters): + if i == 0: + start = time() Ap = A.dot(p) alpha = rzold / (p.dot(Ap)) x = x + alpha * p @@ -142,9 +185,11 @@ def preconditioned_solve( rznew = r.dot(r) # We only do the convergence test every conv_iters or on the # last iteration - if (i % conv_iters == 0 or i == (max_iters - 1)) and np.sqrt( - rznew - ) < conv_threshold: + if ( + i >= 0 + and (i % conv_iters == 0 or i == (max_iters - 1)) + and np.sqrt(rznew) < conv_threshold + ): converged = i break if verbose: @@ -154,45 +199,15 @@ def preconditioned_solve( beta = rznew / rzold p = z + beta * p rzold = rznew + stop = time() + if converged < 0: print("Convergence FAILURE!") else: print("Converged in %d iterations" % (converged)) - return x - - -def check(A, x, b): - print("Checking result...") - if np.allclose(A.dot(x), b): - print("PASS!") - else: - print("FAIL!") - - -def run_cg( - N, - corners, - preconditioner, - conv_iters, - max_iters, - conv_threshold, - perform_check, - timing, - verbose, -): - # A, b = generate_random(N) - A, b = generate_2D(N, corners) - start = time() - if preconditioner: - M = precondition(A, N, corners) - x = preconditioned_solve( - A, M, b, conv_iters, max_iters, conv_threshold, verbose - ) - else: - x = solve(A, b, conv_iters, max_iters, conv_threshold, verbose) if perform_check: check(A, x, b) - stop = time() + total = (stop - start) / 1000.0 if timing: print(f"Elapsed Time: {total} ms") @@ -237,6 +252,14 @@ def run_cg( dest="max_iters", help="bound the maximum number of iterations", ) + parser.add_argument( + "-w", + "--warmup", + type=int, + default=5, + dest="warmup", + help="warm-up iterations", + ) parser.add_argument( "-n", "--num", @@ -259,15 +282,6 @@ def run_cg( action="store_true", help="print verbose output", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) parser.add_argument( "--threshold", type=float, @@ -275,51 +289,19 @@ def run_cg( dest="conv_threshold", help="convergence check threshold", ) - parser.add_argument( - "--package", - dest="package", - choices=["legate", "numpy", "cupy"], - type=str, - default="legate", - help="NumPy package to use (legate, numpy, or cupy)", - ) - parser.add_argument( - "--cupy-allocator", - dest="cupy_allocator", - choices=["default", "off", "managed"], - type=str, - default="default", - help="cupy allocator to use (default, off, or managed)", - ) - - args, _ = parser.parse_known_args() - - if args.package == "legate": - import cunumeric as np - elif args.package == "cupy": - import cupy as np - if args.cupy_allocator == "off": - np.cuda.set_allocator(None) - print("Turning off memory pool") - elif args.cupy_allocator == "managed": - np.cuda.set_allocator( - np.cuda.MemoryPool(np.cuda.malloc_managed).malloc - ) - print("Using managed memory pool") - elif args.package == "numpy": - import numpy as np + args, np = parse_args(parser) run_benchmark( - run_cg, + run_preconditioned_cg if args.precondition else run_cg, args.benchmark, "PreCG" if args.precondition else "CG", ( args.N, args.corners, - args.precondition, args.conv_iters, args.max_iters, + args.warmup, args.conv_threshold, args.check, 
args.timing, diff --git a/examples/einsum.py b/examples/einsum.py index 9990c46d7..aac1ec995 100644 --- a/examples/einsum.py +++ b/examples/einsum.py @@ -18,18 +18,10 @@ import argparse import re -from benchmark import run_benchmark +from benchmark import parse_args, run_benchmark, time -try: - from legate.timing import time -except (ImportError, RuntimeError): - from time import perf_counter_ns - def time(): - return perf_counter_ns() / 1000.0 - - -def run_einsum(expr, N, iters, dtype, cupy_compatibility): +def run_einsum(expr, N, iters, warmup, dtype, cupy_compatibility): # Parse contraction expression m = re.match(r"([a-zA-Z]*),([a-zA-Z]*)->([a-zA-Z]*)", expr) assert m is not None @@ -91,7 +83,9 @@ def run_einsum(expr, N, iters, dtype, cupy_compatibility): # Run contraction start = time() - for _ in range(iters): + for idx in range(iters + warmup): + if idx == warmup: + start = time() if cupy_compatibility: C = np.einsum(expr, A, B) else: @@ -144,6 +138,14 @@ def run_einsum(expr, N, iters, dtype, cupy_compatibility): dest="iters", help="number of iterations to run", ) + parser.add_argument( + "-w", + "--warmup", + type=int, + default=5, + dest="warmup", + help="warm-up iterations", + ) parser.add_argument( "-t", "--dtype", @@ -152,31 +154,6 @@ def run_einsum(expr, N, iters, dtype, cupy_compatibility): dest="dtype", help="dtype for array elements", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - parser.add_argument( - "--package", - dest="package", - choices=["legate", "numpy", "cupy"], - type=str, - default="legate", - help="NumPy package to use (legate, numpy, or cupy)", - ) - parser.add_argument( - "--cupy-allocator", - dest="cupy_allocator", - choices=["default", "off", "managed"], - type=str, - default="default", - help="cupy allocator to use (default, off, or managed)", - ) parser.add_argument( "--cupy-compatibility", action="store_true", @@ -185,25 +162,9 @@ def run_einsum(expr, N, iters, dtype, cupy_compatibility): else, use einsum(expr, A, B, out=C)""", ) - args, _ = parser.parse_known_args() - - cupy_compatibility = args.cupy_compatibility - if args.package == "legate": - import cunumeric as np - elif args.package == "cupy": - import cupy as np - - if args.cupy_allocator == "off": - np.cuda.set_allocator(None) - print("Turning off memory pool") - elif args.cupy_allocator == "managed": - np.cuda.set_allocator( - np.cuda.MemoryPool(np.cuda.malloc_managed).malloc - ) - print("Using managed memory pool") - cupy_compatibility = True - elif args.package == "numpy": - import numpy as np + args, np = parse_args(parser) + + cupy_compatibility = args.cupy_compatibility or args.package == "cupy" if cupy_compatibility: print("Use C = np.einsum(expr, A, B) for cupy compatibility") @@ -222,6 +183,7 @@ def run_einsum(expr, N, iters, dtype, cupy_compatibility): args.expr, args.N, args.iters, + args.warmup, dtypes[args.dtype], cupy_compatibility, ), diff --git a/examples/gemm.py b/examples/gemm.py index 409d43ece..2fe8aafc3 100644 --- a/examples/gemm.py +++ b/examples/gemm.py @@ -16,12 +16,8 @@ # import argparse -import datetime -import math -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def initialize(M, N, K, ft): @@ -39,7 +35,7 @@ def total_space(M, N, K, ft): return (M * N + M * K + K * N) * np.dtype(ft).itemsize -def run_gemm(N, I, ft): # noqa: E741 +def run_gemm(N, I, warmup, 
ft): # noqa: E741 print("Problem Size: M=" + str(N) + " N=" + str(N) + " K=" + str(N)) print("Total Iterations: " + str(I)) flops = total_flops(N, N, N) @@ -47,25 +43,21 @@ def run_gemm(N, I, ft): # noqa: E741 space = total_space(N, N, N, ft) print("Total Size: " + str(space / 1e6) + " MB") A, B, C = initialize(N, N, N, ft) - # Compute some sums and check for NaNs to force synchronization - # before we start the timing - assert not math.isnan(np.sum(A)) - assert not math.isnan(np.sum(B)) - assert not math.isnan(np.sum(C)) - start = datetime.datetime.now() + + start = time() # Run for as many iterations as was requested - for idx in range(I): + for idx in range(I + warmup): + if idx == warmup: + start = time() np.dot(A, B, out=C) # We need to rotate the matrices to keep Legate honest # about moving data so it can't just duplicate A and B # on the first iteration and reuse them, this means # that A, B, C all need to be square A, B, C = B, C, A - # Do another sum to synchronize for timings, B is last output - assert not math.isnan(np.sum(B)) - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + stop = time() + + total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") average = total / I print("Average GEMM: " + str(average) + " ms") @@ -83,6 +75,14 @@ def run_gemm(N, I, ft): # noqa: E741 dest="I", help="number of iterations to run", ) + parser.add_argument( + "-w", + "--warmup", + type=int, + default=5, + dest="warmup", + help="warm-up iterations", + ) parser.add_argument( "-n", "--num", @@ -100,27 +100,29 @@ def run_gemm(N, I, ft): # noqa: E741 help="number of bits of precision to use for the gemm computation " "(16,32,64)", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + if args.P == 16: run_benchmark( - run_gemm, args.benchmark, "HGEMM", (args.N, args.I, np.float16) + run_gemm, + args.benchmark, + "HGEMM", + (args.N, args.I, args.warmup, np.float16), ) elif args.P == 32: run_benchmark( - run_gemm, args.benchmark, "SGEMM", (args.N, args.I, np.float32) + run_gemm, + args.benchmark, + "SGEMM", + (args.N, args.I, args.warmup, np.float32), ) elif args.P == 64: run_benchmark( - run_gemm, args.benchmark, "DGEMM", (args.N, args.I, np.float64) + run_gemm, + args.benchmark, + "DGEMM", + (args.N, args.I, args.warmup, np.float64), ) else: raise TypeError("Precision must be one of 16, 32, or 64") diff --git a/examples/indexing_routines.py b/examples/indexing_routines.py index 3d275e49f..2e7f40301 100644 --- a/examples/indexing_routines.py +++ b/examples/indexing_routines.py @@ -15,16 +15,11 @@ # limitations under the License. 
# -from __future__ import print_function - import argparse import gc import math -from benchmark import run_benchmark -from legate.timing import time - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def compute_diagonal(steps, N, timing, warmup): @@ -264,15 +259,6 @@ def run_indexing_routines( action="store_true", help="print verbose output", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) parser.add_argument( "-r", "--routine", @@ -281,8 +267,9 @@ def run_indexing_routines( choices=["diagonal", "choose", "repeat", "ai1", "ai2", "ai3", "all"], help="name of the index routine to test", ) - args, unknown = parser.parse_known_args() - print("Warning, unrecognized arguments: ", unknown) + + args, np = parse_args(parser) + run_benchmark( run_indexing_routines, args.benchmark, diff --git a/examples/jacobi.py b/examples/jacobi.py index 56cf8aa90..f4e42081d 100644 --- a/examples/jacobi.py +++ b/examples/jacobi.py @@ -16,17 +16,8 @@ # import argparse -import math -from benchmark import run_benchmark - -try: - from legate.timing import time -except (ImportError, RuntimeError): - from time import perf_counter_ns - - def time(): - return perf_counter_ns() / 1000.0 +from benchmark import parse_args, run_benchmark, time def generate_random(N): @@ -40,16 +31,6 @@ def generate_random(N): return A, b -def solve(A, b, iters, verbose): - print("Solving system...") - x = np.zeros(A.shape[1]) - d = np.diag(A) - R = A - np.diag(d) - for i in range(iters): - x = (b - np.dot(R, x)) / d - return x - - def check(A, x, b): print("Checking result...") if np.allclose(A.dot(x), b): @@ -58,16 +39,24 @@ def check(A, x, b): print("FAIL!") -def run_jacobi(N, iters, perform_check, timing, verbose): +def run_jacobi(N, iters, warmup, perform_check, timing, verbose): A, b = generate_random(N) + + print("Solving system...") + x = np.zeros(A.shape[1]) + d = np.diag(A) + R = A - np.diag(d) + start = time() - x = solve(A, b, iters, verbose) + for i in range(iters + warmup): + if i == warmup: + start = time() + x = (b - np.dot(R, x)) / d + stop = time() + if perform_check: check(A, x, b) - else: - # Need a synchronization here for timing - assert not math.isnan(np.sum(x)) - stop = time() + total = (stop - start) / 1000.0 if timing: print(f"Elapsed Time: {total} ms") @@ -90,6 +79,14 @@ def run_jacobi(N, iters, perform_check, timing, verbose): dest="iters", help="number of iterations to run", ) + parser.add_argument( + "-w", + "--warmup", + type=int, + default=5, + dest="warmup", + help="warm-up iterations", + ) parser.add_argument( "-n", "--num", @@ -112,53 +109,19 @@ def run_jacobi(N, iters, perform_check, timing, verbose): action="store_true", help="print verbose output", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - parser.add_argument( - "--package", - dest="package", - choices=["legate", "numpy", "cupy"], - type=str, - default="legate", - help="NumPy package to use (legate, numpy, or cupy)", - ) - parser.add_argument( - "--cupy-allocator", - dest="cupy_allocator", - choices=["default", "off", "managed"], - type=str, - default="default", - help="cupy allocator to use (default, off, or managed)", - ) - - args, _ = parser.parse_known_args() - - if args.package == "legate": - import cunumeric as np - elif 
args.package == "cupy": - import cupy as np - if args.cupy_allocator == "off": - np.cuda.set_allocator(None) - print("Turning off memory pool") - elif args.cupy_allocator == "managed": - np.cuda.set_allocator( - np.cuda.MemoryPool(np.cuda.malloc_managed).malloc - ) - print("Using managed memory pool") - elif args.package == "numpy": - import numpy as np + args, np = parse_args(parser) run_benchmark( run_jacobi, args.benchmark, "Jacobi", - (args.N, args.iters, args.check, args.timing, args.verbose), + ( + args.N, + args.iters, + args.warmup, + args.check, + args.timing, + args.verbose, + ), ) diff --git a/examples/kmeans.py b/examples/kmeans.py index 736b7af58..a64495e7e 100644 --- a/examples/kmeans.py +++ b/examples/kmeans.py @@ -18,11 +18,8 @@ # Derived from https://github.com/bryancatanzaro/kmeans import argparse -import datetime -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def initialize(N, D, C, T): @@ -80,7 +77,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 print("Number of dimensions: " + str(D)) print("Number of centroids: " + str(C)) print("Max iterations: " + str(I)) - start = datetime.datetime.now() + start = time() data, centroids = initialize(N, D, C, T) data_dots = np.square(np.linalg.norm(data, ord=2, axis=1)) @@ -128,9 +125,8 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 + ": " + str(prior_distance_sum) ) - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + stop = time() + total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") return total @@ -138,7 +134,6 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "-c", "--centers", type=int, default=10, @@ -185,16 +180,9 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 dest="S", help="number of iterations between sampling the log likelihood", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application" - " (default 1 - normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + if args.P == 16: run_benchmark( run_kmeans, diff --git a/examples/kmeans_slow.py b/examples/kmeans_slow.py index 8727fa7d2..83f226af3 100644 --- a/examples/kmeans_slow.py +++ b/examples/kmeans_slow.py @@ -18,11 +18,8 @@ # Derived from https://github.com/bryancatanzaro/kmeans import argparse -import datetime -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def initialize(N, D, C, T): @@ -81,7 +78,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 print("Number of dimensions: " + str(D)) print("Number of centroids: " + str(C)) print("Max iterations: " + str(I)) - start = datetime.datetime.now() + start = time() data, centroids = initialize(N, D, C, T) data_dots = np.square(np.linalg.norm(data, ord=2, axis=1)) @@ -129,9 +126,8 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 + ": " + str(prior_distance_sum) ) - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + stop = time() + total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") return total @@ -139,7 +135,6 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 if __name__ == "__main__": parser = argparse.ArgumentParser() 
parser.add_argument( - "-c", "--centers", type=int, default=10, @@ -186,16 +181,9 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 dest="S", help="number of iterations between sampling the log likelihood", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + if args.P == 16: run_benchmark( run_kmeans, diff --git a/examples/kmeans_sort.py b/examples/kmeans_sort.py index b848b54e0..406b02833 100644 --- a/examples/kmeans_sort.py +++ b/examples/kmeans_sort.py @@ -18,16 +18,8 @@ # Derived from https://github.com/bryancatanzaro/kmeans import argparse -import datetime -from benchmark import run_benchmark - -import cunumeric as np - -try: - xrange -except NameError: - xrange = range +from benchmark import parse_args, run_benchmark, time def initialize(N, D, C, T): @@ -68,7 +60,7 @@ def find_centroids(data, labels, C, D): # sum across them to create the centroids centroids = np.empty((C, D), dtype=data.dtype) ragged_arrays = np.split(sorted_points, indexes) - for idx in xrange(C): + for idx in range(C): centroids[idx, :] = np.sum(ragged_arrays[idx], axis=0) # To avoid introducing divide by zero errors # If a centroid has no weight, we'll do no normalization @@ -83,7 +75,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 print("Number of dimensions: " + str(D)) print("Number of centroids: " + str(C)) print("Max iterations: " + str(I)) - start = datetime.datetime.now() + start = time() data, centroids = initialize(N, D, C, T) data_dots = np.square(np.linalg.norm(data, ord=2, axis=1)) @@ -130,9 +122,8 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 + ": " + str(prior_distance_sum) ) - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + stop = time() + total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") return total @@ -187,16 +178,9 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 dest="S", help="number of iterations between sampling the log likelihood", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + if args.P == 16: run_benchmark( run_kmeans, diff --git a/examples/linreg.py b/examples/linreg.py index bce2fff58..a8e684e32 100644 --- a/examples/linreg.py +++ b/examples/linreg.py @@ -16,12 +16,8 @@ # import argparse -import datetime -import math -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def initialize(N, F, T): @@ -32,45 +28,37 @@ def initialize(N, F, T): return x, y -def linear_regression( - T, features, target, steps, learning_rate, sample, add_intercept=False -): - if add_intercept: +def run_linear_regression(N, F, T, I, warmup, S, B): # noqa: E741 + print("Running linear regression...") + print("Number of data points: " + str(N) + "K") + print("Number of features: " + str(F)) + print("Number of iterations: " + str(I)) + + learning_rate = 1e-5 + features, target = initialize(N * 1000, F, T) + if B: intercept = np.ones((features.shape[0], 1), dtype=T) features = np.hstack((intercept, features)) - weights = np.zeros(features.shape[1], dtype=T) - for step in range(steps): + start = time() + for 
step in range(-warmup, I): + if step == 0: + start = time() scores = np.dot(features, weights) error = scores - target gradient = -(1.0 / len(features)) * error.dot(features) weights += learning_rate * gradient - - if step % sample == 0: + if step >= 0 and step % S == 0: print( "Error of step " + str(step) + ": " + str(np.sum(np.power(error, 2))) ) + stop = time() - return weights - - -def run_linear_regression(N, F, T, I, S, B): # noqa: E741 - print("Running linear regression...") - print("Number of data points: " + str(N) + "K") - print("Number of features: " + str(F)) - print("Number of iterations: " + str(I)) - start = datetime.datetime.now() - features, target = initialize(N * 1000, F, T) - weights = linear_regression(T, features, target, I, 1e-5, S, B) - # Check the weights for NaNs to synchronize before stopping timing - assert not math.isnan(np.sum(weights)) - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") return total @@ -78,7 +66,7 @@ def run_linear_regression(N, F, T, I, S, B): # noqa: E741 if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "-b", + "-B", "--intercept", dest="B", action="store_true", @@ -100,6 +88,14 @@ def run_linear_regression(N, F, T, I, S, B): # noqa: E741 dest="I", help="number of iterations to run the algorithm for", ) + parser.add_argument( + "-w", + "--warmup", + type=int, + default=5, + dest="warmup", + help="warm-up iterations", + ) parser.add_argument( "-n", "--num", @@ -124,35 +120,29 @@ def run_linear_regression(N, F, T, I, S, B): # noqa: E741 dest="S", help="number of iterations between sampling the log likelihood", ) - parser.add_argument( - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + if args.P == 16: run_benchmark( run_linear_regression, args.benchmark, "LINREG(H)", - (args.N, args.F, np.float16, args.I, args.S, args.B), + (args.N, args.F, np.float16, args.I, args.warmup, args.S, args.B), ) elif args.P == 32: run_benchmark( run_linear_regression, args.benchmark, "LINREG(S)", - (args.N, args.F, np.float32, args.I, args.S, args.B), + (args.N, args.F, np.float32, args.I, args.warmup, args.S, args.B), ) elif args.P == 64: run_benchmark( run_linear_regression, args.benchmark, "LINREG(D)", - (args.N, args.F, np.float64, args.I, args.S, args.B), + (args.N, args.F, np.float64, args.I, args.warmup, args.S, args.B), ) else: raise TypeError("Precision must be one of 16, 32, or 64") diff --git a/examples/logreg.py b/examples/logreg.py index 4e1abb209..43b0e62b0 100644 --- a/examples/logreg.py +++ b/examples/logreg.py @@ -16,17 +16,8 @@ # import argparse -import math -from benchmark import run_benchmark - -try: - from legate.timing import time -except (ImportError, RuntimeError): - from time import perf_counter_ns - - def time(): - return perf_counter_ns() / 1000.0 +from benchmark import parse_args, run_benchmark, time def initialize(N, F, T): @@ -47,45 +38,37 @@ def log_likelihood(features, target, weights): return np.sum(target * scores - np.log(1.0 + np.exp(scores))) -def logistic_regression( - T, features, target, steps, learning_rate, sample, add_intercept=False -): - if add_intercept: +def run_logistic_regression(N, F, T, I, warmup, S, B): # noqa: E741 + print("Running logistic regression...") + print("Number of data 
points: " + str(N) + "K") + print("Number of features: " + str(F)) + print("Number of iterations: " + str(I)) + + learning_rate = 1e-5 + features, target = initialize(N * 1000, F, T) + if B: intercept = np.ones((features.shape[0], 1), dtype=T) features = np.hstack((intercept, features)) - weights = np.zeros(features.shape[1], dtype=T) - for step in range(steps): + start = time() + for step in range(-warmup, I): + if step == 0: + start = time() scores = np.dot(features, weights) predictions = sigmoid(scores) - error = target - predictions gradient = np.dot(error, features) weights += learning_rate * gradient - - if step % sample == 0: + if step >= 0 and step % S == 0: print( "Log Likelihood of step " + str(step) + ": " + str(log_likelihood(features, target, weights)) ) - - return weights - - -def run_logistic_regression(N, F, T, I, S, B): # noqa: E741 - print("Running logistic regression...") - print("Number of data points: " + str(N) + "K") - print("Number of features: " + str(F)) - print("Number of iterations: " + str(I)) - features, target = initialize(N * 1000, F, T) - start = time() - weights = logistic_regression(T, features, target, I, 1e-5, S, B) stop = time() - # Check the weights for NaNs - assert not math.isnan(np.sum(weights)) + total = (stop - start) / 1000.0 print(f"Elapsed Time: {total} ms") return total @@ -94,7 +77,7 @@ def run_logistic_regression(N, F, T, I, S, B): # noqa: E741 if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "-b", + "-B", "--intercept", dest="B", action="store_true", @@ -116,6 +99,14 @@ def run_logistic_regression(N, F, T, I, S, B): # noqa: E741 dest="I", help="number of iterations to run the algorithm for", ) + parser.add_argument( + "-w", + "--warmup", + type=int, + default=5, + dest="warmup", + help="warm-up iterations", + ) parser.add_argument( "-n", "--num", @@ -140,69 +131,29 @@ def run_logistic_regression(N, F, T, I, S, B): # noqa: E741 dest="S", help="number of iterations between sampling the log likelihood", ) - parser.add_argument( - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - parser.add_argument( - "--package", - dest="package", - choices=["legate", "numpy", "cupy"], - type=str, - default="legate", - help="NumPy package to use (legate, numpy, or cupy)", - ) - parser.add_argument( - "--cupy-allocator", - dest="cupy_allocator", - choices=["default", "off", "managed"], - type=str, - default="default", - help="cupy allocator to use (default, off, or managed)", - ) - - args, _ = parser.parse_known_args() - if args.package == "legate": - import cunumeric as np - elif args.package == "cupy": - import cupy as np - - if args.cupy_allocator == "off": - np.cuda.set_allocator(None) - print("Turning off memory pool") - elif args.cupy_allocator == "managed": - np.cuda.set_allocator( - np.cuda.MemoryPool(np.cuda.malloc_managed).malloc - ) - print("Using managed memory pool") - elif args.package == "numpy": - import numpy as np + args, np = parse_args(parser) if args.P == 16: run_benchmark( run_logistic_regression, args.benchmark, "LOGREG(H)", - (args.N, args.F, np.float16, args.I, args.S, args.B), + (args.N, args.F, np.float16, args.I, args.warmup, args.S, args.B), ) elif args.P == 32: run_benchmark( run_logistic_regression, args.benchmark, "LOGREG(S)", - (args.N, args.F, np.float32, args.I, args.S, args.B), + (args.N, args.F, np.float32, args.I, args.warmup, args.S, args.B), ) elif args.P == 64: run_benchmark( 
run_logistic_regression, args.benchmark, "LOGREG(D)", - (args.N, args.F, np.float64, args.I, args.S, args.B), + (args.N, args.F, np.float64, args.I, args.warmup, args.S, args.B), ) else: raise TypeError("Precision must be one of 16, 32, or 64") diff --git a/examples/lstm_backward.py b/examples/lstm_backward.py index 554dd49e8..99e47f8be 100644 --- a/examples/lstm_backward.py +++ b/examples/lstm_backward.py @@ -16,16 +16,12 @@ # import argparse -import datetime -import math -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): - start = datetime.datetime.now() + start = time() WLSTM = np.random.randn( word_size + hidden_size, 4 * hidden_size @@ -77,13 +73,8 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): else: dh0[0] += np.sum(dHin[t, :, word_size:], 0) - # Do a little sum to synchronize and check for NaNs - total = np.sum(dh0) - assert not math.isnan(total) - - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + stop = time() + total = (stop - start) / 1000.0 if timing: print("Elapsed Time: " + str(total) + " ms") return total @@ -92,7 +83,7 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "-b", "--batch", type=int, default=32, dest="batch", help="batch size" + "-B", "--batch", type=int, default=32, dest="batch", help="batch size" ) parser.add_argument( "--hidden", type=int, default=10, dest="hidden", help="hidden size" @@ -115,15 +106,9 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): action="store_true", help="perform timing", ) - parser.add_argument( - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + run_benchmark( run_lstm, args.benchmark, diff --git a/examples/lstm_forward.py b/examples/lstm_forward.py index dde2e7c76..4f1ab7abf 100644 --- a/examples/lstm_forward.py +++ b/examples/lstm_forward.py @@ -16,16 +16,12 @@ # import argparse -import datetime -import math -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): - start = datetime.datetime.now() + start = time() X = np.random.randn(sentence_length, batch_size, hidden_size) h0 = np.random.randn(1, hidden_size) @@ -67,13 +63,8 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): Ct[t] = np.tanh(C[t]) Hout[t] = IFOGf[t, :, 2 * d : 3 * d] * Ct[t] - # Do a little sum of the outputs to synchronize and check for NaNs - total = np.sum(Hout) - assert not math.isnan(total) - - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + stop = time() + total = (stop - start) / 1000.0 if timing: print("Elapsed Time: " + str(total) + " ms") return total @@ -82,7 +73,7 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument( - "-b", "--batch", type=int, default=32, dest="batch", help="batch size" + "-B", "--batch", type=int, default=32, dest="batch", help="batch size" ) parser.add_argument( "--hidden", type=int, default=10, 
dest="hidden", help="hidden size" @@ -105,15 +96,9 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): action="store_true", help="perform timing", ) - parser.add_argument( - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + run_benchmark( run_lstm, args.benchmark, diff --git a/examples/lstm_full.py b/examples/lstm_full.py index 0a56400a1..864773739 100644 --- a/examples/lstm_full.py +++ b/examples/lstm_full.py @@ -16,11 +16,8 @@ # import argparse -import datetime -from benchmark import run_benchmark - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time class Param: @@ -293,7 +290,7 @@ def run_lstm( pointer = 0 - start = datetime.datetime.now() + start = time() for iteration in range(max_iters): # Reset @@ -328,9 +325,8 @@ def run_lstm( pointer += T_steps update_status(max_iters, smooth_loss) - stop = datetime.datetime.now() - delta = stop - start - total = delta.total_seconds() * 1000.0 + stop = time() + total = (stop - start) / 1000.0 if timing: print("Elapsed Time: " + str(total) + " ms") return total @@ -400,16 +396,9 @@ def run_lstm( dest="weight", help="standard deviation of weights for initialization", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + run_benchmark( run_lstm, args.benchmark, diff --git a/examples/richardson_lucy.py b/examples/richardson_lucy.py index db8a06a75..7e5514280 100644 --- a/examples/richardson_lucy.py +++ b/examples/richardson_lucy.py @@ -15,10 +15,7 @@ import argparse -from benchmark import run_benchmark -from legate.timing import time - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time float_type = "float32" @@ -113,16 +110,9 @@ def run_richardson_lucy(shape, filter_shape, num_iter, warmup, timing): action="store_true", help="perform timing", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 " - "- normal execution)", - ) - args = parser.parse_args() + + args, np = parse_args(parser) + run_benchmark( run_richardson_lucy, args.benchmark, diff --git a/examples/scan.py b/examples/scan.py index 07b3621fd..03d315325 100644 --- a/examples/scan.py +++ b/examples/scan.py @@ -18,8 +18,7 @@ import argparse import numpy as np -from benchmark import run_benchmark -from legate.timing import time +from benchmark import parse_args, run_benchmark, time def initialize(shape, dt, axis): @@ -81,8 +80,7 @@ def run_scan(OP, shape, dt, ax, check): getattr(num, OP)(A, out=B, axis=ax) stop = time() - delta = stop - start - total = delta / 1000.0 + total = (stop - start) / 1000.0 print(f"Elapsed Time: {total}ms") # error checking if check: @@ -131,49 +129,8 @@ def run_scan(OP, shape, dt, ax, check): action="store_true", help="check the result of the solve", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - parser.add_argument( - "--package", - dest="package", - choices=["legate", "numpy", "cupy"], - type=str, - default="legate", - help="NumPy package to use (legate, numpy, or 
cupy)", - ) - parser.add_argument( - "--cupy-allocator", - dest="cupy_allocator", - choices=["default", "off", "managed"], - type=str, - default="default", - help="cupy allocator to use (default, off, or managed)", - ) - args, _ = parser.parse_known_args() - - if args.package == "legate": - import cunumeric as num - elif args.package == "cupy": - import cupy as num - - if args.cupy_allocator == "off": - num.cuda.set_allocator(None) - print("Turning off memory pool") - elif args.cupy_allocator == "managed": - num.cuda.set_allocator( - num.cuda.MemoryPool(num.cuda.malloc_managed).malloc - ) - print("Using managed memory pool") - elif args.package == "numpy": - import numpy as num + args, num = parse_args(parser) run_benchmark( run_scan, diff --git a/examples/solve.py b/examples/solve.py index 5d5082dd4..d07642dba 100644 --- a/examples/solve.py +++ b/examples/solve.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python + # Copyright 2022 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); @@ -15,9 +17,7 @@ import argparse -from legate.timing import time - -import cunumeric as np +from benchmark import parse_args, run_benchmark, time def solve(m, n, nrhs, dtype): @@ -66,5 +66,11 @@ def solve(m, n, nrhs, dtype): dest="dtype", help="data type", ) - args = parser.parse_args() - solve(args.m, args.n, args.nrhs, args.dtype) + args, np = parse_args(parser) + + run_benchmark( + solve, + args.benchmark, + "Solve", + (args.m, args.n, args.nrhs, args.dtype), + ) diff --git a/examples/sort.py b/examples/sort.py index fb92d3dfb..cfcf7590a 100644 --- a/examples/sort.py +++ b/examples/sort.py @@ -18,15 +18,7 @@ import argparse import numpy as np -from benchmark import run_benchmark - -try: - from legate.timing import time -except (ImportError, RuntimeError): - from time import perf_counter_ns - - def time(): - return perf_counter_ns() / 1000.0 +from benchmark import parse_args, run_benchmark, time def check_sorted(a, a_sorted, package, axis=-1): @@ -162,49 +154,8 @@ def run_sort( action="store_true", help="use argsort", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 - " - "normal execution)", - ) - parser.add_argument( - "--package", - dest="package", - choices=["legate", "numpy", "cupy"], - type=str, - default="legate", - help="NumPy package to use (legate, numpy, or cupy)", - ) - parser.add_argument( - "--cupy-allocator", - dest="cupy_allocator", - choices=["default", "off", "managed"], - type=str, - default="default", - help="cupy allocator to use (default, off, or managed)", - ) - - args, _ = parser.parse_known_args() - - if args.package == "legate": - import cunumeric as num - elif args.package == "cupy": - import cupy as num - if args.cupy_allocator == "off": - num.cuda.set_allocator(None) - print("Turning off memory pool") - elif args.cupy_allocator == "managed": - num.cuda.set_allocator( - num.cuda.MemoryPool(num.cuda.malloc_managed).malloc - ) - print("Using managed memory pool") - elif args.package == "numpy": - import numpy as num + args, num = parse_args(parser) run_benchmark( run_sort, diff --git a/examples/stencil.py b/examples/stencil.py index 460cb7bde..3eae3c0b1 100644 --- a/examples/stencil.py +++ b/examples/stencil.py @@ -16,17 +16,8 @@ # import argparse -import math -from benchmark import run_benchmark - -try: - from legate.timing import time -except (ImportError, RuntimeError): - from time import perf_counter_ns - - def time(): - return perf_counter_ns() 
/ 1000.0 +from benchmark import parse_args, run_benchmark, time def initialize(N): @@ -39,30 +30,26 @@ def initialize(N): return grid -def run(grid, I, N): # noqa: E741 +def run_stencil(N, I, warmup, timing): # noqa: E741 + grid = initialize(N) + print("Running Jacobi stencil...") center = grid[1:-1, 1:-1] north = grid[0:-2, 1:-1] east = grid[1:-1, 2:] west = grid[1:-1, 0:-2] south = grid[2:, 1:-1] - for i in range(I): + + start = time() + for i in range(I + warmup): + if i == warmup: + start = time() average = center + north + east + west + south work = 0.2 * average - # delta = np.sum(np.absolute(work - center)) center[:] = work - total = np.sum(center) - return total / (N**2) - - -def run_stencil(N, I, timing): # noqa: E741 - grid = initialize(N) - start = time() - average = run(grid, I, N) stop = time() - print("Average energy is %.8g" % average) + total = (stop - start) / 1000.0 - assert not math.isnan(average) if timing: print(f"Elapsed Time: {total} ms") return total @@ -78,6 +65,14 @@ def run_stencil(N, I, timing): # noqa: E741 dest="I", help="number of iterations to run", ) + parser.add_argument( + "-w", + "--warmup", + type=int, + default=5, + dest="warmup", + help="warm-up iterations", + ) parser.add_argument( "-n", "--num", @@ -93,50 +88,12 @@ def run_stencil(N, I, timing): # noqa: E741 action="store_true", help="perform timing", ) - parser.add_argument( - "-b", - "--benchmark", - type=int, - default=1, - dest="benchmark", - help="number of times to benchmark this application (default 1 " - "- normal execution)", - ) - parser.add_argument( - "--package", - dest="package", - choices=["legate", "numpy", "cupy"], - type=str, - default="legate", - help="NumPy package to use (legate, numpy, or cupy)", - ) - parser.add_argument( - "--cupy-allocator", - dest="cupy_allocator", - choices=["default", "off", "managed"], - type=str, - default="default", - help="cupy allocator to use (default, off, or managed)", - ) - - args, _ = parser.parse_known_args() - - if args.package == "legate": - import cunumeric as np - elif args.package == "cupy": - import cupy as np - if args.cupy_allocator == "off": - np.cuda.set_allocator(None) - print("Turning off memory pool") - elif args.cupy_allocator == "managed": - np.cuda.set_allocator( - np.cuda.MemoryPool(np.cuda.malloc_managed).malloc - ) - print("Using managed memory pool") - elif args.package == "numpy": - import numpy as np + args, np = parse_args(parser) run_benchmark( - run_stencil, args.benchmark, "Stencil", (args.N, args.I, args.timing) + run_stencil, + args.benchmark, + "Stencil", + (args.N, args.I, args.warmup, args.timing), ) diff --git a/examples/wgrad.py b/examples/wgrad.py index d95c00297..f4767f2b0 100644 --- a/examples/wgrad.py +++ b/examples/wgrad.py @@ -16,8 +16,8 @@ # import argparse -import datetime -import math + +from legate.timing import time import cunumeric as np @@ -45,17 +45,14 @@ def cross_correlate(x, y, C, K, R, S, B, H, W): def run_wgrad(H=256, W=256, B=32, C=256, K=32, R=5, S=5, timing=False): - if timing: - start = datetime.datetime.now() + start = time() x, y = initialize(C, K, B, H, W) - dw = cross_correlate(x, y, C, K, R, S, B, H, W) - # Do a little sum over dw to sync the results - total = np.sum(dw) - assert not math.isnan(total) + _ = cross_correlate(x, y, C, K, R, S, B, H, W) + stop = time() + total = (stop - start) / 1000.0 if timing: - stop = datetime.datetime.now() - delta = stop - start - print("Elapsed Time: " + str(delta.total_seconds() * 1000.0) + " ms") + print("Elapsed Time: " + str(total) + " ms") + 
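+    # time() here is legate.timing.time(), which reports microseconds, so the
+    # division above yields the elapsed interval in milliseconds.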
    return total


 if __name__ == "__main__":
@@ -104,7 +101,7 @@ def run_wgrad(H=256, W=256, B=32, C=256, K=32, R=5, S=5, timing=False):
         dest="W",
         help="width of images in pixels",
     )
     args = parser.parse_args()
     run_wgrad(
         args.H, args.W, args.B, args.C, args.K, args.R, args.R, args.timing
     )

From b7f0881d8d40cf432ffd86efb561f4f7bc909905 Mon Sep 17 00:00:00 2001
From: Rohan Yadav
Date: Tue, 20 Dec 2022 02:46:04 -0700
Subject: [PATCH 69/89] src/cunumeric/item: add openmp variants for write/read
 tasks (#740)

This commit adds OMP variants for the write and read tasks so that they
can be used in resource-scoped settings where OpenMP processors are
desired.

Signed-off-by: Rohan Yadav
Signed-off-by: Rohan Yadav
---
 src/cunumeric/item/read.h  | 3 +++
 src/cunumeric/item/write.h | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/cunumeric/item/read.h b/src/cunumeric/item/read.h
index d3bb90774..0606d82e4 100644
--- a/src/cunumeric/item/read.h
+++ b/src/cunumeric/item/read.h
@@ -26,6 +26,9 @@ class ReadTask : public CuNumericTask {
 public:
  static void cpu_variant(legate::TaskContext& context);
+#ifdef LEGATE_USE_OPENMP
+  static void omp_variant(legate::TaskContext& context) { ReadTask::cpu_variant(context); }
+#endif
 #ifdef LEGATE_USE_CUDA
  static void gpu_variant(legate::TaskContext& context);
 #endif
diff --git a/src/cunumeric/item/write.h b/src/cunumeric/item/write.h
index c3455b0e0..725918139 100644
--- a/src/cunumeric/item/write.h
+++ b/src/cunumeric/item/write.h
@@ -26,6 +26,9 @@ class WriteTask : public CuNumericTask {
 public:
  static void cpu_variant(legate::TaskContext& context);
+#ifdef LEGATE_USE_OPENMP
+  static void omp_variant(legate::TaskContext& context) { WriteTask::cpu_variant(context); }
+#endif
 #ifdef LEGATE_USE_CUDA
  static void gpu_variant(legate::TaskContext& context);
 #endif

From 5617e2c490ebf67bd52f2c6490d99dfaa455fc61 Mon Sep 17 00:00:00 2001
From: Manolis Papadakis
Date: Wed, 21 Dec 2022 01:05:39 +0200
Subject: [PATCH 70/89] Add back NaN checks to some benchmarks (#743)

* Add back NaN checks to some benchmarks

* Add some more debugging info in case of NaNs
---
 examples/jacobi.py | 12 +++++++-----
 examples/logreg.py |  5 +++++
 2 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/examples/jacobi.py b/examples/jacobi.py
index f4e42081d..ef82e76f9 100644
--- a/examples/jacobi.py
+++ b/examples/jacobi.py
@@ -16,6 +16,7 @@
 #

 import argparse
+import math

 from benchmark import parse_args, run_benchmark, time

@@ -33,10 +34,7 @@ def generate_random(N):

 def check(A, x, b):
     print("Checking result...")
-    if np.allclose(A.dot(x), b):
-        print("PASS!")
-    else:
-        print("FAIL!")
+    return np.allclose(A.dot(x), b)


 def run_jacobi(N, iters, warmup, perform_check, timing, verbose):
@@ -55,7 +53,11 @@ def run_jacobi(N, iters, warmup, perform_check, timing, verbose):
     stop = time()

     if perform_check:
-        check(A, x, b)
+        assert check(A, x, b)
+    else:
+        assert not math.isnan(
+            np.sum(x)
+        ), f"{np.count_nonzero(np.isnan(x))} NaNs in x"

     total = (stop - start) / 1000.0
     if timing:
diff --git a/examples/logreg.py b/examples/logreg.py
index 43b0e62b0..88fe7cde9 100644
--- a/examples/logreg.py
+++ b/examples/logreg.py
@@ -16,6 +16,7 @@
 #

 import argparse
+import math

 from benchmark import parse_args, run_benchmark, time

@@ -69,6 +70,10 @@ def run_logistic_regression(N, F, T, I, warmup, S, B):  # noqa: E741
         )
     stop = time()

+    assert not math.isnan(
+        np.sum(weights)
+    ), f"{np.count_nonzero(np.isnan(weights))} NaNs in weights"
+
     total = (stop - start) / 1000.0
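+    # A single NaN makes np.sum(weights) NaN, so the scalar check above
+    # detects divergence cheaply; the assertion message counts the NaN
+    # entries to aid debugging.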
print(f"Elapsed Time: {total} ms") return total From 98d1e822cb037cbda142d52a89df740e5907a2fd Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Sun, 25 Dec 2022 19:51:36 +0200 Subject: [PATCH 71/89] Fix CI failures due to numpy 1.24 upgrade (#745) Co-authored-by: Manolis Papadakis --- cunumeric/eager.py | 2 +- tests/integration/test_ndim.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/cunumeric/eager.py b/cunumeric/eager.py index 0c792fbae..61e8f5d37 100644 --- a/cunumeric/eager.py +++ b/cunumeric/eager.py @@ -504,7 +504,7 @@ def convert( elif nan_op is ConvertCode.PROD and np.isnan(rhs.array.item()): self.array.fill(1) else: - self.array.fill(rhs.array.item()) + self.array.fill(rhs.array.astype(self.array.dtype).item()) else: if nan_op is ConvertCode.SUM: self.array[:] = np.where(np.isnan(rhs.array), 0, rhs.array) diff --git a/tests/integration/test_ndim.py b/tests/integration/test_ndim.py index d6888cb50..c9bba7f07 100644 --- a/tests/integration/test_ndim.py +++ b/tests/integration/test_ndim.py @@ -44,9 +44,7 @@ def test_ndarray_empty(input): assert np.ndim(input) == num.ndim(input) -@pytest.mark.parametrize( - "input", (([0], [1, 2], [3, 4, 5]), ([1, 2], [3.3, 4.4])) -) +@pytest.mark.parametrize("input", [([1, 2], [3.3, 4.4])]) def test_python_values_diff_dim(input): assert np.ndim(input) == num.ndim(input) From fc202143fb1635ce06a54f5611d28440178dfac2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 27 Dec 2022 14:22:57 +0000 Subject: [PATCH 72/89] [pre-commit.ci] pre-commit autoupdate (#744) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/PyCQA/isort: 5.11.1 → 5.11.4](https://github.com/PyCQA/isort/compare/5.11.1...5.11.4) - [github.com/pre-commit/mirrors-clang-format: v15.0.4 → v15.0.6](https://github.com/pre-commit/mirrors-clang-format/compare/v15.0.4...v15.0.6) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 71a89944f..eefd667d3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,7 +7,7 @@ repos: pass_filenames: false args: ['cunumeric'] - repo: https://github.com/PyCQA/isort - rev: 5.11.1 + rev: 5.11.4 hooks: - id: isort - repo: https://github.com/psf/black @@ -19,7 +19,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v15.0.4' # Use the sha / tag you want to point at + rev: 'v15.0.6' # Use the sha / tag you want to point at hooks: - id: clang-format files: \.(cu|cuh|h|cc|inl)$ From a2bc300c940ca26a6fb47e080d01dabe4c6419a0 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 27 Dec 2022 10:14:03 -0800 Subject: [PATCH 73/89] Improving performance for some special cases of advanced indexing (#731) * moving boolean case for advanced indexing to a separate function * improving performance for special case of advanced indexing with bool array --- cunumeric/deferred.py | 336 +++++++++++++------- src/cunumeric/index/putmask_template.inl | 3 - tests/integration/test_advanced_indexing.py | 14 + 3 files changed, 231 insertions(+), 122 deletions(-) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index a7ba5d6c0..baa2cf50d 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -534,142 +534,238 @@ def _slice_store(k: slice, store: Store, dim: 
int) -> tuple[slice, Store]: return k, store - def _create_indexing_array( + def _has_single_boolean_array( + self, key: Any, is_set: bool + ) -> tuple[bool, DeferredArray, Any]: + if isinstance(key, NumPyThunk) and key.dtype == bool: + return True, self, key + else: + # key is a single array of indices + if isinstance(key, NumPyThunk): + return False, self, key + + assert isinstance(key, tuple) + + key = self._unpack_ellipsis(key, self.ndim) + + # loop through all the keys to check if there + # is a single NumPyThunk entry + num_arrays = 0 + transpose_index = 0 + for dim, k in enumerate(key): + if isinstance(k, NumPyThunk): + num_arrays += 1 + transpose_index = dim + + # this is the case when there is a single boolean array passed + # in this case we transpose original array so that the indx + # to which boolean array is passed to goes first + # doing this we can avoid going through Realm Copy which should + # improve performance + if ( + num_arrays == 1 + and key[transpose_index].dtype == bool + and is_set + ): + lhs = self + key_dim = key[transpose_index].ndim + transpose_indices = tuple( + (transpose_index + i) for i in range(0, key_dim) + ) + transpose_indices += tuple( + i for i in range(0, transpose_index) + ) + transpose_indices += tuple( + i for i in range(transpose_index + key_dim, lhs.ndim) + ) + + new_key = tuple(key[i] for i in range(0, transpose_index)) + new_key += tuple( + key[i] for i in range(transpose_index + 1, len(key)) + ) + lhs = lhs.transpose(transpose_indices) + + # transform original array for all other keys in the tuple + if len(new_key) > 0: + shift = 0 + store = lhs.base + for dim, k in enumerate(new_key): + if np.isscalar(k): + if k < 0: # type: ignore [operator] + k += store.shape[dim + key_dim + shift] + store = store.project(dim + key_dim + shift, k) + shift -= 1 + elif k is np.newaxis: + store = store.promote(dim + key_dim + shift, 1) + elif isinstance(k, slice): + k, store = self._slice_store( + k, store, dim + key_dim + shift + ) + else: + raise TypeError( + "Unsupported entry type passed to advanced ", + "indexing operation", + ) + lhs = DeferredArray(self.runtime, store, self.dtype) + + return True, lhs, key[transpose_index] + + # this is a general advanced indexing case + else: + return False, self, key + + def _advanced_indexing_with_boolean_array( self, key: Any, is_set: bool = False, set_value: Optional[Any] = None, ) -> tuple[bool, Any, Any, Any]: - store = self.base rhs = self - # the index where the first index_array is passed to the [] operator - start_index = -1 - if isinstance(key, NumPyThunk) and key.dtype == bool: - if not isinstance(key, DeferredArray): - key = self.runtime.to_deferred_array(key) - - # in case when boolean array is passed as an index, shape for all - # its dimensions should be the same as the shape of - # corresponding dimensions of the input array - for i in range(key.ndim): - if key.shape[i] != rhs.shape[i]: - raise ValueError( - "shape of the index array for " - f"dimension {i} doesn't match to the shape of the" - f"index array which is {rhs.shape[i]}" - ) - - # if key or rhs are empty, return an empty array with correct shape - if key.size == 0 or rhs.size == 0: - if rhs.size == 0 and key.size != 0: - # we need to calculate shape of the 0 dim of output region - # even though the size of it is 0 - # this can potentially be replaced with COUNT_NONZERO - s = key.nonzero()[0].size - else: - s = 0 - - out_shape = (s,) + tuple( - rhs.shape[i] for i in range(key.ndim, rhs.ndim) + if not isinstance(key, DeferredArray): + key = 
self.runtime.to_deferred_array(key) + + # in case when boolean array is passed as an index, shape for all + # its dimensions should be the same as the shape of + # corresponding dimensions of the input array + for i in range(key.ndim): + if key.shape[i] != rhs.shape[i]: + raise ValueError( + "shape of the index array for " + f"dimension {i} doesn't match to the shape of the" + f"index array which is {rhs.shape[i]}" ) - out = cast( - DeferredArray, - self.runtime.create_empty_thunk( - out_shape, - rhs.dtype, - inputs=[rhs], - ), - ) - out.fill(np.zeros((), dtype=out.dtype)) - return False, rhs, out, self - - key_store = key.base - # bring key to the same shape as rhs - for i in range(key_store.ndim, rhs.ndim): - key_store = key_store.promote(i, rhs.shape[i]) - - # has_set_value && set_value.size==1 corresponds to the case - # when a[bool_indices]=scalar - # then we can call "putmask" to modify input array - # and avoid calling Copy - has_set_value = set_value is not None and set_value.size == 1 - if has_set_value: - mask = DeferredArray( - self.runtime, - base=key_store, - dtype=self.dtype, - ) - rhs.putmask(mask, set_value) - return False, rhs, rhs, self + + # if key or rhs are empty, return an empty array with correct shape + if key.size == 0 or rhs.size == 0: + if rhs.size == 0 and key.size != 0: + # we need to calculate shape of the 0 dim of output region + # even though the size of it is 0 + # this can potentially be replaced with COUNT_NONZERO + s = key.nonzero()[0].size else: - out_dtype = rhs.dtype - # in the case this operation is called for the set_item, we - # return Point type field that is later used for - # indirect copy operation - if is_set: - N = rhs.ndim - out_dtype = rhs.runtime.get_point_type(N) - - # TODO : current implementation of the ND output regions - # requires out.ndim == rhs.ndim. This will be fixed in the - # future - out = rhs.runtime.create_unbound_thunk( - out_dtype, ndim=rhs.ndim - ) - key_dims = key.ndim # dimension of the original key + s = 0 - task = rhs.context.create_auto_task( - CuNumericOpCode.ADVANCED_INDEXING - ) - task.add_output(out.base) - task.add_input(rhs.base) - task.add_input(key_store) - task.add_scalar_arg(is_set, bool) - task.add_scalar_arg(key_dims, ty.int64) - task.add_alignment(rhs.base, key_store) - task.add_broadcast( - rhs.base, axes=tuple(range(1, len(rhs.base.shape))) - ) - task.execute() + out_shape = (s,) + tuple( + rhs.shape[i] for i in range(key.ndim, rhs.ndim) + ) - # TODO : current implementation of the ND output regions - # requires out.ndim == rhs.ndim. 
- # The logic below will be removed in the future - out_dim = rhs.ndim - key_dims + 1 - - if out_dim != rhs.ndim: - out_tmp = out.base - - if out.size == 0: - out_shape = tuple( - out.shape[i] for i in range(0, out_dim) - ) - out = cast( - DeferredArray, - self.runtime.create_empty_thunk( - out_shape, - out_dtype, - inputs=[out], - ), - ) - if not is_set: - out.fill(np.array(0, dtype=out_dtype)) - else: - for dim in range(rhs.ndim - out_dim): - out_tmp = out_tmp.project(rhs.ndim - dim - 1, 0) + out = cast( + DeferredArray, + self.runtime.create_empty_thunk( + out_shape, + rhs.dtype, + inputs=[rhs], + ), + ) + out.fill(np.zeros((), dtype=out.dtype)) + return False, rhs, out, self + + key_store = key.base + # bring key to the same shape as rhs + for i in range(key_store.ndim, rhs.ndim): + key_store = key_store.promote(i, rhs.shape[i]) + + # has_set_value && set_value.size==1 corresponds to the case + # when a[bool_indices]=scalar + # then we can call "putmask" to modify input array + # and avoid calling Copy + has_set_value = set_value is not None and set_value.size == 1 + if has_set_value: + + mask = DeferredArray( + self.runtime, + base=key_store, + dtype=self.dtype, + ) + rhs.putmask(mask, set_value) + return False, rhs, rhs, self + else: + out_dtype = rhs.dtype + # in the case this operation is called for the set_item, we + # return Point type field that is later used for + # indirect copy operation + if is_set: + N = rhs.ndim + out_dtype = rhs.runtime.get_point_type(N) - out = out._copy_store(out_tmp) + # TODO : current implementation of the ND output regions + # requires out.ndim == rhs.ndim. This will be fixed in the + # future + out = rhs.runtime.create_unbound_thunk(out_dtype, ndim=rhs.ndim) + key_dims = key.ndim # dimension of the original key - return is_set, rhs, out, self + task = rhs.context.create_auto_task( + CuNumericOpCode.ADVANCED_INDEXING + ) + task.add_output(out.base) + task.add_input(rhs.base) + task.add_input(key_store) + task.add_scalar_arg(is_set, bool) + task.add_scalar_arg(key_dims, ty.int64) + task.add_alignment(rhs.base, key_store) + task.add_broadcast( + rhs.base, axes=tuple(range(1, len(rhs.base.shape))) + ) + task.execute() + + # TODO : current implementation of the ND output regions + # requires out.ndim == rhs.ndim. 
+ # The logic below will be removed in the future + out_dim = rhs.ndim - key_dims + 1 + + if out_dim != rhs.ndim: + out_tmp = out.base + + if out.size == 0: + out_shape = tuple(out.shape[i] for i in range(0, out_dim)) + out = cast( + DeferredArray, + self.runtime.create_empty_thunk( + out_shape, + out_dtype, + inputs=[out], + ), + ) + if not is_set: + out.fill(np.array(0, dtype=out_dtype)) + else: + for dim in range(rhs.ndim - out_dim): + out_tmp = out_tmp.project(rhs.ndim - dim - 1, 0) + + out = out._copy_store(out_tmp) + return is_set, rhs, out, self + def _create_indexing_array( + self, + key: Any, + is_set: bool = False, + set_value: Optional[Any] = None, + ) -> tuple[bool, Any, Any, Any]: + + is_bool_array, lhs, bool_key = self._has_single_boolean_array( + key, is_set + ) + + # the case when single boolean array is passed to the advanced + # indexing operation + if is_bool_array: + return lhs._advanced_indexing_with_boolean_array( + bool_key, is_set, set_value + ) + # general advanced indexing case + + store = self.base + rhs = self if isinstance(key, NumPyThunk): key = (key,) - assert isinstance(key, tuple) key = self._unpack_ellipsis(key, self.ndim) + + # the index where the first index_array is passed to the [] operator + start_index = -1 shift = 0 last_index = self.ndim - # in case when index arrays are passed in the scaterred way, + # in case when index arrays are passed in the scattered way, # we need to transpose original array so all index arrays # are close to each other transpose_needed = False @@ -730,8 +826,8 @@ def _create_indexing_array( "shape of boolean index did not match " "indexed array " ) - # in case of the mixed indises we all nonzero - # for the bool array + # in case of the mixed indices we all nonzero + # for the boolean array k = k.nonzero() shift += len(k) - 1 tuple_of_arrays += k @@ -1770,6 +1866,8 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: @auto_convert("mask", "values") def putmask(self, mask: Any, values: Any) -> None: + assert self.shape == mask.shape + if values.shape != self.shape: values_new = values._broadcast(self.shape) else: diff --git a/src/cunumeric/index/putmask_template.inl b/src/cunumeric/index/putmask_template.inl index 6f55c34e0..f522198b3 100644 --- a/src/cunumeric/index/putmask_template.inl +++ b/src/cunumeric/index/putmask_template.inl @@ -52,9 +52,6 @@ struct Putmask { Putmask(PutmaskArgs& args) : dense(false) { rect = args.input.shape(); -#ifdef DEBUG_CUNUMERIC - assert(rect == args.mask.shape()); -#endif input = args.input.read_write_accessor(rect); mask = args.mask.read_accessor(rect); diff --git a/tests/integration/test_advanced_indexing.py b/tests/integration/test_advanced_indexing.py index 2e3ee475a..4f1d649ac 100644 --- a/tests/integration/test_advanced_indexing.py +++ b/tests/integration/test_advanced_indexing.py @@ -756,6 +756,20 @@ def test(): res_num = x_num[[1, 1], :, [False, True, False, True]] assert np.array_equal(res, res_num) + # set item with mixed indices + x[1, :, [False, True, False, True]] = 129 + x_num[1, :, [False, True, False, True]] = 129 + assert np.array_equal(x, x_num) + + # set item with mixed indices + x[:, [False, True, False], 1] = 111 + x_num[:, [False, True, False], 1] = 111 + assert np.array_equal(x, x_num) + + x[..., [False, True, False, True, False]] = 200 + x_num[..., [False, True, False, True, False]] = 200 + assert np.array_equal(x, x_num) + # b: combining basic and advanced indexing schemes ind0 = np.array([1, 1]) ind0_num = num.array(ind0) From 
2844e59e93404e60daf6de6c656619a44cda4bde Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 4 Jan 2023 15:13:13 -0800 Subject: [PATCH 74/89] Switch docs from recommonmark to myst-parser (#746) --- README.md | 9 --------- docs/cunumeric/source/conf.py | 3 +-- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/README.md b/README.md index d3973a60c..dbe358373 100644 --- a/README.md +++ b/README.md @@ -35,15 +35,6 @@ canonical NumPy implementation. If you have questions, please contact us at legate(at)nvidia.com. -1. [Installation](#installation) -1. [Usage and Execution](#usage-and-execution) -1. [Supported and Planned Features](#supported-and-planned-features) -1. [Supported Types and Dimensions](#supported-types-and-dimensions) -1. [Documentation](#documentation) -1. [Future Directions](#future-directions) -1. [Contributing](#contributing) -1. [Known Bugs](#known-bugs) - ## Installation cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric): diff --git a/docs/cunumeric/source/conf.py b/docs/cunumeric/source/conf.py index 17fd408c1..5d3ce4881 100644 --- a/docs/cunumeric/source/conf.py +++ b/docs/cunumeric/source/conf.py @@ -37,8 +37,7 @@ "sphinx.ext.mathjax", "sphinx.ext.napoleon", "sphinx_copybutton", - "sphinx_markdown_tables", - "recommonmark", + "myst_parser", "cunumeric._sphinxext.comparison_table", "cunumeric._sphinxext.implemented_index", "cunumeric._sphinxext.missing_refs", From b4a40fedf3a5f4a1ddffeadd281e14eafa46c5ca Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Fri, 6 Jan 2023 11:09:11 -0800 Subject: [PATCH 75/89] Fix timing for CuPy tests (#747) CuPy launches work asynchronously on the GPU, so we need to block until all work finishes before taking a measurement. --- examples/benchmark.py | 76 +++++++++++++++++++++++++++++++---- examples/black_scholes.py | 9 ++--- examples/cg.py | 18 ++++----- examples/einsum.py | 11 +++-- examples/gemm.py | 11 +++-- examples/indexing_routines.py | 34 +++++++--------- examples/jacobi.py | 11 +++-- examples/kmeans.py | 9 ++--- examples/kmeans_slow.py | 9 ++--- examples/kmeans_sort.py | 9 ++--- examples/linreg.py | 11 +++-- examples/logreg.py | 11 +++-- examples/lstm_backward.py | 9 ++--- examples/lstm_forward.py | 9 ++--- examples/lstm_full.py | 9 ++--- examples/richardson_lucy.py | 11 +++-- examples/scan.py | 9 ++--- examples/solve.py | 9 ++--- examples/sort.py | 9 ++--- examples/stencil.py | 11 +++-- 20 files changed, 166 insertions(+), 129 deletions(-) diff --git a/examples/benchmark.py b/examples/benchmark.py index 1d0a6f73e..1d0944e3b 100644 --- a/examples/benchmark.py +++ b/examples/benchmark.py @@ -18,13 +18,70 @@ import math from functools import reduce -try: - from legate.timing import time -except (ImportError, RuntimeError): - from time import perf_counter_ns +from typing_extensions import Protocol - def time(): - return perf_counter_ns() / 1000.0 + +class Timer(Protocol): + def start(self): + ... + + def stop(self): + """ + Blocks execution until everything before it has completed. Returns the + duration since the last call to start(), in milliseconds. + """ + ... 
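+
+# A minimal usage sketch (hypothetical caller, not part of this module),
+# assuming `timer` is one of the concrete implementations below:
+#
+#     timer.start()
+#     C = np.dot(A, B)           # enqueue the work being measured
+#     elapsed_ms = timer.stop()  # blocks until that work has finished
+#
+# The blocking stop() is the point of this abstraction: CuPy launches
+# kernels asynchronously, so reading a clock without synchronizing first
+# would only measure launch overhead, not the computation.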
+ + +class CuNumericTimer(Timer): + def __init__(self): + self._start_future = None + + def start(self): + from legate.timing import time + + self._start_future = time() + + def stop(self): + from legate.timing import time + + end_future = time() + return (end_future - self._start_future) / 1000.0 + + +class CuPyTimer(Timer): + def __init__(self): + self._start_event = None + + def start(self): + from cupy import cuda + + self._start_event = cuda.Event() + self._start_event.record() + + def stop(self): + from cupy import cuda + + end_event = cuda.Event() + end_event.record() + end_event.synchronize() + return cuda.get_elapsed_time(self._start_event, end_event) + + +class NumPyTimer(Timer): + def __init__(self): + self._start_time = None + + def start(self): + from time import perf_counter_ns + + self._start_time = perf_counter_ns() / 1000.0 + + def stop(self): + from time import perf_counter_ns + + end_time = perf_counter_ns() / 1000.0 + return (end_time - self._start_time) / 1000.0 # Add common arguments and parse @@ -57,6 +114,8 @@ def parse_args(parser): args, _ = parser.parse_known_args() if args.package == "legate": import cunumeric as np + + timer = CuNumericTimer() elif args.package == "cupy": import cupy as np @@ -68,9 +127,12 @@ def parse_args(parser): np.cuda.MemoryPool(np.cuda.malloc_managed).malloc ) print("Using managed memory pool") + timer = CuPyTimer() elif args.package == "numpy": import numpy as np - return args, np + + timer = NumPyTimer() + return args, np, timer # A helper method for benchmarking applications diff --git a/examples/black_scholes.py b/examples/black_scholes.py index d64e032d5..55374ea09 100644 --- a/examples/black_scholes.py +++ b/examples/black_scholes.py @@ -17,7 +17,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def generate_random(N, min, max, D): @@ -71,11 +71,10 @@ def black_scholes(S, X, T, R, V): def run_black_scholes(N, D): print("Running black scholes on %dK options..." 
% N) N *= 1000 - start = time() + timer.start() S, X, T, R, V = initialize(N, D) _, _ = black_scholes(S, X, T, R, V) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() print("Elapsed Time: " + str(total) + " ms") return total @@ -99,7 +98,7 @@ def run_black_scholes(N, D): help="precision of the computation in bits", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) if args.P == 16: run_benchmark( diff --git a/examples/cg.py b/examples/cg.py index 79721f7b0..a0399778e 100644 --- a/examples/cg.py +++ b/examples/cg.py @@ -17,7 +17,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark # This is technically dead code right now, but we'll keep it around in @@ -100,10 +100,10 @@ def run_cg( min(max_iters, b.shape[0]) if max_iters is not None else b.shape[0] ) - start = time() + timer.start() for i in range(-warmup, max_iters): if i == 0: - start = time() + timer.start() Ap = A.dot(p) alpha = rsold / (p.dot(Ap)) x = x + alpha * p @@ -123,7 +123,7 @@ def run_cg( beta = rsnew / rsold p = r + beta * p rsold = rsnew - stop = time() + total = timer.stop() if converged < 0: print("Convergence FAILURE!") @@ -132,7 +132,6 @@ def run_cg( if perform_check: check(A, x, b) - total = (stop - start) / 1000.0 if timing: print(f"Elapsed Time: {total} ms") return total @@ -174,10 +173,10 @@ def run_preconditioned_cg( min(max_iters, b.shape[0]) if max_iters is not None else b.shape[0] ) - start = time() + timer.start() for i in range(-warmup, max_iters): if i == 0: - start = time() + timer.start() Ap = A.dot(p) alpha = rzold / (p.dot(Ap)) x = x + alpha * p @@ -199,7 +198,7 @@ def run_preconditioned_cg( beta = rznew / rzold p = z + beta * p rzold = rznew - stop = time() + total = timer.stop() if converged < 0: print("Convergence FAILURE!") @@ -208,7 +207,6 @@ def run_preconditioned_cg( if perform_check: check(A, x, b) - total = (stop - start) / 1000.0 if timing: print(f"Elapsed Time: {total} ms") return total @@ -290,7 +288,7 @@ def run_preconditioned_cg( help="convergence check threshold", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_preconditioned_cg if args.precondition else run_cg, diff --git a/examples/einsum.py b/examples/einsum.py index aac1ec995..090e3385f 100644 --- a/examples/einsum.py +++ b/examples/einsum.py @@ -18,7 +18,7 @@ import argparse import re -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def run_einsum(expr, N, iters, warmup, dtype, cupy_compatibility): @@ -82,10 +82,10 @@ def run_einsum(expr, N, iters, warmup, dtype, cupy_compatibility): C = np.zeros((N,) * len(c_modes), dtype=dtype) # Run contraction - start = time() + timer.start() for idx in range(iters + warmup): if idx == warmup: - start = time() + timer.start() if cupy_compatibility: C = np.einsum(expr, A, B) else: @@ -102,10 +102,9 @@ def run_einsum(expr, N, iters, warmup, dtype, cupy_compatibility): A, C = C, A else: B, C = C, B - stop = time() + total = timer.stop() # Print statistics - total = (stop - start) / 1000.0 average = total / iters print(f"Elapsed Time: {total:.3f} ms") print(f"Average Iteration: {average:.3f} ms") @@ -162,7 +161,7 @@ def run_einsum(expr, N, iters, warmup, dtype, cupy_compatibility): else, use einsum(expr, A, B, out=C)""", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) cupy_compatibility = args.cupy_compatibility or args.package == "cupy" if cupy_compatibility: diff 
--git a/examples/gemm.py b/examples/gemm.py index 2fe8aafc3..c70a666c1 100644 --- a/examples/gemm.py +++ b/examples/gemm.py @@ -17,7 +17,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(M, N, K, ft): @@ -44,20 +44,19 @@ def run_gemm(N, I, warmup, ft): # noqa: E741 print("Total Size: " + str(space / 1e6) + " MB") A, B, C = initialize(N, N, N, ft) - start = time() + timer.start() # Run for as many iterations as was requested for idx in range(I + warmup): if idx == warmup: - start = time() + timer.start() np.dot(A, B, out=C) # We need to rotate the matrices to keep Legate honest # about moving data so it can't just duplicate A and B # on the first iteration and reuse them, this means # that A, B, C all need to be square A, B, C = B, C, A - stop = time() + total = timer.stop() - total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") average = total / I print("Average GEMM: " + str(average) + " ms") @@ -101,7 +100,7 @@ def run_gemm(N, I, warmup, ft): # noqa: E741 "(16,32,64)", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) if args.P == 16: run_benchmark( diff --git a/examples/indexing_routines.py b/examples/indexing_routines.py index 2e7f40301..a0f15e120 100644 --- a/examples/indexing_routines.py +++ b/examples/indexing_routines.py @@ -19,7 +19,7 @@ import gc import math -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def compute_diagonal(steps, N, timing, warmup): @@ -27,11 +27,10 @@ def compute_diagonal(steps, N, timing, warmup): print("measuring diagonal") for step in range(steps + warmup): if step == warmup: - start = time() + timer.start() A2 = np.diag(A1) A1 = np.diag(A2) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: space = (N * N + N) * np.dtype(int).itemsize / 1073741824 print("Total Size: " + str(space) + " GB") @@ -52,10 +51,9 @@ def compute_choose(steps, N, timing, warmup): C1 = np.arange(N, dtype=int) % 10 for step in range(steps + warmup): if step == warmup: - start = time() + timer.start() C1 = np.choose(C1, A, mode="wrap") - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: space = N * np.dtype(int).itemsize / 1073741824 print("Total Size: " + str(space) + " GB") @@ -82,10 +80,9 @@ def compute_repeat(steps, N, timing, warmup): print("measuring repeat") for step in range(steps + warmup): if step == warmup: - start = time() + timer.start() A2 = np.repeat(A2, R, axis=1) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: space = (N * N) * np.dtype(int).itemsize / 1073741824 print("Total Size: " + str(space) + " GB") @@ -108,11 +105,10 @@ def compute_advanced_indexing_1d(steps, N, timing, warmup): indx_bool = (B % 2).astype(bool) for step in range(steps + warmup): if step == warmup: - start = time() + timer.start() A1[indx] = 10 # 1 copy A1[indx_bool] = 12 # 1 AI and 1 copy - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: space = (3 * N) * np.dtype(int).itemsize / 1073741824 print("Total Size: " + str(space) + " GB") @@ -136,12 +132,11 @@ def compute_advanced_indexing_2d(steps, N, timing, warmup): indx2d_bool = (A2 % 2).astype(bool) for step in range(steps + warmup): if step == warmup: - start = time() + timer.start() A2[indx_bool, indx_bool] = 11 # one ZIP and 1 copy = N+N*N A2[:, indx] = 12 # one ZIP and 3 copies = N+3*N*N A2[indx2d_bool] = 13 # 1 copy 
and one AI task = 2* N*N - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: space = (6 * N * N + 2 * N) * np.dtype(int).itemsize / 1073741824 print("Total Size: " + str(space) + " GB") @@ -171,11 +166,10 @@ def compute_advanced_indexing_3d(steps, N, timing, warmup): indx3d_bool = (A3 % 2).astype(bool) for step in range(steps + warmup): if step == warmup: - start = time() + timer.start() A3[indx, :, indx] = 15 # 1 ZIP and 3 copy = N+3N*N A3[indx3d_bool] = 16 # 1 copy and 1 AI task = 2*N*N - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: space = (5 * N * N + N) * np.dtype(int).itemsize / 1073741824 print("Total Size: " + str(space) + " GB") @@ -268,7 +262,7 @@ def run_indexing_routines( help="name of the index routine to test", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_indexing_routines, diff --git a/examples/jacobi.py b/examples/jacobi.py index ef82e76f9..6b9e46968 100644 --- a/examples/jacobi.py +++ b/examples/jacobi.py @@ -18,7 +18,7 @@ import argparse import math -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def generate_random(N): @@ -45,12 +45,12 @@ def run_jacobi(N, iters, warmup, perform_check, timing, verbose): d = np.diag(A) R = A - np.diag(d) - start = time() + timer.start() for i in range(iters + warmup): if i == warmup: - start = time() + timer.start() x = (b - np.dot(R, x)) / d - stop = time() + total = timer.stop() if perform_check: assert check(A, x, b) @@ -59,7 +59,6 @@ def run_jacobi(N, iters, warmup, perform_check, timing, verbose): np.sum(x) ), f"{np.count_nonzero(~np.isnan(x))} NaNs in x" - total = (stop - start) / 1000.0 if timing: print(f"Elapsed Time: {total} ms") return total @@ -112,7 +111,7 @@ def run_jacobi(N, iters, warmup, perform_check, timing, verbose): help="print verbose output", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_jacobi, diff --git a/examples/kmeans.py b/examples/kmeans.py index a64495e7e..a12723d94 100644 --- a/examples/kmeans.py +++ b/examples/kmeans.py @@ -19,7 +19,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(N, D, C, T): @@ -77,7 +77,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 print("Number of dimensions: " + str(D)) print("Number of centroids: " + str(C)) print("Max iterations: " + str(I)) - start = time() + timer.start() data, centroids = initialize(N, D, C, T) data_dots = np.square(np.linalg.norm(data, ord=2, axis=1)) @@ -125,8 +125,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 + ": " + str(prior_distance_sum) ) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() print("Elapsed Time: " + str(total) + " ms") return total @@ -181,7 +180,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 help="number of iterations between sampling the log likelihood", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) if args.P == 16: run_benchmark( diff --git a/examples/kmeans_slow.py b/examples/kmeans_slow.py index 83f226af3..a4d4c7009 100644 --- a/examples/kmeans_slow.py +++ b/examples/kmeans_slow.py @@ -19,7 +19,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(N, D, C, T): @@ -78,7 +78,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 
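+    # `timer` is the global helper returned by benchmark.parse_args();
+    # timer.stop() synchronizes outstanding device work before reporting
+    # elapsed milliseconds.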
print("Number of dimensions: " + str(D)) print("Number of centroids: " + str(C)) print("Max iterations: " + str(I)) - start = time() + timer.start() data, centroids = initialize(N, D, C, T) data_dots = np.square(np.linalg.norm(data, ord=2, axis=1)) @@ -126,8 +126,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 + ": " + str(prior_distance_sum) ) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() print("Elapsed Time: " + str(total) + " ms") return total @@ -182,7 +181,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 help="number of iterations between sampling the log likelihood", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) if args.P == 16: run_benchmark( diff --git a/examples/kmeans_sort.py b/examples/kmeans_sort.py index 406b02833..ae84ca6da 100644 --- a/examples/kmeans_sort.py +++ b/examples/kmeans_sort.py @@ -19,7 +19,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(N, D, C, T): @@ -75,7 +75,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 print("Number of dimensions: " + str(D)) print("Number of centroids: " + str(C)) print("Max iterations: " + str(I)) - start = time() + timer.start() data, centroids = initialize(N, D, C, T) data_dots = np.square(np.linalg.norm(data, ord=2, axis=1)) @@ -122,8 +122,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 + ": " + str(prior_distance_sum) ) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() print("Elapsed Time: " + str(total) + " ms") return total @@ -179,7 +178,7 @@ def run_kmeans(C, D, T, I, N, S, benchmarking): # noqa: E741 help="number of iterations between sampling the log likelihood", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) if args.P == 16: run_benchmark( diff --git a/examples/linreg.py b/examples/linreg.py index a8e684e32..7ec3d11ba 100644 --- a/examples/linreg.py +++ b/examples/linreg.py @@ -17,7 +17,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(N, F, T): @@ -41,10 +41,10 @@ def run_linear_regression(N, F, T, I, warmup, S, B): # noqa: E741 features = np.hstack((intercept, features)) weights = np.zeros(features.shape[1], dtype=T) - start = time() + timer.start() for step in range(-warmup, I): if step == 0: - start = time() + timer.start() scores = np.dot(features, weights) error = scores - target gradient = -(1.0 / len(features)) * error.dot(features) @@ -56,9 +56,8 @@ def run_linear_regression(N, F, T, I, warmup, S, B): # noqa: E741 + ": " + str(np.sum(np.power(error, 2))) ) - stop = time() + total = timer.stop() - total = (stop - start) / 1000.0 print("Elapsed Time: " + str(total) + " ms") return total @@ -121,7 +120,7 @@ def run_linear_regression(N, F, T, I, warmup, S, B): # noqa: E741 help="number of iterations between sampling the log likelihood", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) if args.P == 16: run_benchmark( diff --git a/examples/logreg.py b/examples/logreg.py index 88fe7cde9..d502e35f3 100644 --- a/examples/logreg.py +++ b/examples/logreg.py @@ -18,7 +18,7 @@ import argparse import math -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(N, F, T): @@ -52,10 +52,10 @@ def run_logistic_regression(N, F, T, I, warmup, S, B): # noqa: E741 features = np.hstack((intercept, 
features)) weights = np.zeros(features.shape[1], dtype=T) - start = time() + timer.start() for step in range(-warmup, I): if step == 0: - start = time() + timer.start() scores = np.dot(features, weights) predictions = sigmoid(scores) error = target - predictions @@ -68,13 +68,12 @@ def run_logistic_regression(N, F, T, I, warmup, S, B): # noqa: E741 + ": " + str(log_likelihood(features, target, weights)) ) - stop = time() + total = timer.stop() assert not math.isnan( np.sum(weights) ), f"{np.count_nonzero(~np.isnan(weights))} NaNs in weights" - total = (stop - start) / 1000.0 print(f"Elapsed Time: {total} ms") return total @@ -137,7 +136,7 @@ def run_logistic_regression(N, F, T, I, warmup, S, B): # noqa: E741 help="number of iterations between sampling the log likelihood", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) if args.P == 16: run_benchmark( diff --git a/examples/lstm_backward.py b/examples/lstm_backward.py index 99e47f8be..2de702700 100644 --- a/examples/lstm_backward.py +++ b/examples/lstm_backward.py @@ -17,11 +17,11 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): - start = time() + timer.start() WLSTM = np.random.randn( word_size + hidden_size, 4 * hidden_size @@ -73,8 +73,7 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): else: dh0[0] += np.sum(dHin[t, :, word_size:], 0) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: print("Elapsed Time: " + str(total) + " ms") return total @@ -107,7 +106,7 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): help="perform timing", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_lstm, diff --git a/examples/lstm_forward.py b/examples/lstm_forward.py index 4f1ab7abf..097218eaf 100644 --- a/examples/lstm_forward.py +++ b/examples/lstm_forward.py @@ -17,11 +17,11 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): - start = time() + timer.start() X = np.random.randn(sentence_length, batch_size, hidden_size) h0 = np.random.randn(1, hidden_size) @@ -63,8 +63,7 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): Ct[t] = np.tanh(C[t]) Hout[t] = IFOGf[t, :, 2 * d : 3 * d] * Ct[t] - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: print("Elapsed Time: " + str(total) + " ms") return total @@ -97,7 +96,7 @@ def run_lstm(batch_size, hidden_size, sentence_length, word_size, timing): help="perform timing", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_lstm, diff --git a/examples/lstm_full.py b/examples/lstm_full.py index 864773739..7bab6c9c7 100644 --- a/examples/lstm_full.py +++ b/examples/lstm_full.py @@ -17,7 +17,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark class Param: @@ -290,7 +290,7 @@ def run_lstm( pointer = 0 - start = time() + timer.start() for iteration in range(max_iters): # Reset @@ -325,8 +325,7 @@ def run_lstm( pointer += T_steps update_status(max_iters, smooth_loss) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: print("Elapsed Time: " + str(total) + " ms") return total @@ 
-397,7 +396,7 @@ def run_lstm( help="standard deviation of weights for initialization", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_lstm, diff --git a/examples/richardson_lucy.py b/examples/richardson_lucy.py index 7e5514280..5ffcdcad8 100644 --- a/examples/richardson_lucy.py +++ b/examples/richardson_lucy.py @@ -15,7 +15,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark float_type = "float32" @@ -28,17 +28,16 @@ def run_richardson_lucy(shape, filter_shape, num_iter, warmup, timing): im_deconv = np.full(image.shape, 0.5, dtype=float_type) psf_mirror = np.flip(psf) - start = time() + timer.start() for idx in range(num_iter + warmup): if idx == warmup: - start = time() + timer.start() conv = np.convolve(im_deconv, psf, mode="same") relative_blur = image / conv im_deconv *= np.convolve(relative_blur, psf_mirror, mode="same") - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() if timing: print("Elapsed Time: " + str(total) + " ms") @@ -111,7 +110,7 @@ def run_richardson_lucy(shape, filter_shape, num_iter, warmup, timing): help="perform timing", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_richardson_lucy, diff --git a/examples/scan.py b/examples/scan.py index 03d315325..d4737e54b 100644 --- a/examples/scan.py +++ b/examples/scan.py @@ -18,7 +18,7 @@ import argparse import numpy as np -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(shape, dt, axis): @@ -74,13 +74,12 @@ def run_scan(OP, shape, dt, ax, check): print(f"Axis: axis={ax}") print(f"Data type: dtype={dt}32") A, B = initialize(shape=shape, dt=dt, axis=ax) - start = time() + timer.start() # op handling getattr(num, OP)(A, out=B, axis=ax) - stop = time() - total = (stop - start) / 1000.0 + total = timer.stop() print(f"Elapsed Time: {total}ms") # error checking if check: @@ -130,7 +129,7 @@ def run_scan(OP, shape, dt, ax, check): help="check the result of the solve", ) - args, num = parse_args(parser) + args, num, timer = parse_args(parser) run_benchmark( run_scan, diff --git a/examples/solve.py b/examples/solve.py index d07642dba..91f92c6dd 100644 --- a/examples/solve.py +++ b/examples/solve.py @@ -17,18 +17,17 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def solve(m, n, nrhs, dtype): a = np.random.rand(m, n).astype(dtype=dtype) b = np.random.rand(n, nrhs).astype(dtype=dtype) - start = time() + timer.start() np.linalg.solve(a, b) - stop = time() + total = timer.stop() - total = (stop - start) / 1000.0 print(f"Elapsed Time: {total} ms") @@ -66,7 +65,7 @@ def solve(m, n, nrhs, dtype): dest="dtype", help="data type", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( solve, diff --git a/examples/sort.py b/examples/sort.py index cfcf7590a..5982f91ea 100644 --- a/examples/sort.py +++ b/examples/sort.py @@ -18,7 +18,7 @@ import argparse import numpy as np -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def check_sorted(a, a_sorted, package, axis=-1): @@ -73,19 +73,18 @@ def run_sort( print("UNKNOWN type " + str(newtype)) assert False - start = time() + timer.start() if argsort: a_sorted = num.argsort(a, axis) else: a_sorted = num.sort(a, axis) - stop = time() + total = timer.stop() if perform_check and not argsort: 
check_sorted(a, a_sorted, package, axis) else: # do we need to synchronize? assert True - total = (stop - start) * 1e-3 if timing: print("Elapsed Time: " + str(total) + " ms") return total @@ -155,7 +154,7 @@ def run_sort( help="use argsort", ) - args, num = parse_args(parser) + args, num, timer = parse_args(parser) run_benchmark( run_sort, diff --git a/examples/stencil.py b/examples/stencil.py index 3eae3c0b1..c0d33c90b 100644 --- a/examples/stencil.py +++ b/examples/stencil.py @@ -17,7 +17,7 @@ import argparse -from benchmark import parse_args, run_benchmark, time +from benchmark import parse_args, run_benchmark def initialize(N): @@ -40,16 +40,15 @@ def run_stencil(N, I, warmup, timing): # noqa: E741 west = grid[1:-1, 0:-2] south = grid[2:, 1:-1] - start = time() + timer.start() for i in range(I + warmup): if i == warmup: - start = time() + timer.start() average = center + north + east + west + south work = 0.2 * average center[:] = work - stop = time() + total = timer.stop() - total = (stop - start) / 1000.0 if timing: print(f"Elapsed Time: {total} ms") return total @@ -89,7 +88,7 @@ def run_stencil(N, I, warmup, timing): # noqa: E741 help="perform timing", ) - args, np = parse_args(parser) + args, np, timer = parse_args(parser) run_benchmark( run_stencil, From 56d35204916f26e2672af38a6e01392af677c080 Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Tue, 10 Jan 2023 09:06:33 +0800 Subject: [PATCH 76/89] Enhance test_inner.py and test_tensordot.py (#748) --- tests/integration/test_dot.py | 4 +-- tests/integration/test_inner.py | 27 ++++++++++++++++++ tests/integration/test_tensordot.py | 44 +++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+), 2 deletions(-) diff --git a/tests/integration/test_dot.py b/tests/integration/test_dot.py index d0157cbf1..40769c354 100644 --- a/tests/integration/test_dot.py +++ b/tests/integration/test_dot.py @@ -67,9 +67,9 @@ def test_out_invalid_dtype(self, dtype): # In cuNumeric, # for np.float32, it pass # for np.int64, it raises TypeError: Unsupported type: int64 - out = np.zeros((5, 2), dtype=dtype) + out = num.zeros((5, 2), dtype=dtype) with pytest.raises(ValueError): - np.dot(self.A, self.B, out=out) + num.dot(self.A, self.B, out=out) if __name__ == "__main__": diff --git a/tests/integration/test_inner.py b/tests/integration/test_inner.py index 03259ccae..3bc5f761d 100644 --- a/tests/integration/test_inner.py +++ b/tests/integration/test_inner.py @@ -15,7 +15,9 @@ import pytest from legate.core import LEGATE_MAX_DIM from utils.contractions import check_default +from utils.generators import mk_0to1_array +import cunumeric as num from cunumeric.utils import inner_modes @@ -31,6 +33,31 @@ def operation(lib, *args, **kwargs): check_default(name, modes, operation) +class TestInnerErrors: + def setup_method(self): + self.A = mk_0to1_array(num, (5, 3)) + self.B = mk_0to1_array(num, (2, 3)) + + @pytest.mark.parametrize( + "shapeA", + ((3,), (4, 3), (5, 4, 3)), + ids=lambda shapeA: f"(shapeA={shapeA})", + ) + def test_a_b_invalid_shape(self, shapeA): + A = mk_0to1_array(num, shapeA) + B = mk_0to1_array(num, (3, 2)) + with pytest.raises(ValueError): + num.inner(A, B) + + @pytest.mark.parametrize( + "shape", ((5,), (2,), (5, 3)), ids=lambda shape: f"(shape={shape})" + ) + def test_out_invalid_shape(self, shape): + out = num.zeros(shape) + with pytest.raises(ValueError): + num.inner(self.A, self.B, out=out) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_tensordot.py 
b/tests/integration/test_tensordot.py index 3bea9f522..9f8c902ab 100644 --- a/tests/integration/test_tensordot.py +++ b/tests/integration/test_tensordot.py @@ -15,7 +15,9 @@ import pytest from legate.core import LEGATE_MAX_DIM from utils.contractions import check_default +from utils.generators import mk_0to1_array +import cunumeric as num from cunumeric.utils import tensordot_modes @@ -39,6 +41,48 @@ def operation(lib, *args, **kwargs): check_default(name, modes, operation) +class TestTensorDotErrors: + def setup_method(self): + self.A = mk_0to1_array(num, (2, 3, 4)) + self.B = mk_0to1_array(num, (3, 2, 4)) + + @pytest.mark.parametrize( + "axis", + ( + 1, + 2, + [], + [0], + [0, 0], + ([0, 1], [0, 1]), + ([0, 1], [1, 0], [0, 1]), + ([0, 0], [0, 0]), + ), + ids=lambda axis: f"(axis={axis})", + ) + def test_axis_invalid_value(self, axis): + with pytest.raises(ValueError): + num.tensordot(self.A, self.B, axis) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "axis", (4, ([0, 3], [1, 3])), ids=lambda axis: f"(axis={axis})" + ) + def test_axis_invalid_index(self, axis): + # In Numpy, for both cases, it raises IndexError + # In cuNumeric, for both cases, it raises ValueError + with pytest.raises(IndexError): + num.tensordot(self.A, self.B, axis) + + @pytest.mark.parametrize( + "shape", ((4,), (4, 3)), ids=lambda shape: f"(shape={shape})" + ) + def test_out_invalid_shape(self, shape): + out = num.zeros(shape) + with pytest.raises(ValueError): + num.tensordot(self.A, self.B, out=out) + + if __name__ == "__main__": import sys From 330175735d0fedf831961a06cdfc53eb10ff46c7 Mon Sep 17 00:00:00 2001 From: xialu00 <110973296+xialu00@users.noreply.github.com> Date: Tue, 10 Jan 2023 14:31:02 +0800 Subject: [PATCH 77/89] Testcase enhance test_bits.py and test_contains.py (#742) * add negative test case for test_convolve.py * add test case for test_astype.py * add test case for test_astype.py * fix bug * enhance test_bincount.py * enhance test_bincount.py * enhance test_cholesky.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_prod.py * fix bug tests/integration/test_prod.py * enhance test_bits.py and test_contains.py * fix bugs --- tests/integration/test_bits.py | 176 ++++++++++++++++++++++++----- tests/integration/test_contains.py | 48 ++++++-- 2 files changed, 186 insertions(+), 38 deletions(-) diff --git a/tests/integration/test_bits.py b/tests/integration/test_bits.py index 825c2afd7..08437ea25 100644 --- a/tests/integration/test_bits.py +++ b/tests/integration/test_bits.py @@ -21,18 +21,54 @@ import cunumeric as num -@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) -@pytest.mark.parametrize("dtype", ("B", "i", "?")) -@pytest.mark.parametrize("bitorder", ("little", "big")) -def test_packbits(ndim, dtype, bitorder): - in_np = np.array([], dtype=dtype) - in_num = num.array([], dtype=dtype) - out_np = np.packbits(in_np, bitorder=bitorder) - out_num = num.packbits(in_num, bitorder=bitorder) - assert np.array_equal(out_np, out_num) - - for extent in (3, 5, 8, 16): - shape = (extent,) * ndim +class TestPackbits(object): + def test_none_arr(self): + # Numpy raises "TypeError: + # Expected an input array of integer or boolean data type" + # For cuNumeric raises: + # > if a.dtype.kind not in ("u", "i", "b"): + # E AttributeError: 'NoneType' object has no attribute 'dtype' + with pytest.raises(AttributeError): + num.packbits(None) + + def test_dtype(self): + shape = (3, 3) + in_num = 
num.random.random(size=shape) + # TypeError: Expected an input array of integer or boolean data type + with pytest.raises(TypeError): + num.packbits(in_num) + + def test_axis_outbound(self): + shape = (3, 3) + in_num = num.random.randint(low=0, high=2, size=shape) + with pytest.raises(ValueError): + num.packbits(in_num, axis=2) + + @pytest.mark.parametrize("bitorder", (1, True, "True", "BIG", "LITTLE")) + def test_bitorder_negative(self, bitorder): + shape = (3, 3) + in_num = num.random.randint(low=0, high=2, size=shape, dtype="i") + # when bitorder is 1 or True, Numpy raises + # "TypeError: pack() argument 3 must be str". + # while cuNumeric raises valueError. + with pytest.raises(ValueError): + num.packbits(in_num, bitorder=bitorder) + + @pytest.mark.parametrize("arr", ([], [[]])) + @pytest.mark.parametrize("dtype", ("B", "i", "?")) + @pytest.mark.parametrize("bitorder", ("little", "big")) + def test_arr(self, arr, dtype, bitorder): + in_np = np.array(arr, dtype=dtype) + in_num = num.array(arr, dtype=dtype) + out_np = np.packbits(in_np, bitorder=bitorder) + out_num = num.packbits(in_num, bitorder=bitorder) + assert np.array_equal(out_np, out_num) + + @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) + @pytest.mark.parametrize("dtype", ("B", "i", "?")) + @pytest.mark.parametrize("bitorder", ("little", "big")) + def test_common(self, ndim, dtype, bitorder): + shape = (3,) * ndim in_np = np.random.randint(low=0, high=2, size=shape, dtype=dtype) in_num = num.array(in_np) @@ -40,48 +76,130 @@ def test_packbits(ndim, dtype, bitorder): out_num = num.packbits(in_num, bitorder=bitorder) assert np.array_equal(out_np, out_num) - for axis in range(ndim): + @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) + @pytest.mark.parametrize("dtype", ("B", "i", "?")) + @pytest.mark.parametrize("bitorder", ("little", "big")) + def test_axis(self, ndim, dtype, bitorder): + shape = (5,) * ndim + in_np = np.random.randint(low=0, high=2, size=shape, dtype=dtype) + in_num = num.array(in_np) + + for axis in range(-ndim + 1, ndim): out_np = np.packbits(in_np, axis=axis, bitorder=bitorder) out_num = num.packbits(in_num, axis=axis, bitorder=bitorder) assert np.array_equal(out_np, out_num) -@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) -@pytest.mark.parametrize("bitorder", ("little", "big")) -def test_unpackbits(ndim, bitorder): - in_np = np.array([], dtype="B") - in_num = num.array([], dtype="B") - out_np = np.unpackbits(in_np, bitorder=bitorder) - out_num = num.unpackbits(in_num, bitorder=bitorder) - assert np.array_equal(out_np, out_num) - - for extent in (3, 5, 8, 16): - shape = (extent,) * ndim +class TestUnpackbits(object): + def test_none_arr(self): + # Numpy raises "TypeError: + # TypeError: Expected an input array of unsigned byte data type + # For cuNumeric raises: + # > if a.dtype != "B": + # E AttributeError: 'NoneType' object has no attribute 'dtype' + with pytest.raises(AttributeError): + num.unpackbits(None) + + def test_dtype(self): + shape = (3, 3) + in_num = num.random.random(size=shape) + # TypeError: Expected an input array of unsigned byte data type + with pytest.raises(TypeError): + num.unpackbits(in_num) + + def test_axis_outbound(self): + shape = (3, 3) in_np = np.random.randint(low=0, high=255, size=shape, dtype="B") in_num = num.array(in_np) + with pytest.raises(ValueError): + num.unpackbits(in_num, axis=2) + @pytest.mark.parametrize("bitorder", (1, True, "True", "BIG", "LITTLE")) + def test_bitorder_negative(self, bitorder): + shape = (3, 3) + in_np = 
np.random.randint(low=0, high=255, size=shape, dtype="B") + in_num = num.array(in_np) + # when bitorder is 1 or True, Numpy raises + # "TypeError: unpack() argument 4 must be str". + # while cuNumeric raises valueError. + with pytest.raises(ValueError): + num.unpackbits(in_num, bitorder=bitorder) + + @pytest.mark.parametrize("arr", ([], [[]])) + @pytest.mark.parametrize("bitorder", ("little", "big")) + def test_arr(self, arr, bitorder): + in_np = np.array(arr, dtype="B") + in_num = num.array(arr, dtype="B") out_np = np.unpackbits(in_np, bitorder=bitorder) out_num = num.unpackbits(in_num, bitorder=bitorder) assert np.array_equal(out_np, out_num) - out_np = np.unpackbits(in_np, count=extent // 2, bitorder=bitorder) - out_num = num.unpackbits(in_num, count=extent // 2, bitorder=bitorder) + @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) + @pytest.mark.parametrize("bitorder", ("little", "big")) + def test_common(self, ndim, bitorder): + shape = (5,) * ndim + in_np = np.random.randint(low=0, high=255, size=shape, dtype="B") + in_num = num.array(in_np) + + out_np = np.unpackbits(in_np, bitorder=bitorder) + out_num = num.unpackbits(in_num, bitorder=bitorder) assert np.array_equal(out_np, out_num) - for axis in range(ndim): + @pytest.mark.parametrize("count", (-9, 4, -1, 0, 4, 8, 9)) + @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) + @pytest.mark.parametrize("bitorder", ("little", "big")) + def test_count(self, ndim, count, bitorder): + shape = (5,) * ndim + in_np = np.random.randint(low=0, high=255, size=shape, dtype="B") + in_num = num.array(in_np) + + out_np = np.unpackbits(in_np, count=count, bitorder=bitorder) + out_num = num.unpackbits(in_num, count=count, bitorder=bitorder) + assert np.array_equal(out_np, out_num) + + @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) + @pytest.mark.parametrize("bitorder", ("little", "big")) + def test_axis(self, ndim, bitorder): + shape = (5,) * ndim + in_np = np.random.randint(low=0, high=255, size=shape, dtype="B") + in_num = num.array(in_np) + + for axis in range(-ndim + 1, ndim): out_np = np.unpackbits(in_np, axis=axis, bitorder=bitorder) out_num = num.unpackbits(in_num, axis=axis, bitorder=bitorder) assert np.array_equal(out_np, out_num) + @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) + @pytest.mark.parametrize("bitorder", ("little", "big")) + @pytest.mark.parametrize("count", (-2, 0, 2, 5)) + def test_axis_count(self, ndim, bitorder, count): + shape = (5,) * ndim + in_np = np.random.randint(low=0, high=255, size=shape, dtype="B") + in_num = num.array(in_np) + + for axis in range(-ndim + 1, ndim): out_np = np.unpackbits( - in_np, count=extent // 2, axis=axis, bitorder=bitorder + in_np, count=count, axis=axis, bitorder=bitorder ) out_num = num.unpackbits( - in_num, count=extent // 2, axis=axis, bitorder=bitorder + in_num, count=count, axis=axis, bitorder=bitorder ) assert np.array_equal(out_np, out_num) +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +@pytest.mark.parametrize("bitorder", ("little", "big")) +@pytest.mark.parametrize("dtype", ("B", "i", "?")) +def test_pack_unpack(ndim, bitorder, dtype): + shape = (8,) * ndim + in_np = np.random.randint(low=0, high=2, size=shape, dtype=dtype) + in_num = num.array(in_np) + for axis in range(ndim): + out_b = num.packbits(in_num, axis=axis) + out_p = num.unpackbits(out_b, count=in_num.shape[0], axis=axis) + assert np.array_equal(in_num, out_p) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_contains.py 
b/tests/integration/test_contains.py index c97716c3a..23811ba74 100644 --- a/tests/integration/test_contains.py +++ b/tests/integration/test_contains.py @@ -12,19 +12,49 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import pytest - -import cunumeric as num +from functools import reduce -def test_True(): - x = num.array([1, 2, 3, 4, 5]) - assert 4 in x +import pytest +from utils.generators import mk_seq_array +import cunumeric as num -def test_False(): - x = num.array([1, 2, 3, 4, 5]) - assert 6 not in x +DIM = 128 +NO_EMPTY_SIZES = [ + (DIM,), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] + + +@pytest.mark.parametrize("size", NO_EMPTY_SIZES) +def test_int(size): + arr = mk_seq_array(num, shape=size) + max_data = reduce(lambda x, y: x * y, size) + assert -1 not in arr + assert 0 not in arr + assert 1 in arr + assert max_data // 2 in arr + assert max_data in arr + assert max_data + 1 not in arr + + +@pytest.mark.parametrize("size", NO_EMPTY_SIZES) +def test_complex(size): + arr = mk_seq_array(num, shape=size) + mk_seq_array(num, shape=size) * 1.0j + max_data = reduce(lambda x, y: x * y, size) + assert -1 not in arr + assert 0 not in arr + assert 1 + 1.0j in arr + assert (max_data // 2) + (max_data // 2) * 1.0j in arr + assert max_data + max_data * 1.0j in arr + assert (max_data + 1) + (max_data + 1) * 1.0j not in arr if __name__ == "__main__": From c9d44c9f5970a855f384934227877b31929bcff3 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Thu, 12 Jan 2023 14:31:47 -0800 Subject: [PATCH 78/89] fix assertion in config test (#749) --- tests/unit/cunumeric/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/cunumeric/test_config.py b/tests/unit/cunumeric/test_config.py index a3cbd1529..98171b811 100644 --- a/tests/unit/cunumeric/test_config.py +++ b/tests/unit/cunumeric/test_config.py @@ -113,7 +113,7 @@ def test_destroy(self, mock_destroy) -> None: lib.initialize(_FakeSO) lib.set_runtime(runtime) lib.destroy() - assert mock_destroy.called_once_with() + mock_destroy.assert_called_once_with() def test_CUNUMERIC_LIB_NAME() -> None: From be6767e2535dcc50fe35b31094b7be02503678d7 Mon Sep 17 00:00:00 2001 From: Irina Demeshko Date: Tue, 17 Jan 2023 14:42:57 -0800 Subject: [PATCH 79/89] adding new version for documentations (#751) --- docs/cunumeric/source/versions.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/cunumeric/source/versions.rst b/docs/cunumeric/source/versions.rst index c7c1e0ca6..ef6b7a83d 100644 --- a/docs/cunumeric/source/versions.rst +++ b/docs/cunumeric/source/versions.rst @@ -10,3 +10,5 @@ Versions 22.05 22.08 22.10 + 23.01 + From 21879b986bc3e0da2d36175823a983cb1b39c3d4 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 18 Jan 2023 10:21:40 -0800 Subject: [PATCH 80/89] update legate arg processing apis --- cunumeric/runtime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cunumeric/runtime.py b/cunumeric/runtime.py index 26d8ab207..603a69a47 100644 --- a/cunumeric/runtime.py +++ b/cunumeric/runtime.py @@ -23,7 +23,7 @@ import numpy as np from legate.core import LEGATE_MAX_DIM, Rect, get_legate_runtime, legion from legate.core.context import Context as LegateContext -from legate.rc import ArgSpec, Argument, parse_command_args +from legate.util.args import ArgSpec, Argument, parse_library_command_args from typing_extensions import TypeGuard from .config import ( @@ -149,7 
+149,7 @@ def __init__(self, legate_context: LegateContext) -> None: self.has_curand = cunumeric_lib.shared_object.cunumeric_has_curand() self._register_dtypes() - self.args = parse_command_args("cunumeric", ARGS) + self.args = parse_library_command_args("cunumeric", ARGS) self.args.warning = self.args.warning or self.args.test_mode if self.num_gpus > 0 and self.args.preload_cudalibs: From 7e3afb90f1310a0c5d595b8f0f2ba436c7259e0b Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 18 Jan 2023 15:48:58 -0800 Subject: [PATCH 81/89] Don't turn on cuNumeric debug checks on debug-rel builds (#753) --- cunumeric_cpp.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric_cpp.cmake b/cunumeric_cpp.cmake index 9ab2741b3..7034bb600 100644 --- a/cunumeric_cpp.cmake +++ b/cunumeric_cpp.cmake @@ -331,7 +331,7 @@ list(APPEND cunumeric_SOURCES src/cunumeric/cunumeric.cc ) -if(NOT CMAKE_BUILD_TYPE STREQUAL "Release") +if(CMAKE_BUILD_TYPE STREQUAL "Debug") list(APPEND cunumeric_CXX_DEFS DEBUG_CUNUMERIC) list(APPEND cunumeric_CUDA_DEFS DEBUG_CUNUMERIC) endif() From a45f6801f23c0f05b5644698cc84ac9dcca39c4b Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Fri, 20 Jan 2023 16:54:57 -0800 Subject: [PATCH 82/89] Pass `CMAKE_GENERATOR` to scikit-build (#750) * pass cmake_generator to skbuild as envvar so it overrides skbuild's generator detection * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * change variable name Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- install.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/install.py b/install.py index 8bed64992..c54c1968a 100755 --- a/install.py +++ b/install.py @@ -306,12 +306,6 @@ def validate_path(path): # Also use preexisting CMAKE_ARGS from conda if set cmake_flags = cmd_env.get("CMAKE_ARGS", "").split(" ") - if cmake_generator: - if " " not in cmake_generator: - cmake_flags += [f"-G{cmake_generator}"] - else: - cmake_flags += [f"-G'{cmake_generator}'"] - if debug or verbose: cmake_flags += ["--log-level=%s" % ("DEBUG" if debug else "VERBOSE")] @@ -356,10 +350,18 @@ def validate_path(path): cmake_flags += ["-Dlegate_core_ROOT=%s" % legate_dir] cmake_flags += extra_flags + build_flags = [f"-j{str(thread_count)}"] + if verbose: + if cmake_generator == "Unix Makefiles": + build_flags += ["VERBOSE=1"] + else: + build_flags += ["--verbose"] + cmd_env.update( { - "SKBUILD_BUILD_OPTIONS": f"-j{str(thread_count)}", "CMAKE_ARGS": " ".join(cmake_flags), + "CMAKE_GENERATOR": cmake_generator, + "SKBUILD_BUILD_OPTIONS": " ".join(build_flags), } ) @@ -488,7 +490,10 @@ def driver(): "--cmake-generator", dest="cmake_generator", required=False, - default=(None if shutil.which("ninja") is None else "Ninja"), + default=os.environ.get( + "CMAKE_GENERATOR", + "Unix Makefiles" if shutil.which("ninja") is None else "Ninja", + ), choices=["Ninja", "Unix Makefiles", None], help="The CMake makefiles generator", ) From 35ebbb741838c34f0f0cb9d96ad0077cbb424d14 Mon Sep 17 00:00:00 2001 From: Paul Taylor Date: Mon, 23 Jan 2023 14:26:08 -0800 Subject: [PATCH 83/89] Move `pip uninstall` step before CMake is run instead of after. 
(#760) * fix issue of pip removing C++ libs from previous installations * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * skip uninstall if editable Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- install.py | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/install.py b/install.py index c54c1968a..ce4544edd 100755 --- a/install.py +++ b/install.py @@ -76,10 +76,13 @@ def __call__(self, parser, namespace, values, option_string): setattr(namespace, self.dest, not option_string.startswith("--no")) -def execute_command(args, verbose, **kwargs): +def execute_command(args, verbose, ignore_errors=False, **kwargs): if verbose: print('Executing: "', " ".join(args), '" with ', kwargs) - subprocess.check_call(args, **kwargs) + if ignore_errors: + subprocess.call(args, **kwargs) + else: + subprocess.check_call(args, **kwargs) def scikit_build_cmake_build_dir(skbuild_dir): @@ -254,6 +257,29 @@ def validate_path(path): print("Performing a clean build to accommodate build isolation.") clean_first = True + cmd_env = dict(os.environ.items()) + + # Explicitly uninstall cunumeric if doing a clean/isolated build. + # + # A prior installation may have built and installed cunumeric C++ + # dependencies (like BLAS or tblis). + # + # CMake will find and use them for the current build, which would normally + # be correct, but pip uninstalls files from any existing installation as + # the last step of the install process, including the libraries found by + # CMake during the current build. + # + # Therefore this uninstall step must occur *before* CMake attempts to find + # these dependencies, triggering CMake to build and install them again. + if clean_first or (build_isolation and not editable): + execute_command( + [sys.executable, "-m", "pip", "uninstall", "-y", "cunumeric"], + verbose, + ignore_errors=True, + cwd=cunumeric_dir, + env=cmd_env, + ) + if clean_first: shutil.rmtree(skbuild_dir, ignore_errors=True) shutil.rmtree(join(cunumeric_dir, "dist"), ignore_errors=True) @@ -265,7 +291,6 @@ def validate_path(path): # Configure and build cuNumeric via setup.py pip_install_cmd = [sys.executable, "-m", "pip", "install"] - cmd_env = dict(os.environ.items()) install_dir = None From 5f17dd6af24441484f33564c09a1c78892b94a89 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Mon, 23 Jan 2023 15:35:34 -0800 Subject: [PATCH 84/89] Change march to haswell when on x86 platforms (#762) Use haswell by default on x86 platforms. 
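For context, a minimal sketch of the argument default this patch produces (the wiring is illustrative, install.py defines many options around it, and the rationale in the comment is an inference rather than part of the original message):

    import platform
    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument(
        "--march",
        dest="march",
        required=False,
        # A fixed haswell baseline avoids tuning prebuilt x86_64 binaries
        # to whatever CPU happens to run the build; non-x86 machines keep
        # the old machine-specific default.
        default=("haswell" if platform.machine() == "x86_64" else "native"),
        help="Specify the target CPU architecture.",
    )

Builds that do want machine-specific tuning can still pass --march=native explicitly.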
--- install.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/install.py b/install.py index ce4544edd..96dc13242 100755 --- a/install.py +++ b/install.py @@ -554,7 +554,7 @@ def driver(): "--march", dest="march", required=False, - default="native", + default=("haswell" if platform.machine() == "x86_64" else "native"), help="Specify the target CPU architecture.", ) parser.add_argument( From 638adbc79d6ad6be9b78d9f2095102ee00926450 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 24 Jan 2023 09:44:31 -0800 Subject: [PATCH 85/89] [pre-commit.ci] pre-commit autoupdate (#763) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-clang-format: v15.0.6 → v15.0.7](https://github.com/pre-commit/mirrors-clang-format/compare/v15.0.6...v15.0.7) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index eefd667d3..bc47df8a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v15.0.6' # Use the sha / tag you want to point at + rev: 'v15.0.7' # Use the sha / tag you want to point at hooks: - id: clang-format files: \.(cu|cuh|h|cc|inl)$ From 47d65d99da5ab24e2d99f5b1912c55dfebbf4091 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 24 Jan 2023 11:35:02 -0800 Subject: [PATCH 86/89] Force conda version of cutensor (#765) * Force conda version of cutensor * Change cutensor package spec --- conda/conda-build/meta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml index b1f2de956..cc352ff62 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -125,7 +125,7 @@ requirements: - cuda-cudart-dev ={{ cuda_version }} - cuda-nvtx ={{ cuda_version }} # - libcutensor-dev >=1.3 - - cutensor >=1.3 + - cutensor >=1.3 =*_* - libcublas-dev - libcusolver-dev - libcufft-dev @@ -141,7 +141,7 @@ requirements: - legate-core ={{ core_version }} - cuda-cudart >={{ cuda_version }} # - libcutensor >=1.3 - - cutensor >=1.3 + - cutensor >=1.3 =*_* - libcublas - libcusolver =11.4.1.48-0 - libcufft From f26a05d85ced680edfe66ea24f9f87d643078cb4 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 24 Jan 2023 14:28:43 -0800 Subject: [PATCH 87/89] handle numpy 'builtins' properly for coverage (#766) --- cunumeric/coverage.py | 22 ++++++++++++++++++---- cunumeric/random/__init__.py | 2 +- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/cunumeric/coverage.py b/cunumeric/coverage.py index f8f4446ae..3efad0342 100644 --- a/cunumeric/coverage.py +++ b/cunumeric/coverage.py @@ -17,7 +17,13 @@ import warnings from dataclasses import dataclass from functools import wraps -from types import FunctionType, MethodDescriptorType, MethodType, ModuleType +from types import ( + BuiltinFunctionType, + FunctionType, + MethodDescriptorType, + MethodType, + ModuleType, +) from typing import Any, Container, Mapping, Optional, cast import numpy as np @@ -194,7 +200,9 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: def clone_module( - origin_module: ModuleType, new_globals: dict[str, Any] + origin_module: ModuleType, + new_globals: dict[str, Any], + 
include_builtin_function_type: bool = False, ) -> None: """Copy attributes from one module to another, excluding submodules @@ -230,7 +238,10 @@ def clone_module( # Only need to wrap things that are in the origin module to begin with if attr not in origin_module.__dict__: continue - if isinstance(value, (FunctionType, lgufunc)): + if isinstance(value, (FunctionType, lgufunc)) or ( + include_builtin_function_type + and isinstance(value, BuiltinFunctionType) + ): wrapped = implemented( cast(AnyCallable, value), mod_name, attr, reporting=reporting ) @@ -239,7 +250,10 @@ def clone_module( from numpy import ufunc as npufunc for attr, value in missing.items(): - if isinstance(value, (FunctionType, npufunc)): + if isinstance(value, (FunctionType, npufunc)) or ( + include_builtin_function_type + and isinstance(value, BuiltinFunctionType) + ): wrapped = unimplemented(value, mod_name, attr, reporting=reporting) new_globals[attr] = wrapped else: diff --git a/cunumeric/random/__init__.py b/cunumeric/random/__init__.py index a9730d063..2f8a98460 100644 --- a/cunumeric/random/__init__.py +++ b/cunumeric/random/__init__.py @@ -25,7 +25,7 @@ else: from cunumeric.random.legacy import * -clone_module(_nprandom, globals()) +clone_module(_nprandom, globals(), include_builtin_function_type=True) del clone_module del _nprandom From da3319799e5676d6375db8a6b4dd774fc016a5ac Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 27 Jan 2023 10:32:30 -0800 Subject: [PATCH 88/89] Update the architectures built in conda package (#770) (#771) Co-authored-by: Marcin Zalewski --- conda/conda-build/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index d0df68008..19b1f1f48 100644 --- a/conda/conda-build/build.sh +++ b/conda/conda-build/build.sh @@ -13,7 +13,7 @@ if [ -z "$CPU_ONLY" ]; then # cutensor, relying on the conda cutensor package CMAKE_ARGS+=" -Dcutensor_DIR=$PREFIX --DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;86 +-DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;90 " else # When we build without cuda, we need to provide the location of curand From 1817dc773dcf95f7214416bab33d8aee3fcf2926 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Mon, 30 Jan 2023 14:33:03 -0800 Subject: [PATCH 89/89] Revert "Update the architectures built in conda package (#770) (#771)" (#772) This reverts commit da3319799e5676d6375db8a6b4dd774fc016a5ac. Co-authored-by: Marcin Zalewski --- conda/conda-build/build.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index 19b1f1f48..d0df68008 100644 --- a/conda/conda-build/build.sh +++ b/conda/conda-build/build.sh @@ -13,7 +13,7 @@ if [ -z "$CPU_ONLY" ]; then # cutensor, relying on the conda cutensor package CMAKE_ARGS+=" -Dcutensor_DIR=$PREFIX --DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;90 +-DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;86 " else # When we build without cuda, we need to provide the location of curand
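Two closing notes on this patch set.

First, the examples/*.py changes near the top of the series replace manual "(stop - start) / 1000.0" arithmetic with a timer object that parse_args now returns alongside the module handle. The updated examples only assume the small start/stop contract sketched below; this stand-in is illustrative and just reads the wall clock, while the real object comes from the shared benchmark helpers and may defer the measurement to the runtime:

    import time

    class Timer:
        # Minimal stand-in for the object returned by parse_args().
        def start(self):
            self._t0 = time.perf_counter()

        def stop(self):
            # Elapsed milliseconds, matching the examples'
            # "Elapsed Time: ... ms" prints.
            return (time.perf_counter() - self._t0) * 1000.0

Second, background for the coverage fix in PATCH 87: most numpy.random entry points are Cython-compiled callables rather than Python functions, so the existing FunctionType check skipped them and they were copied into the cloned module without wrapping. A small demonstration with plain numpy (exact types can vary across numpy versions):

    from types import BuiltinFunctionType, FunctionType

    import numpy as np

    # A regular Python function in the numpy namespace:
    print(isinstance(np.array_equal, FunctionType))         # True
    # numpy.random callables are C-level builtins instead:
    print(isinstance(np.random.seed, FunctionType))         # False
    print(isinstance(np.random.seed, BuiltinFunctionType))  # True

Passing include_builtin_function_type=True, as cunumeric/random/__init__.py now does, lets clone_module give these callables the same implemented/unimplemented reporting as ordinary functions.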