From 9c76d57279d3c2bbc4103fcab58616ee37637967 Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 7 Mar 2023 13:16:42 -0800 Subject: [PATCH 001/106] Update BUILD.md (#831) * Update BUILD.md * Update BUILD.md --- BUILD.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/BUILD.md b/BUILD.md index 6adcc4916..9f4547e00 100644 --- a/BUILD.md +++ b/BUILD.md @@ -77,4 +77,4 @@ The Python source tree and CMake build tree are now available with the environme for running cuNumeric programs. The diagram below illustrates the complete workflow for building both Legate core and cuNumeric. -drawing +drawing From f4ea33ba26fe321f223a2c4c369e5135e82eca03 Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Tue, 14 Mar 2023 11:15:39 +0800 Subject: [PATCH 002/106] Enhance test on concatenate and stack APIs. (#828) * Enhance test on concatenate and stack APIs. * Update module.py to the latest and then modify. * Revert one change in module.py --- cunumeric/module.py | 2 +- tests/integration/test_concatenate_stack.py | 425 +++++++++++++++++++- tests/integration/test_vstack.py | 64 --- 3 files changed, 425 insertions(+), 66 deletions(-) delete mode 100644 tests/integration/test_vstack.py diff --git a/cunumeric/module.py b/cunumeric/module.py index 84dc76924..9e2175d9b 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -1806,7 +1806,7 @@ def stack( Multiple GPUs, Multiple CPUs """ if type(axis) is not int: - raise ValueError("The target axis should be an integer") + raise TypeError("The target axis should be an integer") arrays, common_info = check_shape_dtype(arrays, stack.__name__, axis) diff --git a/tests/integration/test_concatenate_stack.py b/tests/integration/test_concatenate_stack.py index dad21b4bd..69dd3ad24 100644 --- a/tests/integration/test_concatenate_stack.py +++ b/tests/integration/test_concatenate_stack.py @@ -28,6 +28,9 @@ def run_test(arr, routine, input_size): input_arr.append([axis for axis in range(arr[0].ndim)]) # test axis == 'None' for concatenate if routine == "concatenate": + # test axis == -1 if ndim > 0 + if arr[0].ndim > 0: + input_arr[-1].append(-1) input_arr[-1].append(None) # 'out' argument input_arr.append([None]) @@ -71,6 +74,9 @@ def run_test(arr, routine, input_size): NUM_ARR = [1, 3] SIZES = [ + # In Numpy, hstack and column_stack PASS + # In cuNumeric, hstack and column_stack raise IndexError + pytest.param((), marks=pytest.mark.xfail), # for scalar. 
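+    # the remaining sizes cover 1-d through 3-d shapes, including empty ones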
(0,), (0, 10), (1,), @@ -82,7 +88,7 @@ def run_test(arr, routine, input_size): ] -@pytest.fixture(autouse=True) +@pytest.fixture(autouse=False) def a(size, num): return [np.random.randint(low=0, high=100, size=size) for _ in range(num)] @@ -93,24 +99,341 @@ def test_concatenate(size, num, a): run_test(tuple(a), "concatenate", size) +def test_concatenate_with_out(): + a = [[1, 2], [3, 4]] + b = [[5, 6]] + axis = 0 + out_np = np.zeros((3, 2)) + out_num = num.array(out_np) + + np.concatenate((np.array(a), np.array(b)), axis=axis, out=out_np) + num.concatenate((num.array(a), num.array(b)), axis=axis, out=out_num) + assert np.array_equal(out_np, out_num) + + +@pytest.mark.parametrize( + "dtype", (np.float32, np.int32), ids=lambda dtype: f"(dtype={dtype})" +) +def test_concatenate_dtype(dtype): + a = [[1, 2], [3, 4]] + b = [[5, 6]] + axis = 0 + + res_np = np.concatenate((np.array(a), np.array(b)), axis=axis, dtype=dtype) + res_num = num.concatenate( + (num.array(a), num.array(b)), axis=axis, dtype=dtype + ) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize( + "casting", + ("no", "equiv", "safe", "same_kind", "unsafe"), + ids=lambda casting: f"(casting={casting})", +) +def test_concatenate_casting(casting): + a = [[1, 2], [3, 4]] + b = [[5, 6]] + axis = 0 + + res_np = np.concatenate( + (np.array(a), np.array(b)), axis=axis, casting=casting + ) + res_num = num.concatenate( + (num.array(a), num.array(b)), axis=axis, casting=casting + ) + assert np.array_equal(res_np, res_num) + + +class TestConcatenateErrors: + def test_zero_arrays(self): + expected_exc = ValueError + arrays = () + axis = None + with pytest.raises(expected_exc): + np.concatenate(arrays, axis=axis) + with pytest.raises(expected_exc): + num.concatenate(arrays, axis=axis) + + @pytest.mark.parametrize( + "arrays", + ( + pytest.param((1,), marks=pytest.mark.xfail), + pytest.param((1, 2), marks=pytest.mark.xfail), + (1, [3, 4]), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_scalar_axis_is_not_none(self, arrays): + # For (1,) and (1, 2), + # In Numpy, it raises ValueError + # In cuNumeric, it raises IndexError + expected_exc = ValueError + axis = 0 + with pytest.raises(expected_exc): + np.concatenate(arrays, axis=axis) + with pytest.raises(expected_exc): + num.concatenate(arrays, axis=axis) + + @pytest.mark.parametrize( + "arrays", + ( + ([[1, 2], [3, 4]], [5, 6]), + ([[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10]]), + pytest.param( + ([[1, 2], [3, 4]], [[5, 6]]), marks=pytest.mark.xfail + ), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_arrays_mismatched_shape(self, arrays): + # for ([[1, 2], [3, 4]], [[5, 6]]), + # In Numpy, it raises ValueError + # In cuNumeric, it pass + expected_exc = ValueError + axis = 1 + with pytest.raises(expected_exc): + np.concatenate(arrays, axis=axis) + with pytest.raises(expected_exc): + num.concatenate(arrays, axis=axis) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "axis", + (1, -2), + ids=lambda axis: f"(axis={axis})", + ) + def test_axis_out_of_bound(self, axis): + # For axis=-2 or 1, + # In Numpy, it raises ValueError + # In cuNumeric, it raises IndexError + expected_exc = ValueError + a = [1, 2] + b = [5, 6] + with pytest.raises(expected_exc): + np.concatenate((np.array(a), np.array(b)), axis=axis) + with pytest.raises(expected_exc): + num.concatenate((num.array(a), num.array(b)), axis=axis) + + @pytest.mark.xfail + def test_both_out_dtype_are_provided(self): + # In Numpy, it raises TypeError + # In cuNumeric, it pass + expected_exc = TypeError 
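+        # NumPy treats out= and dtype= as mutually exclusive here, e.g.
+        # np.concatenate((x, y), out=buf, dtype=np.float32) raises TypeError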
+ a = [[1, 2], [3, 4]] + b = [[5, 6]] + axis = 0 + out_np = np.zeros((3, 2)) + out_num = num.array(out_np) + dtype = np.float32 + + with pytest.raises(expected_exc): + np.concatenate( + (np.array(a), np.array(b)), axis=axis, out=out_np, dtype=dtype + ) + with pytest.raises(expected_exc): + num.concatenate( + (num.array(a), num.array(b)), + axis=axis, + out=out_num, + dtype=dtype, + ) + + @pytest.mark.xfail + def test_invalid_casting(self): + # In Numpy, raise ValueError + # In cuNumeric, pass + expected_exc = ValueError + a = [[1, 2], [3, 4]] + b = [[5, 6]] + axis = 0 + casting = "unknown" + with pytest.raises(expected_exc): + np.concatenate( + (np.array(a), np.array(b)), axis=axis, casting=casting + ) + with pytest.raises(expected_exc): + num.concatenate( + (num.array(a), num.array(b)), axis=axis, casting=casting + ) + + @pytest.mark.parametrize("num", NUM_ARR, ids=str) @pytest.mark.parametrize("size", SIZES, ids=str) def test_stack(size, num, a): run_test(tuple(a), "stack", size) +def test_stack_with_out(): + a = [1, 2] + b = [3, 4] + axis = 0 + out_np = np.zeros((2, 2)) + out_num = num.array(out_np) + + np.stack((np.array(a), np.array(b)), axis=axis, out=out_np) + num.stack((num.array(a), num.array(b)), axis=axis, out=out_num) + assert np.array_equal(out_np, out_num) + + +@pytest.mark.parametrize( + "axis", + (-3, pytest.param(-1, marks=pytest.mark.xfail)), + ids=lambda axis: f"(axis={axis})", +) +def test_stack_axis_is_negative(axis): + # for -1, the output by Numpy and cuNumeric is not equal + a = [[1, 2], [3, 4]] + b = [[5, 6], [7, 8]] + res_np = np.stack((np.array(a), np.array(b)), axis=axis) + res_num = num.stack((num.array(a), num.array(b)), axis=axis) + assert np.array_equal(res_np, res_num) + + +class TestStackErrors: + def test_zero_arrays(self): + expected_exc = ValueError + arrays = () + with pytest.raises(expected_exc): + np.stack(arrays) + with pytest.raises(expected_exc): + num.stack(arrays) + + @pytest.mark.parametrize( + "arrays", + ( + (1, []), + ([1, 2], [3]), + ([[1, 2], [3, 4]], [[5, 6, 7], [8, 9, 10]]), + ([[1, 2], [3, 4]], [5, 6]), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_arrays_mismatched_shape(self, arrays): + expected_exc = ValueError + with pytest.raises(expected_exc): + np.stack(arrays) + with pytest.raises(expected_exc): + num.stack(arrays) + + def test_axis_is_none(self): + expected_exc = TypeError + a = [1, 2] + b = [5, 6] + axis = None + with pytest.raises(expected_exc): + np.stack((np.array(a), np.array(b)), axis=axis) + with pytest.raises(expected_exc): + num.stack((num.array(a), num.array(b)), axis=axis) + + @pytest.mark.parametrize( + "axis", + (2, pytest.param(-3, marks=pytest.mark.xfail)), + ids=lambda axis: f"(axis={axis})", + ) + def test_axis_out_of_bound(self, axis): + # For axis=-3, + # In Numpy, it raises ValueError + # In cuNumeric, it raises IndexError + expected_exc = ValueError + a = [1, 2] + b = [5, 6] + with pytest.raises(expected_exc): + np.stack((np.array(a), np.array(b)), axis=axis) + with pytest.raises(expected_exc): + num.stack((num.array(a), num.array(b)), axis=axis) + + @pytest.mark.parametrize( + "out_shape", + ((2,), (1, 2), (1, 2, 2)), + ids=lambda out_shape: f"(out_shape={out_shape})", + ) + def test_out_invalid_shape(self, out_shape): + expected_exc = ValueError + a = [1, 2] + b = [3, 4] + axis = 0 + out_np = np.zeros(out_shape) + out_num = num.array(out_np) + + with pytest.raises(expected_exc): + np.stack((np.array(a), np.array(b)), axis=axis, out=out_np) + with pytest.raises(expected_exc): + 
num.stack((num.array(a), num.array(b)), axis=axis, out=out_num) + + @pytest.mark.parametrize("num", NUM_ARR, ids=str) @pytest.mark.parametrize("size", SIZES, ids=str) def test_hstack(size, num, a): run_test(tuple(a), "hstack", size) +class TestHStackErrors: + def test_zero_arrays(self): + expected_exc = ValueError + arrays = () + with pytest.raises(expected_exc): + np.hstack(arrays) + with pytest.raises(expected_exc): + num.hstack(arrays) + + @pytest.mark.parametrize( + "arrays", + ( + ([[1, 2], [3, 4]], [5, 6]), + pytest.param( + ([[1, 2], [3, 4]], [[5, 6]]), marks=pytest.mark.xfail + ), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_arrays_mismatched_shape(self, arrays): + # for ([[1, 2], [3, 4]], [[5, 6]]) + # In Numpy, it raises ValueError + # In cuNumeric, it pass + expected_exc = ValueError + with pytest.raises(expected_exc): + np.hstack(arrays) + with pytest.raises(expected_exc): + num.hstack(arrays) + + @pytest.mark.parametrize("num", NUM_ARR, ids=str) @pytest.mark.parametrize("size", SIZES, ids=str) def test_column_stack(size, num, a): run_test(tuple(a), "column_stack", size) +class TestColumnStackErrors: + def test_zero_arrays(self): + expected_exc = ValueError + arrays = () + with pytest.raises(expected_exc): + np.column_stack(arrays) + with pytest.raises(expected_exc): + num.column_stack(arrays) + + @pytest.mark.parametrize( + "arrays", + ( + (1, []), + pytest.param(([1, 2], [3]), marks=pytest.mark.xfail), + ([[1, 2]], [3, 4]), + ([[1, 2]], [[3], [4]]), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_arrays_mismatched_shape(self, arrays): + # for ([1, 2], [3]), + # In Numpy, it raises ValueError + # In cuNumeric, it pass + expected_exc = ValueError + with pytest.raises(expected_exc): + np.column_stack(arrays) + with pytest.raises(expected_exc): + num.column_stack(arrays) + + @pytest.mark.parametrize("num", NUM_ARR, ids=str) @pytest.mark.parametrize("size", SIZES, ids=str) def test_vstack(size, num, a): @@ -120,6 +443,75 @@ def test_vstack(size, num, a): run_test(tuple(a), "vstack", size) +class TestVStackErrors: + def test_zero_arrays(self): + expected_exc = ValueError + arrays = () + with pytest.raises(expected_exc): + np.vstack(arrays) + with pytest.raises(expected_exc): + num.vstack(arrays) + + @pytest.mark.parametrize( + "arrays", + ( + pytest.param((1, []), marks=pytest.mark.xfail), + pytest.param(([1, 2], [3]), marks=pytest.mark.xfail), + pytest.param(([[1, 2], [3, 4]], [5]), marks=pytest.mark.xfail), + ([[[1, 2], [3, 4]]], [5, 6]), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_arrays_mismatched_shape(self, arrays): + # for (1, []), ([1, 2], [3]), ([[1, 2], [3, 4]], [5]) + # In Numpy, it raises ValueError + # In cuNumeric, it pass + expected_exc = ValueError + with pytest.raises(expected_exc): + np.vstack(arrays) + with pytest.raises(expected_exc): + num.vstack(arrays) + + +@pytest.mark.parametrize("num", NUM_ARR, ids=str) +@pytest.mark.parametrize("size", SIZES, ids=str) +def test_rowstack(size, num, a): + # exception for 1d array on rowstack + if len(size) == 2 and size == (1, DIM): + a.append(np.random.randint(low=0, high=100, size=(DIM,))) + run_test(tuple(a), "row_stack", size) + + +class TestRowStackErrors: + def test_zero_arrays(self): + expected_exc = ValueError + arrays = () + with pytest.raises(expected_exc): + np.row_stack(arrays) + with pytest.raises(expected_exc): + num.row_stack(arrays) + + @pytest.mark.parametrize( + "arrays", + ( + pytest.param((1, []), marks=pytest.mark.xfail), + pytest.param(([1, 2], [3]), 
marks=pytest.mark.xfail), + pytest.param(([[1, 2], [3, 4]], [5]), marks=pytest.mark.xfail), + ([[[1, 2], [3, 4]]], [5, 6]), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_arrays_mismatched_shape(self, arrays): + # for (1, []), ([1, 2], [3]), ([[1, 2], [3, 4]], [5]) + # In Numpy, it raises ValueError + # In cuNumeric, it pass + expected_exc = ValueError + with pytest.raises(expected_exc): + np.row_stack(arrays) + with pytest.raises(expected_exc): + num.row_stack(arrays) + + @pytest.mark.parametrize("num", NUM_ARR, ids=str) @pytest.mark.parametrize("size", SIZES, ids=str) def test_dstack(size, num, a): @@ -129,6 +521,37 @@ def test_dstack(size, num, a): run_test(tuple(a), "dstack", size) +class TestDStackErrors: + def test_zero_arrays(self): + expected_exc = ValueError + arrays = () + with pytest.raises(expected_exc): + np.dstack(arrays) + with pytest.raises(expected_exc): + num.dstack(arrays) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "arrays", + ( + (1, []), + ([1, 2], [5]), + ([[1, 2], [3, 4]], [5, 6]), + ([[1, 2], [3, 4]], [[5], [6]]), + ), + ids=lambda arrays: f"(arrays={arrays})", + ) + def test_arrays_mismatched_shape(self, arrays): + # for all cases, + # In Numpy, it raises ValueError + # In cuNumeric, it pass + expected_exc = ValueError + with pytest.raises(expected_exc): + np.dstack(arrays) + with pytest.raises(expected_exc): + num.dstack(arrays) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_vstack.py b/tests/integration/test_vstack.py deleted file mode 100644 index 6f5a9bfe5..000000000 --- a/tests/integration/test_vstack.py +++ /dev/null @@ -1,64 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# - -import numpy as np -import pytest - -import cunumeric as num - - -def test_1d(): - x = num.array([1, 2, 3]) - y = num.array([4, 5, 6]) - z = num.vstack((x, y)) - - xnp = np.array([1, 2, 3]) - ynp = np.array([4, 5, 6]) - znp = np.vstack((xnp, ynp)) - - assert np.array_equal(z, znp) - - -def test_2d(): - x = num.array([[1, 2, 3], [4, 5, 6]]) - y = num.array([[7, 8, 9], [10, 11, 12]]) - z = num.vstack((x, y)) - - xnp = np.array([[1, 2, 3], [4, 5, 6]]) - ynp = np.array([[7, 8, 9], [10, 11, 12]]) - znp = np.vstack((xnp, ynp)) - - assert np.array_equal(z, znp) - - -def test_3d(): - x = num.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]) - y = num.array([[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]) - z = num.vstack((x, y)) - - xnp = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]) - ynp = np.array( - [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]] - ) - znp = np.vstack((xnp, ynp)) - - assert np.array_equal(z, znp) - - -if __name__ == "__main__": - import sys - - np.random.seed(12345) - sys.exit(pytest.main(sys.argv)) From ce3abd649625fe02785742639ef4c27a6dce1bda Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 14 Mar 2023 09:50:23 -0700 Subject: [PATCH 003/106] Use explicit task creation APIs (#842) --- cunumeric/deferred.py | 18 +++++++++--------- cunumeric/linalg/cholesky.py | 2 +- cunumeric/runtime.py | 12 ++++-------- cunumeric/sort.py | 2 +- 4 files changed, 15 insertions(+), 19 deletions(-) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index 8348e5ce3..c30b4c506 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -1837,7 +1837,7 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: ) shape = self_tmp.shape - task = self.context.create_task(CuNumericOpCode.WRAP) + task = self.context.create_auto_task(CuNumericOpCode.WRAP) task.add_output(indirect.base) task.add_scalar_arg(shape, (ty.int64,)) task.add_scalar_arg(True, bool) # has_input @@ -1867,7 +1867,7 @@ def putmask(self, mask: Any, values: Any) -> None: values_new = values._broadcast(self.shape) else: values_new = values.base - task = self.context.create_task(CuNumericOpCode.PUTMASK) + task = self.context.create_auto_task(CuNumericOpCode.PUTMASK) task.add_input(self.base) task.add_input(mask.base) task.add_input(values_new) @@ -2068,7 +2068,7 @@ def bitgenerator_random_raw( seed: Union[int, None], flags: int, ) -> None: - task = self.context.create_task(CuNumericOpCode.BITGENERATOR) + task = self.context.create_auto_task(CuNumericOpCode.BITGENERATOR) task.add_output(self.base) @@ -2094,7 +2094,7 @@ def bitgenerator_distribution( floatparams: tuple[float, ...], doubleparams: tuple[float, ...], ) -> None: - task = self.context.create_task(CuNumericOpCode.BITGENERATOR) + task = self.context.create_auto_task(CuNumericOpCode.BITGENERATOR) task.add_output(self.base) @@ -3322,7 +3322,7 @@ def where(self, src1: Any, src2: Any, src3: Any) -> None: def argwhere(self) -> NumPyThunk: result = self.runtime.create_unbound_thunk(np.dtype(np.int64), ndim=2) - task = self.context.create_task(CuNumericOpCode.ARGWHERE) + task = self.context.create_auto_task(CuNumericOpCode.ARGWHERE) task.add_output(result.base) task.add_input(self.base) @@ -3393,7 +3393,7 @@ def scan( input.copy(swapped, deep=True) output = input - task = output.context.create_task(CuNumericOpCode.SCAN_LOCAL) + task = output.context.create_auto_task(CuNumericOpCode.SCAN_LOCAL) task.add_output(output.base) task.add_input(input.base) task.add_output(temp.base) @@ -3407,7 +3407,7 
@@ def scan( # NOTE: Assumes the partitioning stays the same from previous task. # NOTE: Each node will do a sum up to its index, alternatively could # do one centralized scan and broadcast (slightly less redundant work) - task = output.context.create_task(CuNumericOpCode.SCAN_GLOBAL) + task = output.context.create_auto_task(CuNumericOpCode.SCAN_GLOBAL) task.add_input(output.base) task.add_input(temp.base) task.add_output(output.base) @@ -3445,7 +3445,7 @@ def unique(self) -> NumPyThunk: @auto_convert("rhs", "v") def searchsorted(self, rhs: Any, v: Any, side: SortSide = "left") -> None: - task = self.context.create_task(CuNumericOpCode.SEARCHSORTED) + task = self.context.create_auto_task(CuNumericOpCode.SEARCHSORTED) is_left = side == "left" @@ -3574,7 +3574,7 @@ def _wrap(self, src: Any, new_len: int) -> None: ), ) - task = self.context.create_task(CuNumericOpCode.WRAP) + task = self.context.create_auto_task(CuNumericOpCode.WRAP) task.add_output(indirect.base) task.add_scalar_arg(src.shape, (ty.int64,)) task.add_scalar_arg(False, bool) # has_input diff --git a/cunumeric/linalg/cholesky.py b/cunumeric/linalg/cholesky.py index 67d3c67a4..7e023d2cd 100644 --- a/cunumeric/linalg/cholesky.py +++ b/cunumeric/linalg/cholesky.py @@ -175,7 +175,7 @@ def choose_color_shape(runtime: Runtime, shape: Shape) -> Shape: def tril_single(context: Context, output: Store) -> None: - task = context.create_task(CuNumericOpCode.TRILU) + task = context.create_auto_task(CuNumericOpCode.TRILU) task.add_output(output) task.add_input(output) task.add_scalar_arg(True, bool) diff --git a/cunumeric/runtime.py b/cunumeric/runtime.py index 82e931c43..f0ad0398b 100644 --- a/cunumeric/runtime.py +++ b/cunumeric/runtime.py @@ -117,18 +117,16 @@ def record_api_call( self.api_calls.append((name, location, implemented)) def _load_cudalibs(self) -> None: - task = self.legate_context.create_task( + task = self.legate_context.create_manual_task( CuNumericOpCode.LOAD_CUDALIBS, - manual=True, launch_domain=Rect(lo=(0,), hi=(self.num_gpus,)), ) task.execute() self.legate_runtime.issue_execution_fence(block=True) def _unload_cudalibs(self) -> None: - task = self.legate_context.create_task( + task = self.legate_context.create_manual_task( CuNumericOpCode.UNLOAD_CUDALIBS, - manual=True, launch_domain=Rect(lo=(0,), hi=(self.num_gpus,)), ) task.execute() @@ -224,9 +222,8 @@ def bitgenerator_create( ) -> int: self.current_random_bitgenid = self.current_random_bitgenid + 1 if forceCreate: - task = self.legate_context.create_task( + task = self.legate_context.create_manual_task( CuNumericOpCode.BITGENERATOR, - manual=True, launch_domain=Rect(lo=(0,), hi=(self.num_procs,)), ) self.bitgenerator_populate_task( @@ -254,9 +251,8 @@ def bitgenerator_destroy( else: # with explicit destruction, do schedule a task self.legate_runtime.issue_execution_fence() - task = self.legate_context.create_task( + task = self.legate_context.create_manual_task( CuNumericOpCode.BITGENERATOR, - manual=True, launch_domain=Rect(lo=(0,), hi=(self.num_procs,)), ) self.bitgenerator_populate_task( diff --git a/cunumeric/sort.py b/cunumeric/sort.py index fbca9146a..93fa63abb 100644 --- a/cunumeric/sort.py +++ b/cunumeric/sort.py @@ -84,7 +84,7 @@ def sort_swapped( def sort_task( output: DeferredArray, input: DeferredArray, argsort: bool, stable: bool ) -> None: - task = output.context.create_task(CuNumericOpCode.SORT) + task = output.context.create_auto_task(CuNumericOpCode.SORT) uses_unbound_output = output.runtime.num_procs > 1 and input.ndim == 1 From 
ed801a81d169cce307f9eea5518e3dee1caadfea Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 14 Mar 2023 09:53:09 -0700 Subject: [PATCH 004/106] Update python usage notes (#841) * Update python usage notes * Update docs/cunumeric/source/user/usage.rst Co-authored-by: Manolis Papadakis --------- Co-authored-by: Manolis Papadakis --- docs/cunumeric/source/user/usage.rst | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/cunumeric/source/user/usage.rst b/docs/cunumeric/source/user/usage.rst index f39e20012..7f03be09d 100644 --- a/docs/cunumeric/source/user/usage.rst +++ b/docs/cunumeric/source/user/usage.rst @@ -54,9 +54,13 @@ interpreter, with some limitations: When running programs with this method, configuration options may only be passed via the ``LEGATE_CONFIG`` environment variable as shown above. -Additionally, several ``legate`` command line configuration options are not -available when running programs this way. See the output of ``legate --help`` -for more details. +.. note:: + + Usage of standard Python is intended as a quick on-ramp for users to try + out cuNumeric more easily. Several ``legate`` command line configuration + options, especially for multi-node execution, are not available when + running programs with standard Python. See the output of ``legate --help`` + for more details. Multi-node execution -------------------- @@ -88,8 +92,8 @@ Using a manual task manager It is also possible to use "standard python" in place of the ``legate`` driver. -Zero code-change patching -------------------------- +Running Numpy programs without changes +-------------------------------------- The ``lgpatch`` script (in the same location as the ``legate`` executable) can help facilitate quick demonstrations of ``cunumeric`` on existing codebases From eea0957327acb43082660019771007698a71602a Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Tue, 14 Mar 2023 09:56:50 -0700 Subject: [PATCH 005/106] Add ufunc methods (#834) * checkpoint * Add some missing fallback conversions, remove b from unary .at * fix coverage wrappers * The unimplemented wrapper can perform the fallback directly * add test * unary as well --------- Co-authored-by: Manolis Papadakis --- cunumeric/_ufunc/ufunc.py | 4 ++++ cunumeric/coverage.py | 31 +++++++++++++++++++++++++ tests/unit/cunumeric/test_coverage.py | 33 +++++++++++++++++++++++++++ 3 files changed, 68 insertions(+) diff --git a/cunumeric/_ufunc/ufunc.py b/cunumeric/_ufunc/ufunc.py index 9bd63604c..fc2cc0100 100644 --- a/cunumeric/_ufunc/ufunc.py +++ b/cunumeric/_ufunc/ufunc.py @@ -186,6 +186,10 @@ def __init__(self, name: str, doc: str) -> None: self._name = name self.__doc__ = doc + @property + def __name__(self) -> str: + return self._name + @property def nin(self) -> int: return self._nin diff --git a/cunumeric/coverage.py b/cunumeric/coverage.py index 29108a8d6..cc6730f9e 100644 --- a/cunumeric/coverage.py +++ b/cunumeric/coverage.py @@ -53,6 +53,8 @@ MOD_INTERNAL = {"__dir__", "__getattr__"} +UFUNC_METHODS = ("at", "accumulate", "outer", "reduce", "reduceat") + def filter_namespace( ns: Mapping[str, Any], @@ -250,6 +252,25 @@ def clone_module( cast(AnyCallable, value), mod_name, attr, reporting=reporting ) new_globals[attr] = wrapped + if isinstance(value, lgufunc): + for method in UFUNC_METHODS: + wrapped_method = ( + implemented( + getattr(value, method), + f"{mod_name}.{attr}", + method, + reporting=reporting, + ) + if hasattr(value, method) + else unimplemented( + getattr(getattr(origin_module, attr), 
method), + f"{mod_name}.{attr}", + method, + reporting=reporting, + fallback=fallback, + ) + ) + setattr(wrapped, method, wrapped_method) from numpy import ufunc as npufunc @@ -266,6 +287,16 @@ def clone_module( fallback=fallback, ) new_globals[attr] = wrapped + if isinstance(value, npufunc): + for method in UFUNC_METHODS: + wrapped_method = unimplemented( + getattr(value, method), + f"{mod_name}.{attr}", + method, + reporting=reporting, + fallback=fallback, + ) + setattr(wrapped, method, wrapped_method) else: new_globals[attr] = value diff --git a/tests/unit/cunumeric/test_coverage.py b/tests/unit/cunumeric/test_coverage.py index 3b3c51fba..71137a03a 100644 --- a/tests/unit/cunumeric/test_coverage.py +++ b/tests/unit/cunumeric/test_coverage.py @@ -461,6 +461,39 @@ def test_fallback(self): assert a.extra(b) == "new extra" +def test_ufunc_methods_binary() -> None: + import cunumeric as np + + # reduce is implemented + assert np.add.reduce.__wrapped__ + assert np.add.reduce._cunumeric.implemented + + # the rest are not + assert np.add.reduceat.__wrapped__ + assert not np.add.reduceat._cunumeric.implemented + assert np.add.outer.__wrapped__ + assert not np.add.outer._cunumeric.implemented + assert np.add.at.__wrapped__ + assert not np.add.at._cunumeric.implemented + assert np.add.accumulate.__wrapped__ + assert not np.add.accumulate._cunumeric.implemented + + +def test_ufunc_methods_unary() -> None: + import cunumeric as np + + assert np.negative.reduce.__wrapped__ + assert not np.negative.reduce._cunumeric.implemented + assert np.negative.reduceat.__wrapped__ + assert not np.negative.reduceat._cunumeric.implemented + assert np.negative.outer.__wrapped__ + assert not np.negative.outer._cunumeric.implemented + assert np.negative.at.__wrapped__ + assert not np.negative.at._cunumeric.implemented + assert np.negative.accumulate.__wrapped__ + assert not np.negative.accumulate._cunumeric.implemented + + if __name__ == "__main__": import sys From 39cd966359e0bf1dfa6e993ea3a6783a57f402b2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 14 Mar 2023 11:59:03 -0700 Subject: [PATCH 006/106] [pre-commit.ci] pre-commit autoupdate (#843) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/pre-commit/mirrors-mypy: v1.0.1 → v1.1.1](https://github.com/pre-commit/mirrors-mypy/compare/v1.0.1...v1.1.1) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 301623f84..17bdfaaf1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.0.1' + rev: 'v1.1.1' hooks: - id: mypy language: system From 930afb05c5e8cf9e1d15fe856a8ea2745a6bc4c5 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 15 Mar 2023 12:50:14 -0700 Subject: [PATCH 007/106] Switch to rapids-cmake 23.04 early (#846) because 23.02 fails on MacOS --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b815801e9..cb06a78af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -37,7 +37,7 @@ endif() # - Download and initialize RAPIDS CMake helpers ----------------------------- if(NOT EXISTS ${CMAKE_BINARY_DIR}/RAPIDS.cmake) - file(DOWNLOAD 
https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.02/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.04/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake) endif() include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) From 38740f67d83187655a5a05f0090c58f6161157a8 Mon Sep 17 00:00:00 2001 From: xialu00 <110973296+xialu00@users.noreply.github.com> Date: Thu, 16 Mar 2023 13:51:34 +0800 Subject: [PATCH 008/106] Testcase enhance split api (#813) * add negative test case for test_convolve.py * add test case for test_astype.py * add test case for test_astype.py * fix bug * enhance test_bincount.py * enhance test_bincount.py * enhance test_cholesky.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_reduction.py * enhance test_prod.py * fix bug tests/integration/test_prod.py * enhance test_bits.py and test_contains.py * fix bugs * enhance test_nonzero.py and test_exp.py * enhance some sort test apis and test_ones * enhance some sort test apis and test_ones * enhance test_fill_diagonal and test_searchsorted * enhance test_fill_diagonal and test_searchsorted * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * enhance test_fill_diagonal and test_searchsorted * enhance split api * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * enhance split api * enhance test split * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * enhance split * enhance split --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: xialu00 --- tests/integration/test_split.py | 309 ++++++++++++++++++++++++++++++++ 1 file changed, 309 insertions(+) create mode 100644 tests/integration/test_split.py diff --git a/tests/integration/test_split.py b/tests/integration/test_split.py new file mode 100644 index 000000000..a48800b9c --- /dev/null +++ b/tests/integration/test_split.py @@ -0,0 +1,309 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import math + +import numpy as np +import pytest + +import cunumeric as num + +# cunumeric.split(a: ndarray, indices: Union[int, ndarray], axis: int = 0) +# → list[cunumeric.array.ndarray] +# cunumeric.vsplit(a: ndarray, indices: Union[int, ndarray]) +# → list[cunumeric.array.ndarray] (axis=0) +# cunumeric.hsplit(a: ndarray, indices: Union[int, ndarray]) +# → list[cunumeric.array.ndarray] (axis=1) +# cunumeric.dsplit(a: ndarray, indices: Union[int, ndarray]) +# → list[cunumeric.array.ndarray] (axis=2) + + +DIM = 6 +SIZES = [ + (0,), + (1), + (DIM), + (0, 1), + (1, 0), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 0, 0), + (1, 1, 0), + (1, 0, 1), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] + + +SIZES_VSPLIT = [ + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] + +SIZES_HSPLIT = [ + (DIM, 1), + (DIM, DIM), + (DIM, 1, 1), + (DIM, DIM, DIM), +] + + +SIZES_DSPLIT = [ + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +] + +ARG_FUNCS = ("vsplit", "hsplit", "dsplit") + + +class TestSplitErrors: + """ + this class is to test negative cases + """ + + @pytest.mark.xfail + def test_array_none(self): + expected_exc = AttributeError + with pytest.raises(expected_exc): + np.split(None, 1) + # Numpy raises + # AttributeError: 'NoneType' object has no attribute 'shape' + with pytest.raises(expected_exc): + num.split(None, 1) + # cuNumeric raises + # ValueError: array(()) has less dimensions than axis(0) + + @pytest.mark.parametrize("indices", (-2, 0, "hi", 1.0, None)) + def test_indices_negative(self, indices): + ary = num.arange(10) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.split(ary, indices) + with pytest.raises(expected_exc): + np.split(ary, indices) + + def test_indices_divison(self): + size = (3, 3, 3) + ary = num.random.randint(low=0, high=100, size=size) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.split(ary, 2, 0) + with pytest.raises(expected_exc): + np.split(ary, 2, 0) + + def test_axis_negative(self): + ary = num.arange(10) + expected_exc = IndexError + axis = -2 + with pytest.raises(expected_exc): + num.split(ary, 1, axis=axis) + with pytest.raises(expected_exc): + np.split(ary, 1, axis=axis) + + def test_axis_bigger(self): + ary = num.arange(10) + axis = 2 + expected_exc = ValueError + with pytest.raises(expected_exc): + num.split(ary, 5, axis=axis) + with pytest.raises(expected_exc): + np.split(ary, 5, axis=axis) + + @pytest.mark.parametrize("func_name", ARG_FUNCS) + def test_array_none_different_split(self, func_name): + expected_exc = ValueError + func_num = getattr(num, func_name) + func_np = getattr(np, func_name) + with pytest.raises(expected_exc): + func_np(None, 1) + with pytest.raises(expected_exc): + func_num(None, 1) + + @pytest.mark.parametrize("indices", (-2, 0, "hi", 1.0, None)) + @pytest.mark.parametrize("func_name", ARG_FUNCS) + def test_indices_negative_different_split(self, func_name, indices): + ary = num.arange(10) + func_num = getattr(num, func_name) + func_np = getattr(np, func_name) + + expected_exc = ValueError + with pytest.raises(expected_exc): + func_num(ary, indices) + with pytest.raises(expected_exc): + func_np(ary, indices) + + @pytest.mark.xfail + def test_dimensions_vsplit(self): + ary = [] + expected_exc = ValueError + with pytest.raises(expected_exc): + num.vsplit(ary, 1) + # cuNumeric returns [array([], dtype=float64)] + with pytest.raises(expected_exc): + 
np.vsplit(ary, 1) + # Numpy raises + # ValueError: vsplit only works on arrays of 2 or more dimensions + + @pytest.mark.xfail + def test_dimensions_vsplit_1(self): + ary = np.random.randint(low=0, high=100, size=(5)) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.vsplit(ary, 1) + # cuNumeric returns the array + with pytest.raises(expected_exc): + np.vsplit(ary, 1) + # Numpy raises + # ValueError: vsplit only works on arrays of 2 or more dimensions + + @pytest.mark.xfail + def test_dimensions_hsplit_0(self): + ary = np.random.randint(low=0, high=100, size=(0,)) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.hsplit(ary, 1) + with pytest.raises(expected_exc): + np.hsplit(ary, 1) + # Numpy returns [array([], dtype=int64)] + + def test_dimensions_hsplit_1(self): + ary = num.arange(10) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.hsplit(ary, 1) + with pytest.raises(expected_exc): + np.hsplit(ary, 1) + + @pytest.mark.parametrize("size", ((0,), (10,), (5, 5))) + def test_dimensions_dsplit(self, size): + ary = np.random.randint(low=0, high=100, size=size) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.dsplit(ary, 1) + with pytest.raises(expected_exc): + np.dsplit(ary, 1) + + +def compare_array(a, b): + """ + Compare two array using zip method. + """ + if len(a) != len(b): + return False + else: + for each in zip(a, b): + if not np.array_equal(*each): + return False + return True + + +def get_indices(arr, axis): + """ + Generate the indices. split the array along axis. + Include the divisible integer or a 1-D array of sorted integers. + """ + indices_arr = [] + axis_size = arr.shape[axis] + even_div = 1 + random_integer = np.random.randint(1, 10) + + if axis_size == 1: + indices_arr.append(1) # in index + + elif axis_size > 1: + for div in range(2, int(math.sqrt(axis_size) + 1)): + if axis_size % div == 0: + indices_arr.append(div) # add divisible integer + even_div = div + + # add 1 and axis_size + indices_arr.append(1) + indices_arr.append(axis_size) + + # an index in the dimension of the array along axis + indices_arr.append(list(range(1, axis_size, even_div))) + + # an index exceeds the dimension of the array along axis + indices_arr.append( + list(range(0, axis_size + even_div * random_integer, even_div)) + ) + indices_arr.append( + list(range(axis_size + even_div * random_integer, 0, -even_div)) + ) + + return indices_arr + + +@pytest.mark.parametrize("size", SIZES, ids=str) +def test_split(size): + a = np.random.randint(low=0, high=100, size=size) + axis_list = list(range(a.ndim)) + [-1] + for axis in axis_list: + input_arr = get_indices(a, axis) + for input_opt in input_arr: + res_num = num.split(a, input_opt, axis=axis) + res_np = np.split(a, input_opt, axis=axis) + assert compare_array(res_num, res_np) + + +@pytest.mark.parametrize("size", SIZES_VSPLIT, ids=str) +def test_vsplit(size): + a = np.random.randint(low=0, high=100, size=size) + input_arr = get_indices(a, 0) + for input_opt in input_arr: + res_num = num.vsplit(a, input_opt) + res_np = np.vsplit(a, input_opt) + assert compare_array(res_num, res_np) + + +@pytest.mark.parametrize("size", SIZES_HSPLIT, ids=str) +def test_hsplit(size): + a = np.random.randint(low=0, high=100, size=size) + input_arr = get_indices(a, 1) + for input_opt in input_arr: + res_num = num.hsplit(a, input_opt) + res_np = np.hsplit(a, input_opt) + assert compare_array(res_num, res_np) + + +@pytest.mark.parametrize("size", SIZES_DSPLIT, ids=str) +def test_dsplit(size): + 
a = np.random.randint(low=0, high=100, size=size) + input_arr = get_indices(a, 2) + for input_opt in input_arr: + res_num = num.dsplit(a, input_opt) + res_np = np.dsplit(a, input_opt) + assert compare_array(res_num, res_np) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) From 7b3d72dcf858f36bcfae965d5e70fd773262c0f1 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Fri, 17 Mar 2023 01:24:20 -0700 Subject: [PATCH 009/106] Catch up name changes in the API (#849) * Catch up the API naming changes * Point to a fork to pass the CI * Point the commit back to the mainline --- cmake/versions.json | 4 ++-- .../index/advanced_indexing_template.inl | 2 +- src/cunumeric/index/repeat_template.inl | 2 +- src/cunumeric/mapper.cc | 20 +++++++++---------- src/cunumeric/scan/scan_local_template.inl | 4 ++-- src/cunumeric/search/argwhere_template.inl | 2 +- src/cunumeric/search/nonzero_template.inl | 2 +- src/cunumeric/set/unique.cu | 2 +- src/cunumeric/sort/sort.cu | 16 +++++++-------- src/cunumeric/sort/sort_cpu.inl | 12 +++++------ 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cmake/versions.json b/cmake/versions.json index 2ad0b05c2..5fe08bfd9 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -1,8 +1,8 @@ { "packages" : { "legate_core" : { - "git_url" : "https://github.com/nv-legate/legate.core.git", - "git_tag" : "37596159b1f8f06439fdc95d6090bbc4315c302c" + "git_url" : "https://github.com/nv-legate/legate.core.git", + "git_tag" : "bb24fda2d84132b7982a7e3020fee24781eb5c57" } } } diff --git a/src/cunumeric/index/advanced_indexing_template.inl b/src/cunumeric/index/advanced_indexing_template.inl index 2ebc31689..5178cb35e 100644 --- a/src/cunumeric/index/advanced_indexing_template.inl +++ b/src/cunumeric/index/advanced_indexing_template.inl @@ -50,7 +50,7 @@ struct AdvancedIndexingImpl { #endif if (volume == 0) { - args.output.make_empty(); + args.output.bind_empty_data(); return; } diff --git a/src/cunumeric/index/repeat_template.inl b/src/cunumeric/index/repeat_template.inl index 59ef47c5e..3b52141f5 100644 --- a/src/cunumeric/index/repeat_template.inl +++ b/src/cunumeric/index/repeat_template.inl @@ -37,7 +37,7 @@ struct RepeatImpl { auto input_arr = args.input.read_accessor(input_rect); if (input_rect.empty()) { - args.output.make_empty(); + args.output.bind_empty_data(); return; } diff --git a/src/cunumeric/mapper.cc b/src/cunumeric/mapper.cc index 432f6713c..950f81ce5 100644 --- a/src/cunumeric/mapper.cc +++ b/src/cunumeric/mapper.cc @@ -100,7 +100,7 @@ std::vector CuNumericMapper::store_mappings( std::vector mappings; auto& outputs = task.outputs(); mappings.push_back(StoreMapping::default_mapping(outputs[0], options.front())); - mappings.back().policy.ordering.fortran_order(); + mappings.back().policy.ordering.set_fortran_order(); mappings.back().policy.exact = true; return std::move(mappings); } else @@ -133,12 +133,12 @@ std::vector CuNumericMapper::store_mappings( auto& outputs = task.outputs(); for (auto& input : inputs) { mappings.push_back(StoreMapping::default_mapping(input, options.front())); - mappings.back().policy.ordering.fortran_order(); + mappings.back().policy.ordering.set_fortran_order(); mappings.back().policy.exact = true; } for (auto& output : outputs) { mappings.push_back(StoreMapping::default_mapping(output, options.front())); - mappings.back().policy.ordering.fortran_order(); + mappings.back().policy.ordering.set_fortran_order(); mappings.back().policy.exact = true; } return 
std::move(mappings); @@ -150,7 +150,7 @@ std::vector CuNumericMapper::store_mappings( std::vector mappings; auto& input = task.inputs().front(); mappings.push_back(StoreMapping::default_mapping(input, options.front())); - mappings.back().policy.ordering.fortran_order(); + mappings.back().policy.ordering.set_fortran_order(); mappings.back().policy.exact = true; return std::move(mappings); } @@ -167,12 +167,12 @@ std::vector CuNumericMapper::store_mappings( auto& outputs = task.outputs(); for (auto& input : inputs) { mappings.push_back(StoreMapping::default_mapping(input, options.front())); - mappings.back().policy.ordering.c_order(); + mappings.back().policy.ordering.set_c_order(); mappings.back().policy.exact = true; } for (auto& output : outputs) { mappings.push_back(StoreMapping::default_mapping(output, options.front())); - mappings.back().policy.ordering.c_order(); + mappings.back().policy.ordering.set_c_order(); mappings.back().policy.exact = true; } return std::move(mappings); @@ -183,12 +183,12 @@ std::vector CuNumericMapper::store_mappings( auto& outputs = task.outputs(); for (auto& input : inputs) { mappings.push_back(StoreMapping::default_mapping(input, options.front())); - mappings.back().policy.ordering.c_order(); + mappings.back().policy.ordering.set_c_order(); mappings.back().policy.exact = true; } for (auto& output : outputs) { mappings.push_back(StoreMapping::default_mapping(output, options.front())); - mappings.back().policy.ordering.c_order(); + mappings.back().policy.ordering.set_c_order(); mappings.back().policy.exact = true; } return std::move(mappings); @@ -199,12 +199,12 @@ std::vector CuNumericMapper::store_mappings( auto& outputs = task.outputs(); for (auto& input : inputs) { mappings.push_back(StoreMapping::default_mapping(input, options.front())); - mappings.back().policy.ordering.c_order(); + mappings.back().policy.ordering.set_c_order(); mappings.back().policy.exact = true; } for (auto& output : outputs) { mappings.push_back(StoreMapping::default_mapping(output, options.front())); - mappings.back().policy.ordering.c_order(); + mappings.back().policy.ordering.set_c_order(); mappings.back().policy.exact = true; } return std::move(mappings); diff --git a/src/cunumeric/scan/scan_local_template.inl b/src/cunumeric/scan/scan_local_template.inl index 418f2e998..c016873bb 100644 --- a/src/cunumeric/scan/scan_local_template.inl +++ b/src/cunumeric/scan/scan_local_template.inl @@ -45,7 +45,7 @@ struct ScanLocalImpl { size_t volume = pitches.flatten(rect); if (volume == 0) { - args.sum_vals.make_empty(); + args.sum_vals.bind_empty_data(); return; } @@ -71,7 +71,7 @@ struct ScanLocalImpl { size_t volume = pitches.flatten(rect); if (volume == 0) { - args.sum_vals.make_empty(); + args.sum_vals.bind_empty_data(); return; } diff --git a/src/cunumeric/search/argwhere_template.inl b/src/cunumeric/search/argwhere_template.inl index 7cbada29b..5c0a57e5e 100644 --- a/src/cunumeric/search/argwhere_template.inl +++ b/src/cunumeric/search/argwhere_template.inl @@ -40,7 +40,7 @@ struct ArgWhereImpl { size_t volume = pitches.flatten(rect_in); if (volume == 0) { - args.out.make_empty(); + args.out.bind_empty_data(); return; } diff --git a/src/cunumeric/search/nonzero_template.inl b/src/cunumeric/search/nonzero_template.inl index 0a79ce74a..fde09c76a 100644 --- a/src/cunumeric/search/nonzero_template.inl +++ b/src/cunumeric/search/nonzero_template.inl @@ -40,7 +40,7 @@ struct NonzeroImpl { size_t volume = pitches.flatten(rect); if (volume == 0) { - for (auto& store : args.results) 
store.make_empty(); + for (auto& store : args.results) store.bind_empty_data(); return; } diff --git a/src/cunumeric/set/unique.cu b/src/cunumeric/set/unique.cu index ad54b99c2..6d67eecae 100644 --- a/src/cunumeric/set/unique.cu +++ b/src/cunumeric/set/unique.cu @@ -193,7 +193,7 @@ struct UniqueImplBody { CHECK_CUDA_STREAM(stream); // Finally we pack the result - output.return_data(result.first, Point<1>(result.second)); + output.bind_data(result.first, Point<1>(result.second)); } }; diff --git a/src/cunumeric/sort/sort.cu b/src/cunumeric/sort/sort.cu index d25173aca..0851f0229 100644 --- a/src/cunumeric/sort/sort.cu +++ b/src/cunumeric/sort/sort.cu @@ -1209,7 +1209,7 @@ void sample_sort_nccl_nd(SortPiece> local_sorted, using VAL = legate_type_of; size_t volume = local_sorted.size; - bool is_unbound_1d_storage = output_array_unbound.is_output_store(); + bool is_unbound_1d_storage = output_array_unbound.is_unbound_store(); ///////////////////////////////////////////////////////////////////////////////////////////////// /////////////// Part 0: detection of empty nodes @@ -1241,10 +1241,10 @@ void sample_sort_nccl_nd(SortPiece> local_sorted, // we need to return an empty buffer here if (argsort) { auto buffer = create_buffer(0, legate::Memory::GPU_FB_MEM); - output_array_unbound.return_data(buffer, Point<1>(0)); + output_array_unbound.bind_data(buffer, Point<1>(0)); } else { auto buffer = create_buffer(0, legate::Memory::GPU_FB_MEM); - output_array_unbound.return_data(buffer, Point<1>(0)); + output_array_unbound.bind_data(buffer, Point<1>(0)); } } return; @@ -1640,9 +1640,9 @@ void sample_sort_nccl_nd(SortPiece> local_sorted, merged_result.segments.destroy(); if (argsort) { merged_result.values.destroy(); - output_array_unbound.return_data(merged_result.indices, Point<1>(merged_result.size)); + output_array_unbound.bind_data(merged_result.indices, Point<1>(merged_result.size)); } else { - output_array_unbound.return_data(merged_result.values, Point<1>(merged_result.size)); + output_array_unbound.bind_data(merged_result.values, Point<1>(merged_result.size)); } } } @@ -1684,7 +1684,7 @@ struct SortImplBody { auto stream = get_cached_stream(); - bool is_unbound_1d_storage = output_array.is_output_store(); + bool is_unbound_1d_storage = output_array.is_unbound_store(); bool need_distributed_sort = segment_size_l != segment_size_g || is_unbound_1d_storage; bool rebalance = !is_unbound_1d_storage; assert(DIM == 1 || !is_unbound_1d_storage); @@ -1792,9 +1792,9 @@ struct SortImplBody { // edge case where we have an unbound store but only 1 GPU was assigned with the task if (argsort) { local_sorted.values.destroy(); - output_array.return_data(local_sorted.indices, Point<1>(local_sorted.size)); + output_array.bind_data(local_sorted.indices, Point<1>(local_sorted.size)); } else { - output_array.return_data(local_sorted.values, Point<1>(local_sorted.size)); + output_array.bind_data(local_sorted.values, Point<1>(local_sorted.size)); } } } else if (argsort) { diff --git a/src/cunumeric/sort/sort_cpu.inl b/src/cunumeric/sort/sort_cpu.inl index df990260a..aa738d5e6 100644 --- a/src/cunumeric/sort/sort_cpu.inl +++ b/src/cunumeric/sort/sort_cpu.inl @@ -463,7 +463,7 @@ void sample_sort_nd(SortPiece> local_sorted, using VAL = legate_type_of; size_t volume = local_sorted.size; - bool is_unbound_1d_storage = output_array_unbound.is_output_store(); + bool is_unbound_1d_storage = output_array_unbound.is_unbound_store(); assert((volume > 0 && segment_size_l > 0) || volume == segment_size_l); @@ -887,9 +887,9 @@ void 
sample_sort_nd(SortPiece> local_sorted, merge_buffer.segments.destroy(); if (argsort) { merge_buffer.values.destroy(); - output_array_unbound.return_data(merge_buffer.indices, Point<1>(merge_buffer.size)); + output_array_unbound.bind_data(merge_buffer.indices, Point<1>(merge_buffer.size)); } else { - output_array_unbound.return_data(merge_buffer.values, Point<1>(merge_buffer.size)); + output_array_unbound.bind_data(merge_buffer.values, Point<1>(merge_buffer.size)); } } } @@ -920,7 +920,7 @@ struct SortImplBodyCpu { // we allow empty domains for distributed sorting assert(rect.empty() || input.accessor.is_dense_row_major(rect)); - bool is_unbound_1d_storage = output_array.is_output_store(); + bool is_unbound_1d_storage = output_array.is_unbound_store(); bool need_distributed_sort = segment_size_l != segment_size_g || is_unbound_1d_storage; bool rebalance = !is_unbound_1d_storage; assert(DIM == 1 || !is_unbound_1d_storage); @@ -1021,9 +1021,9 @@ struct SortImplBodyCpu { // edge case where we have an unbound store but only 1 CPU was assigned with the task if (argsort) { local_sorted.values.destroy(); - output_array.return_data(local_sorted.indices, Point<1>(local_sorted.size)); + output_array.bind_data(local_sorted.indices, Point<1>(local_sorted.size)); } else { - output_array.return_data(local_sorted.values, Point<1>(local_sorted.size)); + output_array.bind_data(local_sorted.values, Point<1>(local_sorted.size)); } } } else if (argsort) { From 219e3ab9d15033994d8bc48cac98d2a02b61536b Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Fri, 17 Mar 2023 16:59:19 -0700 Subject: [PATCH 010/106] Update the legate core commit hash (#848) --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 5fe08bfd9..577d91d7a 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -2,7 +2,7 @@ "packages" : { "legate_core" : { "git_url" : "https://github.com/nv-legate/legate.core.git", - "git_tag" : "bb24fda2d84132b7982a7e3020fee24781eb5c57" + "git_tag" : "78c61d836906611bf18348b4335f6e039f944340" } } } From e16bd146fccb23df7cb672d5f62819fef3495bd5 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Tue, 21 Mar 2023 16:17:45 +0100 Subject: [PATCH 011/106] Support of the `shape` argument in `empty_like()` & Co. (#845) * *_like() now takes a shape argument * style * test: fixed missing shape args --- cunumeric/module.py | 39 +++++++++++++++++++----- tests/integration/test_array_creation.py | 25 +++++++++------ 2 files changed, 47 insertions(+), 17 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index 9e2175d9b..bbe0bade8 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -94,7 +94,11 @@ def empty(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray: @add_boilerplate("a") -def empty_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: +def empty_like( + a: ndarray, + dtype: Optional[npt.DTypeLike] = None, + shape: Optional[NdShapeLike] = None, +) -> ndarray: """ empty_like(prototype, dtype=None) @@ -108,6 +112,8 @@ def empty_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: of the returned array. dtype : data-type, optional Overrides the data type of the result. + shape : int or tuple[int], optional + Overrides the shape of the result. 
Returns ------- @@ -123,7 +129,7 @@ def empty_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: -------- Multiple GPUs, Multiple CPUs """ - shape = a.shape + shape = a.shape if shape is None else shape if dtype is not None: dtype = np.dtype(dtype) else: @@ -238,7 +244,11 @@ def ones(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray: return full(shape, 1, dtype=dtype) -def ones_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: +def ones_like( + a: ndarray, + dtype: Optional[npt.DTypeLike] = None, + shape: Optional[NdShapeLike] = None, +) -> ndarray: """ Return an array of ones with the same shape and type as a given array. @@ -250,6 +260,8 @@ def ones_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: returned array. dtype : data-type, optional Overrides the data type of the result. + shape : int or tuple[int], optional + Overrides the shape of the result. Returns ------- @@ -267,7 +279,7 @@ def ones_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: usedtype = a.dtype if dtype is not None: usedtype = np.dtype(dtype) - return full_like(a, 1, dtype=usedtype) + return full_like(a, 1, dtype=usedtype, shape=shape) def zeros(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray: @@ -301,7 +313,11 @@ def zeros(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray: return full(shape, 0, dtype=dtype) -def zeros_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: +def zeros_like( + a: ndarray, + dtype: Optional[npt.DTypeLike] = None, + shape: Optional[NdShapeLike] = None, +) -> ndarray: """ Return an array of zeros with the same shape and type as a given array. @@ -313,6 +329,8 @@ def zeros_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: the returned array. dtype : data-type, optional Overrides the data type of the result. + shape : int or tuple[int], optional + Overrides the shape of the result. Returns ------- @@ -330,7 +348,7 @@ def zeros_like(a: ndarray, dtype: Optional[npt.DTypeLike] = None) -> ndarray: usedtype = a.dtype if dtype is not None: usedtype = np.dtype(dtype) - return full_like(a, 0, dtype=usedtype) + return full_like(a, 0, dtype=usedtype, shape=shape) def full( @@ -376,7 +394,10 @@ def full( def full_like( - a: ndarray, value: Union[int, float], dtype: Optional[npt.DTypeLike] = None + a: ndarray, + value: Union[int, float], + dtype: Optional[npt.DTypeLike] = None, + shape: Optional[NdShapeLike] = None, ) -> ndarray: """ @@ -391,6 +412,8 @@ def full_like( Fill value. dtype : data-type, optional Overrides the data type of the result. + shape : int or tuple[int], optional + Overrides the shape of the result. 
Returns ------- @@ -409,7 +432,7 @@ def full_like( dtype = np.dtype(dtype) else: dtype = a.dtype - result = empty_like(a, dtype=dtype) + result = empty_like(a, dtype=dtype, shape=shape) val = np.array(value, dtype=result.dtype) result._thunk.fill(val) return result diff --git a/tests/integration/test_array_creation.py b/tests/integration/test_array_creation.py index 0dfd5881c..68b103f0f 100644 --- a/tests/integration/test_array_creation.py +++ b/tests/integration/test_array_creation.py @@ -137,13 +137,16 @@ def test_full_bad_filled_value(self): (np.arange(24).reshape(4, 3, 2), "f4"), ] LIKE_FUNCTIONS = ("zeros_like", "ones_like") +SHAPE_ARG = (None, (-1,), (1, -1)) @pytest.mark.parametrize("x_np,dtype", DATA_ARGS) -def test_empty_like(x_np, dtype): +@pytest.mark.parametrize("shape", SHAPE_ARG) +def test_empty_like(x_np, dtype, shape): + shape = shape if shape is None else x_np.reshape(shape).shape x = num.array(x_np) - xfl = num.empty_like(x, dtype=dtype) - yfl = np.empty_like(x_np, dtype=dtype) + xfl = num.empty_like(x, dtype=dtype, shape=shape) + yfl = np.empty_like(x_np, dtype=dtype, shape=shape) assert xfl.shape == yfl.shape assert xfl.dtype == yfl.dtype @@ -151,13 +154,15 @@ def test_empty_like(x_np, dtype): @pytest.mark.parametrize("x_np,dtype", DATA_ARGS) @pytest.mark.parametrize("fn", LIKE_FUNCTIONS) -def test_func_like(fn, x_np, dtype): +@pytest.mark.parametrize("shape", SHAPE_ARG) +def test_func_like(fn, x_np, dtype, shape): + shape = shape if shape is None else x_np.reshape(shape).shape num_f = getattr(num, fn) np_f = getattr(np, fn) x = num.array(x_np) - xfl = num_f(x, dtype=dtype) - yfl = np_f(x_np, dtype=dtype) + xfl = num_f(x, dtype=dtype, shape=shape) + yfl = np_f(x_np, dtype=dtype, shape=shape) assert np.array_equal(xfl, yfl) assert xfl.dtype == yfl.dtype @@ -165,11 +170,13 @@ def test_func_like(fn, x_np, dtype): @pytest.mark.parametrize("value", FILLED_VALUES) @pytest.mark.parametrize("x_np, dtype", DATA_ARGS) -def test_full_like(x_np, dtype, value): +@pytest.mark.parametrize("shape", SHAPE_ARG) +def test_full_like(x_np, dtype, value, shape): + shape = shape if shape is None else x_np.reshape(shape).shape x = num.array(x_np) - xfl = num.full_like(x, value, dtype=dtype) - yfl = np.full_like(x_np, value, dtype=dtype) + xfl = num.full_like(x, value, dtype=dtype, shape=shape) + yfl = np.full_like(x_np, value, dtype=dtype, shape=shape) assert np.array_equal(xfl, yfl) assert xfl.dtype == yfl.dtype From f2f3ab1f9250841935b8d66202527881760189e4 Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Wed, 22 Mar 2023 09:45:59 +0800 Subject: [PATCH 012/106] Fix bugs in concatenate and stack APIs. (#844) * Fix bugs in concatenate and stack APIs. * Split check_shape_dtype into 2 functions. * Enhance module.py and remove fixed xfails in test * Add check for invalid casting --- cunumeric/module.py | 102 ++++++++++++++------ tests/integration/test_concatenate_stack.py | 57 +++-------- 2 files changed, 84 insertions(+), 75 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index bbe0bade8..d6e84a8fa 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -56,6 +56,13 @@ _builtin_min = min _builtin_sum = sum +casting_kinds: tuple[CastingKind, ...] 
= (
+    "no",
+    "equiv",
+    "safe",
+    "same_kind",
+    "unsafe",
+)

 #########################
 # Array creation routines
@@ -1464,10 +1471,33 @@ def check_list_depth(arr: Any, prefix: NdShape = (0,)) -> int:
     return depths[0] + 1


-def check_shape_dtype(
-    inputs: Sequence[ndarray],
+def check_shape_with_axis(
+    inputs: list[ndarray],
     func_name: str,
     axis: int,
+) -> None:
+    ndim = inputs[0].ndim
+    shape = inputs[0].shape
+
+    if ndim >= 1:
+        axis = normalize_axis_index(axis, ndim)
+        if _builtin_any(
+            shape[:axis] != inp.shape[:axis]
+            or shape[axis + 1 :] != inp.shape[axis + 1 :]
+            for inp in inputs
+        ):
+            raise ValueError(
+                f"All arguments to {func_name} "
+                "must have the same "
+                "dimension size in all dimensions "
+                "except the target axis"
+            )
+    return
+
+
+def check_shape_dtype_without_axis(
+    inputs: Sequence[ndarray],
+    func_name: str,
     dtype: Optional[npt.DTypeLike] = None,
     casting: CastingKind = "same_kind",
 ) -> tuple[list[ndarray], ArrayInfo]:
@@ -1483,17 +1513,6 @@ def check_shape_dtype(
             f"All arguments to {func_name} "
             "must have the same number of dimensions"
         )
-    if ndim > 1 and _builtin_any(
-        shape[1:axis] != inp.shape[1:axis]
-        and shape[axis + 1 :] != inp.shape[axis + 1 :]
-        for inp in inputs
-    ):
-        raise ValueError(
-            f"All arguments to {func_name} "
-            "must have the same "
-            "dimension size in all dimensions "
-            "except the target axis"
-        )

     # Cast arrays with the passed arguments (dtype, casting)
     if dtype is None:
@@ -1551,10 +1570,11 @@ def _block_collect_slices(
     arrays = list(convert_to_cunumeric_ndarray(inp) for inp in arr)
     common_shape = arrays[0].shape
     if len(arr) > 1:
-        arrays, common_info = check_shape_dtype(
-            arrays, block.__name__, axis=-1
+        arrays, common_info = check_shape_dtype_without_axis(
+            arrays, block.__name__
         )
         common_shape = common_info.shape
+        check_shape_with_axis(arrays, block.__name__, axis=-1)
     # the initial slices for each arr on arr.shape[-1]
     out_shape, slices, arrays = _collect_outshape_slices(
         arrays, common_shape, axis=-1 + len(common_shape)
@@ -1771,15 +1791,28 @@ def concatenate(
     --------
     Multiple GPUs, Multiple CPUs
     """
+    if dtype is not None and out is not None:
+        raise TypeError(
+            "concatenate() only takes `out` or `dtype` as an argument, "
+            "but both were provided."
+ ) + + if casting not in casting_kinds: + raise ValueError( + "casting must be one of 'no', 'equiv', " + "'safe', 'same_kind', or 'unsafe'" + ) + # flatten arrays if axis == None and concatenate arrays on the first axis if axis is None: inputs = list(inp.ravel() for inp in inputs) axis = 0 # Check to see if we can build a new tuple of cuNumeric arrays - cunumeric_inputs, common_info = check_shape_dtype( - inputs, concatenate.__name__, axis, dtype, casting + cunumeric_inputs, common_info = check_shape_dtype_without_axis( + inputs, concatenate.__name__, dtype, casting ) + check_shape_with_axis(cunumeric_inputs, concatenate.__name__, axis) return _concatenate( cunumeric_inputs, @@ -1831,15 +1864,14 @@ def stack( if type(axis) is not int: raise TypeError("The target axis should be an integer") - arrays, common_info = check_shape_dtype(arrays, stack.__name__, axis) - - if axis > common_info.ndim: - raise ValueError( - "The target axis should be smaller or" - " equal to the number of dimensions" - " of input arrays" - ) + arrays, common_info = check_shape_dtype_without_axis( + arrays, stack.__name__ + ) + shapes = {inp.shape for inp in arrays} + if len(shapes) != 1: + raise ValueError("all input arrays must have the same shape for stack") + axis = normalize_axis_index(axis, common_info.ndim + 1) shape = common_info.shape[:axis] + (1,) + common_info.shape[axis:] arrays = [arr.reshape(shape) for arr in arrays] common_info.shape = tuple(shape) @@ -1883,7 +1915,10 @@ def vstack(tup: Sequence[ndarray]) -> ndarray: reshaped = _atleast_nd(2, tuple(tup)) if not isinstance(reshaped, list): reshaped = [reshaped] - tup, common_info = check_shape_dtype(reshaped, vstack.__name__, 0) + tup, common_info = check_shape_dtype_without_axis( + reshaped, vstack.__name__ + ) + check_shape_with_axis(tup, vstack.__name__, 0) return _concatenate( tup, common_info, @@ -1925,7 +1960,10 @@ def hstack(tup: Sequence[ndarray]) -> ndarray: -------- Multiple GPUs, Multiple CPUs """ - tup, common_info = check_shape_dtype(tup, hstack.__name__, 1) + tup, common_info = check_shape_dtype_without_axis(tup, hstack.__name__) + check_shape_with_axis( + tup, hstack.__name__, axis=(0 if common_info.ndim == 1 else 1) + ) # When ndim == 1, hstack concatenates arrays along the first axis return _concatenate( tup, @@ -1973,7 +2011,10 @@ def dstack(tup: Sequence[ndarray]) -> ndarray: reshaped = _atleast_nd(3, tuple(tup)) if not isinstance(reshaped, list): reshaped = [reshaped] - tup, common_info = check_shape_dtype(reshaped, dstack.__name__, 2) + tup, common_info = check_shape_dtype_without_axis( + reshaped, dstack.__name__ + ) + check_shape_with_axis(tup, dstack.__name__, 2) return _concatenate( tup, common_info, @@ -2011,11 +2052,14 @@ def column_stack(tup: Sequence[ndarray]) -> ndarray: -------- Multiple GPUs, Multiple CPUs """ - tup, common_info = check_shape_dtype(tup, column_stack.__name__, 1) + tup, common_info = check_shape_dtype_without_axis( + tup, column_stack.__name__ + ) # When ndim == 1, hstack concatenates arrays along the first axis if common_info.ndim == 1: tup = list(inp.reshape((inp.shape[0], 1)) for inp in tup) common_info.shape = tup[0].shape + check_shape_with_axis(tup, dstack.__name__, 1) return _concatenate( tup, common_info, diff --git a/tests/integration/test_concatenate_stack.py b/tests/integration/test_concatenate_stack.py index 69dd3ad24..33a11f392 100644 --- a/tests/integration/test_concatenate_stack.py +++ b/tests/integration/test_concatenate_stack.py @@ -180,16 +180,11 @@ def test_scalar_axis_is_not_none(self, 
arrays): ( ([[1, 2], [3, 4]], [5, 6]), ([[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10]]), - pytest.param( - ([[1, 2], [3, 4]], [[5, 6]]), marks=pytest.mark.xfail - ), + ([[1, 2], [3, 4]], [[5, 6]]), ), ids=lambda arrays: f"(arrays={arrays})", ) def test_arrays_mismatched_shape(self, arrays): - # for ([[1, 2], [3, 4]], [[5, 6]]), - # In Numpy, it raises ValueError - # In cuNumeric, it pass expected_exc = ValueError axis = 1 with pytest.raises(expected_exc): @@ -197,16 +192,12 @@ def test_arrays_mismatched_shape(self, arrays): with pytest.raises(expected_exc): num.concatenate(arrays, axis=axis) - @pytest.mark.xfail @pytest.mark.parametrize( "axis", (1, -2), ids=lambda axis: f"(axis={axis})", ) def test_axis_out_of_bound(self, axis): - # For axis=-2 or 1, - # In Numpy, it raises ValueError - # In cuNumeric, it raises IndexError expected_exc = ValueError a = [1, 2] b = [5, 6] @@ -215,10 +206,7 @@ def test_axis_out_of_bound(self, axis): with pytest.raises(expected_exc): num.concatenate((num.array(a), num.array(b)), axis=axis) - @pytest.mark.xfail def test_both_out_dtype_are_provided(self): - # In Numpy, it raises TypeError - # In cuNumeric, it pass expected_exc = TypeError a = [[1, 2], [3, 4]] b = [[5, 6]] @@ -239,7 +227,6 @@ def test_both_out_dtype_are_provided(self): dtype=dtype, ) - @pytest.mark.xfail def test_invalid_casting(self): # In Numpy, raise ValueError # In cuNumeric, pass @@ -278,11 +265,10 @@ def test_stack_with_out(): @pytest.mark.parametrize( "axis", - (-3, pytest.param(-1, marks=pytest.mark.xfail)), + (-3, -1), ids=lambda axis: f"(axis={axis})", ) def test_stack_axis_is_negative(axis): - # for -1, the output by Numpy and cuNumeric is not equal a = [[1, 2], [3, 4]] b = [[5, 6], [7, 8]] res_np = np.stack((np.array(a), np.array(b)), axis=axis) @@ -328,13 +314,10 @@ def test_axis_is_none(self): @pytest.mark.parametrize( "axis", - (2, pytest.param(-3, marks=pytest.mark.xfail)), + (2, -3), ids=lambda axis: f"(axis={axis})", ) def test_axis_out_of_bound(self, axis): - # For axis=-3, - # In Numpy, it raises ValueError - # In cuNumeric, it raises IndexError expected_exc = ValueError a = [1, 2] b = [5, 6] @@ -381,16 +364,11 @@ def test_zero_arrays(self): "arrays", ( ([[1, 2], [3, 4]], [5, 6]), - pytest.param( - ([[1, 2], [3, 4]], [[5, 6]]), marks=pytest.mark.xfail - ), + ([[1, 2], [3, 4]], [[5, 6]]), ), ids=lambda arrays: f"(arrays={arrays})", ) def test_arrays_mismatched_shape(self, arrays): - # for ([[1, 2], [3, 4]], [[5, 6]]) - # In Numpy, it raises ValueError - # In cuNumeric, it pass expected_exc = ValueError with pytest.raises(expected_exc): np.hstack(arrays) @@ -417,16 +395,13 @@ def test_zero_arrays(self): "arrays", ( (1, []), - pytest.param(([1, 2], [3]), marks=pytest.mark.xfail), + ([1, 2], [3]), ([[1, 2]], [3, 4]), ([[1, 2]], [[3], [4]]), ), ids=lambda arrays: f"(arrays={arrays})", ) def test_arrays_mismatched_shape(self, arrays): - # for ([1, 2], [3]), - # In Numpy, it raises ValueError - # In cuNumeric, it pass expected_exc = ValueError with pytest.raises(expected_exc): np.column_stack(arrays) @@ -455,17 +430,14 @@ def test_zero_arrays(self): @pytest.mark.parametrize( "arrays", ( - pytest.param((1, []), marks=pytest.mark.xfail), - pytest.param(([1, 2], [3]), marks=pytest.mark.xfail), - pytest.param(([[1, 2], [3, 4]], [5]), marks=pytest.mark.xfail), + (1, []), + ([1, 2], [3]), + ([[1, 2], [3, 4]], [5]), ([[[1, 2], [3, 4]]], [5, 6]), ), ids=lambda arrays: f"(arrays={arrays})", ) def test_arrays_mismatched_shape(self, arrays): - # for (1, []), ([1, 2], [3]), ([[1, 2], [3, 4]], [5]) 
- # In Numpy, it raises ValueError - # In cuNumeric, it pass expected_exc = ValueError with pytest.raises(expected_exc): np.vstack(arrays) @@ -494,17 +466,14 @@ def test_zero_arrays(self): @pytest.mark.parametrize( "arrays", ( - pytest.param((1, []), marks=pytest.mark.xfail), - pytest.param(([1, 2], [3]), marks=pytest.mark.xfail), - pytest.param(([[1, 2], [3, 4]], [5]), marks=pytest.mark.xfail), + (1, []), + ([1, 2], [3]), + ([[1, 2], [3, 4]], [5]), ([[[1, 2], [3, 4]]], [5, 6]), ), ids=lambda arrays: f"(arrays={arrays})", ) def test_arrays_mismatched_shape(self, arrays): - # for (1, []), ([1, 2], [3]), ([[1, 2], [3, 4]], [5]) - # In Numpy, it raises ValueError - # In cuNumeric, it pass expected_exc = ValueError with pytest.raises(expected_exc): np.row_stack(arrays) @@ -530,7 +499,6 @@ def test_zero_arrays(self): with pytest.raises(expected_exc): num.dstack(arrays) - @pytest.mark.xfail @pytest.mark.parametrize( "arrays", ( @@ -542,9 +510,6 @@ def test_zero_arrays(self): ids=lambda arrays: f"(arrays={arrays})", ) def test_arrays_mismatched_shape(self, arrays): - # for all cases, - # In Numpy, it raises ValueError - # In cuNumeric, it pass expected_exc = ValueError with pytest.raises(expected_exc): np.dstack(arrays) From e4c164e8086a27c1cd6e6a65c410fd23ce70f839 Mon Sep 17 00:00:00 2001 From: Mark Vaz Date: Wed, 22 Mar 2023 13:01:53 -0700 Subject: [PATCH 013/106] Update conda recipes (#855) Co-authored-by: Mark Vaz --- conda/conda-build/conda_build_config.yaml | 3 ++- conda/conda-build/meta.yaml | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/conda/conda-build/conda_build_config.yaml b/conda/conda-build/conda_build_config.yaml index 3b3c5fa23..a508a6ed1 100644 --- a/conda/conda-build/conda_build_config.yaml +++ b/conda/conda-build/conda_build_config.yaml @@ -3,8 +3,9 @@ gpu_enabled: - false python: - - "3.9,!=3.9.7" + - 3.9 - 3.10 + - 3.11 numpy_version: - ">=1.22" diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml index d16d5fd71..2560b94b8 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -10,7 +10,7 @@ ## The placeholder version is strictly for making two-pass conda build process. ## It should not be used for any other purpose, and this is not a default version. 
{% set placeholder_version = '0.0.0.dev' %} -{% set default_cuda_version = '11.5' %} +{% set default_cuda_version = '11.8' %} {% set cuda_version='.'.join(environ.get('CUDA', default_cuda_version).split('.')[:2]) %} {% set cuda_major=cuda_version.split('.')[0] %} {% set py_version=environ.get('CONDA_PY', '') %} @@ -149,6 +149,7 @@ requirements: run_constrained: - __glibc >=2.17 # [linux] + - python != 3.9.7 {% if gpu_enabled_bool %} - __cuda >={{ cuda_version }} {% endif %} From d01ebc0c81e8f282d31112ea0d3266fc51f9faf9 Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Thu, 23 Mar 2023 07:58:52 +0800 Subject: [PATCH 014/106] Enhance test_putmask/trace/scan (#854) * Enhance test_putmask/trace/scan * Add xfail for NotImplementedError --- cunumeric/array.py | 2 +- tests/integration/test_putmask.py | 92 +++++++++++++++++++++- tests/integration/test_scan.py | 82 +++++++++++++++---- tests/integration/test_trace.py | 126 +++++++++++++++++++++++++++++- 4 files changed, 280 insertions(+), 22 deletions(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index e7de01b4e..a44e5afd8 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -2580,7 +2580,7 @@ def trace( # default values for axis axes = (0, 1) elif (axis1 is None) or (axis2 is None): - raise ValueError("both axes should be passed") + raise TypeError("both axes should be passed") else: axes = (axis1, axis2) diff --git a/tests/integration/test_putmask.py b/tests/integration/test_putmask.py index 86e56902f..a59c40aba 100644 --- a/tests/integration/test_putmask.py +++ b/tests/integration/test_putmask.py @@ -16,7 +16,7 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM -from utils.generators import mk_seq_array +from utils.generators import mk_0to1_array, mk_seq_array import cunumeric as num @@ -139,7 +139,7 @@ def test_ndim(ndim): num.putmask(num_arr, num_mask, num_values) assert np.array_equal(np_arr, num_arr) - # val is different shape and different size for vals and array + # val is different shape and smaller size for vals and array shape_val = (2,) * ndim np_values = mk_seq_array(np, shape_val) * 10 num_values = mk_seq_array(num, shape_val) * 10 @@ -147,6 +147,94 @@ def test_ndim(ndim): num.putmask(num_arr, num_mask, num_values) assert np.array_equal(np_arr, num_arr) + # val is different shape and bigger size for vals and array + shape_val = (10,) * ndim + np_values = mk_seq_array(np, shape_val) * 10 + num_values = mk_seq_array(num, shape_val) * 10 + np.putmask(np_arr, np_mask, np_values) + num.putmask(num_arr, num_mask, num_values) + assert np.array_equal(np_arr, num_arr) + + +@pytest.mark.parametrize( + "shape_val", + ( + (1,), + (4,), + (5,), + (1, 4), + (2, 3), + pytest.param((2, 3, 4), marks=pytest.mark.xfail), + (3, 4, 5), + ), + ids=lambda shape_val: f"(shape_val={shape_val})", +) +def test_a_values_different_shapes(shape_val): + # for (2, 3, 4), + # In Numpy, pass + # In cuNumeric, it raises ValueError + shape_arr = (3, 4) + np_arr = mk_seq_array(np, shape_arr) + num_arr = mk_seq_array(num, shape_arr) + np_mask = (np_arr % 2).astype(bool) + num_mask = (num_arr % 2).astype(bool) + np_values = mk_seq_array(np, shape_val) * 10 + num_values = mk_seq_array(num, shape_val) * 10 + np.putmask(np_arr, np_mask, np_values) + num.putmask(num_arr, num_mask, num_values) + assert np.array_equal(np_arr, num_arr) + + +def test_empty_array(): + np_arr = np.array([]) + num_arr = num.array([]) + np_mask = np.array([]) + num_mask = num.array([]) + value = -1 + 
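+    # putmask writes `value` into each position of `a` where the mask is
+    # True (cycling through `values` when an array shorter than `a` is
+    # given), so with all-empty inputs nothing is selected and the call
+    # should be a no-op for NumPy and cuNumeric alike.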
np.putmask(np_arr, np_mask, value) + num.putmask(num_arr, num_mask, value) + assert np.array_equal(np_arr, num_arr) + + +class TestPutmaskErrors: + def test_invalid_mask_shape(self): + expected_exc = ValueError + shape_arr = (3, 4, 5) + np_arr = mk_seq_array(np, shape_arr) + num_arr = mk_seq_array(num, shape_arr) + shape_mask = (3, 4, 1) + np_mask = np.zeros(shape_mask) + num_mask = num.zeros(shape_mask) + value = -1 + with pytest.raises(expected_exc): + np.putmask(np_arr, np_mask, value) + with pytest.raises(expected_exc): + num.putmask(num_arr, num_mask, value) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "dtype_val", + (float, complex), + ids=lambda dtype_val: f"(dtype_val={dtype_val})", + ) + def test_a_values_different_dtype(self, dtype_val): + # for both cases, + # In Numpy, it raises TypeError + # In cuNumeric, it pass + expected_exc = TypeError + shape = (3, 4) + dtype_arr = int + np_arr = mk_0to1_array(np, shape, dtype=dtype_arr) + num_arr = mk_0to1_array(num, shape, dtype=dtype_arr) + np_mask = (np_arr % 2).astype(bool) + num_mask = (num_arr % 2).astype(bool) + np_values = mk_0to1_array(np, shape, dtype=dtype_val) * 10 + num_values = mk_0to1_array(num, shape, dtype=dtype_val) * 10 + with pytest.raises(expected_exc): + np.putmask(np_arr, np_mask, np_values) + with pytest.raises(expected_exc): + num.putmask(num_arr, num_mask, num_values) + if __name__ == "__main__": import sys diff --git a/tests/integration/test_scan.py b/tests/integration/test_scan.py index 7ca37201a..5ec83884c 100644 --- a/tests/integration/test_scan.py +++ b/tests/integration/test_scan.py @@ -103,10 +103,12 @@ def _run_tests(op, n0, shape, dt, axis, out0, outtype): shapes = [ [100], [4, 25], + [4, 5, 6], ] axes = [ None, 0, + -1, ] dtypes = [ np.int16, @@ -129,12 +131,12 @@ def _run_tests(op, n0, shape, dt, axis, out0, outtype): ] +@pytest.mark.parametrize("op", ops) @pytest.mark.parametrize("shape", shapes) @pytest.mark.parametrize("axis", axes) @pytest.mark.parametrize("outtype", dtypes_simplified) @pytest.mark.parametrize("dt", dtypes_simplified) -def test_scan_out0_shape(shape, axis, outtype, dt): - op = "cumsum" +def test_scan_out0_shape_ops(op, shape, axis, outtype, dt): out0 = True n0 = None _run_tests(op, n0, shape, dt, axis, out0, outtype) @@ -152,28 +154,74 @@ def test_scan_nan(op, outtype, dt, n0): @pytest.mark.parametrize("op", ops) -@pytest.mark.parametrize("outtype", dtypes_simplified) -@pytest.mark.parametrize("dt", dtypes_simplified) -def test_scan_op(op, outtype, dt): - shape = [100] - axis = None - out0 = False - n0 = None - _run_tests(op, n0, shape, dt, axis, out0, outtype) - - -def test_empty_inputs(): +def test_empty_inputs(op): in_np = np.ones(10) in_np[5:] = 0 - in_num = num.array(in_np).nonzero()[0] - in_np = in_np.nonzero()[0] + in_num = num.array(in_np) + out_np = getattr(np, op)(in_np) + out_num = getattr(num, op)(in_num) + assert np.array_equal(out_np, out_num) - out_np = np.cumsum(in_np) - out_num = num.cumsum(in_num) +@pytest.mark.parametrize("op", ops) +def test_empty_array(op): + A = [] + out_np = getattr(np, op)(A) + out_num = getattr(num, op)(A) + assert np.array_equal(out_np, out_num) + + +@pytest.mark.parametrize("op", ops) +def test_scalar(op): + A = 1 + out_np = getattr(np, op)(A) + out_num = getattr(num, op)(A) assert np.array_equal(out_np, out_num) +class TestScanErrors: + @pytest.mark.parametrize("op", ("cumsum", "cumprod")) + def test_array_with_nan(self, op): + expected_exc = TypeError + A = [1, 2, None, 4] + with pytest.raises(expected_exc): + getattr(np, 
op)(A) + with pytest.raises(expected_exc): + getattr(num, op)(A) + + @pytest.mark.parametrize( + "axis", (-2, 1), ids=lambda axis: f"(axis={axis})" + ) + @pytest.mark.parametrize("op", ops) + def test_axis_out_of_bound(self, op, axis): + expected_exc = ValueError + A = [1, 2, 3, 4] + with pytest.raises(expected_exc): + getattr(np, op)(A, axis=axis) + with pytest.raises(expected_exc): + getattr(num, op)(A, axis=axis) + + @pytest.mark.xfail + @pytest.mark.parametrize( + "out_shape", + ((1,), (2, 3)), + ids=lambda out_shape: f"(out_shape={out_shape})", + ) + @pytest.mark.parametrize("op", ops) + def test_out_invalid_shape(self, op, out_shape): + # for all ops and all out_shape, + # in Numpy, it raises ValueError + # in cuNumeric, it raises NotImplementedError + expected_exc = ValueError + A = [1, 2, 3, 4] + out_np = np.zeros(out_shape) + out_num = num.zeros(out_shape) + with pytest.raises(expected_exc): + getattr(np, op)(A, out=out_np) + with pytest.raises(expected_exc): + getattr(num, op)(A, out=out_num) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_trace.py b/tests/integration/test_trace.py index bb27ccae8..9f40739ff 100644 --- a/tests/integration/test_trace.py +++ b/tests/integration/test_trace.py @@ -81,11 +81,133 @@ def test_ndim(ndim): diag_size = min(a_shape[axes[0]], a_shape[axes[1]]) - 1 for offset in range(-diag_size + 1, diag_size): assert np.array_equal( - np_array.trace(offset, axes[0], axes[1]), - num_array.trace(offset, axes[0], axes[1]), + np.trace(np_array, offset, axes[0], axes[1]), + num.trace(num_array, offset, axes[0], axes[1]), ) +@pytest.mark.parametrize( + "offset", + ( + pytest.param(-3, marks=pytest.mark.xfail), + pytest.param(-2, marks=pytest.mark.xfail), + -1, + 0, + 1, + 2, + pytest.param(3, marks=pytest.mark.xfail), + ), + ids=lambda offset: f"(offset={offset})", +) +def test_offset(offset): + # For -3, -2, 3 + # In Numpy, pass and return 0 + # In cuNumeric, it raises ValueError: + # 'offset' for diag or diagonal must be in range + a = np.arange(24).reshape((2, 3, 4)) + a_num = num.array(a) + res = np.trace(a, offset=offset) + res_num = num.trace(a_num, offset=offset) + assert np.array_equal(res, res_num) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "axes", + ((-2, -1), (-2, 0), (1, -3)), + ids=lambda axes: f"(axes={axes})", +) +def test_negative_axes(axes): + # For all 3 cases, + # In Numpy, pass + # In cuNumeric, it raises ValueError: + # axes must be the same size as ndim for transpose + axis1, axis2 = axes + a = np.arange(24).reshape((2, 3, 4)) + a_num = num.array(a) + res = np.trace(a, axis1=axis1, axis2=axis2) + res_num = num.trace(a_num, axis1=axis1, axis2=axis2) + assert np.array_equal(res, res_num) + + +class TestTraceErrors: + def setup_method(self): + self.a_np = np.arange(24).reshape((2, 3, 4)) + self.a_num = num.array(self.a_np) + + @pytest.mark.parametrize( + "array", + (1, [], [1]), + ids=lambda array: f"(array={array})", + ) + def test_invalid_arrays(self, array): + expected_exc = ValueError + with pytest.raises(expected_exc): + np.trace(array) + with pytest.raises(expected_exc): + num.trace(array) + + @pytest.mark.parametrize( + "axes", + ( + (None, 0), + (0, None), + pytest.param((None, None), marks=pytest.mark.xfail), + ), + ids=lambda axes: f"(axes={axes})", + ) + def test_axes_none(self, axes): + # For (None, None) + # In Numpy, it raises TypeError + # In cuNumeric, it pass + expected_exc = TypeError + axis1, axis2 = axes + with pytest.raises(expected_exc): + np.trace(self.a_np, axis1=axis1, 
axis2=axis2)
+        with pytest.raises(expected_exc):
+            num.trace(self.a_num, axis1=axis1, axis2=axis2)
+
+    @pytest.mark.parametrize(
+        "axes",
+        ((-4, 1), (0, 3)),
+        ids=lambda axes: f"(axes={axes})",
+    )
+    def test_axes_out_of_bound(self, axes):
+        expected_exc = ValueError
+        axis1, axis2 = axes
+        with pytest.raises(expected_exc):
+            np.trace(self.a_np, axis1=axis1, axis2=axis2)
+        with pytest.raises(expected_exc):
+            num.trace(self.a_num, axis1=axis1, axis2=axis2)
+
+    @pytest.mark.parametrize(
+        "axes",
+        ((-3, 0), (1, 1)),
+        ids=lambda axes: f"(axes={axes})",
+    )
+    def test_axes_duplicate(self, axes):
+        expected_exc = ValueError
+        axis1, axis2 = axes
+        with pytest.raises(expected_exc):
+            np.trace(self.a_np, axis1=axis1, axis2=axis2)
+        with pytest.raises(expected_exc):
+            num.trace(self.a_num, axis1=axis1, axis2=axis2)
+
+    @pytest.mark.parametrize(
+        "out_shape",
+        ((0,), (1, 4)),
+        ids=lambda out_shape: f"(out_shape={out_shape})",
+    )
+    def test_out_invalid_shape(self, out_shape):
+        expected_exc = ValueError
+        out_np = np.zeros(out_shape)
+        out_num = num.array(out_np)
+        with pytest.raises(expected_exc):
+            np.trace(self.a_np, out=out_np)
+        with pytest.raises(expected_exc):
+            num.trace(self.a_num, out=out_num)
+
+
 if __name__ == "__main__":
     import sys


From e767b49d84a4e248719d9ccb698f3247fe50c15d Mon Sep 17 00:00:00 2001
From: Bryan Van de Ven
Date: Thu, 23 Mar 2023 11:07:34 -0700
Subject: [PATCH 015/106] add a linkcheck target to docs build (#853)

---
 CONTRIBUTING.md         |  6 +++---
 docs/cunumeric/Makefile | 34 ++++++++++++++++++++++++----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index bdfc0c562..e083cc3c0 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -6,11 +6,11 @@ CuNumeric is an open-source project released under the [Apache license, version

 Most of the time, the best thing is to begin by [opening an issue](https://github.com/nv-legate/cunumeric/issues). This gives us a chance to discuss the contribution and to define the problem or feature that it addresses. Often, opening the issue first may help prevent you from doing unnecessary work, or may help to enhance and further develop your idea.

-Once you are ready to start development, we ask you to work on a [fork](https://help.github.com/en/articles/fork-a-repo) of our repository. The next step is to create a (pull request)[https://help.github.com/en/articles/about-pull-requests]. Feel free to open the pull request as soon as you begin your development (just mark it [as a draft](https://github.blog/2019-02-14-introducing-draft-pull-requests/)) or when you are ready to have your contribution merged.
+Once you are ready to start development, we ask you to work on a [fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) of our repository. The next step is to create a [pull request](https://help.github.com/en/articles/about-pull-requests). Feel free to open the pull request as soon as you begin your development (just mark it [as a draft](https://github.blog/2019-02-14-introducing-draft-pull-requests/)) or when you are ready to have your contribution merged.

 ## The Legalese: Developer Certificate of Origin

-CuNumeric is released under the open-source [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0), and is free to use, modify, and redistribute. To ensure that the license can be exercised without encumbrance, we ask you that you only contribute your own work or work to which you have the intellectual rights. To that end, we employ the Developer's Certificate of Origin (DCO), which is the lightweight mechanism for you to certify that you are legally able to make your contribution. Here is the full text of the certificate (also available at [DeveloperCertificate.org](https://DeveloperCertificate.org)):
+CuNumeric is released under the open-source [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0), and is free to use, modify, and redistribute. To ensure that the license can be exercised without encumbrance, we ask that you only contribute your own work or work to which you have the intellectual rights. To that end, we employ the Developer's Certificate of Origin (DCO), which is the lightweight mechanism for you to certify that you are legally able to make your contribution. Here is the full text of the certificate (also available at [DeveloperCertificate.org](https://developercertificate.org/)):

 ````
 Developer Certificate of Origin
@@ -61,7 +61,7 @@
 Please use your real name and a valid email address at which you can be reached.

 ## Review Process

-We are really grateful that you are thinking of contributing to cuNumeric. We will make every effort to review your contributions as soon as possible.
+We are really grateful that you are thinking of contributing to cuNumeric. We will make every effort to review your contributions as soon as possible.

 As we suggested at the beginning of this document, it will be really helpful to start with an issue unless your proposed change is really trivial. An issue will help to save work in the review process (e.g., maybe somebody is already working on exactly the same thing you want to work on). After you open your pull request (PR), there will usually be community feedback that often requires further changes to your contribution (the usual open-source process). Usually, this will conclude with the PR being merged by a maintainer, but on rare occasions a PR may be rejected. This may happen, for example, if the PR appears abandoned (no response to the community feedback) or if the PR does not seem to be approaching community acceptance in a reasonable time frame. In any case, an explanation will always be given as to why a PR is closed. Even if a PR is closed for some reason, it may always be reopened if the situation evolves (feel free to comment on closed PRs to discuss reopening them).

diff --git a/docs/cunumeric/Makefile b/docs/cunumeric/Makefile
index 71b7909f5..2ba1263b8 100644
--- a/docs/cunumeric/Makefile
+++ b/docs/cunumeric/Makefile
@@ -20,21 +20,35 @@
 # from the environment for the first two.
 SPHINXOPTS ?= -v -W
 PARALLEL_BUILD ?= 1
-SPHINXBUILD ?= legate $(shell which sphinx-build)
+SPHINXBUILD ?= $(shell which sphinx-build)
 SOURCEDIR = source
 BUILDDIR = build

-# Put it first so that "make" without argument is like "make help".
+.PHONY: help clean html linkcheck
+
 help:
-	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+	@echo "Please use 'make <target>' where <target> is one of"
+	@echo "  all        to clean build standalone HTML files and run linkcheck"
+	@echo "  clean      to clear all built documentation files"
+	@echo "  html       to make standalone (non-gallery) HTML files"
+	@echo "  linkcheck  to run the link checker on built docs"

-.PHONY: help Makefile
+all: clean html linkcheck

-# Catch-all target: route all unknown targets to Sphinx using the new
-# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
-%: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) -j $(PARALLEL_BUILD) +clean: rm -rf build/html/docs - mkdir -p build/html/docs - cp -r ../figures build/html/docs/ find source/ -name "generated" | xargs rm -rf + +html: + @start=$$(date +%s) \ + ; $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) -j $(PARALLEL_BUILD) \ + && mkdir -p build/html/docs \ + && cp -r ../figures build/html/docs/ \ + && echo "\nBuild finished in $$(($$(date +%s)-start)) seconds\n" + +linkcheck: html + @start=$$(date +%s) \ + ; $(SPHINXBUILD) "$(SOURCEDIR)" "$(BUILDDIR)" -b linkcheck \ + ; echo "\nLink check finished in $$(($$(date +%s)-start)) seconds. Any broken links are listed below:\n" \ + && [ -f "$(BUILDDIR)/output.txt" ] && cat "$(BUILDDIR)/output.txt" + From 7e43845b44f1a343a6334b43c5e30e0bde4e23d6 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Sun, 26 Mar 2023 19:51:33 -0700 Subject: [PATCH 016/106] Fixes #858 (#859) --- cunumeric/array.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cunumeric/array.py b/cunumeric/array.py index a44e5afd8..c7822b6b4 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -3050,7 +3050,7 @@ def mean( divisor = self.shape[axis] # Divide by the number of things in the collapsed dimensions # Pick the right kinds of division based on the dtype - if dtype.kind == "f": + if dtype.kind == "f" or dtype.kind == "c": sum_array.__itruediv__( np.array(divisor, dtype=sum_array.dtype), ) From 8d5e8d22fee79c1b409c177d47555104c809eb41 Mon Sep 17 00:00:00 2001 From: xialu00 <110973296+xialu00@users.noreply.github.com> Date: Thu, 30 Mar 2023 06:51:21 +0800 Subject: [PATCH 017/106] test_prod & test_reduce use allclose to compare float arrays (#867) * use allclose to compare float arrays * test_prod & test_reduce use allclose to compare float arrays --------- Co-authored-by: xialu00 --- tests/integration/test_prod.py | 8 ++++---- tests/integration/test_reduction.py | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integration/test_prod.py b/tests/integration/test_prod.py index 3194f9471..d6935cb74 100644 --- a/tests/integration/test_prod.py +++ b/tests/integration/test_prod.py @@ -95,7 +95,7 @@ class TestProdNegative(object): @pytest.mark.parametrize("arr", ARR) def test_array(self, arr): - assert np.array_equal(np.prod(arr), num.prod(arr)) + assert allclose(np.prod(arr), num.prod(arr)) @pytest.mark.xfail @pytest.mark.parametrize("dtype", NEGATIVE_DTYPE, ids=to_dtype) @@ -155,7 +155,7 @@ def test_keepdims(self): arr_np = np.array(arr_num) out_np = np.prod(arr_np, axis=2, keepdims=True) out_num = num.prod(arr_num, axis=2, keepdims=True) - assert np.array_equal(out_np, out_num) + assert allclose(out_np, out_num) @pytest.mark.xfail def test_initial_scalar_list(self): @@ -167,7 +167,7 @@ def test_initial_scalar_list(self): # Input object to FillWithScalar is not a scalar out_np = np.prod(arr, initial=initial_value) - assert np.array_equal(out_np, out_num) + assert allclose(out_np, out_num) def test_initial_list(self): arr = [[1, 2], [3, 4]] @@ -191,7 +191,7 @@ def test_where(self): # cuNumeric raises NotImplementedError: # the `where` parameter is currently not supported out_num = num.prod(arr, where=[False, True]) - assert np.array_equal(out_np, out_num) + assert allclose(out_np, out_num) class TestProdPositive(object): diff --git a/tests/integration/test_reduction.py b/tests/integration/test_reduction.py index 135524064..e823040cb 100644 --- a/tests/integration/test_reduction.py +++ 
b/tests/integration/test_reduction.py @@ -76,7 +76,7 @@ class TestSumNegative(object): @pytest.mark.parametrize("arr", ARR) def test_array(self, arr): - assert np.array_equal(np.sum(arr), num.sum(arr)) + assert allclose(np.sum(arr), num.sum(arr)) @pytest.mark.xfail @pytest.mark.parametrize("dtype", NEGATIVE_DTYPE, ids=to_dtype) @@ -125,7 +125,7 @@ def test_keepdims(self): arr_np = np.array(arr_num) out_np = np.sum(arr_np, axis=2, keepdims=True) out_num = num.sum(arr_num, axis=2, keepdims=True) - assert np.array_equal(out_np, out_num) + assert allclose(out_np, out_num) @pytest.mark.xfail def test_initial_scalar_list(self): @@ -135,7 +135,7 @@ def test_initial_scalar_list(self): out_np = np.sum( arr, initial=initial_value ) # ValueError: Input object to FillWithScalar is not a scalar - assert np.array_equal(out_np, out_num) + assert allclose(out_np, out_num) def test_initial_list(self): arr = [[1, 2], [3, 4]] @@ -160,7 +160,7 @@ def test_where(self): # cuNumeric raises NotImplementedError: # "the `where` parameter is currently not supported" out_num = num.sum(arr, where=[False, True]) - assert np.array_equal(out_np, out_num) + assert allclose(out_np, out_num) class TestSumPositive(object): From 194467eae5d5b4a95371853461978b207b08cda6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 29 Mar 2023 16:33:20 -0700 Subject: [PATCH 018/106] [pre-commit.ci] pre-commit autoupdate (#864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [pre-commit.ci] pre-commit autoupdate updates: - [github.com/pre-commit/mirrors-clang-format: v15.0.7 → v16.0.0](https://github.com/pre-commit/mirrors-clang-format/compare/v15.0.7...v16.0.0) --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- src/cunumeric/random/philox.h | 2 +- src/cunumeric/unary/unary_op_util.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 17bdfaaf1..9a1aae15d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -19,7 +19,7 @@ repos: hooks: - id: flake8 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: 'v15.0.7' # Use the sha / tag you want to point at + rev: 'v16.0.0' # Use the sha / tag you want to point at hooks: - id: clang-format files: \.(cu|cuh|h|cc|inl)$ diff --git a/src/cunumeric/random/philox.h b/src/cunumeric/random/philox.h index 792ff877b..121bd7137 100644 --- a/src/cunumeric/random/philox.h +++ b/src/cunumeric/random/philox.h @@ -118,7 +118,7 @@ class Philox_2x32 { // This syntax is only supported on >= c++17 const float scale = 0x1.p-32; // 2^-32 #else - const float scale = 0.00000000023283064365386962890625; + const float scale = 0.00000000023283064365386962890625; #endif return (bits * scale); } diff --git a/src/cunumeric/unary/unary_op_util.h b/src/cunumeric/unary/unary_op_util.h index f5012df0d..2f6fab59a 100644 --- a/src/cunumeric/unary/unary_op_util.h +++ b/src/cunumeric/unary/unary_op_util.h @@ -500,7 +500,7 @@ struct UnaryOp { UnaryOp(const std::vector& args) {} - constexpr decltype(auto) operator()(const T& x) const { return x * T{M_PI / 180.0}; } + constexpr decltype(auto) operator()(const T& x) const { return x* T{M_PI / 180.0}; } }; template <> From ee63f1b3ce5d2a4b24ef5a2d07816fed8d666b1b Mon Sep 17 00:00:00 2001 From: robinw0928 <104830875+robinw0928@users.noreply.github.com> Date: Fri, 31 Mar 2023 09:06:56 +0800 
Subject: [PATCH 019/106] Fix concatenate and *stack APIs to support scalars(#818, #839) (#866) * Fix concatenate and *stack APIs to support scalars * Address comments --- cunumeric/module.py | 34 ++++++++--- tests/integration/test_concatenate_stack.py | 67 ++++++++++++++++++--- 2 files changed, 82 insertions(+), 19 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index d6e84a8fa..cfe0a7b4e 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -1286,7 +1286,7 @@ def _reshape_recur(ndim: int, arr: ndarray) -> tuple[int, ...]: def _atleast_nd( - ndim: int, arys: tuple[ndarray, ...] + ndim: int, arys: Sequence[ndarray] ) -> Union[list[ndarray], ndarray]: inputs = list(convert_to_cunumeric_ndarray(arr) for arr in arys) # 'reshape' change the shape of arrays @@ -1479,8 +1479,8 @@ def check_shape_with_axis( ndim = inputs[0].ndim shape = inputs[0].shape + axis = normalize_axis_index(axis, ndim) if ndim >= 1: - axis = normalize_axis_index(axis, ndim) if _builtin_any( shape[:axis] != inp.shape[:axis] or shape[axis + 1 :] != inp.shape[axis + 1 :] @@ -1805,7 +1805,11 @@ def concatenate( # flatten arrays if axis == None and concatenate arrays on the first axis if axis is None: - inputs = list(inp.ravel() for inp in inputs) + # Reshape arrays in the `array_list` to handle scalars + reshaped = _atleast_nd(1, inputs) + if not isinstance(reshaped, list): + reshaped = [reshaped] + inputs = list(inp.ravel() for inp in reshaped) axis = 0 # Check to see if we can build a new tuple of cuNumeric arrays @@ -1912,7 +1916,7 @@ def vstack(tup: Sequence[ndarray]) -> ndarray: Multiple GPUs, Multiple CPUs """ # Reshape arrays in the `array_list` if needed before concatenation - reshaped = _atleast_nd(2, tuple(tup)) + reshaped = _atleast_nd(2, tup) if not isinstance(reshaped, list): reshaped = [reshaped] tup, common_info = check_shape_dtype_without_axis( @@ -1960,7 +1964,14 @@ def hstack(tup: Sequence[ndarray]) -> ndarray: -------- Multiple GPUs, Multiple CPUs """ - tup, common_info = check_shape_dtype_without_axis(tup, hstack.__name__) + # Reshape arrays in the `array_list` to handle scalars + reshaped = _atleast_nd(1, tup) + if not isinstance(reshaped, list): + reshaped = [reshaped] + + tup, common_info = check_shape_dtype_without_axis( + reshaped, hstack.__name__ + ) check_shape_with_axis( tup, hstack.__name__, axis=(0 if common_info.ndim == 1 else 1) ) @@ -2008,7 +2019,7 @@ def dstack(tup: Sequence[ndarray]) -> ndarray: Multiple GPUs, Multiple CPUs """ # Reshape arrays to (1,N,1) for ndim ==1 or (M,N,1) for ndim == 2: - reshaped = _atleast_nd(3, tuple(tup)) + reshaped = _atleast_nd(3, tup) if not isinstance(reshaped, list): reshaped = [reshaped] tup, common_info = check_shape_dtype_without_axis( @@ -2052,14 +2063,19 @@ def column_stack(tup: Sequence[ndarray]) -> ndarray: -------- Multiple GPUs, Multiple CPUs """ + # Reshape arrays in the `array_list` to handle scalars + reshaped = _atleast_nd(1, tup) + if not isinstance(reshaped, list): + reshaped = [reshaped] + tup, common_info = check_shape_dtype_without_axis( - tup, column_stack.__name__ + reshaped, column_stack.__name__ ) - # When ndim == 1, hstack concatenates arrays along the first axis + if common_info.ndim == 1: tup = list(inp.reshape((inp.shape[0], 1)) for inp in tup) common_info.shape = tup[0].shape - check_shape_with_axis(tup, dstack.__name__, 1) + check_shape_with_axis(tup, column_stack.__name__, 1) return _concatenate( tup, common_info, diff --git a/tests/integration/test_concatenate_stack.py 
b/tests/integration/test_concatenate_stack.py index 33a11f392..52aba1672 100644 --- a/tests/integration/test_concatenate_stack.py +++ b/tests/integration/test_concatenate_stack.py @@ -74,9 +74,7 @@ def run_test(arr, routine, input_size): NUM_ARR = [1, 3] SIZES = [ - # In Numpy, hstack and column_stack PASS - # In cuNumeric, hstack and column_stack raise IndexError - pytest.param((), marks=pytest.mark.xfail), # for scalar. + (), (0,), (0, 10), (1,), @@ -87,6 +85,11 @@ def run_test(arr, routine, input_size): (DIM, DIM, DIM), ] +SCALARS = ( + (10,), + (10, 20, 30), +) + @pytest.fixture(autouse=False) def a(size, num): @@ -99,6 +102,13 @@ def test_concatenate(size, num, a): run_test(tuple(a), "concatenate", size) +@pytest.mark.parametrize("arrays", SCALARS, ids=str) +def test_concatenate_scalar(arrays): + res_np = np.concatenate(arrays, axis=None) + res_num = num.concatenate(arrays, axis=None) + assert np.array_equal(res_np, res_num) + + def test_concatenate_with_out(): a = [[1, 2], [3, 4]] b = [[5, 6]] @@ -158,16 +168,13 @@ def test_zero_arrays(self): @pytest.mark.parametrize( "arrays", ( - pytest.param((1,), marks=pytest.mark.xfail), - pytest.param((1, 2), marks=pytest.mark.xfail), + (1,), + (1, 2), (1, [3, 4]), ), ids=lambda arrays: f"(arrays={arrays})", ) def test_scalar_axis_is_not_none(self, arrays): - # For (1,) and (1, 2), - # In Numpy, it raises ValueError - # In cuNumeric, it raises IndexError expected_exc = ValueError axis = 0 with pytest.raises(expected_exc): @@ -228,8 +235,6 @@ def test_both_out_dtype_are_provided(self): ) def test_invalid_casting(self): - # In Numpy, raise ValueError - # In cuNumeric, pass expected_exc = ValueError a = [[1, 2], [3, 4]] b = [[5, 6]] @@ -251,6 +256,13 @@ def test_stack(size, num, a): run_test(tuple(a), "stack", size) +@pytest.mark.parametrize("arrays", SCALARS, ids=str) +def test_stack_scalar(arrays): + res_np = np.stack(arrays) + res_num = num.stack(arrays) + assert np.array_equal(res_np, res_num) + + def test_stack_with_out(): a = [1, 2] b = [3, 4] @@ -351,6 +363,13 @@ def test_hstack(size, num, a): run_test(tuple(a), "hstack", size) +@pytest.mark.parametrize("arrays", SCALARS, ids=str) +def test_hstack_scalar(arrays): + res_np = np.hstack(arrays) + res_num = num.hstack(arrays) + assert np.array_equal(res_np, res_num) + + class TestHStackErrors: def test_zero_arrays(self): expected_exc = ValueError @@ -382,6 +401,13 @@ def test_column_stack(size, num, a): run_test(tuple(a), "column_stack", size) +@pytest.mark.parametrize("arrays", SCALARS, ids=str) +def test_column_stack_scalar(arrays): + res_np = np.column_stack(arrays) + res_num = num.column_stack(arrays) + assert np.array_equal(res_np, res_num) + + class TestColumnStackErrors: def test_zero_arrays(self): expected_exc = ValueError @@ -418,6 +444,13 @@ def test_vstack(size, num, a): run_test(tuple(a), "vstack", size) +@pytest.mark.parametrize("arrays", SCALARS, ids=str) +def test_vstack_scalar(arrays): + res_np = np.vstack(arrays) + res_num = num.vstack(arrays) + assert np.array_equal(res_np, res_num) + + class TestVStackErrors: def test_zero_arrays(self): expected_exc = ValueError @@ -454,6 +487,13 @@ def test_rowstack(size, num, a): run_test(tuple(a), "row_stack", size) +@pytest.mark.parametrize("arrays", SCALARS, ids=str) +def test_row_stack_scalar(arrays): + res_np = np.row_stack(arrays) + res_num = num.row_stack(arrays) + assert np.array_equal(res_np, res_num) + + class TestRowStackErrors: def test_zero_arrays(self): expected_exc = ValueError @@ -490,6 +530,13 @@ def test_dstack(size, num, 
a): run_test(tuple(a), "dstack", size) +@pytest.mark.parametrize("arrays", SCALARS, ids=str) +def test_dstack_scalar(arrays): + res_np = np.dstack(arrays) + res_num = num.dstack(arrays) + assert np.array_equal(res_np, res_num) + + class TestDStackErrors: def test_zero_arrays(self): expected_exc = ValueError From ee21747bff5472cf55fc2ed36fb477cd7845d49c Mon Sep 17 00:00:00 2001 From: Robin Wang <104830875+robinw0928@users.noreply.github.com> Date: Wed, 5 Apr 2023 10:44:04 +0800 Subject: [PATCH 020/106] Add tests for amax, amin and mean. (#861) * Add tests for amax, amin and mean. * Address comments * Fix issue #870 --- cunumeric/deferred.py | 4 +- tests/integration/test_amax_amin.py | 294 ++++++++++++++++++++++++++++ tests/integration/test_mean.py | 162 +++++++++++++++ 3 files changed, 458 insertions(+), 2 deletions(-) create mode 100755 tests/integration/test_amax_amin.py create mode 100755 tests/integration/test_mean.py diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index c30b4c506..ecac9332a 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -3142,8 +3142,8 @@ def unary_reduction( # See if we are doing reduction to a point or another region if lhs_array.size == 1: - assert axes is None or len(axes) == rhs_array.ndim - ( - 0 if keepdims else lhs_array.ndim + assert axes is None or lhs_array.ndim == rhs_array.ndim - ( + 0 if keepdims else len(axes) ) if initial is not None: diff --git a/tests/integration/test_amax_amin.py b/tests/integration/test_amax_amin.py new file mode 100755 index 000000000..9ca074e69 --- /dev/null +++ b/tests/integration/test_amax_amin.py @@ -0,0 +1,294 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from legate.core import LEGATE_MAX_DIM + +import cunumeric as num + +FUNCS = ("amax", "amin") + + +@pytest.mark.parametrize("initial", (None, -2, 0, 0.5, 2)) +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("ndim", range(LEGATE_MAX_DIM + 1)) +@pytest.mark.parametrize("func_name", FUNCS) +def test_basic(func_name, ndim, keepdims, initial): + shape = (5,) * ndim + in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + kw = {} if initial is None else dict(initial=initial) + + res_np = func_np(in_np, keepdims=keepdims, **kw) + res_num = func_num(in_num, keepdims=keepdims, **kw) + + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize( + "src_dt", + ( + np.int32, + np.float64, + pytest.param(np.complex128, marks=pytest.mark.xfail), + ), +) +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("func_name", FUNCS) +def test_src_dt(func_name, keepdims, src_dt): + # For src_dt=np.complex128, + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + ndim = 3 + shape = (5,) * ndim + in_np = np.random.randint(-5, 5, size=shape).astype(src_dt) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + assert np.array_equal( + func_np(in_np, keepdims=keepdims), + func_num(in_num, keepdims=keepdims), + ) + + +@pytest.mark.parametrize("initial", (None, -2, 0, 0.5, 2)) +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("ndim", range(LEGATE_MAX_DIM + 1)) +@pytest.mark.parametrize("func_name", FUNCS) +def test_axis(func_name, ndim, keepdims, initial): + shape = (5,) * ndim + in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + kw = {} if initial is None else dict(initial=initial) + + axis_list = list(range(in_num.ndim)) + axis_list.append(-ndim) + + for axis in axis_list: + res_np = func_np(in_np, axis=axis, keepdims=keepdims, **kw) + res_num = func_num(in_num, axis=axis, keepdims=keepdims, **kw) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.xfail +@pytest.mark.parametrize("axes", ((-3, -1), (-1, 0), (-2, 2), (0, 2))) +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("func_name", FUNCS) +def test_axis_tuple(func_name, keepdims, axes): + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + shape = (3, 4, 5) + in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + res_np = func_np(in_np, axis=axes, keepdims=keepdims) + res_num = func_num(in_num, axis=axes, keepdims=keepdims) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("func_name", FUNCS) +def test_out_dim0(func_name, keepdims): + shape = (5,) * 0 + in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + res_np = np.empty(()) + res_num = num.empty(()) + + func_np(in_np, out=res_np, keepdims=keepdims) + func_num(in_num, out=res_num, keepdims=keepdims) + + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("func_name", FUNCS) +def test_out_dim1(func_name, keepdims): + shape = (5,) * 1 + 
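+    # (5,) * 1 is just (5,): reducing a 1-D array over axis 0 yields
+    # shape (1,) with keepdims=True and a 0-d result otherwise, so the
+    # `out` buffer below is allocated to match.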
in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + res_shape = (1,) if keepdims else () + res_np = np.empty(res_shape) + res_num = num.empty(res_shape) + + func_np(in_np, axis=0, out=res_np, keepdims=keepdims) + func_num(in_num, axis=0, out=res_num, keepdims=keepdims) + + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("initial", (None, -2, 0, 0.5, 2)) +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("ndim", range(2, LEGATE_MAX_DIM + 1)) +@pytest.mark.parametrize("func_name", FUNCS) +def test_out(func_name, ndim, keepdims, initial): + shape = (5,) * ndim + in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + kw = {} if initial is None else dict(initial=initial) + + for axis in range(in_num.ndim): + shape_list = list(shape) + shape_list[axis] = 1 + shape_true = tuple(shape_list) + + res_shape = shape_true if keepdims else (5,) * (ndim - 1) + res_np = np.empty(res_shape) + res_num = num.empty(res_shape) + + func_np(in_np, axis=axis, out=res_np, keepdims=keepdims, **kw) + func_num(in_num, axis=axis, out=res_num, keepdims=keepdims, **kw) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize( + "out_dt", + ( + np.int32, + np.float64, + pytest.param(np.complex128, marks=pytest.mark.xfail), + ), +) +@pytest.mark.parametrize("keepdims", [True, False]) +@pytest.mark.parametrize("func_name", FUNCS) +def test_out_with_dtype(func_name, keepdims, out_dt): + # For out_dt=np.complex128 + # In Numpy, it pass + # In cuNumeric, it raises KeyError + ndim = 3 + shape = (5,) * ndim + in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + for axis in range(in_num.ndim): + shape_list = list(shape) + shape_list[axis] = 1 + shape_true = tuple(shape_list) + + res_shape = shape_true if keepdims else (5,) * (ndim - 1) + res_np = np.empty(res_shape, dtype=out_dt) + res_num = num.empty(res_shape, dtype=out_dt) + + func_np(in_np, axis=axis, out=res_np, keepdims=keepdims) + func_num(in_num, axis=axis, out=res_num, keepdims=keepdims) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.xfail +@pytest.mark.parametrize("func_name", FUNCS) +def test_where(func_name): + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + shape = (3, 4, 5) + in_np = np.random.randint(-5, 5, size=shape) + in_num = num.array(in_np) + where_np = in_np > 0.5 + where_num = num.array(where_np) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + val = 0 + assert np.array_equal( + func_np(in_np, initial=val, where=where_np), + func_num(in_num, initial=val, where=where_num), + ) + + +class TestAmaxAminErrors: + def setup_method(self): + size = (3, 4, 5) + self.arr_np = np.random.randint(-5, 5, size=size) + self.arr_num = num.array(self.arr_np) + + @pytest.mark.parametrize("func_name", FUNCS) + def test_empty_array(self, func_name): + expected_exc = ValueError + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + with pytest.raises(expected_exc): + func_np([]) + with pytest.raises(expected_exc): + func_num([]) + + @pytest.mark.parametrize( + "axis", (-4, 3), ids=lambda axis: f"(axis={axis})" + ) + @pytest.mark.parametrize("func_name", FUNCS) + def test_axis_out_of_bound(self, func_name, axis): + expected_exc = ValueError + 
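+        # NumPy signals an out-of-range axis with numpy.AxisError, which
+        # subclasses ValueError, so expecting ValueError here covers
+        # NumPy and cuNumeric alike.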
func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + with pytest.raises(expected_exc): + func_np(self.arr_np, axis=axis) + with pytest.raises(expected_exc): + func_num(self.arr_num, axis=axis) + + @pytest.mark.parametrize( + "axis_out_shape", + ( + (None, (1,)), + (1, (3, 4)), + ), + ids=lambda axis_out_shape: f"(axis_out_shape={axis_out_shape})", + ) + @pytest.mark.parametrize("func_name", FUNCS) + def test_out_invalid_shape(self, func_name, axis_out_shape): + axis, out_shape = axis_out_shape + expected_exc = ValueError + out_np = np.empty(out_shape) + out_num = num.empty(out_shape) + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + with pytest.raises(expected_exc): + func_np(self.arr_np, axis=axis, out=out_np) + with pytest.raises(expected_exc): + func_num(self.arr_num, axis=axis, out=out_num) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py new file mode 100755 index 000000000..4b82929a4 --- /dev/null +++ b/tests/integration/test_mean.py @@ -0,0 +1,162 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest + +import cunumeric as num + +DIM = 7 + +NO_EMPTY_SIZE = ( + (1,), + (DIM,), + (1, 1), + (1, DIM), + (DIM, 1), + (DIM, DIM), + (1, 1, 1), + (DIM, 1, 1), + (1, DIM, 1), + (1, 1, DIM), + (DIM, DIM, DIM), +) + + +def gen_out_shape(size, axis): + if axis is None: + return () + if axis < 0: + axis += len(size) + if axis >= 0 and axis < len(size): + return size[:axis] + size[axis + 1 :] + else: + return -1 + + +@pytest.mark.parametrize("arr", ([], [[], []])) +def test_empty_arr(arr): + res_np = np.mean(arr) + res_num = num.mean(arr) + assert np.isnan(res_np) and np.isnan(res_num) + + +@pytest.mark.parametrize("val", (0.0, 10.0, -5, 1 + 1j)) +def test_scalar(val): + res_np = np.mean(val) + res_num = num.mean(val) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_basic(size): + arr_np = np.random.randint(-5, 5, size=size) + arr_num = num.array(arr_np) + res_np = np.mean(arr_np) + res_num = num.mean(arr_num) + np.array_equal(res_np, res_num) + + +@pytest.mark.xfail +@pytest.mark.parametrize("axis", ((-3, -1), (-1, 0), (-2, 2), (0, 2))) +def test_axis_tuple(axis): + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + size = (3, 4, 7) + arr_np = np.random.randint(-5, 5, size=size) + arr_num = num.array(arr_np) + out_np = np.mean(arr_np, axis=axis) + out_num = num.mean(arr_num, axis=axis) + assert np.array_equal(out_np, out_num) + + +@pytest.mark.parametrize("keepdims", (False, True)) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_axis_keepdims(size, keepdims): + arr_np = np.random.randint(-5, 5, size=size) + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in range(-ndim, ndim): + out_np = np.mean(arr_np, axis=axis, keepdims=keepdims) + out_num = 
num.mean(arr_num, axis=axis, keepdims=keepdims) + assert np.array_equal(out_np, out_num) + + +@pytest.mark.parametrize("array_dt", (np.int32, np.float32, np.complex64)) +@pytest.mark.parametrize("dt", (np.int32, np.float32, np.complex64)) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_dtype(size, array_dt, dt): + arr_np = np.random.randint(-5, 5, size=size).astype(array_dt) + arr_num = num.array(arr_np) + res_np = np.mean(arr_np, dtype=dt) + res_num = num.mean(arr_num, dtype=np.dtype(dt)) + np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("out_dt", (np.int32, np.float32, np.complex128)) +@pytest.mark.parametrize("size", NO_EMPTY_SIZE) +def test_out(size, out_dt): + arr_np = np.random.randint(-5, 5, size=size) + arr_num = num.array(arr_np) + ndim = arr_np.ndim + for axis in (-1, ndim - 1, None): + out_shape = gen_out_shape(size, axis) + out_np = np.empty(out_shape, dtype=out_dt) + out_num = num.empty(out_shape, dtype=out_dt) + np.mean(arr_np, axis=axis, out=out_np) + num.mean(arr_num, axis=axis, out=out_num) + np.array_equal(out_np, out_num) + + +class TestMeanErrors: + def setup_method(self): + size = (3, 4, 5) + self.arr_np = np.random.randint(-5, 5, size=size) + self.arr_num = num.array(self.arr_np) + + @pytest.mark.parametrize( + "axis", (-4, 3), ids=lambda axis: f"(axis={axis})" + ) + def test_axis_out_of_bound(self, axis): + expected_exc = ValueError + with pytest.raises(expected_exc): + np.mean(self.arr_np, axis=axis) + with pytest.raises(expected_exc): + num.mean(self.arr_num, axis=axis) + + @pytest.mark.parametrize( + "axis_out_shape", + ( + (None, (1,)), + (1, (3, 4)), + ), + ids=lambda axis_out_shape: f"(axis_out_shape={axis_out_shape})", + ) + def test_out_invalid_shape(self, axis_out_shape): + axis, out_shape = axis_out_shape + expected_exc = ValueError + out_np = np.empty(out_shape) + out_num = num.empty(out_shape) + with pytest.raises(expected_exc): + np.mean(self.arr_np, axis=axis, out=out_np) + with pytest.raises(expected_exc): + num.mean(self.arr_num, axis=axis, out=out_num) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) From 4dc10dbd7f4bc88ee788239c674f15b9c5e5dd85 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 4 Apr 2023 22:49:27 -0700 Subject: [PATCH 021/106] [pre-commit.ci] pre-commit autoupdate (#871) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/psf/black: 23.1.0 → 23.3.0](https://github.com/psf/black/compare/23.1.0...23.3.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 9a1aae15d..167bdd343 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,7 +11,7 @@ repos: hooks: - id: isort - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.3.0 hooks: - id: black - repo: https://github.com/PyCQA/flake8 From ab87c85edaf2a14eaa6cfc2971d40ee6642b8705 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 6 Apr 2023 10:57:12 -0700 Subject: [PATCH 022/106] Switch to quarterly updates of pre-commit hooks (#873) --- .pre-commit-config.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 167bdd343..d84f8fa9f 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -34,7 +34,8 @@ repos: pass_filenames: false ci: - skip: [mypy] + skip: [mypy] + autoupdate_schedule: quarterly default_language_version: python: python3 From ef5e502c32a8856cf0974942388d3abc6aaa99c1 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Fri, 7 Apr 2023 07:45:40 -0700 Subject: [PATCH 023/106] Separate cunumeric CI from legate.core CI (#850) Build legate.core from the versions.json file instead of relying on legate.core CI artifacts. --------- Co-authored-by: Marcin Zalewski --- .github/workflows/ci.yml | 2 +- cmake/versions.json | 7 +++++-- cunumeric_cpp.cmake | 2 +- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index b9b3bc526..121db6f06 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -52,7 +52,7 @@ jobs: /data/github-runner/legate-bin/setup.sh cd legate-ci/github-ci/cunumeric rm -rf ngc-artifacts || true - ./build-conda.sh > ${COMMIT}-build.log 2>&1 + ./build-separate.sh > ${COMMIT}-build.log 2>&1 - name: Process Output run: | cd legate-ci/github-ci/cunumeric diff --git a/cmake/versions.json b/cmake/versions.json index 577d91d7a..b9c185813 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -1,8 +1,11 @@ { "packages" : { "legate_core" : { - "git_url" : "https://github.com/nv-legate/legate.core.git", - "git_tag" : "78c61d836906611bf18348b4335f6e039f944340" + "version": "23.03.00", + "git_url" : "https://github.com/nv-legate/legate.core.git", + "git_shallow": false, + "always_download": false, + "git_tag" : "2afa4c7" } } } diff --git a/cunumeric_cpp.cmake b/cunumeric_cpp.cmake index 486b51272..4592559d3 100644 --- a/cunumeric_cpp.cmake +++ b/cunumeric_cpp.cmake @@ -45,7 +45,7 @@ rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) # - Dependencies ------------------------------------------------------------- # add third party dependencies using CPM -rapids_cpm_init() +rapids_cpm_init(OVERRIDE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/versions.json) find_package(OpenMP) From 554c2ad2c0419455b94246695308b928076e1ed4 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Fri, 7 Apr 2023 10:52:35 -0700 Subject: [PATCH 024/106] Note new minimum CUDA requirements for conda packages (#875) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 272124439..3739ac1e6 100644 --- a/README.md +++ b/README.md @@ -45,8 +45,8 @@ conda install -c nvidia -c conda-forge -c legate cunumeric Only linux-64 packages are available at the moment. -The default package contains GPU support, and is compatible with CUDA >= 11.4 -(CUDA driver version >= r470), and Volta or later GPU architectures. There are +The default package contains GPU support, and is compatible with CUDA >= 11.8 +(CUDA driver version >= r520), and Volta or later GPU architectures. There are also CPU-only packages available, and will be automatically selected by `conda` when installing on a machine without GPUs. From 4544bdd0675a19bc2a3322a63712a1ba861bcd27 Mon Sep 17 00:00:00 2001 From: Robin Wang <104830875+robinw0928@users.noreply.github.com> Date: Wed, 12 Apr 2023 10:14:26 +0800 Subject: [PATCH 025/106] Add tests for allclose and array_equal. (#872) * Add tests for allclose and array_equal. * Address comments. * Address comments: use chaining is. 
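For reference, a minimal sketch of the "chaining is" idiom these tests
settle on (plain Python, illustrative values only; `x` stands in for any
input):

    import numpy as np

    import cunumeric as num

    x = [1, 2, 3]
    res_np = np.array_equal(x, x)  # NumPy returns a plain Python bool
    res_num = num.array_equal(x, x)  # cuNumeric returns an array, hence bool()
    # `a is b is c` chains as `(a is b) and (b is c)`, so a single
    # assert checks both results against the singleton True.
    assert res_np is bool(res_num) is True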
--- tests/integration/test_allclose.py | 300 ++++++++++++++++++++++++++ tests/integration/test_array_equal.py | 116 ++++++++++ 2 files changed, 416 insertions(+) create mode 100755 tests/integration/test_allclose.py create mode 100755 tests/integration/test_array_equal.py diff --git a/tests/integration/test_allclose.py b/tests/integration/test_allclose.py new file mode 100755 index 000000000..ce821a1dd --- /dev/null +++ b/tests/integration/test_allclose.py @@ -0,0 +1,300 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest + +import cunumeric as num + +SCALARS_TRUE_DEFAULT = ( + (0, -1e-8), + (1e10, 1.00001e10), + (1 + 1j, 1 + 1.00001j), +) + +SCALARS_TRUE_INF = ( + (np.inf, np.inf), + (-np.inf, -np.inf), +) + + +@pytest.mark.parametrize( + ("a", "b"), + SCALARS_TRUE_DEFAULT + SCALARS_TRUE_INF, +) +def test_scalar_true(a, b): + res_np = np.allclose(a, b) + res_num = num.allclose(a, b) + assert res_np is bool(res_num) is True + + res_np_swapped = np.allclose(b, a) + res_num_swapped = num.allclose(b, a) + assert res_np_swapped is bool(res_num_swapped) is True + + +SCALARS_FALSE_DEFAULT = ( + (0, -0.000001), + (1e10, 1.0001e10), + (1 + 1j, 1 + 1.0001j), +) + +SCALARS_FALSE_INF = ((np.inf, -np.inf),) + + +@pytest.mark.parametrize( + ("a", "b"), + SCALARS_FALSE_DEFAULT + SCALARS_FALSE_INF, +) +def test_scalar_false(a, b): + res_np = np.allclose(a, b) + res_num = num.allclose(a, b) + assert res_np is bool(res_num) is False + + res_np_swapped = np.allclose(b, a) + res_num_swapped = num.allclose(b, a) + assert res_np_swapped is bool(res_num_swapped) is False + + +SHAPES = ( + (1,), + (6,), + (1, 1), + (2, 3), + (2, 3, 4), +) + + +@pytest.mark.parametrize("shape", SHAPES, ids=lambda shape: f"(shape={shape})") +def test_array_true(shape): + len_scalars = len(SCALARS_TRUE_DEFAULT) + size = np.prod(shape) + array = [SCALARS_TRUE_DEFAULT[i % len_scalars] for i in range(size)] + a_np = np.array([x[0] for x in array]).reshape(shape) + b_np = np.array([x[1] for x in array]).reshape(shape) + a_num = num.array(a_np) + b_num = num.array(b_np) + + res_np = np.allclose(a_np, b_np) + res_num = num.allclose(a_num, b_num) + assert res_np is bool(res_num) is True + + res_np_swapped = np.allclose(b_np, a_np) + res_num_swapped = num.allclose(b_num, a_num) + assert res_np_swapped is bool(res_num_swapped) is True + + +@pytest.mark.parametrize("shape", SHAPES, ids=lambda shape: f"(shape={shape})") +def test_array_true_inf(shape): + tup_scalars_true = SCALARS_TRUE_INF + SCALARS_TRUE_DEFAULT + len_scalars = len(tup_scalars_true) + size = np.prod(shape) + array = [tup_scalars_true[i % len_scalars] for i in range(size)] + a_np = np.array([x[0] for x in array]).reshape(shape).astype(float) + b_np = np.array([x[1] for x in array]).reshape(shape).astype(float) + a_num = num.array(a_np) + b_num = num.array(b_np) + + res_np = np.allclose(a_np, b_np) + res_num = num.allclose(a_num, b_num) + assert res_np is bool(res_num) is True + + res_np_swapped = 
np.allclose(b_np, a_np) + res_num_swapped = num.allclose(b_num, a_num) + assert res_np_swapped is bool(res_num_swapped) is True + + +@pytest.mark.parametrize("shape", SHAPES, ids=lambda shape: f"(shape={shape})") +def test_array_false(shape): + len_scalars = len(SCALARS_TRUE_DEFAULT) + size = np.prod(shape) + array = [SCALARS_TRUE_DEFAULT[i % len_scalars] for i in range(size)] + array[-1] = SCALARS_FALSE_DEFAULT[0] + a_np = np.array([x[0] for x in array]).reshape(shape) + b_np = np.array([x[1] for x in array]).reshape(shape) + a_num = num.array(a_np) + b_num = num.array(b_np) + + res_np = np.allclose(a_np, b_np) + res_num = num.allclose(a_num, b_num) + assert res_np is bool(res_num) is False + + res_np_swapped = np.allclose(b_np, a_np) + res_num_swapped = num.allclose(b_num, a_num) + assert res_np_swapped is bool(res_num_swapped) is False + + +SHAPES_BROADCASTING1 = ( + (1, 3), + (2, 3), + (1, 2, 3), + (2, 2, 3), +) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "shape_b", SHAPES_BROADCASTING1, ids=lambda shape_b: f"(shape_b={shape_b})" +) +def test_broadcast_true1(shape_b): + # for all cases, + # In Numpy, it pass + # In cuNumeric, it raises AttributeError: + # 'Store' object has no attribute '_broadcast' + len_scalars = len(SCALARS_TRUE_DEFAULT) + + shape_a = (3,) + size_a = np.prod(shape_a) + array_a = [SCALARS_TRUE_DEFAULT[i % len_scalars] for i in range(size_a)] + a_np = np.array([x[0] for x in array_a]).reshape(shape_a) + + size_b = np.prod(shape_b) + array_b = [array_a[i % size_a] for i in range(size_b)] + b_np = np.array([x[1] for x in array_b]).reshape(shape_b) + + a_num = num.array(a_np) + b_num = num.array(b_np) + res_np = np.allclose(a_np, b_np) + res_num = num.allclose(a_num, b_num) + + assert res_np is bool(res_num) is True + + +SHAPES_BROADCASTING2 = ( + (1,), + (1, 1), + (1, 2, 1), + (2, 2, 1), +) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "shape_b", SHAPES_BROADCASTING2, ids=lambda shape_b: f"(shape_b={shape_b})" +) +def test_broadcast_true2(shape_b): + # for all cases, + # In Numpy, it pass + # In cuNumeric, it raises AttributeError: + # 'Store' object has no attribute '_broadcast' + shape_a = (3,) + size_a = np.prod(shape_a) + a_np = np.array( + [SCALARS_TRUE_DEFAULT[0][0] for _ in range(size_a)] + ).reshape(shape_a) + + size_b = np.prod(shape_b) + b_np = np.array( + [SCALARS_TRUE_DEFAULT[0][1] for _ in range(size_b)] + ).reshape(shape_b) + + a_num = num.array(a_np) + b_num = num.array(b_np) + + res_np = np.allclose(a_np, b_np) + res_num = num.allclose(a_num, b_num) + + assert res_np is bool(res_num) is True + + +@pytest.mark.parametrize( + "equal_nan", (False, pytest.param(True, marks=pytest.mark.xfail)) +) +@pytest.mark.parametrize( + "arr", + ([np.nan], [1, 2, np.nan], [[1, 2], [3, np.nan]]), + ids=lambda arr: f"(arr={arr})", +) +def test_equal_nan_basic(arr, equal_nan): + # If equal_nan is True, + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + res_np = np.allclose(arr, arr, equal_nan=equal_nan) + res_num = num.allclose(arr, arr, equal_nan=equal_nan) + assert res_np == res_num + + +EMPTY_ARRAY_PAIRS = ( + ([], []), + ([], [[]]), + ([[]], [[]]), +) + + +@pytest.mark.parametrize( + ("a", "b"), + EMPTY_ARRAY_PAIRS, +) +def test_empty_array(a, b): + res_np = np.allclose(a, b) + res_num = num.allclose(a, b) + + assert res_np is bool(res_num) is True + + +SCALAR_BROADCASTING = ( + (1e10, [1.00001e10]), + (1e10, [[1.00001e10]]), +) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + ("a", "b"), + SCALAR_BROADCASTING, +) +def 
test_scalar_broadcasting(a, b): + # for all cases, + # In Numpy, it pass + # In cuNumeric, it raises AttributeError: + # 'Store' object has no attribute '_broadcast' + res_np = np.allclose(a, b) + res_num = num.allclose(a, b) + + assert res_np is bool(res_num) is True + + +@pytest.mark.parametrize( + ("a", "b"), + SCALARS_FALSE_DEFAULT, +) +def test_scalar_rtol_atol_true(a, b): + rtol = 1e-04 + atol = 1e-06 + + res_np = np.allclose(a, b, rtol=rtol, atol=atol) + res_num = num.allclose(a, b, rtol=rtol, atol=atol) + + assert res_np is bool(res_num) is True + + +@pytest.mark.parametrize( + ("a", "b"), + SCALARS_TRUE_DEFAULT, +) +def test_scalar_rtol_atol_false(a, b): + rtol = 1e-06 + atol = 1e-09 + + res_np = np.allclose(a, b, rtol=rtol, atol=atol) + res_num = num.allclose(a, b, rtol=rtol, atol=atol) + + assert res_np is bool(res_num) is False + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_array_equal.py b/tests/integration/test_array_equal.py new file mode 100755 index 000000000..741167b1e --- /dev/null +++ b/tests/integration/test_array_equal.py @@ -0,0 +1,116 @@ +# Copyright 2021-2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest + +import cunumeric as num + + +@pytest.mark.parametrize( + "arr", (1, [], [[]], [1], [[1, 2], [3, 4]]), ids=lambda arr: f"(arr={arr})" +) +def test_equal_arrays(arr): + res_np = np.array_equal(arr, arr) + res_num = num.array_equal(arr, arr) + assert res_np is bool(res_num) is True + + +ARRAYS = ( + (1, 2), + (1, [1]), + (1, []), + ([], [1]), + ([], [[]]), + ([1], [2]), + ([1, 2], [1, 3]), + ([1, 2], [1, 2, 3]), + ([1, 2], [[1, 2]]), +) + + +@pytest.mark.parametrize(("arr1", "arr2"), ARRAYS, ids=str) +def test_unequal_arrays(arr1, arr2): + res_np = np.array_equal(arr1, arr2) + res_num = num.array_equal(arr1, arr2) + assert res_np is bool(res_num) is False + + res_np_swapped = np.array_equal(arr2, arr1) + res_num_swapped = num.array_equal(arr2, arr1) + assert res_np_swapped is bool(res_num_swapped) is False + + +DTYPES = ( + (np.int32, np.float64), + (np.float64, np.complex128), +) + + +@pytest.mark.parametrize(("dtype1", "dtype2"), DTYPES, ids=str) +def test_equal_values_with_different_dtype(dtype1, dtype2): + array = [1, 2, 3] + np_arr1 = np.array(array, dtype=dtype1) + np_arr2 = np.array(array, dtype=dtype2) + num_arr1 = num.array(array, dtype=dtype1) + num_arr2 = num.array(array, dtype=dtype2) + + res_np = np.array_equal(np_arr1, np_arr2) + res_num = num.array_equal(num_arr1, num_arr2) + assert res_np == res_num + + res_np_swapped = np.array_equal(np_arr2, np_arr1) + res_num_swapped = num.array_equal(num_arr2, num_arr1) + assert res_np_swapped == res_num_swapped + + +@pytest.mark.parametrize( + "equal_nan", (False, pytest.param(True, marks=pytest.mark.xfail)) +) +@pytest.mark.parametrize( + "arr", + ([np.nan], [1, 2, np.nan], [[1, 2], [3, np.nan]]), + ids=lambda arr: f"(arr={arr})", +) +def 
test_equal_nan_basic(arr, equal_nan): + # If equal_nan is True, + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + res_np = np.array_equal(arr, arr, equal_nan=equal_nan) + res_num = num.array_equal(arr, arr, equal_nan=equal_nan) + assert res_np == res_num + + +@pytest.mark.parametrize( + "equal_nan", (False, pytest.param(True, marks=pytest.mark.xfail)) +) +def test_equal_nan_complex_values(equal_nan): + # If equal_nan is True, + # In Numpy, it pass + # In cuNumeric, it raises NotImplementedError + a = np.array([1, 1 + 1j]) + b = a.copy() + a.real = np.nan + b.imag = np.nan + + res_np = np.array_equal(a, b, equal_nan=equal_nan) + res_num = num.array_equal(a, b, equal_nan=equal_nan) + assert res_np == res_num + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) From b236c58be47ebbc9d759f55441910471eae4b89a Mon Sep 17 00:00:00 2001 From: Robin Wang <104830875+robinw0928@users.noreply.github.com> Date: Thu, 13 Apr 2023 09:21:55 +0800 Subject: [PATCH 026/106] Add array/asarray test and enhance outer test (#877) * Add array/asarray test and enhance outer test * Address comments. --- tests/integration/test_array.py | 186 ++++++++++++++++++++++++++++++++ tests/integration/test_outer.py | 121 ++++++++++++++++++--- 2 files changed, 295 insertions(+), 12 deletions(-) create mode 100755 tests/integration/test_array.py diff --git a/tests/integration/test_array.py b/tests/integration/test_array.py new file mode 100755 index 000000000..195d86283 --- /dev/null +++ b/tests/integration/test_array.py @@ -0,0 +1,186 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from legate.core import LEGATE_MAX_DIM + +import cunumeric as num + +SCALARS = ( + 0, + -10.5, + 1 + 1j, +) + +ARRAYS = ( + [], + (1, 2), + ((1, 2),), + [(1, 2), (3, 4.1)], + ( + [1, 2.1], + [3, 4 + 4j], + ), +) + + +def strict_type_equal(a, b): + return np.array_equal(a, b) and a.dtype == b.dtype + + +@pytest.mark.parametrize( + "object", + (None,) + SCALARS + ARRAYS, + ids=lambda object: f"(object={object})", +) +def test_array_basic(object): + res_np = np.array(object) + res_num = num.array(object) + assert strict_type_equal(res_np, res_num) + + +def test_array_ndarray(): + object = [[1, 2], [3, 4]] + res_np = np.array(np.array(object)) + res_num = num.array(num.array(object)) + assert strict_type_equal(res_np, res_num) + + +DTYPES = ( + np.int32, + np.float64, + np.complex128, +) + + +@pytest.mark.parametrize("dtype", DTYPES, ids=lambda dtype: f"(dtype={dtype})") +@pytest.mark.parametrize( + "object", + (0, -10.5, [], [1, 2], [[1, 2], [3, 4.1]]), + ids=lambda object: f"(object={object})", +) +def test_array_dtype(object, dtype): + res_np = np.array(object, dtype=dtype) + res_num = num.array(object, dtype=dtype) + assert strict_type_equal(res_np, res_num) + + +@pytest.mark.xfail +@pytest.mark.parametrize( + "ndmin", + range(-1, LEGATE_MAX_DIM + 1), + ids=lambda ndmin: f"(ndmin={ndmin})", +) +@pytest.mark.parametrize( + "object", + (0, [], [1, 2], [[1, 2], [3, 4.1]]), + ids=lambda object: f"(object={object})", +) +def test_array_ndmin(object, ndmin): + # if dim of object is smaller than ndmin, + # In Numpy, it pass + # In cuNumeric, it raises TypeError: + # 'NoneType' object cannot be interpreted as an integer + res_np = np.array(object, ndmin=ndmin) + res_num = num.array(object, ndmin=ndmin) + assert strict_type_equal(res_np, res_num) + + +@pytest.mark.parametrize( + "copy", (True, False), ids=lambda copy: f"(copy={copy})" +) +def test_array_copy(copy): + x = [[1, 2, 3], [4, 5, 6]] + x_np = np.array(x) + xc_np = np.array(x_np, copy=copy) + x_np[0, :] = [7, 8, 9] + + x_num = num.array(x) + xc_num = num.array(x_num, copy=copy) + x_num[0, :] = [7, 8, 9] + + assert strict_type_equal(xc_np, xc_num) + + +class TestArrayErrors: + @pytest.mark.parametrize( + "dtype", (np.int32, np.float64), ids=lambda dtype: f"(dtype={dtype})" + ) + @pytest.mark.parametrize( + "object", + (1 + 1j, [1, 2, 3.0, 4 + 4j]), + ids=lambda object: f"(object={object})", + ) + def test_invalid_dtype(self, object, dtype): + expected_exc = TypeError + with pytest.raises(expected_exc): + np.array(object, dtype=dtype) + with pytest.raises(expected_exc): + num.array(object, dtype=dtype) + + +@pytest.mark.parametrize( + "object", + (None,) + SCALARS + ARRAYS, + ids=lambda object: f"(object={object})", +) +def test_asarray_basic(object): + res_np = np.asarray(object) + res_num = num.asarray(object) + assert strict_type_equal(res_np, res_num) + + +def test_asarray_ndarray(): + object = [[1, 2], [3, 4]] + res_np = np.asarray(np.array(object)) + res_num = num.asarray(num.array(object)) + assert strict_type_equal(res_np, res_num) + + +@pytest.mark.parametrize("dtype", DTYPES, ids=lambda dtype: f"(dtype={dtype})") +@pytest.mark.parametrize( + "object", + (0, -10.5, [], [1, 2], [[1, 2], [3, 4.1]]), + ids=lambda object: f"(object={object})", +) +def test_asarray_dtype(object, dtype): + res_np = np.asarray(object, dtype=dtype) + res_num = num.asarray(object, dtype=dtype) + assert strict_type_equal(res_np, res_num) + + +class TestAsArrayErrors: + @pytest.mark.parametrize( + "dtype", (np.int32, 
np.float64), ids=lambda dtype: f"(dtype={dtype})" + ) + @pytest.mark.parametrize( + "object", + (1 + 1j, [1, 2, 3.0, 4 + 4j]), + ids=lambda object: f"(object={object})", + ) + def test_invalid_dtype(self, object, dtype): + expected_exc = TypeError + with pytest.raises(expected_exc): + np.asarray(object, dtype=dtype) + with pytest.raises(expected_exc): + num.asarray(object, dtype=dtype) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_outer.py b/tests/integration/test_outer.py index 1980f8847..f70965218 100644 --- a/tests/integration/test_outer.py +++ b/tests/integration/test_outer.py @@ -15,28 +15,125 @@ import numpy as np import pytest -from legate.core import LEGATE_MAX_DIM from utils.generators import mk_0to1_array import cunumeric as num +SHAPES = ((), (0,), (1,), (10,), (4, 5), (1, 4, 5)) -def _outer(a_ndim, b_ndim, lib): - return lib.outer( - mk_0to1_array(lib, (a_ndim,)), mk_0to1_array(lib, (b_ndim,)) - ) +@pytest.mark.parametrize( + "shape_b", SHAPES, ids=lambda shape_b: f"(shape_b={shape_b})" +) +@pytest.mark.parametrize( + "shape_a", SHAPES, ids=lambda shape_a: f"(shape_a={shape_a})" +) +def test_basic(shape_a, shape_b): + a_np = mk_0to1_array(np, shape_a) + b_np = mk_0to1_array(np, shape_b) + a_num = num.array(a_np) + b_num = num.array(b_np) + + res_np = np.outer(a_np, b_np) + res_num = num.outer(a_num, b_num) + + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize( + "shape_b", SHAPES, ids=lambda shape_b: f"(shape_b={shape_b})" +) +@pytest.mark.parametrize( + "shape_a", SHAPES, ids=lambda shape_a: f"(shape_a={shape_a})" +) +def test_out(shape_a, shape_b): + a_np = mk_0to1_array(np, shape_a) + b_np = mk_0to1_array(np, shape_b) + a_num = num.array(a_np) + b_num = num.array(b_np) + + # if shape_a is (), prod is 1.0. Convert it into int. 
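+    # (np.outer flattens its inputs, so the expected output shape is
+    # (a.size, b.size) regardless of the inputs' dimensionality.)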
+ size_a = np.prod(shape_a).astype(int) + size_b = np.prod(shape_b).astype(int) + shape_out = (size_a, size_b) + res_np = np.empty(shape_out) + res_num = num.empty(shape_out) + + np.outer(a_np, b_np, out=res_np) + num.outer(a_num, b_num, out=res_num) + + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize( + "out_dt", + (np.float32, np.complex128), + ids=lambda out_dt: f"(out_dt={out_dt})", +) +def test_out_dtype(out_dt): + shape_a = (4,) + shape_b = (5,) + a_np = mk_0to1_array(np, shape_a) + b_np = mk_0to1_array(np, shape_b) + a_num = num.array(a_np) + b_num = num.array(b_np) + + size_a = np.prod(shape_a) + size_b = np.prod(shape_b) + shape_out = (size_a, size_b) + res_np = np.empty(shape_out, dtype=out_dt) + res_num = num.empty(shape_out, dtype=out_dt) + + np.outer(a_np, b_np, out=res_np) + num.outer(a_num, b_num, out=res_num) + + assert np.array_equal(res_np, res_num) + + +class TestOuterErrors: + def setup_method(self): + shape_a = (4,) + shape_b = (5,) + self.a_np = mk_0to1_array(np, shape_a) + self.b_np = mk_0to1_array(np, shape_b) + self.a_num = num.array(self.a_np) + self.b_num = num.array(self.b_np) + + @pytest.mark.parametrize( + "out_shape", + ((1, 20), (1,)), + ids=lambda out_shape: f"(out_shape={out_shape})", + ) + def test_out_invalid_shape(self, out_shape): + expected_exc = ValueError + out_np = np.empty(out_shape) + out_num = num.empty(out_shape) + with pytest.raises(expected_exc): + np.outer(self.a_np, self.b_np, out=out_np) + with pytest.raises(expected_exc): + num.outer(self.a_num, self.b_num, out=out_num) -@pytest.mark.parametrize("a_ndim", range(1, LEGATE_MAX_DIM + 1)) -@pytest.mark.parametrize("b_ndim", range(1, LEGATE_MAX_DIM + 1)) -def test_basic(a_ndim, b_ndim): - assert np.array_equal( - _outer(a_ndim, b_ndim, np), _outer(a_ndim, b_ndim, num) + @pytest.mark.parametrize( + ("src_dt", "out_dt"), + ((np.float64, np.int32), (np.complex128, np.float64)), ) + def test_out_invalid_dtype(self, src_dt, out_dt): + expected_exc = TypeError + shape_a = (4,) + shape_b = (5,) + a_np = mk_0to1_array(np, shape_a, dtype=src_dt) + b_np = mk_0to1_array(np, shape_b, dtype=src_dt) + a_num = num.array(a_np) + b_num = num.array(b_np) + out_shape = (a_np.size, b_np.size) + out_np = np.empty(out_shape, dtype=out_dt) + out_num = num.empty(out_shape, dtype=out_dt) -def test_empty(): - assert np.array_equal(_outer(0, 0, np), _outer(0, 0, num)) + with pytest.raises(expected_exc): + np.outer(a_np, b_np, out=out_np) + with pytest.raises(expected_exc): + num.outer(a_num, b_num, out=out_num) if __name__ == "__main__": From 4be401e64292e5bab01c45e05d50fd234760e18a Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 12 Apr 2023 23:51:27 -0700 Subject: [PATCH 027/106] Changes for the new registration API (#876) * Use the new library creation API * Resource configuration is no longer necessary in Python * Remove the obsolete call forwarding * Use the right commit hash for the CI * Bump up the commit hash * Update the commit hash for the last time * Remove an obsolete unit test --- cmake/versions.json | 2 +- cunumeric/config.py | 12 +----------- src/cunumeric/cunumeric.cc | 11 ++++++----- src/cunumeric/cunumeric.h | 10 ++-------- tests/unit/cunumeric/test_config.py | 18 +----------------- 5 files changed, 11 insertions(+), 42 deletions(-) diff --git a/cmake/versions.json b/cmake/versions.json index b9c185813..1782e2fd8 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, 
"always_download": false, - "git_tag" : "2afa4c7" + "git_tag" : "7ec0ac0d2b3d09501caf3bbb07c2ce4f1e45986e" } } } diff --git a/cunumeric/config.py b/cunumeric/config.py index cad52e77f..98361b0bc 100644 --- a/cunumeric/config.py +++ b/cunumeric/config.py @@ -20,7 +20,7 @@ from typing import TYPE_CHECKING, Any, List, Union, cast import numpy as np -from legate.core import Library, ResourceConfig, get_legate_runtime +from legate.core import Library, get_legate_runtime if TYPE_CHECKING: import numpy.typing as npt @@ -310,16 +310,6 @@ def set_runtime(self, runtime: Runtime) -> None: assert self.shared_object is not None self.runtime = runtime - def get_resource_configuration(self) -> ResourceConfig: - assert self.shared_object is not None - config = ResourceConfig() - config.max_tasks = self.shared_object.CUNUMERIC_MAX_TASKS - config.max_mappers = self.shared_object.CUNUMERIC_MAX_MAPPERS - config.max_reduction_ops = self.shared_object.CUNUMERIC_MAX_REDOPS - config.max_projections = 0 - config.max_shardings = 0 - return config - def destroy(self) -> None: if self.runtime is not None: self.runtime.destroy() diff --git a/src/cunumeric/cunumeric.cc b/src/cunumeric/cunumeric.cc index 4760e689a..51cacb036 100644 --- a/src/cunumeric/cunumeric.cc +++ b/src/cunumeric/cunumeric.cc @@ -24,7 +24,7 @@ namespace cunumeric { static const char* const cunumeric_library_name = "cunumeric"; -/*static*/ TaskRegistrar& CuNumeric::get_registrar() +/*static*/ TaskRegistrar& CuNumericRegistrar::get_registrar() { static TaskRegistrar registrar; return registrar; @@ -38,15 +38,16 @@ void registration_callback() config.max_mappers = CUNUMERIC_MAX_MAPPERS; config.max_tasks = CUNUMERIC_MAX_TASKS; config.max_reduction_ops = CUNUMERIC_MAX_REDOPS; - LibraryContext context(cunumeric_library_name, config); - CuNumeric::get_registrar().register_all_tasks(context); + auto context = Runtime::get_runtime()->create_library(cunumeric_library_name, config); + + CuNumericRegistrar::get_registrar().register_all_tasks(*context); // Register our special reduction functions - register_reduction_operators(context); + register_reduction_operators(*context); // Now we can register our mapper with the runtime - context.register_mapper(std::make_unique(), 0); + context->register_mapper(std::make_unique(), 0); } } // namespace cunumeric diff --git a/src/cunumeric/cunumeric.h b/src/cunumeric/cunumeric.h index 43b54dd94..d0461a1d2 100644 --- a/src/cunumeric/cunumeric.h +++ b/src/cunumeric/cunumeric.h @@ -29,19 +29,13 @@ enum class VariantKind : int { GPU = 2, }; -struct CuNumeric { - public: - template - static void record_variant(Args&&... 
args) - { - get_registrar().record_variant(std::forward(args)...); - } +struct CuNumericRegistrar { static legate::TaskRegistrar& get_registrar(); }; template struct CuNumericTask : public legate::LegateTask { - using Registrar = CuNumeric; + using Registrar = CuNumericRegistrar; }; } // namespace cunumeric diff --git a/tests/unit/cunumeric/test_config.py b/tests/unit/cunumeric/test_config.py index ce41a7181..d3d30707c 100644 --- a/tests/unit/cunumeric/test_config.py +++ b/tests/unit/cunumeric/test_config.py @@ -15,7 +15,7 @@ import numpy as np import pytest -from legate.core import Library, ResourceConfig +from legate.core import Library from legate.core.context import Context from mock import patch @@ -92,22 +92,6 @@ def test_set_runtine(self) -> None: with pytest.raises(AssertionError): lib.set_runtime(runtime) - def test_get_resource_configuration(self) -> None: - lib = m.CuNumericLib("foo") - - # error if not initialized - with pytest.raises(AssertionError): - lib.get_resource_configuration() - - lib.initialize(_FakeSO) - config = lib.get_resource_configuration() - assert isinstance(config, ResourceConfig) - assert config.max_tasks == _FakeSO.CUNUMERIC_MAX_TASKS - assert config.max_mappers == _FakeSO.CUNUMERIC_MAX_MAPPERS - assert config.max_reduction_ops == _FakeSO.CUNUMERIC_MAX_REDOPS - assert config.max_projections == 0 - assert config.max_shardings == 0 - @patch("cunumeric.runtime.destroy") def test_destroy(self, mock_destroy) -> None: lib = m.CuNumericLib("foo") From df3b2ef9aae59b643a02640a1823de25201ba391 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 13 Apr 2023 23:40:17 -0700 Subject: [PATCH 028/106] Avoid following compiler symlinks (#880) * Avoid following compiler symlinks * Unconditionally set ENV{CC/CXX} to the cmake compilers * Have TBLIS match the verbosity level of the rest of the build * Don't override ENV{CC} if the user has set them * Override the user's CC/CXX, but make sure to reset it --- CMakeLists.txt | 8 ++++++++ cmake/thirdparty/get_tblis.cmake | 33 +++++++++++++++++--------------- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index cb06a78af..efa03fc7f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,6 +33,14 @@ if(POLICY CMP0135) set(CMAKE_POLICY_DEFAULT_CMP0135 NEW) endif() +if(POLICY CMP0132) + # Avoid an inconsistency, where cmake would only set the CC/CXX env vars on + # the first run, but not subsequent ones. This would come up when building + # TBLIS. + cmake_policy(SET CMP0132 NEW) + set(CMAKE_POLICY_DEFAULT_CMP0132 NEW) +endif() + ############################################################################## # - Download and initialize RAPIDS CMake helpers ----------------------------- diff --git a/cmake/thirdparty/get_tblis.cmake b/cmake/thirdparty/get_tblis.cmake index 7bcc45aac..b02afbd7d 100644 --- a/cmake/thirdparty/get_tblis.cmake +++ b/cmake/thirdparty/get_tblis.cmake @@ -77,38 +77,41 @@ function(find_or_configure_tblis) set(tblis_thread_model "--enable-thread-model=openmp") endif() - # CMake sets `ENV{CC}` to /usr/bin/cc if it's not set. This causes tblis' - # `./configure` to fail. For now, detect this case and reset ENV{CC/CXX}. - # Remove this workaround when we can use `cmake_policy(SET CMP0132 NEW)`: - # https://gitlab.kitware.com/cmake/cmake/-/merge_requests/7108 - + # Use ENV{CC/CXX} to tell TBLIS to use the same compilers as the + # rest of the build. 
+ # TODO: Consider doing the same for CMAKE_C/CXX_FLAGS set(CC_ORIG "$ENV{CC}") set(CXX_ORIG "$ENV{CXX}") set(_CC "${CMAKE_C_COMPILER}") set(_CXX "${CMAKE_CXX_COMPILER}") - if(CC_ORIG MATCHES "^.*\/cc$") - file(REAL_PATH "${_CC}" _CC EXPAND_TILDE) - file(REAL_PATH "${_CXX}" _CXX EXPAND_TILDE) - set(ENV{CC} "${_CC}") - set(ENV{CXX} "${_CXX}") - endif() - # Use the caching compiler (if provided) to speed up tblis builds if(CMAKE_C_COMPILER_LAUNCHER) - set(ENV{CC} "${CMAKE_C_COMPILER_LAUNCHER} ${_CC}") + set(_CC "${CMAKE_C_COMPILER_LAUNCHER} ${_CC}") endif() if(CMAKE_CXX_COMPILER_LAUNCHER) - set(ENV{CXX} "${CMAKE_CXX_COMPILER_LAUNCHER} ${_CXX}") + set(_CXX "${CMAKE_CXX_COMPILER_LAUNCHER} ${_CXX}") endif() + set(ENV{CC} "${_CC}") + set(ENV{CXX} "${_CXX}") message(VERBOSE "cunumeric: ENV{CC}=\"$ENV{CC}\"") message(VERBOSE "cunumeric: ENV{CXX}=\"$ENV{CXX}\"") + set(tblis_verbosity "--enable-silent-rules") + if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.25") + cmake_language(GET_MESSAGE_LOG_LEVEL log_level) + if(${log_level} STREQUAL "VERBOSE" OR + ${log_level} STREQUAL "DEBUG" OR + ${log_level} STREQUAL "TRACE") + set(tblis_verbosity "--disable-silent-rules") + endif() + endif() + execute_process( COMMAND ./configure ${tblis_thread_model} - --enable-silent-rules + ${tblis_verbosity} --disable-option-checking --with-label-type=int32_t --with-length-type=int64_t From 6621255d637e56e07e16f928d5bac614b78a55f2 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Fri, 14 Apr 2023 17:45:05 -0700 Subject: [PATCH 029/106] Changes for the new mapper API (#878) * Use the new library creation API * Resource configuration is no longer necessary in Python * Remove the obsolete call forwarding * Use the right commit hash for the CI * Bump up the commit hash * Update the mapper registration to use the new API * Update the commit hash --- cmake/versions.json | 2 +- src/cunumeric/cunumeric.cc | 7 ++----- src/cunumeric/cunumeric_c.h | 5 ++--- src/cunumeric/mapper.h | 2 +- 4 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cmake/versions.json b/cmake/versions.json index 1782e2fd8..51e001fab 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "7ec0ac0d2b3d09501caf3bbb07c2ce4f1e45986e" + "git_tag" : "c133b2506c1136ff52920f2a9d9e4dbcf27d3c8c" } } } diff --git a/src/cunumeric/cunumeric.cc b/src/cunumeric/cunumeric.cc index 51cacb036..e7d9eda21 100644 --- a/src/cunumeric/cunumeric.cc +++ b/src/cunumeric/cunumeric.cc @@ -35,19 +35,16 @@ extern void register_reduction_operators(LibraryContext& context); void registration_callback() { ResourceConfig config; - config.max_mappers = CUNUMERIC_MAX_MAPPERS; config.max_tasks = CUNUMERIC_MAX_TASKS; config.max_reduction_ops = CUNUMERIC_MAX_REDOPS; - auto context = Runtime::get_runtime()->create_library(cunumeric_library_name, config); + auto context = Runtime::get_runtime()->create_library( + cunumeric_library_name, config, std::make_unique()); CuNumericRegistrar::get_registrar().register_all_tasks(*context); // Register our special reduction functions register_reduction_operators(*context); - - // Now we can register our mapper with the runtime - context->register_mapper(std::make_unique(), 0); } } // namespace cunumeric diff --git a/src/cunumeric/cunumeric_c.h b/src/cunumeric/cunumeric_c.h index 724db0013..8cfcb0ac8 100644 --- a/src/cunumeric/cunumeric_c.h +++ b/src/cunumeric/cunumeric_c.h @@ -209,9 +209,8 @@ enum CuNumericTunable { 
}; enum CuNumericBounds { - CUNUMERIC_MAX_MAPPERS = 1, - CUNUMERIC_MAX_REDOPS = 1024, - CUNUMERIC_MAX_TASKS = 1048576, + CUNUMERIC_MAX_REDOPS = 1024, + CUNUMERIC_MAX_TASKS = 1048576, }; // Match these to ScanCode in config.py diff --git a/src/cunumeric/mapper.h b/src/cunumeric/mapper.h index 3c010b8c6..ae9455618 100644 --- a/src/cunumeric/mapper.h +++ b/src/cunumeric/mapper.h @@ -22,7 +22,7 @@ namespace cunumeric { -class CuNumericMapper : public legate::mapping::LegateMapper { +class CuNumericMapper : public legate::mapping::Mapper { public: CuNumericMapper(); virtual ~CuNumericMapper(void) {} From 8c80c372f5ff47aadabe78e82b2b96a9e762fb12 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Sun, 16 Apr 2023 11:34:00 -0700 Subject: [PATCH 030/106] Start using `LibraryContext*` (#886) * Start using LibraryContext* * Update the commit hash for CI * Update versions.json --- cmake/versions.json | 2 +- src/cunumeric/arg.cc | 30 +++++++++++++++--------------- src/cunumeric/cunumeric.cc | 6 +++--- src/cunumeric/cunumeric.cu | 30 +++++++++++++++--------------- 4 files changed, 34 insertions(+), 34 deletions(-) diff --git a/cmake/versions.json b/cmake/versions.json index 51e001fab..4cbda2a82 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "c133b2506c1136ff52920f2a9d9e4dbcf27d3c8c" + "git_tag" : "611f6ff1fa22f42a22809a4be86821b43c4ab6ea" } } } diff --git a/src/cunumeric/arg.cc b/src/cunumeric/arg.cc index 1f4fc6a6f..5c400a0d8 100644 --- a/src/cunumeric/arg.cc +++ b/src/cunumeric/arg.cc @@ -48,23 +48,23 @@ DEFINE_IDENTITIES(uint64_t) #ifndef LEGATE_USE_CUDA -#define REGISTER_REDOPS(OP) \ - { \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ +#define REGISTER_REDOPS(OP) \ + { \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ } -void register_reduction_operators(legate::LibraryContext& context) +void register_reduction_operators(legate::LibraryContext* context) { REGISTER_REDOPS(ArgmaxReduction); REGISTER_REDOPS(ArgminReduction); diff --git a/src/cunumeric/cunumeric.cc b/src/cunumeric/cunumeric.cc index e7d9eda21..c631d5a8d 100644 --- a/src/cunumeric/cunumeric.cc +++ b/src/cunumeric/cunumeric.cc @@ -30,7 +30,7 @@ static const char* const cunumeric_library_name = "cunumeric"; return registrar; } -extern void register_reduction_operators(LibraryContext& context); +extern void register_reduction_operators(LibraryContext* context); void registration_callback() { @@ -41,10 +41,10 @@ void registration_callback() 
auto context = Runtime::get_runtime()->create_library( cunumeric_library_name, config, std::make_unique()); - CuNumericRegistrar::get_registrar().register_all_tasks(*context); + CuNumericRegistrar::get_registrar().register_all_tasks(context); // Register our special reduction functions - register_reduction_operators(*context); + register_reduction_operators(context); } } // namespace cunumeric diff --git a/src/cunumeric/cunumeric.cu b/src/cunumeric/cunumeric.cu index 2d07e647d..87cd3a85e 100644 --- a/src/cunumeric/cunumeric.cu +++ b/src/cunumeric/cunumeric.cu @@ -20,23 +20,23 @@ namespace cunumeric { -#define REGISTER_REDOPS(OP) \ - { \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ - context.register_reduction_operator>(); \ +#define REGISTER_REDOPS(OP) \ + { \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ + context->register_reduction_operator>(); \ } -void register_reduction_operators(legate::LibraryContext& context) +void register_reduction_operators(legate::LibraryContext* context) { REGISTER_REDOPS(ArgmaxReduction); REGISTER_REDOPS(ArgminReduction); From 0bcda498d1b20c997f9e70a0458dac2a09aa6ceb Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 17 Apr 2023 21:13:33 -0700 Subject: [PATCH 031/106] Bump up the legate core commit hash to be on the latest legion (#888) --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 4cbda2a82..0fd0bb603 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "611f6ff1fa22f42a22809a4be86821b43c4ab6ea" + "git_tag" : "42153d37d5e9ffd2f12d3ac4c3ec632020e02f7c" } } } From 8a938fb706e1139888bab2072fe33ae6b3b13212 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 17 Apr 2023 22:08:47 -0700 Subject: [PATCH 032/106] Fix for some binary operators on float16 (#889) * Fix for some binary operators on float16 * Missing __host__ __device__ attributes to a constructor --- src/cunumeric/binary/binary_op_util.h | 67 +++++++++++++++++++++++---- 1 file changed, 57 insertions(+), 10 deletions(-) diff --git a/src/cunumeric/binary/binary_op_util.h b/src/cunumeric/binary/binary_op_util.h index a4c1538ec..1cb54464b 100644 --- a/src/cunumeric/binary/binary_op_util.h +++ b/src/cunumeric/binary/binary_op_util.h @@ -138,6 +138,14 @@ constexpr decltype(auto) op_dispatch(BinaryOpCode op_code, Functor f, Fnargs&&.. 
return f.template operator()(std::forward(args)...); } +template +__CUDA_HD__ __half lift(const __half& _a, const __half& _b, FloatFunc func) +{ + float a = _a; + float b = _b; + return __half{func(a, b)}; +} + template constexpr decltype(auto) reduce_op_dispatch(BinaryOpCode op_code, Functor f, Fnargs&&... args) { @@ -168,6 +176,7 @@ struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_floating_point::value; + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& a, const T& b) const @@ -179,14 +188,12 @@ struct BinaryOp { template <> struct BinaryOp { - using T = __half; static constexpr bool valid = true; BinaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& a, const __half& b) const { - using std::atan2; - return __half{atan2(static_cast(a), static_cast(b))}; + return lift(a, b, BinaryOp{}); } }; @@ -233,6 +240,7 @@ template struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_floating_point::value; + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} constexpr T operator()(const T& a, const T& b) const @@ -250,8 +258,7 @@ struct BinaryOp { __CUDA_HD__ __half operator()(const __half& a, const __half& b) const { - using std::copysign; - return __half{copysign(static_cast(a), static_cast(b))}; + return lift(a, b, BinaryOp{}); } }; @@ -312,6 +319,7 @@ struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = not(CODE == legate::LegateTypeCode::BOOL_LT or legate::is_complex::value); + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} template ::value>* = nullptr> @@ -334,8 +342,7 @@ struct BinaryOp { BinaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& a, const __half& b) const { - using std::fmod; - return __half{fmod(static_cast(a), static_cast(b))}; + return lift(a, b, BinaryOp{}); } }; @@ -438,6 +445,7 @@ struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_floating_point::value; + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& a, const T& b) const @@ -447,6 +455,18 @@ struct BinaryOp { } }; +template <> +struct BinaryOp { + static constexpr bool valid = true; + + BinaryOp(const std::vector& args) {} + + __CUDA_HD__ __half operator()(const __half& a, const __half& b) const + { + return lift(a, b, BinaryOp{}); + } +}; + template struct BinaryOp { using VAL = legate::legate_type_of; @@ -564,6 +584,7 @@ struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_floating_point::value; + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& a, const T& b) const @@ -580,11 +601,24 @@ struct BinaryOp { } }; +template <> +struct BinaryOp { + static constexpr bool valid = true; + + BinaryOp(const std::vector& args) {} + + __CUDA_HD__ __half operator()(const __half& a, const __half& b) const + { + return lift(a, b, BinaryOp{}); + } +}; + template struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_floating_point::value; + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& a, const T& b) const @@ -600,6 +634,18 @@ struct BinaryOp { } }; +template <> +struct BinaryOp { + static constexpr bool valid = true; + + BinaryOp(const std::vector& args) {} + + __CUDA_HD__ __half 
operator()(const __half& a, const __half& b) const + { + return lift(a, b, BinaryOp{}); + } +}; + template struct BinaryOp { using T = legate::legate_type_of; @@ -690,6 +736,7 @@ template struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = true; + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} template { BinaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& a, const __half& b) const { - return static_cast<__half>(real_mod(static_cast(a), static_cast(b))); + return lift(a, b, BinaryOp{}); } }; @@ -746,6 +793,7 @@ template struct BinaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_floating_point::value; + __CUDA_HD__ BinaryOp() {} BinaryOp(const std::vector& args) {} constexpr T operator()(const T& a, const T& b) const @@ -763,8 +811,7 @@ struct BinaryOp { __CUDA_HD__ __half operator()(const __half& a, const __half& b) const { - using std::nextafter; - return __half{nextafter(static_cast(a), static_cast(b))}; + return lift(a, b, BinaryOp{}); } }; From 0172eb3e6d476d3ef5434b894b5b54b725b9a7c7 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Mon, 17 Apr 2023 22:12:22 -0700 Subject: [PATCH 033/106] Update the legate core commit hash (#891) --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 0fd0bb603..28b65ab72 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "42153d37d5e9ffd2f12d3ac4c3ec632020e02f7c" + "git_tag" : "65583d03bb185cfe534e3fc7e09616157249d0ed" } } } From 0e0ea0fd1fc5cd850fb8e2a1969a7217d8f7b76c Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 18 Apr 2023 13:03:30 -0700 Subject: [PATCH 034/106] WAR for TBLIS compiler detection while upstream PR is pending (#890) --- cmake/thirdparty/get_tblis.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/thirdparty/get_tblis.cmake b/cmake/thirdparty/get_tblis.cmake index b02afbd7d..4a684fd08 100644 --- a/cmake/thirdparty/get_tblis.cmake +++ b/cmake/thirdparty/get_tblis.cmake @@ -171,11 +171,11 @@ function(find_or_configure_tblis) endfunction() if(NOT DEFINED cunumeric_TBLIS_BRANCH) - set(cunumeric_TBLIS_BRANCH master) + set(cunumeric_TBLIS_BRANCH cunumeric) endif() if(NOT DEFINED cunumeric_TBLIS_REPOSITORY) - set(cunumeric_TBLIS_REPOSITORY https://github.com/devinamatthews/tblis.git) + set(cunumeric_TBLIS_REPOSITORY https://github.com/manopapad/tblis.git) endif() find_or_configure_tblis(VERSION 1.2.0 From fa1b8e4b1f0c57464182cccf187dfd6198c50815 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 18 Apr 2023 16:04:02 -0700 Subject: [PATCH 035/106] Add support for Python 3.11 (#830) (#837) * updating build config for py311 and clearing an unused comment * Document support for python 3.11 --------- Co-authored-by: Mark Vaz Co-authored-by: Manolis Papadakis --- conda/conda-build/meta.yaml | 1 - setup.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml index 2560b94b8..2e67b3d3e 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -137,7 +137,6 @@ requirements: {% else %} - legate-core ={{ core_version }} - cuda-cudart >={{ cuda_version }} - # - libcutensor >=1.3 - cutensor >=1.3 =*_* - libcublas - libcusolver =11.4.1.48-0 diff --git a/setup.py b/setup.py index 
95bb8c88b..1ec518096 100644 --- a/setup.py +++ b/setup.py @@ -35,6 +35,7 @@ "Programming Language :: Python", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", ], packages=find_packages( where=".", From a7ff4312cfa318ef1e346ef4f7395eaee43ca0f7 Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 18 Apr 2023 16:05:02 -0700 Subject: [PATCH 036/106] Also build CPU-only packages for haswell (#869) (#882) Co-authored-by: Manolis Papadakis --- conda/conda-build/build.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index 8a6c6c6c0..b78740090 100644 --- a/conda/conda-build/build.sh +++ b/conda/conda-build/build.sh @@ -1,12 +1,13 @@ #!/bin/bash - + # Rewrite conda's -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY to # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH CMAKE_ARGS="$(echo "$CMAKE_ARGS" | sed -r "s@_INCLUDE=ONLY@_INCLUDE=BOTH@g")" # Add our options to conda's CMAKE_ARGS CMAKE_ARGS+=" ---log-level=VERBOSE" +--log-level=VERBOSE +-DBUILD_MARCH=haswell" # We rely on an environment variable to determine if we need to build cpu-only bits if [ -z "$CPU_ONLY" ]; then @@ -14,7 +15,6 @@ if [ -z "$CPU_ONLY" ]; then CMAKE_ARGS+=" -Dcutensor_DIR=$PREFIX -DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;90 --DBUILD_MARCH=haswell " else # When we build without cuda, we need to provide the location of curand From 07872da585110ecd2c65cb4ae443938b1672e33c Mon Sep 17 00:00:00 2001 From: XiaLuNV <110973296+XiaLuNV@users.noreply.github.com> Date: Wed, 19 Apr 2023 12:07:57 +0800 Subject: [PATCH 037/106] add test_clip.py and test_identity.py (#881) add test_clip.py and test_identity.py --- tests/integration/test_clip.py | 179 +++++++++++++++++++++++++++++ tests/integration/test_identity.py | 76 ++++++++++++ 2 files changed, 255 insertions(+) create mode 100644 tests/integration/test_clip.py create mode 100644 tests/integration/test_identity.py diff --git a/tests/integration/test_clip.py b/tests/integration/test_clip.py new file mode 100644 index 000000000..d0005b0eb --- /dev/null +++ b/tests/integration/test_clip.py @@ -0,0 +1,179 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+import numpy as np
+import pytest
+from legate.core import LEGATE_MAX_DIM
+from utils.generators import mk_seq_array
+
+import cunumeric as num
+
+
+class TestClipErrors:
+    @pytest.mark.xfail
+    def test_none_array(self):
+        expected_exc = TypeError
+        with pytest.raises(expected_exc):
+            np.clip(None, a_min=0, a_max=0)
+        with pytest.raises(expected_exc):
+            # cunumeric raises
+            # AttributeError: 'NoneType' object has no attribute 'clip'
+            num.clip(None, a_min=0, a_max=0)
+
+    @pytest.mark.xfail
+    def test_value_none(self):
+        array = np.arange(0, 10)
+        expected_exc = ValueError
+        with pytest.raises(expected_exc):
+            # Numpy raises:
+            # ValueError: One of max or min must be given
+            np.clip(array, a_min=None, a_max=None)
+        with pytest.raises(expected_exc):
+            # cunumeric raises:
+            # TypeError: int() argument must be a string,
+            # a bytes-like object or a real number, not 'NoneType'
+            num.clip(array, a_min=None, a_max=None)
+
+    def test_value_list(self):
+        array = np.arange(0, 10)
+        amin = [2, 3, 4, 5, 1]
+        amax = 8
+        expected_exc = ValueError
+        with pytest.raises(expected_exc):
+            np.clip(array, a_min=amin, a_max=amax)
+        with pytest.raises(expected_exc):
+            num.clip(array, a_min=amin, a_max=amax)
+
+    def test_out(self):
+        array = np.arange(0, 5)
+        out_a = np.arange(0, 3)
+        amin = [2, 3, 4, 5, 1]
+        amax = 8
+        expected_exc = ValueError
+        with pytest.raises(expected_exc):
+            np.clip(array, a_min=amin, a_max=amax, out=out_a)
+        with pytest.raises(expected_exc):
+            num.clip(array, a_min=amin, a_max=amax, out=out_a)
+
+
+def test_empty_array():
+    res_np = np.clip([], a_min=0, a_max=0)
+    res_num = num.clip([], a_min=0, a_max=0)
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.xfail
+def test_amin_amax():
+    array = np.arange(0, 10)
+    res_np = np.clip(array, a_min=9, a_max=5)
+    res_num = num.clip(array, a_min=9, a_max=5)
+    # the results differ:
+    # array = array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    # res_np = array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5])
+    # res_num = array([9, 9, 9, 9, 9, 9, 9, 9, 9, 5])
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.xfail
+@pytest.mark.parametrize("amin", (-1, 0.5, 2.5, 5, 11))
+def test_amin_value(amin):
+    array = np.arange(0, 10)
+    res_np = np.clip(array, a_min=amin, a_max=8.5)
+    res_num = num.clip(array, a_min=amin, a_max=8.5)
+    # res_np does not match res_num:
+    # in Numpy, when either a_min or a_max is a float,
+    # all values are promoted to float,
+    # while in cunumeric, all values stay int.
+    # for example, amin = 5
+    # array = array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    # res_np = array([5., 5., 5., 5., 5., 5., 6., 7., 8., 8.5])
+    # res_num = array([5, 5, 5, 5, 5, 5, 6, 7, 8, 8])
+    assert np.array_equal(res_np, res_num)
+
+
+@pytest.mark.xfail
+def test_amin_complex():
+    array = np.arange(0, 10)
+    amin = 5 + 5j
+    res_np = np.clip(array, a_min=amin, a_max=8.5)
+    # res_np = array([5. +5.j, 5. +5.j, 5. +5.j, 5. +5.j, 5. +5.j,
+    # 5. +5.j, 6. +0.j, 7. +0.j, 8. 
+0.j, 8.5+0.j]) + res_num = num.clip(array, a_min=amin, a_max=8.5) + # cunumeric raises: + # TypeError: int() argument must be a string, a bytes-like object + # or a real number, not 'complex' + assert np.array_equal(res_np, res_num) + + +def test_value_list(): + array = np.arange(0, 5) + amin = [2, 3, 4, 5, 1] + amax = 8 + res_np = np.clip(array, a_min=amin, a_max=amax) + res_num = num.clip(array, a_min=amin, a_max=amax) + assert np.array_equal(res_np, res_num) + + +def test_out_ndim(): + array = [[2, 3, 4], [3, 4, 5], [6, 6, 12]] + np_arr = np.array(array) + num_arr = num.array(array) + out_a = [[0, 0, 0], [0, 0, 0], [0, 0, 0]] + out_np = np.array(out_a) + out_num = num.array(out_a) + amin = 3 + amax = 8 + np.clip(np_arr, a_min=amin, a_max=amax, out=out_np) + num.clip(num_arr, a_min=amin, a_max=amax, out=out_num) + assert np.array_equal(out_np, out_num) + + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_basic(ndim): + shape = (5,) * ndim + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + + amin = int(np.prod(shape) / 2) + amax = np.prod(shape) - 1 + + res_np = np.clip(np_arr, amin, amax) + res_num = num.clip(num_arr, amin, amax) + assert np.array_equal(res_num, res_np) + + +@pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) +def test_out(ndim): + shape = (5,) * ndim + np_arr = mk_seq_array(np, shape) + num_arr = mk_seq_array(num, shape) + + out_np = np.empty(shape) + out_num = num.empty(shape) + + amin = int(np.prod(shape) / 2) + amax = np.prod(shape) - 1 + + np.clip(np_arr, amin, amax, out=out_np) + num.clip(num_arr, amin, amax, out=out_num) + + assert np.array_equal(out_np, out_num) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_identity.py b/tests/integration/test_identity.py new file mode 100644 index 000000000..25dd7172a --- /dev/null +++ b/tests/integration/test_identity.py @@ -0,0 +1,76 @@ +# Copyright 2022 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest + +import cunumeric as num + +DTYPE_ALL = [ + np.int8, + np.int16, + np.int32, + np.uint8, + np.uint16, + np.uint32, + np.float16, + np.float32, + np.float64, + bool, + np.complex64, + np.complex128, +] +VALUES = [0, 1, 2, 100] +NEGATIVE_TYPE = [None, np.inf, -np.inf, -3.5, 3.5, 5j, 10 + 20j, -100 - 100j] +NEGATIVE_VALUE = [-1, -2] + + +@pytest.mark.parametrize("val", VALUES) +def test_value(val): + res_np = np.identity(val) + res_num = num.identity(val) + assert np.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("neg_type", NEGATIVE_TYPE) +def test_value_negative_type(neg_type): + expected_exc = TypeError + with pytest.raises(expected_exc): + np.identity(neg_type) + with pytest.raises(expected_exc): + num.identity(neg_type) + + +@pytest.mark.parametrize("neg_val", NEGATIVE_VALUE) +def test_value_negative_value(neg_val): + expected_exc = ValueError + with pytest.raises(expected_exc): + np.identity(neg_val) + with pytest.raises(expected_exc): + num.identity(neg_val) + + +@pytest.mark.parametrize("dtype", DTYPE_ALL) +def test_dtype(dtype): + res_np = np.identity(5, dtype=dtype) + res_num = num.identity(5, dtype=dtype) + assert np.array_equal(res_np, res_num) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) From e0ee373c64974315368f8d13c570fe536f304551 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 20 Apr 2023 12:45:01 -0700 Subject: [PATCH 038/106] Bump legate.core version to get legion fixes for CI hang (#900) --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index 28b65ab72..49abf9ed9 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "65583d03bb185cfe534e3fc7e09616157249d0ed" + "git_tag" : "b3e280d6212aa2ec0af619b6fee3ac24d069897e" } } } From 7db5d98d6ac13d19a6d331c31af7e2e7f46eafbf Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Fri, 21 Apr 2023 08:23:10 -0700 Subject: [PATCH 039/106] Document cpp / read-only env config settings (#893) * Add python settings for cpp env vars * help strings * add fast math defaults * fix tests * remove fast_math test_default * Detailed documentation for env-only options * Just remove an obscure option instead of trying to document --------- Co-authored-by: Manolis Papadakis --- cunumeric/settings.py | 81 +++++++++++++++++++++++++-- src/cunumeric/mapper.cc | 24 ++++---- src/cunumeric/mapper.h | 1 - src/env_defaults.h | 32 +++++++++++ tests/unit/cunumeric/test_settings.py | 53 ++++++++++++++---- 5 files changed, 160 insertions(+), 31 deletions(-) create mode 100644 src/env_defaults.h diff --git a/cunumeric/settings.py b/cunumeric/settings.py index c781ac279..f2de75699 100644 --- a/cunumeric/settings.py +++ b/cunumeric/settings.py @@ -14,7 +14,13 @@ # from __future__ import annotations -from legate.util.settings import PrioritizedSetting, Settings, convert_bool +from legate.util.settings import ( + EnvOnlySetting, + PrioritizedSetting, + Settings, + convert_bool, + convert_int, +) __all__ = ("settings",) @@ -39,7 +45,7 @@ class CunumericRuntimeSettings(Settings): convert=convert_bool, help=""" Preload and initialize handles of all CUDA libraries (cuBLAS, cuSOLVER, - etc.) used in cuNumeric + etc.) used in cuNumeric. 
""", ) @@ -49,7 +55,7 @@ class CunumericRuntimeSettings(Settings): default=False, convert=convert_bool, help=""" - Turn on warnings + Turn on warnings. """, ) @@ -59,7 +65,7 @@ class CunumericRuntimeSettings(Settings): default=False, convert=convert_bool, help=""" - Print an overall percentage of cunumeric coverage + Print an overall percentage of cunumeric coverage. """, ) @@ -69,7 +75,7 @@ class CunumericRuntimeSettings(Settings): default=False, convert=convert_bool, help=""" - Print an overall percentage of cunumeric coverage with call stack info + Print an overall percentage of cunumeric coverage with call stack info. """, ) @@ -78,7 +84,70 @@ class CunumericRuntimeSettings(Settings): "CUNUMERIC_REPORT_DUMP_CSV", default=None, help=""" - Save a coverage report to a specified CSV file + Save a coverage report to a specified CSV file. + """, + ) + + fast_math: EnvOnlySetting[int] = EnvOnlySetting( + "fast_math", + "CUNUMERIC_FAST_MATH", + default=False, + convert=convert_bool, + help=""" + Enable certain optimized execution modes for floating-point math + operations, that may violate strict IEEE specifications. Currently this + flag enables the acceleration of single-precision cuBLAS routines using + TF32 tensor cores. + + This is a read-only environment variable setting used by the runtime. + """, + ) + + min_gpu_chunk: EnvOnlySetting[int] = EnvOnlySetting( + "min_gpu_chunk", + "CUNUMERIC_MIN_GPU_CHUNK", + default=65536, # 1 << 16 + test_default=2, + convert=convert_int, + help=""" + Legate will fall back to vanilla NumPy when handling arrays smaller + than this, rather than attempt to accelerate using GPUs, as the + offloading overhead would likely not be offset by the accelerated + operation code. + + This is a read-only environment variable setting used by the runtime. + """, + ) + + min_cpu_chunk: EnvOnlySetting[int] = EnvOnlySetting( + "min_cpu_chunk", + "CUNUMERIC_MIN_CPU_CHUNK", + default=1024, # 1 << 10 + test_default=2, + convert=convert_int, + help=""" + Legate will fall back to vanilla NumPy when handling arrays smaller + than this, rather than attempt to accelerate using native CPU code, as + the offloading overhead would likely not be offset by the accelerated + operation code. + + This is a read-only environment variable setting used by the runtime. + """, + ) + + min_omp_chunk: EnvOnlySetting[int] = EnvOnlySetting( + "min_omp_chunk", + "CUNUMERIC_MIN_OMP_CHUNK", + default=8192, # 1 << 13 + test_default=2, + convert=convert_int, + help=""" + Legate will fall back to vanilla NumPy when handling arrays smaller + than this, rather than attempt to accelerate using OpenMP, as the + offloading overhead would likely not be offset by the accelerated + operation code. + + This is a read-only environment variable setting used by the runtime. 
""", ) diff --git a/src/cunumeric/mapper.cc b/src/cunumeric/mapper.cc index 950f81ce5..261421d7f 100644 --- a/src/cunumeric/mapper.cc +++ b/src/cunumeric/mapper.cc @@ -14,6 +14,7 @@ * */ +#include "env_defaults.h" #include "cunumeric/mapper.h" using namespace legate; @@ -22,10 +23,11 @@ using namespace legate::mapping; namespace cunumeric { CuNumericMapper::CuNumericMapper() - : min_gpu_chunk(extract_env("CUNUMERIC_MIN_GPU_CHUNK", 1 << 20, 2)), - min_cpu_chunk(extract_env("CUNUMERIC_MIN_CPU_CHUNK", 1 << 14, 2)), - min_omp_chunk(extract_env("CUNUMERIC_MIN_OMP_CHUNK", 1 << 17, 2)), - eager_fraction(extract_env("CUNUMERIC_EAGER_FRACTION", 16, 1)) + : min_gpu_chunk( + extract_env("CUNUMERIC_MIN_GPU_CHUNK", MIN_GPU_CHUNK_DEFAULT, MIN_GPU_CHUNK_TEST)), + min_cpu_chunk( + extract_env("CUNUMERIC_MIN_CPU_CHUNK", MIN_CPU_CHUNK_DEFAULT, MIN_CPU_CHUNK_TEST)), + min_omp_chunk(extract_env("CUNUMERIC_MIN_OMP_CHUNK", MIN_OMP_CHUNK_DEFAULT, MIN_OMP_CHUNK_TEST)) { } @@ -56,14 +58,12 @@ Scalar CuNumericMapper::tunable_value(TunableID tunable_id) case CUNUMERIC_TUNABLE_MAX_EAGER_VOLUME: { int32_t eager_volume = 0; // TODO: make these profile guided - if (eager_fraction > 0) { - if (!machine->gpus().empty()) - eager_volume = min_gpu_chunk / eager_fraction; - else if (!machine->omps().empty()) - eager_volume = min_omp_chunk / eager_fraction; - else - eager_volume = min_cpu_chunk / eager_fraction; - } + if (!machine->gpus().empty()) + eager_volume = min_gpu_chunk; + else if (!machine->omps().empty()) + eager_volume = min_omp_chunk; + else + eager_volume = min_cpu_chunk; return Scalar(eager_volume); } default: break; diff --git a/src/cunumeric/mapper.h b/src/cunumeric/mapper.h index ae9455618..2360f3b46 100644 --- a/src/cunumeric/mapper.h +++ b/src/cunumeric/mapper.h @@ -47,7 +47,6 @@ class CuNumericMapper : public legate::mapping::Mapper { const int32_t min_gpu_chunk; const int32_t min_cpu_chunk; const int32_t min_omp_chunk; - const int32_t eager_fraction; }; } // namespace cunumeric diff --git a/src/env_defaults.h b/src/env_defaults.h new file mode 100644 index 000000000..af95896a1 --- /dev/null +++ b/src/env_defaults.h @@ -0,0 +1,32 @@ +/* Copyright 2021-2022 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +// These values are copied manually in cunumeric.settings and there is a Python +// unit test that will maintain that these values and the Python settings +// values agree. If these values are modified, the corresponding Python values +// must also be updated. 
+ +// 1 << 16 (need actual number for python to parse) +#define MIN_GPU_CHUNK_DEFAULT 65536 +#define MIN_GPU_CHUNK_TEST 2 + +// 1 << 10 (need actual number for python to parse) +#define MIN_CPU_CHUNK_DEFAULT 1024 +#define MIN_CPU_CHUNK_TEST 2 + +// 1 << 13 (need actual number for python to parse) +#define MIN_OMP_CHUNK_DEFAULT 8192 +#define MIN_OMP_CHUNK_TEST 2 diff --git a/tests/unit/cunumeric/test_settings.py b/tests/unit/cunumeric/test_settings.py index 47088a965..96ba9377b 100644 --- a/tests/unit/cunumeric/test_settings.py +++ b/tests/unit/cunumeric/test_settings.py @@ -14,9 +14,12 @@ # from __future__ import annotations +from pathlib import Path + import numpy as np import pytest -from legate.util.settings import PrioritizedSetting +from legate.util.fs import read_c_define +from legate.util.settings import EnvOnlySetting, PrioritizedSetting import cunumeric.settings as m @@ -27,15 +30,29 @@ "report_coverage", "report_dump_callstack", "report_dump_csv", + "fast_math", + "min_gpu_chunk", + "min_cpu_chunk", + "min_omp_chunk", +) + +_settings_with_test_defaults = ( + # skip fast math which uses getenv instead of extract_env + # "fast_math", + "min_gpu_chunk", + "min_cpu_chunk", + "min_omp_chunk", ) +ENV_HEADER = Path(__file__).parents[3] / "src" / "env_defaults.h" + class TestSettings: def test_standard_settings(self) -> None: settings = [ k for k, v in m.settings.__class__.__dict__.items() - if isinstance(v, PrioritizedSetting) + if isinstance(v, (PrioritizedSetting, EnvOnlySetting)) ] assert set(settings) == set(_expected_settings) @@ -44,17 +61,15 @@ def test_prefix(self, name: str) -> None: ps = getattr(m.settings, name) assert ps.env_var.startswith("CUNUMERIC_") - @pytest.mark.parametrize("name", _expected_settings) - def test_parent(self, name: str) -> None: - ps = getattr(m.settings, name) - assert ps._parent == m.settings - def test_types(self) -> None: - assert m.settings.test.convert_type == "bool" - assert m.settings.preload_cudalibs.convert_type == "bool" - assert m.settings.warn.convert_type == "bool" - assert m.settings.report_coverage.convert_type == "bool" - assert m.settings.report_dump_callstack.convert_type == "bool" + assert m.settings.test.convert_type == 'bool ("0" or "1")' + assert m.settings.preload_cudalibs.convert_type == 'bool ("0" or "1")' + assert m.settings.warn.convert_type == 'bool ("0" or "1")' + assert m.settings.report_coverage.convert_type == 'bool ("0" or "1")' + assert ( + m.settings.report_dump_callstack.convert_type + == 'bool ("0" or "1")' + ) assert m.settings.report_dump_csv.convert_type == "str" @@ -77,6 +92,20 @@ def test_report_dump_callstack(self) -> None: def test_report_dump_csv(self) -> None: assert m.settings.report_dump_csv.default is None + @pytest.mark.parametrize("name", _settings_with_test_defaults) + def test_default(self, name: str) -> None: + setting = getattr(m.settings, name) + define = setting.env_var.removeprefix("CUNUMERIC_") + "_DEFAULT" + expected = setting._convert(read_c_define(ENV_HEADER, define)) + assert setting.default == expected + + @pytest.mark.parametrize("name", _settings_with_test_defaults) + def test_test_default(self, name: str) -> None: + setting = getattr(m.settings, name) + define = setting.env_var.removeprefix("CUNUMERIC_") + "_TEST" + expected = setting._convert(read_c_define(ENV_HEADER, define)) + assert setting.test_default == expected + if __name__ == "__main__": import sys From a9b12f017dfdc3c65e3e6ea3fffec53ccf7f8b00 Mon Sep 17 00:00:00 2001 From: yimoj <130720840+yimoj@users.noreply.github.com> 
Date: Mon, 24 Apr 2023 09:35:01 +0800 Subject: [PATCH 040/106] Improve code coverage in array_split/unpackbits/place (#892) --- tests/integration/test_array_split.py | 10 +++ tests/integration/test_bits.py | 11 ++++ tests/integration/test_extract.py | 93 +++++++++++++++++++++++++++ 3 files changed, 114 insertions(+) diff --git a/tests/integration/test_array_split.py b/tests/integration/test_array_split.py index 82954d134..e34bd26e8 100644 --- a/tests/integration/test_array_split.py +++ b/tests/integration/test_array_split.py @@ -66,6 +66,16 @@ def test_axis_negative(self): with pytest.raises(IndexError): num.array_split(ary, len(ary) // 2, -2) + def test_indices_str_type(self): + expected_exc = ValueError + arr_np = np.arange(10) + arr_num = num.arange(10) + # Split points in the passed `indices` should be integer + with pytest.raises(expected_exc): + num.array_split(arr_np, ["a", "b"]) + with pytest.raises(expected_exc): + num.array_split(arr_num, ["a", "b"]) + @pytest.mark.parametrize("size", SIZES, ids=str) def test_array_split(size): diff --git a/tests/integration/test_bits.py b/tests/integration/test_bits.py index 5ca61ad45..83fcb69b1 100644 --- a/tests/integration/test_bits.py +++ b/tests/integration/test_bits.py @@ -125,6 +125,17 @@ def test_bitorder_negative(self, bitorder): with pytest.raises(ValueError): num.unpackbits(in_num, bitorder=bitorder) + def test_count_type(self): + expected_exc = TypeError + shape = (3, 3) + in_np = np.random.randint(low=0, high=255, size=shape, dtype="B") + in_num = num.array(in_np) + # count must be an integer or None + with pytest.raises(expected_exc): + np.unpackbits(in_np, count="1") + with pytest.raises(expected_exc): + num.unpackbits(in_num, count="1") + @pytest.mark.parametrize("arr", ([], [[]])) @pytest.mark.parametrize("bitorder", ("little", "big")) def test_arr(self, arr, bitorder): diff --git a/tests/integration/test_extract.py b/tests/integration/test_extract.py index a733f9046..bb873a235 100644 --- a/tests/integration/test_extract.py +++ b/tests/integration/test_extract.py @@ -179,6 +179,99 @@ def test_place_basic(shape, vals): assert np.array_equal(arr_np, arr_num) +@pytest.mark.xfail(reason="cunumeric raises exception when vals is ndim") +@pytest.mark.parametrize("vals", VALUES, ids=str) +@pytest.mark.parametrize("ndim", range(2, DIM), ids=str) +def test_place_vals_ndim(vals, ndim): + shape = (1, 2, 3) + + arr_np = mk_seq_array(np, shape) + arr_num = mk_seq_array(num, shape) + mask_np = (arr_np % 2).astype(bool) + mask_num = (arr_np % 2).astype(bool) + vals_np = np.array(vals, ndmin=ndim).astype(arr_np.dtype) + vals_num = num.array(vals_np) + + # NumPy pass, array([[[2, 2, 2], [2, 2, 2]]]) + np.place(arr_np, mask_np, vals_np) + # cuNumeric raises ValueError: vals array has to be 1-dimensional + num.place(arr_num, mask_num, vals_num) + assert np.array_equal(arr_np, arr_num) + + +@pytest.mark.parametrize("shape", SIZES, ids=str) +@pytest.mark.parametrize("vals", VALUES, ids=str) +def test_place_mask_reshape(shape, vals): + arr_np = mk_seq_array(np, shape) + arr_num = mk_seq_array(num, shape) + + mask_np = (arr_np % 2).astype(bool) + mask_num = (arr_np % 2).astype(bool) + + vals_np = np.array(vals).astype(arr_np.dtype) + vals_num = num.array(vals_np) + + np.place(arr_np, mask_np, vals_np) + num.place(arr_num, mask_num, vals_num) + + assert np.array_equal(arr_np, arr_num) + + +@pytest.mark.parametrize("shape", SIZES, ids=str) +@pytest.mark.parametrize("vals", VALUES, ids=str) +def test_place_mask_zeros(shape, vals): + arr_np = 
mk_seq_array(np, shape) + arr_num = mk_seq_array(num, shape) + + mask_np = np.zeros(shape, dtype=bool) + mask_num = num.zeros(shape, dtype=bool) + + vals_np = np.array(vals).astype(arr_np.dtype) + vals_num = num.array(vals).astype(arr_num.dtype) + + np.place(arr_np, mask_np, vals_np) + num.place(arr_num, mask_num, vals_num) + + assert np.array_equal(arr_np, arr_num) + + +class TestPlaceErrors: + def test_arr_mask_size(self): + expected_exc = ValueError + arr_shape = (1, 2, 3) + mask_shape = (2, 2) + vals = (11, 12, 13) + + arr_np = np.random.random(arr_shape) + arr_num = num.array(arr_np) + mask_np = np.random.random(mask_shape) + mask_num = num.array(mask_np) + vals_np = np.array(vals, dtype=arr_np.dtype) + vals_num = num.array(vals, dtype=arr_np.dtype) + + with pytest.raises(expected_exc): + np.place(arr_np, mask_np, vals_np) + with pytest.raises(expected_exc): + num.place(arr_num, mask_num, vals_num) + + def test_vals_empty(self): + expected_exc = ValueError + shape = (1, 2, 3) + val = [] + + arr_np = np.array(shape) + arr_num = num.array(shape) + mask_np = np.array(shape, dtype=bool) + mask_num = num.array(shape, dtype=bool) + vals_np = np.array(val, dtype=arr_np.dtype) + vals_num = num.array(val, dtype=arr_num.dtype) + + with pytest.raises(expected_exc): + np.place(arr_np, mask_np, vals_np) + with pytest.raises(expected_exc): + num.place(arr_num, mask_num, vals_num) + + if __name__ == "__main__": import sys From 1d214da67e8742f1dd3ffa55c6c79bdaadac2bd7 Mon Sep 17 00:00:00 2001 From: yimoj <130720840+yimoj@users.noreply.github.com> Date: Tue, 25 Apr 2023 09:43:11 +0800 Subject: [PATCH 041/106] Improve code coverage for cunumeric.bincount (#898) * Improve code coverage for cunumeric.bincount * Fix bincount singleton weight issue --- cunumeric/module.py | 2 +- tests/integration/test_bincount.py | 119 +++++++++++++++++++++++------ 2 files changed, 97 insertions(+), 24 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index cfe0a7b4e..a5955dfbd 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -6361,7 +6361,7 @@ def bincount( else: out = zeros((minlength,), dtype=weights.dtype) index = x[0] - out[index] = weights[index] + out[index] = weights[0] else: # Normal case of bincount if weights is None: diff --git a/tests/integration/test_bincount.py b/tests/integration/test_bincount.py index 4ba917e79..c5ad0b3d4 100644 --- a/tests/integration/test_bincount.py +++ b/tests/integration/test_bincount.py @@ -27,19 +27,85 @@ MINLENGTHS = [0, 5, 15] -def test_dtype_negative(): - arr = num.arange(5, dtype=float) - msg = r"integer type" - with pytest.raises(TypeError, match=msg): - num.bincount(arr) - - -def test_weight_mismatch(): - v_num = num.random.randint(0, 9, size=N) - w_num = num.random.randn(N + 1) - msg = r"same shape" - with pytest.raises(ValueError, match=msg): - num.bincount(v_num, weights=w_num) +class TestBincountNegative: + def test_dtype_negative(self): + expected_exc = TypeError + arr_np = np.arange(5, dtype=float) + arr_num = num.arange(5, dtype=float) + with pytest.raises(expected_exc): + np.bincount(arr_np) + with pytest.raises(expected_exc): + num.bincount(arr_num) + + def test_array_negative(self): + expected_exc = ValueError + arr_np = np.array((-1, 2, 5)) + arr_num = num.array((-1, 2, 5)) + with pytest.raises(expected_exc): + np.bincount(arr_np) + with pytest.raises(expected_exc): + num.bincount(arr_num) + + def test_array_ndim(self): + expected_exc = ValueError + size = (2,) * 3 + arr_np = np.random.randint(0, high=9, size=size) + arr_num = 
num.random.randint(0, high=9, size=size) + with pytest.raises(expected_exc): + np.bincount(arr_np) + with pytest.raises(expected_exc): + num.bincount(arr_num) + + def test_minlength_negative(self): + expected_exc = ValueError + minlength = -5 + arr_np = np.arange(5) + arr_num = num.arange(5) + with pytest.raises(expected_exc): + np.bincount(arr_np, minlength=minlength) + with pytest.raises(expected_exc): + num.bincount(arr_num, minlength=minlength) + + def test_weight_mismatch(self): + expected_exc = ValueError + v_np = np.random.randint(0, 9, size=N) + w_np = np.random.randn(N + 1) + v_num = num.random.randint(0, 9, size=N) + w_num = num.random.randn(N + 1) + with pytest.raises(expected_exc): + np.bincount(v_np, weights=w_np) + with pytest.raises(expected_exc): + num.bincount(v_num, weights=w_num) + + @pytest.mark.parametrize( + "weight", + [ + ("1", "2"), + ("2", "x"), + (b"x", b"y"), + (np.datetime64(1, "Y"), np.datetime64(123, "Y")), + np.array((5, 3), dtype="F"), + ], + ) + @pytest.mark.xfail( + reason="different behavior when casting weight to float64" + ) + def test_weight_dtype(self, weight): + expected_exc = TypeError + arr_np = np.arange(2) + arr_num = num.arange(2) + w_np = np.array(weight) + w_num = num.array(weight) + with pytest.raises(expected_exc): + # TypeError: Cannot cast array data from dtype(' Date: Thu, 27 Apr 2023 10:16:01 -0700 Subject: [PATCH 042/106] Skip codebase checks in CI (#906) --- tests/unit/cunumeric/test_settings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/unit/cunumeric/test_settings.py b/tests/unit/cunumeric/test_settings.py index 96ba9377b..4b858cc80 100644 --- a/tests/unit/cunumeric/test_settings.py +++ b/tests/unit/cunumeric/test_settings.py @@ -92,6 +92,7 @@ def test_report_dump_callstack(self) -> None: def test_report_dump_csv(self) -> None: assert m.settings.report_dump_csv.default is None + @pytest.mark.skip(reason="Does not work in CI (path issue)") @pytest.mark.parametrize("name", _settings_with_test_defaults) def test_default(self, name: str) -> None: setting = getattr(m.settings, name) @@ -99,6 +100,7 @@ def test_default(self, name: str) -> None: expected = setting._convert(read_c_define(ENV_HEADER, define)) assert setting.default == expected + @pytest.mark.skip(reason="Does not work in CI (path issue)") @pytest.mark.parametrize("name", _settings_with_test_defaults) def test_test_default(self, name: str) -> None: setting = getattr(m.settings, name) From 5c8813adf249a93ad97b29b2b3a95e82dd320a77 Mon Sep 17 00:00:00 2001 From: Robin Wang <104830875+robinwnv@users.noreply.github.com> Date: Fri, 28 Apr 2023 09:05:15 +0800 Subject: [PATCH 043/106] Fix array API. 
(#910) --- cunumeric/module.py | 2 +- tests/integration/test_array.py | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/cunumeric/module.py b/cunumeric/module.py index a5955dfbd..b49a7a88e 100644 --- a/cunumeric/module.py +++ b/cunumeric/module.py @@ -525,7 +525,7 @@ def array( elif copy and obj is result: result = result.copy() if result.ndim < ndmin: - shape = (np.newaxis,) * (ndmin - result.ndim) + result.shape + shape = (1,) * (ndmin - result.ndim) + result.shape result = result.reshape(shape) return result diff --git a/tests/integration/test_array.py b/tests/integration/test_array.py index 195d86283..47e3ae4e0 100755 --- a/tests/integration/test_array.py +++ b/tests/integration/test_array.py @@ -78,7 +78,6 @@ def test_array_dtype(object, dtype): assert strict_type_equal(res_np, res_num) -@pytest.mark.xfail @pytest.mark.parametrize( "ndmin", range(-1, LEGATE_MAX_DIM + 1), @@ -90,10 +89,6 @@ def test_array_dtype(object, dtype): ids=lambda object: f"(object={object})", ) def test_array_ndmin(object, ndmin): - # if dim of object is smaller than ndmin, - # In Numpy, it pass - # In cuNumeric, it raises TypeError: - # 'NoneType' object cannot be interpreted as an integer res_np = np.array(object, ndmin=ndmin) res_num = num.array(object, ndmin=ndmin) assert strict_type_equal(res_np, res_num) From dd27be60ee8ebcad7591e0a1436b2bf8ac39dadf Mon Sep 17 00:00:00 2001 From: Mark Vaz Date: Fri, 28 Apr 2023 13:30:41 -0700 Subject: [PATCH 044/106] Add ops-bot config file (#909) Enables PR copying --- .github/ops-bot.yaml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .github/ops-bot.yaml diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml new file mode 100644 index 000000000..84bbe71f4 --- /dev/null +++ b/.github/ops-bot.yaml @@ -0,0 +1,4 @@ +# This file controls which features from the `ops-bot` repository below are enabled. 
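A note on the #910 `array` fix above: `np.newaxis` is simply an alias for `None`, so padding the shape tuple with it produced `(None, ..., None)` entries that `reshape` rejects, since shape extents must be integers; padding with literal `1` restores the documented `ndmin` behavior. A hedged sketch of the difference, assuming stock NumPy:

import numpy as np

result_shape = (3,)
ndmin = 3
good = (1,) * (ndmin - 1) + result_shape          # (1, 1, 3): a valid shape
bad = (np.newaxis,) * (ndmin - 1) + result_shape  # (None, None, 3)
np.ones(3).reshape(good)  # works
# np.ones(3).reshape(bad) raises
# TypeError: 'NoneType' object cannot be interpreted as an integer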
+# - https://github.com/rapidsai/ops-bot + +copy_prs: true From e9e7fe0804aa364a9c056964a10b8cbacee83298 Mon Sep 17 00:00:00 2001 From: yimoj <130720840+yimoj@users.noreply.github.com> Date: Tue, 2 May 2023 09:14:11 +0800 Subject: [PATCH 045/106] Improve matmul/_contract coverage (#908) --- tests/integration/test_matmul.py | 34 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_matmul.py b/tests/integration/test_matmul.py index bee51fcd7..bedc62bcc 100644 --- a/tests/integration/test_matmul.py +++ b/tests/integration/test_matmul.py @@ -147,19 +147,23 @@ def test_out_invalid_shape_DIVERGENCE(self): out = num.zeros(shape) num.matmul(A, B, out=out) - def test_out_invalid_dtype(self): + @pytest.mark.parametrize( + ("dtype", "out_dtype", "casting"), + ((None, np.int64, "same_kind"), (float, str, "safe")), + ids=("direct", "intermediate"), + ) + def test_out_invalid_dtype(self, dtype, out_dtype, casting): expected_exc = TypeError - A_np = num.ones((3, 2, 4)) - B_np = num.ones((3, 4, 3)) + A_np = np.ones((3, 2, 4)) + B_np = np.ones((3, 4, 3)) A_num = num.ones((3, 2, 4)) B_num = num.ones((3, 4, 3)) - dtype = np.int64 - out_np = np.zeros((3, 2, 3), dtype=dtype) - out_num = num.zeros((3, 2, 3), dtype=dtype) + out_np = np.zeros((3, 2, 3), dtype=out_dtype) + out_num = num.zeros((3, 2, 3), dtype=out_dtype) with pytest.raises(expected_exc): - np.matmul(A_np, B_np, out=out_np) + np.matmul(A_np, B_np, dtype=dtype, out=out_np, casting=casting) with pytest.raises(expected_exc): - num.matmul(A_num, B_num, out=out_num) + num.matmul(A_num, B_num, dtype=dtype, out=out_num, casting=casting) @pytest.mark.parametrize( "casting_dtype", @@ -183,18 +187,20 @@ def test_invalid_casting_dtype(self, casting_dtype): with pytest.raises(expected_exc): num.matmul(A_num, B_num, casting=casting, dtype=dtype) - @pytest.mark.xfail - def test_invalid_casting(self): - # In Numpy, raise ValueError - # In cuNumeric, pass + @pytest.mark.parametrize( + "dtype", (str, pytest.param(float, marks=pytest.mark.xfail)), ids=str + ) + def test_invalid_casting(self, dtype): expected_exc = ValueError casting = "unknown" A_np = np.ones((2, 4)) - B_np = np.ones((4, 3)) + B_np = np.ones((4, 3), dtype=dtype) A_num = num.ones((2, 4)) - B_num = num.ones((4, 3)) + B_num = num.ones((4, 3), dtype=dtype) + # In Numpy, raise ValueError with pytest.raises(expected_exc): np.matmul(A_np, B_np, casting=casting) + # cuNumeric does not check casting when A and B are of the same dtype with pytest.raises(expected_exc): num.matmul(A_num, B_num, casting=casting) From a1f9fd6dfdb440281d005f7f383abec50bf7cc41 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 2 May 2023 16:31:43 -0700 Subject: [PATCH 046/106] Revert "WAR for TBLIS compiler detection while upstream PR is pending (#890)" (#915) This reverts commit 0e0ea0fd1fc5cd850fb8e2a1969a7217d8f7b76c. 
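On the matmul casting tests above: under the default casting="same_kind", writing a float64 product into an integer `out` buffer is an invalid cast, and NumPy's casting error derives from TypeError, which is what both sides of `test_out_invalid_dtype` expect. A small sketch of the NumPy side, assuming stock NumPy:

import numpy as np

A = np.ones((2, 4))
B = np.ones((4, 3))
out = np.zeros((2, 3), dtype=np.int64)
try:
    np.matmul(A, B, out=out)  # float64 -> int64 is not a "same_kind" cast
except TypeError as e:
    print(type(e).__name__)   # NumPy's output-casting error subclasses TypeError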
--- cmake/thirdparty/get_tblis.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/thirdparty/get_tblis.cmake b/cmake/thirdparty/get_tblis.cmake index 4a684fd08..b02afbd7d 100644 --- a/cmake/thirdparty/get_tblis.cmake +++ b/cmake/thirdparty/get_tblis.cmake @@ -171,11 +171,11 @@ function(find_or_configure_tblis) endfunction() if(NOT DEFINED cunumeric_TBLIS_BRANCH) - set(cunumeric_TBLIS_BRANCH cunumeric) + set(cunumeric_TBLIS_BRANCH master) endif() if(NOT DEFINED cunumeric_TBLIS_REPOSITORY) - set(cunumeric_TBLIS_REPOSITORY https://github.com/manopapad/tblis.git) + set(cunumeric_TBLIS_REPOSITORY https://github.com/devinamatthews/tblis.git) endif() find_or_configure_tblis(VERSION 1.2.0 From 9b9d5e0db0c7be6bfdb9b8822c937b23c608f511 Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Wed, 3 May 2023 17:20:32 -0700 Subject: [PATCH 047/106] Using the new core type system (#903) * Catch up the type code refactoring * Massive refactoring again for new type codes * Use core types for points and argvals * Use an aligned struct for argmin/argmax * Use Legate field in the data interface implementation * Use a temporary commit hash for testing * Catch up the Legate core changes * Address comments from @ jjwilke * Update the legate core commit hash --- cmake/versions.json | 2 +- cunumeric/array.py | 48 ++---- cunumeric/config.py | 54 +------ cunumeric/deferred.py | 130 +++++++--------- cunumeric/linalg/cholesky.py | 8 +- cunumeric/linalg/solve.py | 2 +- cunumeric/runtime.py | 127 +++++++-------- cunumeric/sort.py | 14 +- cunumeric/utils.py | 55 +++---- cunumeric_cpp.cmake | 4 +- src/cunumeric/arg.h | 7 +- src/cunumeric/arg.inl | 30 ---- .../{arg.cc => arg_redop_register.cc} | 40 +++-- src/cunumeric/arg_redop_register.cu | 26 ++++ src/cunumeric/arg_redop_register.h | 56 +++++++ src/cunumeric/binary/binary_op.cc | 2 +- src/cunumeric/binary/binary_op.cu | 2 +- src/cunumeric/binary/binary_op_omp.cc | 2 +- src/cunumeric/binary/binary_op_template.inl | 10 +- src/cunumeric/binary/binary_op_util.h | 137 ++++++++--------- src/cunumeric/binary/binary_red.cc | 2 +- src/cunumeric/binary/binary_red.cu | 2 +- src/cunumeric/binary/binary_red_omp.cc | 2 +- src/cunumeric/binary/binary_red_template.inl | 10 +- src/cunumeric/bits/packbits.cc | 2 +- src/cunumeric/bits/packbits.cu | 2 +- src/cunumeric/bits/packbits_omp.cc | 2 +- src/cunumeric/bits/packbits_template.inl | 10 +- src/cunumeric/bits/unpackbits_template.inl | 6 +- src/cunumeric/convolution/convolve.cc | 4 +- src/cunumeric/convolution/convolve.cu | 2 +- src/cunumeric/convolution/convolve_omp.cc | 2 +- .../convolution/convolve_template.inl | 6 +- src/cunumeric/cuda_help.h | 1 - src/cunumeric/cunumeric.cc | 5 - src/cunumeric/cunumeric.cu | 45 ------ src/cunumeric/cunumeric_c.h | 14 +- src/cunumeric/fft/fft.cu | 2 +- src/cunumeric/fft/fft_template.inl | 8 +- src/cunumeric/fft/fft_util.h | 38 ++--- src/cunumeric/index/advanced_indexing.cc | 2 +- src/cunumeric/index/advanced_indexing.cu | 2 +- src/cunumeric/index/advanced_indexing_omp.cc | 2 +- .../index/advanced_indexing_template.inl | 4 +- src/cunumeric/index/choose.cc | 2 +- src/cunumeric/index/choose.cu | 2 +- src/cunumeric/index/choose_omp.cc | 2 +- src/cunumeric/index/choose_template.inl | 4 +- src/cunumeric/index/putmask_template.inl | 4 +- src/cunumeric/index/repeat.cc | 2 +- src/cunumeric/index/repeat.cu | 2 +- src/cunumeric/index/repeat_omp.cc | 2 +- src/cunumeric/index/repeat_template.inl | 4 +- src/cunumeric/item/read_template.inl | 2 +- src/cunumeric/item/write_template.inl | 2 +- 
src/cunumeric/matrix/contract.cc | 42 ++--- src/cunumeric/matrix/contract.cu | 10 +- src/cunumeric/matrix/contract_omp.cc | 42 ++--- src/cunumeric/matrix/contract_template.inl | 22 ++- src/cunumeric/matrix/diag.cc | 4 +- src/cunumeric/matrix/diag.cu | 4 +- src/cunumeric/matrix/diag_omp.cc | 4 +- src/cunumeric/matrix/diag_template.inl | 4 +- src/cunumeric/matrix/dot.cc | 2 +- src/cunumeric/matrix/dot.cu | 2 +- src/cunumeric/matrix/dot_omp.cc | 2 +- src/cunumeric/matrix/dot_template.inl | 4 +- src/cunumeric/matrix/gemm.cc | 8 +- src/cunumeric/matrix/gemm.cu | 8 +- src/cunumeric/matrix/gemm_omp.cc | 8 +- src/cunumeric/matrix/gemm_template.inl | 16 +- src/cunumeric/matrix/matmul.cu | 10 +- src/cunumeric/matrix/matmul_cpu.inl | 10 +- src/cunumeric/matrix/matmul_template.inl | 18 +-- src/cunumeric/matrix/matvecmul.cu | 10 +- src/cunumeric/matrix/matvecmul_cpu.inl | 12 +- src/cunumeric/matrix/matvecmul_template.inl | 18 +-- src/cunumeric/matrix/potrf.cc | 8 +- src/cunumeric/matrix/potrf.cu | 8 +- src/cunumeric/matrix/potrf_omp.cc | 8 +- src/cunumeric/matrix/potrf_template.inl | 16 +- src/cunumeric/matrix/solve.cu | 8 +- src/cunumeric/matrix/solve_cpu.inl | 8 +- src/cunumeric/matrix/solve_template.inl | 16 +- src/cunumeric/matrix/syrk.cc | 8 +- src/cunumeric/matrix/syrk.cu | 8 +- src/cunumeric/matrix/syrk_omp.cc | 8 +- src/cunumeric/matrix/syrk_template.inl | 16 +- src/cunumeric/matrix/tile_template.inl | 2 +- src/cunumeric/matrix/transpose.cc | 2 +- src/cunumeric/matrix/transpose.cu | 2 +- src/cunumeric/matrix/transpose_omp.cc | 2 +- src/cunumeric/matrix/transpose_template.inl | 4 +- src/cunumeric/matrix/trilu.cc | 2 +- src/cunumeric/matrix/trilu.cu | 2 +- src/cunumeric/matrix/trilu_omp.cc | 2 +- src/cunumeric/matrix/trilu_template.inl | 6 +- src/cunumeric/matrix/trsm.cc | 8 +- src/cunumeric/matrix/trsm.cu | 8 +- src/cunumeric/matrix/trsm_omp.cc | 8 +- src/cunumeric/matrix/trsm_template.inl | 16 +- src/cunumeric/nullary/arange_template.inl | 2 +- src/cunumeric/nullary/eye_template.inl | 2 +- src/cunumeric/nullary/fill_template.inl | 12 +- src/cunumeric/random/rand_template.inl | 4 +- src/cunumeric/random/rand_util.h | 12 +- src/cunumeric/scan/scan_global.cc | 2 +- src/cunumeric/scan/scan_global.cu | 2 +- src/cunumeric/scan/scan_global_omp.cc | 2 +- src/cunumeric/scan/scan_global_template.inl | 4 +- src/cunumeric/scan/scan_global_util.h | 6 +- src/cunumeric/scan/scan_local.cc | 4 +- src/cunumeric/scan/scan_local.cu | 4 +- src/cunumeric/scan/scan_local_omp.cc | 4 +- src/cunumeric/scan/scan_local_template.inl | 8 +- src/cunumeric/scan/scan_local_util.h | 6 +- src/cunumeric/search/argwhere.cc | 2 +- src/cunumeric/search/argwhere.cu | 2 +- src/cunumeric/search/argwhere_omp.cc | 2 +- src/cunumeric/search/argwhere_template.inl | 4 +- src/cunumeric/search/nonzero.cc | 2 +- src/cunumeric/search/nonzero.cu | 2 +- src/cunumeric/search/nonzero_omp.cc | 2 +- src/cunumeric/search/nonzero_template.inl | 4 +- src/cunumeric/set/unique.cc | 2 +- src/cunumeric/set/unique.cu | 2 +- src/cunumeric/set/unique_omp.cc | 2 +- src/cunumeric/set/unique_reduce.cc | 2 +- src/cunumeric/set/unique_reduce_template.inl | 4 +- src/cunumeric/set/unique_template.inl | 4 +- src/cunumeric/sort/searchsorted.cc | 2 +- src/cunumeric/sort/searchsorted.cu | 2 +- src/cunumeric/sort/searchsorted_omp.cc | 2 +- src/cunumeric/sort/searchsorted_template.inl | 4 +- src/cunumeric/sort/sort.cc | 2 +- src/cunumeric/sort/sort.cu | 16 +- src/cunumeric/sort/sort_cpu.inl | 6 +- src/cunumeric/sort/sort_omp.cc | 2 +- src/cunumeric/sort/sort_template.inl | 
4 +- src/cunumeric/stat/bincount.cc | 2 +- src/cunumeric/stat/bincount.cu | 2 +- src/cunumeric/stat/bincount_omp.cc | 2 +- src/cunumeric/stat/bincount_template.inl | 6 +- src/cunumeric/ternary/where.cc | 2 +- src/cunumeric/ternary/where.cu | 2 +- src/cunumeric/ternary/where_omp.cc | 2 +- src/cunumeric/ternary/where_template.inl | 4 +- src/cunumeric/transform/flip.cc | 2 +- src/cunumeric/transform/flip.cu | 2 +- src/cunumeric/transform/flip_omp.cc | 2 +- src/cunumeric/transform/flip_template.inl | 4 +- src/cunumeric/unary/convert.cc | 2 +- src/cunumeric/unary/convert.cu | 2 +- src/cunumeric/unary/convert_omp.cc | 2 +- src/cunumeric/unary/convert_template.inl | 16 +- src/cunumeric/unary/convert_util.h | 34 ++--- .../unary/scalar_unary_red_template.inl | 4 +- src/cunumeric/unary/unary_op.cc | 4 +- src/cunumeric/unary/unary_op.cu | 4 +- src/cunumeric/unary/unary_op.h | 139 ++++------------- src/cunumeric/unary/unary_op_omp.cc | 4 +- src/cunumeric/unary/unary_op_template.inl | 35 ++--- src/cunumeric/unary/unary_op_util.h | 144 +++++++++--------- src/cunumeric/unary/unary_red.cc | 2 +- src/cunumeric/unary/unary_red.cu | 2 +- src/cunumeric/unary/unary_red_omp.cc | 2 +- src/cunumeric/unary/unary_red_template.inl | 6 +- src/cunumeric/unary/unary_red_util.h | 30 ++-- tests/integration/test_ingest.py | 4 +- 169 files changed, 901 insertions(+), 1112 deletions(-) rename src/cunumeric/{arg.cc => arg_redop_register.cc} (60%) create mode 100644 src/cunumeric/arg_redop_register.cu create mode 100644 src/cunumeric/arg_redop_register.h delete mode 100644 src/cunumeric/cunumeric.cu diff --git a/cmake/versions.json b/cmake/versions.json index 49abf9ed9..08dd5b6a5 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "b3e280d6212aa2ec0af619b6fee3ac24d069897e" + "git_tag" : "149fa50bce56350e84f3fad4d453b5f5b77b935d" } } } diff --git a/cunumeric/array.py b/cunumeric/array.py index c7822b6b4..53629b4b7 100644 --- a/cunumeric/array.py +++ b/cunumeric/array.py @@ -31,10 +31,8 @@ cast, ) -import legate.core.types as ty import numpy as np -import pyarrow # type: ignore [import] -from legate.core import Array +from legate.core import Array, Field from numpy.core.multiarray import ( # type: ignore [attr-defined] normalize_axis_index, ) @@ -56,7 +54,7 @@ from .coverage import FALLBACK_WARNING, clone_class from .runtime import runtime from .types import NdShape -from .utils import deep_apply, dot_modes +from .utils import deep_apply, dot_modes, to_core_dtype if TYPE_CHECKING: from pathlib import Path @@ -174,21 +172,6 @@ def maybe_convert_to_np_ndarray(obj: Any) -> Any: return obj -# FIXME: we can't give an accurate return type as mypy thinks -# the pyarrow import can be ignored, and can't override the check -# either, because no-any-unimported needs Python >= 3.10. 
We can -# fix it once we bump up the Python version -def convert_numpy_dtype_to_pyarrow(dtype: np.dtype[Any]) -> Any: - if dtype.kind != "c": - return pyarrow.from_numpy_dtype(dtype) - elif dtype == np.complex64: - return ty.complex64 - elif dtype == np.complex128: - return ty.complex128 - else: - raise ValueError(f"Unsupported NumPy dtype: {dtype}") - - NDARRAY_INTERNAL = { "__array_finalize__", "__array_function__", @@ -242,9 +225,15 @@ def __init__( for inp in inputs if isinstance(inp, ndarray) ] - self._thunk = runtime.create_empty_thunk( - sanitized_shape, dtype, inputs - ) + core_dtype = to_core_dtype(dtype) + if core_dtype is not None: + self._thunk = runtime.create_empty_thunk( + sanitized_shape, core_dtype, inputs + ) + else: + self._thunk = runtime.create_eager_thunk( + sanitized_shape, dtype + ) else: self._thunk = thunk self._legate_data: Union[dict[str, Any], None] = None @@ -280,24 +269,17 @@ def _sanitize_shape( @property def __legate_data_interface__(self) -> dict[str, Any]: if self._legate_data is None: - # All of our thunks implement the Legate Store interface - # so we just need to convert our type and stick it in - # a Legate Array - arrow_type = convert_numpy_dtype_to_pyarrow(self.dtype) # If the thunk is an eager array, we need to convert it to a # deferred array so we can extract a legate store deferred_thunk = runtime.to_deferred_array(self._thunk) # We don't have nullable data for the moment # until we support masked arrays - array = Array(arrow_type, [None, deferred_thunk.base]) + dtype = deferred_thunk.base.type + array = Array(dtype, [None, deferred_thunk.base]) self._legate_data = dict() self._legate_data["version"] = 1 - data = dict() - field = pyarrow.field( - "cuNumeric Array", arrow_type, nullable=False - ) - data[field] = array - self._legate_data["data"] = data + field = Field("cuNumeric Array", dtype) + self._legate_data["data"] = {field: array} return self._legate_data # Properties for ndarray diff --git a/cunumeric/config.py b/cunumeric/config.py index 98361b0bc..21a7a68e5 100644 --- a/cunumeric/config.py +++ b/cunumeric/config.py @@ -17,7 +17,7 @@ import os from abc import abstractmethod from enum import IntEnum, unique -from typing import TYPE_CHECKING, Any, List, Union, cast +from typing import TYPE_CHECKING, Union, cast import numpy as np from legate.core import Library, get_legate_runtime @@ -197,15 +197,6 @@ class _CunumericSharedLib: CUNUMERIC_TUNABLE_MAX_EAGER_VOLUME: int CUNUMERIC_TUNABLE_NUM_GPUS: int CUNUMERIC_TUNABLE_NUM_PROCS: int - CUNUMERIC_TYPE_POINT1: int - CUNUMERIC_TYPE_POINT2: int - CUNUMERIC_TYPE_POINT3: int - CUNUMERIC_TYPE_POINT4: int - CUNUMERIC_TYPE_POINT5: int - CUNUMERIC_TYPE_POINT6: int - CUNUMERIC_TYPE_POINT7: int - CUNUMERIC_TYPE_POINT8: int - CUNUMERIC_TYPE_POINT9: int CUNUMERIC_UNARY_OP: int CUNUMERIC_UNARY_RED: int CUNUMERIC_UNIQUE: int @@ -274,6 +265,12 @@ class _CunumericSharedLib: def cunumeric_has_curand(self) -> int: ... + @abstractmethod + def cunumeric_register_reduction_op( + self, type_uid: int, elem_type_code: int + ) -> None: + ... 
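The new `cunumeric_register_reduction_op` stub above pairs with `Runtime.get_argred_type` later in this patch: argmin/argmax reductions carry an (index, value) pair, so each value type gets an aligned struct type that is built once, cached, and registered with the C++ side. A condensed sketch of that pairing, reusing names from the patch:

import legate.core.types as ty

_argred_cache = {}

def get_argred_type(value_dtype, shared_lib):
    # one aligned struct of (int64 index, value) per value type
    if value_dtype in _argred_cache:
        return _argred_cache[value_dtype]
    argred_dtype = ty.struct_type([ty.int64, value_dtype], True)
    _argred_cache[value_dtype] = argred_dtype
    shared_lib.cunumeric_register_reduction_op(
        argred_dtype.uid, value_dtype.code
    )
    return argred_dtype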
+ # Load the cuNumeric library first so we have a shard object that # we can use to initialize all these configuration enumerations @@ -500,13 +497,6 @@ class RandGenCode(IntEnum): INTEGER = 3 -# Match these to CuNumericRedopID in cunumeric_c.h -@unique -class CuNumericRedopCode(IntEnum): - ARGMAX = 1 - ARGMIN = 2 - - # Match these to CuNumericTunable in cunumeric_c.h @unique class CuNumericTunable(IntEnum): @@ -774,33 +764,3 @@ def reverse(in_string: Union[str, None]) -> str: return "forward" else: return in_string - - -# Match these to CuNumericTypeCodes in cunumeric_c.h -# we start from POINT2 type since POINT1 is int8 type -_CUNUMERIC_DTYPES: List[tuple[np.dtype[Any], int, int]] = [ - (np.dtype("i8, i8"), 16, _cunumeric.CUNUMERIC_TYPE_POINT2), - (np.dtype("i8, i8, i8"), 24, _cunumeric.CUNUMERIC_TYPE_POINT3), - (np.dtype("i8, i8, i8, i8"), 32, _cunumeric.CUNUMERIC_TYPE_POINT4), - (np.dtype("i8, i8, i8, i8, i8"), 40, _cunumeric.CUNUMERIC_TYPE_POINT5), - ( - np.dtype("i8, i8, i8, i8, i8, i8"), - 48, - _cunumeric.CUNUMERIC_TYPE_POINT6, - ), - ( - np.dtype("i8, i8, i8, i8, i8, i8, i8"), - 56, - _cunumeric.CUNUMERIC_TYPE_POINT7, - ), - ( - np.dtype("i8, i8, i8, i8, i8, i8, i8, i8"), - 64, - _cunumeric.CUNUMERIC_TYPE_POINT8, - ), - ( - np.dtype("i8, i8, i8, i8, i8, i8, i8, i8, i8"), - 72, - _cunumeric.CUNUMERIC_TYPE_POINT9, - ), -] diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index ecac9332a..c8527d4d3 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -48,7 +48,6 @@ Bitorder, ConvertCode, CuNumericOpCode, - CuNumericRedopCode, RandGenCode, UnaryOpCode, UnaryRedCode, @@ -77,15 +76,10 @@ ) -def _complex_field_dtype(dtype: np.dtype[Any]) -> np.dtype[Any]: - if dtype == np.complex64: - return np.dtype(np.float32) - elif dtype == np.complex128: - return np.dtype(np.float64) - elif dtype == np.complex256: - return np.dtype(np.float128) - else: - assert False +_COMPLEX_FIELD_DTYPES = { + ty.complex64: ty.float32, + ty.complex128: ty.float64, +} def _prod(tpl: Sequence[int]) -> int: @@ -167,8 +161,8 @@ def __init__( UnaryRedCode.PROD: ReductionOp.MUL, UnaryRedCode.MAX: ReductionOp.MAX, UnaryRedCode.MIN: ReductionOp.MIN, - UnaryRedCode.ARGMAX: CuNumericRedopCode.ARGMAX, - UnaryRedCode.ARGMIN: CuNumericRedopCode.ARGMIN, + UnaryRedCode.ARGMAX: ReductionOp.MAX, + UnaryRedCode.ARGMIN: ReductionOp.MIN, UnaryRedCode.CONTAINS: ReductionOp.ADD, UnaryRedCode.COUNT_NONZERO: ReductionOp.ADD, UnaryRedCode.ALL: ReductionOp.MUL, @@ -238,11 +232,10 @@ class DeferredArray(NumPyThunk): def __init__( self, runtime: Runtime, - base: Any, - dtype: np.dtype[Any], + base: Store, numpy_array: Optional[npt.NDArray[Any]] = None, ) -> None: - super().__init__(runtime, dtype) + super().__init__(runtime, base.type.to_numpy_dtype()) assert base is not None assert isinstance(base, Store) self.base: Any = base # a Legate Store @@ -275,7 +268,8 @@ def _copy_if_overlapping(self, other: DeferredArray) -> DeferredArray: copy = cast( DeferredArray, self.runtime.create_empty_thunk( - self.shape, self.dtype, inputs=[self] + self.shape, + self.base.type, ), ) copy.copy(self, deep=True) @@ -320,7 +314,7 @@ def construct_ndarray( def imag(self) -> NumPyThunk: result = self.runtime.create_empty_thunk( self.shape, - dtype=_complex_field_dtype(self.dtype), + dtype=_COMPLEX_FIELD_DTYPES[self.base.type], inputs=[self], ) @@ -337,7 +331,7 @@ def imag(self) -> NumPyThunk: def real(self) -> NumPyThunk: result = self.runtime.create_empty_thunk( self.shape, - dtype=_complex_field_dtype(self.dtype), + 
dtype=_COMPLEX_FIELD_DTYPES[self.base.type], inputs=[self], ) @@ -353,7 +347,7 @@ def real(self) -> NumPyThunk: def conj(self) -> NumPyThunk: result = self.runtime.create_empty_thunk( self.shape, - dtype=self.dtype, + dtype=self.base.type, inputs=[self], ) @@ -491,11 +485,10 @@ def _copy_store(self, store: Any) -> DeferredArray: store_to_copy = DeferredArray( self.runtime, base=store, - dtype=self.dtype, ) store_copy = self.runtime.create_empty_thunk( store_to_copy.shape, - self.dtype, + self.base.type, inputs=[store_to_copy], ) store_copy.copy(store_to_copy, deep=True) @@ -606,7 +599,7 @@ def _has_single_boolean_array( "Unsupported entry type passed to advanced ", "indexing operation", ) - lhs = DeferredArray(self.runtime, store, self.dtype) + lhs = DeferredArray(self.runtime, store) return True, lhs, key[transpose_index] @@ -653,7 +646,7 @@ def _advanced_indexing_with_boolean_array( DeferredArray, self.runtime.create_empty_thunk( out_shape, - rhs.dtype, + rhs.base.type, inputs=[rhs], ), ) @@ -674,12 +667,11 @@ def _advanced_indexing_with_boolean_array( mask = DeferredArray( self.runtime, base=key_store, - dtype=self.dtype, ) rhs.putmask(mask, set_value) return False, rhs, rhs, self else: - out_dtype = rhs.dtype + out_dtype = rhs.base.type # in the case this operation is called for the set_item, we # return Point type field that is later used for # indirect copy operation @@ -699,7 +691,7 @@ def _advanced_indexing_with_boolean_array( task.add_output(out.base) task.add_input(rhs.base) task.add_input(key_store) - task.add_scalar_arg(is_set, bool) + task.add_scalar_arg(is_set, ty.bool_) task.add_scalar_arg(key_dims, ty.int64) task.add_alignment(rhs.base, key_store) task.add_broadcast( @@ -726,7 +718,7 @@ def _advanced_indexing_with_boolean_array( ), ) if not is_set: - out.fill(np.array(0, dtype=out_dtype)) + out.fill(np.array(0, dtype=out_dtype.to_numpy_dtype())) else: for dim in range(rhs.ndim - out_dim): out_tmp = out_tmp.project(rhs.ndim - dim - 1, 0) @@ -841,7 +833,7 @@ def _create_indexing_array( # to apply all the transformations done to `store` to `self` # as well before creating a copy if is_set: - self = DeferredArray(self.runtime, store, self.dtype) + self = DeferredArray(self.runtime, store) # after store is transformed we need to to return a copy of # the store since Copy operation can't be done on # the store with transformation @@ -893,7 +885,6 @@ def _get_view(self, key: Any) -> DeferredArray: return DeferredArray( self.runtime, base=store, - dtype=self.dtype, ) def _broadcast(self, shape: NdShape) -> Any: @@ -921,14 +912,13 @@ def _convert_future_to_regionfield( else: shape = self.shape store = self.context.create_store( - self.dtype, + self.base.type, shape=shape, optimize_scalar=False, ) thunk_copy = DeferredArray( self.runtime, base=store, - dtype=self.dtype, ) thunk_copy.copy(self, deep=True) return thunk_copy @@ -951,20 +941,19 @@ def get_item(self, key: Any) -> NumPyThunk: if index_array.base.kind == Future: index_array = index_array._convert_future_to_regionfield() result_store = self.context.create_store( - self.dtype, + self.base.type, shape=index_array.shape, optimize_scalar=False, ) result = DeferredArray( self.runtime, base=result_store, - dtype=self.dtype, ) else: result = self.runtime.create_empty_thunk( index_array.base.shape, - self.dtype, + self.base.type, inputs=[self], ) @@ -984,7 +973,7 @@ def get_item(self, key: Any) -> NumPyThunk: if result.shape == (): input = result result = self.runtime.create_empty_thunk( - (), self.dtype, inputs=[self] + (), 
self.base.type, inputs=[self] ) task = self.context.create_auto_task(CuNumericOpCode.READ) @@ -1027,7 +1016,6 @@ def set_item(self, key: Any, rhs: Any) -> None: rhs_tmp = DeferredArray( self.runtime, base=rhs_store, - dtype=rhs.dtype, ) rhs_tmp2 = rhs_tmp._convert_future_to_regionfield() rhs_store = rhs_tmp2.base @@ -1085,7 +1073,7 @@ def set_item(self, key: Any, rhs: Any) -> None: if view.base.overlaps(rhs.base): rhs_copy = self.runtime.create_empty_thunk( rhs.shape, - rhs.dtype, + rhs.base.type, inputs=[rhs], ) rhs_copy.copy(rhs, deep=False) @@ -1207,7 +1195,7 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: tmp_shape += tgt_g result = self.runtime.create_empty_thunk( - tmp_shape, dtype=self.dtype, inputs=[self] + tmp_shape, dtype=self.base.type, inputs=[self] ) src = self.base @@ -1237,8 +1225,8 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: assert src.shape == tgt.shape - src_array = DeferredArray(self.runtime, src, self.dtype) - tgt_array = DeferredArray(self.runtime, tgt, self.dtype) + src_array = DeferredArray(self.runtime, src) + tgt_array = DeferredArray(self.runtime, tgt) tgt_array.copy(src_array, deep=True) if needs_delinearization and needs_linearization: @@ -1250,9 +1238,9 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: src_dim += len(tgt_g) assert src.shape == newshape - src_array = DeferredArray(self.runtime, src, self.dtype) + src_array = DeferredArray(self.runtime, src) result = self.runtime.create_empty_thunk( - newshape, dtype=self.dtype, inputs=[self] + newshape, dtype=self.base.type, inputs=[self] ) result.copy(src_array, deep=True) @@ -1276,7 +1264,7 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: src_dim += diff - result = DeferredArray(self.runtime, src, self.dtype) + result = DeferredArray(self.runtime, src) return result @@ -1303,7 +1291,7 @@ def squeeze( ) if result is self.base: return self - return DeferredArray(self.runtime, result, self.dtype) + return DeferredArray(self.runtime, result) def swapaxes(self, axis1: int, axis2: int) -> DeferredArray: if self.size == 1 or axis1 == axis2: @@ -1315,7 +1303,7 @@ def swapaxes(self, axis1: int, axis2: int) -> DeferredArray: dims[axis1], dims[axis2] = dims[axis2], dims[axis1] result = self.base.transpose(dims) - result = DeferredArray(self.runtime, result, self.dtype) + result = DeferredArray(self.runtime, result) return result @@ -1422,7 +1410,7 @@ def fft( len(set(axes)) != len(axes) or len(axes) != input.ndim or tuple(axes) != tuple(sorted(axes)), - bool, + ty.bool_, ) for ax in axes: task.add_scalar_arg(ax, ty.int64) @@ -1452,7 +1440,7 @@ def _fill(self, value: Any) -> None: task = self.context.create_auto_task(CuNumericOpCode.FILL) task.add_output(self.base) task.add_input(value) - task.add_scalar_arg(argval, bool) + task.add_scalar_arg(argval, ty.bool_) task.execute() def fill(self, numpy_array: Any) -> None: @@ -1463,9 +1451,9 @@ def fill(self, numpy_array: Any) -> None: # Have to copy the numpy array because this launch is asynchronous # and we need to make sure the application doesn't mutate the value # so make a future result, this is immediate so no dependence - value = self.runtime.create_scalar(numpy_array.data, self.dtype) + value = self.runtime.create_scalar(numpy_array.data) store = self.context.create_store( - self.dtype, shape=(1,), storage=value, optimize_scalar=True + self.base.type, shape=(1,), storage=value, optimize_scalar=True ) self._fill(store) @@ -1558,7 +1546,7 @@ def contract( # below the 
tasks do this adjustment internally. if blas_op is not None and lhs_thunk.dtype == np.float16: lhs_thunk = self.runtime.create_empty_thunk( - lhs_thunk.shape, np.dtype(np.float32), inputs=[lhs_thunk] + lhs_thunk.shape, ty.float32, inputs=[lhs_thunk] ) # Clear output array @@ -1706,9 +1694,9 @@ def add_mode( task.add_reduction(lhs, ReductionOp.ADD) task.add_input(rhs1) task.add_input(rhs2) - task.add_scalar_arg(tuple(lhs_dim_mask), (bool,)) - task.add_scalar_arg(tuple(rhs1_dim_mask), (bool,)) - task.add_scalar_arg(tuple(rhs2_dim_mask), (bool,)) + task.add_scalar_arg(tuple(lhs_dim_mask), (ty.bool_,)) + task.add_scalar_arg(tuple(rhs1_dim_mask), (ty.bool_,)) + task.add_scalar_arg(tuple(rhs2_dim_mask), (ty.bool_,)) task.add_alignment(lhs, rhs1) task.add_alignment(lhs, rhs2) task.execute() @@ -1801,7 +1789,7 @@ def _diag_helper( task.add_alignment(diag, matrix) task.add_scalar_arg(naxes, ty.int32) - task.add_scalar_arg(extract, bool) + task.add_scalar_arg(extract, ty.bool_) task.execute() @@ -1840,8 +1828,8 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: task = self.context.create_auto_task(CuNumericOpCode.WRAP) task.add_output(indirect.base) task.add_scalar_arg(shape, (ty.int64,)) - task.add_scalar_arg(True, bool) # has_input - task.add_scalar_arg(check_bounds, bool) + task.add_scalar_arg(True, ty.bool_) # has_input + task.add_scalar_arg(check_bounds, ty.bool_) task.add_input(indices.base) task.add_alignment(indices.base, indirect.base) task.throws_exception(IndexError) @@ -1903,7 +1891,7 @@ def arange(self, start: float, stop: float, step: float) -> None: # Handle the special case of a single value here assert self.shape[0] == 1 array = np.array(start, dtype=self.dtype) - future = self.runtime.create_scalar(array.data, array.dtype) + future = self.runtime.create_scalar(array.data) self.base.set_storage(future) return @@ -1948,7 +1936,7 @@ def transpose( self, axes: Union[None, tuple[int, ...], list[int]] ) -> DeferredArray: result = self.base.transpose(axes) - result = DeferredArray(self.runtime, result, self.dtype) + result = DeferredArray(self.runtime, result) return result @auto_convert("rhs") @@ -1960,7 +1948,7 @@ def trilu(self, rhs: Any, k: int, lower: bool) -> None: task.add_output(lhs) task.add_input(rhs) - task.add_scalar_arg(lower, bool) + task.add_scalar_arg(lower, ty.bool_) task.add_scalar_arg(k, ty.int32) task.add_alignment(lhs, rhs) @@ -1971,13 +1959,13 @@ def trilu(self, rhs: Any, k: int, lower: bool) -> None: def repeat( self, repeats: Any, axis: int, scalar_repeats: bool ) -> DeferredArray: - out = self.runtime.create_unbound_thunk(self.dtype, ndim=self.ndim) + out = self.runtime.create_unbound_thunk(self.base.type, ndim=self.ndim) task = self.context.create_auto_task(CuNumericOpCode.REPEAT) task.add_input(self.base) task.add_output(out.base) # We pass axis now but don't use for 1D case (will use for ND case task.add_scalar_arg(axis, ty.int32) - task.add_scalar_arg(scalar_repeats, bool) + task.add_scalar_arg(scalar_repeats, ty.bool_) if scalar_repeats: task.add_scalar_arg(repeats, ty.int64) else: @@ -2046,7 +2034,7 @@ def bincount(self, rhs: Any, weights: Optional[NumPyThunk] = None) -> None: def nonzero(self) -> tuple[NumPyThunk, ...]: results = tuple( - self.runtime.create_unbound_thunk(np.dtype(np.int64)) + self.runtime.create_unbound_thunk(ty.int64) for _ in range(self.ndim) ) @@ -3133,7 +3121,7 @@ def unary_reduction( argred = op in (UnaryRedCode.ARGMAX, UnaryRedCode.ARGMIN) if argred: - argred_dtype = self.runtime.get_arg_dtype(rhs_array.dtype) + 
argred_dtype = self.runtime.get_argred_type(rhs_array.base.type) lhs_array = self.runtime.create_empty_thunk( lhs_array.shape, dtype=argred_dtype, @@ -3320,7 +3308,7 @@ def where(self, src1: Any, src2: Any, src3: Any) -> None: task.execute() def argwhere(self) -> NumPyThunk: - result = self.runtime.create_unbound_thunk(np.dtype(np.int64), ndim=2) + result = self.runtime.create_unbound_thunk(ty.int64, ndim=2) task = self.context.create_auto_task(CuNumericOpCode.ARGWHERE) @@ -3378,7 +3366,7 @@ def scan( # local sum # storage for local sums accessible temp = self.runtime.create_unbound_thunk( - dtype=self.dtype, ndim=self.ndim + dtype=self.base.type, ndim=self.ndim ) if axis == rhs.ndim - 1: @@ -3388,7 +3376,7 @@ def scan( # swap axes, always performing scan along last axis swapped = rhs.swapaxes(axis, rhs.ndim - 1) input = self.runtime.create_empty_thunk( - swapped.shape, dtype=rhs.dtype, inputs=(rhs, swapped) + swapped.shape, dtype=rhs.base.type, inputs=(rhs, swapped) ) input.copy(swapped, deep=True) output = input @@ -3398,7 +3386,7 @@ def scan( task.add_input(input.base) task.add_output(temp.base) task.add_scalar_arg(op, ty.int32) - task.add_scalar_arg(nan_to_identity, bool) + task.add_scalar_arg(nan_to_identity, ty.bool_) task.add_alignment(input.base, output.base) @@ -3424,7 +3412,7 @@ def scan( self.copy(swapped, deep=True) def unique(self) -> NumPyThunk: - result = self.runtime.create_unbound_thunk(self.dtype) + result = self.runtime.create_unbound_thunk(self.base.type) task = self.context.create_auto_task(CuNumericOpCode.UNIQUE) @@ -3464,7 +3452,7 @@ def searchsorted(self, rhs: Any, v: Any, side: SortSide = "left") -> None: task.add_broadcast(self.base) task.add_alignment(self.base, v.base) - task.add_scalar_arg(is_left, bool) + task.add_scalar_arg(is_left, ty.bool_) task.add_scalar_arg(rhs.size, ty.int64) task.execute() @@ -3577,8 +3565,8 @@ def _wrap(self, src: Any, new_len: int) -> None: task = self.context.create_auto_task(CuNumericOpCode.WRAP) task.add_output(indirect.base) task.add_scalar_arg(src.shape, (ty.int64,)) - task.add_scalar_arg(False, bool) # has_input - task.add_scalar_arg(False, bool) # check bounds + task.add_scalar_arg(False, ty.bool_) # has_input + task.add_scalar_arg(False, ty.bool_) # check bounds task.execute() copy = self.context.create_copy() diff --git a/cunumeric/linalg/cholesky.py b/cunumeric/linalg/cholesky.py index 7e023d2cd..db5a275a4 100644 --- a/cunumeric/linalg/cholesky.py +++ b/cunumeric/linalg/cholesky.py @@ -178,10 +178,10 @@ def tril_single(context: Context, output: Store) -> None: task = context.create_auto_task(CuNumericOpCode.TRILU) task.add_output(output) task.add_input(output) - task.add_scalar_arg(True, bool) + task.add_scalar_arg(True, ty.bool_) task.add_scalar_arg(0, ty.int32) # Add a fake task argument to indicate that this is for Cholesky - task.add_scalar_arg(True, bool) + task.add_scalar_arg(True, ty.bool_) task.execute() @@ -194,10 +194,10 @@ def tril(context: Context, p_output: StorePartition, n: int) -> None: task.add_output(p_output) task.add_input(p_output) - task.add_scalar_arg(True, bool) + task.add_scalar_arg(True, ty.bool_) task.add_scalar_arg(0, ty.int32) # Add a fake task argument to indicate that this is for Cholesky - task.add_scalar_arg(True, bool) + task.add_scalar_arg(True, ty.bool_) task.execute() diff --git a/cunumeric/linalg/solve.py b/cunumeric/linalg/solve.py index 8eca91bc8..cec277c4e 100644 --- a/cunumeric/linalg/solve.py +++ b/cunumeric/linalg/solve.py @@ -50,7 +50,7 @@ def solve(output: DeferredArray, a: 
diff --git a/cunumeric/runtime.py b/cunumeric/runtime.py
index f0ad0398b..3fe1ff242 100644
--- a/cunumeric/runtime.py
+++ b/cunumeric/runtime.py
@@ -21,15 +21,13 @@
 import legate.core.types as ty
 import numpy as np
-from legate.core import LEGATE_MAX_DIM, Rect, get_legate_runtime, legion
+from legate.core import LEGATE_MAX_DIM, Rect, get_legate_runtime
 from legate.core.context import Context as LegateContext
 from typing_extensions import TypeGuard

 from .config import (
-    _CUNUMERIC_DTYPES,
     BitGeneratorOperation,
     CuNumericOpCode,
-    CuNumericRedopCode,
     CuNumericTunable,
     cunumeric_context,
     cunumeric_lib,
@@ -39,12 +37,7 @@
 from .settings import settings
 from .thunk import NumPyThunk
 from .types import NdShape
-from .utils import (
-    SUPPORTED_DTYPES,
-    calculate_volume,
-    find_last_user_stacklevel,
-    get_arg_dtype,
-)
+from .utils import calculate_volume, find_last_user_stacklevel, to_core_dtype

 if TYPE_CHECKING:
     import numpy.typing as npt
@@ -54,6 +47,9 @@
     from .array import ndarray


+DIMENSION = int
+
+
 class Runtime(object):
     def __init__(self, legate_context: LegateContext) -> None:
         self.legate_context = legate_context
@@ -87,28 +83,26 @@ def __init__(self, legate_context: LegateContext) -> None:
         # destroy us
         cunumeric_lib.set_runtime(self)
         assert cunumeric_lib.shared_object is not None
+        self.cunumeric_lib = cunumeric_lib.shared_object
         self.has_curand = cunumeric_lib.shared_object.cunumeric_has_curand()
-        self._register_dtypes()

         settings.warn = settings.warn() or settings.test()

         if self.num_gpus > 0 and settings.preload_cudalibs():
             self._load_cudalibs()

-    def _register_dtypes(self) -> None:
-        type_system = self.legate_context.type_system
-        for numpy_type, core_type in SUPPORTED_DTYPES.items():
-            type_system.make_alias(np.dtype(numpy_type), core_type)
-
-        for dtype in _CUNUMERIC_DTYPES:
-            type_system.add_type(dtype[0], dtype[1], dtype[2])
+        # Maps dimensions to point types
+        self._cached_point_types: dict[DIMENSION, ty.Dtype] = dict()
+        # Maps value types to struct types used in argmin/argmax
+        self._cached_argred_types: dict[ty.Dtype, ty.Dtype] = dict()

-    def get_point_type(self, n: int) -> np.dtype[Any]:
-        type_system = self.legate_context.type_system
-        point_type = np.dtype(",".join(("i8",) * n))
-        if point_type not in type_system:
-            raise ValueError(f"there is no point type registered for {n}")
-        return point_type
+    def get_point_type(self, dim: DIMENSION) -> ty.Dtype:
+        cached = self._cached_point_types.get(dim)
+        if cached is not None:
+            return cached
+        point_dtype = ty.array_type(ty.int64, dim) if dim > 1 else ty.int64
+        self._cached_point_types[dim] = point_dtype
+        return point_dtype

     def record_api_call(
         self, name: str, location: str, implemented: bool
@@ -131,20 +125,16 @@ def _unload_cudalibs(self) -> None:
         )
         task.execute()

-    def get_arg_dtype(self, value_dtype: np.dtype[Any]) -> np.dtype[Any]:
-        arg_dtype = get_arg_dtype(value_dtype)
-        type_system = self.legate_context.type_system
-        if arg_dtype not in type_system:
-            # We assign T's type code to Argval
-            code = type_system[value_dtype].code
-            dtype = type_system.add_type(arg_dtype, arg_dtype.itemsize, code)
-
-            for redop in CuNumericRedopCode:
-                redop_id = self.legate_context.get_reduction_op_id(
-                    redop.value * legion.MAX_TYPE_NUMBER + code
-                )
-                dtype.register_reduction_op(redop, redop_id)
-        return arg_dtype
+    def get_argred_type(self, value_dtype: ty.Dtype) -> ty.Dtype:
+        cached = self._cached_argred_types.get(value_dtype)
+        if cached is not None:
+            return cached
+        argred_dtype = ty.struct_type([ty.int64, value_dtype], True)
+        self._cached_argred_types[value_dtype] = argred_dtype
+        self.cunumeric_lib.cunumeric_register_reduction_op(
+            argred_dtype.uid, value_dtype.code
+        )
+        return argred_dtype

     def _report_coverage(self) -> None:
         total = len(self.api_calls)
@@ -175,7 +165,6 @@ def destroy(self) -> None:
     def create_scalar(
         self,
         array: Union[memoryview, npt.NDArray[Any]],
-        dtype: np.dtype[Any],
         shape: Optional[NdShape] = None,
     ) -> Future:
         data = array.tobytes()
@@ -188,15 +177,17 @@ def create_wrapped_scalar(
         dtype: np.dtype[Any],
         shape: NdShape,
     ) -> DeferredArray:
-        future = self.create_scalar(array, dtype, shape)
+        future = self.create_scalar(array, shape)
         assert all(extent == 1 for extent in shape)
+        core_dtype = to_core_dtype(dtype)
+        assert core_dtype is not None
         store = self.legate_context.create_store(
-            dtype,
+            core_dtype,
             shape=shape,
             storage=future,
             optimize_scalar=True,
         )
-        return DeferredArray(self, store, dtype=dtype)
+        return DeferredArray(self, store)

     def bitgenerator_populate_task(
         self,
@@ -272,21 +263,8 @@ def get_next_random_epoch(self) -> int:
         self.current_random_epoch += 1
         return result

-    def is_point_type(self, dtype: Union[str, np.dtype[Any]]) -> bool:
-        if (
-            isinstance(dtype, str)
-            and len(dtype) == 6
-            and dtype[0:5] == "Point"
-        ):
-            return True
-        else:
-            return False
-
     def is_supported_type(self, dtype: Union[str, np.dtype[Any]]) -> bool:
-        if self.is_point_type(dtype):
-            return dtype in self.legate_context.type_system
-        else:
-            return np.dtype(dtype) in self.legate_context.type_system
+        return to_core_dtype(dtype) is not None

     def get_numpy_thunk(
         self,
@@ -312,9 +290,7 @@ def get_numpy_thunk(
             if stores[0] is not None:
                 raise NotImplementedError("Need support for masked arrays")
             store = stores[1]
-            if dtype is None:
-                dtype = np.dtype(array.type.to_pandas_dtype())
-            return DeferredArray(self, store, dtype=dtype)
+            return DeferredArray(self, store)
         # See if this is a normal numpy array
         # Make sure to convert numpy matrices to numpy arrays here
         # as the former doesn't behave quite like the latter
@@ -442,7 +418,8 @@ def find_or_create_array_thunk(
         # Once it's a normal numpy array we can make it into one of our arrays
         # Check to see if it is a type that we support for doing deferred
        # execution and big enough to be worth off-loading onto Legion
-        if self.is_supported_type(array.dtype) and (
+        dtype = to_core_dtype(array.dtype)
+        if dtype is not None and (
             defer
             or not self.is_eager_shape(array.shape)
             or self.has_external_attachment(array)
@@ -458,7 +435,7 @@ def find_or_create_array_thunk(

             # This is not a scalar so make a field
             store = self.legate_context.create_store(
-                array.dtype,
+                dtype,
                 shape=array.shape,
                 optimize_scalar=False,
             )
@@ -470,7 +447,6 @@ def find_or_create_array_thunk(
             return DeferredArray(
                 self,
                 store,
-                dtype=array.dtype,
                 numpy_array=array if share else None,
             )
@@ -481,24 +457,29 @@ def has_external_attachment(self, array: Any) -> bool:
     def create_empty_thunk(
         self,
         shape: NdShape,
-        dtype: np.dtype[Any],
+        dtype: ty.Dtype,
         inputs: Optional[Sequence[NumPyThunk]] = None,
     ) -> NumPyThunk:
-        if self.is_supported_type(dtype) and not (
-            self.is_eager_shape(shape) and self.are_all_eager_inputs(inputs)
-        ):
-            store = self.legate_context.create_store(
-                dtype, shape=shape, optimize_scalar=True
-            )
-            return DeferredArray(self, store, dtype=dtype)
-        else:
-            return EagerArray(self, np.empty(shape, dtype=dtype))
+        if self.is_eager_shape(shape) and self.are_all_eager_inputs(inputs):
+            return self.create_eager_thunk(shape, dtype.to_numpy_dtype())
+
+        store = self.legate_context.create_store(
+            dtype, shape=shape, optimize_scalar=True
+        )
+        return DeferredArray(self, store)
+
+    def create_eager_thunk(
+        self,
+        shape: NdShape,
+        dtype: np.dtype[Any],
+    ) -> NumPyThunk:
+        return EagerArray(self, np.empty(shape, dtype=dtype))

     def create_unbound_thunk(
-        self, dtype: np.dtype[Any], ndim: int = 1
+        self, dtype: ty.Dtype, ndim: int = 1
     ) -> DeferredArray:
         store = self.legate_context.create_store(dtype, ndim=ndim)
-        return DeferredArray(self, store, dtype=dtype)
+        return DeferredArray(self, store)

     def is_eager_shape(self, shape: NdShape) -> bool:
         volume = calculate_volume(shape)
diff --git a/cunumeric/sort.py b/cunumeric/sort.py
index 93fa63abb..a0503bf92 100644
--- a/cunumeric/sort.py
+++ b/cunumeric/sort.py
@@ -36,7 +36,7 @@ def sort_flattened(
     sort_result = cast(
         "DeferredArray",
         output.runtime.create_empty_thunk(
-            flattened.shape, dtype=output.dtype, inputs=(flattened,)
+            flattened.shape, dtype=output.base.type, inputs=(flattened,)
         ),
     )
     sort(sort_result, flattened, argsort, stable=stable)
@@ -59,7 +59,7 @@ def sort_swapped(
     swapped_copy = cast(
         "DeferredArray",
         output.runtime.create_empty_thunk(
-            swapped.shape, dtype=input.dtype, inputs=(input, swapped)
+            swapped.shape, dtype=input.base.type, inputs=(input, swapped)
         ),
     )
     swapped_copy.copy(swapped, deep=True)
@@ -69,7 +69,9 @@ def sort_swapped(
     sort_result = cast(
         "DeferredArray",
         output.runtime.create_empty_thunk(
-            swapped_copy.shape, dtype=output.dtype, inputs=(swapped_copy,)
+            swapped_copy.shape,
+            dtype=output.base.type,
+            inputs=(swapped_copy,),
         ),
     )
     sort(sort_result, swapped_copy, argsort, stable=stable)
@@ -91,7 +93,7 @@ def sort_task(
     task.add_input(input.base)
     if uses_unbound_output:
         unbound = output.runtime.create_unbound_thunk(
-            dtype=output.dtype, ndim=1
+            dtype=output.base.type, ndim=1
         )
         task.add_output(unbound.base)
     else:
@@ -103,9 +105,9 @@ def sort_task(
     elif output.runtime.num_gpus == 0 and output.runtime.num_procs > 1:
         task.add_cpu_communicator()

-    task.add_scalar_arg(argsort, bool)  # return indices flag
+    task.add_scalar_arg(argsort, ty.bool_)  # return indices flag
     task.add_scalar_arg(input.base.shape, (ty.int64,))
-    task.add_scalar_arg(stable, bool)
+    task.add_scalar_arg(stable, ty.bool_)
     task.execute()

     if uses_unbound_output:
diff --git a/cunumeric/utils.py b/cunumeric/utils.py
index 64f39a87e..dc40ea190 100644
--- a/cunumeric/utils.py
+++ b/cunumeric/utils.py
@@ -18,7 +18,7 @@
 from functools import reduce
 from string import ascii_lowercase, ascii_uppercase
 from types import FrameType
-from typing import Any, Callable, List, Sequence, Tuple, Union, cast
+from typing import Any, Callable, List, Optional, Sequence, Tuple, Union

 import legate.core.types as ty
 import numpy as np
@@ -26,26 +26,27 @@
 from .types import NdShape

 SUPPORTED_DTYPES = {
-    bool: ty.bool_,
-    np.bool_: ty.bool_,
-    np.int8: ty.int8,
-    np.int16: ty.int16,
-    np.int32: ty.int32,
-    int: ty.int64,  # np.int is int
-    np.int64: ty.int64,
-    np.uint8: ty.uint8,
-    np.uint16: ty.uint16,
-    np.uint32: ty.uint32,
-    np.uint64: ty.uint64,  # np.uint is np.uint64
-    np.float16: ty.float16,
-    np.float32: ty.float32,
-    float: ty.float64,
-    np.float64: ty.float64,
-    np.complex64: ty.complex64,
-    np.complex128: ty.complex128,
+    np.dtype(np.bool_): ty.bool_,
+    np.dtype(np.int8): ty.int8,
+    np.dtype(np.int16): ty.int16,
+    np.dtype(np.int32): ty.int32,
+    np.dtype(np.int64): ty.int64,
+    np.dtype(np.uint8): ty.uint8,
+    np.dtype(np.uint16): ty.uint16,
+    np.dtype(np.uint32): ty.uint32,
+    np.dtype(np.uint64): ty.uint64,
+    np.dtype(np.float16): ty.float16,
+    np.dtype(np.float32): ty.float32,
+    np.dtype(np.float64): ty.float64,
+    np.dtype(np.complex64): ty.complex64,
+    np.dtype(np.complex128): ty.complex128,
 }


+def to_core_dtype(dtype: Union[str, np.dtype[Any]]) -> Optional[ty.Dtype]:
+    return SUPPORTED_DTYPES.get(np.dtype(dtype), None)
+
+
 def is_advanced_indexing(key: Any) -> bool:
     if key is Ellipsis or key is None:  # np.newdim case
         return False
@@ -93,30 +94,12 @@ def find_last_user_frames(top_only: bool = True) -> str:
     return "|".join(get_line_number_from_frame(f) for f in frames)


-def is_supported_dtype(dtype: Any) -> bool:
-    if not isinstance(dtype, np.dtype):
-        raise TypeError("expected a NumPy dtype")
-    return dtype.type in SUPPORTED_DTYPES
-
-
 def calculate_volume(shape: NdShape) -> int:
     if len(shape) == 0:
         return 0
     return reduce(lambda x, y: x * y, shape)


-def get_arg_dtype(dtype: np.dtype[Any]) -> np.dtype[Any]:
-    return np.dtype(
-        [("arg", np.int64), ("arg_value", dtype)],
-        align=True,
-    )
-
-
-def get_arg_value_dtype(dtype: np.dtype[Any]) -> np.dtype[Any]:
-    dt = dtype.fields["arg_value"][0].type  # type: ignore [index]
-    return cast(np.dtype[Any], dt)
-
-
 Modes = Tuple[List[str], List[str], List[str]]
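The new `to_core_dtype` helper above is the single gate that replaces both the old `is_supported_dtype` check and the startup-time alias registration: everything `np.dtype()` accepts is normalized before the table lookup, so the builtin keys (`bool`, `int`, `float`) in the old table are no longer needed. A short usage sketch (the structured dtype is a hypothetical unsupported input):

    import numpy as np
    from cunumeric.utils import to_core_dtype

    # Builtins and strings normalize through np.dtype() before lookup.
    assert to_core_dtype(bool) is to_core_dtype(np.bool_)
    assert to_core_dtype("float64") is to_core_dtype(float)

    # Unsupported dtypes come back as None; callers such as
    # find_or_create_array_thunk use that to fall back to eager mode.
    assert to_core_dtype(np.dtype([("arg", np.int64)])) is None
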
diff --git a/cunumeric_cpp.cmake b/cunumeric_cpp.cmake
index 4592559d3..dd8a60f7e 100644
--- a/cunumeric_cpp.cmake
+++ b/cunumeric_cpp.cmake
@@ -156,7 +156,7 @@ list(APPEND cunumeric_SOURCES
   src/cunumeric/stat/bincount.cc
   src/cunumeric/convolution/convolve.cc
   src/cunumeric/transform/flip.cc
-  src/cunumeric/arg.cc
+  src/cunumeric/arg_redop_register.cc
   src/cunumeric/mapper.cc
   src/cunumeric/cephes/chbevl.cc
   src/cunumeric/cephes/i0.cc
@@ -254,8 +254,8 @@ if(Legion_USE_CUDA)
     src/cunumeric/convolution/convolve.cu
     src/cunumeric/fft/fft.cu
     src/cunumeric/transform/flip.cu
+    src/cunumeric/arg_redop_register.cu
     src/cunumeric/cudalibs.cu
-    src/cunumeric/cunumeric.cu
   )
 endif()
diff --git a/src/cunumeric/arg.h b/src/cunumeric/arg.h
index edfa1d1c3..6f1f55258 100644
--- a/src/cunumeric/arg.h
+++ b/src/cunumeric/arg.h
@@ -17,7 +17,6 @@
 #pragma once

 #include "legate.h"
-#include "cunumeric/cunumeric_c.h"

 namespace cunumeric {
@@ -63,8 +62,6 @@ class ArgmaxReduction {
   using RHS = Argval;

   static const Argval identity;
-  static const int32_t REDOP_ID =
-    CUNUMERIC_ARGMAX_REDOP * MAX_TYPE_NUMBER + legate::legate_type_code_of;

   template
   __CUDA_HD__ inline static void apply(LHS& lhs, RHS rhs)
@@ -85,8 +82,6 @@ class ArgminReduction {
   using RHS = Argval;

   static const Argval identity;
-  static const int32_t REDOP_ID =
-    CUNUMERIC_ARGMIN_REDOP * MAX_TYPE_NUMBER + legate::legate_type_code_of;

   template
   __CUDA_HD__ inline static void apply(LHS& lhs, RHS rhs)
@@ -101,3 +96,5 @@ class ArgminReduction {
 };

 }  // namespace cunumeric
+
+#include "cunumeric/arg.inl"
diff --git a/src/cunumeric/arg.inl b/src/cunumeric/arg.inl
index b98314d7b..8839ec456 100644
--- a/src/cunumeric/arg.inl
+++ b/src/cunumeric/arg.inl
@@ -112,34 +112,4 @@ __CUDA_HD__ inline void Argval::apply(const Argval& rhs)
   }
 }

-#define DECLARE_ARGMAX_IDENTITY(TYPE) \
-  template <>                         \
-  const Argval ArgmaxReduction::identity;
-
-#define DECLARE_ARGMIN_IDENTITY(TYPE) \
-  template <>                         \
-  const Argval ArgminReduction::identity;
-
-#define DECLARE_IDENTITIES(TYPE) \
-  DECLARE_ARGMAX_IDENTITY(TYPE) \
-  DECLARE_ARGMIN_IDENTITY(TYPE)
-
-DECLARE_IDENTITIES(__half)
-DECLARE_IDENTITIES(float)
-DECLARE_IDENTITIES(double)
-DECLARE_IDENTITIES(bool)
-DECLARE_IDENTITIES(int8_t)
-DECLARE_IDENTITIES(int16_t)
-DECLARE_IDENTITIES(int32_t)
-DECLARE_IDENTITIES(int64_t)
-DECLARE_IDENTITIES(uint8_t)
-DECLARE_IDENTITIES(uint16_t)
-DECLARE_IDENTITIES(uint32_t)
-DECLARE_IDENTITIES(uint64_t)
-DECLARE_IDENTITIES(complex)
-
-#undef DECLARE_IDENTITIES
-#undef DECLARE_ARGMIN_IDENTITY
-#undef DECLARE_ARGMAX_IDENTITY
-
 }  // namespace cunumeric
diff --git a/src/cunumeric/arg.cc b/src/cunumeric/arg_redop_register.cc
similarity index 60%
rename from src/cunumeric/arg.cc
rename to src/cunumeric/arg_redop_register.cc
index 5c400a0d8..5068abaee 100644
--- a/src/cunumeric/arg.cc
+++ b/src/cunumeric/arg_redop_register.cc
@@ -14,8 +14,7 @@
  *
  */

-#include "cunumeric/arg.h"
-#include "cunumeric/arg.inl"
+#include "cunumeric/arg_redop_register.h"

 namespace cunumeric {

@@ -46,30 +45,27 @@ DEFINE_IDENTITIES(uint16_t)
 DEFINE_IDENTITIES(uint32_t)
 DEFINE_IDENTITIES(uint64_t)

+#undef DEFINE_ARGMAX_IDENTITY
+#undef DEFINE_ARGMIN_IDENTITY
+#undef DEFINE_IDENTITIES
+
+/*static*/ int32_t register_reduction_op_fn::register_reduction_op_fn::next_reduction_operator_id()
+{
+  static int32_t next_redop_id = 0;
+  return next_redop_id++;
+}
+
+}  // namespace cunumeric
+
 #ifndef LEGATE_USE_CUDA

-#define REGISTER_REDOPS(OP) \
-  { \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-  }
+extern "C" {

-void register_reduction_operators(legate::LibraryContext* context)
+void cunumeric_register_reduction_op(int32_t type_uid, int32_t _elem_type_code)
 {
-  REGISTER_REDOPS(ArgmaxReduction);
-  REGISTER_REDOPS(ArgminReduction);
+  auto elem_type_code = static_cast(_elem_type_code);
+  legate::type_dispatch(elem_type_code, cunumeric::register_reduction_op_fn{}, type_uid);
+}
 }

 #endif
-
-}  // namespace cunumeric
diff --git a/src/cunumeric/arg_redop_register.cu b/src/cunumeric/arg_redop_register.cu
new file mode 100644
index 000000000..5a14b0b71
--- /dev/null
+++ b/src/cunumeric/arg_redop_register.cu
@@ -0,0 +1,26 @@
+/* Copyright 2021-2022 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#include "cunumeric/arg_redop_register.h"
+
+extern "C" {
+
+void cunumeric_register_reduction_op(int32_t type_uid, int32_t _elem_type_code)
+{
+  auto elem_type_code = static_cast(_elem_type_code);
+  legate::type_dispatch(elem_type_code, cunumeric::register_reduction_op_fn{}, type_uid);
+}
+}
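The renamed translation unit above and its CUDA twin expose one C entry point, `cunumeric_register_reduction_op`, so argmin/argmax reduction operators can be registered on demand instead of eagerly for every type at startup. A sketch of the Python-side discipline this enables, mirroring `get_argred_type` from runtime.py earlier in this patch (`register_fn` stands in for the shared object's entry point):

    import legate.core.types as ty

    _argred_cache: dict = {}

    def argred_type(value_dtype, register_fn):
        # One (int64 index, value) struct type per value type,
        # registered with the C++ side exactly once, then cached.
        cached = _argred_cache.get(value_dtype)
        if cached is not None:
            return cached
        argred_dtype = ty.struct_type([ty.int64, value_dtype], True)
        _argred_cache[value_dtype] = argred_dtype
        register_fn(argred_dtype.uid, value_dtype.code)
        return argred_dtype
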
diff --git a/src/cunumeric/arg_redop_register.h b/src/cunumeric/arg_redop_register.h
new file mode 100644
index 000000000..02433da62
--- /dev/null
+++ b/src/cunumeric/arg_redop_register.h
@@ -0,0 +1,56 @@
+/* Copyright 2023 NVIDIA Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+
+#pragma once
+
+#include "legate.h"
+#include "cunumeric/cunumeric_c.h"
+#include "cunumeric/arg.h"
+
+namespace cunumeric {
+
+struct register_reduction_op_fn {
+  template ::value>* = nullptr>
+  void operator()(int32_t type_uid)
+  {
+    using VAL = legate::legate_type_of;
+
+    auto runtime = legate::Runtime::get_runtime();
+    auto context = runtime->find_library("cunumeric");
+    {
+      auto redop_id =
+        context->register_reduction_operator>(next_reduction_operator_id());
+      auto op_kind = static_cast(legate::ReductionOpKind::MAX);
+      runtime->record_reduction_operator(type_uid, op_kind, redop_id);
+    }
+    {
+      auto redop_id =
+        context->register_reduction_operator>(next_reduction_operator_id());
+      auto op_kind = static_cast(legate::ReductionOpKind::MIN);
+      runtime->record_reduction_operator(type_uid, op_kind, redop_id);
+    }
+  }
+
+  template ::value>* = nullptr>
+  void operator()(int32_t type_uid)
+  {
+    LEGATE_ABORT;
+  }
+
+  static int32_t next_reduction_operator_id();
+};
+
+}  // namespace cunumeric
diff --git a/src/cunumeric/binary/binary_op.cc b/src/cunumeric/binary/binary_op.cc
index c17e3ac49..a718443fa 100644
--- a/src/cunumeric/binary/binary_op.cc
+++ b/src/cunumeric/binary/binary_op.cc
@@ -21,7 +21,7 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct BinaryOpImplBody {
   using OP   = BinaryOp;
   using RHS1 = legate_type_of;
diff --git a/src/cunumeric/binary/binary_op.cu b/src/cunumeric/binary/binary_op.cu
index b1d7ce4df..76177b154 100644
--- a/src/cunumeric/binary/binary_op.cu
+++ b/src/cunumeric/binary/binary_op.cu
@@ -51,7 +51,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM)
   out[point] = func(in1[point], in2[point]);
 }

-template
+template
 struct BinaryOpImplBody {
   using OP   = BinaryOp;
   using RHS1 = legate_type_of;
diff --git a/src/cunumeric/binary/binary_op_omp.cc b/src/cunumeric/binary/binary_op_omp.cc
index 46452e9f1..53ec582a7 100644
--- a/src/cunumeric/binary/binary_op_omp.cc
+++ b/src/cunumeric/binary/binary_op_omp.cc
@@ -21,7 +21,7 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct BinaryOpImplBody {
   using OP   = BinaryOp;
   using RHS1 = legate_type_of;
diff --git a/src/cunumeric/binary/binary_op_template.inl b/src/cunumeric/binary/binary_op_template.inl
index a51276d2a..d32484835 100644
--- a/src/cunumeric/binary/binary_op_template.inl
+++ b/src/cunumeric/binary/binary_op_template.inl
@@ -25,14 +25,12 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct BinaryOpImplBody;

 template
 struct BinaryOpImpl {
-  template ::valid>* = nullptr>
+  template ::valid>* = nullptr>
   void operator()(BinaryOpArgs& args) const
   {
     using OP  = BinaryOp;
@@ -64,9 +62,7 @@ struct BinaryOpImpl {
     BinaryOpImplBody()(func, out, in1, in2, pitches, rect, dense);
   }

-  template ::valid>* = nullptr>
+  template ::valid>* = nullptr>
   void operator()(BinaryOpArgs& args) const
   {
     assert(false);
diff --git a/src/cunumeric/binary/binary_op_util.h b/src/cunumeric/binary/binary_op_util.h
index 1cb54464b..a0c9540dc 100644
--- a/src/cunumeric/binary/binary_op_util.h
+++ b/src/cunumeric/binary/binary_op_util.h
@@ -160,18 +160,18 @@ constexpr decltype(auto) reduce_op_dispatch(BinaryOpCode op_code, Functor f, Fna
   return f.template operator()(std::forward(args)...);
 }

-template
+template
 struct BinaryOp {
   static constexpr bool valid = false;
 };

-template
+template
 struct BinaryOp : std::plus> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = legate::is_floating_point::value;
@@ -187,17 +187,17 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}

   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = std::is_integral::value;
@@ -210,7 +210,7 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = std::is_integral::value;
@@ -223,7 +223,7 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = std::is_integral::value;
@@ -236,7 +236,7 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = legate::is_floating_point::value;
@@ -251,18 +251,18 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   using T = __half;
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}

   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -281,17 +281,17 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp : std::equal_to> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid =
-    CODE == legate::LegateTypeCode::DOUBLE_LT or CODE == legate::LegateTypeCode::COMPLEX128_LT;
+    CODE == legate::Type::Code::FLOAT64 or CODE == legate::Type::Code::COMPLEX128;
   BinaryOp(const std::vector& args) {}

   constexpr T operator()(const T& a, const T& b) const
@@ -302,7 +302,7 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   using T = complex;
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
@@ -314,11 +314,12 @@ struct BinaryOp
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid =
-    not(CODE == legate::LegateTypeCode::BOOL_LT or legate::is_complex::value);
+    not(CODE == legate::Type::Code::BOOL or legate::is_complex::value);
+
   __CUDA_HD__ BinaryOp() {}
   BinaryOp(const std::vector& args) {}
@@ -337,12 +338,12 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };
@@ -370,7 +371,7 @@ static __CUDA_HD__ T _gcd(T a, T b)
   return a;
 }

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = std::is_integral::value;
@@ -388,7 +389,7 @@ static constexpr T floor_divide_signed(const T& a, const T& b)
 }

 using std::floor;
-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -416,31 +417,31 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = false;
   BinaryOp(const std::vector& args) {}
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = false;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp : std::greater> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp : std::greater_equal> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = legate::is_floating_point::value;
@@ -456,18 +457,18 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}

   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };

-template
+template
 struct BinaryOp {
   using VAL = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -499,7 +500,7 @@ struct BinaryOp {
   double atol_{0};
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = std::is_integral::value;
@@ -524,7 +525,7 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = legate::is_floating_point::value;
@@ -538,7 +539,7 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   using T = __half;
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
@@ -550,10 +551,10 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
-  static constexpr bool valid = CODE != BOOL_LT && std::is_integral::value;
+  static constexpr bool valid = CODE != legate::Type::Code::BOOL && std::is_integral::value;

   BinaryOp(const std::vector& args) {}
@@ -567,19 +568,19 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp : std::less> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp : std::less_equal> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = legate::is_floating_point::value;
@@ -602,18 +603,18 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}

   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = legate::is_floating_point::value;
@@ -635,18 +636,18 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}

   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -665,7 +666,7 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -685,7 +686,7 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -704,7 +705,7 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -712,7 +713,7 @@ struct BinaryOp {
   constexpr T operator()(const T& a, const T& b) const { return std::max(a, b); }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -732,7 +733,7 @@ constexpr T real_mod(const T& a, const T& b)
   return res;
 }

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -762,34 +763,34 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}

   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = false;
   BinaryOp(const std::vector& args) {}
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = false;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp : std::multiplies> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
   static constexpr bool valid = legate::is_floating_point::value;
@@ -804,24 +805,24 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   using T = __half;
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}

   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const
   {
-    return lift(a, b, BinaryOp{});
+    return lift(a, b, BinaryOp{});
   }
 };

-template
+template
 struct BinaryOp : std::not_equal_to> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct BinaryOp {
   using VAL = legate::legate_type_of;
   static constexpr bool valid = true;
@@ -833,14 +834,14 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
   __CUDA_HD__ __half operator()(const __half& a, const __half& b) const { return pow(a, b); }
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
   __CUDA_HD__ complex operator()(const complex& a, const complex& b) const
@@ -850,7 +851,7 @@ struct BinaryOp {
 };

 template <>
-struct BinaryOp {
+struct BinaryOp {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
   __CUDA_HD__ complex operator()(const complex& a, const complex& b) const
@@ -859,33 +860,33 @@ struct BinaryOp {
   }
 };

-template
+template
 struct BinaryOp {
   using T = legate::legate_type_of;
-  static constexpr bool valid = CODE != BOOL_LT && std::is_integral::value;
+  static constexpr bool valid = CODE != legate::Type::Code::BOOL && std::is_integral::value;

   BinaryOp(const std::vector& args) {}

   constexpr decltype(auto) operator()(const T& a, const T& b) const { return a >> b; }
 };

-template
+template
 struct BinaryOp : std::minus> {
   static constexpr bool valid = true;
   BinaryOp(const std::vector& args) {}
 };

-template
+template
 struct RHS2OfBinaryOp {
   using type = legate::legate_type_of;
 };

-template
+template
 struct RHS2OfBinaryOp {
   using type = int32_t;
 };

-template
+template
 using rhs2_of_binary_op = typename RHS2OfBinaryOp::type;

 }  // namespace cunumeric
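The header above shows the flavor of the remaining C++ changes: every spelling of the old `LegateTypeCode` enum becomes a `legate::Type::Code` one, with no behavioral change. The correspondences actually visible in this patch, collected as a plain reference mapping (names only):

    # Old LegateTypeCode spelling -> new legate::Type::Code spelling,
    # as seen in the hunks above and below.
    OLD_TO_NEW_TYPE_CODE = {
        "BOOL_LT": "BOOL",
        "FLOAT_LT": "FLOAT32",
        "DOUBLE_LT": "FLOAT64",
        "COMPLEX64_LT": "COMPLEX64",
        "COMPLEX128_LT": "COMPLEX128",
    }
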
diff --git a/src/cunumeric/binary/binary_red.cc b/src/cunumeric/binary/binary_red.cc
index 73b72bc8a..5340e5334 100644
--- a/src/cunumeric/binary/binary_red.cc
+++ b/src/cunumeric/binary/binary_red.cc
@@ -21,7 +21,7 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct BinaryRedImplBody {
   using OP  = BinaryOp;
   using ARG = legate_type_of;
diff --git a/src/cunumeric/binary/binary_red.cu b/src/cunumeric/binary/binary_red.cu
index b9cc48d10..98435abd8 100644
--- a/src/cunumeric/binary/binary_red.cu
+++ b/src/cunumeric/binary/binary_red.cu
@@ -46,7 +46,7 @@ static __global__ void __launch_bounds__(1, 1) copy_kernel(Buffer result, RedAcc
   out.reduce(0, result.read());
 }

-template
+template
 struct BinaryRedImplBody {
   using OP  = BinaryOp;
   using ARG = legate_type_of;
diff --git a/src/cunumeric/binary/binary_red_omp.cc b/src/cunumeric/binary/binary_red_omp.cc
index cc2713645..891aa7abd 100644
--- a/src/cunumeric/binary/binary_red_omp.cc
+++ b/src/cunumeric/binary/binary_red_omp.cc
@@ -21,7 +21,7 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct BinaryRedImplBody {
   using OP  = BinaryOp;
   using ARG = legate_type_of;
diff --git a/src/cunumeric/binary/binary_red_template.inl b/src/cunumeric/binary/binary_red_template.inl
index d0f180091..4bff8e454 100644
--- a/src/cunumeric/binary/binary_red_template.inl
+++ b/src/cunumeric/binary/binary_red_template.inl
@@ -25,14 +25,12 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct BinaryRedImplBody;

 template
 struct BinaryRedImpl {
-  template ::valid>* = nullptr>
+  template ::valid>* = nullptr>
   void operator()(BinaryRedArgs& args) const
   {
     using OP = BinaryOp;
@@ -68,9 +66,7 @@ struct BinaryRedImpl {
     BinaryRedImplBody()(func, out, in1, in2, pitches, rect, dense);
   }

-  template ::valid>* = nullptr>
+  template ::valid>* = nullptr>
   void operator()(BinaryRedArgs& args) const
   {
     assert(false);
diff --git a/src/cunumeric/bits/packbits.cc b/src/cunumeric/bits/packbits.cc
index 027e090c7..99eac967c 100644
--- a/src/cunumeric/bits/packbits.cc
+++ b/src/cunumeric/bits/packbits.cc
@@ -21,7 +21,7 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct PackbitsImplBody {
   using VAL = legate_type_of;
diff --git a/src/cunumeric/bits/packbits.cu b/src/cunumeric/bits/packbits.cu
index 03757144b..81edb82b4 100644
--- a/src/cunumeric/bits/packbits.cu
+++ b/src/cunumeric/bits/packbits.cu
@@ -39,7 +39,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM)
   out[out_p] = pack(in, out_p, in_hi_axis, axis);
 }

-template
+template
 struct PackbitsImplBody {
   using VAL = legate_type_of;
diff --git a/src/cunumeric/bits/packbits_omp.cc b/src/cunumeric/bits/packbits_omp.cc
index b28199ab3..7e8e05c55 100644
--- a/src/cunumeric/bits/packbits_omp.cc
+++ b/src/cunumeric/bits/packbits_omp.cc
@@ -21,7 +21,7 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct PackbitsImplBody {
   using VAL = legate_type_of;
diff --git a/src/cunumeric/bits/packbits_template.inl b/src/cunumeric/bits/packbits_template.inl
index c1a820c73..7e1e68a89 100644
--- a/src/cunumeric/bits/packbits_template.inl
+++ b/src/cunumeric/bits/packbits_template.inl
@@ -24,12 +24,12 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct PackbitsImplBody;

 template
 struct PackbitsImpl {
-  template ::value>* = nullptr>
+  template ::value>* = nullptr>
   void operator()(Array& output, Array& input, uint32_t axis) const
   {
     using VAL = legate_type_of;
@@ -74,9 +74,7 @@ struct PackbitsImpl {
       axis);
   }

-  template ::value>* = nullptr>
+  template ::value>* = nullptr>
   void operator()(Array& output, Array& input, uint32_t axis) const
   {
     // Unreachable
@@ -93,7 +91,7 @@ static void packbits_template(TaskContext& context)
   auto axis     = scalars[0].value();
   auto bitorder = scalars[1].value();

-  auto code = input.code();
+  auto code = input.code();
   switch (bitorder) {
     case Bitorder::BIG: {
       double_dispatch(input.dim(), code, PackbitsImpl{}, output, input, axis);
diff --git a/src/cunumeric/bits/unpackbits_template.inl b/src/cunumeric/bits/unpackbits_template.inl
index f0316ac1e..2022a4135 100644
--- a/src/cunumeric/bits/unpackbits_template.inl
+++ b/src/cunumeric/bits/unpackbits_template.inl
@@ -47,9 +47,7 @@ struct UnpackbitsImpl {
     UnpackbitsImplBody{}(out, in, in_rect, in_pitches, in_volume, axis);
   }

-  template ::value>* = nullptr>
+  template ::value>* = nullptr>
   void operator()(Array& output, Array& input, uint32_t axis) const
   {
     // Unreachable
@@ -66,7 +64,7 @@ static void unpackbits_template(TaskContext& context)
   auto axis     = scalars[0].value();
   auto bitorder = scalars[1].value();

-  auto code = input.code();
+  auto code = input.code();
   switch (bitorder) {
     case Bitorder::BIG: {
       dim_dispatch(input.dim(), UnpackbitsImpl{}, output, input, axis);
diff --git a/src/cunumeric/convolution/convolve.cc b/src/cunumeric/convolution/convolve.cc
index b2a2d817f..3827be718 100644
--- a/src/cunumeric/convolution/convolve.cc
+++ b/src/cunumeric/convolution/convolve.cc
@@ -24,7 +24,7 @@ namespace cunumeric {
 // algorithm, but it is commented out in favor of the faster one
 // that is blocked for caches
 #if 0
-template
+template
 struct ConvolveImplBody {
   using VAL = legate_type_of;
@@ -73,7 +73,7 @@ struct ConvolveImplBody {
 };
 #endif

-template
+template
 struct ConvolveImplBody {
   using VAL = legate_type_of;
diff --git a/src/cunumeric/convolution/convolve.cu b/src/cunumeric/convolution/convolve.cu
index 28688d6c4..66886a96b 100644
--- a/src/cunumeric/convolution/convolve.cu
+++ b/src/cunumeric/convolution/convolve.cu
@@ -1409,7 +1409,7 @@ struct UseCUFFT {
   static constexpr bool value = 1 <= DIM && DIM <= 3 && std::is_floating_point::value;
 };

-template
+template
 struct ConvolveImplBody {
   using VAL = legate_type_of;
diff --git a/src/cunumeric/convolution/convolve_omp.cc b/src/cunumeric/convolution/convolve_omp.cc
index f016c4118..283f4b7b9 100644
--- a/src/cunumeric/convolution/convolve_omp.cc
+++ b/src/cunumeric/convolution/convolve_omp.cc
@@ -24,7 +24,7 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct ConvolveImplBody {
   using VAL = legate_type_of;
diff --git a/src/cunumeric/convolution/convolve_template.inl b/src/cunumeric/convolution/convolve_template.inl
index 9d9077eca..f698ebfd4 100644
--- a/src/cunumeric/convolution/convolve_template.inl
+++ b/src/cunumeric/convolution/convolve_template.inl
@@ -26,12 +26,12 @@ namespace cunumeric {

 using namespace legate;

-template
+template
 struct ConvolveImplBody;

 template
 struct ConvolveImpl {
-  template * = nullptr>
+  template * = nullptr>
   void operator()(ConvolveArgs& args) const
   {
     using VAL = legate_type_of;
@@ -55,7 +55,7 @@ struct ConvolveImpl {
     ConvolveImplBody()(out, filter, input, root_rect, subrect, filter_rect);
   }

-  template * = nullptr>
+  template * = nullptr>
   void operator()(ConvolveArgs& args) const
   {
     assert(false);
diff --git a/src/cunumeric/cuda_help.h b/src/cunumeric/cuda_help.h
index b7988f741..f0f0fee85 100644
--- a/src/cunumeric/cuda_help.h
+++ b/src/cunumeric/cuda_help.h
@@ -20,7 +20,6 @@
 #include "core/cuda/cuda_help.h"
 #include "core/cuda/stream_pool.h"
 #include "cunumeric/arg.h"
-#include "cunumeric/arg.inl"
 #include "cunumeric/device_scalar_reduction_buffer.h"
 #include
 #include
diff --git a/src/cunumeric/cunumeric.cc b/src/cunumeric/cunumeric.cc
index c631d5a8d..5377c5aeb 100644
--- a/src/cunumeric/cunumeric.cc
+++ b/src/cunumeric/cunumeric.cc
@@ -30,8 +30,6 @@ static const char* const cunumeric_library_name = "cunumeric";
   return registrar;
 }

-extern void register_reduction_operators(LibraryContext* context);
-
 void registration_callback()
 {
   ResourceConfig config;
@@ -42,9 +40,6 @@ void registration_callback()
     cunumeric_library_name, config, std::make_unique());

   CuNumericRegistrar::get_registrar().register_all_tasks(context);
-
-  // Register our special reduction functions
-  register_reduction_operators(context);
 }

 }  // namespace cunumeric
diff --git a/src/cunumeric/cunumeric.cu b/src/cunumeric/cunumeric.cu
deleted file mode 100644
index 87cd3a85e..000000000
--- a/src/cunumeric/cunumeric.cu
+++ /dev/null
@@ -1,45 +0,0 @@
-/* Copyright 2021-2022 NVIDIA Corporation
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- *
- */
-
-#include "cunumeric.h"
-#include "arg.h"
-#include "arg.inl"
-
-namespace cunumeric {
-
-#define REGISTER_REDOPS(OP) \
-  { \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-    context->register_reduction_operator>(); \
-  }
-
-void register_reduction_operators(legate::LibraryContext* context)
-{
-  REGISTER_REDOPS(ArgmaxReduction);
-  REGISTER_REDOPS(ArgminReduction);
-}
-
-}  // namespace cunumeric
diff --git a/src/cunumeric/cunumeric_c.h b/src/cunumeric/cunumeric_c.h
index 8cfcb0ac8..c3145939e 100644
--- a/src/cunumeric/cunumeric_c.h
+++ b/src/cunumeric/cunumeric_c.h
@@ -319,25 +319,13 @@ enum CuNumericFFTDirection { CUNUMERIC_FFT_FORWARD = -1, CUNUMERIC_FFT_INVERSE =
 // Match these to Bitorder in config.py
 enum CuNumericBitorder { CUNUMERIC_BITORDER_BIG = 0, CUNUMERIC_BITORDER_LITTLE = 1 };

-// Match these to CuNumericTypeCodes in config.py
-enum CuNumericTypeCodes {
-  CUNUMERIC_TYPE_POINT1 = MAX_TYPE_NUMBER + 1,
-  CUNUMERIC_TYPE_POINT2,
-  CUNUMERIC_TYPE_POINT3,
-  CUNUMERIC_TYPE_POINT4,
-  CUNUMERIC_TYPE_POINT5,
-  CUNUMERIC_TYPE_POINT6,
-  CUNUMERIC_TYPE_POINT7,
-  CUNUMERIC_TYPE_POINT8,
-  CUNUMERIC_TYPE_POINT9,
-};
-
 #ifdef __cplusplus
 extern "C" {
 #endif

 void cunumeric_perform_registration();
 bool cunumeric_has_curand();
+void cunumeric_register_reduction_op(int32_t type_uid, int32_t elem_type_code);

 #ifdef __cplusplus
 }
diff --git a/src/cunumeric/fft/fft.cu b/src/cunumeric/fft/fft.cu
index 45b550584..4fb5bfea0 100644
--- a/src/cunumeric/fft/fft.cu
+++ b/src/cunumeric/fft/fft.cu
@@ -363,7 +363,7 @@ __host__ static inline void cufft_over_axis_r2c_c2r(AccessorWO
   CHECK_CUFFT(cufftDestroy(plan));
 }

-template
+template
 struct FFTImplBody {
   using INPUT_TYPE  = legate_type_of;
   using OUTPUT_TYPE = legate_type_of;
diff --git a/src/cunumeric/fft/fft_template.inl b/src/cunumeric/fft/fft_template.inl
index de26bd017..1ad05f9af 100644
--- a/src/cunumeric/fft/fft_template.inl
+++ b/src/cunumeric/fft/fft_template.inl
@@ -27,14 +27,14 @@
 using namespace legate;

 template
 struct FFTImplBody;

 template
 struct FFTImpl {
-  template ::valid)>* = nullptr>
   void operator()(FFTArgs& args) const
@@ -54,7 +54,7 @@ struct FFTImpl {
   }

   // We only support up to 3D FFTs for now
-  template 3) || !FFT::valid)>* = nullptr>
   void operator()(FFTArgs& args) const
diff --git a/src/cunumeric/fft/fft_util.h b/src/cunumeric/fft/fft_util.h
index 04428a1ab..dea461ccc 100644
--- a/src/cunumeric/fft/fft_util.h
+++ b/src/cunumeric/fft/fft_util.h
@@ -44,45 +44,45 @@ constexpr decltype(auto) fft_dispatch(CuNumericFFTType type, Functor f, Fnargs&&
   return f.template operator()(std::forward(args)...);
 }

-template
+template
 struct FFT {
   static constexpr bool valid = false;
 };

 template <>
-struct FFT {
-  static constexpr bool valid = true;
-  static constexpr LegateTypeCode CODE_OUT = LegateTypeCode::COMPLEX64_LT;
+struct FFT {
+  static constexpr bool valid = true;
+  static constexpr Type::Code CODE_OUT = Type::Code::COMPLEX64;
 };

 template <>
-struct FFT {
-  static constexpr bool valid = true;
-  static constexpr LegateTypeCode CODE_OUT = LegateTypeCode::FLOAT_LT;
+struct FFT {
+  static constexpr bool valid = true;
+  static constexpr Type::Code CODE_OUT = Type::Code::FLOAT32;
 };

 template <>
-struct FFT {
-  static constexpr bool valid = true;
-  static constexpr LegateTypeCode CODE_OUT = LegateTypeCode::COMPLEX64_LT;
+struct FFT {
+  static constexpr bool valid = true;
+  static constexpr Type::Code CODE_OUT = Type::Code::COMPLEX64;
 };

 template <>
-struct FFT {
-  static constexpr bool valid = true;
-  static constexpr LegateTypeCode CODE_OUT = LegateTypeCode::COMPLEX128_LT;
+struct FFT {
+  static constexpr bool valid = true;
+  static constexpr Type::Code CODE_OUT = Type::Code::COMPLEX128;
 };

 template <>
-struct FFT {
-  static constexpr bool valid = true;
-  static constexpr LegateTypeCode CODE_OUT = LegateTypeCode::DOUBLE_LT;
+struct FFT {
+  static constexpr bool valid = true;
+  static constexpr Type::Code CODE_OUT = Type::Code::FLOAT64;
 };

 template <>
-struct FFT {
-  static constexpr bool valid = true;
-  static constexpr LegateTypeCode CODE_OUT = LegateTypeCode::COMPLEX128_LT;
+struct FFT {
+  static constexpr bool valid = true;
+  static constexpr Type::Code CODE_OUT = Type::Code::COMPLEX128;
 };

 }  // namespace cunumeric
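fft_util.h above pins each supported transform to a fixed output element type. Restated as a small Python table; the transform kinds follow cuFFT's usual naming, so treat the input/output pairing as an inferred illustration rather than a quote of the header:

    import legate.core.types as ty

    # Output element type per FFT kind, per the CODE_OUT constants above.
    FFT_OUTPUT_TYPE = {
        "C2C": ty.complex64,    # complex64  -> complex64
        "C2R": ty.float32,      # complex64  -> float32
        "R2C": ty.complex64,    # float32    -> complex64
        "Z2Z": ty.complex128,   # complex128 -> complex128
        "Z2D": ty.float64,      # complex128 -> float64
        "D2Z": ty.complex128,   # float64    -> complex128
    }
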
- template + template void operator()(AdvancedIndexingArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/index/choose.cc b/src/cunumeric/index/choose.cc index ecd1f052f..ed4b0a0cf 100644 --- a/src/cunumeric/index/choose.cc +++ b/src/cunumeric/index/choose.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct ChooseImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/index/choose.cu b/src/cunumeric/index/choose.cu index 1a042f941..5deab68bb 100644 --- a/src/cunumeric/index/choose.cu +++ b/src/cunumeric/index/choose.cu @@ -45,7 +45,7 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) cho outptr[idx] = choices[indexptr[idx]][idx]; } -template +template struct ChooseImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/index/choose_omp.cc b/src/cunumeric/index/choose_omp.cc index 14006aa01..19bf12ee2 100644 --- a/src/cunumeric/index/choose_omp.cc +++ b/src/cunumeric/index/choose_omp.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct ChooseImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/index/choose_template.inl b/src/cunumeric/index/choose_template.inl index 58affeee1..9399f736a 100644 --- a/src/cunumeric/index/choose_template.inl +++ b/src/cunumeric/index/choose_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct ChooseImplBody; template struct ChooseImpl { - template + template void operator()(ChooseArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/index/putmask_template.inl b/src/cunumeric/index/putmask_template.inl index 463f41473..3a85d2044 100644 --- a/src/cunumeric/index/putmask_template.inl +++ b/src/cunumeric/index/putmask_template.inl @@ -26,7 +26,7 @@ namespace cunumeric { using namespace legate; -template +template struct Putmask { using T = legate_type_of; using IN = AccessorRW; @@ -92,7 +92,7 @@ using namespace legate; template struct PutmaskImpl { - template + template void operator()(PutmaskArgs& args) const { Putmask putmask(args); diff --git a/src/cunumeric/index/repeat.cc b/src/cunumeric/index/repeat.cc index d6c317173..9222d7c5f 100644 --- a/src/cunumeric/index/repeat.cc +++ b/src/cunumeric/index/repeat.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct RepeatImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/index/repeat.cu b/src/cunumeric/index/repeat.cu index cc78378a5..634050b9d 100644 --- a/src/cunumeric/index/repeat.cu +++ b/src/cunumeric/index/repeat.cu @@ -93,7 +93,7 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) } } -template +template struct RepeatImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/index/repeat_omp.cc b/src/cunumeric/index/repeat_omp.cc index 1e9018e15..9ff130634 100644 --- a/src/cunumeric/index/repeat_omp.cc +++ b/src/cunumeric/index/repeat_omp.cc @@ -26,7 +26,7 @@ namespace cunumeric { using namespace legate; -template +template struct RepeatImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/index/repeat_template.inl b/src/cunumeric/index/repeat_template.inl index 3b52141f5..d6173dde8 100644 --- a/src/cunumeric/index/repeat_template.inl +++ b/src/cunumeric/index/repeat_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct RepeatImplBody; template struct RepeatImpl { - template + template void operator()(RepeatArgs& args) 
const { using VAL = legate_type_of; diff --git a/src/cunumeric/item/read_template.inl b/src/cunumeric/item/read_template.inl index 66478bdef..b13a00345 100644 --- a/src/cunumeric/item/read_template.inl +++ b/src/cunumeric/item/read_template.inl @@ -28,7 +28,7 @@ struct ReadImplBody; template struct ReadImpl { - template + template void operator()(const Array& out_arr, const Array& in_arr) const { using VAL = legate_type_of; diff --git a/src/cunumeric/item/write_template.inl b/src/cunumeric/item/write_template.inl index 6595f3edd..41b18e01c 100644 --- a/src/cunumeric/item/write_template.inl +++ b/src/cunumeric/item/write_template.inl @@ -28,7 +28,7 @@ struct WriteImplBody; template struct WriteImpl { - template + template void operator()(Array& out_arr, Array& in_arr) const { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/contract.cc b/src/cunumeric/matrix/contract.cc index 47fe3c1b0..eab84e70e 100644 --- a/src/cunumeric/matrix/contract.cc +++ b/src/cunumeric/matrix/contract.cc @@ -33,7 +33,7 @@ using namespace tblis; // to appease the type checker. template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(float* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -65,7 +65,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(double* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -97,7 +97,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(__half* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -133,29 +133,29 @@ struct ContractImplBody { float* rhs2_copy_data = allocate_buffer(rhs2_size); half_tensor_to_float(rhs2_copy_data, rhs2_data, rhs2_ndim, rhs2_shape, rhs2_strides); - ContractImplBody{}(lhs_copy_data, - lhs_ndim, - lhs_shape, - lhs_copy_strides.data(), - lhs_modes, - rhs1_copy_data, - rhs1_ndim, - rhs1_shape, - rhs1_copy_strides.data(), - rhs1_modes, - rhs2_copy_data, - rhs2_ndim, - rhs2_shape, - rhs2_copy_strides.data(), - rhs2_modes, - lhs_overwritable); + ContractImplBody{}(lhs_copy_data, + lhs_ndim, + lhs_shape, + lhs_copy_strides.data(), + lhs_modes, + rhs1_copy_data, + rhs1_ndim, + rhs1_shape, + rhs1_copy_strides.data(), + rhs1_modes, + rhs2_copy_data, + rhs2_ndim, + rhs2_shape, + rhs2_copy_strides.data(), + rhs2_modes, + lhs_overwritable); float_tensor_to_half(lhs_data, lhs_copy_data, lhs_ndim, lhs_shape, lhs_strides); } }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(complex* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -198,7 +198,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(complex* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, diff --git a/src/cunumeric/matrix/contract.cu b/src/cunumeric/matrix/contract.cu index 3748ac7ab..3d4155106 100644 --- a/src/cunumeric/matrix/contract.cu +++ b/src/cunumeric/matrix/contract.cu @@ -152,7 +152,7 @@ __host__ void contract(T* lhs_data, } template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(__half* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -190,7 +190,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(float* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -228,7 +228,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(double* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -266,7 +266,7 @@ 
struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(complex* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -304,7 +304,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(complex* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, diff --git a/src/cunumeric/matrix/contract_omp.cc b/src/cunumeric/matrix/contract_omp.cc index 539ac9a74..698690cfe 100644 --- a/src/cunumeric/matrix/contract_omp.cc +++ b/src/cunumeric/matrix/contract_omp.cc @@ -26,7 +26,7 @@ namespace cunumeric { using namespace tblis; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(float* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -58,7 +58,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(double* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -90,7 +90,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(__half* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -126,29 +126,29 @@ struct ContractImplBody { float* rhs2_copy_data = allocate_buffer(rhs2_size); half_tensor_to_float(rhs2_copy_data, rhs2_data, rhs2_ndim, rhs2_shape, rhs2_strides); - ContractImplBody{}(lhs_copy_data, - lhs_ndim, - lhs_shape, - lhs_copy_strides.data(), - lhs_modes, - rhs1_copy_data, - rhs1_ndim, - rhs1_shape, - rhs1_copy_strides.data(), - rhs1_modes, - rhs2_copy_data, - rhs2_ndim, - rhs2_shape, - rhs2_copy_strides.data(), - rhs2_modes, - lhs_overwritable); + ContractImplBody{}(lhs_copy_data, + lhs_ndim, + lhs_shape, + lhs_copy_strides.data(), + lhs_modes, + rhs1_copy_data, + rhs1_ndim, + rhs1_shape, + rhs1_copy_strides.data(), + rhs1_modes, + rhs2_copy_data, + rhs2_ndim, + rhs2_shape, + rhs2_copy_strides.data(), + rhs2_modes, + lhs_overwritable); float_tensor_to_half(lhs_data, lhs_copy_data, lhs_ndim, lhs_shape, lhs_strides); } }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(complex* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, @@ -191,7 +191,7 @@ struct ContractImplBody { }; template <> -struct ContractImplBody { +struct ContractImplBody { void operator()(complex* lhs_data, size_t lhs_ndim, int64_t* lhs_shape, diff --git a/src/cunumeric/matrix/contract_template.inl b/src/cunumeric/matrix/contract_template.inl index 1600bd7fe..a7fa69fa1 100644 --- a/src/cunumeric/matrix/contract_template.inl +++ b/src/cunumeric/matrix/contract_template.inl @@ -28,21 +28,21 @@ namespace cunumeric { using namespace legate; -template +template struct ContractImplBody; -template +template struct support_contract : std::false_type {}; template <> -struct support_contract : std::true_type {}; +struct support_contract : std::true_type {}; template <> -struct support_contract : std::true_type {}; +struct support_contract : std::true_type {}; template <> -struct support_contract : std::true_type {}; +struct support_contract : std::true_type {}; template <> -struct support_contract : std::true_type {}; +struct support_contract : std::true_type {}; template <> -struct support_contract : std::true_type {}; +struct support_contract : std::true_type {}; #if 0 // debugging output @@ -77,9 +77,7 @@ void print_ptr(const char* title, const T* vals, size_t len) template struct ContractImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(ContractArgs& args) const { using T = legate_type_of; @@ -195,9 +193,7 @@ struct 
ContractImpl { #endif } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(ContractArgs& args) const { assert(false); diff --git a/src/cunumeric/matrix/diag.cc b/src/cunumeric/matrix/diag.cc index 65d7d5cfb..84140b1af 100644 --- a/src/cunumeric/matrix/diag.cc +++ b/src/cunumeric/matrix/diag.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct DiagImplBody { using VAL = legate_type_of; @@ -52,7 +52,7 @@ struct DiagImplBody { }; // not extract (create a new 2D matrix with diagonal from vector) -template +template struct DiagImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/diag.cu b/src/cunumeric/matrix/diag.cu index 4797cfb56..17b6a2564 100644 --- a/src/cunumeric/matrix/diag.cu +++ b/src/cunumeric/matrix/diag.cu @@ -60,7 +60,7 @@ __global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) } } -template +template struct DiagImplBody { using VAL = legate_type_of; @@ -92,7 +92,7 @@ struct DiagImplBody { }; // not extract (create a new 2D matrix with diagonal from vector) -template +template struct DiagImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/diag_omp.cc b/src/cunumeric/matrix/diag_omp.cc index 986561e25..5d2224d0c 100644 --- a/src/cunumeric/matrix/diag_omp.cc +++ b/src/cunumeric/matrix/diag_omp.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct DiagImplBody { using VAL = legate_type_of; @@ -57,7 +57,7 @@ struct DiagImplBody { }; // not extract (create a new 2D matrix with diagonal from vector) -template +template struct DiagImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/diag_template.inl b/src/cunumeric/matrix/diag_template.inl index d28a9c7c0..f2bf33b3f 100644 --- a/src/cunumeric/matrix/diag_template.inl +++ b/src/cunumeric/matrix/diag_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct DiagImplBody; template struct DiagImpl { - template + template void operator()(DiagArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/dot.cc b/src/cunumeric/matrix/dot.cc index b2b9d03c4..637ab6a4e 100644 --- a/src/cunumeric/matrix/dot.cc +++ b/src/cunumeric/matrix/dot.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct DotImplBody { using VAL = legate_type_of; using ACC = acc_type_of; diff --git a/src/cunumeric/matrix/dot.cu b/src/cunumeric/matrix/dot.cu index c5a3ffd9b..f99047932 100644 --- a/src/cunumeric/matrix/dot.cu +++ b/src/cunumeric/matrix/dot.cu @@ -43,7 +43,7 @@ static __global__ void __launch_bounds__(1, 1) copy_kernel(Buffer result, RedAcc out.reduce(0, result.read()); } -template +template struct DotImplBody { using VAL = legate_type_of; using ACC = acc_type_of; diff --git a/src/cunumeric/matrix/dot_omp.cc b/src/cunumeric/matrix/dot_omp.cc index c71c2c7d9..857ab8f26 100644 --- a/src/cunumeric/matrix/dot_omp.cc +++ b/src/cunumeric/matrix/dot_omp.cc @@ -24,7 +24,7 @@ namespace cunumeric { using namespace legate; -template +template struct DotImplBody { using VAL = legate_type_of; using ACC = acc_type_of; diff --git a/src/cunumeric/matrix/dot_template.inl b/src/cunumeric/matrix/dot_template.inl index fd16e2783..fae14df13 100644 --- a/src/cunumeric/matrix/dot_template.inl +++ b/src/cunumeric/matrix/dot_template.inl @@ -23,7 +23,7 @@ namespace cunumeric { using namespace legate; -template +template struct DotImplBody; template @@ -41,7 +41,7 @@ using acc_type_of = 
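contract_template.inl pairs a support_contract trait with two SFINAE'd operator() overloads: one real implementation for supported type codes and an assert(false) fallback that should be unreachable because the Python layer rejects unsupported dtypes first. A compilable sketch of the pattern, with Code standing in for legate::Type::Code and the body elided:

#include <cassert>
#include <type_traits>

enum class Code { INT32, FLOAT32, FLOAT64, COMPLEX64 };  // stand-in for legate::Type::Code

template <Code CODE> struct support_contract : std::false_type {};
template <> struct support_contract<Code::FLOAT32>   : std::true_type {};
template <> struct support_contract<Code::FLOAT64>   : std::true_type {};
template <> struct support_contract<Code::COMPLEX64> : std::true_type {};

struct ContractImpl {
  // chosen when a ContractImplBody specialization exists for CODE
  template <Code CODE, std::enable_if_t<support_contract<CODE>::value>* = nullptr>
  void operator()() const { /* run the real tensor contraction */ }

  // fallback; unreachable as long as unsupported dtypes are filtered upstream
  template <Code CODE, std::enable_if_t<!support_contract<CODE>::value>* = nullptr>
  void operator()() const { assert(false); }
};

// usage: ContractImpl{}.operator()<Code::FLOAT32>();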
typename AccTypeOf::type; template struct DotImpl { - template + template void operator()(DotArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/gemm.cc b/src/cunumeric/matrix/gemm.cc index eee1205fe..4160bb03e 100644 --- a/src/cunumeric/matrix/gemm.cc +++ b/src/cunumeric/matrix/gemm.cc @@ -47,7 +47,7 @@ static inline void complex_gemm_template( } template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(float* lhs, const float* rhs1, const float* rhs2, int32_t m, int32_t n, int32_t k) { gemm_template(cblas_sgemm, lhs, rhs1, rhs2, m, n, k); @@ -55,7 +55,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()( double* lhs, const double* rhs1, const double* rhs2, int32_t m, int32_t n, int32_t k) { @@ -64,7 +64,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(complex* lhs_, const complex* rhs1_, const complex* rhs2_, @@ -81,7 +81,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(complex* lhs_, const complex* rhs1_, const complex* rhs2_, diff --git a/src/cunumeric/matrix/gemm.cu b/src/cunumeric/matrix/gemm.cu index 7b17c8477..8fff167ff 100644 --- a/src/cunumeric/matrix/gemm.cu +++ b/src/cunumeric/matrix/gemm.cu @@ -62,7 +62,7 @@ static inline void complex_gemm_template( } template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(float* lhs, const float* rhs1, const float* rhs2, int32_t m, int32_t n, int32_t k) { gemm_template(cublasSgemm, lhs, rhs1, rhs2, m, n, k); @@ -70,7 +70,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()( double* lhs, const double* rhs1, const double* rhs2, int32_t m, int32_t n, int32_t k) { @@ -79,7 +79,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(complex* lhs_, const complex* rhs1_, const complex* rhs2_, @@ -96,7 +96,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(complex* lhs_, const complex* rhs1_, const complex* rhs2_, diff --git a/src/cunumeric/matrix/gemm_omp.cc b/src/cunumeric/matrix/gemm_omp.cc index 0af9e6023..69b20c673 100644 --- a/src/cunumeric/matrix/gemm_omp.cc +++ b/src/cunumeric/matrix/gemm_omp.cc @@ -48,7 +48,7 @@ static inline void complex_gemm_template( } template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(float* lhs, const float* rhs1, const float* rhs2, int32_t m, int32_t n, int32_t k) { gemm_template(cblas_sgemm, lhs, rhs1, rhs2, m, n, k); @@ -56,7 +56,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()( double* lhs, const double* rhs1, const double* rhs2, int32_t m, int32_t n, int32_t k) { @@ -65,7 +65,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(complex* lhs_, const complex* rhs1_, const complex* rhs2_, @@ -82,7 +82,7 @@ struct GemmImplBody { }; template <> -struct GemmImplBody { +struct GemmImplBody { void operator()(complex* lhs_, const complex* rhs1_, const complex* rhs2_, diff --git a/src/cunumeric/matrix/gemm_template.inl b/src/cunumeric/matrix/gemm_template.inl index 92d500398..09178be1b 100644 --- a/src/cunumeric/matrix/gemm_template.inl +++ b/src/cunumeric/matrix/gemm_template.inl @@ -23,23 +23,23 @@ namespace cunumeric { using namespace legate; -template +template struct GemmImplBody; -template +template struct support_gemm : 
std::false_type {}; template <> -struct support_gemm : std::true_type {}; +struct support_gemm : std::true_type {}; template <> -struct support_gemm : std::true_type {}; +struct support_gemm : std::true_type {}; template <> -struct support_gemm : std::true_type {}; +struct support_gemm : std::true_type {}; template <> -struct support_gemm : std::true_type {}; +struct support_gemm : std::true_type {}; template struct GemmImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& lhs_array, Array& rhs1_array, Array& rhs2_array) const { using VAL = legate_type_of; @@ -67,7 +67,7 @@ struct GemmImpl { GemmImplBody()(lhs, rhs1, rhs2, m, n, k); } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& lhs_array, Array& rhs1_array, Array& rhs2_array) const { assert(false); diff --git a/src/cunumeric/matrix/matmul.cu b/src/cunumeric/matrix/matmul.cu index 7ed65f832..934deb965 100644 --- a/src/cunumeric/matrix/matmul.cu +++ b/src/cunumeric/matrix/matmul.cu @@ -28,7 +28,7 @@ namespace cunumeric { // for this matrix shape and GPU. template <> -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -73,7 +73,7 @@ struct MatMulImplBody { }; template <> -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -114,7 +114,7 @@ struct MatMulImplBody { }; template <> -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -158,7 +158,7 @@ struct MatMulImplBody { }; template <> -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -206,7 +206,7 @@ struct MatMulImplBody { }; template <> -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, diff --git a/src/cunumeric/matrix/matmul_cpu.inl b/src/cunumeric/matrix/matmul_cpu.inl index e059ac384..16e286045 100644 --- a/src/cunumeric/matrix/matmul_cpu.inl +++ b/src/cunumeric/matrix/matmul_cpu.inl @@ -28,7 +28,7 @@ using namespace Legion; using namespace legate; template -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -61,7 +61,7 @@ struct MatMulImplBody { }; template -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -93,7 +93,7 @@ struct MatMulImplBody { }; template -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -138,7 +138,7 @@ struct MatMulImplBody { }; template -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, @@ -176,7 +176,7 @@ struct MatMulImplBody { }; template -struct MatMulImplBody { +struct MatMulImplBody { void operator()(size_t m, size_t n, size_t k, diff --git a/src/cunumeric/matrix/matmul_template.inl b/src/cunumeric/matrix/matmul_template.inl index b1ff0a4ba..967860f53 100644 --- a/src/cunumeric/matrix/matmul_template.inl +++ b/src/cunumeric/matrix/matmul_template.inl @@ -24,35 +24,35 @@ namespace cunumeric { using namespace legate; -template +template struct MatMulImplBody; -template +template struct support_matmul : std::false_type {}; template <> -struct support_matmul : std::true_type { +struct support_matmul : std::true_type { using ACC_TYPE = double; }; template <> -struct support_matmul : std::true_type { +struct support_matmul : std::true_type { using ACC_TYPE = float; }; template <> -struct support_matmul : std::true_type { +struct support_matmul : std::true_type { using 
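matmul_template.inl extends the same trait idea: each support_matmul specialization also names the accumulator type, so half- and single-precision inputs accumulate in float and double-precision in double (the complex codes accumulate in themselves). Sketch, with the code-to-accumulator pairing inferred from the order of the specializations above:

#include <type_traits>

enum class Code { FLOAT16, FLOAT32, FLOAT64 };  // stand-in for legate::Type::Code

template <Code CODE> struct support_matmul : std::false_type {};
template <> struct support_matmul<Code::FLOAT64> : std::true_type { using ACC_TYPE = double; };
template <> struct support_matmul<Code::FLOAT32> : std::true_type { using ACC_TYPE = float; };
// half-precision inputs accumulate in float to limit rounding error
template <> struct support_matmul<Code::FLOAT16> : std::true_type { using ACC_TYPE = float; };

// the dispatcher pulls the accumulator out of the trait:
//   using ACC = typename support_matmul<CODE>::ACC_TYPE;
static_assert(std::is_same_v<support_matmul<Code::FLOAT16>::ACC_TYPE, float>);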
ACC_TYPE = float; }; template <> -struct support_matmul : std::true_type { +struct support_matmul : std::true_type { using ACC_TYPE = complex; }; template <> -struct support_matmul : std::true_type { +struct support_matmul : std::true_type { using ACC_TYPE = complex; }; template struct MatMulImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(MatMulArgs& args) const { using VAL = legate_type_of; @@ -105,7 +105,7 @@ struct MatMulImpl { args.lhs.is_readable()); } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(MatMulArgs& args) const { assert(false); diff --git a/src/cunumeric/matrix/matvecmul.cu b/src/cunumeric/matrix/matvecmul.cu index 53c5f7104..d54e28b48 100644 --- a/src/cunumeric/matrix/matvecmul.cu +++ b/src/cunumeric/matrix/matvecmul.cu @@ -22,7 +22,7 @@ namespace cunumeric { template <> -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, float* lhs, @@ -75,7 +75,7 @@ struct MatVecMulImplBody { }; template <> -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, double* lhs, @@ -122,7 +122,7 @@ struct MatVecMulImplBody { }; template <> -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, float* lhs, @@ -164,7 +164,7 @@ struct MatVecMulImplBody { }; template <> -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, complex* lhs_, @@ -218,7 +218,7 @@ struct MatVecMulImplBody { }; template <> -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, complex* lhs_, diff --git a/src/cunumeric/matrix/matvecmul_cpu.inl b/src/cunumeric/matrix/matvecmul_cpu.inl index 92e99a2c6..6797d701b 100644 --- a/src/cunumeric/matrix/matvecmul_cpu.inl +++ b/src/cunumeric/matrix/matvecmul_cpu.inl @@ -28,7 +28,7 @@ using namespace Legion; using namespace legate; template -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, float* lhs, @@ -46,7 +46,7 @@ struct MatVecMulImplBody { }; template -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, double* lhs, @@ -63,7 +63,7 @@ struct MatVecMulImplBody { }; template -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, float* lhs, @@ -81,13 +81,13 @@ struct MatVecMulImplBody { half_matrix_to_float(mat_copy, mat, m, n, mat_stride); half_vector_to_float(vec_copy, vec, vec_size); - MatVecMulImplBody{}( + MatVecMulImplBody{}( m, n, lhs, mat_copy, vec_copy, n, transpose_mat, lhs_overwritable); } }; template -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, complex* lhs_, @@ -109,7 +109,7 @@ struct MatVecMulImplBody { }; template -struct MatVecMulImplBody { +struct MatVecMulImplBody { void operator()(size_t m, size_t n, complex* lhs_, diff --git a/src/cunumeric/matrix/matvecmul_template.inl b/src/cunumeric/matrix/matvecmul_template.inl index 26c3ba876..547d376d1 100644 --- a/src/cunumeric/matrix/matvecmul_template.inl +++ b/src/cunumeric/matrix/matvecmul_template.inl @@ -24,35 +24,35 @@ namespace cunumeric { using namespace legate; -template +template struct MatVecMulImplBody; -template +template struct support_matvecmul : std::false_type {}; template <> -struct support_matvecmul : std::true_type { +struct support_matvecmul : std::true_type { using ACC_TYPE = double; }; template <> -struct support_matvecmul : std::true_type { +struct support_matvecmul : 
std::true_type { using ACC_TYPE = float; }; template <> -struct support_matvecmul : std::true_type { +struct support_matvecmul : std::true_type { using ACC_TYPE = float; }; template <> -struct support_matvecmul : std::true_type { +struct support_matvecmul : std::true_type { using ACC_TYPE = complex; }; template <> -struct support_matvecmul : std::true_type { +struct support_matvecmul : std::true_type { using ACC_TYPE = complex; }; template struct MatVecMulImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(MatVecMulArgs& args) const { using VAL = legate_type_of; @@ -86,7 +86,7 @@ struct MatVecMulImpl { m, n, lhs, mat, vec, mat_stride, transpose_mat, args.lhs.is_readable()); } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(MatVecMulArgs& args) const { assert(false); diff --git a/src/cunumeric/matrix/potrf.cc b/src/cunumeric/matrix/potrf.cc index a49be25da..02ae06246 100644 --- a/src/cunumeric/matrix/potrf.cc +++ b/src/cunumeric/matrix/potrf.cc @@ -25,7 +25,7 @@ namespace cunumeric { using namespace legate; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(float* array, int32_t m, int32_t n) { char uplo = 'L'; @@ -36,7 +36,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(double* array, int32_t m, int32_t n) { char uplo = 'L'; @@ -47,7 +47,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(complex* array, int32_t m, int32_t n) { char uplo = 'L'; @@ -58,7 +58,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(complex* array, int32_t m, int32_t n) { char uplo = 'L'; diff --git a/src/cunumeric/matrix/potrf.cu b/src/cunumeric/matrix/potrf.cu index 0a8bba066..68616525f 100644 --- a/src/cunumeric/matrix/potrf.cu +++ b/src/cunumeric/matrix/potrf.cu @@ -49,7 +49,7 @@ static inline void potrf_template( } template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(float* array, int32_t m, int32_t n) { potrf_template(cusolverDnSpotrf_bufferSize, cusolverDnSpotrf, array, m, n); @@ -57,7 +57,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(double* array, int32_t m, int32_t n) { potrf_template(cusolverDnDpotrf_bufferSize, cusolverDnDpotrf, array, m, n); @@ -65,7 +65,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(complex* array, int32_t m, int32_t n) { potrf_template( @@ -74,7 +74,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(complex* array, int32_t m, int32_t n) { potrf_template(cusolverDnZpotrf_bufferSize, diff --git a/src/cunumeric/matrix/potrf_omp.cc b/src/cunumeric/matrix/potrf_omp.cc index 51e729bc1..d26143a6f 100644 --- a/src/cunumeric/matrix/potrf_omp.cc +++ b/src/cunumeric/matrix/potrf_omp.cc @@ -26,7 +26,7 @@ namespace cunumeric { using namespace legate; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(float* array, int32_t m, int32_t n) { char uplo = 'L'; @@ -37,7 +37,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(double* array, int32_t m, int32_t n) { char uplo = 'L'; @@ -48,7 +48,7 @@ struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(complex* array, int32_t m, int32_t n) { char uplo = 'L'; @@ -59,7 +59,7 @@ 
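Each CPU PotrfImplBody specialization wraps one LAPACK Cholesky routine; the visible `char uplo = 'L'` fragments show that the lower triangle is factored in place. A sketch of the FLOAT32 body under that reading; the raw `spotrf_` binding is an assumption, since the patch's own wrapper spelling was lost with the stripped template arguments:

#include <cstdint>

// assumed LAPACK binding; cuNumeric's actual declaration may differ
extern "C" void spotrf_(const char* uplo, const int32_t* n, float* a,
                        const int32_t* lda, int32_t* info);

enum class Code { FLOAT32 };  // stand-in for legate::Type::Code
template <Code CODE> struct PotrfBody;

template <>
struct PotrfBody<Code::FLOAT32> {
  void operator()(float* array, int32_t m, int32_t n) const
  {
    char uplo    = 'L';  // factor the lower triangle, matching the fragments above
    int32_t info = 0;
    spotrf_(&uplo, &n, array, &m, &info);  // Cholesky: A = L * L^T, in place
    // production code would surface info != 0 (matrix not positive definite)
  }
};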
struct PotrfImplBody { }; template <> -struct PotrfImplBody { +struct PotrfImplBody { void operator()(complex* array, int32_t m, int32_t n) { char uplo = 'L'; diff --git a/src/cunumeric/matrix/potrf_template.inl b/src/cunumeric/matrix/potrf_template.inl index afceecfe5..55c782ad0 100644 --- a/src/cunumeric/matrix/potrf_template.inl +++ b/src/cunumeric/matrix/potrf_template.inl @@ -23,23 +23,23 @@ namespace cunumeric { using namespace legate; -template +template struct PotrfImplBody; -template +template struct support_potrf : std::false_type {}; template <> -struct support_potrf : std::true_type {}; +struct support_potrf : std::true_type {}; template <> -struct support_potrf : std::true_type {}; +struct support_potrf : std::true_type {}; template <> -struct support_potrf : std::true_type {}; +struct support_potrf : std::true_type {}; template <> -struct support_potrf : std::true_type {}; +struct support_potrf : std::true_type {}; template struct PotrfImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& array) const { using VAL = legate_type_of; @@ -58,7 +58,7 @@ struct PotrfImpl { PotrfImplBody()(arr, m, n); } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& array) const { assert(false); diff --git a/src/cunumeric/matrix/solve.cu b/src/cunumeric/matrix/solve.cu index 3e490bb78..3f3262b15 100644 --- a/src/cunumeric/matrix/solve.cu +++ b/src/cunumeric/matrix/solve.cu @@ -61,7 +61,7 @@ static inline void solve_template(GetrfBufferSize getrf_buffer_size, } template <> -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, float* a, float* b) { solve_template( @@ -70,7 +70,7 @@ struct SolveImplBody { }; template <> -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, double* a, double* b) { solve_template( @@ -79,7 +79,7 @@ struct SolveImplBody { }; template <> -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, complex* a, complex* b) { solve_template(cusolverDnCgetrf_bufferSize, @@ -94,7 +94,7 @@ struct SolveImplBody { }; template <> -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, complex* a, complex* b) { solve_template(cusolverDnZgetrf_bufferSize, diff --git a/src/cunumeric/matrix/solve_cpu.inl b/src/cunumeric/matrix/solve_cpu.inl index 1c036ee61..7275a2c0e 100644 --- a/src/cunumeric/matrix/solve_cpu.inl +++ b/src/cunumeric/matrix/solve_cpu.inl @@ -24,7 +24,7 @@ namespace cunumeric { using namespace legate; template -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, float* a, float* b) { auto ipiv = create_buffer(std::min(m, n)); @@ -37,7 +37,7 @@ struct SolveImplBody { }; template -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, double* a, double* b) { auto ipiv = create_buffer(std::min(m, n)); @@ -50,7 +50,7 @@ struct SolveImplBody { }; template -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, complex* a_, complex* b_) { auto ipiv = create_buffer(std::min(m, n)); @@ -66,7 +66,7 @@ struct SolveImplBody { }; template -struct SolveImplBody { +struct SolveImplBody { void operator()(int32_t m, int32_t n, int32_t nrhs, complex* a_, complex* b_) { auto ipiv = create_buffer(std::min(m, n)); diff --git a/src/cunumeric/matrix/solve_template.inl b/src/cunumeric/matrix/solve_template.inl 
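solve_cpu.inl uses the standard two-step LU solve: factor with GETRF, then back-substitute with GETRS, reusing a pivot buffer of length min(m, n) (the `create_buffer(std::min(m, n))` calls above). Sketch assuming a square system, with plain LAPACK bindings and std::vector standing in for the legate buffer:

#include <algorithm>
#include <cstdint>
#include <vector>

extern "C" void sgetrf_(const int32_t* m, const int32_t* n, float* a,
                        const int32_t* lda, int32_t* ipiv, int32_t* info);
extern "C" void sgetrs_(const char* trans, const int32_t* n, const int32_t* nrhs,
                        const float* a, const int32_t* lda, const int32_t* ipiv,
                        float* b, const int32_t* ldb, int32_t* info);

void solve_f32(int32_t m, int32_t n, int32_t nrhs, float* a, float* b)
{
  std::vector<int32_t> ipiv(std::min(m, n));  // stand-in for create_buffer
  int32_t info = 0;
  char trans   = 'N';
  sgetrf_(&m, &n, a, &m, ipiv.data(), &info);                    // P * A = L * U
  sgetrs_(&trans, &n, &nrhs, a, &m, ipiv.data(), b, &n, &info);  // solve A x = b
}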
index 3fa48b778..e338b8326 100644 --- a/src/cunumeric/matrix/solve_template.inl +++ b/src/cunumeric/matrix/solve_template.inl @@ -25,23 +25,23 @@ namespace cunumeric { using namespace legate; -template +template struct SolveImplBody; -template +template struct support_solve : std::false_type {}; template <> -struct support_solve : std::true_type {}; +struct support_solve : std::true_type {}; template <> -struct support_solve : std::true_type {}; +struct support_solve : std::true_type {}; template <> -struct support_solve : std::true_type {}; +struct support_solve : std::true_type {}; template <> -struct support_solve : std::true_type {}; +struct support_solve : std::true_type {}; template struct SolveImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& a_array, Array& b_array) const { using VAL = legate_type_of; @@ -95,7 +95,7 @@ struct SolveImpl { SolveImplBody()(m, n, nrhs, a, b); } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& a_array, Array& b_array) const { assert(false); diff --git a/src/cunumeric/matrix/syrk.cc b/src/cunumeric/matrix/syrk.cc index 2149bda5c..2fa5bc64c 100644 --- a/src/cunumeric/matrix/syrk.cc +++ b/src/cunumeric/matrix/syrk.cc @@ -33,7 +33,7 @@ static inline void syrk_template(Syrk syrk, VAL* lhs, const VAL* rhs, int32_t m, } template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(float* lhs, const float* rhs, int32_t m, int32_t n) { syrk_template(cblas_ssyrk, lhs, rhs, m, n); @@ -41,7 +41,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(double* lhs, const double* rhs, int32_t m, int32_t n) { syrk_template(cblas_dsyrk, lhs, rhs, m, n); @@ -49,7 +49,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ float*>(lhs_); @@ -64,7 +64,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ double*>(lhs_); diff --git a/src/cunumeric/matrix/syrk.cu b/src/cunumeric/matrix/syrk.cu index d7f38bcdd..1fdbd2ca6 100644 --- a/src/cunumeric/matrix/syrk.cu +++ b/src/cunumeric/matrix/syrk.cu @@ -42,7 +42,7 @@ static inline void syrk_template( } template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(float* lhs, const float* rhs, int32_t m, int32_t n) { syrk_template(cublasSsyrk, lhs, rhs, m, n, static_cast(0)); @@ -50,7 +50,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(double* lhs, const double* rhs, int32_t m, int32_t n) { syrk_template(cublasDsyrk, lhs, rhs, m, n, static_cast(0)); @@ -58,7 +58,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast(lhs_); @@ -69,7 +69,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast(lhs_); diff --git a/src/cunumeric/matrix/syrk_omp.cc b/src/cunumeric/matrix/syrk_omp.cc index 849429aac..b276d71a2 100644 --- a/src/cunumeric/matrix/syrk_omp.cc +++ b/src/cunumeric/matrix/syrk_omp.cc @@ -34,7 +34,7 @@ static inline void syrk_template(Syrk syrk, VAL* lhs, const 
VAL* rhs, int32_t m, } template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(float* lhs, const float* rhs, int32_t m, int32_t n) { syrk_template(cblas_ssyrk, lhs, rhs, m, n); @@ -42,7 +42,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(double* lhs, const double* rhs, int32_t m, int32_t n) { syrk_template(cblas_dsyrk, lhs, rhs, m, n); @@ -50,7 +50,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ float*>(lhs_); @@ -65,7 +65,7 @@ struct SyrkImplBody { }; template <> -struct SyrkImplBody { +struct SyrkImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ double*>(lhs_); diff --git a/src/cunumeric/matrix/syrk_template.inl b/src/cunumeric/matrix/syrk_template.inl index 66490b34e..58ea4abae 100644 --- a/src/cunumeric/matrix/syrk_template.inl +++ b/src/cunumeric/matrix/syrk_template.inl @@ -23,23 +23,23 @@ namespace cunumeric { using namespace legate; -template +template struct SyrkImplBody; -template +template struct support_syrk : std::false_type {}; template <> -struct support_syrk : std::true_type {}; +struct support_syrk : std::true_type {}; template <> -struct support_syrk : std::true_type {}; +struct support_syrk : std::true_type {}; template <> -struct support_syrk : std::true_type {}; +struct support_syrk : std::true_type {}; template <> -struct support_syrk : std::true_type {}; +struct support_syrk : std::true_type {}; template struct SyrkImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& lhs_array, Array& rhs_array) const { using VAL = legate_type_of; @@ -62,7 +62,7 @@ struct SyrkImpl { SyrkImplBody()(lhs, rhs, m, n); } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& lhs_array, Array& rhs_array) const { assert(false); diff --git a/src/cunumeric/matrix/tile_template.inl b/src/cunumeric/matrix/tile_template.inl index 89f6edd55..35f8dc967 100644 --- a/src/cunumeric/matrix/tile_template.inl +++ b/src/cunumeric/matrix/tile_template.inl @@ -67,7 +67,7 @@ struct TileImpl { template struct TileDispatch { - template + template void operator()(TileArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/transpose.cc b/src/cunumeric/matrix/transpose.cc index bc829f440..224a36ab2 100644 --- a/src/cunumeric/matrix/transpose.cc +++ b/src/cunumeric/matrix/transpose.cc @@ -26,7 +26,7 @@ namespace cunumeric { using namespace legate; -template +template struct TransposeImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/transpose.cu b/src/cunumeric/matrix/transpose.cu index 0d4c210e7..5ccd3ef7a 100644 --- a/src/cunumeric/matrix/transpose.cu +++ b/src/cunumeric/matrix/transpose.cu @@ -136,7 +136,7 @@ __global__ static void __launch_bounds__((TILE_DIM * BLOCK_ROWS), MIN_CTAS_PER_S } } -template +template struct TransposeImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/transpose_omp.cc b/src/cunumeric/matrix/transpose_omp.cc index c1750434f..729719242 100644 --- a/src/cunumeric/matrix/transpose_omp.cc +++ b/src/cunumeric/matrix/transpose_omp.cc @@ -24,7 +24,7 @@ namespace cunumeric { using namespace legate; -template +template struct TransposeImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/transpose_template.inl 
b/src/cunumeric/matrix/transpose_template.inl index 3c8de1f6e..4d695c3cd 100644 --- a/src/cunumeric/matrix/transpose_template.inl +++ b/src/cunumeric/matrix/transpose_template.inl @@ -23,12 +23,12 @@ namespace cunumeric { using namespace legate; -template +template struct TransposeImplBody; template struct TransposeImpl { - template + template void operator()(TransposeArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/trilu.cc b/src/cunumeric/matrix/trilu.cc index 8b44c2517..7d0e55e4f 100644 --- a/src/cunumeric/matrix/trilu.cc +++ b/src/cunumeric/matrix/trilu.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct TriluImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/trilu.cu b/src/cunumeric/matrix/trilu.cu index 2158f9dbf..6a8c7a02b 100644 --- a/src/cunumeric/matrix/trilu.cu +++ b/src/cunumeric/matrix/trilu.cu @@ -50,7 +50,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) } } -template +template struct TriluImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/trilu_omp.cc b/src/cunumeric/matrix/trilu_omp.cc index 6e9e9598f..b4e2482da 100644 --- a/src/cunumeric/matrix/trilu_omp.cc +++ b/src/cunumeric/matrix/trilu_omp.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct TriluImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/matrix/trilu_template.inl b/src/cunumeric/matrix/trilu_template.inl index 4c7d019a4..ca417f5af 100644 --- a/src/cunumeric/matrix/trilu_template.inl +++ b/src/cunumeric/matrix/trilu_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct TriluImplBody; template struct TriluImpl { - template = 2)>* = nullptr> + template = 2)>* = nullptr> void operator()(TriluArgs& args) const { using VAL = legate_type_of; @@ -59,7 +59,7 @@ struct TriluImpl { } } - template * = nullptr> + template * = nullptr> void operator()(TriluArgs& args) const { assert(false); diff --git a/src/cunumeric/matrix/trsm.cc b/src/cunumeric/matrix/trsm.cc index 32382465d..e61c86981 100644 --- a/src/cunumeric/matrix/trsm.cc +++ b/src/cunumeric/matrix/trsm.cc @@ -49,7 +49,7 @@ static inline void complex_trsm_template(Trsm trsm, VAL* lhs, const VAL* rhs, in } template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(float* lhs, const float* rhs, int32_t m, int32_t n) { trsm_template(cblas_strsm, lhs, rhs, m, n); @@ -57,7 +57,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(double* lhs, const double* rhs, int32_t m, int32_t n) { trsm_template(cblas_dtrsm, lhs, rhs, m, n); @@ -65,7 +65,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ float*>(lhs_); @@ -76,7 +76,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ double*>(lhs_); diff --git a/src/cunumeric/matrix/trsm.cu b/src/cunumeric/matrix/trsm.cu index 05595ee28..8bd5d66c7 100644 --- a/src/cunumeric/matrix/trsm.cu +++ b/src/cunumeric/matrix/trsm.cu @@ -43,7 +43,7 @@ static inline void trsm_template( } template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(float* lhs, const float* rhs, int32_t m, int32_t n) { 
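TriluImpl shows the value-based flavor of the same SFINAE dispatch: tril/triu are only meaningful on matrices, so the real overload requires DIM >= 2 and the DIM < 2 overload just asserts. Minimal rendering:

#include <cassert>
#include <type_traits>

struct TriluImpl {
  template <int DIM, std::enable_if_t<(DIM >= 2)>* = nullptr>
  void operator()() const { /* zero out elements above/below the k-th diagonal */ }

  template <int DIM, std::enable_if_t<(DIM < 2)>* = nullptr>
  void operator()() const { assert(false); }  // 0-d/1-d stores are never dispatched here
};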
trsm_template(cublasStrsm, lhs, rhs, m, n, 1.0F); @@ -51,7 +51,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(double* lhs, const double* rhs, int32_t m, int32_t n) { trsm_template(cublasDtrsm, lhs, rhs, m, n, 1.0); @@ -59,7 +59,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast(lhs_); @@ -70,7 +70,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast(lhs_); diff --git a/src/cunumeric/matrix/trsm_omp.cc b/src/cunumeric/matrix/trsm_omp.cc index 255a04cf0..2041ec17a 100644 --- a/src/cunumeric/matrix/trsm_omp.cc +++ b/src/cunumeric/matrix/trsm_omp.cc @@ -50,7 +50,7 @@ static inline void complex_trsm_template(Trsm trsm, VAL* lhs, const VAL* rhs, in } template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(float* lhs, const float* rhs, int32_t m, int32_t n) { trsm_template(cblas_strsm, lhs, rhs, m, n); @@ -58,7 +58,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(double* lhs, const double* rhs, int32_t m, int32_t n) { trsm_template(cblas_dtrsm, lhs, rhs, m, n); @@ -66,7 +66,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ float*>(lhs_); @@ -77,7 +77,7 @@ struct TrsmImplBody { }; template <> -struct TrsmImplBody { +struct TrsmImplBody { void operator()(complex* lhs_, const complex* rhs_, int32_t m, int32_t n) { auto lhs = reinterpret_cast<__complex__ double*>(lhs_); diff --git a/src/cunumeric/matrix/trsm_template.inl b/src/cunumeric/matrix/trsm_template.inl index d214aa9dd..28f37ba1b 100644 --- a/src/cunumeric/matrix/trsm_template.inl +++ b/src/cunumeric/matrix/trsm_template.inl @@ -23,23 +23,23 @@ namespace cunumeric { using namespace legate; -template +template struct TrsmImplBody; -template +template struct support_trsm : std::false_type {}; template <> -struct support_trsm : std::true_type {}; +struct support_trsm : std::true_type {}; template <> -struct support_trsm : std::true_type {}; +struct support_trsm : std::true_type {}; template <> -struct support_trsm : std::true_type {}; +struct support_trsm : std::true_type {}; template <> -struct support_trsm : std::true_type {}; +struct support_trsm : std::true_type {}; template struct TrsmImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& lhs_array, Array& rhs_array) const { using VAL = legate_type_of; @@ -62,7 +62,7 @@ struct TrsmImpl { TrsmImplBody()(lhs, rhs, m, n); } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(Array& lhs_array, Array& rhs_array) const { assert(false); diff --git a/src/cunumeric/nullary/arange_template.inl b/src/cunumeric/nullary/arange_template.inl index 97c87ef46..c71b9c44e 100644 --- a/src/cunumeric/nullary/arange_template.inl +++ b/src/cunumeric/nullary/arange_template.inl @@ -31,7 +31,7 @@ struct ArangeImplBody; template struct ArangeImpl { - template + template void operator()(ArangeArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/nullary/eye_template.inl b/src/cunumeric/nullary/eye_template.inl index 2554dd5fd..33cbe6054 100644 --- 
a/src/cunumeric/nullary/eye_template.inl +++ b/src/cunumeric/nullary/eye_template.inl @@ -31,7 +31,7 @@ struct EyeImplBody; template struct EyeImpl { - template + template void operator()(EyeArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/nullary/fill_template.inl b/src/cunumeric/nullary/fill_template.inl index 4c6726242..dc9c2f609 100644 --- a/src/cunumeric/nullary/fill_template.inl +++ b/src/cunumeric/nullary/fill_template.inl @@ -54,7 +54,7 @@ struct FillImpl { FillImplBody{}(out, fill_value, pitches, rect, dense); } - template + template void operator()(FillArgs& args) const { if (args.is_argval) { @@ -71,7 +71,15 @@ template static void fill_template(TaskContext& context) { FillArgs args{context.outputs()[0], context.inputs()[0], context.scalars()[0].value()}; - double_dispatch(args.out.dim(), args.out.code(), FillImpl{}, args); + Type::Code code{args.out.code()}; + if (Type::Code::STRUCT == code) { +#ifdef DEBUG_CUNUMERIC + assert(args.is_argval); +#endif + auto& field_type = static_cast(args.out.type()).field_type(1); + code = field_type.code; + } + double_dispatch(args.out.dim(), code, FillImpl{}, args); } } // namespace cunumeric diff --git a/src/cunumeric/random/rand_template.inl b/src/cunumeric/random/rand_template.inl index db5d4ccee..3b689a728 100644 --- a/src/cunumeric/random/rand_template.inl +++ b/src/cunumeric/random/rand_template.inl @@ -31,7 +31,7 @@ struct RandImplBody; template struct RandImpl { - template ::valid>* = nullptr> void operator()(RandArgs& args) const @@ -53,7 +53,7 @@ struct RandImpl { RandImplBody{}(out, rng, strides, pitches, rect); } - template ::valid>* = nullptr> void operator()(RandArgs& args) const diff --git a/src/cunumeric/random/rand_util.h b/src/cunumeric/random/rand_util.h index b492ccce0..11988c5ec 100644 --- a/src/cunumeric/random/rand_util.h +++ b/src/cunumeric/random/rand_util.h @@ -46,15 +46,15 @@ constexpr decltype(auto) op_dispatch(RandGenCode gen_code, Functor f, Fnargs&&.. 
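The fill_template.inl hunk above is the one behavioral change in this stretch of the patch: when the output store is struct-typed (an Argval, as produced by argmin/argmax), dispatch must run on the code of the value field rather than on STRUCT itself. A reconstruction of that hunk; the StructType cast target and the FillImpl template argument are assumptions filled in where the extraction dropped the angle-bracket text:

// hypothetical reconstruction, not recovered verbatim from the patch
Type::Code code{args.out.code()};
if (Type::Code::STRUCT == code) {
#ifdef DEBUG_CUNUMERIC
  assert(args.is_argval);  // struct-typed fills only come from Argval outputs
#endif
  // Argval packs (index, value); dispatch on the value field's code
  auto& field_type = static_cast<const StructType&>(args.out.type()).field_type(1);
  code             = field_type.code;
}
double_dispatch(args.out.dim(), code, FillImpl<KIND>{}, args);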
return f.template operator()(std::forward(args)...); } -template +template struct RandomGenerator { static constexpr bool valid = false; }; -template +template struct RandomGenerator { using RNG = Philox_2x32<10>; - static constexpr bool valid = CODE == legate::LegateTypeCode::DOUBLE_LT; + static constexpr bool valid = CODE == legate::Type::Code::FLOAT64; RandomGenerator(uint32_t ep, const std::vector& args) : epoch(ep) {} @@ -66,10 +66,10 @@ struct RandomGenerator { uint32_t epoch; }; -template +template struct RandomGenerator { using RNG = Philox_2x32<10>; - static constexpr bool valid = CODE == legate::LegateTypeCode::DOUBLE_LT; + static constexpr bool valid = CODE == legate::Type::Code::FLOAT64; RandomGenerator(uint32_t ep, const std::vector& args) : epoch(ep) {} @@ -174,7 +174,7 @@ struct RandomGenerator { uint32_t epoch; }; -template +template struct RandomGenerator { using RNG = Philox_2x32<10>; using VAL = legate::legate_type_of; diff --git a/src/cunumeric/scan/scan_global.cc b/src/cunumeric/scan/scan_global.cc index 753a84bcb..2df4ae14d 100644 --- a/src/cunumeric/scan/scan_global.cc +++ b/src/cunumeric/scan/scan_global.cc @@ -24,7 +24,7 @@ namespace cunumeric { using namespace legate; -template +template struct ScanGlobalImplBody { using OP = ScanOp; using VAL = legate_type_of; diff --git a/src/cunumeric/scan/scan_global.cu b/src/cunumeric/scan/scan_global.cu index 0be6ef994..ba1c5da9d 100644 --- a/src/cunumeric/scan/scan_global.cu +++ b/src/cunumeric/scan/scan_global.cu @@ -35,7 +35,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) out[idx] = func(out[idx], scalar); } -template +template struct ScanGlobalImplBody { using OP = ScanOp; using VAL = legate_type_of; diff --git a/src/cunumeric/scan/scan_global_omp.cc b/src/cunumeric/scan/scan_global_omp.cc index cdb80f3d9..3ad989aca 100644 --- a/src/cunumeric/scan/scan_global_omp.cc +++ b/src/cunumeric/scan/scan_global_omp.cc @@ -26,7 +26,7 @@ namespace cunumeric { using namespace legate; -template +template struct ScanGlobalImplBody { using OP = ScanOp; using VAL = legate_type_of; diff --git a/src/cunumeric/scan/scan_global_template.inl b/src/cunumeric/scan/scan_global_template.inl index 099a357d8..b96007dc2 100644 --- a/src/cunumeric/scan/scan_global_template.inl +++ b/src/cunumeric/scan/scan_global_template.inl @@ -21,12 +21,12 @@ namespace cunumeric { using namespace legate; -template +template struct ScanGlobalImplBody; template struct ScanGlobalImpl { - template + template void operator()(ScanGlobalArgs& args) const { using OP = ScanOp; diff --git a/src/cunumeric/scan/scan_global_util.h b/src/cunumeric/scan/scan_global_util.h index 502b9720c..ce2e8b522 100644 --- a/src/cunumeric/scan/scan_global_util.h +++ b/src/cunumeric/scan/scan_global_util.h @@ -40,16 +40,16 @@ constexpr decltype(auto) op_dispatch(ScanCode op_code, Functor f, Fnargs&&... 
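rand_util.h states the enum rename most plainly: the validity gate `CODE == legate::LegateTypeCode::DOUBLE_LT` becomes `CODE == legate::Type::Code::FLOAT64`, i.e. the old C-style spellings give way to width-based ones (BOOL_LT -> BOOL is confirmed later in convert_util.h; the remaining pairs below follow the same convention and are inferred, not recovered). Sketch of the gate:

// inferred old -> new spellings: HALF_LT -> FLOAT16, FLOAT_LT -> FLOAT32,
// DOUBLE_LT -> FLOAT64, COMPLEX64_LT -> COMPLEX64, COMPLEX128_LT -> COMPLEX128
enum class Code { BOOL, FLOAT16, FLOAT32, FLOAT64 };  // stand-in subset

template <Code CODE>
struct RandomGenerator {
  // the uniform/normal generators emit doubles, so only FLOAT64 is valid
  static constexpr bool valid = (CODE == Code::FLOAT64);
};
static_assert(RandomGenerator<Code::FLOAT64>::valid);
static_assert(!RandomGenerator<Code::FLOAT32>::valid);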
ar return f.template operator()(std::forward(args)...); } -template +template struct ScanOp {}; -template +template struct ScanOp : thrust::plus> { static constexpr int nan_identity = 0; ScanOp() {} }; -template +template struct ScanOp : thrust::multiplies> { static constexpr int nan_identity = 1; ScanOp() {} diff --git a/src/cunumeric/scan/scan_local.cc b/src/cunumeric/scan/scan_local.cc index bfc52d49f..3c49147c1 100644 --- a/src/cunumeric/scan/scan_local.cc +++ b/src/cunumeric/scan/scan_local.cc @@ -26,7 +26,7 @@ namespace cunumeric { using namespace legate; -template +template struct ScanLocalImplBody { using OP = ScanOp; using VAL = legate_type_of; @@ -62,7 +62,7 @@ struct ScanLocalImplBody { } }; -template +template struct ScanLocalNanImplBody { using OP = ScanOp; using VAL = legate_type_of; diff --git a/src/cunumeric/scan/scan_local.cu b/src/cunumeric/scan/scan_local.cu index 258e0e282..da4053182 100644 --- a/src/cunumeric/scan/scan_local.cu +++ b/src/cunumeric/scan/scan_local.cu @@ -37,7 +37,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) sum_val[0] = out[0]; } -template +template struct ScanLocalImplBody { using OP = ScanOp; using VAL = legate_type_of; @@ -77,7 +77,7 @@ struct ScanLocalImplBody { } }; -template +template struct ScanLocalNanImplBody { using OP = ScanOp; using VAL = legate_type_of; diff --git a/src/cunumeric/scan/scan_local_omp.cc b/src/cunumeric/scan/scan_local_omp.cc index 2b80ab97a..4fb2d2841 100644 --- a/src/cunumeric/scan/scan_local_omp.cc +++ b/src/cunumeric/scan/scan_local_omp.cc @@ -28,7 +28,7 @@ namespace cunumeric { using namespace legate; -template +template struct ScanLocalImplBody { using OP = ScanOp; using VAL = legate_type_of; @@ -64,7 +64,7 @@ struct ScanLocalImplBody { } }; -template +template struct ScanLocalNanImplBody { using OP = ScanOp; using VAL = legate_type_of; diff --git a/src/cunumeric/scan/scan_local_template.inl b/src/cunumeric/scan/scan_local_template.inl index c016873bb..154a86b35 100644 --- a/src/cunumeric/scan/scan_local_template.inl +++ b/src/cunumeric/scan/scan_local_template.inl @@ -21,16 +21,16 @@ namespace cunumeric { using namespace legate; -template +template struct ScanLocalImplBody; -template +template struct ScanLocalNanImplBody; template struct ScanLocalImpl { // Case where NANs are transformed - template ::value || legate::is_complex::value)>* = nullptr> @@ -56,7 +56,7 @@ struct ScanLocalImpl { ScanLocalNanImplBody()(func, out, in, args.sum_vals, pitches, rect); } // Case where NANs are as is - template ::value || legate::is_complex::value))>* = nullptr> diff --git a/src/cunumeric/scan/scan_local_util.h b/src/cunumeric/scan/scan_local_util.h index 0cfbacb00..b62db7a83 100644 --- a/src/cunumeric/scan/scan_local_util.h +++ b/src/cunumeric/scan/scan_local_util.h @@ -52,16 +52,16 @@ constexpr decltype(auto) op_dispatch(ScanCode op_code, return f.template operator()(std::forward(args)...); } -template +template struct ScanOp {}; -template +template struct ScanOp : thrust::plus> { static constexpr int nan_identity = 0; ScanOp() {} }; -template +template struct ScanOp : thrust::multiplies> { static constexpr int nan_identity = 1; ScanOp() {} diff --git a/src/cunumeric/search/argwhere.cc b/src/cunumeric/search/argwhere.cc index a3eed173d..a787c2f4c 100644 --- a/src/cunumeric/search/argwhere.cc +++ b/src/cunumeric/search/argwhere.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct ArgWhereImplBody { using VAL = legate_type_of; diff --git 
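The scan utilities pair each op code with a functor base and the identity used when the nan-variants overwrite NaNs: 0 for a cumulative sum, 1 for a cumulative product. Self-contained sketch with std:: functors standing in for the thrust:: ones named above, and a plain value type standing in for legate_type_of:

#include <functional>

enum class ScanCode { SUM, PROD };

template <ScanCode OP_CODE, class VAL> struct ScanOp;

template <class VAL>
struct ScanOp<ScanCode::SUM, VAL> : std::plus<VAL> {
  static constexpr int nan_identity = 0;  // a NaN replaced by 0 leaves a sum unchanged
};

template <class VAL>
struct ScanOp<ScanCode::PROD, VAL> : std::multiplies<VAL> {
  static constexpr int nan_identity = 1;  // a NaN replaced by 1 leaves a product unchanged
};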
a/src/cunumeric/search/argwhere.cu b/src/cunumeric/search/argwhere.cu index d4131ca6d..09819aca7 100644 --- a/src/cunumeric/search/argwhere.cu +++ b/src/cunumeric/search/argwhere.cu @@ -41,7 +41,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) } } -template +template struct ArgWhereImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/search/argwhere_omp.cc b/src/cunumeric/search/argwhere_omp.cc index 3cea7fbd0..51555b684 100644 --- a/src/cunumeric/search/argwhere_omp.cc +++ b/src/cunumeric/search/argwhere_omp.cc @@ -23,7 +23,7 @@ namespace cunumeric { using namespace legate; -template +template struct ArgWhereImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/search/argwhere_template.inl b/src/cunumeric/search/argwhere_template.inl index 5c0a57e5e..5c1a91a85 100644 --- a/src/cunumeric/search/argwhere_template.inl +++ b/src/cunumeric/search/argwhere_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct ArgWhereImplBody; template struct ArgWhereImpl { - template + template void operator()(ArgWhereArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/search/nonzero.cc b/src/cunumeric/search/nonzero.cc index 93f869b88..5e2da5113 100644 --- a/src/cunumeric/search/nonzero.cc +++ b/src/cunumeric/search/nonzero.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct NonzeroImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/search/nonzero.cu b/src/cunumeric/search/nonzero.cu index 92fcb5047..38cfa8480 100644 --- a/src/cunumeric/search/nonzero.cu +++ b/src/cunumeric/search/nonzero.cu @@ -39,7 +39,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) } } -template +template struct NonzeroImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/search/nonzero_omp.cc b/src/cunumeric/search/nonzero_omp.cc index 690202aee..e07fb5170 100644 --- a/src/cunumeric/search/nonzero_omp.cc +++ b/src/cunumeric/search/nonzero_omp.cc @@ -24,7 +24,7 @@ namespace cunumeric { using namespace legate; -template +template struct NonzeroImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/search/nonzero_template.inl b/src/cunumeric/search/nonzero_template.inl index fde09c76a..fb9935535 100644 --- a/src/cunumeric/search/nonzero_template.inl +++ b/src/cunumeric/search/nonzero_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct NonzeroImplBody; template struct NonzeroImpl { - template + template void operator()(NonzeroArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/set/unique.cc b/src/cunumeric/set/unique.cc index ed0f28f49..7aa09d0e5 100644 --- a/src/cunumeric/set/unique.cc +++ b/src/cunumeric/set/unique.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct UniqueImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/set/unique.cu b/src/cunumeric/set/unique.cu index 6d67eecae..302077c5f 100644 --- a/src/cunumeric/set/unique.cu +++ b/src/cunumeric/set/unique.cu @@ -139,7 +139,7 @@ static Piece tree_reduce(Array& output, return my_piece; } -template +template struct UniqueImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/set/unique_omp.cc b/src/cunumeric/set/unique_omp.cc index 411fda749..37a86582b 100644 --- a/src/cunumeric/set/unique_omp.cc +++ b/src/cunumeric/set/unique_omp.cc @@ -23,7 +23,7 @@ namespace cunumeric { using namespace legate; 
-template +template struct UniqueImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/set/unique_reduce.cc b/src/cunumeric/set/unique_reduce.cc index d18db95a1..29442e371 100644 --- a/src/cunumeric/set/unique_reduce.cc +++ b/src/cunumeric/set/unique_reduce.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct UniqueReduceImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/set/unique_reduce_template.inl b/src/cunumeric/set/unique_reduce_template.inl index 9a0fb4415..5a6a3aab0 100644 --- a/src/cunumeric/set/unique_reduce_template.inl +++ b/src/cunumeric/set/unique_reduce_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct UniqueReduceImplBody; template struct UniqueReduceImpl { - template + template void operator()(Array& output, std::vector& input_arrs) { using VAL = legate_type_of; diff --git a/src/cunumeric/set/unique_template.inl b/src/cunumeric/set/unique_template.inl index fe3046756..1ab1a7e1f 100644 --- a/src/cunumeric/set/unique_template.inl +++ b/src/cunumeric/set/unique_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct UniqueImplBody; template struct UniqueImpl { - template + template void operator()(Array& output, Array& input, std::vector& comms, diff --git a/src/cunumeric/sort/searchsorted.cc b/src/cunumeric/sort/searchsorted.cc index 174deb333..6b8fdb4cd 100644 --- a/src/cunumeric/sort/searchsorted.cc +++ b/src/cunumeric/sort/searchsorted.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct SearchSortedImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/searchsorted.cu b/src/cunumeric/sort/searchsorted.cu index c62892e8c..5f98b0259 100644 --- a/src/cunumeric/sort/searchsorted.cu +++ b/src/cunumeric/sort/searchsorted.cu @@ -64,7 +64,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) if (upper_bound > 0) { output_reduction.reduce(v_point, upper_bound + global_offset); } } -template +template struct SearchSortedImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/searchsorted_omp.cc b/src/cunumeric/sort/searchsorted_omp.cc index 115c14214..6c695494c 100644 --- a/src/cunumeric/sort/searchsorted_omp.cc +++ b/src/cunumeric/sort/searchsorted_omp.cc @@ -23,7 +23,7 @@ namespace cunumeric { using namespace legate; -template +template struct SearchSortedImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/searchsorted_template.inl b/src/cunumeric/sort/searchsorted_template.inl index 30acf5202..8ccd0661f 100644 --- a/src/cunumeric/sort/searchsorted_template.inl +++ b/src/cunumeric/sort/searchsorted_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct SearchSortedImplBody; template struct SearchSortedImpl { - template + template void operator()(SearchSortedArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/sort.cc b/src/cunumeric/sort/sort.cc index 517865fc7..3835a3598 100644 --- a/src/cunumeric/sort/sort.cc +++ b/src/cunumeric/sort/sort.cc @@ -28,7 +28,7 @@ namespace cunumeric { using namespace legate; -template +template struct SortImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/sort.cu b/src/cunumeric/sort/sort.cu index 0851f0229..0e5dc39f9 100644 --- a/src/cunumeric/sort/sort.cu +++ b/src/cunumeric/sort/sort.cu @@ -42,14 +42,14 @@ namespace cunumeric { -template +template struct 
support_cub : std::true_type {}; template <> -struct support_cub : std::false_type {}; +struct support_cub : std::false_type {}; template <> -struct support_cub : std::false_type {}; +struct support_cub : std::false_type {}; -template ::value>* = nullptr> +template ::value>* = nullptr> void local_sort(const legate_type_of* values_in, legate_type_of* values_out, const int64_t* indices_in, @@ -69,7 +69,7 @@ void local_sort(const legate_type_of* values_in, } } -template ::value>* = nullptr> +template ::value>* = nullptr> void local_sort(const legate_type_of* values_in, legate_type_of* values_out, const int64_t* indices_in, @@ -566,7 +566,7 @@ struct negative_plus : public thrust::binary_function ///////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////// -template +template SegmentMergePiece> merge_all_buffers( std::vector>>& merge_buffers, bool segmented, @@ -1187,7 +1187,7 @@ void rebalance_data(SegmentMergePiece& merge_buffer, ///////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////// -template +template void sample_sort_nccl_nd(SortPiece> local_sorted, Array& output_array_unbound, // only for unbound usage when !rebalance void* output_ptr, @@ -1658,7 +1658,7 @@ void sample_sort_nccl_nd(SortPiece> local_sorted, ///////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////// -template +template struct SortImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/sort_cpu.inl b/src/cunumeric/sort/sort_cpu.inl index aa738d5e6..6ab8a585c 100644 --- a/src/cunumeric/sort/sort_cpu.inl +++ b/src/cunumeric/sort/sort_cpu.inl @@ -441,7 +441,7 @@ void rebalance_data(SegmentMergePiece& merge_buffer, } } -template +template void sample_sort_nd(SortPiece> local_sorted, Array& output_array_unbound, // only for unbound usage when !rebalance void* output_ptr, @@ -552,7 +552,7 @@ void sample_sort_nd(SortPiece> local_sorted, /*comm::coll::collAllgather(p_samples + num_samples_l * my_sort_rank, p_samples, num_samples_l * sizeof(SegmentSample), - comm::coll::CollDataType::CollUint8, + comm::coll::CollDataType::Code::CollUint8, comm);*/ // workaround - using alltoallv to mimic allgather on subset @@ -894,7 +894,7 @@ void sample_sort_nd(SortPiece> local_sorted, } } -template +template struct SortImplBodyCpu { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/sort_omp.cc b/src/cunumeric/sort/sort_omp.cc index 92aa751da..e117439a8 100644 --- a/src/cunumeric/sort/sort_omp.cc +++ b/src/cunumeric/sort/sort_omp.cc @@ -29,7 +29,7 @@ namespace cunumeric { using namespace legate; -template +template struct SortImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/sort/sort_template.inl b/src/cunumeric/sort/sort_template.inl index 19a927a5c..0a4d1c16b 100644 --- a/src/cunumeric/sort/sort_template.inl +++ b/src/cunumeric/sort/sort_template.inl @@ -24,7 +24,7 @@ namespace cunumeric { using namespace legate; -template +template struct SortImplBody; static int get_rank(Domain domain, DomainPoint index_point) @@ -41,7 +41,7 @@ static int get_rank(Domain domain, DomainPoint index_point) template struct SortImpl { - template + template void operator()(SortArgs& args, std::vector& comms) const { 
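sort.cu keeps two local_sort paths behind a support_cub trait: CUB radix sort where the key type allows it, thrust sort otherwise. Which two codes opt out was lost with the angle brackets; the complex codes are the natural candidates, since radix sort cannot key on them. Sketch under that assumption:

#include <type_traits>

enum class Code { INT32, FLOAT32, COMPLEX64, COMPLEX128 };  // stand-in subset

template <Code CODE> struct support_cub : std::true_type {};
template <> struct support_cub<Code::COMPLEX64>  : std::false_type {};  // assumed
template <> struct support_cub<Code::COMPLEX128> : std::false_type {};  // assumed

template <Code CODE, std::enable_if_t<support_cub<CODE>::value>* = nullptr>
void local_sort() { /* cub::DeviceRadixSort / DeviceSegmentedRadixSort path */ }

template <Code CODE, std::enable_if_t<!support_cub<CODE>::value>* = nullptr>
void local_sort() { /* thrust::sort with a comparison functor */ }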
using VAL = legate_type_of; diff --git a/src/cunumeric/stat/bincount.cc b/src/cunumeric/stat/bincount.cc index dc73f69de..d4806cbab 100644 --- a/src/cunumeric/stat/bincount.cc +++ b/src/cunumeric/stat/bincount.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct BincountImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/stat/bincount.cu b/src/cunumeric/stat/bincount.cu index d4996a993..2ae4a0d05 100644 --- a/src/cunumeric/stat/bincount.cu +++ b/src/cunumeric/stat/bincount.cu @@ -143,7 +143,7 @@ static __global__ void weighted_bincount_kernel_rd_global( lhs[bin] <<= weights[idx + origin[0]]; } -template +template struct BincountImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/stat/bincount_omp.cc b/src/cunumeric/stat/bincount_omp.cc index 9d8f6375a..4f21e95a8 100644 --- a/src/cunumeric/stat/bincount_omp.cc +++ b/src/cunumeric/stat/bincount_omp.cc @@ -23,7 +23,7 @@ namespace cunumeric { using namespace legate; -template +template struct BincountImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/stat/bincount_template.inl b/src/cunumeric/stat/bincount_template.inl index 00034a486..83ae638e1 100644 --- a/src/cunumeric/stat/bincount_template.inl +++ b/src/cunumeric/stat/bincount_template.inl @@ -23,12 +23,12 @@ namespace cunumeric { using namespace legate; -template +template struct BincountImplBody; template struct BincountImpl { - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(BincountArgs& args) const { using VAL = legate_type_of; @@ -50,7 +50,7 @@ struct BincountImpl { } } - template ::value>* = nullptr> + template ::value>* = nullptr> void operator()(BincountArgs& args) const { assert(false); diff --git a/src/cunumeric/ternary/where.cc b/src/cunumeric/ternary/where.cc index 449ff3b46..85c602522 100644 --- a/src/cunumeric/ternary/where.cc +++ b/src/cunumeric/ternary/where.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct WhereImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/ternary/where.cu b/src/cunumeric/ternary/where.cu index f1d1594a2..a9dfdb1a3 100644 --- a/src/cunumeric/ternary/where.cu +++ b/src/cunumeric/ternary/where.cu @@ -40,7 +40,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) gen out[point] = mask[point] ? 
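The weighted bincount kernel folds with `lhs[bin] <<= weights[idx + origin[0]]`: on a Legion reduction accessor, `<<=` applies the declared reduction operator (a sum here) rather than plain assignment, which is what makes the concurrent update safe. A serial C++ equivalent of the computation, assuming every input value is a valid bin index:

#include <cstdint>
#include <vector>

std::vector<double> bincount_weighted(const std::vector<int32_t>& in,
                                      const std::vector<double>& weights,
                                      int32_t num_bins)
{
  std::vector<double> lhs(num_bins, 0.0);
  for (std::size_t idx = 0; idx < in.size(); ++idx)
    lhs[in[idx]] += weights[idx];  // the `<<=` sum-reduction, serialized
  return lhs;
}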
in1[point] : in2[point]; } -template +template struct WhereImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/ternary/where_omp.cc b/src/cunumeric/ternary/where_omp.cc index 26beea4bd..dd0ed7e55 100644 --- a/src/cunumeric/ternary/where_omp.cc +++ b/src/cunumeric/ternary/where_omp.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct WhereImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/ternary/where_template.inl b/src/cunumeric/ternary/where_template.inl index 6ea668354..ccdc78b5a 100644 --- a/src/cunumeric/ternary/where_template.inl +++ b/src/cunumeric/ternary/where_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct WhereImplBody; template struct WhereImpl { - template + template void operator()(WhereArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/transform/flip.cc b/src/cunumeric/transform/flip.cc index 946cdd4a9..3aa332d57 100644 --- a/src/cunumeric/transform/flip.cc +++ b/src/cunumeric/transform/flip.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct FlipImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/transform/flip.cu b/src/cunumeric/transform/flip.cu index 88ef54227..8c6dc166b 100644 --- a/src/cunumeric/transform/flip.cu +++ b/src/cunumeric/transform/flip.cu @@ -41,7 +41,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) out[p] = in[q]; } -template +template struct FlipImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/transform/flip_omp.cc b/src/cunumeric/transform/flip_omp.cc index ce39ba88d..775fd6802 100644 --- a/src/cunumeric/transform/flip_omp.cc +++ b/src/cunumeric/transform/flip_omp.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct FlipImplBody { using VAL = legate_type_of; diff --git a/src/cunumeric/transform/flip_template.inl b/src/cunumeric/transform/flip_template.inl index 82279da9b..6af541fc6 100644 --- a/src/cunumeric/transform/flip_template.inl +++ b/src/cunumeric/transform/flip_template.inl @@ -24,12 +24,12 @@ namespace cunumeric { using namespace legate; -template +template struct FlipImplBody; template struct FlipImpl { - template + template void operator()(FlipArgs& args) const { using VAL = legate_type_of; diff --git a/src/cunumeric/unary/convert.cc b/src/cunumeric/unary/convert.cc index d7ab32fc3..a3fae7fbb 100644 --- a/src/cunumeric/unary/convert.cc +++ b/src/cunumeric/unary/convert.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct ConvertImplBody { using OP = ConvertOp; using SRC = legate_type_of; diff --git a/src/cunumeric/unary/convert.cu b/src/cunumeric/unary/convert.cu index 7b839131e..ea1d7cfb1 100644 --- a/src/cunumeric/unary/convert.cu +++ b/src/cunumeric/unary/convert.cu @@ -40,7 +40,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) out[point] = func(in[point]); } -template +template struct ConvertImplBody { using OP = ConvertOp; using SRC = legate_type_of; diff --git a/src/cunumeric/unary/convert_omp.cc b/src/cunumeric/unary/convert_omp.cc index 139d84221..de2f20478 100644 --- a/src/cunumeric/unary/convert_omp.cc +++ b/src/cunumeric/unary/convert_omp.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct ConvertImplBody { using OP = ConvertOp; using SRC = legate_type_of; diff --git a/src/cunumeric/unary/convert_template.inl 
b/src/cunumeric/unary/convert_template.inl index 075843ab3..8d507d35f 100644 --- a/src/cunumeric/unary/convert_template.inl +++ b/src/cunumeric/unary/convert_template.inl @@ -25,16 +25,12 @@ namespace cunumeric { using namespace legate; -template +template struct ConvertImplBody; -template +template struct ConvertImpl { - template * = nullptr> + template * = nullptr> void operator()(ConvertArgs& args) const { using OP = ConvertOp; @@ -63,14 +59,14 @@ struct ConvertImpl { ConvertImplBody()(func, out, in, pitches, rect, dense); } - template * = nullptr> + template * = nullptr> void operator()(ConvertArgs& args) const { assert(false); } }; -template +template struct ConvertDispatch { template ::value || @@ -94,7 +90,7 @@ struct ConvertDispatch { template struct SourceTypeDispatch { - template + template void operator()(ConvertArgs& args) const { op_dispatch(args.nan_op, ConvertDispatch{}, args); diff --git a/src/cunumeric/unary/convert_util.h b/src/cunumeric/unary/convert_util.h index f58c0265c..5fb340fd7 100644 --- a/src/cunumeric/unary/convert_util.h +++ b/src/cunumeric/unary/convert_util.h @@ -43,10 +43,10 @@ constexpr decltype(auto) op_dispatch(ConvertCode nan_op, Functor f, Fnargs&&... return f.template operator()(std::forward(args)...); } -template +template struct ConvertOp {}; -template +template struct ConvertOp { using SRC = legate::legate_type_of; using DST = legate::legate_type_of; @@ -64,7 +64,7 @@ struct ConvertOp { !legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { - if constexpr (DST_TYPE == legate::LegateTypeCode::BOOL_LT) + if constexpr (DST_TYPE == legate::Type::Code::BOOL) return static_cast(src.real()) || static_cast(src.imag()); else return static_cast(src.real()); @@ -74,8 +74,8 @@ struct ConvertOp { } }; -template -struct ConvertOp { +template +struct ConvertOp { using SRC = legate::legate_type_of; template ::value>* = nullptr> @@ -91,8 +91,8 @@ struct ConvertOp { } }; -template -struct ConvertOp { +template +struct ConvertOp { using DST = legate::legate_type_of; constexpr DST operator()(const __half& src) const @@ -101,7 +101,7 @@ struct ConvertOp { } }; -template +template struct ConvertOp { using SRC = legate::legate_type_of; using DST = legate::legate_type_of; @@ -123,8 +123,8 @@ struct ConvertOp { } }; -template -struct ConvertOp { +template +struct ConvertOp { using SRC = legate::legate_type_of; template ::value>* = nullptr> @@ -142,8 +142,8 @@ struct ConvertOp { } }; -template -struct ConvertOp { +template +struct ConvertOp { using DST = legate::legate_type_of; constexpr DST operator()(const __half& src) const @@ -153,7 +153,7 @@ struct ConvertOp { } }; -template +template struct ConvertOp { using SRC = legate::legate_type_of; using DST = legate::legate_type_of; @@ -175,8 +175,8 @@ struct ConvertOp { } }; -template -struct ConvertOp { +template +struct ConvertOp { using SRC = legate::legate_type_of; template ::value>* = nullptr> @@ -194,8 +194,8 @@ struct ConvertOp { } }; -template -struct ConvertOp { +template +struct ConvertOp { using DST = legate::legate_type_of; constexpr DST operator()(const __half& src) const diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 1f57be92d..198a38cc7 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -28,7 +28,7 @@ namespace cunumeric { using namespace legate; -template +template struct ScalarUnaryRed { using OP = UnaryRedOp; using LG_OP = 
typename OP::OP; @@ -116,7 +116,7 @@ struct ScalarUnaryRed { template struct ScalarUnaryRedImpl { - template + template void operator()(ScalarUnaryRedArgs& args) const { // The operation is always valid for contains diff --git a/src/cunumeric/unary/unary_op.cc b/src/cunumeric/unary/unary_op.cc index 6004ac759..53c085113 100644 --- a/src/cunumeric/unary/unary_op.cc +++ b/src/cunumeric/unary/unary_op.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct UnaryOpImplBody { using OP = UnaryOp; using ARG = typename OP::T; @@ -70,7 +70,7 @@ struct PointCopyImplBody { } }; -template +template struct MultiOutUnaryOpImplBody { using OP = MultiOutUnaryOp; using RHS1 = typename OP::RHS1; diff --git a/src/cunumeric/unary/unary_op.cu b/src/cunumeric/unary/unary_op.cu index 8bbb21872..41de2e20b 100644 --- a/src/cunumeric/unary/unary_op.cu +++ b/src/cunumeric/unary/unary_op.cu @@ -63,7 +63,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) out[point] = in[point]; } -template +template struct UnaryOpImplBody { using OP = UnaryOp; using ARG = typename OP::T; @@ -143,7 +143,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) lhs[point] = func(rhs1[point], rhs2.ptr(point)); } -template +template struct MultiOutUnaryOpImplBody { using OP = MultiOutUnaryOp; using RHS1 = typename OP::RHS1; diff --git a/src/cunumeric/unary/unary_op.h b/src/cunumeric/unary/unary_op.h index c277c5d77..a4439dbd8 100644 --- a/src/cunumeric/unary/unary_op.h +++ b/src/cunumeric/unary/unary_op.h @@ -52,187 +52,112 @@ class UnaryOpTask : public CuNumericTask { template struct inner_type_dispatch_fn { template - constexpr decltype(auto) operator()(CuNumericTypeCodes code, Functor f, Fnargs&&... args) + constexpr decltype(auto) operator()(int point_dim, Functor f, Fnargs&&... 
args) { - switch (code) { + switch (point_dim) { #if LEGATE_MAX_DIM >= 1 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT1: { - return f.template operator()( - std::forward(args)...); + case 1: { + return f.template operator()<1, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 2 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT2: { - return f.template operator()( - std::forward(args)...); + case 2: { + return f.template operator()<2, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 3 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT3: { - return f.template operator()( - std::forward(args)...); + case 3: { + return f.template operator()<3, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 4 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT4: { - return f.template operator()( - std::forward(args)...); + case 4: { + return f.template operator()<4, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 5 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT5: { - return f.template operator()( - std::forward(args)...); + case 5: { + return f.template operator()<5, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 6 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT6: { - return f.template operator()( - std::forward(args)...); + case 6: { + return f.template operator()<6, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 7 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT7: { - return f.template operator()( - std::forward(args)...); + case 7: { + return f.template operator()<7, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 8 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT8: { - return f.template operator()( - std::forward(args)...); + case 8: { + return f.template operator()<8, DIM>(std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 9 - case CuNumericTypeCodes::CUNUMERIC_TYPE_POINT9: { - return f.template operator()( - std::forward(args)...); + case 9: { + return f.template operator()<9, DIM>(std::forward(args)...); } #endif default: assert(false); } - return f.template operator()( - std::forward(args)...); + return f.template operator()<1, DIM>(std::forward(args)...); } }; template -constexpr decltype(auto) double_dispatch(int dim, - CuNumericTypeCodes code, - Functor f, - Fnargs&&... args) +constexpr decltype(auto) double_dispatch(int dim, int point_dim, Functor f, Fnargs&&... 
args) { switch (dim) { #if LEGATE_MAX_DIM >= 1 case 1: { - return cunumeric::inner_type_dispatch_fn<1>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<1>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 2 case 2: { - return cunumeric::inner_type_dispatch_fn<2>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<2>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 3 case 3: { - return cunumeric::inner_type_dispatch_fn<3>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<3>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 4 case 4: { - return cunumeric::inner_type_dispatch_fn<4>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<4>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 5 case 5: { - return cunumeric::inner_type_dispatch_fn<5>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<5>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 6 case 6: { - return cunumeric::inner_type_dispatch_fn<6>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<6>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 7 case 7: { - return cunumeric::inner_type_dispatch_fn<7>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<7>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 8 case 8: { - return cunumeric::inner_type_dispatch_fn<8>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<8>{}(point_dim, f, std::forward(args)...); } #endif #if LEGATE_MAX_DIM >= 9 case 9: { - return cunumeric::inner_type_dispatch_fn<9>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<9>{}(point_dim, f, std::forward(args)...); } #endif } assert(false); - return cunumeric::inner_type_dispatch_fn<1>{}(code, f, std::forward(args)...); + return cunumeric::inner_type_dispatch_fn<1>{}(point_dim, f, std::forward(args)...); } -template -struct CuNumericTypeOf { - using type = legate::Point<1>; -}; -#if LEGATE_MAX_DIM >= 1 -template <> -struct CuNumericTypeOf { - using type = legate::Point<1>; -}; -#endif -#if LEGATE_MAX_DIM >= 2 -template <> -struct CuNumericTypeOf { - using type = legate::Point<2>; -}; -#endif -#if LEGATE_MAX_DIM >= 3 -template <> -struct CuNumericTypeOf { - using type = legate::Point<3>; -}; -#endif -#if LEGATE_MAX_DIM >= 4 -template <> -struct CuNumericTypeOf { - using type = legate::Point<4>; -}; -#endif -#if LEGATE_MAX_DIM >= 5 -template <> -struct CuNumericTypeOf { - using type = legate::Point<5>; -}; -#endif -#if LEGATE_MAX_DIM >= 6 -template <> -struct CuNumericTypeOf { - using type = legate::Point<6>; -}; -#endif -#if LEGATE_MAX_DIM >= 7 -template <> -struct CuNumericTypeOf { - using type = legate::Point<7>; -}; -#endif -#if LEGATE_MAX_DIM >= 8 -template <> -struct CuNumericTypeOf { - using type = legate::Point<8>; -}; -#endif -#if LEGATE_MAX_DIM >= 9 -template <> -struct CuNumericTypeOf { - using type = legate::Point<9>; -}; -#endif - -template -using cunumeric_type_of = typename CuNumericTypeOf::type; - } // namespace cunumeric diff --git a/src/cunumeric/unary/unary_op_omp.cc b/src/cunumeric/unary/unary_op_omp.cc index 8cbe683a5..1badb93a8 100644 --- a/src/cunumeric/unary/unary_op_omp.cc +++ b/src/cunumeric/unary/unary_op_omp.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct 
UnaryOpImplBody { using OP = UnaryOp; using ARG = typename OP::T; @@ -74,7 +74,7 @@ struct PointCopyImplBody { } }; -template +template struct MultiOutUnaryOpImplBody { using OP = MultiOutUnaryOp; using RHS1 = typename OP::RHS1; diff --git a/src/cunumeric/unary/unary_op_template.inl b/src/cunumeric/unary/unary_op_template.inl index 74f882b91..548cba9bf 100644 --- a/src/cunumeric/unary/unary_op_template.inl +++ b/src/cunumeric/unary/unary_op_template.inl @@ -24,20 +24,18 @@ namespace cunumeric { using namespace legate; -template +template struct UnaryOpImplBody; template struct PointCopyImplBody; -template +template struct MultiOutUnaryOpImplBody; template struct UnaryOpImpl { - template ::valid>* = nullptr> + template ::valid>* = nullptr> void operator()(UnaryOpArgs& args) const { using OP = UnaryOp; @@ -66,9 +64,7 @@ struct UnaryOpImpl { UnaryOpImplBody()(func, out, in, pitches, rect, dense); } - template ::valid>* = nullptr> + template ::valid>* = nullptr> void operator()(UnaryOpArgs& args) const { assert(false); @@ -77,7 +73,7 @@ struct UnaryOpImpl { template struct MultiOutUnaryOpImpl { - template ::valid>* = nullptr> void operator()(MultiOutUnaryOpArgs& args) const @@ -112,7 +108,7 @@ struct MultiOutUnaryOpImpl { func, lhs, rhs1, rhs2, pitches, rect, dense); } - template ::valid>* = nullptr> void operator()(MultiOutUnaryOpArgs& args) const @@ -123,17 +119,17 @@ struct MultiOutUnaryOpImpl { template struct UnaryCopyImpl { - template + template void operator()(UnaryOpArgs& args) const { using VAL = legate_type_of; execute_copy(args); } - template + template void operator()(UnaryOpArgs& args) const { - using VAL = cunumeric_type_of; + using VAL = Point; execute_copy(args); } @@ -168,12 +164,13 @@ struct UnaryOpDispatch { void operator()(UnaryOpArgs& args) const { auto dim = std::max(args.in.dim(), 1); - if ((OP_CODE == UnaryOpCode::COPY) && - (args.in.code() > LegateTypeCode::MAX_TYPE_NUMBER)) - cunumeric::double_dispatch( - dim, args.in.code(), UnaryCopyImpl{}, args); - else - legate::double_dispatch(dim, args.in.code(), UnaryOpImpl{}, args); + if ((OP_CODE == UnaryOpCode::COPY) && (args.in.code() == Type::Code::FIXED_ARRAY)) { + auto& type = static_cast(args.in.type()); + cunumeric::double_dispatch(dim, type.num_elements(), UnaryCopyImpl{}, args); + } else { + auto code = OP_CODE == UnaryOpCode::GETARG ? args.out.code() : args.in.code(); + legate::double_dispatch(dim, code, UnaryOpImpl{}, args); + } } }; diff --git a/src/cunumeric/unary/unary_op_util.h b/src/cunumeric/unary/unary_op_util.h index 2f6fab59a..f309cbcf7 100644 --- a/src/cunumeric/unary/unary_op_util.h +++ b/src/cunumeric/unary/unary_op_util.h @@ -183,20 +183,20 @@ constexpr decltype(auto) op_dispatch(UnaryOpCode op_code, Functor f, Fnargs&&... 
return f.template operator()(std::forward(args)...); } -template +template static constexpr bool is_floating_point = - legate::is_floating_point::value || CODE == legate::LegateTypeCode::HALF_LT; + legate::is_floating_point::value || CODE == legate::Type::Code::FLOAT16; -template +template static constexpr bool is_floating_or_complex = is_floating_point || legate::is_complex::value; -template +template struct UnaryOp { static constexpr bool valid = false; }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -235,7 +235,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -249,7 +249,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -264,7 +264,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -277,7 +277,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -291,7 +291,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -306,7 +306,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -319,7 +319,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -333,7 +333,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -348,7 +348,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -361,7 +361,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = legate::is_floating_point::value; using T = legate::legate_type_of; @@ -376,7 +376,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -389,7 +389,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_point; using T = legate::legate_type_of; @@ -403,7 +403,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -421,7 +421,7 @@ struct UnaryOp { T max; }; -template +template struct UnaryOp { using T = legate::legate_type_of; static constexpr bool valid = true; @@ -441,7 +441,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -451,7 +451,7 @@ struct UnaryOp { constexpr T operator()(const T& x) const { return x; } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -465,7 +465,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -480,7 +480,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -493,7 +493,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static 
constexpr bool valid = is_floating_point; using T = legate::legate_type_of; @@ -504,7 +504,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -516,7 +516,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -530,7 +530,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -558,7 +558,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -571,7 +571,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -594,7 +594,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -607,7 +607,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_point; using T = legate::legate_type_of; @@ -621,7 +621,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { using T = Argval>; static constexpr bool valid = true; @@ -631,7 +631,7 @@ struct UnaryOp { constexpr decltype(auto) operator()(const T& x) const { return x.arg; } }; -template +template struct UnaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_complex_type::value; @@ -641,10 +641,10 @@ struct UnaryOp { constexpr decltype(auto) operator()(const T& x) const { return x.imag(); } }; -template +template struct UnaryOp { static constexpr bool valid = - legate::is_integral::value && CODE != legate::LegateTypeCode::BOOL_LT; + legate::is_integral::value && CODE != legate::Type::Code::BOOL; using T = legate::legate_type_of; UnaryOp(const std::vector& args) {} @@ -652,7 +652,7 @@ struct UnaryOp { constexpr T operator()(const T& x) const { return ~x; } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -680,7 +680,7 @@ struct UnaryOp { __CUDA_HD__ bool operator()(const __half& x) const { return isfinite(static_cast(x)); } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -708,7 +708,7 @@ struct UnaryOp { __CUDA_HD__ bool operator()(const __half& x) const { return isinf(x); } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -737,7 +737,7 @@ struct UnaryOp { __CUDA_HD__ bool operator()(const __half& x) const { return isnan(x); } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; @@ -752,7 +752,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; @@ -768,7 +768,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -781,7 +781,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; @@ -805,7 +805,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -818,7 +818,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; @@ -842,7 +842,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct 
UnaryOp { static constexpr bool valid = true; using T = __half; @@ -855,7 +855,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -875,7 +875,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -885,7 +885,7 @@ struct UnaryOp { constexpr T operator()(const T& x) const { return -x; } }; -template +template struct UnaryOp { static constexpr bool valid = legate::is_floating_point::value; using T = legate::legate_type_of; @@ -896,7 +896,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -908,7 +908,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { using T = legate::legate_type_of; static constexpr bool valid = legate::is_complex_type::value; @@ -918,7 +918,7 @@ struct UnaryOp { constexpr decltype(auto) operator()(const T& x) const { return x.real(); } }; -template +template struct UnaryOp { using T = legate::legate_type_of; static constexpr bool valid = true; @@ -933,7 +933,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { using T = __half; static constexpr bool valid = true; @@ -945,7 +945,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -966,7 +966,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -995,7 +995,7 @@ constexpr T sign(const T& x) } // namespace detail -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -1020,7 +1020,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -1032,7 +1032,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = legate::is_floating_point::value; using T = legate::legate_type_of; @@ -1047,7 +1047,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -1060,7 +1060,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -1074,7 +1074,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -1089,7 +1089,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -1102,7 +1102,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -1112,7 +1112,7 @@ struct UnaryOp { constexpr T operator()(const T& x) const { return x * x; } }; -template +template struct UnaryOp { static constexpr bool valid = true; using T = legate::legate_type_of; @@ -1126,7 +1126,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -1140,7 +1140,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; using T = legate::legate_type_of; @@ -1154,7 +1154,7 @@ struct UnaryOp { } }; -template +template struct UnaryOp { static constexpr bool valid = legate::is_floating_point::value; using T = 
legate::legate_type_of; @@ -1169,7 +1169,7 @@ struct UnaryOp { }; template <> -struct UnaryOp { +struct UnaryOp { static constexpr bool valid = true; using T = __half; @@ -1182,12 +1182,12 @@ struct UnaryOp { } }; -template +template struct MultiOutUnaryOp { static constexpr bool valid = false; }; -template +template struct MultiOutUnaryOp { static constexpr bool valid = legate::is_floating_point::value; using RHS1 = legate::legate_type_of; @@ -1202,7 +1202,7 @@ struct MultiOutUnaryOp { }; template <> -struct MultiOutUnaryOp { +struct MultiOutUnaryOp { static constexpr bool valid = true; using RHS1 = __half; using RHS2 = int32_t; @@ -1215,7 +1215,7 @@ struct MultiOutUnaryOp { } }; -template +template struct MultiOutUnaryOp { static constexpr bool valid = legate::is_floating_point::value; using RHS1 = legate::legate_type_of; @@ -1230,7 +1230,7 @@ struct MultiOutUnaryOp { }; template <> -struct MultiOutUnaryOp { +struct MultiOutUnaryOp { static constexpr bool valid = true; using RHS1 = __half; using RHS2 = __half; diff --git a/src/cunumeric/unary/unary_red.cc b/src/cunumeric/unary/unary_red.cc index 39bfc7e92..2aff3d907 100644 --- a/src/cunumeric/unary/unary_red.cc +++ b/src/cunumeric/unary/unary_red.cc @@ -21,7 +21,7 @@ namespace cunumeric { using namespace legate; -template +template struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index 766f2a7fd..3aa8b4f0c 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -293,7 +293,7 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) if (result != identity) out.reduce(point, result); } -template +template struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; diff --git a/src/cunumeric/unary/unary_red_omp.cc b/src/cunumeric/unary/unary_red_omp.cc index 1718d49d6..823726251 100644 --- a/src/cunumeric/unary/unary_red_omp.cc +++ b/src/cunumeric/unary/unary_red_omp.cc @@ -72,7 +72,7 @@ class Splitter { size_t pitches_[DIM]; }; -template +template struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl index a144bdc30..1e3b298d3 100644 --- a/src/cunumeric/unary/unary_red_template.inl +++ b/src/cunumeric/unary/unary_red_template.inl @@ -27,12 +27,12 @@ namespace cunumeric { using namespace legate; -template +template struct UnaryRedImplBody; template struct UnaryRedImpl { - template 1) && UnaryRedOp::valid>* = nullptr> void operator()(UnaryRedArgs& args) const @@ -53,7 +53,7 @@ struct UnaryRedImpl { lhs, rhs, rect, pitches, args.collapsed_dim, volume); } - template ::valid>* = nullptr> void operator()(UnaryRedArgs& args) const diff --git a/src/cunumeric/unary/unary_red_util.h b/src/cunumeric/unary/unary_red_util.h index 9296ccfe2..d4ceb007b 100644 --- a/src/cunumeric/unary/unary_red_util.h +++ b/src/cunumeric/unary/unary_red_util.h @@ -72,14 +72,14 @@ constexpr decltype(auto) op_dispatch(UnaryRedCode op_code, Functor f, Fnargs&&.. 
return f.template operator()(std::forward(args)...); } -template +template struct UnaryRedOp { static constexpr bool valid = false; }; -template +template struct UnaryRedOp { - static constexpr bool valid = TYPE_CODE != legate::LegateTypeCode::COMPLEX128_LT; + static constexpr bool valid = TYPE_CODE != legate::Type::Code::COMPLEX128; using RHS = legate::legate_type_of; using VAL = bool; @@ -100,9 +100,9 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs) { return rhs != RHS(0); } }; -template +template struct UnaryRedOp { - static constexpr bool valid = TYPE_CODE != legate::LegateTypeCode::COMPLEX128_LT; + static constexpr bool valid = TYPE_CODE != legate::Type::Code::COMPLEX128; using RHS = legate::legate_type_of; using VAL = bool; @@ -123,7 +123,7 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs) { return rhs != RHS(0); } }; -template +template struct UnaryRedOp { static constexpr bool valid = true; @@ -146,7 +146,7 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs) { return static_cast(rhs != RHS(0)); } }; -template +template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; @@ -169,7 +169,7 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs) { return rhs; } }; -template +template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; @@ -192,9 +192,9 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs) { return rhs; } }; -template +template struct UnaryRedOp { - static constexpr bool valid = TYPE_CODE != legate::LegateTypeCode::COMPLEX128_LT; + static constexpr bool valid = TYPE_CODE != legate::Type::Code::COMPLEX128; using RHS = legate::legate_type_of; using VAL = RHS; @@ -215,7 +215,7 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs) { return rhs; } }; -template +template struct UnaryRedOp { static constexpr bool valid = true; @@ -238,7 +238,7 @@ struct UnaryRedOp { __CUDA_HD__ static VAL convert(const RHS& rhs) { return rhs; } }; -template +template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; @@ -271,7 +271,7 @@ struct UnaryRedOp { } }; -template +template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; @@ -304,7 +304,7 @@ struct UnaryRedOp { } }; -template +template struct UnaryRedOp { // Set to false so that this only gets enabled when expliclty declared valid. static constexpr bool valid = false; @@ -312,7 +312,7 @@ struct UnaryRedOp { // It does not provide fold/convert functions. using RHS = legate::legate_type_of; using VAL = bool; - using _RED_OP = UnaryRedOp; + using _RED_OP = UnaryRedOp; using OP = _RED_OP::OP; }; diff --git a/tests/integration/test_ingest.py b/tests/integration/test_ingest.py index c0f17c37b..272f2080d 100644 --- a/tests/integration/test_ingest.py +++ b/tests/integration/test_ingest.py @@ -13,12 +13,12 @@ # limitations under the License. 
# import numpy as np -import pyarrow as pa import pytest from legate.core import ( CustomSplit, Rect, TiledSplit, + float64, get_legion_context, get_legion_runtime, ingest, @@ -73,7 +73,7 @@ def _ingest(custom_partitioning, custom_sharding): else TiledSplit(tile_shape) ) tab = ingest( - pa.float64(), + float64, shape, colors, data_split, From d41aa7c884c63c86c72bc1e04d8c1c67e8dadf88 Mon Sep 17 00:00:00 2001 From: yimoj <130720840+yimoj@users.noreply.github.com> Date: Thu, 4 May 2023 09:38:15 +0800 Subject: [PATCH 048/106] Improve cunumeric.prod coverage and some test fixes (#905) * Improve prod coverage and some test fixes * Compare the exceptions raised by NumPy and cuNumeric in negative tests * Add test for cunumeric.place mask implicit casting --- tests/integration/test_extract.py | 23 ++++- tests/integration/test_prod.py | 143 ++++++++++++++++-------------- 2 files changed, 98 insertions(+), 68 deletions(-) diff --git a/tests/integration/test_extract.py b/tests/integration/test_extract.py index bb873a235..689d4601a 100644 --- a/tests/integration/test_extract.py +++ b/tests/integration/test_extract.py @@ -205,8 +205,27 @@ def test_place_mask_reshape(shape, vals): arr_np = mk_seq_array(np, shape) arr_num = mk_seq_array(num, shape) - mask_np = (arr_np % 2).astype(bool) - mask_num = (arr_np % 2).astype(bool) + mask_np = np.arange(0, arr_np.size).astype(bool) + mask_num = num.arange(0, arr_num.size).astype(bool) + + vals_np = np.array(vals).astype(arr_np.dtype) + vals_num = num.array(vals_np) + + np.place(arr_np, mask_np, vals_np) + num.place(arr_num, mask_num, vals_num) + + assert np.array_equal(arr_np, arr_num) + + +@pytest.mark.parametrize("dtype", (np.float32, np.complex64), ids=str) +def test_place_mask_dtype(dtype): + shape = (3, 2, 3) + vals = [42 + 3j] + arr_np = mk_seq_array(np, shape) + arr_num = mk_seq_array(num, shape) + + mask_np = mk_seq_array(np, shape).astype(dtype) + mask_num = mk_seq_array(num, shape).astype(dtype) vals_np = np.array(vals).astype(arr_np.dtype) vals_num = num.array(vals_np) diff --git a/tests/integration/test_prod.py b/tests/integration/test_prod.py index d6935cb74..ef3b217ce 100644 --- a/tests/integration/test_prod.py +++ b/tests/integration/test_prod.py @@ -78,10 +78,8 @@ ARR = ([], [[]], [[], []], np.inf, np.Inf, -10.3, 0, 200, 5 + 8j) -DTYPE = ["l", "L", "f", "e", "d"] -COMPLEX_TYPE = ["F"] -NEGATIVE_COMPLEX_TYPE = ["D"] -NEGATIVE_DTYPE = ["h", "i", "H", "I", "?", "b", "B"] +DTYPE = ("l", "L", "f", "e", "d") +INTEGER_DTYPE = ("h", "i", "H", "I", "?", "b", "B") def to_dtype(s): @@ -97,57 +95,26 @@ class TestProdNegative(object): def test_array(self, arr): assert allclose(np.prod(arr), num.prod(arr)) - @pytest.mark.xfail - @pytest.mark.parametrize("dtype", NEGATIVE_DTYPE, ids=to_dtype) - def test_dtype_negative(self, dtype): - size = (5, 5, 5) - arr = np.random.random(size) * 10 + 2 - arr_np = np.array(arr, dtype=dtype) - arr_num = num.array(arr_np) - out_np = np.prod(arr_np) # Numpy return product of all datas - out_num = num.prod(arr_num) - # cuNumeric return an array with a different data - assert allclose(out_np, out_num) - - @pytest.mark.skip - @pytest.mark.parametrize("dtype", NEGATIVE_COMPLEX_TYPE, ids=to_dtype) - def test_dtype_complex_negative(self, dtype): - arr = (num.random.rand(5, 5) * 10 + 2) + ( - num.random.rand(5, 5) * 10 * 1.0j + 0.2j - ) - arr_np = np.array(arr, dtype=dtype) - arr_num = num.array(arr_np) - out_np = np.prod(arr_np) - out_num = num.prod(arr_num) - assert allclose(out_np, out_num) - def test_axis_out_bound(self): + 
expected_exc = np.AxisError arr = [-1, 0, 1, 2, 10] - msg = r"bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(expected_exc): + np.prod(arr, axis=2) + with pytest.raises(expected_exc): num.prod(arr, axis=2) - @pytest.mark.xfail - @pytest.mark.parametrize("axis", ((-1, 1), (0, 1), (1, 2), (0, 2))) - def test_axis_tuple(self, axis): - size = (5, 5, 5) - arr_np = np.random.random(size) * 10 - arr_num = num.array(arr_np) - out_np = np.prod(arr_np, axis=axis) - # cuNumeric raises NotImplementedError: - # Need support for reducing multiple dimensions. - # Numpy get results. - out_num = num.prod(arr_num, axis=axis) - assert allclose(out_np, out_num) - def test_out_negative(self): + expected_exc = ValueError in_shape = (2, 3, 4) out_shape = (2, 3, 3) - arr_num = num.random.random(in_shape) * 10 - arr_out = num.random.random(out_shape) * 10 - msg = r"shapes do not match" - with pytest.raises(ValueError, match=msg): - num.prod(arr_num, out=arr_out, axis=2) + arr_np = np.ndarray(in_shape) + out_np = np.ndarray(out_shape) + arr_num = num.ndarray(in_shape) + out_num = num.ndarray(out_shape) + with pytest.raises(expected_exc): + np.prod(arr_np, out=out_np, axis=2) + with pytest.raises(expected_exc): + num.prod(arr_num, out=out_num, axis=2) def test_keepdims(self): in_shape = (2, 3, 4) @@ -157,23 +124,21 @@ def test_keepdims(self): out_num = num.prod(arr_num, axis=2, keepdims=True) assert allclose(out_np, out_num) - @pytest.mark.xfail - def test_initial_scalar_list(self): + @pytest.mark.parametrize( + "initial", + ([2, 3], pytest.param([3], marks=pytest.mark.xfail)), + ids=str, + ) + def test_initial_list(self, initial): + expected_exc = ValueError arr = [[1, 2], [3, 4]] - initial_value = [3] - - out_num = num.prod(arr, initial=initial_value) # array(72) # Numpy raises ValueError: # Input object to FillWithScalar is not a scalar - out_np = np.prod(arr, initial=initial_value) - - assert allclose(out_np, out_num) - - def test_initial_list(self): - arr = [[1, 2], [3, 4]] - initial_value = [2, 3] - with pytest.raises(ValueError): - num.prod(arr, initial=initial_value) + with pytest.raises(expected_exc): + np.prod(arr, initial=initial) + # when LEGATE_TEST=1, cuNumeric casts list to scalar and proceeds + with pytest.raises(expected_exc): + num.prod(arr, initial=initial) def test_initial_empty_array(self): size = (1, 0) @@ -206,6 +171,7 @@ def test_basic(self, size): out_np = np.prod(arr_np) out_num = np.prod(arr_num) assert allclose(out_np, out_num) + assert allclose(out_num, arr_num.prod()) @pytest.mark.parametrize("dtype", DTYPE, ids=to_dtype) def test_dtype(self, dtype): @@ -217,15 +183,43 @@ def test_dtype(self, dtype): out_num = num.prod(arr_num) assert allclose(out_np, out_num) - @pytest.mark.parametrize("dtype", COMPLEX_TYPE, ids=to_dtype) + @pytest.mark.xfail(reason="numpy and cunumeric return different dtypes") + @pytest.mark.parametrize("dtype", INTEGER_DTYPE, ids=to_dtype) + def test_dtype_integer_precision(self, dtype): + arr_np = np.arange(0, 5).astype(dtype) + arr_num = num.arange(0, 5).astype(dtype) + out_np = np.prod(arr_np) + out_num = num.prod(arr_num) + assert allclose(out_num, arr_num.prod()) + # When input precision is less than default platform integer + # NumPy returns the product with dtype of platform integer + # cuNumeric returns the product with dtype of the input array + assert allclose(out_np, out_num) + + @pytest.mark.parametrize( + "dtype", + ( + "F", + pytest.param("D", marks=pytest.mark.xfail), + pytest.param("G", marks=pytest.mark.xfail), + ), + 
ids=to_dtype, + ) def test_dtype_complex(self, dtype): - arr = (num.random.rand(5, 5) * 10 + 2) + ( - num.random.rand(5, 5) * 10 * 1.0j + 0.2j + arr = (np.random.rand(5, 5) * 10 + 2) + ( + np.random.rand(5, 5) * 10 * 1.0j + 0.2j ) arr_np = np.array(arr, dtype=dtype) - arr_num = num.array(arr_np) + arr_num = num.array(arr, dtype=dtype) out_np = np.prod(arr_np) + # cunumeric always returns [1+0.j] when LEGATE_TEST=1 out_num = num.prod(arr_num) + # When running tests with CUNUMERIC_TEST=1 and dtype is complex256, + # allclose hits assertion error: + # File "/legate/cunumeric/cunumeric/eager.py", line 293, + # in to_deferred_array + # assert self.runtime.is_supported_type(self.array.dtype) + # AssertionError assert allclose(out_np, out_num) @pytest.mark.parametrize("axis", (_ for _ in range(-2, 3, 1))) @@ -237,6 +231,23 @@ def test_axis_basic(self, axis): out_np = np.prod(arr_np, axis=axis) assert allclose(out_np, out_num) + @pytest.mark.xfail(reason="cunumeric raises exceptions when LEGATE_TEST=1") + @pytest.mark.parametrize( + "axis", ((-1, 1), (0, 1), (1, 2), (0, 2)), ids=str + ) + def test_axis_tuple(self, axis): + size = (5, 5, 5) + arr_np = np.random.random(size) * 10 + arr_num = num.array(arr_np) + out_np = np.prod(arr_np, axis=axis) + # when LEGATE_TEST = 1 cuNumeric raises two types of exceptions + # (-1, 1): ValueError: Invalid promotion on dimension 2 for a 1-D store + # others: + # NotImplementedError: Need support for reducing multiple dimensions + # Numpy get results. + out_num = num.prod(arr_num, axis=axis) + assert allclose(out_np, out_num) + @pytest.mark.parametrize("size", SIZES) def test_out_basic(self, size): arr_np = np.random.random(size) From c13cb40ab0ecd6437efae6609172af3d6de7305c Mon Sep 17 00:00:00 2001 From: Wonchan Lee Date: Thu, 4 May 2023 10:12:59 -0700 Subject: [PATCH 049/106] Changes for resource scoping (#728) * Update the mapper to use the new machine object * Fetch machine info from the core runtime * Catch up the core machine API change * Update the commit hash --- cmake/versions.json | 2 +- cunumeric/runtime.py | 22 +++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/cmake/versions.json b/cmake/versions.json index 08dd5b6a5..8c2fe2665 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "149fa50bce56350e84f3fad4d453b5f5b77b935d" + "git_tag" : "8ac807575c9770adae73ac623e32c34eac9c36c9" } } } diff --git a/cunumeric/runtime.py b/cunumeric/runtime.py index 3fe1ff242..5c3c0ebd1 100644 --- a/cunumeric/runtime.py +++ b/cunumeric/runtime.py @@ -21,7 +21,7 @@ import legate.core.types as ty import numpy as np -from legate.core import LEGATE_MAX_DIM, Rect, get_legate_runtime +from legate.core import LEGATE_MAX_DIM, ProcessorKind, Rect, get_legate_runtime from legate.core.context import Context as LegateContext from typing_extensions import TypeGuard @@ -66,18 +66,6 @@ def __init__(self, legate_context: LegateContext) -> None: ty.int32, ) ) - self.num_procs = int( - self.legate_context.get_tunable( - CuNumericTunable.NUM_PROCS, - ty.int32, - ) - ) - self.num_gpus = int( - self.legate_context.get_tunable( - CuNumericTunable.NUM_GPUS, - ty.int32, - ) - ) # Make sure that our CuNumericLib object knows about us so it can # destroy us @@ -96,6 +84,14 @@ def __init__(self, legate_context: LegateContext) -> None: # Maps value types to struct types used in argmin/argmax self._cached_argred_types: 
dict[ty.Dtype, ty.Dtype] = dict() + @property + def num_procs(self) -> int: + return len(self.legate_runtime.machine) + + @property + def num_gpus(self) -> int: + return self.legate_runtime.machine.count(ProcessorKind.GPU) + def get_point_type(self, dim: DIMENSION) -> ty.Dtype: cached = self._cached_point_types.get(dim) if cached is not None: From 9a486ee4ec5614ced48549e5b13ac09cea7e8be7 Mon Sep 17 00:00:00 2001 From: Robin Wang <104830875+robinwnv@users.noreply.github.com> Date: Fri, 5 May 2023 09:10:56 +0800 Subject: [PATCH 050/106] Enhance test_logic.py and test_logical.py (#911) * Enhance test_logic.py and test_logical.py * Address comments. * Address comments - part 2 --- tests/integration/test_logic.py | 240 ++++++++++++++++++++++++++---- tests/integration/test_logical.py | 130 ++++++++++++---- 2 files changed, 315 insertions(+), 55 deletions(-) diff --git a/tests/integration/test_logic.py b/tests/integration/test_logic.py index 32fbf1c7d..aabc99946 100644 --- a/tests/integration/test_logic.py +++ b/tests/integration/test_logic.py @@ -20,57 +20,143 @@ import cunumeric as num +SCALARS_INF = (np.inf, -np.inf, np.nan, 0) +ARRAYS_INF = ([np.inf, -np.inf, np.nan, 0],) -def test_inf(): - arr = [np.inf, -np.inf, np.nan, 0] - assert np.array_equal(np.isposinf(arr), num.isposinf(arr)) - assert np.array_equal(np.isneginf(arr), num.isneginf(arr)) +@pytest.mark.parametrize("x", SCALARS_INF + ARRAYS_INF) +@pytest.mark.parametrize("func_name", ("isposinf", "isneginf")) +def test_inf_basic(func_name, x): + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) -INPUTS = ( + assert np.array_equal(func_np(x), func_num(x)) + + +@pytest.mark.parametrize("out_dt", (bool, int, float)) +@pytest.mark.parametrize("x", (np.inf,) + ARRAYS_INF) +@pytest.mark.parametrize("func_name", ("isposinf", "isneginf")) +def test_inf_out(func_name, x, out_dt): + res_shape = (4,) + res_np = np.empty(res_shape, dtype=out_dt) + res_num = num.empty(res_shape, dtype=out_dt) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + func_np(x, out=res_np) + func_num(x, out=res_num) + assert np.array_equal(res_np, res_num) + + +class TestInfErrors: + @pytest.mark.parametrize("func_name", ("isposinf", "isneginf")) + def test_out_invalid_shape(self, func_name): + expected_exc = ValueError + x = [np.inf, -np.inf, np.nan, 0] + res_shape = (3,) + res_np = np.empty(res_shape) + res_num = num.empty(res_shape) + + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + + with pytest.raises(expected_exc): + func_np(x, out=res_np) + with pytest.raises(expected_exc): + func_num(x, out=res_num) + + +SCALARS = (pytest.param("a string", marks=pytest.mark.xfail), None, False) +ARRAYS = ( [1.0, 2.0, 3.0], [1.0 + 0j, 2.0 + 0j, 3.0 + 0j], [1.0 + 1j, 2.0 + 0j, 3.0 + 1j], ) -@pytest.mark.parametrize("arr", INPUTS) -def test_predicates(arr): - in_np = np.array(arr) - in_num = num.array(arr) +@pytest.mark.parametrize("x", SCALARS + ARRAYS) +def test_isreal(x): + # for x is 'a string', np.isreal is False, num.isreal is Array(True) + assert np.array_equal(np.isreal(x), num.isreal(x)) + + +@pytest.mark.parametrize("x", SCALARS + ARRAYS) +def test_iscomplex(x): + assert np.array_equal(np.iscomplex(x), num.iscomplex(x)) - assert np.array_equal(np.isreal(in_np), num.isreal(in_num)) - assert np.array_equal(np.iscomplex(in_np), num.iscomplex(in_num)) +@pytest.mark.parametrize("x", SCALARS) +def test_isrealobj_scalar(x): + assert np.array_equal(np.isrealobj(x), num.isrealobj(x)) 
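
The split these new scalar and array cases pin down is that isreal/iscomplex inspect values element-wise, while isrealobj/iscomplexobj inspect only the dtype. A minimal NumPy-only sketch of that distinction, included for orientation rather than as part of the patch (cuNumeric's counterparts are expected to agree):

    import numpy as np

    x = np.array([1.0 + 0j, 2.0 + 0j])  # complex dtype, purely real values
    np.isreal(x)        # array([ True,  True]) -- per-element value check
    np.isrealobj(x)     # False -- dtype check: the array is complex-typed
    np.iscomplexobj(x)  # True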
-@pytest.mark.parametrize("arr", INPUTS) -def test_array_predicates(arr): - in_np = np.array(arr) - in_num = num.array(arr) + +@pytest.mark.parametrize("x", SCALARS) +def test_iscomplexobj_scalar(x): + assert np.array_equal(np.iscomplexobj(x), num.iscomplexobj(x)) + + +@pytest.mark.parametrize("x", ARRAYS) +def test_isrealobj_array(x): + in_np = np.array(x) + in_num = num.array(x) assert np.array_equal(np.isrealobj(in_np), num.isrealobj(in_num)) assert np.array_equal(np.isrealobj(in_np), num.isrealobj(in_np)) + + +@pytest.mark.parametrize("x", ARRAYS) +def test_iscomplexobj_array(x): + in_np = np.array(x) + in_num = num.array(x) + assert np.array_equal(np.iscomplexobj(in_np), num.iscomplexobj(in_num)) assert np.array_equal(np.iscomplexobj(in_np), num.iscomplexobj(in_np)) -def test_isscalar(): +@pytest.mark.parametrize("x", (1.0, True, [1, 2, 3])) +def test_isscalar(x): + assert np.isscalar(x) is num.isscalar(x) + + +def test_isscalar_array(): in_np = np.array([1, 2, 3]) in_num = num.array([1, 2, 3]) - - assert num.isscalar(1.0) - assert num.isscalar(True) - assert not num.isscalar(in_np) - assert not num.isscalar(in_num) + assert np.isscalar(in_np) is num.isscalar(in_num) is False # NumPy's scalar reduction returns a Python scalar - assert num.isscalar(np.sum(in_np)) + assert num.isscalar(np.sum(in_np)) is True # but cuNumeric's scalar reduction returns a 0-D array that behaves like # a deferred scalar - assert not num.isscalar(num.sum(in_np)) + assert num.isscalar(num.sum(in_np)) is False + + +SCALAR_PAIRS = ( + (0, -1e-8), + (1e10, 1.00001e10), + (1 + 1j, 1 + 1.00001j), + (0, -0.000001), + (1e10, 1.0001e10), + (1 + 1j, 1 + 1.0001j), +) -def test_isclose(): +@pytest.mark.xfail +@pytest.mark.parametrize( + ("a", "b"), + SCALAR_PAIRS, +) +def test_isclose_scalars(a, b): + # for all cases, + # In Numpy, it pass + # In cuNumeric, it raises IndexError: too many indices for array: + # array is 0-dimensional, but 1 were indexed + out_np = np.isclose(a, b) + out_num = num.isclose(a, b) + assert np.array_equal(out_np, out_num) + + +def test_isclose_arrays(): in1_np = np.random.rand(10) in2_np = in1_np + np.random.uniform(low=5e-09, high=2e-08, size=10) in1_num = num.array(in1_np) @@ -80,18 +166,112 @@ def test_isclose(): out_num = num.isclose(in1_num, in2_num) assert np.array_equal(out_np, out_num) - weird_values = [np.inf, -np.inf, np.nan, 0.0, -0.0] - weird_pairs = tuple(combinations_with_replacement(weird_values, 2)) - in1_np = np.array([x for x, _ in weird_pairs]) - in2_np = np.array([y for _, y in weird_pairs]) + +SHAPES_BROADCASTING = ( + (1, 3), + (2, 3), + (1, 2, 3), + (2, 2, 3), + (1,), + (1, 1), + (1, 2, 1), + (2, 2, 1), +) + + +@pytest.mark.parametrize( + "shape_b", SHAPES_BROADCASTING, ids=lambda shape_b: f"(shape_b={shape_b})" +) +def test_isclose_broadcast(shape_b): + len_in_arr = 20 + in1_np = np.random.rand(len_in_arr) + in2_np = in1_np + np.random.uniform(low=5e-09, high=2e-08, size=len_in_arr) + + shape_a = (3,) + size_a = np.prod(shape_a) + a_np = np.array([in1_np[i % len_in_arr] for i in range(size_a)]).reshape( + shape_a + ) + + size_b = np.prod(shape_b) + b_np = np.array([in2_np[i % len_in_arr] for i in range(size_b)]).reshape( + shape_b + ) + + a_num = num.array(a_np) + b_num = num.array(b_np) + out_np = np.isclose(a_np, b_np) + out_num = num.isclose(a_num, b_num) + assert np.array_equal(out_np, out_num) + + +EMPTY_ARRAY_PAIRS = ( + ([], []), + ([], [[]]), + ([[]], [[]]), +) + + +@pytest.mark.parametrize( + ("a", "b"), + EMPTY_ARRAY_PAIRS, +) +def 
test_isclose_empty_arrays(a, b):
+    out_np = np.isclose(a, b)
+    out_num = num.isclose(a, b)
+    assert np.array_equal(out_np, out_num)
+
+
+@pytest.mark.parametrize(
+    ("rtol", "atol"),
+    ((1e-04, 1e-06), (1e-06, 1e-09)),
+)
+def test_isclose_arrays_rtol_atol(rtol, atol):
+    in1_np = np.random.rand(10)
+    in2_np = in1_np + np.random.uniform(low=5e-09, high=2e-08, size=10)
     in1_num = num.array(in1_np)
     in2_num = num.array(in2_np)
 
-    out_np = np.isclose(in1_np, in2_np)
-    out_num = num.isclose(in1_num, in2_num)
+    out_np = np.isclose(in1_np, in2_np, rtol=rtol, atol=atol)
+    out_num = num.isclose(in1_num, in2_num, rtol=rtol, atol=atol)
     assert np.array_equal(out_np, out_num)
 
 
+@pytest.mark.parametrize(
+    "equal_nan", (False, pytest.param(True, marks=pytest.mark.xfail))
+)
+def test_isclose_equal_nan(equal_nan):
+    # If equal_nan is True,
+    # In Numpy, it pass
+    # In cuNumeric, it raises NotImplementedError
+    values = [np.inf, -np.inf, np.nan, 0.0, -0.0]
+    pairs = tuple(combinations_with_replacement(values, 2))
+    in1_np = np.array([x for x, _ in pairs])
+    in2_np = np.array([y for _, y in pairs])
+    in1_num = num.array(in1_np)
+    in2_num = num.array(in2_np)
+
+    out_np = np.isclose(in1_np, in2_np, equal_nan=equal_nan)
+    out_num = num.isclose(in1_num, in2_num, equal_nan=equal_nan)
+    assert np.array_equal(out_np, out_num)
+
+
+class TestIsCloseErrors:
+    def test_arrays_invalid_shape(self):
+        expected_exc = ValueError
+        a_np = np.random.rand(6)
+        b_np = a_np + np.random.uniform(low=5e-09, high=2e-08, size=6)
+        in1_np = a_np.reshape((2, 3))
+        in2_np = b_np.reshape((3, 2))
+        in1_num = num.array(in1_np)
+        in2_num = num.array(in2_np)
+
+        with pytest.raises(expected_exc):
+            np.isclose(in1_np, in2_np)
+        with pytest.raises(expected_exc):
+            num.isclose(in1_num, in2_num)
+
+
 if __name__ == "__main__":
     import sys
 
diff --git a/tests/integration/test_logical.py b/tests/integration/test_logical.py
index 7f9dd94c1..cb835ba75 100644
--- a/tests/integration/test_logical.py
+++ b/tests/integration/test_logical.py
@@ -31,52 +31,80 @@
     [np.nan],
 )
 
+FUNCTIONS = ("all", "any")
+
+@pytest.mark.parametrize("keepdims", (False, True))
 @pytest.mark.parametrize("input", INPUTS)
-def test_any_and_all(input):
+@pytest.mark.parametrize("func", FUNCTIONS)
+def test_basic(func, input, keepdims):
     in_np = np.array(input)
     # cuNumeric doesn't support reductions for complex128
     if in_np.dtype.kind == "c":
         in_np = in_np.astype("F")
     in_num = num.array(in_np)
 
-    for fn in ("any", "all"):
-        fn_np = getattr(np, fn)
-        fn_num = getattr(num, fn)
-        assert np.array_equal(fn_np(in_np), fn_num(in_num))
-        for axis in range(in_num.ndim):
-            out_np = fn_np(in_np, axis=axis)
-            out_num = fn_num(in_num, axis=axis)
-            assert np.array_equal(out_np, out_num)
+    fn_np = getattr(np, func)
+    fn_num = getattr(num, func)
+    assert np.array_equal(
+        fn_np(in_np, keepdims=keepdims), fn_num(in_num, keepdims=keepdims)
+    )
+    for axis in range(-in_num.ndim, in_num.ndim):
+        out_np = fn_np(in_np, axis=axis, keepdims=keepdims)
+        out_num = fn_num(in_num, axis=axis, keepdims=keepdims)
+        assert np.array_equal(out_np, out_num)
+
+
+@pytest.mark.parametrize(
+    "axes",
+    ((0,), (1, 2), pytest.param((-1, 0), marks=pytest.mark.xfail), (-1, 0, 1)),
+    ids=lambda axes: f"(axes={axes})",
+)
+@pytest.mark.parametrize("func", FUNCTIONS)
+def test_axis_tuple(func, axes):
+    # For axes=(-1, 0),
+    # in Numpy, it pass
+    # in cuNumeric, raises ValueError:
+    # Invalid promotion on dimension 2 for a 1-D store
+    input = [[[5, 10], [0, 100]]]
+    in_np = np.array(input)
+    in_num = num.array(in_np)
+
+    fn_np = 
getattr(np, func)
+    fn_num = getattr(num, func)
+    out_np = fn_np(in_np, axis=axes)
+    out_num = fn_num(in_num, axis=axes)
+    assert np.array_equal(out_np, out_num)


 @pytest.mark.parametrize("ndim", range(LEGATE_MAX_DIM + 1))
-def test_nd_inputs(ndim):
+@pytest.mark.parametrize("func", FUNCTIONS)
+def test_nd_inputs(ndim, func):
     shape = (3,) * ndim
     in_np = np.random.random(shape)
     in_num = num.array(in_np)

-    for fn in ("any", "all"):
-        fn_np = getattr(np, fn)
-        fn_num = getattr(num, fn)
-        for axis in range(in_num.ndim):
-            out_np = fn_np(in_np, axis=axis)
-            out_num = fn_num(in_num, axis=axis)
-            assert np.array_equal(out_np, out_num)
+    fn_np = getattr(np, func)
+    fn_num = getattr(num, func)
+    for axis in range(in_num.ndim):
+        out_np = fn_np(in_np, axis=axis)
+        out_num = fn_num(in_num, axis=axis)
+        assert np.array_equal(out_np, out_num)

-            out_np = np.empty(out_np.shape, dtype="D")
-            out_num = num.empty(out_num.shape, dtype="D")
-            fn_np(in_np, axis=axis, out=out_np)
-            fn_num(in_num, axis=axis, out=out_num)
-            assert np.array_equal(out_np, out_num)
+        out_np = np.empty(out_np.shape, dtype="D")
+        out_num = num.empty(out_num.shape, dtype="D")
+        fn_np(in_np, axis=axis, out=out_np)
+        fn_num(in_num, axis=axis, out=out_num)
+        assert np.array_equal(out_np, out_num)

-            out_np = fn_np(in_np[1:], axis=axis)
-            out_num = fn_num(in_num[1:], axis=axis)
-            assert np.array_equal(out_np, out_num)
+        out_np = fn_np(in_np[1:], axis=axis)
+        out_num = fn_num(in_num[1:], axis=axis)
+        assert np.array_equal(out_np, out_num)


 @pytest.mark.skip
 def test_where():
+    # "the `where` parameter is currently not supported"
     x = np.array([[True, True, False], [True, True, True]])
     y = np.array([[True, False], [True, True]])
     cy = num.array(y)
@@ -90,6 +118,58 @@ def test_where():
     )


+class TestAnyAllErrors:
+    def setup_method(self):
+        input = [[[5, 10], [0, 100]]]
+        self.in_np = np.array(input)
+        self.in_num = num.array(self.in_np)
+
+    @pytest.mark.parametrize(
+        "axis", (-4, 3), ids=lambda axis: f"(axis={axis})"
+    )
+    @pytest.mark.parametrize("func", FUNCTIONS)
+    def test_axis_out_of_bound(self, func, axis):
+        expected_exc = ValueError
+        fn_np = getattr(np, func)
+        fn_num = getattr(num, func)
+
+        with pytest.raises(expected_exc):
+            fn_np(self.in_np, axis=axis)
+        with pytest.raises(expected_exc):
+            fn_num(self.in_num, axis=axis)
+
+    @pytest.mark.parametrize(
+        "axes", ((1, 1), (-1, 2), (0, 3)), ids=lambda axes: f"(axes={axes})"
+    )
+    @pytest.mark.parametrize("func", FUNCTIONS)
+    def test_invalid_axis_tuple(self, func, axes):
+        expected_exc = ValueError
+        fn_np = getattr(np, func)
+        fn_num = getattr(num, func)
+
+        with pytest.raises(expected_exc):
+            fn_np(self.in_np, axis=axes)
+        with pytest.raises(expected_exc):
+            fn_num(self.in_num, axis=axes)
+
+    @pytest.mark.parametrize(
+        ("axis", "out_shape"), ((None, (1,)), (1, (2,)), (1, (2, 2)))
+    )
+    @pytest.mark.parametrize("func", FUNCTIONS)
+    def test_out_invalid_shape(self, func, axis, out_shape):
+        expected_exc = ValueError
+        func_np = getattr(np, func)
+        func_num = getattr(num, func)
+
+        out_np = np.empty(out_shape)
+        out_num = num.empty(out_shape)
+
+        with pytest.raises(expected_exc):
+            func_np(self.in_np, axis=axis, out=out_np)
+        with pytest.raises(expected_exc):
+            func_num(self.in_num, axis=axis, out=out_num)
+
+
 if __name__ == "__main__":
     import sys

From e71e52f355e53d2dd228bce1c4a3aeca686d021d Mon Sep 17 00:00:00 2001
From: XiaLuNV <110973296+XiaLuNV@users.noreply.github.com>
Date: Fri, 5 May 2023 13:50:01 +0800
Subject: [PATCH 051/106] add negative test case test_searchsorted.py (#916)

* add negative test case test_searchsorted.py

* add negative test case test_searchsorted.py

---------

Co-authored-by: xialu00 
---
 tests/integration/test_searchsorted.py | 126 ++++++++++++++++---------
 1 file changed, 84 insertions(+), 42 deletions(-)

diff --git a/tests/integration/test_searchsorted.py b/tests/integration/test_searchsorted.py
index b9b49bd45..5ba9e21eb 100644
--- a/tests/integration/test_searchsorted.py
+++ b/tests/integration/test_searchsorted.py
@@ -57,52 +57,94 @@
 ]


-@pytest.mark.xfail
-def test_arr_none():
-    expected_exc = AttributeError
-    with pytest.raises(expected_exc):
-        np.searchsorted(None, 10)
-    # Numpy raises ValueError: object of too small depth for desired array
-    with pytest.raises(expected_exc):
-        num.searchsorted(None, 10)
-    # cuNemeric raises AttributeError: 'NoneType' object
-    # has no attribute 'searchsorted'
-
-
-@pytest.mark.xfail
-def test_val_none():
-    arr = [2, 3, 10, 9]
-    expected_exc = TypeError
-    with pytest.raises(expected_exc):
-        np.searchsorted(arr, None)
-    # Numpy raises TypeError: '<' not supported between
-    # instances of 'NoneType' and 'NoneType'
-    with pytest.raises(expected_exc):
-        num.searchsorted(arr, None)
-    # cuNumeric raises AssertionError
-    # if self.deferred is None:
-    #     if self.parent is None:
-    # >       assert self.runtime.is_supported_type(self.array.dtype)
-    # E       AssertionError
-    # cunumeric/cunumeric/eager.py:to_deferred_array()
-
-
-@pytest.mark.xfail
-def test_side_invalid():
-    arr = [2, 3, 10, 9]
-    expected_exc = ValueError
-    with pytest.raises(expected_exc):
-        np.searchsorted(arr, 10, "hi")
-    # Numpy raises ValueError: search side must be 'left' or 'right'
-    # (got 'hi')
-    with pytest.raises(expected_exc):
-        num.searchsorted(arr, 10, "hi")
-    # cuNumeric passed, and the result is the same as that of 'right'.
+class TestSearchSortedErrors(object):
+    @pytest.mark.xfail
+    def test_arr_none(self):
+        expected_exc = AttributeError
+        with pytest.raises(expected_exc):
+            np.searchsorted(None, 10)
+        # Numpy raises ValueError:
+        # object of too small depth for desired array
+        with pytest.raises(expected_exc):
+            num.searchsorted(None, 10)
+        # cuNumeric raises AttributeError: 'NoneType' object
+        # has no attribute 'searchsorted'
+
+    @pytest.mark.xfail
+    def test_val_none(self):
+        arr = [2, 3, 10, 9]
+        expected_exc = TypeError
+        with pytest.raises(expected_exc):
+            np.searchsorted(arr, None)
+        # Numpy raises TypeError: '<' not supported between
+        # instances of 'NoneType' and 'NoneType'
+        with pytest.raises(expected_exc):
+            num.searchsorted(arr, None)
+        # cuNumeric raises AssertionError
+        # if self.deferred is None:
+        #     if self.parent is None:
+        # >       assert self.runtime.is_supported_type
+        #         (self.array.dtype)
+        # E       AssertionError
+        # cunumeric/cunumeric/eager.py:to_deferred_array()
+
+    @pytest.mark.xfail
+    def test_side_invalid(self):
+        arr = [2, 3, 10, 9]
+        expected_exc = ValueError
+        with pytest.raises(expected_exc):
+            np.searchsorted(arr, 10, "hi")
+        # Numpy raises ValueError: search side must be 'left' or 'right'
+        # (got 'hi')
+        with pytest.raises(expected_exc):
+            num.searchsorted(arr, 10, "hi")
+        # cuNumeric passed, and the result is the same as that of 'right'.
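# The "left"/"right" contract that test_side_invalid guards can be
# sketched with plain NumPy. This is a standalone illustrative snippet,
# not part of the patch; the array values are arbitrary examples.
import numpy as np

a = np.array([1, 2, 2, 3])
# "left" gives the first index where 2 could be inserted while keeping
# `a` sorted; "right" gives one past the last occurrence.
assert np.searchsorted(a, 2, side="left") == 1
assert np.searchsorted(a, 2, side="right") == 3
# Any other string for `side` is rejected by NumPy with a ValueError.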
+ + def test_ndim_mismatch(self): + a = np.random.random((5, 5, 5)) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.searchsorted(a, 5) + with pytest.raises(expected_exc): + np.searchsorted(a, 5) + + @pytest.mark.xfail + def test_sorter_ndim_mismatch(self): + a = np.random.randint(-100, 100, size=100) + v = np.random.randint(-100, 100, size=10) + a_argsorted = np.random.random((5, 5, 5)) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.searchsorted(a, v, sorter=a_argsorted) + with pytest.raises(expected_exc): + # Numpy raises TypeError + np.searchsorted(a, v, sorter=a_argsorted) + + def test_sorter_shape_mismatch(self): + a = np.random.randint(-100, 100, size=100) + v = np.random.randint(-100, 100, size=10) + a_argsorted = np.random.randint(-100, 100, size=10) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.searchsorted(a, v, sorter=a_argsorted) + with pytest.raises(expected_exc): + np.searchsorted(a, v, sorter=a_argsorted) + + @pytest.mark.xfail + def test_sorter_dtype_mismatch(self): + a = np.random.randint(-100, 100, size=100) + v = np.random.randint(-100, 100, size=10) + a_argsorted = np.random.random(size=100) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.searchsorted(a, v, sorter=a_argsorted) + with pytest.raises(expected_exc): + # Numpy raises TypeError + np.searchsorted(a, v, sorter=a_argsorted) def generate_random(volume, datatype): a_np = None - if np.issubdtype(datatype, np.integer): a_np = np.array( np.random.randint( From 2e552efceb4b71703a0f7a06e8612c9bcaf7358b Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Fri, 5 May 2023 09:57:40 -0700 Subject: [PATCH 052/106] Update for Context-less annotations (#914) --- cunumeric/coverage.py | 4 ++-- cunumeric/deferred.py | 12 ++++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/cunumeric/coverage.py b/cunumeric/coverage.py index cc6730f9e..9cd246ca9 100644 --- a/cunumeric/coverage.py +++ b/cunumeric/coverage.py @@ -99,7 +99,7 @@ def implemented( if reporting: @wraps(func) - @track_provenance(runtime.legate_context) + @track_provenance() def wrapper(*args: Any, **kwargs: Any) -> Any: location = find_last_user_frames( not settings.report_dump_callstack() @@ -114,7 +114,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: else: @wraps(func) - @track_provenance(runtime.legate_context) + @track_provenance() def wrapper(*args: Any, **kwargs: Any) -> Any: return func(*args, **kwargs) diff --git a/cunumeric/deferred.py b/cunumeric/deferred.py index c8527d4d3..acb93ee8b 100644 --- a/cunumeric/deferred.py +++ b/cunumeric/deferred.py @@ -3084,7 +3084,7 @@ def unary_op( lhs = self.base rhs = src._broadcast(lhs.shape) - with Annotation(self.context, {"OpCode": op.name}): + with Annotation({"OpCode": op.name}): task = self.context.create_auto_task(CuNumericOpCode.UNARY_OP) task.add_output(lhs) task.add_input(rhs) @@ -3146,9 +3146,7 @@ def unary_reduction( while lhs.ndim > 1: lhs = lhs.project(0, 0) - with Annotation( - self.context, {"OpCode": op.name, "ArgRed?": str(argred)} - ): + with Annotation({"OpCode": op.name, "ArgRed?": str(argred)}): task = self.context.create_auto_task( CuNumericOpCode.SCALAR_UNARY_RED ) @@ -3189,9 +3187,7 @@ def unary_reduction( "Need support for reducing multiple dimensions" ) - with Annotation( - self.context, {"OpCode": op.name, "ArgRed?": str(argred)} - ): + with Annotation({"OpCode": op.name, "ArgRed?": str(argred)}): task = self.context.create_auto_task(CuNumericOpCode.UNARY_RED) 
task.add_input(rhs_array.base)
@@ -3237,7 +3233,7 @@ def binary_op(
         rhs1 = src1._broadcast(lhs.shape)
         rhs2 = src2._broadcast(lhs.shape)

-        with Annotation(self.context, {"OpCode": op_code.name}):
+        with Annotation({"OpCode": op_code.name}):
             # Populate the Legate launcher
             task = self.context.create_auto_task(CuNumericOpCode.BINARY_OP)
             task.add_output(lhs)

From 9d9b226267626d38c8d4d40c9ae47b663ca30b5d Mon Sep 17 00:00:00 2001
From: Wonchan Lee 
Date: Sun, 7 May 2023 22:01:48 -0700
Subject: [PATCH 053/106] Fix unit tests (#920)

* Bump up the commit hash

* Fix unit tests

* Bump up the commit hash again

* Wasn't the right commit hash

* Propagate the fix for overzealous assertions in Legion

* Bump up again the core commit hash
---
 cmake/versions.json                 |  2 +-
 cunumeric/utils.py                  |  2 +-
 tests/unit/cunumeric/test_config.py |  4 ----
 tests/unit/cunumeric/test_utils.py  | 33 +++++++----------------------
 4 files changed, 10 insertions(+), 31 deletions(-)

diff --git a/cmake/versions.json b/cmake/versions.json
index 8c2fe2665..36e73765c 100644
--- a/cmake/versions.json
+++ b/cmake/versions.json
@@ -5,7 +5,7 @@
             "git_url" : "https://github.com/nv-legate/legate.core.git",
             "git_shallow": false,
             "always_download": false,
-            "git_tag" : "8ac807575c9770adae73ac623e32c34eac9c36c9"
+            "git_tag" : "e63d55a4777f7f47ba15c9a1d1b63eb7140f4a32"
         }
     }
 }
diff --git a/cunumeric/utils.py b/cunumeric/utils.py
index dc40ea190..0586bb8f3 100644
--- a/cunumeric/utils.py
+++ b/cunumeric/utils.py
@@ -44,7 +44,7 @@


 def to_core_dtype(dtype: Union[str, np.dtype[Any]]) -> Optional[ty.Dtype]:
-    return SUPPORTED_DTYPES.get(np.dtype(dtype), None)
+    return SUPPORTED_DTYPES.get(np.dtype(dtype))


 def is_advanced_indexing(key: Any) -> bool:
diff --git a/tests/unit/cunumeric/test_config.py b/tests/unit/cunumeric/test_config.py
index d3d30707c..bbaa8fbc8 100644
--- a/tests/unit/cunumeric/test_config.py
+++ b/tests/unit/cunumeric/test_config.py
@@ -224,10 +224,6 @@ def test_RandGenCode() -> None:
     assert (set(m.RandGenCode.__members__)) == {"UNIFORM", "NORMAL", "INTEGER"}


-def test_CuNumericRedopCode() -> None:
-    assert (set(m.CuNumericRedopCode.__members__)) == {"ARGMIN", "ARGMAX"}
-
-
 def test_CuNumericTunable() -> None:
     assert (set(m.CuNumericTunable.__members__)) == {
         "NUM_GPUS",
diff --git a/tests/unit/cunumeric/test_utils.py b/tests/unit/cunumeric/test_utils.py
index 3f6d77639..12e6d0265 100644
--- a/tests/unit/cunumeric/test_utils.py
+++ b/tests/unit/cunumeric/test_utils.py
@@ -23,12 +23,10 @@

 EXPECTED_SUPPORTED_DTYPES = set(
     [
-        bool,
         np.bool_,
         np.int8,
         np.int16,
         np.int32,
-        int,
         np.int64,
         np.uint8,
         np.uint16,
@@ -36,7 +34,6 @@
         np.uint64,
         np.float16,
         np.float32,
-        float,
         np.float64,
         np.complex64,
         np.complex128,
@@ -116,25 +113,25 @@ def test_top_only_False(self) -> None:


 def test__SUPPORTED_DTYPES():
-    assert set(m.SUPPORTED_DTYPES.keys()) == EXPECTED_SUPPORTED_DTYPES
+    assert set(m.SUPPORTED_DTYPES.keys()) == set(
+        np.dtype(ty) for ty in EXPECTED_SUPPORTED_DTYPES
+    )


 class Test_is_supported_dtype:
-    @pytest.mark.parametrize(
-        "value", ["foo", 10, 10.2, [], (), {}, set(), None]
-    )
+    @pytest.mark.parametrize("value", ["foo", 10, 10.2, (), set()])
     def test_type_bad(self, value) -> None:
         with pytest.raises(TypeError):
-            m.is_supported_dtype(value)
+            m.to_core_dtype(value)

     @pytest.mark.parametrize("value", EXPECTED_SUPPORTED_DTYPES)
     def test_supported(self, value) -> None:
-        assert m.is_supported_dtype(np.dtype(value))
+        assert m.to_core_dtype(value) is not None

     # This is just a representative sample, not exhaustive
-    @pytest.mark.parametrize("value", [np.float128, np.datetime64])
+    @pytest.mark.parametrize("value", [np.float128, np.datetime64, [], {}])
     def test_unsupported(self, value) -> None:
-        assert not m.is_supported_dtype(np.dtype(value))
+        assert m.to_core_dtype(value) is None


 @pytest.mark.parametrize(
@@ -144,20 +141,6 @@ def test_calculate_volume(shape, volume) -> None:
     assert m.calculate_volume(shape) == volume


-def test_get_arg_dtype() -> None:
-    dt = m.get_arg_dtype(np.float32)
-    assert dt.type is np.void
-    assert dt.isalignedstruct
-    assert set(dt.fields) == {"arg", "arg_value"}
-    assert dt.fields["arg"][0] == np.dtype(np.int64)
-    assert dt.fields["arg_value"][0] == np.dtype(np.float32)
-
-
-def test_get_arg_value_dtype() -> None:
-    dt = m.get_arg_dtype(np.float32)
-    assert m.get_arg_value_dtype(dt) is np.float32
-
-
 def _dot_modes_oracle(a_ndim: int, b_ndim: int) -> bool:
     a_modes, b_modes, out_modes = m.dot_modes(a_ndim, b_ndim)
     expr = f"{''.join(a_modes)},{''.join(b_modes)}->{''.join(out_modes)}"

From a1f2b41d167b3ed71d60dc6eccb1de41ff5a6861 Mon Sep 17 00:00:00 2001
From: Robin Wang <104830875+robinwnv@users.noreply.github.com>
Date: Thu, 11 May 2023 09:10:56 +0800
Subject: [PATCH 054/106] Enhance test_complex_ops.py and test_partition.py
 (#926)

* Enhance test_complex_ops.py and test_partition.py

* Address comments.
---
 tests/integration/test_complex_ops.py | 77 +++++++++++++++++++++-----
 tests/integration/test_partition.py   | 79 ++++++++++++++++++++++-----
 2 files changed, 130 insertions(+), 26 deletions(-)

diff --git a/tests/integration/test_complex_ops.py b/tests/integration/test_complex_ops.py
index 274bccb76..1653704dc 100644
--- a/tests/integration/test_complex_ops.py
+++ b/tests/integration/test_complex_ops.py
@@ -18,27 +18,78 @@

 import cunumeric as num

-DTYPES = [np.complex64, np.complex128]
+ARRAYS = (
+    [1, 2, 3],
+    [4j, 5j, 6j],
+    [3 + 6j],
+    [[1 + 4j, 2 + 5j, 3 + 6j]],
+    [],
+)


-@pytest.mark.parametrize("dtype", DTYPES)
-def test_array(dtype):
-    x_np = np.array([1 + 4j, 2 + 5j, 3 + 6j], dtype)
+def strict_type_equal_array(a, b):
+    return np.array_equal(a, b) and a.dtype == b.dtype
+
+
+@pytest.mark.parametrize("dtype", (np.complex64, np.complex128))
+@pytest.mark.parametrize("arr", ARRAYS)
+def test_complex_array(arr, dtype):
+    # If val has complex elements, the returned type is float.
+    x_np = np.array(arr, dtype)
     x_num = num.array(x_np)

-    assert num.array_equal(x_np.conj(), x_num.conj())
-    assert num.array_equal(x_np.real, x_num.real)
-    assert num.array_equal(x_np.imag, x_num.imag)
+    assert strict_type_equal_array(np.real(x_np), num.real(x_num))
+    assert strict_type_equal_array(np.imag(x_np), num.imag(x_num))

+    assert strict_type_equal_array(x_np.conj(), x_num.conj())
+    assert strict_type_equal_array(x_np.real, x_num.real)
+    assert strict_type_equal_array(x_np.imag, x_num.imag)

-@pytest.mark.parametrize("dtype", DTYPES)
-def test_single(dtype):
-    x_np = np.array([3 + 6j], dtype)
+
+@pytest.mark.parametrize("dtype", (np.int32, np.float64))
+def test_non_complex_array(dtype):
+    # If val is real, the type of val is used for the output.
+    arr = [1, 2, 3]
+    x_np = np.array(arr, dtype)
     x_num = num.array(x_np)

-    assert num.array_equal(x_np.conj(), x_num.conj())
-    assert num.array_equal(x_np.real, x_num.real)
-    assert num.array_equal(x_np.imag, x_num.imag)
+    assert strict_type_equal_array(np.real(x_np), num.real(x_num))
+    assert strict_type_equal_array(np.imag(x_np), num.imag(x_num))
+
+    assert strict_type_equal_array(x_np.conj(), x_num.conj())
+    assert strict_type_equal_array(x_np.real, x_num.real)
+    assert strict_type_equal_array(x_np.imag, x_num.imag)
+
+
+SCALARS = (1, 0.0, 1 + 1j, 1.1 + 1j, 0j)
+
+
+@pytest.mark.diff
+@pytest.mark.parametrize("val", SCALARS)
+def test_scalar(val):
+    # e.g., np.array_equal(1.1, array(1.1))
+    # In numpy, it returns val as a scalar
+    # In cunumeric, it returns a 0-dim array(val)
+    assert np.array_equal(np.real(val), num.real(val))
+    assert np.array_equal(np.imag(val), num.imag(val))
+
+
+@pytest.mark.xfail
+@pytest.mark.parametrize("imag_val", ([10, 11, 12], 12))
+@pytest.mark.parametrize("real_val", ([7, 8, 9], 9))
+def test_assignment(real_val, imag_val):
+    # In numpy, x_np.real = real_val passes
+    # In cunumeric, it raises AttributeError: can't set attribute
+    arr = [1 + 4j, 2 + 5j, 3 + 6j]
+    x_np = np.array(arr)
+    x_num = num.array(x_np)
+
+    x_np.real = real_val
+    x_np.imag = imag_val
+    x_num.real = real_val
+    x_num.imag = imag_val
+
+    assert np.array_equal(x_np, x_num)


 if __name__ == "__main__":
diff --git a/tests/integration/test_partition.py b/tests/integration/test_partition.py
index a93c27329..3ccab94db 100644
--- a/tests/integration/test_partition.py
+++ b/tests/integration/test_partition.py
@@ -23,9 +23,7 @@ def assert_partition(a_num, kth, axis):
     # compute volume
     shape = a_num.shape
-    volume = 1
-    for i in range(a_num.ndim):
-        volume *= shape[i]
+    volume = np.prod(shape)

     # move axis to end and flatten other
     sort_dim = shape[axis]
@@ -75,19 +73,17 @@ def check_api(a=None):
     a_num = num.array(a)
     shape = a.shape

-    volume = 1
-    for i in range(a.ndim):
-        volume *= shape[i]
+    volume = np.prod(shape)

     # partition axes
-    for i in range(a.ndim):
+    for i in range(-a.ndim, a.ndim):
         kth = shape[i] // 2
         print(f"partition axis {i}")
         assert_partition(num.partition(a_num, kth=kth, axis=i), kth, i)

     # flatten
     print("partition flattened")
-    kth = kth = volume // 2
+    kth = volume // 2
     assert_partition(num.partition(a_num, kth=kth, axis=None), kth, 0)

     # in-place partition
@@ -97,7 +93,7 @@ def check_api(a=None):
     assert_partition(copy_a_num, kth, a.ndim - 1)

     # argpartition
-    for i in range(a.ndim):
+    for i in range(-a.ndim, a.ndim):
         kth = shape[i] // 2
         print(f"argpartition axis {i}")
         assert_argpartition(
@@ -131,10 +127,7 @@ def check_api(a=None):

 def generate_random(shape, datatype):
     print(f"Generate random for {datatype}")
-    a_np = None
-    volume = 1
-    for i in shape:
-        volume *= i
+    volume = np.prod(shape)

     if np.issubdtype(datatype, np.integer):
         a_np = np.array(
@@ -170,6 +163,66 @@ def test_dtypes(shape, dtype):
     check_api(generate_random(shape, dtype))


+class TestPartitionErrors:
+    def setup_method(self):
+        shape = (3, 4, 5)
+        volume = np.prod(shape)
+        self.a_np = np.array(np.random.random(size=volume)).reshape(shape)
+        self.a_num = num.array(self.a_np)
+
+    @pytest.mark.parametrize("axis", (-4, 3))
+    def test_axis_out_of_bound(self, axis):
+        expected_exc = ValueError
+        kth = 1
+        with pytest.raises(expected_exc):
+            np.partition(self.a_np, kth=kth, axis=axis)
+        with pytest.raises(expected_exc):
+            num.partition(self.a_num, kth=kth, axis=axis)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("kth", (-4, 3, (-4, 0), (0, 3), (3, 3)))
+    def test_kth_out_of_bound(self, kth):
+        # For all cases,
+        # In numpy, it raises ValueError
+        # In cunumeric, it passes
+        expected_exc = ValueError
+        axis = 0
+        with pytest.raises(expected_exc):
+            np.partition(self.a_np, kth=kth, axis=axis)
+        with pytest.raises(expected_exc):
+            num.partition(self.a_num, kth=kth, axis=axis)
+
+
+class TestArgPartitionErrors:
+    def setup_method(self):
+        shape = (3, 4, 5)
+        volume = np.prod(shape)
+        self.a_np = np.array(np.random.random(size=volume)).reshape(shape)
+        self.a_num = num.array(self.a_np)
+
+    @pytest.mark.parametrize("axis", (-4, 3))
+    def test_axis_out_of_bound(self, axis):
+        expected_exc = ValueError
+        kth = 1
+        with pytest.raises(expected_exc):
+            np.argpartition(self.a_np, kth=kth, axis=axis)
+        with pytest.raises(expected_exc):
+            num.argpartition(self.a_num, kth=kth, axis=axis)
+
+    @pytest.mark.xfail
+    @pytest.mark.parametrize("kth", (-4, 3, (-4, 0), (0, 3), (3, 3)))
+    def test_kth_out_of_bound(self, kth):
+        # For all cases,
+        # In numpy, it raises ValueError
+        # In cunumeric, it passes
+        expected_exc = ValueError
+        axis = 0
+        with pytest.raises(expected_exc):
+            np.argpartition(self.a_np, kth=kth, axis=axis)
+        with pytest.raises(expected_exc):
+            num.argpartition(self.a_num, kth=kth, axis=axis)
+
+
 if __name__ == "__main__":
     import sys

From c2a4937a7476c91f12dd32d8b0c8f1c68d6bd777 Mon Sep 17 00:00:00 2001
From: yimoj <130720840+yimoj@users.noreply.github.com>
Date: Thu, 11 May 2023 10:04:21 +0800
Subject: [PATCH 055/106] Improve cunumeric.clip code coverage (#927)

---
 tests/integration/test_clip.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/tests/integration/test_clip.py b/tests/integration/test_clip.py
index d0005b0eb..b9720b7db 100644
--- a/tests/integration/test_clip.py
+++ b/tests/integration/test_clip.py
@@ -140,6 +140,19 @@ def test_out_ndim():
     assert np.array_equal(out_np, out_num)


+def test_out_np_array():
+    array = ((2, 3, 4), (3, 4, 5), (6, 6, 12))
+    amin = (2, 3, 1)
+    amax = 6
+    np_arr = np.array(array)
+    num_arr = num.array(array)
+    out_np = np.empty(np_arr.shape)
+    out_num = np.empty(np_arr.shape)
+    np_arr.clip(min=amin, max=amax, out=out_np)
+    num_arr.clip(min=amin, max=amax, out=out_num)
+    assert np.array_equal(out_np, out_num)
+
+
 @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1))
 def test_basic(ndim):
     shape = (5,) * ndim
@@ -172,6 +185,19 @@ def test_out(ndim):
     assert np.array_equal(out_np, out_num)


+def test_out_with_array_amin():
+    array = ((2, 3, 4), (3, 4, 5), (6, 6, 12))
+    amin = (2, 3, 1)
+    amax = 6
+    np_arr = np.array(array)
+    num_arr = num.array(array)
+    out_np = np.empty(np_arr.shape)
+    out_num = num.empty(np_arr.shape)
+    np.clip(np_arr, a_min=amin, a_max=amax, out=out_np)
+    num.clip(num_arr, a_min=amin, a_max=amax, out=out_num)
+    assert np.array_equal(out_np, out_num)
+
+
 if __name__ == "__main__":
     import sys

From f2ec4b8a18b30a2850e03c9d3264a18440be63d2 Mon Sep 17 00:00:00 2001
From: yimoj <130720840+yimoj@users.noreply.github.com>
Date: Fri, 12 May 2023 14:34:18 +0800
Subject: [PATCH 056/106] Improve cunumeric.diagonal code coverage (#918)

---
 tests/integration/test_index_routines.py | 41 ++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/tests/integration/test_index_routines.py b/tests/integration/test_index_routines.py
index b8b5c3dde..9925ed1fc 100644
--- a/tests/integration/test_index_routines.py
+++ b/tests/integration/test_index_routines.py
@@ -346,6 +346,19 @@ def test_diagonal_empty_array(shape):
     assert np.array_equal(b, bn)


+@pytest.mark.xfail(reason="cuNumeric does not take single axis")
+def test_diagonal_axis1():
+    shape = (3, 1, 2)
+    a = mk_seq_array(num, shape)
+    an = mk_seq_array(np, shape)
+
+    # cuNumeric hits AssertionError in _diag_helper: assert axes is not None
+    b = num.diagonal(a, axis1=2)
+    # NumPy passes
+    bn = np.diagonal(an, axis1=2)
+    assert np.array_equal(b, bn)
+
+
 class TestDiagonalErrors:
     def setup_method(self):
         shape = (3, 4, 5)
@@ -412,6 +425,34 @@ def test_axes_none(self):
         with pytest.raises(TypeError):
             num.diagonal(self.a, 0, None, 0)

+    @pytest.mark.diff
+    def test_scalar_axes(self):
+        # NumPy does not have axes arg
+        with pytest.raises(ValueError):
+            num.diagonal(self.a, axes=(0,))
+
+    @pytest.mark.diff
+    def test_duplicate_axes(self):
+        # NumPy does not have axes arg
+        expected_exc = ValueError
+        with pytest.raises(expected_exc):
+            num.diagonal(self.a, axis1=1, axes=(0, 1))
+        with pytest.raises(expected_exc):
+            num.diagonal(self.a, axis1=1, axis2=0, axes=(0, 1))
+
+    @pytest.mark.diff
+    def test_extra_axes(self):
+        # NumPy does not have axes arg
+        axes = num.arange(self.a.ndim + 1, dtype=int)
+        with pytest.raises(ValueError):
+            num.diagonal(self.a, axes=axes)
+
+    @pytest.mark.diff
+    def test_n_axes_offset(self):
+        # NumPy does not have axes arg
+        with pytest.raises(ValueError):
+            num.diagonal(self.a, offset=1, axes=(2, 1, 0))
+
 @pytest.mark.parametrize(
     "k",
     (pytest.param(0.0, marks=pytest.mark.xfail), -1.5, 1.5),

From 2baa32b2072b5dbe63b7f51cdfa1c48691bfcd37 Mon Sep 17 00:00:00 2001
From: Marcin Zalewski 
Date: Fri, 12 May 2023 01:26:27 -0700
Subject: [PATCH 057/106] Bump up legate (#929)

---
 cmake/versions.json | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cmake/versions.json b/cmake/versions.json
index 36e73765c..be172a050 100644
--- a/cmake/versions.json
+++ b/cmake/versions.json
@@ -5,7 +5,7 @@
             "git_url" : "https://github.com/nv-legate/legate.core.git",
             "git_shallow": false,
             "always_download": false,
-            "git_tag" : "e63d55a4777f7f47ba15c9a1d1b63eb7140f4a32"
+            "git_tag" : "2ce7c60"
         }
     }
 }

From 6b7e8553766ff9c02e7fe3f3ae11f486071f8f5e Mon Sep 17 00:00:00 2001
From: Marcin Zalewski 
Date: Mon, 15 May 2023 15:00:12 -0700
Subject: [PATCH 058/106] Fix an incorrect type (#931)

Fix mypy failures.
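For context, docutils directives are required to return a list of nodes
from run(), which is what the corrected annotations express. A minimal
standalone sketch (the HelloDirective below is illustrative only, not part
of this patch):

    from __future__ import annotations

    from docutils import nodes
    from sphinx.util.docutils import SphinxDirective


    class HelloDirective(SphinxDirective):
        def run(self) -> list[nodes.Node]:
            # run() must return a (possibly empty) list of nodes, never a
            # bare node -- hence list[nodes.Node] rather than nodes.Node.
            return [nodes.paragraph(text="hello")]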
--- cunumeric/_sphinxext/_cunumeric_directive.py | 2 +- cunumeric/_sphinxext/comparison_table.py | 2 +- cunumeric/_sphinxext/implemented_index.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cunumeric/_sphinxext/_cunumeric_directive.py b/cunumeric/_sphinxext/_cunumeric_directive.py index c2a603f5e..ef6402f6c 100644 --- a/cunumeric/_sphinxext/_cunumeric_directive.py +++ b/cunumeric/_sphinxext/_cunumeric_directive.py @@ -21,7 +21,7 @@ class CunumericDirective(SphinxDirective): - def parse(self, rst_text: str, annotation: str) -> nodes.Node: + def parse(self, rst_text: str, annotation: str) -> list[nodes.Node]: result = ViewList() for line in rst_text.split("\n"): result.append(line, annotation) diff --git a/cunumeric/_sphinxext/comparison_table.py b/cunumeric/_sphinxext/comparison_table.py index 6737e6421..f37a14229 100644 --- a/cunumeric/_sphinxext/comparison_table.py +++ b/cunumeric/_sphinxext/comparison_table.py @@ -37,7 +37,7 @@ class ComparisonTable(CunumericDirective): "sections": lambda x: choice(x, ("numpy", "grouped")), } - def run(self) -> nodes.Node: + def run(self) -> list[nodes.Node]: if self.options.get("sections", "numpy") == "numpy": section_configs = NUMPY_CONFIGS else: diff --git a/cunumeric/_sphinxext/implemented_index.py b/cunumeric/_sphinxext/implemented_index.py index fa16982f4..3d70f763b 100644 --- a/cunumeric/_sphinxext/implemented_index.py +++ b/cunumeric/_sphinxext/implemented_index.py @@ -50,7 +50,7 @@ class ImplementedIndex(CunumericDirective): required_arguments = 0 optional_arguments = 0 - def run(self) -> nodes.Node: + def run(self) -> list[nodes.Node]: refs: list[str] = [] for ns in namespaces: refs += [ From 24fdbda9524216a730fb3052d9d10b88d73f6dca Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Tue, 16 May 2023 16:52:06 -0700 Subject: [PATCH 059/106] Fix message on NaN errors in benchmarks (#923) --- examples/jacobi.py | 7 ++++--- examples/logreg.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/examples/jacobi.py b/examples/jacobi.py index 6b9e46968..de3d888f5 100644 --- a/examples/jacobi.py +++ b/examples/jacobi.py @@ -55,9 +55,10 @@ def run_jacobi(N, iters, warmup, perform_check, timing, verbose): if perform_check: assert check(A, x, b) else: - assert not math.isnan( - np.sum(x) - ), f"{np.count_nonzero(~np.isnan(x))} NaNs in x" + assert not math.isnan(np.sum(x)), ( + f"{np.count_nonzero(np.isnan(x))} NaNs, " + f"{np.count_nonzero(np.isinf(x))} infs in x" + ) if timing: print(f"Elapsed Time: {total} ms") diff --git a/examples/logreg.py b/examples/logreg.py index d502e35f3..78d243ae9 100644 --- a/examples/logreg.py +++ b/examples/logreg.py @@ -70,9 +70,10 @@ def run_logistic_regression(N, F, T, I, warmup, S, B): # noqa: E741 ) total = timer.stop() - assert not math.isnan( - np.sum(weights) - ), f"{np.count_nonzero(~np.isnan(weights))} NaNs in weights" + assert not math.isnan(np.sum(weights)), ( + f"{np.count_nonzero(np.isnan(weights))} NaNs, " + f"{np.count_nonzero(np.isinf(weights))} infs in weights" + ) print(f"Elapsed Time: {total} ms") return total From e985c8da9565afde9beecaafef06217dd8c9fd93 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Wed, 17 May 2023 09:47:40 -0700 Subject: [PATCH 060/106] Bump legate.core version, for latest Legion bugfix (#932) --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index be172a050..fa48abc62 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ 
"git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "2ce7c60" + "git_tag" : "0405b0f9b9a2f09ea4aeb3a795d19481e45026c0" } } } From d0a57578a87a5499949c3a4d65f554510eda0a20 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Thu, 18 May 2023 20:18:32 -0700 Subject: [PATCH 061/106] Bump legate.core commit (#935) --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index fa48abc62..f2897758c 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "0405b0f9b9a2f09ea4aeb3a795d19481e45026c0" + "git_tag" : "88b3eaec431e70c228d362e6cc8170bae0273f9f" } } } From 00709fe989c51eb42e32830bd6d0f8b7d02f75cb Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Fri, 19 May 2023 13:10:30 -0700 Subject: [PATCH 062/106] Unify LEGATE_TEST and CUNUMERIC_TEST (#922) * Unify LEGATE_TEST and CUNUMERIC_TEST * update to use expose force_thunk * add help copy --- cunumeric/linalg/cholesky.py | 2 +- cunumeric/runtime.py | 25 ++++++++++++++++------ cunumeric/settings.py | 30 ++++++++++++++++----------- tests/unit/cunumeric/test_settings.py | 6 +----- 4 files changed, 39 insertions(+), 24 deletions(-) diff --git a/cunumeric/linalg/cholesky.py b/cunumeric/linalg/cholesky.py index db5a275a4..08af14e60 100644 --- a/cunumeric/linalg/cholesky.py +++ b/cunumeric/linalg/cholesky.py @@ -18,9 +18,9 @@ from legate.core import Rect, types as ty from legate.core.shape import Shape +from legate.settings import settings from cunumeric.config import CuNumericOpCode -from cunumeric.settings import settings from .exception import LinAlgError diff --git a/cunumeric/runtime.py b/cunumeric/runtime.py index 5c3c0ebd1..633ea4069 100644 --- a/cunumeric/runtime.py +++ b/cunumeric/runtime.py @@ -23,6 +23,7 @@ import numpy as np from legate.core import LEGATE_MAX_DIM, ProcessorKind, Rect, get_legate_runtime from legate.core.context import Context as LegateContext +from legate.settings import settings as legate_settings from typing_extensions import TypeGuard from .config import ( @@ -74,7 +75,7 @@ def __init__(self, legate_context: LegateContext) -> None: self.cunumeric_lib = cunumeric_lib.shared_object self.has_curand = cunumeric_lib.shared_object.cunumeric_has_curand() - settings.warn = settings.warn() or settings.test() + settings.warn = settings.warn() or legate_settings.test() if self.num_gpus > 0 and settings.preload_cudalibs(): self._load_cudalibs() @@ -479,17 +480,29 @@ def create_unbound_thunk( def is_eager_shape(self, shape: NdShape) -> bool: volume = calculate_volume(shape) - # Newly created empty arrays are ALWAYS eager + + # Special cases that must always be eager: + + # Newly created empty arrays if volume == 0: return True - # If we're testing then the answer is always no - if settings.test(): - return False + + # Arrays with more dimensions than what Legion was compiled for if len(shape) > LEGATE_MAX_DIM: return True + + # CUNUMERIC_FORCE_THUNK == "eager" + if settings.force_thunk() == "eager": + return True + + if settings.force_thunk() == "deferred": + return False + + # no forcing; auto mode if len(shape) == 0: return self.max_eager_volume > 0 - # See if the volume is large enough + + # Otherwise, see if the volume is large enough return volume <= self.max_eager_volume @staticmethod diff --git a/cunumeric/settings.py 
b/cunumeric/settings.py index f2de75699..83f436646 100644 --- a/cunumeric/settings.py +++ b/cunumeric/settings.py @@ -26,18 +26,6 @@ class CunumericRuntimeSettings(Settings): - test: PrioritizedSetting[bool] = PrioritizedSetting( - "test", - "CUNUMERIC_TEST", - default=False, - convert=convert_bool, - help=""" - Enable test mode. In test mode, all cuNumeric ndarrays are managed by - the distributed runtime and the NumPy fallback for small arrays is - turned off. - """, - ) - preload_cudalibs: PrioritizedSetting[bool] = PrioritizedSetting( "preload_cudalibs", "CUNUMERIC_PRELOAD_CUDALIBS", @@ -151,5 +139,23 @@ class CunumericRuntimeSettings(Settings): """, ) + force_thunk: EnvOnlySetting[str | None] = EnvOnlySetting( + "force_thunk", + "CUNUMERIC_FORCE_THUNK", + default=None, + test_default="deferred", + help=""" + Force cuNumeric to always use a specific strategy for backing + ndarrays: "deferred", i.e. managed by the Legate runtime, which + enables distribution and accelerated operations, but has some + up-front offloading overhead, or "eager", i.e. falling back to + using a vanilla NumPy array. By default cuNumeric will decide + this on a per-array basis, based on the size of the array and + the accelerator in use. + + This is a read-only environment variable setting used by the runtime. + """, + ) + settings = CunumericRuntimeSettings() diff --git a/tests/unit/cunumeric/test_settings.py b/tests/unit/cunumeric/test_settings.py index 4b858cc80..c6f32f423 100644 --- a/tests/unit/cunumeric/test_settings.py +++ b/tests/unit/cunumeric/test_settings.py @@ -24,7 +24,6 @@ import cunumeric.settings as m _expected_settings = ( - "test", "preload_cudalibs", "warn", "report_coverage", @@ -34,6 +33,7 @@ "min_gpu_chunk", "min_cpu_chunk", "min_omp_chunk", + "force_thunk", ) _settings_with_test_defaults = ( @@ -62,7 +62,6 @@ def test_prefix(self, name: str) -> None: assert ps.env_var.startswith("CUNUMERIC_") def test_types(self) -> None: - assert m.settings.test.convert_type == 'bool ("0" or "1")' assert m.settings.preload_cudalibs.convert_type == 'bool ("0" or "1")' assert m.settings.warn.convert_type == 'bool ("0" or "1")' assert m.settings.report_coverage.convert_type == 'bool ("0" or "1")' @@ -74,9 +73,6 @@ def test_types(self) -> None: class TestDefaults: - def test_test(self) -> None: - assert m.settings.test.default is False - def test_preload_cudalibs(self) -> None: assert m.settings.preload_cudalibs.default is False From a4a2c61885d0bf2bf52132d5451fbb982f1943be Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Mon, 22 May 2023 10:33:56 -0700 Subject: [PATCH 063/106] Update ingest example for new type system (#937) --- examples/ingest.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/ingest.py b/examples/ingest.py index d5b7e95dd..90dc9a4e0 100644 --- a/examples/ingest.py +++ b/examples/ingest.py @@ -19,9 +19,8 @@ import os from glob import glob -import pyarrow as pa import tifffile as tfl -from legate.core import CustomSplit, Rect, TiledSplit, ingest +from legate.core import CustomSplit, Rect, TiledSplit, ingest, uint16 import cunumeric as np @@ -39,7 +38,7 @@ parser.add_argument("-p", "--custom-partitioning", action="store_true") parser.add_argument("-s", "--custom-sharding", action="store_true") args = parser.parse_args() -dtype = pa.uint16() +dtype = uint16 tile_shape = (1, 301, 704, 360) colors = tuple(args.colors) shape = tuple(ci * di for (ci, di) in zip(colors, tile_shape)) From 1f7abc675cf5bad34c8648d6f80e60e54b2d54eb Mon Sep 17 00:00:00 2001 From: 
Manolis Papadakis Date: Tue, 23 May 2023 17:18:47 -0700 Subject: [PATCH 064/106] Bump legate.core version (#938) --- cmake/versions.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/versions.json b/cmake/versions.json index f2897758c..b2bbf73ce 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -5,7 +5,7 @@ "git_url" : "https://github.com/nv-legate/legate.core.git", "git_shallow": false, "always_download": false, - "git_tag" : "88b3eaec431e70c228d362e6cc8170bae0273f9f" + "git_tag" : "afdf183ad054caa262825742bd4f48e66274f883" } } } From 01836a0774a468f6bc0066e7bed799edddd5328d Mon Sep 17 00:00:00 2001 From: Marcin Zalewski Date: Tue, 23 May 2023 17:30:36 -0700 Subject: [PATCH 065/106] Conditionally set python fault handler for eager tests (#901) --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 121db6f06..a894b34c9 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -120,6 +120,7 @@ jobs: - name: Test run: | cd legate-ci/github-ci/cunumeric + export PYTHONFAULTHANDLER=$(( ${{ matrix.name }} == "Eager"* ? 1 : 0 )) ./test.sh ${{ matrix.options }} > ${COMMIT}-test-${{ matrix.log }}.log 2>&1 - name: Process output if: always() From 6778b2a31cd7d9c0e3c8814fa9cf5aa65c0ad24f Mon Sep 17 00:00:00 2001 From: Bryan Van de Ven Date: Wed, 24 May 2023 15:15:47 -0700 Subject: [PATCH 066/106] Docs for LEGION_DEFAULT_ARGS (#939) * docs for LEGION_DEFAULT_ARGS * demo different options --- docs/cunumeric/source/user/usage.rst | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docs/cunumeric/source/user/usage.rst b/docs/cunumeric/source/user/usage.rst index 7f03be09d..f5f40d474 100644 --- a/docs/cunumeric/source/user/usage.rst +++ b/docs/cunumeric/source/user/usage.rst @@ -41,6 +41,13 @@ using the ``LEGATE_CONFIG`` environment variable: See the :ref:`config` section :ref:`config_legate` for more information. +Additionally, any Legion and Realm arguments may also be passed via the +``LEGION_DEFAULT_ARGS`` environment variable: + +.. code-block:: sh + + LEGION_DEFAULT_ARGS="-lg:sched 100 -ll:show_rsrv" legate script.py
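As a combined illustration (the specific flag and option values below are
examples only, not taken from this patch), the two variables compose on a
single launch:

.. code-block:: sh

    LEGATE_CONFIG="--gpus 2" LEGION_DEFAULT_ARGS="-ll:show_rsrv" legate script.py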