diff --git a/.clang-format b/.clang-format index 262238254..6d5353f99 100644 --- a/.clang-format +++ b/.clang-format @@ -8,7 +8,7 @@ AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: true +AllowShortBlocksOnASingleLine: Empty AllowShortCaseLabelsOnASingleLine: true AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: true @@ -72,6 +72,7 @@ IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 2 IndentWrappedFunctionNames: false +InsertBraces: true JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: false diff --git a/.github/actions/download-artifacts/action.yml b/.github/actions/download-artifacts/action.yml deleted file mode 100644 index 640dc143a..000000000 --- a/.github/actions/download-artifacts/action.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: setup-legate-conda - -description: Download dependencies (artifacts) - -inputs: - device: {type: string, required: true} - git_sha: {type: string, required: true} - -runs: - using: composite - steps: - - - id: cache - name: Cache conda artifacts - uses: actions/cache@v3 - with: - key: "nv-legate/legate.core@${{ inputs.git_sha }}-${{ inputs.device }}" - path: .artifacts - - - if: steps.cache.outputs.cache-hit != 'true' - name: Download conda artifacts - uses: dawidd6/action-download-artifact@v2 - with: - path: .artifacts-dl - repo: nv-legate/legate.core - commit: ${{ inputs.git_sha }} - workflow_conclusion: success - workflow: "ci-gh.yml" - name: "legate.core-${{ inputs.device }}-[0-9a-z]{40}" - name_is_regexp: true - - - if: steps.cache.outputs.cache-hit != 'true' - name: Move conda artifacts into cached dir - shell: bash --noprofile --norc -xeo pipefail {0} - run: | - mkdir -p .artifacts; - find .artifacts-dl/legate.core-${{ inputs.device }}-*/ \ - -maxdepth 2 -type d -name legate_core -exec mv {} .artifacts/ \; - find .artifacts-dl/legate.core-${{ 
inputs.device }}-*/ \ - -maxdepth 2 -type f -name "environment*.yaml" -exec mv {} .artifacts/ \; - - - name: Copy and change cache dir ownership - shell: bash --noprofile --norc -xeo pipefail {0} - run: | - # Copy and change directory ownership - cp -ar .artifacts /home/coder/.artifacts; - chown -R coder:coder /home/coder/.artifacts; - ls -R /home/coder/.artifacts diff --git a/.github/workflows/ci-gh-nightly-release.yml b/.github/workflows/ci-gh-nightly-release.yml new file mode 100644 index 000000000..0540d2b8f --- /dev/null +++ b/.github/workflows/ci-gh-nightly-release.yml @@ -0,0 +1,34 @@ +name: Build Nightly release package + +concurrency: + group: ci-nightly-release-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +on: + workflow_dispatch: + schedule: + - cron: '0 23 * * *' # Nightly at 11:00 PM + +jobs: + build-and-test: + strategy: + fail-fast: false + matrix: + platform: + - linux + - linux-aarch64 + target-device: + - gpu + - cpu + upload-enabled: + - true + - false + uses: + ./.github/workflows/gh-build-and-test.yml + with: + target-device: ${{ matrix.target-device }} + platform: ${{ matrix.platform }} + build-type: release + upload-enabled: ${{ matrix.upload-enabled }} + dependencies-workflow: "ci-gh-nightly-release.yml" + secrets: inherit diff --git a/.github/workflows/ci-gh-release.yml b/.github/workflows/ci-gh-release.yml new file mode 100644 index 000000000..98bb737c5 --- /dev/null +++ b/.github/workflows/ci-gh-release.yml @@ -0,0 +1,39 @@ +name: Build Release package + +concurrency: + group: ci-nightly-release-on-${{ github.event_name }}-from-${{ github.ref_name }} + cancel-in-progress: true + +on: + workflow_dispatch: + push: + branches: + - "pull-request/[0-9]+" + - "cpp-branch-*" + - "main" + +jobs: + build-and-test: + strategy: + fail-fast: false + matrix: + platform: + - linux + - linux-aarch64 + target-device: + - gpu + - cpu + upload-enabled: + - false + exclude: + - platform: linux-aarch64 + target-device: 
gpu + uses: + ./.github/workflows/gh-build-and-test.yml + with: + target-device: ${{ matrix.target-device }} + platform: ${{ matrix.platform }} + build-type: release + upload-enabled: ${{ matrix.upload-enabled }} + dependencies-workflow: "ci-gh-nightly-release.yml" + secrets: inherit diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml deleted file mode 100644 index ffb77c10e..000000000 --- a/.github/workflows/ci-gh.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Build and test cunumeric on GH - -concurrency: - group: ci-build-and-test-on-${{ github.event_name }}-from-${{ github.ref_name }} - cancel-in-progress: true - -on: - push: - branches: - - "pull-request/[0-9]+" - - "branch-*" - -jobs: - build-and-test: - strategy: - fail-fast: false - matrix: - include: - - device: "gpu" - image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04" - - - device: "cpu" - image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04" - uses: - ./.github/workflows/gh-build-and-test.yml - with: - device: ${{ matrix.device }} - image: ${{ matrix.image }} diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml index 3766a07ee..b71d771e5 100644 --- a/.github/workflows/gh-build-and-test.yml +++ b/.github/workflows/gh-build-and-test.yml @@ -1,92 +1,188 @@ on: workflow_call: inputs: - image: + platform: type: string required: true - device: + target-device: type: string required: true - + build-type: + type: string + required: true + upload-enabled: + type: boolean + required: true + dependencies-workflow: + required: true + type: string + description: The workflow file name used by the dependency jobs: + setup-build: + name: Setup build + runs-on: linux-amd64-cpu4 + outputs: + runner_type: ${{ steps.set_runner.outputs.runner_type }} + steps: + - id: set_runner + run: | + if [ "${{ inputs.platform }}" = "linux" ]; then + if [ "${{ github.repository_owner }}" = "nv-legate" ]; then + echo "runner_type=linux-amd64-cpu16" >> 
$GITHUB_OUTPUT + else + echo "runner_type=ubuntu-latest" >> $GITHUB_OUTPUT + fi + elif [ "${{ inputs.platform }}" = "linux-aarch64" ]; then + echo "runner_type=linux-arm64-cpu16" >> $GITHUB_OUTPUT + elif [ "${{ inputs.platform }}" = "mac" ]; then + echo "runner_type=macos-latest" >> $GITHUB_OUTPUT + fi + build: - name: "Build cunumeric (with ${{ inputs.device }} legate) on GH" + needs: setup-build + name: "Build (${{ inputs.platform }}, ${{ inputs.target-device }}, ${{ inputs.build-type }})" uses: - ./.github/workflows/gh-build.yml + nv-legate/legate-gh-ci/.github/workflows/gh-build.yml@v1.8 with: - device: ${{ inputs.device }} - image: ${{ inputs.image }} - runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-32cpu' || 'ubuntu-latest' }} - - test: - needs: - - build - strategy: - fail-fast: false - matrix: - include: - - name: 1 CPU test - options: test --cpus 1 --unit --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }} - has-gpu: false - enabled: true + client-repo: ${{ github.event.repository.name }} + target-device: ${{ inputs.target-device }} + runs-on: ${{ needs.setup-build.outputs.runner_type }} + build-type: ${{ inputs.build-type }} + use-container: ${{ inputs.platform == 'linux' || inputs.platform == 'linux-aarch64' }} + platform: ${{ inputs.platform }} + dependencies-file: "cmake/versions.json" + dependencies-workflow: ${{ inputs.dependencies-workflow }} + legate-gh-ci-tag: "v1.8" + build-mode: "" + ucx-enabled: false + upload-enabled: ${{ inputs.upload-enabled }} + secrets: inherit - - name: 2 CPUs test - options: test --cpus 2 --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu8' }} - has-gpu: false - enabled: true - - name: GPU test - options: test --use cuda --gpus 1 --debug - runner: linux-amd64-gpu-v100-latest-1 - has-gpu: true - enabled: ${{ inputs.device == 'gpu' }} - - - name: 2 GPUs test - options: test --use cuda --gpus 2 --debug 
- runner: linux-amd64-2gpu - has-gpu: true - enabled: ${{ inputs.device == 'gpu' }} - - - name: OpenMP test - options: test --use openmp --omps 1 --ompthreads 2 --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} - has-gpu: ${{ inputs.device == 'gpu' }} - enabled: true + upload: + needs: build + if: ${{ github.repository_owner == 'nv-legate' && contains(github.workflow, 'release') && inputs.upload-enabled == true }} + name: Upload package to Server + uses: + nv-legate/legate-gh-ci/.github/workflows/gh-upload.yml@v1.8 + with: + client-repo: ${{ github.event.repository.name }} + build-type: ${{ inputs.build-type }} + name: Upload package to Server + target-device: ${{ inputs.target-device }} + platform: ${{ inputs.platform }} + legate-gh-ci-tag: "v1.8" + build-mode: "" + ucx-enabled: false + upload-enabled: ${{ inputs.upload-enabled }} + upload-action: "upload-package" + pkgSubString: "cunumeric-" + repos-Root: "cunumeric" + secrets: inherit - - name: 2 NUMA OpenMPs test - options: test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} - has-gpu: ${{ inputs.device == 'gpu' }} - enabled: true + setup-test: + if: inputs.upload-enabled == false + name: Setup test + needs: + - build + runs-on: linux-amd64-cpu4 + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + steps: + - id: set-matrix + run: | + set -xeuo pipefail + MATRIX_JSON='{"include": [' + RUNNERS=( + 'linux-amd64-gpu-v100-latest-1:gpu:gpu:linux' 'linux-amd64-2gpu:gpu:2gpu:linux' + 'linux-amd64-cpu16:cpu:cpu:linux' + 'linux-arm64-cpu16:cpu:cpu:linux-aarch64' 'linux-aarch64-2gpu:gpu:2gpu:linux-aarch64' 'linux-aarch64-2gpu:gpu:gpu:linux-aarch64' + 'macos-latest:cpu:cpu:mac') + TEST_CONFIGS=( + '1 CPU test:test --cpus 1 --debug:cpu' + '1 CPU test:test --cpus 1 --debug:gpu' + '2 CPU test:test --cpus 2 --debug:cpu' + '2 CPU test:test --cpus 2 
--debug:gpu' + # set the number of workers manually because nvidia runners report 6 gpus when only one is really available + # this workaround can be removed when the number of available gpus is reported correctly (when we run on VMs) + 'GPU test:test --use cuda --gpus 1 -j 7 --debug:gpu' + '2 GPU test:test --use cuda --gpus 2 --debug:2gpu' + 'OpenMP test:test --use openmp --omps 1 --ompthreads 2 --debug:gpu' + 'OpenMP test:test --use openmp --omps 1 --ompthreads 2 --debug:cpu' + '2 NUMA OpenMPs test:test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug:gpu' + '2 NUMA OpenMPs test:test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug:cpu' + 'Eager execution test:test --use eager --debug:gpu' + 'Eager execution test:test --use eager --debug:cpu' + 'mypy:mypy:cpu' + 'Documentation:docs:cpu' + 'Unit tests:unit:cpu' + ) + for RUNNER in "${RUNNERS[@]}"; do + IFS=':' read -ra RUNNER_INFO <<< "$RUNNER" + RUNNER_NAME=${RUNNER_INFO[0]} + RUNNER_TYPE=${RUNNER_INFO[1]} + RUNNER_DEVICE=${RUNNER_INFO[2]} + RUNNER_PLATFORM=${RUNNER_INFO[3]} + if [[ "$RUNNER_TYPE" == "${{ inputs.target-device }}" && "$RUNNER_PLATFORM" == "${{ inputs.platform }}" ]]; then + for TEST_CONFIG in "${TEST_CONFIGS[@]}"; do + IFS=':' read -ra CONFIG_INFO <<< "$TEST_CONFIG" + TEST_NAME=${CONFIG_INFO[0]} + TEST_OPTIONS=${CONFIG_INFO[1]} + TEST_TARGET_DEVICE=${CONFIG_INFO[2]} + if [[ "$TEST_TARGET_DEVICE" == "$RUNNER_DEVICE" ]]; then + MATRIX_JSON+="{\"runner\": {\"name\": \"$RUNNER_NAME\", \"type\": \"$RUNNER_TYPE\", \"platform\": \"$RUNNER_PLATFORM\"}, \"test-config\": {\"name\": \"$TEST_NAME\", \"test-options\": \"$TEST_OPTIONS\"}}," + fi + done + fi + done + MATRIX_JSON=$(echo "$MATRIX_JSON" | sed 's/,$//') # Remove the trailing comma + MATRIX_JSON+=']}' + echo "matrix=$MATRIX_JSON" >> $GITHUB_OUTPUT - - name: Eager execution test - options: test --use eager --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }} - has-gpu: ${{ 
inputs.device == 'gpu' }} - enabled: true + test: + needs: + - setup-test + name: ${{ matrix.test-config.name }} (${{ inputs.platform }}, ${{ inputs.target-device }}) - - name: mypy - options: mypy - runner: linux-amd64-cpu4 - has-gpu: false - enabled: true + strategy: + fail-fast: false + matrix: ${{fromJson(needs.setup-test.outputs.matrix)}} - - name: documentation - options: docs - runner: linux-amd64-32cpu - has-gpu: false - enabled: ${{ inputs.device == 'gpu' }} + uses: + nv-legate/legate-gh-ci/.github/workflows/gh-test-within-container.yml@v1.8 + with: + client-repo: ${{ github.event.repository.name }} + build-type: ${{ inputs.build-type }} + name: ${{ matrix.test-config.name }} + target-device: ${{ inputs.target-device }} + runs-on: ${{ matrix.runner.name }} + has-gpu: ${{ matrix.runner.type == 'gpu' }} + test-options: ${{ matrix.test-config.test-options }} + platform: ${{ inputs.platform }} + legate-gh-ci-tag: "v1.8" + build-mode: "" + ucx-enabled: false + upload-enabled: ${{ inputs.upload-enabled }} + secrets: inherit + updateTestStatus: + needs: test + name: Update Test status on Server + if: ${{ (github.repository_owner == 'nv-legate') && contains(github.workflow, 'Nightly') && (inputs.upload-enabled == true) }} uses: - ./.github/workflows/gh-test.yml + nv-legate/legate-gh-ci/.github/workflows/gh-upload.yml@v1.8 with: - name: ${{ matrix.name }} - device: ${{ inputs.device }} - image: ${{ inputs.image }} - runs-on: ${{ matrix.runner }} - has-gpu: ${{ matrix.has-gpu }} - test-options: ${{ matrix.options }} - enabled: ${{ matrix.enabled }} + client-repo: ${{ github.event.repository.name }} + build-type: ${{ inputs.build-type }} + name: UpdateTestStatus + target-device: ${{ inputs.target-device }} + platform: ${{ inputs.platform }} + legate-gh-ci-tag: "v1.8" + build-mode: "" + ucx-enabled: false + upload-enabled: true + upload-action: "update-test-status" + pkgSubString: "cunumeric-" + repos-Root: "cunumeric" + secrets: inherit diff --git 
a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml deleted file mode 100644 index 030dad1ad..000000000 --- a/.github/workflows/gh-build.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: Build - -on: - workflow_call: - inputs: - image: - type: string - required: true - device: - required: true - type: string - runs-on: - required: true - type: string - -jobs: - build: - name: build-${{ inputs.device }}-sub-workflow - - permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout - - runs-on: ${{ inputs.runs-on }} - - container: - options: -u root - image: "${{ inputs.image }}" - env: - CUDA_VERSION: "12.2" - CUDA_VERSION_MAJOR: "12" - CUDA_VERSION_MINOR: "2" - SCCACHE_REGION: "us-east-2" - SCCACHE_BUCKET: "rapids-sccache-devs" - SCCACHE_S3_KEY_PREFIX: "legate-cunumeric-dev" - USE_CUDA: "${{ inputs.device == 'gpu' && 'ON' || 'OFF' }}" - GH_TOKEN: "${{ env.GH_TOKEN }}" - GITHUB_TOKEN: "${{ env.GITHUB_TOKEN }}" - VAULT_HOST: "${{ github.repository_owner != 'nv-legate' && 'https://vault.ops.k8s.rapids.ai' || '' }}" - defaults: - run: - shell: su coder {0} - working-directory: /home/coder - - steps: - - name: Checkout cunumeric (= this repo) - uses: actions/checkout@v3 - with: - fetch-depth: 0 - path: cunumeric - persist-credentials: false - - - name: Dump environment - run: | - env - - - name: Copy source folder - run: | - set -x - pwd - cp -r $GITHUB_WORKSPACE/cunumeric . 
- chown -R coder:coder cunumeric; - ls -R - - - name: Copy .gitconfig - run: cp ~/cunumeric/continuous_integration/dot-gitconfig ~/.gitconfig - - - id: legate_core_info - name: Read legate.core SHA - shell: bash --noprofile --norc -xeo pipefail {0} - run: | - git_tag="$(jq -r '.packages.legate_core.git_tag' cunumeric/cmake/versions.json)"; - - echo "git_tag=$git_tag" | tee -a "${GITHUB_OUTPUT}"; - - - name: Download dependencies (artifacts) - uses: ./cunumeric/.github/actions/download-artifacts - with: - device: "${{ inputs.device }}" - git_sha: "${{ steps.legate_core_info.outputs.git_tag }}" - - - if: github.repository_owner == 'nv-legate' - name: Get AWS credentials for sccache bucket - uses: aws-actions/configure-aws-credentials@v2 - with: - aws-region: us-east-2 - role-duration-seconds: 28800 # 8 hours - role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-nv-legate - - - name: Build cunumeric - run: | - export PATH="/home/coder/cunumeric/continuous_integration/scripts:$PATH" - build-cunumeric-all - - - name: Upload build artifacts - uses: actions/upload-artifact@v3 - with: - name: "cunumeric-${{ inputs.device }}-${{ github.sha }}" - path: | - /tmp/out - /tmp/conda-build diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml deleted file mode 100644 index 675f27e9b..000000000 --- a/.github/workflows/gh-test.yml +++ /dev/null @@ -1,91 +0,0 @@ -name: Test cunumeric on GH - -on: - workflow_call: - inputs: - name: - required: true - type: string - image: - type: string - required: true - device: - required: true - type: string - runs-on: - required: true - type: string - has-gpu: - required: true - type: boolean - description: "The runner has GPU(s)." 
- test-options: - required: true - type: string - enabled: - required: true - type: boolean - -env: - build_artifact_name: "cunumeric-${{ inputs.device }}-${{ github.sha }}" - -jobs: - test: - name: ${{ inputs.name }} - if: inputs.enabled && github.repository_owner == 'nv-legate' - runs-on: ${{ inputs.runs-on }} - - container: - options: -u root - image: "${{ inputs.image }}" - env: - # CUDA_VERSION: "${{ inputs.CUDA }}" - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - - defaults: - run: - shell: su coder {0} - working-directory: /home/coder - - steps: - - if: inputs.has-gpu - name: Run nvidia-smi to make sure GPU is working - run: nvidia-smi - - - name: Install numactl - run: | - export DEBIAN_FRONTEND=noninteractive && \ - sudo apt-get update && \ - sudo apt-get install -y numactl - - - name: Checkout cunumeric - uses: actions/checkout@v3 - with: - fetch-depth: 0 - path: cunumeric - persist-credentials: false - - - name: Copy source folder - run: | - set -x - pwd - cp -r $GITHUB_WORKSPACE/cunumeric . 
- chown -R coder:coder cunumeric; - ls -R - - - name: Download build artifacts - uses: actions/download-artifact@v3 - with: - name: ${{ env.build_artifact_name }} - path: /home/coder/.artifacts - - - name: Run cunumeric test / analysis - shell: su coder {0} - run: | - set -x - sudo chown -R coder:coder /home/coder/.artifacts - - export PATH="/home/coder/cunumeric/continuous_integration/scripts:$PATH" - - set -eo pipefail - test-cunumeric ${{ inputs.test-options }} diff --git a/.github/workflows/require-labels.yml b/.github/workflows/require-labels.yml index 9b2704f70..b6680ed06 100644 --- a/.github/workflows/require-labels.yml +++ b/.github/workflows/require-labels.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Check Labels - uses: mheap/github-action-required-labels@v3 + uses: mheap/github-action-required-labels@v5 with: mode: exactly count: 1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 03cfc8b1c..c8b84bdb5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,6 +32,15 @@ repos: entry: python scripts/hooks/enforce_boilerplate.py language: python pass_filenames: false + - id: legate-defined + name: legate-defined + description: 'Find uses of ifdef LEGATE_ that should be using LegateDefined()' + entry: ./scripts/hooks/legate_defined.sh + language: script + 'types_or': [c++, c, cuda] + require_serial: false + stages: [pre-commit] + exclude: '^src/cunumeric/cunumeric_c\.h$' ci: skip: [mypy] diff --git a/CMakeLists.txt b/CMakeLists.txt index 18b121f50..7f51994da 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -44,8 +44,10 @@ endif() ############################################################################## # - Download and initialize RAPIDS CMake helpers ----------------------------- +set(rapids-cmake-version 24.04) +set(rapids-cmake-sha "365322aca32fd6ecd7027f5d7ec7be50b7f3cc2a") if(NOT EXISTS ${CMAKE_BINARY_DIR}/RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${rapids-cmake-version}/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake) endif() include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) @@ -55,7 +57,7 @@ include(rapids-cuda) include(rapids-export) include(rapids-find) -set(cunumeric_version 24.01.00) +set(cunumeric_version 24.05.00) # For now we want the optimization flags to match on both normal make and cmake # builds so we override the cmake defaults here for release, this changes diff --git a/LICENSES_bundled.txt b/LICENSES_bundled.txt deleted file mode 100644 index d18691fd7..000000000 --- a/LICENSES_bundled.txt +++ /dev/null @@ -1,39 +0,0 @@ -The cuNumeric repository and source distributions bundle several libraries that are -compatibly licensed. We list these here. - - -Name: Cephes -Files: src/cunumeric/cephes/* -License: 3-clause BSD - Distributed under 3-clause BSD license with permission from the author, - see https://lists.debian.org/debian-legal/2004/12/msg00295.html - - Cephes Math Library Release 2.8: June, 2000 - Copyright 1984, 1995, 2000 by Stephen L. Moshier - - This software is derived from the Cephes Math Library and is - incorporated herein by permission of the author. - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/README.md b/README.md index cec00b052..262b62873 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ need a copy - CHECK_CUDA(cudaMemcpyAsync( + CUNUMERIC_CHECK_CUDA(cudaMemcpyAsync( values_out, values_in, sizeof(VAL) * volume, cudaMemcpyDeviceToDevice, stream)); } if (indices_in != indices_out) { // not in-place --> need a copy - CHECK_CUDA(cudaMemcpyAsync( + CUNUMERIC_CHECK_CUDA(cudaMemcpyAsync( indices_out, values_in, sizeof(int64_t) * volume, cudaMemcpyDeviceToDevice, stream)); } diff --git a/src/cunumeric/sort/thrust_sort.h b/src/cunumeric/sort/thrust_sort.h index 18a5a2473..0dcd12a90 100644 --- a/src/cunumeric/sort/thrust_sort.h +++ b/src/cunumeric/sort/thrust_sort.h @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_bool.cu b/src/cunumeric/sort/thrust_sort_bool.cu index 5e78d1052..3406171eb 100644 --- a/src/cunumeric/sort/thrust_sort_bool.cu +++ b/src/cunumeric/sort/thrust_sort_bool.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_complex128.cu b/src/cunumeric/sort/thrust_sort_complex128.cu index 1fe1c5494..978afa691 100644 --- a/src/cunumeric/sort/thrust_sort_complex128.cu +++ b/src/cunumeric/sort/thrust_sort_complex128.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/cunumeric/sort/thrust_sort_complex64.cu b/src/cunumeric/sort/thrust_sort_complex64.cu index 259d63fe1..15a607225 100644 --- a/src/cunumeric/sort/thrust_sort_complex64.cu +++ b/src/cunumeric/sort/thrust_sort_complex64.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_double.cu b/src/cunumeric/sort/thrust_sort_double.cu index 57c8078b0..0b3d54db1 100644 --- a/src/cunumeric/sort/thrust_sort_double.cu +++ b/src/cunumeric/sort/thrust_sort_double.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_float.cu b/src/cunumeric/sort/thrust_sort_float.cu index 7487e494a..a32af2601 100644 --- a/src/cunumeric/sort/thrust_sort_float.cu +++ b/src/cunumeric/sort/thrust_sort_float.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_half.cu b/src/cunumeric/sort/thrust_sort_half.cu index 9d9b1a28b..86467247e 100644 --- a/src/cunumeric/sort/thrust_sort_half.cu +++ b/src/cunumeric/sort/thrust_sort_half.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/cunumeric/sort/thrust_sort_int16.cu b/src/cunumeric/sort/thrust_sort_int16.cu index e8218f527..d0f80ac6d 100644 --- a/src/cunumeric/sort/thrust_sort_int16.cu +++ b/src/cunumeric/sort/thrust_sort_int16.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_int32.cu b/src/cunumeric/sort/thrust_sort_int32.cu index 6663a393d..8217b19c1 100644 --- a/src/cunumeric/sort/thrust_sort_int32.cu +++ b/src/cunumeric/sort/thrust_sort_int32.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_int64.cu b/src/cunumeric/sort/thrust_sort_int64.cu index 8dfa69c32..238bc828d 100644 --- a/src/cunumeric/sort/thrust_sort_int64.cu +++ b/src/cunumeric/sort/thrust_sort_int64.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_int8.cu b/src/cunumeric/sort/thrust_sort_int8.cu index 38d8b821b..8ce4fbcff 100644 --- a/src/cunumeric/sort/thrust_sort_int8.cu +++ b/src/cunumeric/sort/thrust_sort_int8.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/cunumeric/sort/thrust_sort_uint16.cu b/src/cunumeric/sort/thrust_sort_uint16.cu index 39f36315e..31d0db9b1 100644 --- a/src/cunumeric/sort/thrust_sort_uint16.cu +++ b/src/cunumeric/sort/thrust_sort_uint16.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_uint32.cu b/src/cunumeric/sort/thrust_sort_uint32.cu index 69fe67d74..318a1e991 100644 --- a/src/cunumeric/sort/thrust_sort_uint32.cu +++ b/src/cunumeric/sort/thrust_sort_uint32.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_uint64.cu b/src/cunumeric/sort/thrust_sort_uint64.cu index 0c3da6f7f..e457cfb9b 100644 --- a/src/cunumeric/sort/thrust_sort_uint64.cu +++ b/src/cunumeric/sort/thrust_sort_uint64.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/sort/thrust_sort_uint8.cu b/src/cunumeric/sort/thrust_sort_uint8.cu index 8742141e0..873d51796 100644 --- a/src/cunumeric/sort/thrust_sort_uint8.cu +++ b/src/cunumeric/sort/thrust_sort_uint8.cu @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/cunumeric/stat/bincount.cc b/src/cunumeric/stat/bincount.cc index d4806cbab..b18c95d8d 100644 --- a/src/cunumeric/stat/bincount.cc +++ b/src/cunumeric/stat/bincount.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,14 +23,14 @@ using namespace legate; template struct BincountImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorRD, true, 1> lhs, const AccessorRO& rhs, const Rect<1>& rect, const Rect<1>& lhs_rect) const { - for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { + for (int64_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { auto value = rhs[idx]; assert(lhs_rect.contains(value)); lhs.reduce(value, 1); @@ -43,7 +43,7 @@ struct BincountImplBody { const Rect<1>& rect, const Rect<1>& lhs_rect) const { - for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { + for (int64_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { auto value = rhs[idx]; assert(lhs_rect.contains(value)); lhs.reduce(value, weights[idx]); @@ -51,7 +51,7 @@ struct BincountImplBody { } }; -/*static*/ void BincountTask::cpu_variant(TaskContext& context) +/*static*/ void BincountTask::cpu_variant(TaskContext context) { bincount_template(context); } diff --git a/src/cunumeric/stat/bincount.cu b/src/cunumeric/stat/bincount.cu index 2ae4a0d05..314b0f00e 100644 --- a/src/cunumeric/stat/bincount.cu +++ b/src/cunumeric/stat/bincount.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -29,7 +29,9 @@ static __device__ inline void _bincount(int32_t* bins, Point<1> origin) { // Initialize the bins to 0 - for (int32_t bin = threadIdx.x; bin < num_bins; bin += blockDim.x) bins[bin] = 0; + for (int32_t bin = threadIdx.x; bin < num_bins; bin += blockDim.x) { + bins[bin] = 0; + } __syncthreads(); // Start reading values and do atomic updates to shared @@ -58,7 +60,9 @@ static __device__ inline void _weighted_bincount(double* bins, Point<1> origin) { // Initialize the bins to 0 - for (int32_t bin = threadIdx.x; bin < num_bins; bin += blockDim.x) bins[bin] = 0; + for (int32_t bin = threadIdx.x; bin < num_bins; bin += blockDim.x) { + bins[bin] = 0; + } __syncthreads(); // Start reading values and do atomic updates to shared @@ -86,13 +90,15 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const size_t num_bins, Point<1> origin) { - extern __shared__ char array[]; - auto bins = reinterpret_cast(array); + extern __shared__ char __bins[]; + auto bins = reinterpret_cast(__bins); _bincount(bins, rhs, volume, num_bins, origin); // Now do the atomics out to global memory for (int32_t bin = threadIdx.x; bin < num_bins; bin += blockDim.x) { const auto count = bins[bin]; - if (count > 0) lhs.reduce(bin, count); + if (count > 0) { + lhs.reduce(bin, count); + } } } @@ -104,7 +110,9 @@ static __global__ void bincount_kernel_rd_global(AccessorRD= volume) return; + if (idx >= volume) { + return; + } auto bin = rhs[idx + origin[0]]; lhs[bin] <<= 1; } @@ -118,8 +126,8 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const size_t num_bins, Point<1> origin) { - extern __shared__ char array[]; - auto bins = reinterpret_cast(array); + extern __shared__ char __bins[]; + auto bins = reinterpret_cast(__bins); _weighted_bincount(bins, rhs, weights, volume, num_bins, origin); // Now do the atomics out to global memory for (int32_t bin = threadIdx.x; bin < num_bins; bin += blockDim.x) { @@ -138,14 +146,16 @@ static 
__global__ void weighted_bincount_kernel_rd_global( { // Just blast out the atomic writes into global memory. auto idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } auto bin = rhs[idx + origin[0]]; lhs[bin] <<= weights[idx + origin[0]]; } template struct BincountImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorRD, false, 1> lhs, const AccessorRO& rhs, @@ -173,7 +183,7 @@ struct BincountImplBody { bincount_kernel_rd_global <<>>(lhs, rhs, volume, rect.lo); } - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } void operator()(AccessorRD, false, 1> lhs, @@ -202,11 +212,11 @@ struct BincountImplBody { weighted_bincount_kernel_rd_global <<>>(lhs, rhs, weights, volume, rect.lo); } - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; -/*static*/ void BincountTask::gpu_variant(TaskContext& context) +/*static*/ void BincountTask::gpu_variant(TaskContext context) { bincount_template(context); } diff --git a/src/cunumeric/stat/bincount.h b/src/cunumeric/stat/bincount.h index 0dbdb6b7e..7f8c6fdc3 100644 --- a/src/cunumeric/stat/bincount.h +++ b/src/cunumeric/stat/bincount.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,14 +16,15 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" namespace cunumeric { struct BincountArgs { - const Array& lhs; - const Array& rhs; - const Array& weights; + legate::PhysicalStore lhs{nullptr}; + legate::PhysicalStore rhs{nullptr}; + legate::PhysicalStore weights{nullptr}; + bool has_weights; }; class BincountTask : public CuNumericTask { @@ -31,12 +32,12 @@ class BincountTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_BINCOUNT; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/stat/bincount_omp.cc b/src/cunumeric/stat/bincount_omp.cc index 4f21e95a8..a0fe21817 100644 --- a/src/cunumeric/stat/bincount_omp.cc +++ b/src/cunumeric/stat/bincount_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,7 +25,7 @@ using namespace legate; template struct BincountImplBody { - using VAL = legate_type_of; + using VAL = type_of; std::vector> _bincount(const AccessorRO& rhs, const Rect<1>& rect, @@ -43,13 +43,13 @@ struct BincountImplBody { auto tid = omp_get_thread_num(); std::vector& local_bins = all_local_bins[tid]; #pragma omp for schedule(static) - for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { + for (int64_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { auto value = rhs[idx]; assert(lhs_rect.contains(value)); SumReduction::fold(local_bins[value], 1); } } - return std::move(all_local_bins); + return all_local_bins; } std::vector> _bincount(const AccessorRO& rhs, @@ -69,13 +69,13 @@ struct BincountImplBody { auto tid = omp_get_thread_num(); std::vector& local_bins = all_local_bins[tid]; #pragma omp for schedule(static) - for (size_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { + for (int64_t idx = rect.lo[0]; idx <= rect.hi[0]; ++idx) { auto value = rhs[idx]; assert(lhs_rect.contains(value)); SumReduction::fold(local_bins[value], weights[idx]); } } - return std::move(all_local_bins); + return all_local_bins; } void operator()(AccessorRD, true, 1> lhs, @@ -84,9 +84,11 @@ struct BincountImplBody { const Rect<1>& lhs_rect) const { auto all_local_bins = _bincount(rhs, rect, lhs_rect); - for (auto& local_bins : all_local_bins) - for (size_t bin_num = 0; bin_num < local_bins.size(); ++bin_num) + for (auto& local_bins : all_local_bins) { + for (size_t bin_num = 0; bin_num < local_bins.size(); ++bin_num) { lhs.reduce(bin_num, local_bins[bin_num]); + } + } } void operator()(AccessorRD, true, 1> lhs, @@ -96,13 +98,15 @@ struct BincountImplBody { const Rect<1>& lhs_rect) const { auto all_local_bins = _bincount(rhs, weights, rect, lhs_rect); - for (auto& local_bins : all_local_bins) - for (size_t bin_num = 0; bin_num < local_bins.size(); ++bin_num) + for (auto& local_bins : all_local_bins) { + for (size_t bin_num = 0; bin_num < local_bins.size(); ++bin_num) { 
lhs.reduce(bin_num, local_bins[bin_num]); + } + } } }; -/*static*/ void BincountTask::omp_variant(TaskContext& context) +/*static*/ void BincountTask::omp_variant(TaskContext context) { bincount_template(context); } diff --git a/src/cunumeric/stat/bincount_template.inl b/src/cunumeric/stat/bincount_template.inl index 83ae638e1..02706fa8f 100644 --- a/src/cunumeric/stat/bincount_template.inl +++ b/src/cunumeric/stat/bincount_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,14 +31,16 @@ struct BincountImpl { template ::value>* = nullptr> void operator()(BincountArgs& args) const { - using VAL = legate_type_of; + using VAL = type_of; auto rect = args.rhs.shape<1>(); auto lhs_rect = args.lhs.shape<1>(); - if (rect.empty()) return; + if (rect.empty()) { + return; + } auto rhs = args.rhs.read_accessor(rect); - if (args.weights.dim() == 1) { + if (args.has_weights) { auto weights = args.weights.read_accessor(rect); auto lhs = args.lhs.reduce_accessor, KIND != VariantKind::GPU, 1>(lhs_rect); @@ -60,9 +62,19 @@ struct BincountImpl { template static void bincount_template(TaskContext& context) { - auto& inputs = context.inputs(); - auto& reductions = context.reductions(); - BincountArgs args{reductions[0], inputs[0], inputs[1]}; + auto inputs = context.inputs(); + auto reductions = context.reductions(); + + BincountArgs args; + args.lhs = std::move(reductions[0]); + args.rhs = std::move(inputs[0]); + if (inputs.size() >= 2) { + args.has_weights = true; + args.weights = std::move(inputs[1]); + } else { + args.has_weights = false; + } + type_dispatch(args.rhs.code(), BincountImpl{}, args); } diff --git a/src/cunumeric/stat/histogram.cc b/src/cunumeric/stat/histogram.cc index 7c0caa6b7..c8c19fd11 100644 --- a/src/cunumeric/stat/histogram.cc +++ b/src/cunumeric/stat/histogram.cc 
@@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ using namespace legate; template struct HistogramImplBody { - using VAL = legate_type_of; + using VAL = type_of; // for now, it has been decided to hardcode these types: // @@ -62,7 +62,7 @@ struct HistogramImplBody { } }; -/*static*/ void HistogramTask::cpu_variant(TaskContext& context) +/*static*/ void HistogramTask::cpu_variant(TaskContext context) { histogram_template(context); } diff --git a/src/cunumeric/stat/histogram.cu b/src/cunumeric/stat/histogram.cu index f43fe84d6..ece9cca26 100644 --- a/src/cunumeric/stat/histogram.cu +++ b/src/cunumeric/stat/histogram.cu @@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ namespace cunumeric { template struct HistogramImplBody { - using VAL = legate_type_of; + using VAL = type_of; // for now, it has been decided to hardcode these types: // @@ -69,7 +69,7 @@ struct HistogramImplBody { } }; -/*static*/ void HistogramTask::gpu_variant(TaskContext& context) +/*static*/ void HistogramTask::gpu_variant(TaskContext context) { histogram_template(context); } diff --git a/src/cunumeric/stat/histogram.cuh b/src/cunumeric/stat/histogram.cuh index 9c7bd74c4..1d5ec1b38 100644 --- a/src/cunumeric/stat/histogram.cuh +++ b/src/cunumeric/stat/histogram.cuh @@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -108,7 +108,7 @@ template struct sync_policy_t>> { sync_policy_t() {} - void operator()(cudaStream_t stream) { CHECK_CUDA_STREAM(stream); } + void operator()(cudaStream_t stream) { CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; } // namespace detail diff --git a/src/cunumeric/stat/histogram.h b/src/cunumeric/stat/histogram.h index 217a79e8c..215e5731f 100644 --- a/src/cunumeric/stat/histogram.h +++ b/src/cunumeric/stat/histogram.h @@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,15 +16,15 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" namespace cunumeric { struct HistogramArgs { - const Array& result; - const Array& src; - const Array& bins; - const Array& weights; + legate::PhysicalStore result; + legate::PhysicalStore src; + legate::PhysicalStore bins; + legate::PhysicalStore weights; }; class HistogramTask : public CuNumericTask { @@ -32,12 +32,12 @@ class HistogramTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_HISTOGRAM; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/stat/histogram_cpu.h b/src/cunumeric/stat/histogram_cpu.h index cd0f4304b..28677e52a 100644 --- a/src/cunumeric/stat/histogram_cpu.h +++ b/src/cunumeric/stat/histogram_cpu.h @@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache 
License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,11 +29,12 @@ #include #include -#include "cunumeric/stat/histogram_gen.h" - -#ifndef LEGATE_USE_CUDA -using cudaStream_t = void*; +#if LEGATE_DEFINED(LEGATE_USE_CUDA) and LEGATE_DEFINED(LEGATE_NVCC) +#include "cunumeric/cuda_help.h" +#else +#define cudaStream_t std::int32_t #endif +#include "cunumeric/stat/histogram_gen.h" namespace cunumeric { namespace detail { @@ -61,7 +62,7 @@ struct segmented_sum_t(left); - if (right == sentinel) + if (right == sentinel) { return left_up <= right; - else + } else { return left_up < right; + } } else if constexpr (std::is_same_v && std::is_integral_v) { // upcast to elem_t: // - if (right == sentinel) + if (right == sentinel) { return left <= static_cast(right); - else + } else { return left < right; + } } else { - if (right == sentinel) + if (right == sentinel) { return left <= right; - else + } else { return left < right; + } } } @@ -83,14 +86,16 @@ void histogram_weights(exe_policy_t exe_pol, size_t n_intervals, // |bins| - 1 weight_t* ptr_hist, // result; pre-allocated, sz = n_intervals weight_t* ptr_w = nullptr, // weights array, w - cudaStream_t stream = nullptr) + cudaStream_t stream = {}) { alloc_t alloc_offsets; auto* ptr_offsets = alloc_offsets(exe_pol, n_intervals + 1); alloc_t alloc_w; - if (!ptr_w) { ptr_w = alloc_w(exe_pol, n_samples, 1); } + if (!ptr_w) { + ptr_w = alloc_w(exe_pol, n_samples, 1); + } // in-place src sort + corresponding weights shuffle: // @@ -124,7 +129,7 @@ void histogram_wrapper(exe_policy_t exe_pol, const Rect<1>& weights_rect, const AccessorRD, true, 1>& result, const Rect<1>& result_rect, - cudaStream_t stream = nullptr) + cudaStream_t stream = {}) { auto&& [src_size, src_copy, src_ptr] = accessors::make_accessor_copy(exe_pol, src, src_rect); diff --git a/src/cunumeric/stat/histogram_omp.cc b/src/cunumeric/stat/histogram_omp.cc index b68d7f81e..edae0fa76 100644 --- 
a/src/cunumeric/stat/histogram_omp.cc +++ b/src/cunumeric/stat/histogram_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,7 @@ using namespace legate; template struct HistogramImplBody { - using VAL = legate_type_of; + using VAL = type_of; // for now, it has been decided to hardcode these types: // @@ -113,7 +113,7 @@ struct HistogramImplBody { } }; -/*static*/ void HistogramTask::omp_variant(TaskContext& context) +/*static*/ void HistogramTask::omp_variant(TaskContext context) { histogram_template(context); } diff --git a/src/cunumeric/stat/histogram_template.inl b/src/cunumeric/stat/histogram_template.inl index d7ed940bc..4fc1a69e0 100644 --- a/src/cunumeric/stat/histogram_template.inl +++ b/src/cunumeric/stat/histogram_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,14 +39,16 @@ struct HistogramImpl { template >* = nullptr> void operator()(HistogramArgs& args) const { - using VAL = legate_type_of; + using VAL = type_of; auto result_rect = args.result.shape<1>(); auto src_rect = args.src.shape<1>(); auto bins_rect = args.bins.shape<1>(); auto weights_rect = args.weights.shape<1>(); - if (src_rect.empty()) return; + if (src_rect.empty()) { + return; + } auto result = args.result.reduce_accessor, true, 1>(result_rect); auto src = args.src.read_accessor(src_rect); @@ -67,8 +69,8 @@ struct HistogramImpl { template static void histogram_template(TaskContext& context) { - auto& inputs = context.inputs(); - auto& reductions = context.reductions(); + auto inputs = context.inputs(); + auto reductions = context.reductions(); HistogramArgs args{reductions[0], inputs[0], inputs[1], inputs[2]}; type_dispatch(args.src.code(), HistogramImpl{}, args); } diff --git a/src/cunumeric/ternary/where.cc b/src/cunumeric/ternary/where.cc index 85c602522..3ee46c2c3 100644 --- a/src/cunumeric/ternary/where.cc +++ b/src/cunumeric/ternary/where.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ using namespace legate; template struct WhereImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorWO out, AccessorRO mask, @@ -40,8 +40,9 @@ struct WhereImplBody { auto maskptr = mask.ptr(rect); auto in1ptr = in1.ptr(rect); auto in2ptr = in2.ptr(rect); - for (size_t idx = 0; idx < volume; ++idx) + for (size_t idx = 0; idx < volume; ++idx) { outptr[idx] = maskptr[idx] ? 
in1ptr[idx] : in2ptr[idx]; + } } else { for (size_t idx = 0; idx < volume; ++idx) { auto point = pitches.unflatten(idx, rect.lo); @@ -51,7 +52,7 @@ struct WhereImplBody { } }; -/*static*/ void WhereTask::cpu_variant(TaskContext& context) +/*static*/ void WhereTask::cpu_variant(TaskContext context) { where_template(context); } diff --git a/src/cunumeric/ternary/where.cu b/src/cunumeric/ternary/where.cu index a9dfdb1a3..c665d4004 100644 --- a/src/cunumeric/ternary/where.cu +++ b/src/cunumeric/ternary/where.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -26,7 +26,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) dense_kernel(size_t volume, VAL* out, const bool* mask, const VAL* in1, const VAL* in2) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } out[idx] = mask[idx] ? in1[idx] : in2[idx]; } @@ -35,14 +37,16 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) gen size_t volume, WriteAcc out, MaskAcc mask, ReadAcc in1, ReadAcc in2, Pitches pitches, Rect rect) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } auto point = pitches.unflatten(idx, rect.lo); out[point] = mask[point] ? 
in1[point] : in2[point]; } template struct WhereImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorWO out, AccessorRO mask, @@ -67,11 +71,11 @@ struct WhereImplBody { generic_kernel<<>>( volume, out, mask, in1, in2, pitches, rect); } - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; -/*static*/ void WhereTask::gpu_variant(TaskContext& context) +/*static*/ void WhereTask::gpu_variant(TaskContext context) { where_template(context); } diff --git a/src/cunumeric/ternary/where.h b/src/cunumeric/ternary/where.h index ff8ca4736..e649b7967 100644 --- a/src/cunumeric/ternary/where.h +++ b/src/cunumeric/ternary/where.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,15 +16,15 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" namespace cunumeric { struct WhereArgs { - const Array& out; - const Array& mask; - const Array& in1; - const Array& in2; + legate::PhysicalStore out; + legate::PhysicalStore mask; + legate::PhysicalStore in1; + legate::PhysicalStore in2; }; class WhereTask : public CuNumericTask { @@ -32,12 +32,12 @@ class WhereTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_WHERE; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/ternary/where_omp.cc b/src/cunumeric/ternary/where_omp.cc index 
dd0ed7e55..6a8928ee4 100644 --- a/src/cunumeric/ternary/where_omp.cc +++ b/src/cunumeric/ternary/where_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,7 @@ using namespace legate; template struct WhereImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorWO out, AccessorRO mask, @@ -41,8 +41,9 @@ struct WhereImplBody { auto in1ptr = in1.ptr(rect); auto in2ptr = in2.ptr(rect); #pragma omp parallel for schedule(static) - for (size_t idx = 0; idx < volume; ++idx) + for (size_t idx = 0; idx < volume; ++idx) { outptr[idx] = maskptr[idx] ? in1ptr[idx] : in2ptr[idx]; + } } else { #pragma omp parallel for schedule(static) for (size_t idx = 0; idx < volume; ++idx) { @@ -53,7 +54,7 @@ struct WhereImplBody { } }; -/*static*/ void WhereTask::omp_variant(TaskContext& context) +/*static*/ void WhereTask::omp_variant(TaskContext context) { where_template(context); } diff --git a/src/cunumeric/ternary/where_template.inl b/src/cunumeric/ternary/where_template.inl index ccdc78b5a..1e3d2001a 100644 --- a/src/cunumeric/ternary/where_template.inl +++ b/src/cunumeric/ternary/where_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,21 +32,23 @@ struct WhereImpl { template void operator()(WhereArgs& args) const { - using VAL = legate_type_of; + using VAL = type_of; auto rect = args.out.shape(); Pitches pitches; size_t volume = pitches.flatten(rect); - if (volume == 0) return; + if (volume == 0) { + return; + } auto out = args.out.write_accessor(rect); auto mask = args.mask.read_accessor(rect); auto in1 = args.in1.read_accessor(rect); auto in2 = args.in2.read_accessor(rect); -#ifndef LEGATE_BOUNDS_CHECKS +#if !LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS) // Check to see if this is dense or not bool dense = out.accessor.is_dense_row_major(rect) && in1.accessor.is_dense_row_major(rect) && in2.accessor.is_dense_row_major(rect) && mask.accessor.is_dense_row_major(rect); @@ -62,8 +64,8 @@ struct WhereImpl { template static void where_template(TaskContext& context) { - auto& inputs = context.inputs(); - WhereArgs args{context.outputs()[0], inputs[0], inputs[1], inputs[2]}; + auto inputs = context.inputs(); + WhereArgs args{context.output(0), inputs[0], inputs[1], inputs[2]}; auto dim = std::max(1, args.out.dim()); double_dispatch(dim, args.out.code(), WhereImpl{}, args); } diff --git a/src/cunumeric/transform/flip.cc b/src/cunumeric/transform/flip.cc index 3aa332d57..17dd7b089 100644 --- a/src/cunumeric/transform/flip.cc +++ b/src/cunumeric/transform/flip.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ using namespace legate; template struct FlipImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorWO out, AccessorRO in, @@ -34,14 +34,15 @@ struct FlipImplBody { { for (PointInRectIterator itr(rect); itr.valid(); ++itr) { auto q = *itr; - for (uint32_t idx = 0; idx < axes.size(); ++idx) + for (uint32_t idx = 0; idx < axes.size(); ++idx) { q[axes[idx]] = rect.hi[axes[idx]] - q[axes[idx]]; + } out[*itr] = in[q]; } } }; -/*static*/ void FlipTask::cpu_variant(TaskContext& context) +/*static*/ void FlipTask::cpu_variant(TaskContext context) { flip_template(context); } diff --git a/src/cunumeric/transform/flip.cu b/src/cunumeric/transform/flip.cu index 8c6dc166b..ab7bb0111 100644 --- a/src/cunumeric/transform/flip.cu +++ b/src/cunumeric/transform/flip.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,16 +34,20 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) const uint32_t num_axes) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } auto p = pitches.unflatten(idx, rect.lo); auto q = p; - for (uint32_t idx = 0; idx < num_axes; ++idx) q[axes[idx]] = rect.hi[axes[idx]] - q[axes[idx]]; + for (uint32_t idx = 0; idx < num_axes; ++idx) { + q[axes[idx]] = rect.hi[axes[idx]] - q[axes[idx]]; + } out[p] = in[q]; } template struct FlipImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorWO out, AccessorRO in, @@ -56,15 +60,17 @@ struct FlipImplBody { const size_t blocks = (volume + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; auto num_axes = axes.size(); auto gpu_axes = create_buffer(num_axes, Memory::Kind::Z_COPY_MEM); - for (uint32_t idx = 0; idx < num_axes; ++idx) gpu_axes[idx] = axes[idx]; + for (uint32_t idx = 0; idx < num_axes; ++idx) { + gpu_axes[idx] = axes[idx]; + } auto stream = get_cached_stream(); flip_kernel<<>>( volume, out, in, pitches, rect, gpu_axes, num_axes); - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; -/*static*/ void FlipTask::gpu_variant(TaskContext& context) +/*static*/ void FlipTask::gpu_variant(TaskContext context) { flip_template(context); } diff --git a/src/cunumeric/transform/flip.h b/src/cunumeric/transform/flip.h index 9470bda0b..f7593285d 100644 --- a/src/cunumeric/transform/flip.h +++ b/src/cunumeric/transform/flip.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,13 +16,13 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" namespace cunumeric { struct FlipArgs { - const Array& in; - const Array& out; + legate::PhysicalStore in; + legate::PhysicalStore out; legate::Span axes; }; @@ -31,12 +31,12 @@ class FlipTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_FLIP; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/transform/flip_omp.cc b/src/cunumeric/transform/flip_omp.cc index 775fd6802..eb7e64013 100644 --- a/src/cunumeric/transform/flip_omp.cc +++ b/src/cunumeric/transform/flip_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -23,7 +23,7 @@ using namespace legate; template struct FlipImplBody { - using VAL = legate_type_of; + using VAL = type_of; void operator()(AccessorWO out, AccessorRO in, @@ -37,14 +37,15 @@ struct FlipImplBody { for (size_t idx = 0; idx < volume; ++idx) { auto p = pitches.unflatten(idx, rect.lo); auto q = p; - for (uint32_t idx = 0; idx < axes.size(); ++idx) + for (uint32_t idx = 0; idx < axes.size(); ++idx) { q[axes[idx]] = rect.hi[axes[idx]] - q[axes[idx]]; + } out[p] = in[q]; } } }; -/*static*/ void FlipTask::omp_variant(TaskContext& context) +/*static*/ void FlipTask::omp_variant(TaskContext context) { flip_template(context); } diff --git a/src/cunumeric/transform/flip_template.inl b/src/cunumeric/transform/flip_template.inl index 6af541fc6..cc050313c 100644 --- a/src/cunumeric/transform/flip_template.inl +++ b/src/cunumeric/transform/flip_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -32,14 +32,16 @@ struct FlipImpl { template void operator()(FlipArgs& args) const { - using VAL = legate_type_of; + using VAL = type_of; auto rect = args.out.shape().intersection(args.in.shape()); Pitches pitches; size_t volume = pitches.flatten(rect); - if (volume == 0) return; + if (volume == 0) { + return; + } auto out = args.out.write_accessor(rect); auto in = args.in.read_accessor(rect); @@ -51,8 +53,8 @@ struct FlipImpl { template static void flip_template(TaskContext& context) { - auto& inputs = context.inputs(); - auto& outputs = context.outputs(); + auto inputs = context.inputs(); + auto outputs = context.outputs(); auto& scalars = context.scalars(); FlipArgs args{inputs[0], outputs[0], scalars[0].values()}; diff --git a/src/cunumeric/typedefs.h b/src/cunumeric/typedefs.h new file mode 100644 index 000000000..840cf875a --- /dev/null +++ b/src/cunumeric/typedefs.h @@ -0,0 +1,28 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#pragma once + +#include + +#include "legate.h" + +namespace cunumeric { + +using Array = legate::PhysicalStore; +using Scalar = legate::Scalar; + +} // namespace cunumeric diff --git a/src/cunumeric/unary/convert.cc b/src/cunumeric/unary/convert.cc index a3fae7fbb..78ece02eb 100644 --- a/src/cunumeric/unary/convert.cc +++ b/src/cunumeric/unary/convert.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,8 +24,8 @@ using namespace legate; template struct ConvertImplBody { using OP = ConvertOp; - using SRC = legate_type_of; - using DST = legate_type_of; + using SRC = type_of; + using DST = type_of; void operator()(OP func, AccessorWO out, @@ -38,7 +38,9 @@ struct ConvertImplBody { if (dense) { auto outptr = out.ptr(rect); auto inptr = in.ptr(rect); - for (size_t idx = 0; idx < volume; ++idx) outptr[idx] = func(inptr[idx]); + for (size_t idx = 0; idx < volume; ++idx) { + outptr[idx] = func(inptr[idx]); + } } else { for (size_t idx = 0; idx < volume; ++idx) { auto p = pitches.unflatten(idx, rect.lo); @@ -48,7 +50,7 @@ struct ConvertImplBody { } }; -/*static*/ void ConvertTask::cpu_variant(TaskContext& context) +/*static*/ void ConvertTask::cpu_variant(TaskContext context) { convert_template(context); } diff --git a/src/cunumeric/unary/convert.cu b/src/cunumeric/unary/convert.cu index ea1d7cfb1..564473a4e 100644 --- a/src/cunumeric/unary/convert.cu +++ b/src/cunumeric/unary/convert.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) dense_kernel(size_t volume, Function func, RES* out, const ARG* in) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } out[idx] = func(in[idx]); } @@ -35,7 +37,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) generic_kernel(size_t volume, Function func, WriteAcc out, ReadAcc in, Pitches pitches, Rect rect) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } auto point = pitches.unflatten(idx, rect.lo); out[point] = func(in[point]); } @@ -43,8 +47,8 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) template struct ConvertImplBody { using OP = ConvertOp; - using SRC = legate_type_of; - using DST = legate_type_of; + using SRC = type_of; + using DST = type_of; void operator()(OP func, AccessorWO out, @@ -64,11 +68,11 @@ struct ConvertImplBody { generic_kernel<<>>( volume, func, out, in, pitches, rect); } - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; -/*static*/ void ConvertTask::gpu_variant(TaskContext& context) +/*static*/ void ConvertTask::gpu_variant(TaskContext context) { convert_template(context); } diff --git a/src/cunumeric/unary/convert.h b/src/cunumeric/unary/convert.h index 05bbfe112..265b75e70 100644 --- a/src/cunumeric/unary/convert.h +++ b/src/cunumeric/unary/convert.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,14 +16,14 @@ #pragma once +#include "cunumeric/cunumeric_task.h" #include "cunumeric/unary/convert_util.h" -#include "cunumeric/cunumeric.h" namespace cunumeric { struct ConvertArgs { - const Array& out; - const Array& in; + legate::PhysicalStore out; + legate::PhysicalStore in; ConvertCode nan_op; }; @@ -32,12 +32,12 @@ class ConvertTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_CONVERT; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/unary/convert_omp.cc b/src/cunumeric/unary/convert_omp.cc index de2f20478..d0823daf3 100644 --- a/src/cunumeric/unary/convert_omp.cc +++ b/src/cunumeric/unary/convert_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -24,8 +24,8 @@ using namespace legate; template struct ConvertImplBody { using OP = ConvertOp; - using SRC = legate_type_of; - using DST = legate_type_of; + using SRC = type_of; + using DST = type_of; void operator()(OP func, AccessorWO out, @@ -39,7 +39,9 @@ struct ConvertImplBody { auto outptr = out.ptr(rect); auto inptr = in.ptr(rect); #pragma omp parallel for schedule(static) - for (size_t idx = 0; idx < volume; ++idx) outptr[idx] = func(inptr[idx]); + for (size_t idx = 0; idx < volume; ++idx) { + outptr[idx] = func(inptr[idx]); + } } else { #pragma omp parallel for schedule(static) for (size_t idx = 0; idx < volume; ++idx) { @@ -50,7 +52,7 @@ struct ConvertImplBody { } }; -/*static*/ void ConvertTask::omp_variant(TaskContext& context) +/*static*/ void ConvertTask::omp_variant(TaskContext context) { convert_template(context); } diff --git a/src/cunumeric/unary/convert_template.inl b/src/cunumeric/unary/convert_template.inl index 8d507d35f..d78f13b4d 100644 --- a/src/cunumeric/unary/convert_template.inl +++ b/src/cunumeric/unary/convert_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,20 +34,22 @@ struct ConvertImpl { void operator()(ConvertArgs& args) const { using OP = ConvertOp; - using SRC = legate_type_of; - using DST = legate_type_of; + using SRC = type_of; + using DST = type_of; auto rect = args.out.shape(); Pitches pitches; size_t volume = pitches.flatten(rect); - if (volume == 0) return; + if (volume == 0) { + return; + } auto out = args.out.write_accessor(rect); auto in = args.in.read_accessor(rect); -#ifndef LEGATE_BOUNDS_CHECKS +#if !LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS) // Check to see if this is dense or not bool dense = out.accessor.is_dense_row_major(rect) && in.accessor.is_dense_row_major(rect); #else @@ -100,8 +102,7 @@ struct SourceTypeDispatch { template static void convert_template(TaskContext& context) { - ConvertArgs args{ - context.outputs()[0], context.inputs()[0], context.scalars()[0].value()}; + ConvertArgs args{context.output(0), context.input(0), context.scalar(0).value()}; type_dispatch(args.in.code(), SourceTypeDispatch{}, args); } diff --git a/src/cunumeric/unary/convert_util.h b/src/cunumeric/unary/convert_util.h index 5fb340fd7..08951f6b1 100644 --- a/src/cunumeric/unary/convert_util.h +++ b/src/cunumeric/unary/convert_util.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/unary/isnan.h" namespace cunumeric { @@ -48,8 +48,8 @@ struct ConvertOp {}; template struct ConvertOp { - using SRC = legate::legate_type_of; - using DST = legate::legate_type_of; + using SRC = legate::type_of; + using DST = legate::type_of; template ::value or @@ -64,10 +64,11 @@ struct ConvertOp { !legate::is_complex_type::value>* = nullptr> constexpr DST operator()(const _SRC& src) const { - if constexpr (DST_TYPE == legate::Type::Code::BOOL) + if constexpr (DST_TYPE == legate::Type::Code::BOOL) { return static_cast(src.real()) || static_cast(src.imag()); - else + } else { return static_cast(src.real()); + } // Unreachable assert(false); return DST{}; @@ -76,7 +77,7 @@ struct ConvertOp { template struct ConvertOp { - using SRC = legate::legate_type_of; + using SRC = legate::type_of; template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const @@ -93,7 +94,7 @@ struct ConvertOp { template struct ConvertOp { - using DST = legate::legate_type_of; + using DST = legate::type_of; constexpr DST operator()(const __half& src) const { @@ -103,8 +104,8 @@ struct ConvertOp { template struct ConvertOp { - using SRC = legate::legate_type_of; - using DST = legate::legate_type_of; + using SRC = legate::type_of; + using DST = legate::type_of; template ::value or @@ -125,7 +126,7 @@ struct ConvertOp { template struct ConvertOp { - using SRC = legate::legate_type_of; + using SRC = legate::type_of; template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const @@ -144,7 +145,7 @@ struct ConvertOp { template struct ConvertOp { - using DST = legate::legate_type_of; + using DST = legate::type_of; constexpr DST operator()(const __half& src) const { @@ -155,8 +156,8 @@ struct ConvertOp { template struct ConvertOp { - using SRC = legate::legate_type_of; - using DST = legate::legate_type_of; + using SRC = legate::type_of; + 
using DST = legate::type_of; template ::value or @@ -177,7 +178,7 @@ struct ConvertOp { template struct ConvertOp { - using SRC = legate::legate_type_of; + using SRC = legate::type_of; template ::value>* = nullptr> __CUDA_HD__ __half operator()(const _SRC& src) const @@ -196,7 +197,7 @@ struct ConvertOp { template struct ConvertOp { - using DST = legate::legate_type_of; + using DST = legate::type_of; constexpr DST operator()(const __half& src) const { diff --git a/src/cunumeric/unary/isnan.h b/src/cunumeric/unary/isnan.h index d3c4d62fa..f891f846a 100644 --- a/src/cunumeric/unary/isnan.h +++ b/src/cunumeric/unary/isnan.h @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/cunumeric/unary/scalar_unary_red.cc b/src/cunumeric/unary/scalar_unary_red.cc index c10e065f9..94640035a 100644 --- a/src/cunumeric/unary/scalar_unary_red.cc +++ b/src/cunumeric/unary/scalar_unary_red.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ namespace cunumeric { -/*static*/ void ScalarUnaryRedTask::cpu_variant(TaskContext& context) +/*static*/ void ScalarUnaryRedTask::cpu_variant(TaskContext context) { scalar_unary_red_template(context); } diff --git a/src/cunumeric/unary/scalar_unary_red.cu b/src/cunumeric/unary/scalar_unary_red.cu index 76dcaeb32..71521be73 100644 --- a/src/cunumeric/unary/scalar_unary_red.cu +++ b/src/cunumeric/unary/scalar_unary_red.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,7 +14,7 @@ * */ -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/unary/scalar_unary_red.h" #include "cunumeric/unary/scalar_unary_red_template.inl" #include "cunumeric/execution_policy/reduction/scalar_reduction.cuh" @@ -23,7 +23,7 @@ namespace cunumeric { using namespace legate; -/*static*/ void ScalarUnaryRedTask::gpu_variant(TaskContext& context) +/*static*/ void ScalarUnaryRedTask::gpu_variant(TaskContext context) { scalar_unary_red_template(context); } diff --git a/src/cunumeric/unary/scalar_unary_red.h b/src/cunumeric/unary/scalar_unary_red.h index 570c0d605..160efaa0a 100644 --- a/src/cunumeric/unary/scalar_unary_red.h +++ b/src/cunumeric/unary/scalar_unary_red.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,18 +16,18 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/unary/unary_red_util.h" namespace cunumeric { struct ScalarUnaryRedArgs { - const Array& out; - const Array& in; - const Array& where; + legate::PhysicalStore out; + legate::PhysicalStore in; + legate::PhysicalStore where; UnaryRedCode op_code; legate::DomainPoint shape; - std::vector args; + std::vector args; }; // Unary reduction task that produces scalar results @@ -36,12 +36,12 @@ class ScalarUnaryRedTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_SCALAR_UNARY_RED; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if 
LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/unary/scalar_unary_red_omp.cc b/src/cunumeric/unary/scalar_unary_red_omp.cc index 646f0193a..1bba055b3 100644 --- a/src/cunumeric/unary/scalar_unary_red_omp.cc +++ b/src/cunumeric/unary/scalar_unary_red_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ namespace cunumeric { -/*static*/ void ScalarUnaryRedTask::omp_variant(TaskContext& context) +/*static*/ void ScalarUnaryRedTask::omp_variant(TaskContext context) { scalar_unary_red_template(context); } diff --git a/src/cunumeric/unary/scalar_unary_red_template.inl b/src/cunumeric/unary/scalar_unary_red_template.inl index 35173abeb..8afcd5a1b 100644 --- a/src/cunumeric/unary/scalar_unary_red_template.inl +++ b/src/cunumeric/unary/scalar_unary_red_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -18,7 +18,7 @@ // Useful for IDEs #include -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/unary/scalar_unary_red.h" #include "cunumeric/unary/unary_red_util.h" #include "cunumeric/pitches.h" @@ -33,7 +33,7 @@ struct ScalarUnaryRed { using OP = UnaryRedOp; using LG_OP = typename OP::OP; using LHS = typename OP::VAL; - using RHS = legate_type_of; + using RHS = type_of; using OUT = AccessorRD; using IN = AccessorRO; using WHERE = AccessorRO; @@ -64,11 +64,17 @@ struct ScalarUnaryRed { shape = args.shape; out = args.out.reduce_accessor(); - if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { to_find = args.args[0].scalar(); } - if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { mu = args.args[0].scalar(); } + if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { + to_find = args.args[0].value(); + } + if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { + mu = args.args[0].value(); + } - if constexpr (HAS_WHERE) where = args.where.read_accessor(rect); -#ifndef LEGATE_BOUNDS_CHECKS + if constexpr (HAS_WHERE) { + where = args.where.read_accessor(rect); + } +#if !LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS) // Check to see if this is dense or not if (in.accessor.is_dense_row_major(rect)) { dense = true; @@ -76,7 +82,9 @@ struct ScalarUnaryRed { } if constexpr (HAS_WHERE) { dense = dense && where.accessor.is_dense_row_major(rect); - if (dense) whereptr = where.ptr(rect); + if (dense) { + whereptr = where.ptr(rect); + } } #endif } @@ -84,18 +92,28 @@ struct ScalarUnaryRed { __CUDA_HD__ void operator()(LHS& lhs, size_t idx, LHS identity, DenseReduction) const noexcept { bool mask = true; - if constexpr (HAS_WHERE) mask = whereptr[idx]; + if constexpr (HAS_WHERE) { + mask = whereptr[idx]; + } if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - if (mask && (inptr[idx] == to_find)) { lhs = true; } + if (mask && (inptr[idx] == to_find)) { + lhs = true; + } } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN 
|| OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { auto p = pitches.unflatten(idx, origin); - if (mask) OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); + if (mask) { + OP::template fold(lhs, OP::convert(p, shape, identity, inptr[idx])); + } } else if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { - if (mask) OP::template fold(lhs, OP::convert(inptr[idx] - mu, identity)); + if (mask) { + OP::template fold(lhs, OP::convert(inptr[idx] - mu, identity)); + } } else { - if (mask) OP::template fold(lhs, OP::convert(inptr[idx], identity)); + if (mask) { + OP::template fold(lhs, OP::convert(inptr[idx], identity)); + } } } @@ -103,24 +121,34 @@ struct ScalarUnaryRed { { auto p = pitches.unflatten(idx, origin); bool mask = true; - if constexpr (HAS_WHERE) mask = where[p]; + if constexpr (HAS_WHERE) { + mask = where[p]; + } if constexpr (OP_CODE == UnaryRedCode::CONTAINS) { - if (mask && (in[p] == to_find)) { lhs = true; } + if (mask && (in[p] == to_find)) { + lhs = true; + } } else if constexpr (OP_CODE == UnaryRedCode::ARGMAX || OP_CODE == UnaryRedCode::ARGMIN || OP_CODE == UnaryRedCode::NANARGMAX || OP_CODE == UnaryRedCode::NANARGMIN) { - if (mask) OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); + if (mask) { + OP::template fold(lhs, OP::convert(p, shape, identity, in[p])); + } } else if constexpr (OP_CODE == UnaryRedCode::VARIANCE) { - if (mask) OP::template fold(lhs, OP::convert(in[p] - mu, identity)); + if (mask) { + OP::template fold(lhs, OP::convert(in[p] - mu, identity)); + } } else { - if (mask) OP::template fold(lhs, OP::convert(in[p], identity)); + if (mask) { + OP::template fold(lhs, OP::convert(in[p], identity)); + } } } void execute() const noexcept { auto identity = LG_OP::identity; -#ifndef LEGATE_BOUNDS_CHECKS +#if !LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS) // The constexpr if here prevents the DenseReduction from being instantiated for GPU kernels // which limits compile times and binary sizes. 
if constexpr (KIND != VariantKind::GPU) { @@ -153,37 +181,38 @@ struct ScalarUnaryRedDispatch { void operator()(ScalarUnaryRedArgs& args, bool has_where) const { auto dim = std::max(1, args.in.dim()); - if (has_where) + if (has_where) { double_dispatch(dim, args.in.code(), ScalarUnaryRedImpl{}, args); - else + } else { double_dispatch(dim, args.in.code(), ScalarUnaryRedImpl{}, args); + } } }; template static void scalar_unary_red_template(TaskContext& context) { - auto& inputs = context.inputs(); auto& scalars = context.scalars(); - auto op_code = scalars[0].value(); - auto shape = scalars[1].value(); - bool has_where = scalars[2].value(); - size_t start_idx = has_where ? 2 : 1; - std::vector extra_args; - extra_args.reserve(inputs.size() - start_idx); - for (size_t idx = start_idx; idx < inputs.size(); ++idx) - extra_args.emplace_back(std::move(inputs[idx])); + auto op_code = scalars[0].value(); + auto shape = scalars[1].value(); + bool has_where = scalars[2].value(); + + std::vector extra_args; + extra_args.reserve(scalars.size() - 3); + for (size_t idx = 3; idx < scalars.size(); ++idx) { + extra_args.emplace_back(std::move(scalars[idx])); + } + // If the RHS was a scalar, use (1,) as the shape if (shape.dim == 0) { shape.dim = 1; shape[0] = 1; } - Array dummy_where; - ScalarUnaryRedArgs args{context.reductions()[0], - inputs[0], - has_where ? inputs[1] : dummy_where, + ScalarUnaryRedArgs args{context.reduction(0), + context.input(0), + has_where ? context.input(1) : PhysicalStore{nullptr}, op_code, shape, std::move(extra_args)}; diff --git a/src/cunumeric/unary/unary_op.cc b/src/cunumeric/unary/unary_op.cc index 53c085113..547e75ec0 100644 --- a/src/cunumeric/unary/unary_op.cc +++ b/src/cunumeric/unary/unary_op.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -38,7 +38,9 @@ struct UnaryOpImplBody { if (dense) { auto outptr = out.ptr(rect); auto inptr = in.ptr(rect); - for (size_t idx = 0; idx < volume; ++idx) outptr[idx] = func(inptr[idx]); + for (size_t idx = 0; idx < volume; ++idx) { + outptr[idx] = func(inptr[idx]); + } } else { for (size_t idx = 0; idx < volume; ++idx) { auto p = pitches.unflatten(idx, rect.lo); @@ -60,7 +62,9 @@ struct PointCopyImplBody { if (dense) { auto outptr = out.ptr(rect); auto inptr = in.ptr(rect); - for (size_t idx = 0; idx < volume; ++idx) outptr[idx] = inptr[idx]; + for (size_t idx = 0; idx < volume; ++idx) { + outptr[idx] = inptr[idx]; + } } else { for (size_t idx = 0; idx < volume; ++idx) { auto p = pitches.unflatten(idx, rect.lo); @@ -90,7 +94,9 @@ struct MultiOutUnaryOpImplBody { auto lhsptr = lhs.ptr(rect); auto rhs1ptr = rhs1.ptr(rect); auto rhs2ptr = rhs2.ptr(rect); - for (size_t idx = 0; idx < volume; ++idx) lhsptr[idx] = func(rhs1ptr[idx], &rhs2ptr[idx]); + for (size_t idx = 0; idx < volume; ++idx) { + lhsptr[idx] = func(rhs1ptr[idx], &rhs2ptr[idx]); + } } else { for (size_t idx = 0; idx < volume; ++idx) { auto p = pitches.unflatten(idx, rect.lo); @@ -100,7 +106,7 @@ struct MultiOutUnaryOpImplBody { } }; -/*static*/ void UnaryOpTask::cpu_variant(TaskContext& context) +/*static*/ void UnaryOpTask::cpu_variant(TaskContext context) { unary_op_template(context); } diff --git a/src/cunumeric/unary/unary_op.cu b/src/cunumeric/unary/unary_op.cu index 41de2e20b..88838f958 100644 --- a/src/cunumeric/unary/unary_op.cu +++ b/src/cunumeric/unary/unary_op.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,7 +26,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) dense_kernel(size_t volume, Function func, RES* out, const ARG* in) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } out[idx] = func(in[idx]); } @@ -35,7 +37,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) generic_kernel(size_t volume, Function func, WriteAcc out, ReadAcc in, Pitches pitches, Rect rect) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } auto point = pitches.unflatten(idx, rect.lo); out[point] = func(in[point]); } @@ -45,7 +49,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) dense_copy_kernel(size_t volume, VAL* out, const VAL* in) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } out[idx] = in[idx]; } @@ -58,7 +64,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) Rect rect) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } auto point = pitches.unflatten(idx, rect.lo); out[point] = in[point]; } @@ -87,7 +95,7 @@ struct UnaryOpImplBody { generic_kernel<<>>( volume, func, out, in, pitches, rect); } - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; @@ -109,7 +117,7 @@ struct PointCopyImplBody { } else { generic_copy_kernel<<>>(volume, out, in, pitches, rect); } - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; @@ -118,7 +126,9 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) dense_kernel_multiout(size_t volume, Function func, LHS* lhs, const RHS1* rhs1, RHS2* rhs2) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } lhs[idx] = func(rhs1[idx], &rhs2[idx]); } @@ -138,7 +148,9 @@ static __global__ void 
__launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) Rect rect) { const size_t idx = global_tid_1d(); - if (idx >= volume) return; + if (idx >= volume) { + return; + } auto point = pitches.unflatten(idx, rect.lo); lhs[point] = func(rhs1[point], rhs2.ptr(point)); } @@ -171,11 +183,11 @@ struct MultiOutUnaryOpImplBody { generic_kernel_multiout<<>>( volume, func, lhs, rhs1, rhs2, pitches, rect); } - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; -/*static*/ void UnaryOpTask::gpu_variant(TaskContext& context) +/*static*/ void UnaryOpTask::gpu_variant(TaskContext context) { unary_op_template(context); } diff --git a/src/cunumeric/unary/unary_op.h b/src/cunumeric/unary/unary_op.h index a4439dbd8..b1167e344 100644 --- a/src/cunumeric/unary/unary_op.h +++ b/src/cunumeric/unary/unary_op.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,22 +16,22 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/unary/unary_op_util.h" namespace cunumeric { struct UnaryOpArgs { - const Array& in; - const Array& out; + legate::PhysicalStore in; + legate::PhysicalStore out; UnaryOpCode op_code; - std::vector args; + std::vector args; }; struct MultiOutUnaryOpArgs { - const Array& in; - const Array& out1; - const Array& out2; + legate::PhysicalStore in; + legate::PhysicalStore out1; + legate::PhysicalStore out2; UnaryOpCode op_code; }; @@ -40,12 +40,12 @@ class UnaryOpTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_UNARY_OP; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/unary/unary_op_omp.cc b/src/cunumeric/unary/unary_op_omp.cc index 1badb93a8..fad475fce 100644 --- a/src/cunumeric/unary/unary_op_omp.cc +++ b/src/cunumeric/unary/unary_op_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -39,7 +39,9 @@ struct UnaryOpImplBody { auto outptr = out.ptr(rect); auto inptr = in.ptr(rect); #pragma omp parallel for schedule(static) - for (size_t idx = 0; idx < volume; ++idx) outptr[idx] = func(inptr[idx]); + for (size_t idx = 0; idx < volume; ++idx) { + outptr[idx] = func(inptr[idx]); + } } else { #pragma omp parallel for schedule(static) for (size_t idx = 0; idx < volume; ++idx) { @@ -63,7 +65,9 @@ struct PointCopyImplBody { auto outptr = out.ptr(rect); auto inptr = in.ptr(rect); #pragma omp parallel for schedule(static) - for (size_t idx = 0; idx < volume; ++idx) outptr[idx] = inptr[idx]; + for (size_t idx = 0; idx < volume; ++idx) { + outptr[idx] = inptr[idx]; + } } else { #pragma omp parallel for schedule(static) for (size_t idx = 0; idx < volume; ++idx) { @@ -95,7 +99,9 @@ struct MultiOutUnaryOpImplBody { auto rhs1ptr = rhs1.ptr(rect); auto rhs2ptr = rhs2.ptr(rect); #pragma omp parallel for schedule(static) - for (size_t idx = 0; idx < volume; ++idx) lhsptr[idx] = func(rhs1ptr[idx], &rhs2ptr[idx]); + for (size_t idx = 0; idx < volume; ++idx) { + lhsptr[idx] = func(rhs1ptr[idx], &rhs2ptr[idx]); + } } else { #pragma omp parallel for schedule(static) for (size_t idx = 0; idx < volume; ++idx) { @@ -106,7 +112,7 @@ struct MultiOutUnaryOpImplBody { } }; -/*static*/ void UnaryOpTask::omp_variant(TaskContext& context) +/*static*/ void UnaryOpTask::omp_variant(TaskContext context) { unary_op_template(context); } diff --git a/src/cunumeric/unary/unary_op_template.inl b/src/cunumeric/unary/unary_op_template.inl index 548cba9bf..f245a90ae 100644 --- a/src/cunumeric/unary/unary_op_template.inl +++ b/src/cunumeric/unary/unary_op_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -47,12 +47,14 @@ struct UnaryOpImpl { Pitches pitches; size_t volume = pitches.flatten(rect); - if (volume == 0) return; + if (volume == 0) { + return; + } auto out = args.out.write_accessor(rect); auto in = args.in.read_accessor(rect); -#ifndef LEGATE_BOUNDS_CHECKS +#if !LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS) // Check to see if this is dense or not bool dense = out.accessor.is_dense_row_major(rect) && in.accessor.is_dense_row_major(rect); #else @@ -88,13 +90,15 @@ struct MultiOutUnaryOpImpl { Pitches pitches; size_t volume = pitches.flatten(rect); - if (volume == 0) return; + if (volume == 0) { + return; + } auto lhs = args.out1.write_accessor(rect); auto rhs1 = args.in.read_accessor(rect); auto rhs2 = args.out2.write_accessor(rect); -#ifndef LEGATE_BOUNDS_CHECKS +#if !LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS) // Check to see if this is dense or not bool dense = lhs.accessor.is_dense_row_major(rect) && rhs1.accessor.is_dense_row_major(rect) && rhs2.accessor.is_dense_row_major(rect); @@ -122,7 +126,7 @@ struct UnaryCopyImpl { template void operator()(UnaryOpArgs& args) const { - using VAL = legate_type_of; + using VAL = type_of; execute_copy(args); } @@ -141,12 +145,14 @@ struct UnaryCopyImpl { Pitches pitches; size_t volume = pitches.flatten(rect); - if (volume == 0) return; + if (volume == 0) { + return; + } auto out = args.out.write_accessor(rect); auto in = args.in.read_accessor(rect); -#ifndef LEGATE_BOUNDS_CHECKS +#if !LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS) // Check to see if this is dense or not bool dense = out.accessor.is_dense_row_major(rect) && in.accessor.is_dense_row_major(rect); #else @@ -165,7 +171,7 @@ struct UnaryOpDispatch { { auto dim = std::max(args.in.dim(), 1); if ((OP_CODE == UnaryOpCode::COPY) && (args.in.code() == Type::Code::FIXED_ARRAY)) { - auto& type = static_cast(args.in.type()); + auto type = args.in.type().as_fixed_array_type(); cunumeric::double_dispatch(dim, type.num_elements(), UnaryCopyImpl{}, args); } else { auto code = OP_CODE == 
UnaryOpCode::GETARG ? args.out.code() : args.in.code(); @@ -177,8 +183,8 @@ struct UnaryOpDispatch { template static void unary_op_template(TaskContext& context) { - auto& inputs = context.inputs(); - auto& outputs = context.outputs(); + auto inputs = context.inputs(); + auto outputs = context.outputs(); auto& scalars = context.scalars(); auto op_code = scalars[0].value(); @@ -198,8 +204,10 @@ static void unary_op_template(TaskContext& context) break; } default: { - std::vector extra_args; - for (size_t idx = 1; idx < inputs.size(); ++idx) extra_args.push_back(std::move(inputs[idx])); + std::vector extra_args; + for (size_t idx = 1; idx < scalars.size(); ++idx) { + extra_args.push_back(scalars[idx]); + } UnaryOpArgs args{inputs[0], outputs[0], op_code, std::move(extra_args)}; op_dispatch(args.op_code, UnaryOpDispatch{}, args); diff --git a/src/cunumeric/unary/unary_op_util.h b/src/cunumeric/unary/unary_op_util.h index d94273feb..bf9e5a5f4 100644 --- a/src/cunumeric/unary/unary_op_util.h +++ b/src/cunumeric/unary/unary_op_util.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,10 +16,14 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/arg.h" #include "cunumeric/arg.inl" +#ifdef __NVCC__ +#include "thrust/complex.h" +#endif + #define _USE_MATH_DEFINES #include @@ -199,9 +203,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const @@ -238,9 +242,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -252,9 +256,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -268,7 +272,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -280,9 +284,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -294,9 +298,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} 
constexpr decltype(auto) operator()(const T& x) const { @@ -310,7 +314,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -322,9 +326,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -336,9 +340,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -352,7 +356,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -364,9 +368,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = legate::is_floating_point::value; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -380,7 +384,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -392,9 +396,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_point; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -406,13 
+410,12 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) + UnaryOp(const std::vector& args) + : min{args[0].value()}, max{args[1].value()} { assert(args.size() == 2); - min = args[0].scalar(); - max = args[1].scalar(); } constexpr T operator()(const T& x) const { return (x < min) ? min : (x > max) ? max : x; } @@ -423,10 +426,10 @@ struct UnaryOp { template struct UnaryOp { - using T = legate::legate_type_of; + using T = legate::type_of; static constexpr bool valid = true; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr T operator()(const T& x) const @@ -444,9 +447,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr T operator()(const T& x) const { return x; } }; @@ -454,9 +457,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -468,9 +471,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -484,7 +487,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -496,9 +499,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = 
is_floating_point; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { return x * T{M_PI / 180.0}; } }; @@ -508,7 +511,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -519,9 +522,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -533,9 +536,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr T operator()(const T& x) const @@ -562,7 +565,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -574,9 +577,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const @@ -598,7 +601,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -610,9 +613,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_point; - 
using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -623,20 +626,20 @@ struct UnaryOp { template struct UnaryOp { - using T = Argval>; + using T = Argval>; static constexpr bool valid = true; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { return x.arg; } }; template struct UnaryOp { - using T = legate::legate_type_of; + using T = legate::type_of; static constexpr bool valid = legate::is_complex_type::value; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { return x.imag(); } }; @@ -645,9 +648,9 @@ template struct UnaryOp { static constexpr bool valid = legate::is_integral::value && CODE != legate::Type::Code::BOOL; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr T operator()(const T& x) const { return ~x; } }; @@ -655,9 +658,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr bool operator()(const T& x) const @@ -683,9 +686,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr bool operator()(const T& x) const @@ -711,9 +714,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& 
args) {} template ::value>* = nullptr> constexpr bool operator()(const T& x) const @@ -741,9 +744,9 @@ template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -756,9 +759,9 @@ template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -772,7 +775,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -785,9 +788,9 @@ template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const @@ -809,7 +812,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -822,9 +825,9 @@ template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; ; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr decltype(auto) operator()(const T& x) const @@ -846,7 +849,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const 
{ @@ -858,9 +861,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr bool operator()(const T& x) const @@ -878,9 +881,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr T operator()(const T& x) const { return -x; } }; @@ -888,9 +891,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = legate::is_floating_point::value; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr T operator()(const T& x) const { return x * 180.0 / M_PI; } }; @@ -900,7 +903,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -910,20 +913,20 @@ struct UnaryOp { template struct UnaryOp { - using T = legate::legate_type_of; + using T = legate::type_of; static constexpr bool valid = legate::is_complex_type::value; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { return x.real(); } }; template struct UnaryOp { - using T = legate::legate_type_of; + using T = legate::type_of; static constexpr bool valid = true; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr T operator()(const T& x) const { @@ -937,7 +940,7 @@ struct UnaryOp { using T = __half; static constexpr bool valid = true; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ 
-948,9 +951,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const @@ -970,7 +973,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -998,9 +1001,9 @@ constexpr T sign(const T& x) template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} template ::value>* = nullptr> constexpr decltype(auto) operator()(const _T& x) const @@ -1024,7 +1027,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -1035,9 +1038,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = legate::is_floating_point::value; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr bool operator()(const T& x) const { @@ -1051,7 +1054,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ bool operator()(const __half& x) const { @@ -1063,9 +1066,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -1077,9 +1080,9 @@ struct UnaryOp { 
template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -1093,7 +1096,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -1105,19 +1108,29 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr T operator()(const T& x) const { return x * x; } }; +template <> +struct UnaryOp { + static constexpr bool valid = true; + using T = bool; + + UnaryOp(const std::vector& args) {} + + constexpr bool operator()(const bool& x) const { return x && x; } +}; + template struct UnaryOp { static constexpr bool valid = true; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -1129,9 +1142,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -1143,9 +1156,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = is_floating_or_complex; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -1157,9 +1170,9 @@ struct UnaryOp { template struct UnaryOp { static constexpr bool valid = 
legate::is_floating_point::value; - using T = legate::legate_type_of; + using T = legate::type_of; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} constexpr decltype(auto) operator()(const T& x) const { @@ -1173,7 +1186,7 @@ struct UnaryOp { static constexpr bool valid = true; using T = __half; - UnaryOp(const std::vector& args) {} + UnaryOp(const std::vector& args) {} __CUDA_HD__ __half operator()(const __half& x) const { @@ -1190,7 +1203,7 @@ struct MultiOutUnaryOp { template struct MultiOutUnaryOp { static constexpr bool valid = legate::is_floating_point::value; - using RHS1 = legate::legate_type_of; + using RHS1 = legate::type_of; using RHS2 = int32_t; using LHS = RHS1; @@ -1218,7 +1231,7 @@ struct MultiOutUnaryOp { template struct MultiOutUnaryOp { static constexpr bool valid = legate::is_floating_point::value; - using RHS1 = legate::legate_type_of; + using RHS1 = legate::type_of; using RHS2 = RHS1; using LHS = RHS1; diff --git a/src/cunumeric/unary/unary_red.cc b/src/cunumeric/unary/unary_red.cc index b37d1a4b2..827f27a9c 100644 --- a/src/cunumeric/unary/unary_red.cc +++ b/src/cunumeric/unary/unary_red.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,7 +25,7 @@ template struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; - using RHS = legate_type_of; + using RHS = type_of; void operator()(AccessorRD lhs, AccessorRO rhs, @@ -38,7 +38,9 @@ struct UnaryRedImplBody { for (size_t idx = 0; idx < volume; ++idx) { auto point = pitches.unflatten(idx, rect.lo); bool mask = true; - if constexpr (HAS_WHERE) mask = where[point]; + if constexpr (HAS_WHERE) { + mask = where[point]; + } if (mask) { auto identity = LG_OP::identity; lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); @@ -47,7 +49,7 @@ struct UnaryRedImplBody { } }; -/*static*/ void UnaryRedTask::cpu_variant(TaskContext& context) +/*static*/ void UnaryRedTask::cpu_variant(TaskContext context) { unary_red_template(context); } diff --git a/src/cunumeric/unary/unary_red.cu b/src/cunumeric/unary/unary_red.cu index b5e0e5eb1..f245305e8 100644 --- a/src/cunumeric/unary/unary_red.cu +++ b/src/cunumeric/unary/unary_red.cu @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,8 +45,9 @@ struct ThreadBlock { auto remaining = static_cast(THREADS_PER_BLOCK); Point domain_extents; - for (int32_t idx = 0; idx < DIM; ++idx) + for (int32_t idx = 0; idx < DIM; ++idx) { domain_extents[idx] = domain.hi[idx] - domain.lo[idx] + 1; + } // If the innermost dimension is being collapsed, we assign at least one warp to it // for warp coalsecing. 
@@ -59,15 +60,18 @@ struct ThreadBlock { // Then, we compute how many threads there should be along aech dimension, // excluding the one being collapsed for (int32_t idx = DIM - 1; idx >= 0; --idx) { - if (idx == collapsed_dim) continue; + if (idx == collapsed_dim) { + continue; + } auto extent = std::min(remaining, domain_extents[idx]); extents_[idx] = extent; remaining = std::max(remaining / extent, 1); } // Finally, we determine degree of parallelism for the collapsed dimension if we didn't above - if (collapsed_dim != DIM - 1) + if (collapsed_dim != DIM - 1) { extents_[collapsed_dim] = std::min(remaining, domain_extents[collapsed_dim]); + } // Cache the aggregate number of threads per increment in each dimension, // which later will be used for de-linearization of a thread id @@ -117,8 +121,11 @@ struct ThreadBlocks { // We want the collapsed dimension to be the outermost one when // de-linearizing the block id. dim_order_[0] = collapsed_dim_; - for (int32_t dim = 0, idx = 1; dim < DIM; ++dim) - if (dim != collapsed_dim_) dim_order_[idx++] = dim; + for (int32_t dim = 0, idx = 1; dim < DIM; ++dim) { + if (dim != collapsed_dim_) { + dim_order_[idx++] = dim; + } + } // Compute the aggregate number of blocks per increment in each dimension coord_t num_blocks = 1; @@ -195,7 +202,9 @@ std::ostream& operator<<(std::ostream& os, const ThreadBlocks& blocks) os << "ThreadBlocks(" << blocks.block_ << ", extents: " << blocks.extents_ << ", pitches: " << blocks.pitches_ << ", num concurrent blocks: " << blocks.num_blocks_ << ", dim order: {"; - for (int32_t dim : blocks.dim_order_) os << dim << ", "; + for (int32_t dim : blocks.dim_order_) { + os << dim << ", "; + } os << "})"; return os; @@ -220,8 +229,9 @@ static void __device__ __forceinline__ collapse_dims(LHS& result, // so instead we do a warp-level reduction so just one thread ends // up doing the full atomic coord_t bucket = 0; - for (int32_t dim = DIM - 2; dim >= 0; --dim) + for (int32_t dim = DIM - 2; dim >= 0; 
--dim) { bucket = bucket * (domain.hi[dim] - domain.lo[dim] + 1) + point[dim] - domain.lo[dim]; + } const uint32_t same_mask = __match_any_sync(0xffffffff, bucket); int32_t laneid; @@ -234,7 +244,7 @@ static void __device__ __forceinline__ collapse_dims(LHS& result, __syncwarp(active_mask); // Have the lowest thread in each mask pull in the values int32_t lowest_index = -1; - for (int32_t i = 0; i < warpSize; i++) + for (int32_t i = 0; i < warpSize; i++) { if (same_mask & (1 << i)) { if (lowest_index == -1) { if (i != laneid) { @@ -244,8 +254,9 @@ static void __device__ __forceinline__ collapse_dims(LHS& result, // perform the reduction out to memory result = identity; break; - } else // Make sure we don't do this test again + } else { // Make sure we don't do this test again lowest_index = i; + } // It was already our value, so just keep going } else { // Pull in the value from shared memory @@ -253,17 +264,18 @@ static void __device__ __forceinline__ collapse_dims(LHS& result, REDOP::template fold(result, trampoline[index]); } } + } } } #endif -#ifdef LEGATE_BOUNDS_CHECKS - // Note: this isn't necessary because we know that the affine transformation on the output - // accessor will ignore coordinates of the collapsed dimension. However, Legion's bounds checks - // want the accessor to honor the sub-rectangle passed when it was created, so we need to - // put points back in the bounds to appease the checks. - point[collapsed_dim] = domain.lo[collapsed_dim]; -#endif + if (LEGATE_DEFINED(LEGATE_BOUNDS_CHECKS)) { + // Note: this isn't necessary because we know that the affine transformation on the output + // accessor will ignore coordinates of the collapsed dimension. However, Legion's bounds checks + // want the accessor to honor the sub-rectangle passed when it was created, so we need to + // put points back in the bounds to appease the checks. 
+ point[collapsed_dim] = domain.lo[collapsed_dim]; + } } template @@ -279,11 +291,15 @@ static __device__ __forceinline__ Point local_reduce(LHS& result, const coord_t bid = blockIdx.x; Point point = blocks.point(bid, tid, domain.lo); - if (!domain.contains(point)) return point; + if (!domain.contains(point)) { + return point; + } bool mask = true; while (point[collapsed_dim] <= domain.hi[collapsed_dim]) { - if constexpr (HAS_WHERE) mask = where[point]; + if constexpr (HAS_WHERE) { + mask = where[point]; + } if (mask) { LHS value = OP::convert(point, collapsed_dim, identity, in[point]); REDOP::template fold(result, value); @@ -308,14 +324,16 @@ static __global__ void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) auto result = identity; auto point = local_reduce( result, in, where, identity, blocks, domain, collapsed_dim); - if (result != identity) out.reduce(point, result); + if (result != identity) { + out.reduce(point, result); + } } template struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; - using RHS = legate_type_of; + using RHS = type_of; using LHS = typename OP::VAL; void operator()(AccessorRD lhs, @@ -335,11 +353,11 @@ struct UnaryRedImplBody { blocks.compute_maximum_concurrency(reinterpret_cast(Kernel)); Kernel<<>>( lhs, rhs, where, LG_OP::identity, blocks, rect, collapsed_dim); - CHECK_CUDA_STREAM(stream); + CUNUMERIC_CHECK_CUDA_STREAM(stream); } }; -/*static*/ void UnaryRedTask::gpu_variant(TaskContext& context) +/*static*/ void UnaryRedTask::gpu_variant(TaskContext context) { unary_red_template(context); } diff --git a/src/cunumeric/unary/unary_red.h b/src/cunumeric/unary/unary_red.h index a7b44584f..6018c16a8 100644 --- a/src/cunumeric/unary/unary_red.h +++ b/src/cunumeric/unary/unary_red.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the 
License. @@ -16,15 +16,15 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/unary/unary_red_util.h" namespace cunumeric { struct UnaryRedArgs { - const Array& lhs; - const Array& rhs; - const Array& where; + legate::PhysicalStore lhs; + legate::PhysicalStore rhs; + legate::PhysicalStore where; int32_t collapsed_dim; UnaryRedCode op_code; }; @@ -34,12 +34,12 @@ class UnaryRedTask : public CuNumericTask { static const int TASK_ID = CUNUMERIC_UNARY_RED; public: - static void cpu_variant(legate::TaskContext& context); -#ifdef LEGATE_USE_OPENMP - static void omp_variant(legate::TaskContext& context); + static void cpu_variant(legate::TaskContext context); +#if LEGATE_DEFINED(LEGATE_USE_OPENMP) + static void omp_variant(legate::TaskContext context); #endif -#ifdef LEGATE_USE_CUDA - static void gpu_variant(legate::TaskContext& context); +#if LEGATE_DEFINED(LEGATE_USE_CUDA) + static void gpu_variant(legate::TaskContext context); #endif }; diff --git a/src/cunumeric/unary/unary_red_omp.cc b/src/cunumeric/unary/unary_red_omp.cc index fd9ccce60..8ed659aee 100644 --- a/src/cunumeric/unary/unary_red_omp.cc +++ b/src/cunumeric/unary/unary_red_omp.cc @@ -1,4 +1,4 @@ -/* Copyright 2021-2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -31,20 +31,21 @@ class Splitter { public: Split split(const Rect& rect, int must_be_inner) { - for (int dim = 0; dim < DIM; ++dim) + for (int dim = 0; dim < DIM; ++dim) { if (dim != must_be_inner) { outer_dim_ = dim; break; } + } size_t outer = 1; size_t inner = 1; size_t pitch = 1; for (int dim = DIM - 1; dim >= 0; --dim) { auto diff = rect.hi[dim] - rect.lo[dim] + 1; - if (dim == outer_dim_) + if (dim == outer_dim_) { outer *= diff; - else { + } else { inner *= diff; pitches_[dim] = pitch; pitch *= diff; @@ -57,9 +58,9 @@ class Splitter { { Point point = lo; for (int dim = 0; dim < DIM; ++dim) { - if (dim == outer_dim_) + if (dim == outer_dim_) { point[dim] += outer_idx; - else { + } else { point[dim] += inner_idx / pitches_[dim]; inner_idx = inner_idx % pitches_[dim]; } @@ -76,7 +77,7 @@ template struct UnaryRedImplBody { using OP = UnaryRedOp; using LG_OP = typename OP::OP; - using RHS = legate_type_of; + using RHS = type_of; void operator()(AccessorRD lhs, AccessorRO rhs, @@ -94,7 +95,9 @@ struct UnaryRedImplBody { for (size_t i_idx = 0; i_idx < split.inner; ++i_idx) { auto point = splitter.combine(o_idx, i_idx, rect.lo); bool mask = true; - if constexpr (HAS_WHERE) mask = where[point]; + if constexpr (HAS_WHERE) { + mask = where[point]; + } if (mask) { auto identity = LG_OP::identity; lhs.reduce(point, OP::convert(point, collapsed_dim, identity, rhs[point])); @@ -104,7 +107,7 @@ struct UnaryRedImplBody { } }; -/*static*/ void UnaryRedTask::omp_variant(TaskContext& context) +/*static*/ void UnaryRedTask::omp_variant(TaskContext context) { unary_red_template(context); } diff --git a/src/cunumeric/unary/unary_red_template.inl b/src/cunumeric/unary/unary_red_template.inl index aa038384f..634cbfe88 100644 --- a/src/cunumeric/unary/unary_red_template.inl +++ b/src/cunumeric/unary/unary_red_template.inl @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the 
"License"); * you may not use this file except in compliance with the License. @@ -38,20 +38,24 @@ struct UnaryRedImpl { void operator()(UnaryRedArgs& args) const { using OP = UnaryRedOp; - using RHS = legate_type_of; + using RHS = type_of; Pitches pitches; auto rect = args.rhs.shape(); auto volume = pitches.flatten(rect); - if (volume == 0) return; + if (volume == 0) { + return; + } auto rhs = args.rhs.read_accessor(rect); auto lhs = args.lhs.reduce_accessor(rect); AccessorRO where; - if constexpr (HAS_WHERE) { where = args.where.read_accessor(rect); } + if constexpr (HAS_WHERE) { + where = args.where.read_accessor(rect); + } UnaryRedImplBody()( lhs, rhs, where, rect, pitches, args.collapsed_dim, volume); } @@ -78,14 +82,13 @@ struct UnaryRedDispatch { template static void unary_red_template(TaskContext& context) { - auto& inputs = context.inputs(); - auto& reductions = context.reductions(); - auto& scalars = context.scalars(); - bool has_where = scalars[2].value(); - Array dummy_where; + auto inputs = context.inputs(); + auto reductions = context.reductions(); + auto& scalars = context.scalars(); + bool has_where = scalars[2].value(); UnaryRedArgs args{reductions[0], inputs[0], - has_where ? inputs[1] : dummy_where, + has_where ? inputs[1] : legate::PhysicalStore{nullptr}, scalars[0].value(), scalars[1].value()}; if (has_where) { diff --git a/src/cunumeric/unary/unary_red_util.h b/src/cunumeric/unary/unary_red_util.h index e822e40b4..3dafba0bf 100644 --- a/src/cunumeric/unary/unary_red_util.h +++ b/src/cunumeric/unary/unary_red_util.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ #pragma once -#include "cunumeric/cunumeric.h" +#include "cunumeric/cunumeric_task.h" #include "cunumeric/arg.h" #include "cunumeric/arg.inl" #include "cunumeric/unary/isnan.h" @@ -110,7 +110,7 @@ template struct UnaryRedOp { static constexpr bool valid = TYPE_CODE != legate::Type::Code::COMPLEX128; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = bool; using OP = legate::ProdReduction; @@ -133,7 +133,7 @@ template struct UnaryRedOp { static constexpr bool valid = TYPE_CODE != legate::Type::Code::COMPLEX128; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = bool; using OP = legate::SumReduction; @@ -156,7 +156,7 @@ template struct UnaryRedOp { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = uint64_t; using OP = legate::SumReduction; @@ -182,7 +182,7 @@ template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = legate::MaxReduction; @@ -205,7 +205,7 @@ template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = legate::MinReduction; @@ -228,7 +228,7 @@ template struct UnaryRedOp { static constexpr bool valid = TYPE_CODE != legate::Type::Code::COMPLEX128; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = legate::ProdReduction; @@ -251,7 +251,7 @@ template struct UnaryRedOp { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = legate::SumReduction; @@ -274,7 +274,7 @@ template struct UnaryRedOp { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = Legion::SumReduction; @@ 
-297,7 +297,7 @@ template struct UnaryRedOp { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = Legion::SumReduction; @@ -320,7 +320,7 @@ template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = Argval; using OP = ArgmaxReduction; @@ -346,7 +346,9 @@ struct UnaryRedOp { const RHS& rhs) { int64_t idx = 0; - for (int32_t dim = 0; dim < DIM; ++dim) idx = idx * shape[dim] + point[dim]; + for (int32_t dim = 0; dim < DIM; ++dim) { + idx = idx * shape[dim] + point[dim]; + } return VAL(idx, rhs); } }; @@ -355,7 +357,7 @@ template struct UnaryRedOp { static constexpr bool valid = !legate::is_complex::value; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = Argval; using OP = ArgminReduction; @@ -381,7 +383,9 @@ struct UnaryRedOp { const RHS& rhs) { int64_t idx = 0; - for (int32_t dim = 0; dim < DIM; ++dim) idx = idx * shape[dim] + point[dim]; + for (int32_t dim = 0; dim < DIM; ++dim) { + idx = idx * shape[dim] + point[dim]; + } return VAL(idx, rhs); } }; @@ -394,7 +398,7 @@ template struct UnaryRedOp> { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = Argval; using OP = ArgmaxReduction; @@ -421,7 +425,9 @@ struct UnaryRedOp struct UnaryRedOp> { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = Argval; using OP = ArgminReduction; @@ -457,7 +463,9 @@ struct UnaryRedOp struct UnaryRedOp> { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = legate::MinReduction; @@ -495,7 +503,7 @@ template struct UnaryRedOp> { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = 
legate::MaxReduction; @@ -529,7 +537,7 @@ template struct UnaryRedOp> { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = legate::ProdReduction; @@ -563,7 +571,7 @@ template struct UnaryRedOp> { static constexpr bool valid = true; - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = RHS; using OP = legate::SumReduction; @@ -594,7 +602,7 @@ struct UnaryRedOp { static constexpr bool valid = false; // This class only provides the typedefs necessary to match the other operators. // It does not provide fold/convert functions. - using RHS = legate::legate_type_of; + using RHS = legate::type_of; using VAL = bool; using _RED_OP = UnaryRedOp; using OP = _RED_OP::OP; diff --git a/src/cunumeric/utilities/repartition.cc b/src/cunumeric/utilities/repartition.cc new file mode 100644 index 000000000..a27e28955 --- /dev/null +++ b/src/cunumeric/utilities/repartition.cc @@ -0,0 +1,71 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "repartition.h" + +namespace cunumeric { + +std::tuple elements_for_rank_in_dimension( + size_t dim_length, size_t offset_id, size_t proc_id, size_t num_dim_procs, size_t tilesize) +{ + size_t start_tile_idx = offset_id / tilesize; + size_t start_tile_proc_id = start_tile_idx % num_dim_procs; + size_t start_pos_offset = proc_id >= start_tile_proc_id + ? 
(proc_id - start_tile_proc_id) * tilesize + : (num_dim_procs + proc_id - start_tile_proc_id) * tilesize; + size_t start_tile_offset = offset_id % tilesize; + + if (start_tile_offset > 0 && start_pos_offset > 0) { + // we can move the start position left to the start of the tile + start_pos_offset -= start_tile_offset; + } + + // calc global offset for procId + size_t offset_tiles = (start_tile_idx + num_dim_procs - proc_id - 1) / num_dim_procs; + + if (start_pos_offset > dim_length) { + return {0ul, offset_tiles}; + } + + size_t full_cycles = (dim_length - start_pos_offset) / (tilesize * num_dim_procs); + size_t num_elements = full_cycles * tilesize; + size_t remainder = dim_length - start_pos_offset - num_elements * num_dim_procs; + if (start_pos_offset > 0 || start_tile_offset == 0) { + // we have a clean start + if (remainder > 0) { + num_elements += std::min(tilesize, remainder); + } + } else { + // we start with a partial tile + size_t tile_remainder = tilesize - start_tile_offset; + if (remainder <= tile_remainder) { + num_elements += remainder; + } else { + remainder -= tile_remainder; + num_elements += tile_remainder; + if (remainder > (num_dim_procs - 1) * tilesize) { + num_elements += std::min(tilesize, remainder - (num_dim_procs - 1) * tilesize); + } + } + } + + size_t offset_elements = + offset_tiles * tilesize + (start_pos_offset == 0 ? start_tile_offset : 0); + + return {num_elements, offset_elements}; +} + +} // namespace cunumeric \ No newline at end of file diff --git a/src/cunumeric/utilities/repartition.cu b/src/cunumeric/utilities/repartition.cu new file mode 100644 index 000000000..917c84be9 --- /dev/null +++ b/src/cunumeric/utilities/repartition.cu @@ -0,0 +1,1357 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "repartition.h" + +#include "cunumeric/cuda_help.h" + +namespace cunumeric { + +using namespace Legion; +using namespace legate; + +namespace { +// auto align to multiples of 16 bytes +constexpr auto get_16b_aligned = [](auto bytes) { + return std::max(16, (bytes + 15) / 16 * 16); +}; +constexpr auto get_16b_aligned_count = [](auto count, auto element_bytes) { + return (get_16b_aligned(count * element_bytes) + element_bytes - 1) / element_bytes; +}; + +const auto is_device_only_ptr = [](const void* ptr) { + cudaPointerAttributes attrs; + auto res = cudaPointerGetAttributes(&attrs, ptr); + if (res == cudaSuccess) { + return attrs.type == cudaMemoryTypeDevice; + } else { + cudaGetLastError(); + return false; + } +}; +} // namespace + +template +__global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + split_data_to_send_buffers(const VAL* input_2dbc, + size_t input_volume, + size_t input_lld, + Buffer send_info, + size_t stored_size_per_rank, + Buffer send_buffers_ptr, + size_t p_r, + size_t p_c, + size_t tile_r, + size_t tile_c) +{ + size_t thread_offset = blockIdx.x * blockDim.x + threadIdx.x; + size_t threadgroup_size = blockDim.x * gridDim.x; + size_t rank_id = blockIdx.y * blockDim.y + threadIdx.y; + + if (rank_id >= p_r * p_c) { + return; + } + + size_t source_size = send_info[rank_id * stored_size_per_rank + BlockInfo::TOTAL_SIZE]; + size_t source_lld = send_info[rank_id * stored_size_per_rank + BlockInfo::LLD]; + size_t source_offset_row = send_info[rank_id * stored_size_per_rank + 
BlockInfo::OFFSET_ROW]; + size_t source_offset_col = send_info[rank_id * stored_size_per_rank + BlockInfo::OFFSET_COL]; + + // copy large block from input with all elements for target rank_id + for (size_t pos = thread_offset; pos < source_size; pos += threadgroup_size) { + size_t source_row_id = source_offset_row + pos % source_lld; + size_t source_col_id = source_offset_col + pos / source_lld; + size_t index_in = source_col_id * input_lld + source_row_id; + + assert(index_in < input_volume); + send_buffers_ptr[rank_id][pos] = input_2dbc[index_in]; + } +} + +template +__global__ static void __launch_bounds__(THREADS_PER_BLOCK, MIN_CTAS_PER_SM) + merge_data_to_result(Buffer result_2dbc, + size_t volume, + Buffer recv_info, + size_t stored_size_per_rank, + Buffer merge_buffers, + size_t target_lld, + size_t tile_r, + size_t tile_c, + size_t my_rank, + size_t num_ranks) +{ + size_t thread_offset = blockIdx.x * blockDim.x + threadIdx.x; + size_t threadgroup_size = blockDim.x * gridDim.x; + size_t rank_id = blockIdx.y * blockDim.y + threadIdx.y; + + if (rank_id >= num_ranks) { + return; + } + + size_t source_size = recv_info[rank_id * stored_size_per_rank + BlockInfo::TOTAL_SIZE]; + size_t source_lld = recv_info[rank_id * stored_size_per_rank + BlockInfo::LLD]; + size_t source_offset_row = recv_info[rank_id * stored_size_per_rank + BlockInfo::OFFSET_ROW]; + size_t source_offset_col = recv_info[rank_id * stored_size_per_rank + BlockInfo::OFFSET_COL]; + + for (size_t pos = thread_offset; pos < source_size; pos += threadgroup_size) { + size_t target_col_id = source_offset_col + pos / source_lld; + size_t target_row_id = source_offset_row + pos % source_lld; + + // store elementwise + size_t index_out = target_col_id * target_lld + target_row_id; + + assert(index_out < volume); + result_2dbc[index_out] = merge_buffers[rank_id][pos]; + } +} + +__device__ __inline__ std::tuple compute_tile_info(size_t num_rows, + size_t num_cols, + size_t row_major, + size_t lld, + size_t 
offset_r, + size_t offset_c, + size_t tile_r, + size_t tile_c) +{ + // position info + // get local tile size and start position + size_t tile_r_size = tile_r; + size_t tile_c_size = tile_c; + size_t start_pos; + // special cases for first/last tile + { + size_t start_r_offset = offset_r % tile_r; + size_t start_c_offset = offset_c % tile_c; + size_t start_pos_r = blockIdx.x * tile_r; + size_t start_pos_c = blockIdx.y * tile_c; + + // rows + if (start_r_offset > 0) { + if (blockIdx.x == 0) { + tile_r_size -= start_r_offset; + } else { + start_pos_r -= start_r_offset; + } + } + if (blockIdx.x == gridDim.x - 1) { + size_t last_element_offset = (num_rows + start_r_offset) % tile_r; + if (last_element_offset > 0) { + tile_r_size -= (tile_r - last_element_offset); + } + } + // cols + if (start_c_offset > 0) { + if (blockIdx.y == 0) { + tile_c_size -= start_c_offset; + } else { + start_pos_c -= start_c_offset; + } + } + if (blockIdx.y == gridDim.y - 1) { + size_t last_element_offset = (num_cols + start_c_offset) % tile_c; + if (last_element_offset > 0) { + tile_c_size -= (tile_c - last_element_offset); + } + } + + start_pos = row_major ? 
start_pos_r * lld + start_pos_c : start_pos_c * lld + start_pos_r; + } + + return {tile_r_size, tile_c_size, start_pos}; +} + +__device__ __inline__ std::tuple compute_2dbc_info( + Buffer info, + size_t stored_size_per_rank, + size_t p_r, + size_t p_c, + size_t tile_idx_row, + size_t tile_idx_col, + size_t tile_r, + size_t tile_c) +{ + size_t rank_r = tile_idx_row % p_r; + size_t rank_c = tile_idx_col % p_c; + size_t rank_id = rank_r + rank_c * p_r; // tile ranks are col major + size_t size = info[rank_id * stored_size_per_rank + BlockInfo::TOTAL_SIZE]; + size_t lld = info[rank_id * stored_size_per_rank + BlockInfo::LLD]; + size_t start_pos = 0; + // compute start position of tile (tile_idx_row/tile_idx_col) within source + { + // this is where the OUR part of the whole 2dbc dist of the target rank resides + size_t offset_row = info[rank_id * stored_size_per_rank + BlockInfo::OFFSET_ROW]; + size_t offset_col = info[rank_id * stored_size_per_rank + BlockInfo::OFFSET_COL]; + + // this is where the tile starts / it does not have to be where our PART of the tile starts + size_t tile_pos_row = (tile_idx_row - rank_r) / p_r * tile_r; + size_t tile_pos_col = (tile_idx_col - rank_c) / p_c * tile_c; + + // shift to the positions where our PART of the tile starts + if (tile_pos_row > offset_row) { + tile_pos_row -= offset_row; + } else { + tile_pos_row = 0; + } + if (tile_pos_col > offset_col) { + tile_pos_col -= offset_col; + } else { + tile_pos_col = 0; + } + + start_pos = tile_pos_col * lld + tile_pos_row; // always col major + } + return {rank_id, size, lld, start_pos}; +} + +#define BLOCK_DIM 16 +template +__device__ __inline__ void transfer_data_src_tgt(const VAL* source, + size_t source_size, + size_t source_lld, + bool source_row_major, + size_t source_start_pos, + VAL* target, + size_t target_size, + size_t target_lld, + size_t target_row_major, + size_t target_start_pos, + size_t tile_r_size, + size_t tile_c_size, + VAL block[BLOCK_DIM][BLOCK_DIM + 1]) +{ + if 
(source_row_major != target_row_major) { + for (size_t tile_r_pos = 0; tile_r_pos < tile_r_size; tile_r_pos += BLOCK_DIM) { + for (size_t tile_c_pos = 0; tile_c_pos < tile_c_size; tile_c_pos += BLOCK_DIM) { + // we are at offset tile_r_pos/tile_c_pos within our tile (start of block) + // blocks are square, tiles don't need to be! + size_t tile_r_pos_t = tile_r_pos + threadIdx.y; + size_t tile_c_pos_t = tile_c_pos + threadIdx.x; + if (tile_r_pos_t < tile_r_size && tile_c_pos_t < tile_c_size) { + size_t index_in = + source_start_pos + (source_row_major ? tile_c_pos_t + tile_r_pos_t * source_lld + : tile_c_pos_t * source_lld + tile_r_pos_t); + assert(index_in < source_size); + block[threadIdx.y][threadIdx.x] = source[index_in]; + } + + __syncthreads(); + + // write back data to target (row major OR column major) + if (tile_r_pos + threadIdx.x < tile_r_size && tile_c_pos + threadIdx.y < tile_c_size) { + size_t index_out = + target_start_pos + + (target_row_major ? (tile_r_pos + threadIdx.x) * target_lld + tile_c_pos + threadIdx.y + : tile_r_pos + threadIdx.x + (tile_c_pos + threadIdx.y) * target_lld); + assert(index_out < target_size); + target[index_out] = block[threadIdx.x][threadIdx.y]; + } + } + } + } else { + for (size_t tile_r_pos = threadIdx.x; tile_r_pos < tile_r_size; tile_r_pos += BLOCK_DIM) { + for (size_t tile_c_pos = threadIdx.y; tile_c_pos < tile_c_size; tile_c_pos += BLOCK_DIM) { + size_t index_in = source_start_pos + tile_r_pos + tile_c_pos * source_lld; + size_t index_out = target_start_pos + tile_r_pos + tile_c_pos * target_lld; + assert(index_in < source_size); + assert(index_out < target_size); + target[index_out] = source[index_in]; + } + } + } +} + +template +__global__ void assemble_tiles_to_block_result(VAL* target, + size_t target_volume, + size_t target_lld, + size_t target_offset_r, + size_t target_offset_c, + bool target_row_major, + Buffer recv_info, + size_t stored_size_per_rank, + Buffer recv_buffers_ptr, + size_t p_r, + size_t p_c, + 
size_t tile_r, + size_t tile_c) +{ + __shared__ VAL block[BLOCK_DIM][BLOCK_DIM + 1]; + + size_t num_target_cols = target_row_major ? target_lld : target_volume / target_lld; + size_t num_target_rows = target_row_major ? target_volume / target_lld : target_lld; + + size_t tile_idx_row = blockIdx.x + target_offset_r / tile_r; + size_t tile_idx_col = blockIdx.y + target_offset_c / tile_c; + + auto [tile_r_size, tile_c_size, target_start_pos] = compute_tile_info(num_target_rows, + num_target_cols, + target_row_major, + target_lld, + target_offset_r, + target_offset_c, + tile_r, + tile_c); + + auto [source_rank_id, source_size, source_lld, source_start_pos] = compute_2dbc_info( + recv_info, stored_size_per_rank, p_r, p_c, tile_idx_row, tile_idx_col, tile_r, tile_c); + + transfer_data_src_tgt(recv_buffers_ptr[source_rank_id], + source_size, + source_lld, + false, + source_start_pos, + target, + target_volume, + target_lld, + target_row_major, + target_start_pos, + tile_r_size, + tile_c_size, + block); +} + +template +__global__ void copy_to_send_buffer(const VAL* input, + size_t volume, + Buffer send_info, + size_t stored_size_per_rank, + Buffer send_buffers_ptr, + bool row_major, + size_t offset_r, + size_t offset_c, + size_t lld, + size_t p_r, + size_t p_c, + size_t tile_r, + size_t tile_c) +{ + __shared__ VAL block[BLOCK_DIM][BLOCK_DIM + 1]; + + size_t num_input_cols = row_major ? lld : volume / lld; + size_t num_input_rows = row_major ? 
volume / lld : lld; + + size_t tile_idx_row = blockIdx.x + offset_r / tile_r; + size_t tile_idx_col = blockIdx.y + offset_c / tile_c; + + auto [tile_r_size, tile_c_size, source_start_pos] = compute_tile_info( + num_input_rows, num_input_cols, row_major, lld, offset_r, offset_c, tile_r, tile_c); + + auto [target_rank_id, target_size, target_lld, target_start_pos] = compute_2dbc_info( + send_info, stored_size_per_rank, p_r, p_c, tile_idx_row, tile_idx_col, tile_r, tile_c); + + transfer_data_src_tgt(input, + volume, + lld, + row_major, + source_start_pos, + send_buffers_ptr[target_rank_id], + target_size, + target_lld, + false, + target_start_pos, + tile_r_size, + tile_c_size, + block); +} + +template +std::tuple, size_t, size_t> repartition_matrix_2dbc(const VAL* input, + size_t volume, + bool row_major, + size_t offset_r, + size_t offset_c, + size_t lld, + size_t p_r, + size_t p_c, + size_t tile_r, + size_t tile_c, + comm::Communicator comm_wrapper) +{ + assert(volume == 0 || is_device_only_ptr(input)); + + auto num_ranks = p_r * p_c; + size_t num_cols = row_major ? lld : volume / lld; + size_t num_rows = row_major ? 
volume / lld : lld; + + auto comm = comm_wrapper.get(); + auto stream = get_cached_stream(); + + int nccl_rank = -1; + int nccl_ranks = -1; + CHECK_NCCL(ncclCommUserRank(*comm, &nccl_rank)); + CHECK_NCCL(ncclCommCount(*comm, &nccl_ranks)); + assert(num_ranks == nccl_ranks); + + // compute sizes/lld/offset for each target rank + size_t stored_size_per_rank = get_16b_aligned_count(BlockInfo::LAST, sizeof(size_t)); + size_t total_send_elements = 0; + Buffer send_info = + create_buffer(num_ranks * stored_size_per_rank, Memory::Z_COPY_MEM); + Buffer recv_info = + create_buffer(num_ranks * stored_size_per_rank, Memory::Z_COPY_MEM); + for (size_t rank_c = 0; rank_c < p_c; ++rank_c) { + auto [active_columns, offset_columns] = + elements_for_rank_in_dimension(num_cols, offset_c, rank_c, p_c, tile_c); + for (size_t rank_r = 0; rank_r < p_r; ++rank_r) { + auto glob_rank = rank_r + rank_c * p_r; // target ranks are col major + auto [active_rows, offset_rows] = + elements_for_rank_in_dimension(num_rows, offset_r, rank_r, p_r, tile_r); + + auto elements_for_rank = active_columns * active_rows; + total_send_elements += elements_for_rank; + + send_info[glob_rank * stored_size_per_rank + BlockInfo::TOTAL_SIZE] = elements_for_rank; + send_info[glob_rank * stored_size_per_rank + BlockInfo::LLD] = + active_rows; // col-major send data + send_info[glob_rank * stored_size_per_rank + BlockInfo::OFFSET_ROW] = offset_rows; + send_info[glob_rank * stored_size_per_rank + BlockInfo::OFFSET_COL] = offset_columns; + } + } + + assert(total_send_elements == volume); + + // TODO / OPTIMIZE + // in case we have the global partition information of the cuNumeric block partition + // we can compute receive buffers instead and skip this all2all + // same applies for inverse operation + + // all2all send_info/recv_info + CHECK_NCCL(ncclGroupStart()); + for (size_t r = 0; r < num_ranks; r++) { + CHECK_NCCL(ncclSend( + send_info.ptr(r * stored_size_per_rank), stored_size_per_rank, ncclUint64, r, *comm, 
stream)); + CHECK_NCCL(ncclRecv( + recv_info.ptr(r * stored_size_per_rank), stored_size_per_rank, ncclUint64, r, *comm, stream)); + } + CHECK_NCCL(ncclGroupEnd()); + CUNUMERIC_CHECK_CUDA(cudaStreamSynchronize(stream)); // need Z-copy synchronized to Host + + // allocate send/recv buffer + std::vector> send_buffers; + send_buffers.reserve(num_ranks); + std::vector> recv_buffers; + recv_buffers.reserve(num_ranks); + size_t total_receive = 0; + size_t target_lld = 0; + for (size_t rank_c = 0; rank_c < p_c; ++rank_c) { + for (size_t rank_r = 0; rank_r < p_r; ++rank_r) { + auto glob_rank = rank_r + rank_c * p_r; // target ranks are col major + assert(send_buffers.size() == glob_rank); + send_buffers.emplace_back(create_buffer( + send_info[glob_rank * stored_size_per_rank + BlockInfo::TOTAL_SIZE], Memory::GPU_FB_MEM)); + auto receive_size = recv_info[glob_rank * stored_size_per_rank + BlockInfo::TOTAL_SIZE]; + if (receive_size > 0) { + target_lld = + std::max(target_lld, + recv_info[glob_rank * stored_size_per_rank + BlockInfo::LLD] + + recv_info[glob_rank * stored_size_per_rank + BlockInfo::OFFSET_ROW]); + } + total_receive += receive_size; + assert(recv_buffers.size() == glob_rank); + recv_buffers.emplace_back(create_buffer(receive_size, Memory::GPU_FB_MEM)); + } + } + + // and package data for each target rank + if (volume > 0) { + Buffer send_buffers_ptr = create_buffer(num_ranks, Memory::Z_COPY_MEM); + for (size_t r = 0; r < num_ranks; r++) { + send_buffers_ptr[r] = send_buffers[r].ptr(0); + } + + size_t first_tile_r = offset_r / tile_r; + size_t last_tile_r = (offset_r + num_rows - 1) / tile_r; + size_t num_tiles_r = last_tile_r - first_tile_r + 1; + size_t first_tile_c = offset_c / tile_c; + size_t last_tile_c = (offset_c + num_cols - 1) / tile_c; + size_t num_tiles_c = last_tile_c - first_tile_c + 1; + + // simplify - every tile handled by individual block (especially helpful for row/col transpose) + dim3 grid = dim3(num_tiles_r, num_tiles_c); + dim3 
block(BLOCK_DIM, BLOCK_DIM); + // row based needs shared mem for coalesced read/write + // col based can access directly? maybe also use shared mem to unify + copy_to_send_buffer<<>>(input, + volume, + send_info, + stored_size_per_rank, + send_buffers_ptr, + row_major, + offset_r, + offset_c, + lld, + p_r, + p_c, + tile_r, + tile_c); + CUNUMERIC_CHECK_CUDA(cudaStreamSynchronize(stream)); + send_buffers_ptr.destroy(); + } + + CUNUMERIC_CHECK_CUDA_STREAM(stream); + + // all2all data + CHECK_NCCL(ncclGroupStart()); + for (size_t r = 0; r < num_ranks; r++) { + CHECK_NCCL(ncclSend(send_buffers[r].ptr(0), + send_info[r * stored_size_per_rank + BlockInfo::TOTAL_SIZE] * sizeof(VAL), + ncclInt8, + r, + *comm, + stream)); + CHECK_NCCL(ncclRecv(recv_buffers[r].ptr(0), + recv_info[r * stored_size_per_rank + BlockInfo::TOTAL_SIZE] * sizeof(VAL), + ncclInt8, + r, + *comm, + stream)); + } + CHECK_NCCL(ncclGroupEnd()); + send_info.destroy(); + for (auto&& buf : send_buffers) { + buf.destroy(); + } + + // combine data from all buffers + Buffer result_2dbc = create_buffer(total_receive, Memory::GPU_FB_MEM); + if (total_receive > 0) { + Buffer recv_buffers_ptr = create_buffer(num_ranks, Memory::Z_COPY_MEM); + for (size_t r = 0; r < num_ranks; r++) { + recv_buffers_ptr[r] = recv_buffers[r].ptr(0); + } + + size_t avr_elements_per_rank = (total_receive + num_ranks - 1) / num_ranks; + // this roughly ensures ~32 elements per thread to copy - not optimized yet + size_t num_blocks_per_rank = + ((avr_elements_per_rank + 31) / 32 + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + dim3 grid_shape = dim3(num_blocks_per_rank, num_ranks); + merge_data_to_result<<>>(result_2dbc, + total_receive, + recv_info, + stored_size_per_rank, + recv_buffers_ptr, + target_lld, + tile_r, + tile_c, + (size_t)nccl_rank, + num_ranks); + CUNUMERIC_CHECK_CUDA(cudaStreamSynchronize(stream)); + recv_buffers_ptr.destroy(); + } + + CUNUMERIC_CHECK_CUDA_STREAM(stream); + + recv_info.destroy(); + for (auto&& buf : 
recv_buffers) { + buf.destroy(); + } + + // returns the buffer/size/lld + return {result_2dbc, total_receive, target_lld}; +} + +template +void repartition_matrix_block( + Buffer input_2dbc_buffer, + size_t input_volume, + size_t input_lld, + size_t local_rank, // NOTE: this needs to correspond to communicator rank! + size_t p_r, + size_t p_c, + size_t tile_r, + size_t tile_c, + VAL* target, + size_t target_volume, + size_t target_lld, + size_t num_target_rows, + size_t num_target_cols, + bool target_row_major, + // TODO optimize -- we would like to provide a global mapping to skip additional communication + size_t target_offset_r, + size_t target_offset_c, + comm::Communicator comm_wrapper) +{ + auto num_ranks = p_r * p_c; + + auto comm = comm_wrapper.get(); + auto stream = get_cached_stream(); + + size_t num_input_rows = input_volume > 0 ? input_lld : 0; + size_t num_input_cols = input_volume > 0 ? input_volume / input_lld : 0; + + // will be computed from offset exchange + size_t target_p_r = 0; + size_t target_p_c = 0; + size_t target_p_r_valid = 0; + size_t target_p_c_valid = 0; + + // 1. communicate global offsets + auto offsets_r = create_buffer(num_ranks, Memory::Z_COPY_MEM); + auto offsets_c = create_buffer(num_ranks, Memory::Z_COPY_MEM); + // for now we need to exchange all offsets + { + auto offsets = create_buffer(2 * num_ranks, Memory::Z_COPY_MEM); + offsets[2 * local_rank] = num_target_rows > 0 ? target_offset_r + num_target_rows : 0; + offsets[2 * local_rank + 1] = num_target_cols > 0 ? 
target_offset_c + num_target_cols : 0; + CHECK_NCCL( + ncclAllGather(offsets.ptr(2 * local_rank), offsets.ptr(0), 2, ncclUint64, *comm, stream)); + CUNUMERIC_CHECK_CUDA(cudaStreamSynchronize(stream)); + + // re-arrange so that all row offsets come first + for (size_t i = 1; i < num_ranks; i += 2) { + size_t tmp = offsets[i]; + size_t idx2 = 2 * num_ranks - 1 - i; + offsets[i] = offsets[idx2]; + offsets[idx2] = tmp; + } + // sort col/row offsets independently + std::sort(offsets.ptr(0), offsets.ptr(num_ranks)); + std::sort(offsets.ptr(num_ranks), offsets.ptr(2 * num_ranks)); + // store offsets (we know that we can skip duplicate information) + + size_t last_offset_r = 0; + size_t empty_p_r = 0; + size_t equals_r = 1; + for (size_t r = 0; r < num_ranks; r++) { + if (offsets[r] > last_offset_r) { + offsets_r[target_p_r_valid++] = offsets[r]; + last_offset_r = offsets[r]; + } else if (target_p_r_valid == 1) { + assert(offsets[r] == last_offset_r); + equals_r++; + } else if (target_p_r_valid == 0) { + empty_p_r++; + } + } + size_t last_offset_c = 0; + size_t empty_p_c = 0; + size_t equals_c = 1; + for (size_t c = num_ranks; c < 2 * num_ranks; c++) { + if (offsets[c] > last_offset_c) { + offsets_c[target_p_c_valid++] = offsets[c]; + last_offset_c = offsets[c]; + } else if (target_p_c_valid == 1) { + assert(offsets[c] == last_offset_c); + equals_c++; + } else if (target_p_c_valid == 0) { + empty_p_c++; + } + } + + // edge-case -- empty in 2D + // x x x x 0 0 + // x x x x 0 0 + // 0 0 0 0 0 0 + // 0 0 0 0 0 0 + // 0 0 0 0 0 0 + // target_p_r_valid = 2 target_p_c_valid = 4 + // empty_p_r = 18 empty_p_c = 10 + // equals_r = 4 equals_c = 2 + if (empty_p_r > 0 && empty_p_c > 0) { + size_t empty_prod = empty_p_r * empty_p_c; + assert(empty_prod % num_ranks == 0); + empty_prod /= num_ranks; + bool found_match = false; + for (size_t r = 1; r <= empty_p_r && !found_match; r++) { + for (size_t c = 1; r <= empty_p_c; r++) { + if (r * c == empty_prod && (r + target_p_r_valid) * (c + 
target_p_c_valid) == num_ranks) { + found_match = true; + empty_p_r = r; + empty_p_c = c; + break; + } + } + } + assert(found_match); + } + + target_p_r = target_p_r_valid + empty_p_r; + target_p_c = target_p_c_valid + empty_p_c; + + // update offsets for invalid ranks + for (int r = target_p_r_valid; r < target_p_r; ++r) { + offsets_r[r] = offsets_r[r - 1]; + } + for (int c = target_p_c_valid; c < target_p_c; ++c) { + offsets_c[c] = offsets_c[c - 1]; + } + + offsets.destroy(); + assert(num_ranks == target_p_r * target_p_c); + } + + // Assumptions: + // a. local_rank == nccl_rank == 2dbc-id (col-major) + // b. local_rank interpreted row-major (cuNumeric) should match offsets in offset mappings + // c. offsets for ranks outside valid bounds are not considered + size_t rank_r_rm = local_rank / target_p_c; + size_t rank_c_rm = local_rank % target_p_c; + size_t rank_r_cm = local_rank % p_r; + size_t rank_c_cm = local_rank / p_r; + { + assert(rank_r_rm >= target_p_r_valid || + offsets_r[rank_r_rm] == target_offset_r + num_target_rows); + assert(rank_c_rm >= target_p_c_valid || + offsets_c[rank_c_rm] == target_offset_c + num_target_cols); + } + + // 2. 
compute expected send/receive sizes locally + // first convert global element offsets to local tile offsets + auto glob2loc = + [](size_t glob_elem, size_t first_tile_offset, size_t proc_dim, size_t tilesize) -> size_t { + size_t local_element = 0; + if (glob_elem > first_tile_offset) { + size_t remainder = glob_elem - first_tile_offset; + // full cycles + size_t cycle_length = proc_dim * tilesize; + size_t full_cycles = remainder / cycle_length; + local_element += tilesize * full_cycles; + remainder = remainder % cycle_length; + local_element += min(remainder, tilesize); + } + + return local_element; + }; + + size_t first_tile_offset_r = rank_r_cm * tile_r; + size_t first_tile_offset_c = rank_c_cm * tile_c; + size_t stored_size_per_rank = get_16b_aligned_count(BlockInfo::LAST, sizeof(size_t)); + size_t total_send_elements = 0; + size_t total_recv_elements = 0; + Buffer send_info = + create_buffer(num_ranks * stored_size_per_rank, Memory::Z_COPY_MEM); + Buffer recv_info = + create_buffer(num_ranks * stored_size_per_rank, Memory::Z_COPY_MEM); + + // send/recv buffer + + std::vector> send_buffers; + send_buffers.reserve(num_ranks); + std::vector> recv_buffers; + recv_buffers.reserve(num_ranks); + + size_t active_send_row_end = 0; + for (size_t rank_r = 0; rank_r < target_p_r; ++rank_r) { + size_t active_send_row_start = active_send_row_end; + active_send_row_end = glob2loc(offsets_r[rank_r], first_tile_offset_r, p_r, tile_r); + active_send_row_end = std::min(active_send_row_end, num_input_rows); // limited by local rows! + size_t active_send_column_end = 0; + for (size_t rank_c = 0; rank_c < target_p_c; ++rank_c) { + size_t active_send_column_start = active_send_column_end; + auto other_rank = rank_r * target_p_c + rank_c; // target ranks are row major!!! + active_send_column_end = glob2loc(offsets_c[rank_c], first_tile_offset_c, p_c, tile_c); + active_send_column_end = + std::min(active_send_column_end, num_input_cols); // limited by local cols! 
+ + // send information from local_rank to other_rank + { + size_t active_send_rows = active_send_row_end - active_send_row_start; + size_t active_send_columns = active_send_column_end - active_send_column_start; + auto send_elements_for_rank = active_send_columns * active_send_rows; + total_send_elements += send_elements_for_rank; + + send_info[other_rank * stored_size_per_rank + BlockInfo::TOTAL_SIZE] = + send_elements_for_rank; + send_info[other_rank * stored_size_per_rank + BlockInfo::LLD] = + active_send_rows; // col-major send data + send_info[other_rank * stored_size_per_rank + BlockInfo::OFFSET_ROW] = + active_send_row_start; + send_info[other_rank * stored_size_per_rank + BlockInfo::OFFSET_COL] = + active_send_column_start; + assert(send_buffers.size() == other_rank); + send_buffers.emplace_back(create_buffer(send_elements_for_rank, Memory::GPU_FB_MEM)); + } + } + } + + assert(total_send_elements == input_volume); + + // 3. package send data (should be blocks of data) + if (total_send_elements > 0) { + VAL* input_2dbc = input_2dbc_buffer.ptr(0); + Buffer send_buffers_ptr = create_buffer(num_ranks, Memory::Z_COPY_MEM); + for (size_t r = 0; r < num_ranks; r++) { + send_buffers_ptr[r] = send_buffers[r].ptr(0); + } + + size_t avr_elements_per_rank = (total_send_elements + num_ranks - 1) / num_ranks; + // this roughly ensures ~32 elements per thread to copy - not optimized yet + size_t num_blocks_per_rank = + ((avr_elements_per_rank + 31) / 32 + THREADS_PER_BLOCK - 1) / THREADS_PER_BLOCK; + dim3 grid_shape = dim3(num_blocks_per_rank, num_ranks); + split_data_to_send_buffers<<>>(input_2dbc, + input_volume, + input_lld, + send_info, + stored_size_per_rank, + send_buffers_ptr, + p_r, + p_c, + tile_r, + tile_c); + + CUNUMERIC_CHECK_CUDA(cudaStreamSynchronize(stream)); + send_buffers_ptr.destroy(); + } + // we can destroy the input once we distributed data into the buffers + input_2dbc_buffer.destroy(); + + // compute and allocate receive buffers + for (size_t 
rank_r = 0; rank_r < target_p_r; ++rank_r) { + for (size_t rank_c = 0; rank_c < target_p_c; ++rank_c) { + auto other_rank = rank_r * target_p_c + rank_c; // target ranks are row major!!! + + // recv information from other_rank to local_rank + // other rank sends info for tile based on col-major ordering + size_t other_rank_r_cm = other_rank % p_r; + size_t other_rank_c_cm = other_rank / p_r; + size_t other_first_tile_offset_r = other_rank_r_cm * tile_r; + size_t other_first_tile_offset_c = other_rank_c_cm * tile_c; + + // locate other active rows/cols w.r.t. local target offsets + size_t active_recv_row_end = + glob2loc(target_offset_r + num_target_rows, other_first_tile_offset_r, p_r, tile_r); + size_t active_recv_column_end = + glob2loc(target_offset_c + num_target_cols, other_first_tile_offset_c, p_c, tile_c); + size_t active_recv_row_start = + glob2loc(target_offset_r, other_first_tile_offset_r, p_r, tile_r); + size_t active_recv_column_start = + glob2loc(target_offset_c, other_first_tile_offset_c, p_c, tile_c); + + size_t active_recv_rows = active_recv_row_end - active_recv_row_start; + size_t active_recv_columns = active_recv_column_end - active_recv_column_start; + auto recv_elements_for_rank = active_recv_columns * active_recv_rows; + total_recv_elements += recv_elements_for_rank; + + recv_info[other_rank * stored_size_per_rank + BlockInfo::TOTAL_SIZE] = recv_elements_for_rank; + recv_info[other_rank * stored_size_per_rank + BlockInfo::LLD] = + active_recv_rows; // col-major recv data + recv_info[other_rank * stored_size_per_rank + BlockInfo::OFFSET_ROW] = active_recv_row_start; + recv_info[other_rank * stored_size_per_rank + BlockInfo::OFFSET_COL] = + active_recv_column_start; + assert(other_rank == recv_buffers.size()); + recv_buffers.emplace_back(create_buffer(recv_elements_for_rank, Memory::GPU_FB_MEM)); + } + } + + assert(total_recv_elements == target_volume); + + // 4. 
communicate data + // all2all data + CHECK_NCCL(ncclGroupStart()); + for (size_t r = 0; r < num_ranks; r++) { + CHECK_NCCL(ncclSend(send_buffers[r].ptr(0), + send_info[r * stored_size_per_rank + BlockInfo::TOTAL_SIZE] * sizeof(VAL), + ncclInt8, + r, + *comm, + stream)); + CHECK_NCCL(ncclRecv(recv_buffers[r].ptr(0), + recv_info[r * stored_size_per_rank + BlockInfo::TOTAL_SIZE] * sizeof(VAL), + ncclInt8, + r, + *comm, + stream)); + } + CHECK_NCCL(ncclGroupEnd()); + send_info.destroy(); + for (auto&& buf : send_buffers) { + buf.destroy(); + } + + // 5. merge data from recv_buffers + if (total_recv_elements > 0) { + Buffer recv_buffers_ptr = create_buffer(num_ranks, Memory::Z_COPY_MEM); + for (size_t r = 0; r < num_ranks; r++) { + recv_buffers_ptr[r] = recv_buffers[r].ptr(0); + } + + size_t first_tile_r = target_offset_r / tile_r; + size_t last_tile_r = (target_offset_r + num_target_rows - 1) / tile_r; + size_t num_tiles_r = last_tile_r - first_tile_r + 1; + size_t first_tile_c = target_offset_c / tile_c; + size_t last_tile_c = (target_offset_c + num_target_cols - 1) / tile_c; + size_t num_tiles_c = last_tile_c - first_tile_c + 1; + + // simplify - every tile handled by individual block (especially helpful for row/col transpose) + dim3 grid = dim3(num_tiles_r, num_tiles_c); + dim3 block(BLOCK_DIM, BLOCK_DIM); + assemble_tiles_to_block_result<<>>(target, + target_volume, + target_lld, + target_offset_r, + target_offset_c, + target_row_major, + recv_info, + stored_size_per_rank, + recv_buffers_ptr, + p_r, + p_c, + tile_r, + tile_c); + CUNUMERIC_CHECK_CUDA(cudaStreamSynchronize(stream)); + recv_buffers_ptr.destroy(); + } + + CUNUMERIC_CHECK_CUDA_STREAM(stream); + + // cleanup + offsets_r.destroy(); + offsets_c.destroy(); + recv_info.destroy(); + for (auto&& buf : recv_buffers) { + buf.destroy(); + } +} + +/* + BOOL = LEGION_TYPE_BOOL, + INT8 = LEGION_TYPE_INT8, + INT16 = LEGION_TYPE_INT16, + INT32 = LEGION_TYPE_INT32, + INT64 = LEGION_TYPE_INT64, + UINT8 = 
LEGION_TYPE_UINT8, + UINT16 = LEGION_TYPE_UINT16, + UINT32 = LEGION_TYPE_UINT32, + UINT64 = LEGION_TYPE_UINT64, + FLOAT16 = LEGION_TYPE_FLOAT16, + FLOAT32 = LEGION_TYPE_FLOAT32, + FLOAT64 = LEGION_TYPE_FLOAT64, + COMPLEX64 = LEGION_TYPE_COMPLEX64, + COMPLEX128 = LEGION_TYPE_COMPLEX128 + */ +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>(Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>(Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> 
+repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, 
+ bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); +template std::tuple>, size_t, size_t> +repartition_matrix_2dbc>(const type_of*, + size_t, + bool, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + size_t, + comm::Communicator); +template void repartition_matrix_block>( + Buffer>, + size_t, + size_t, + size_t, + 
size_t, + size_t, + size_t, + size_t, + type_of*, + size_t, + size_t, + size_t, + size_t, + bool, + size_t, + size_t, + comm::Communicator); + +} // namespace cunumeric \ No newline at end of file diff --git a/src/cunumeric/utilities/repartition.h b/src/cunumeric/utilities/repartition.h new file mode 100644 index 000000000..d513cb772 --- /dev/null +++ b/src/cunumeric/utilities/repartition.h @@ -0,0 +1,95 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include "legate.h" +#include "cunumeric/cunumeric_task.h" + +namespace cunumeric { + +enum BlockInfo { + TOTAL_SIZE, // # values send + LLD, // local leading dimension ( num rows for col-based) + OFFSET_ROW, // global row offset w.r.t. elements of proc id + OFFSET_COL, // global col offset w.r.t. elements of proc id + LAST // keep as last element +}; + +// TODO(mförster) optimize -- we would like to provide a global mapping to skip additional +// communication + +/* + * performs collective repartition to 2d block cyclic pattern + * returns tuple(buffer, volume, lld) + */ +template +[[nodiscard]] std::tuple, size_t, size_t> repartition_matrix_2dbc( + // dense input data block (only GPU mem supported) + const VAL* input, + size_t volume, + bool row_major, + // offset of local block w.r.t. 
global dimensions + size_t offset_r, + size_t offset_c, + // lld of input data, corresponds to numRows/numCols + size_t lld, + // target process grid layout (p_r*p_c need to match communicator size) + size_t p_r, + size_t p_c, + // tile layout + size_t tile_r, + size_t tile_c, + // communicator + legate::comm::Communicator comm); + +/* + * performs collective repartition from 2d block cyclic pattern + * back to block + */ +template +void repartition_matrix_block( + // dense input data block (only GPU mem supported) + // will be released as soon as consumed + legate::Buffer input_2dbc, + size_t input_volume, + size_t input_lld, + // should match NCCL rank and 2dbc ID column major + size_t local_rank, + // 2dbc process grid layout (p_r*p_c need to match communicator size) + size_t p_r, + size_t p_c, + // tile layout + size_t tile_r, + size_t tile_c, + // dense output data pointer (only GPU mem supported) + VAL* target, + size_t target_volume, + size_t target_lld, + // cuNumeric process grid layout (needs to match communicator size) + size_t num_target_rows, + size_t num_target_cols, + bool target_row_major, + // offset of local block w.r.t. global dimensions + size_t target_offset_r, + size_t target_offset_c, + // communicator + legate::comm::Communicator comm); + +[[nodiscard]] std::tuple elements_for_rank_in_dimension( + size_t dim_length, size_t offset_id, size_t proc_id, size_t num_dim_procs, size_t tilesize); + +} // namespace cunumeric \ No newline at end of file diff --git a/src/cunumeric/utilities/thrust_allocator.h b/src/cunumeric/utilities/thrust_allocator.h index 6676f298f..465b74974 100644 --- a/src/cunumeric/utilities/thrust_allocator.h +++ b/src/cunumeric/utilities/thrust_allocator.h @@ -1,4 +1,4 @@ -/* Copyright 2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/src/cunumeric/utilities/thrust_util.h b/src/cunumeric/utilities/thrust_util.h index 99ecbd644..3616f9e62 100644 --- a/src/cunumeric/utilities/thrust_util.h +++ b/src/cunumeric/utilities/thrust_util.h @@ -1,4 +1,4 @@ -/* Copyright 2023 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/src/env_defaults.h b/src/env_defaults.h index af95896a1..c9a89963a 100644 --- a/src/env_defaults.h +++ b/src/env_defaults.h @@ -1,4 +1,4 @@ -/* Copyright 2021-2022 NVIDIA Corporation +/* Copyright 2024 NVIDIA Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/test.py b/test.py index 50e22ee88..c7d857e54 100755 --- a/test.py +++ b/test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,27 +18,10 @@ import sys -from legate.tester import PER_FILE_ARGS, SKIPPED_EXAMPLES from legate.tester.config import Config from legate.tester.test_plan import TestPlan from legate.tester.test_system import TestSystem -SKIPPED_EXAMPLES.update( - { - "examples/ingest.py", - "examples/kmeans_sort.py", - "examples/lstm_full.py", - "examples/wgrad.py", - } -) - -PER_FILE_ARGS.update( - { - "examples/lstm_full.py": ["--file", "resources/lstm_input.txt"], - } -) - - if __name__ == "__main__": config = Config(sys.argv) diff --git a/tests/cpp/.gitignore b/tests/cpp/.gitignore new file mode 100644 index 000000000..567609b12 --- /dev/null +++ b/tests/cpp/.gitignore @@ -0,0 +1 @@ +build/ diff --git a/tests/cpp/CMakeLists.txt b/tests/cpp/CMakeLists.txt new file mode 100644 index 000000000..1be2b8f59 --- /dev/null +++ b/tests/cpp/CMakeLists.txt @@ -0,0 +1,75 @@ +#============================================================================= +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +cmake_minimum_required(VERSION 3.22.1 FATAL_ERROR) + +project(cpp_tests VERSION 0.1 LANGUAGES C CXX) + +if(PROJECT_IS_TOP_LEVEL) + # To catch people trying to build the tests from within tests/cpp instead of top-level + message(FATAL_ERROR "Error: Tests can only be built as part of the main library build. 
Please re-run cmake from top-level directory (\${CMAKE_SOURCE_DIR}) with -Dcunumeric_BUILD_TESTS=ON" + ) +endif() + +if(Legion_USE_CUDA) + find_package(CUDAToolkit REQUIRED) + enable_language(CUDA) +endif() + +include(rapids-test) + +rapids_test_init() + +include(${rapids-cmake-dir}/cpm/gtest.cmake) + +# BUILD_EXPORT_SET and INSTALL_EXPORT_SET are crucial, otherwise gtest does not get +# installed +rapids_cpm_gtest(BUILD_EXPORT_SET cunumeric-exports + INSTALL_EXPORT_SET cunumeric-exports) + +file(GLOB main_SRC ${PROJECT_SOURCE_DIR}/main.cc) +file(GLOB integration_SRC ${PROJECT_SOURCE_DIR}/integration/*.cc) + +if(Legion_USE_CUDA) + file(GLOB integration_GPU_SRC ${PROJECT_SOURCE_DIR}/integration/*.cu) + list(APPEND integration_SRC ${integration_GPU_SRC}) +endif() + +add_executable(cpp_tests ${main_SRC} ${tasks_SRC} ${integration_SRC} ${unit_SRC}) + +target_link_libraries(cpp_tests PRIVATE legate::core cunumeric::cunumeric GTest::gtest) +if(Legion_USE_CUDA) + target_link_libraries(cpp_tests PRIVATE NCCL::NCCL) +endif() + +if(Legion_USE_CUDA) + set(num_gpus 1) +else() + set(num_gpus 0) +endif() + +rapids_test_add( + NAME cpp_tests + COMMAND cpp_tests + GPUS ${num_gpus} + PERCENT 30 + INSTALL_COMPONENT_SET testing +) + +include(GNUInstallDirs) + +rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing + DESTINATION ${CMAKE_INSTALL_BINDIR} INCLUDE_IN_ALL) diff --git a/tests/cpp/cmake/thirdparty/get_nccl.cmake b/tests/cpp/cmake/thirdparty/get_nccl.cmake new file mode 100644 index 000000000..3208de846 --- /dev/null +++ b/tests/cpp/cmake/thirdparty/get_nccl.cmake @@ -0,0 +1,34 @@ +#============================================================================= +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_or_configure_nccl) + + if(TARGET NCCL::NCCL) + return() + endif() + + rapids_find_generate_module(NCCL + HEADER_NAMES nccl.h + LIBRARY_NAMES nccl + ) + + # Currently NCCL has no CMake build-system so we require + # it built and installed on the machine already + rapids_find_package(NCCL REQUIRED) + +endfunction() + +find_or_configure_nccl() diff --git a/tests/cpp/integration/common_utils.cc b/tests/cpp/integration/common_utils.cc new file mode 100644 index 000000000..457fcc4a7 --- /dev/null +++ b/tests/cpp/integration/common_utils.cc @@ -0,0 +1,171 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "common_utils.h" +#include +#include + +namespace cunumeric { + +template +void show_array(NDArray& a) +{ + auto acc = a.get_read_accessor(); + std::cerr << "["; + for (size_t i = 0; i < a.size(); ++i) { + std::cerr << acc[i]; + if (i != a.size() - 1) { + std::cerr << ", "; + } + } + std::cerr << "]" << std::endl; +} + +void debug_array(NDArray a, bool show_data) +{ + auto store = a.get_store(); + if (store.has_scalar_storage()) { + std::cerr << "(S) "; + } else { + std::cerr << "( ) "; + } + if (store.transformed()) { + std::cerr << "(T) "; + } else { + std::cerr << "( ) "; + } + std::cerr << "<" << store.type().to_string() << "> " << store.to_string() << std::endl; + if (!show_data) { + return; + } + if (a.size() == 0) { + std::cerr << "[]" << std::endl; + return; + } + if (a.dim() > 1) { + a = a._wrap(a.size()); + } + switch (a.type().code()) { + case legate::Type::Code::INT32: show_array(a); break; + case legate::Type::Code::INT64: show_array(a); break; + case legate::Type::Code::FLOAT32: show_array(a); break; + case legate::Type::Code::FLOAT64: show_array(a); break; + default: std::cerr << "[ Not implemented ]" << std::endl; break; + } +} + +} // namespace cunumeric + +using namespace cunumeric; + +// unit test for common_utils +namespace { + +TEST(Utils, test_check_array) +{ + { + auto x = mk_array({99}); + debug_array(x); + } + { + auto x = mk_array({99}, {1}); + debug_array(x); + } + { + auto x = mk_array({1, 2, 3, 4}, {2, 2}); + debug_array(x); + check_array(x, {1, 2, 3, 4}, {2, 2}); + } + { + std::vector shape{2, 3, 4}; + auto x_in = mk_seq_vector(shape, 10); + auto x = mk_array(x_in, shape); + debug_array(x); + check_array(x, x_in, shape); + } +} + +void fail1() +{ + std::vector shape{2, 3}; + auto x = mk_array({1, 2, 3, 4, 50, 6}, shape); + auto x_gt = mk_seq_vector(shape); + check_array(x, x_gt, shape); +}; + +void fail2() +{ + auto x = mk_array({1 + 1e-8, 1 + 1e-7, 1 + 1e-6, 1 + 1e-5}); + auto x_gt = mk_seq_vector({4}, 0, 1); + 
check_array(x, x_gt); +}; + +void fail3() +{ + auto x = mk_array({1 + 1e-8, 1 + 1e-7, 1 + 1e-6, 1 + 1e-5}); + auto x_gt = mk_seq_vector({4}, 0, 1); + check_array(x, x_gt); +}; + +TEST(Utils, test_check_array_neg) +{ + EXPECT_FATAL_FAILURE(fail1(), "check_array"); + EXPECT_FATAL_FAILURE(fail2(), "check_array"); + EXPECT_FATAL_FAILURE(fail3(), "check_array"); +} + +TEST(Utils, test_as_type_vector) +{ + auto x = mk_seq_vector({16}, 0.25); + debug_vector(x); + auto y = as_type_vector(x); + debug_vector(y); +} + +TEST(Utils, test_ndarray_wrap) +{ + auto x = mk_array({1, 2, 3, 4}); + debug_array(x); + auto y = x._wrap(0); + debug_array(y); + auto z = y._wrap(0); + debug_array(z); + EXPECT_ANY_THROW(y._wrap(1);); +} + +TEST(Utils, test_ndarray_warn_and_convert) +{ + auto x_in = mk_seq_vector({8}, 0.5); + auto x = mk_array(x_in); + auto y = x._warn_and_convert(legate::int32()); + debug_array(x); + debug_array(y); + cunumeric_log().warning() << "Just a test!"; +} + +TEST(Utils, test_wrap_indices_and_clip_indices) +{ + std::vector shape{10}; + auto x_in = mk_seq_vector(shape); + auto x = mk_array(x_in, shape); + auto x_warp = x.wrap_indices(Scalar(int64_t(4))); + auto x_clip = x.clip_indices(Scalar(int64_t(3)), Scalar(int64_t(7))); + debug_array(x); + debug_array(x_warp); + debug_array(x_clip); +} + +} // namespace diff --git a/tests/cpp/integration/common_utils.h b/tests/cpp/integration/common_utils.h new file mode 100644 index 000000000..b92516017 --- /dev/null +++ b/tests/cpp/integration/common_utils.h @@ -0,0 +1,161 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "legate.h" +#include "cunumeric.h" +#include "cunumeric/runtime.h" +#include "util.inl" + +namespace cunumeric { + +void debug_array(NDArray a, bool show_data = true); + +template +NDArray mk_array(std::vector const& values, std::vector shape = {}) +{ + if (shape.empty() && values.size() > 1) { + shape.push_back(values.size()); + } + auto out = zeros(shape, legate::primitive_type(legate::type_code_of)); + if (values.size() != out.size()) { + throw std::invalid_argument("size and shape mismatch"); + } + if (out.size() == 0) { + return out; + } + if (out.size() == 1) { + out.fill(legate::Scalar(values[0])); + return out; + } + auto assign_values = [](NDArray& a, std::vector const& values) { + auto acc = a.get_write_accessor(); + for (size_t i = 0; i < values.size(); ++i) { + acc[i] = values[i]; + } + }; + if (out.dim() == 1) { + assign_values(out, values); + } else { + auto a1 = zeros({out.size()}, out.type()); + assign_values(a1, values); + auto runtime = CuNumericRuntime::get_runtime(); + auto a2 = runtime->create_array(std::move(a1.get_store().delinearize(0, shape))); + out.assign(a2); + } + return out; +} + +template +void check_array(NDArray a, std::vector values, std::vector shape = {}) +{ + if (shape.empty() && values.size() > 1) { + shape.push_back(values.size()); + } + ASSERT_EQ(a.size(), values.size()); + ASSERT_EQ(a.shape(), shape); + ASSERT_EQ(a.type().code(), legate::type_code_of); + if (a.size() == 0) { + 
return; + } + if (a.dim() > 1) { + a = a._wrap(a.size()); + } + auto err_msg = [](auto i) { + std::stringstream ss; + ss << "check_array failed at [i = " << i << "]"; + return ss.str(); + }; + auto acc = a.get_read_accessor(); + for (size_t i = 0; i < values.size(); ++i) { + ASSERT_EQ(acc[i], values[i]) << err_msg(i); + } +} + +template +struct PrintArray { + template + void operator()(cunumeric::NDArray array) + { + auto acc = array.get_read_accessor(); + auto& shape = array.shape(); + auto logical_store = array.get_store(); + auto physical_store = logical_store.get_physical_store(); + auto rect = physical_store.shape(); + std::cerr << to_string(acc, shape, rect) << std::endl; + } +}; + +template +void print_array(NDArray array) +{ + if (array.size() == 0) { + std::cerr << "[]" << std::endl; + return; + } + if (array.dim() == 0) { + auto acc = array.get_read_accessor(); + std::cerr << "[" << acc[0] << "]" << std::endl; + return; + } + legate::dim_dispatch(array.dim(), PrintArray{}, array); +} + +template +void debug_vector(const std::vector& vec) +{ + std::cerr << "["; + for (auto i = vec.begin(); i != vec.end(); ++i) { + std::cerr << *i; + if (i != vec.end() - 1) { + std::cerr << ", "; + } + } + std::cerr << "]" << std::endl; +} + +// x = a * i + b, i = 1, 2, 3, ... 
+template +std::vector mk_seq_vector(std::vector shape, T a = 1, T b = 0) +{ + size_t size = std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies()); + std::vector v(size); + std::generate(v.begin(), v.end(), [a, x = b]() mutable { return x += a; }); + return v; +} + +template +std::vector as_type_vector(std::vector const& in) +{ + std::vector out; + for (auto elem : in) { + out.push_back(static_cast(elem)); + } + return out; +} + +} // namespace cunumeric diff --git a/tests/cpp/integration/test_arange.cc b/tests/cpp/integration/test_arange.cc new file mode 100644 index 000000000..d6a5fb730 --- /dev/null +++ b/tests/cpp/integration/test_arange.cc @@ -0,0 +1,91 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +TEST(ArangeType, ImplicitInt64) +{ + int64_t start = 1567891032456; + std::optional stop = 1567891032465; + std::array exp = {1567891032456, + 1567891032457, + 1567891032458, + 1567891032459, + 1567891032460, + 1567891032461, + 1567891032462, + 1567891032463, + 1567891032464}; + auto arr = cunumeric::arange(start, stop); + check_array_eq(arr, exp.data(), exp.size()); +} + +TEST(ArangeType, ImplicitInt32) +{ + int32_t stop = 10; + std::array exp = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto arr = cunumeric::arange(stop); + check_array_eq(arr, exp.data(), exp.size()); +} + +TEST(ArangeType, ImplicitFloat64) +{ + double start = 1.5; + double stop = 10.5; + std::array exp = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5}; + auto arr = cunumeric::arange(start, (std::optional)stop); + check_array_eq(arr, exp.data(), exp.size()); +} + +TEST(ArangeType, ImplicitFloat32) +{ + float start = 1.5; + float stop = 10.5; + std::array exp = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5}; + auto arr = cunumeric::arange(start, (std::optional)stop); + check_array_eq(arr, exp.data(), exp.size()); +} + +TEST(ArangeType, ExplicitInt32) +{ + float start = 1.5; + float stop = 10.5; + std::array exp = {1, 2, 3, 4, 5, 6, 7, 8, 9}; + auto arr = cunumeric::arange(start, stop); + check_array_eq(arr, exp.data(), exp.size()); +} + +TEST(ArangeScalar, Float32) +{ + float start = 1.5; + float stop = 10.5; + std::array exp = {1.5, 2.5, 3.5, 4.5, 5.5, 6.5, 7.5, 8.5, 9.5}; + auto arr = cunumeric::arange(legate::Scalar(start), legate::Scalar(stop)); + check_array_eq(arr, exp.data(), exp.size()); +} + +TEST(ArangeErrors, ScalarTypeMismatch) +{ + float start = 1.5; + int32_t stop = 10; + EXPECT_THROW(cunumeric::arange(legate::Scalar(start), legate::Scalar(stop)), + std::invalid_argument); +} diff --git a/tests/cpp/integration/test_argsort.cc b/tests/cpp/integration/test_argsort.cc new file mode 100644 
index 000000000..39339a38b --- /dev/null +++ b/tests/cpp/integration/test_argsort.cc @@ -0,0 +1,477 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +auto get_argsort_expect_result() +{ + std::vector>> expect_result = { + {{0, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}}, + {{-1, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}}, + {{-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-1, {1, 3, 0, 2, 0, 1, 2, 3, 3, 1, 0, 2}}, + {0, {1, 0, 1, 2, 2, 1, 2, 0, 0, 2, 0, 1}}, + {1, {1, 3, 0, 2, 0, 1, 2, 3, 3, 1, 0, 2}}}, + {{-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-2, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0}}, + {2, {11, 4, 1, 5, 3, 9, 8, 6, 7, 0, 10, 2}}}, + {{-2, {1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0}}, + {-1, {1, 0, 2, 1, 2, 0, 2, 0, 1, 2, 0, 1}}, + {0, {1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0}}, + {1, {1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0}}, + {2, {1, 0, 2, 1, 2, 0, 2, 0, 1, 2, 0, 1}}}}; + return expect_result; +} + +auto get_argsort_expect_result_4d() +{ + std::vector>> expect_result = { + {{-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}}, + {{-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-3, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {2, 1, 3, 0, 2, 3, 0, 1, 3, 1, 2, 0, 2, 0, 1, 3}}, + {0, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {1, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {2, 1, 3, 0, 2, 3, 0, 1, 3, 1, 2, 0, 2, 0, 1, 3}}}}; + return expect_result; +} + +auto get_argsort_expect_result_5d() +{ + std::vector>> expect_result = { + {{-4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-4, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-4, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {-3, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {2, 1, 3, 0, 2, 3, 0, 1, 3, 1, 2, 0, 2, 0, 1, 3}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {2, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {4, {2, 1, 3, 0, 2, 3, 0, 1, 3, 1, 2, 0, 2, 0, 1, 3}}}}; + return expect_result; +} + +auto get_argsort_expect_result_6d() +{ + std::vector>> expect_result = { + {{-5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, 
+ {5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-4, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {2, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-5, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {-4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-3, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {-2, {1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1}}, + {-1, {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {4, {1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1}}, + {5, {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1}}}}; + return expect_result; +} + +auto get_argsort_expect_result_7d() +{ + std::vector>> expect_result = { + {{-6, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {-5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {14, 6, 2, 7, 4, 12, 11, 9, 10, 1, 13, 3, 5, 0, 15, 8}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {6, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-6, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-5, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {-4, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {2, 1, 3, 0, 2, 3, 0, 1, 3, 1, 2, 0, 2, 0, 1, 3}}, + {-1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {0, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {1, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {2, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {3, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {5, {2, 1, 3, 0, 2, 3, 0, 1, 3, 1, 2, 0, 2, 0, 1, 3}}, + {6, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}}, + {{-6, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {-5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-4, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-3, {1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-1, {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1}}, + {0, {0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}, + {1, {1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}}, + {2, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {4, {1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1}}, + {5, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {6, {1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1}}}}; + return expect_result; +} + +template +void test_argsort(std::array& in_array, + std::array& expect, + legate::Type leg_type, + std::vector shape, + std::optional axis, + bool test_only_stable = false) +{ + auto A1 = cunumeric::zeros(shape, leg_type); + if (in_array.size() != 0) { + if 
(in_array.size() == 1) { + A1.fill(legate::Scalar(in_array[0])); + } else { + assign_values_to_array(A1, in_array.data(), in_array.size()); + } + } + std::vector algos = {"quicksort", "mergesort", "heapsort", "stable"}; + if (test_only_stable) { + algos = {"mergesort", "stable"}; + } + for (auto algo = algos.begin(); algo < algos.end(); ++algo) { + auto B1 = cunumeric::argsort(A1, axis, *algo); + if (in_array.size() != 0) { + check_array_eq(B1, expect.data(), expect.size()); + } + } +} + +template +void argsort_basic_axis_impl( + std::vector>& test_shapes, + std::array in_array, + std::vector>>& expect_result, + legate::Type leg_type, + bool test_only_stable = false) +{ + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + std::cout << "Axis is: " << axis << std::endl; + auto expect_val = expect_result[i][axis]; + if (dim == 1) { + test_argsort( + in_array, expect_val, leg_type, test_shape, axis, test_only_stable); + } else if (dim == 2) { + test_argsort( + in_array, expect_val, leg_type, test_shape, axis, test_only_stable); + } else if (dim == 3) { + test_argsort( + in_array, expect_val, leg_type, test_shape, axis, test_only_stable); + } else if (dim == 4) { +#if LEGATE_MAX_DIM >= 4 + test_argsort( + in_array, expect_val, leg_type, test_shape, axis, test_only_stable); +#endif + } else if (dim == 5) { +#if LEGATE_MAX_DIM >= 5 + test_argsort( + in_array, expect_val, leg_type, test_shape, axis, test_only_stable); +#endif + } else if (dim == 6) { +#if LEGATE_MAX_DIM >= 6 + test_argsort( + in_array, expect_val, leg_type, test_shape, axis, test_only_stable); +#endif + } else if (dim == 7) { +#if LEGATE_MAX_DIM >= 7 + test_argsort( + in_array, expect_val, leg_type, test_shape, axis, test_only_stable); +#endif + } + } + } +} + +void argsort_basic_axis() +{ + std::vector> test_shapes = { + {12}, {1, 12}, 
{12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + + auto expect_result = get_argsort_expect_result(); + + // Test int type + std::array in_array1 = {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + argsort_basic_axis_impl(test_shapes, in_array1, expect_result, legate::int32()); + + // Test float type + std::array in_array2 = {10.5, 3.66, 12, 5.98, 2.2, 4, 8, 9, 7.9, 6, 11, 1.5}; + argsort_basic_axis_impl(test_shapes, in_array2, expect_result, legate::float64()); + + // Test complex type + std::array, 12> in_array3 = {complex(10, 3), + complex(2.2, 10.5), + complex(12, 5), + complex(6, 5.98), + complex(2, 4), + complex(6, 4), + complex(8, 9), + complex(8, 11), + complex(7.9, 12), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66)}; + argsort_basic_axis_impl, 12>( + test_shapes, in_array3, expect_result, legate::complex64()); +} + +void argsort_basic_axis_stable() +{ + std::vector> test_shapes = { + {12}, {1, 12}, {12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + auto expect_result = get_argsort_expect_result(); + + // Test int type + std::array in_array1 = {10, 3, 12, 5, 2, 3, 8, 8, 7, 6, 10, 1}; + argsort_basic_axis_impl( + test_shapes, in_array1, expect_result, legate::int32(), true); + + // Test float type + std::array in_array2 = {10.5, 3.66, 12, 5.98, 2.2, 3.66, 8, 9, 7.9, 6, 10.5, 1.5}; + argsort_basic_axis_impl( + test_shapes, in_array2, expect_result, legate::float64(), true); + + // Test complex type + std::array, 12> in_array3 = {complex(10, 3), + complex(2.2, 10.5), + complex(12, 5), + complex(6, 5.98), + complex(2, 4), + complex(2.2, 10.5), + complex(8, 9), + complex(8, 11), + complex(7.9, 12), + complex(7, 6), + complex(10, 3), + complex(1.5, 3.66)}; + argsort_basic_axis_impl, 12>( + test_shapes, in_array3, expect_result, legate::complex64(), true); +} + +void argsort_basic_axis_max_dim() +{ + // Only test int type for max dim + std::array in_array = {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}; +#if 
LEGATE_MAX_DIM >= 4 + std::vector> test_shapes_4d = {{1, 1, 1, 16}, {16, 1, 1, 1}, {2, 2, 1, 4}}; + auto expect_result_4d = get_argsort_expect_result_4d(); + argsort_basic_axis_impl(test_shapes_4d, in_array, expect_result_4d, legate::int32()); +#endif + +#if LEGATE_MAX_DIM >= 5 + std::vector> test_shapes_5d = { + {1, 1, 1, 16, 1}, {1, 16, 1, 1, 1}, {1, 2, 2, 1, 4}}; + auto expect_result_5d = get_argsort_expect_result_5d(); + argsort_basic_axis_impl(test_shapes_5d, in_array, expect_result_5d, legate::int32()); +#endif + +#if LEGATE_MAX_DIM >= 6 + std::vector> test_shapes_6d = { + {16, 1, 1, 1, 1, 1}, {1, 1, 16, 1, 1, 1}, {1, 2, 1, 2, 2, 2}}; + auto expect_result_6d = get_argsort_expect_result_6d(); + argsort_basic_axis_impl(test_shapes_6d, in_array, expect_result_6d, legate::int32()); +#endif + +#if LEGATE_MAX_DIM >= 7 + std::vector> test_shapes_7d = { + {1, 16, 1, 1, 1, 1, 1}, {1, 1, 2, 2, 1, 4, 1}, {2, 2, 1, 1, 2, 1, 2}}; + auto expect_result_7d = get_argsort_expect_result_7d(); + argsort_basic_axis_impl(test_shapes_7d, in_array, expect_result_7d, legate::int32()); +#endif +} + +void argsort_large_array() +{ + const int32_t count = 10000; + std::vector> test_shapes = {{count}}; + std::array expect_val; + for (int64_t j = 0; j < count; j++) { + expect_val[j] = count - 1 - j; + } + std::vector>> expect_result = {{{0, expect_val}}}; + + // Test int type for large array + std::array in_array1; + for (int32_t i = 0; i < count; i++) { + in_array1[i] = count - i; + } + argsort_basic_axis_impl(test_shapes, in_array1, expect_result, legate::int32()); + + // Test float type + std::array in_array2; + for (int32_t i = 0; i < count; i++) { + in_array2[i] = count * 1.1 - i; + } + argsort_basic_axis_impl(test_shapes, in_array2, expect_result, legate::float64()); + + // Test complex type + std::array, count> in_array3; + for (int32_t i = 0; i < count; i++) { + in_array3[i] = complex(count - i, count - i); + } + argsort_basic_axis_impl, count>( + test_shapes, in_array3, 
expect_result, legate::complex64()); +} + +void argsort_empty_array() +{ + std::vector> test_shapes = { + {0}, {0, 1}, {1, 0}, {1, 0, 0}, {1, 1, 0}, {1, 0, 1}}; + + std::array in_array = {}; + std::array expect_val = {}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + if (dim == 1) { + test_argsort(in_array, expect_val, legate::int32(), test_shape, axis); + } else if (dim == 2) { + test_argsort(in_array, expect_val, legate::int32(), test_shape, axis); + } else { + test_argsort(in_array, expect_val, legate::int32(), test_shape, axis); + } + } + } +} + +void argsort_single_item_array() +{ + std::vector> test_shapes = {{1}, {1, 1}, {1, 1, 1}}; + + std::array in_array = {12}; + std::array expect_val = {0}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + if (dim == 1) { + test_argsort(in_array, expect_val, legate::int32(), test_shape, axis); + } else if (dim == 2) { + test_argsort(in_array, expect_val, legate::int32(), test_shape, axis); + } else { + test_argsort(in_array, expect_val, legate::int32(), test_shape, axis); + } + } + } +} + +void argsort_negative_test() +{ + auto in_ar1 = cunumeric::zeros({2, 3}, legate::int32()); + + // Test invalid input sort axis + EXPECT_THROW(cunumeric::argsort(in_ar1, 2, "quicksort"), std::invalid_argument); + EXPECT_THROW(cunumeric::argsort(in_ar1, -3, "quicksort"), std::invalid_argument); + + // Test invalid input algorithm + EXPECT_THROW(cunumeric::argsort(in_ar1, 0, "negative"), std::invalid_argument); +} + +// void cpp_test() +TEST(Argsort, BasicAxis) { argsort_basic_axis(); } +TEST(Argsort, BasicAxisStable) { argsort_basic_axis_stable(); } +TEST(Argsort, BasicAxisMaxDim) { 
argsort_basic_axis_max_dim(); } +TEST(Argsort, LargeArray) { argsort_large_array(); } +TEST(Argsort, EmptyArray) { argsort_empty_array(); } +TEST(Argsort, SingleItemArray) { argsort_single_item_array(); } +TEST(Argsort, Negative) { argsort_negative_test(); } diff --git a/tests/cpp/integration/test_argwhere.cc b/tests/cpp/integration/test_argwhere.cc new file mode 100644 index 000000000..e365f5dbf --- /dev/null +++ b/tests/cpp/integration/test_argwhere.cc @@ -0,0 +1,298 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "common_utils.h" + +using namespace cunumeric; + +namespace { +std::vector> get_in_shapes_basic() +{ + std::vector> in_shapes = {{12}, + {4, 3}, + {2, 2, 3}, + {2, 1, 2, 3}, + {2, 1, 2, 1, 3}, + {2, 1, 2, 1, 3, 1}, + {1, 2, 1, 2, 1, 3, 1}}; + return in_shapes; +} + +std::vector> get_exp_shapes_basic() +{ + std::vector> exp_shapes = { + {6, 1}, {6, 2}, {6, 3}, {6, 4}, {6, 5}, {6, 6}, {6, 7}}; + return exp_shapes; +} + +std::vector> get_exp_vectors_basic() +{ + std::vector> exp_vectors = { + {0, 2, 5, 6, 9, 11}, + {0, 0, 0, 2, 1, 2, 2, 0, 3, 0, 3, 2}, + {0, 0, 0, 0, 0, 2, 0, 1, 2, 1, 0, 0, 1, 1, 0, 1, 1, 2}, + {0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 2, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 1, 0, 2, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 2}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 1, 0, 2, 0, + 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 2, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 0, 2, 0, + 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 2, 0}}; + return exp_vectors; +} + +template +void test_argwhere(std::vector& in_vec, + std::vector& exp_vec, + const std::vector& in_shape, + const std::vector& exp_shape) +{ + auto a = mk_array(in_vec, in_shape); + auto x = argwhere(a); + check_array(x, exp_vec, exp_shape); +} + +template +void test_argwhere_basic(std::vector& in_vec, uint32_t dim) +{ + auto in_shapes = get_in_shapes_basic(); + auto exp_shapes = get_exp_shapes_basic(); + auto exp_vectors = get_exp_vectors_basic(); + + test_argwhere(in_vec, exp_vectors[dim - 1], in_shapes[dim - 1], exp_shapes[dim - 1]); +} + +template +void test_argwhere_basic_for_all_dims(std::vector& in_vec) +{ + test_argwhere_basic(in_vec, 1); + test_argwhere_basic(in_vec, 2); + test_argwhere_basic(in_vec, 3); + +#if LEGATE_MAX_DIM >= 4 + test_argwhere_basic(in_vec, 4); +#endif + +#if LEGATE_MAX_DIM >= 5 + 
test_argwhere_basic(in_vec, 5); +#endif + +#if LEGATE_MAX_DIM >= 6 + test_argwhere_basic(in_vec, 6); +#endif + +#if LEGATE_MAX_DIM >= 7 + test_argwhere_basic(in_vec, 7); +#endif +} + +void argwhere_int() +{ + std::vector in_vec = {-1, 0, 4, +0, -0, 45, 5, 0, 0, 9, 0, 4}; + test_argwhere_basic_for_all_dims(in_vec); +} + +void argwhere_double() +{ + std::vector in_vec = {0.01, 0, 4.0, -0.00, 0.00, 0.1, -5, +0.0, 0, 9, 0.0, 4}; + test_argwhere_basic_for_all_dims(in_vec); +} + +void argwhere_complex() +{ + std::vector> in_vec = {complex(1.0, 0), + complex(0.0, 0.0), + 54, + 0, + 0.0, + complex(0, 1.0), + 45, + 0, + 0.0, + 9, + -0.00, + 4}; + test_argwhere_basic_for_all_dims>(in_vec); +} + +void argwhere_bool() +{ + std::vector in_vec = { + true, false, true, false, false, true, true, false, false, true, false, true}; + test_argwhere_basic_for_all_dims(in_vec); +} + +void test_argwhere_empty_array(legate::Type leg_type, + std::vector in_shape, + std::vector exp_shape) +{ + auto a = zeros(in_shape, leg_type); + auto x = argwhere(a); + EXPECT_EQ(x.size(), 0); + EXPECT_EQ(x.type(), legate::int64()); + EXPECT_EQ(x.shape(), exp_shape); +} + +template +std::vector init_large_vector(size_t size) +{ + std::vector vec = {}; + for (uint i = 0; i < size; i++) { + T element = (i % 2 == 0) ? 
1 : 0; + vec.push_back(element); + } + return vec; +} + +template +std::vector argwhere_result(const std::vector& in_vec, + const std::vector& in_shape) +{ + std::vector a(in_shape.size(), 0); + std::vector result; + for (uint32_t i = 0; i < in_vec.size(); i++) { + if (in_vec[i] != 0) { + for (auto aa : a) { + result.push_back(aa); + } + } + int32_t j = a.size() - 1; + while (j >= 0) { + if (++a[j] >= in_shape[j]) { + a[j] = 0; + j--; + } else { + break; + } + } + } + return result; +} + +std::vector gen_shape(uint32_t dim, size_t in_size) +{ + std::vector shape(dim, 1); + size_t value = 2; + size_t prod = 1; + for (int i = 0; i < dim - 1; i++) { + shape[i] = value; + prod *= value; + value++; + } + shape[dim - 1] = in_size / prod; + return shape; +} + +void argwhere_large_array(uint32_t dim) +{ + size_t in_size = 2 * 3 * 4 * 5 * 6 * 7; + auto in_vec = init_large_vector(in_size); + // for dim = 1, in_shape is {5040} + // for dim = 2, in_shape is {2, 2520} + // for dim = 3, in_shape is {2, 3, 840} + // for dim = 7, in_shape is {2, 3, 4, 5, 6, 7} + auto in_shape = gen_shape(dim, in_size); + + auto a = mk_array(in_vec, in_shape); + auto x = argwhere(a); + auto x_comp = argwhere_result(in_vec, in_shape); + std::vector exp_shape = {x_comp.size() / in_shape.size(), dim}; + check_array(x, x_comp, exp_shape); +} + +TEST(Argwhere, Basic) +{ + argwhere_int(); + argwhere_double(); + argwhere_complex(); + argwhere_bool(); +} + +TEST(Argwhere, LargeArray) +{ + argwhere_large_array(1); + argwhere_large_array(2); + argwhere_large_array(3); + +#if LEGATE_MAX_DIM >= 4 + argwhere_large_array(4); +#endif + +#if LEGATE_MAX_DIM >= 5 + argwhere_large_array(5); +#endif + +#if LEGATE_MAX_DIM >= 6 + argwhere_large_array(6); +#endif + +#if LEGATE_MAX_DIM >= 7 + argwhere_large_array(7); +#endif +} + +TEST(Argwhere, EmptyArray) +{ + std::vector> in_shapes = {{ + 0, + }, + {0, 1}, + {1, 0}, + {1, 0, 0}, + {1, 1, 0}, + {1, 0, 1}}; + + std::vector> exp_shapes = { + // {0, 1}, {0, 2}, {0, 2}, {0, 
3}, {0, 3}, {0, 3}};//This is shape of numpy output array. + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0}, + {0, 0} // This is shape of cunumeric output array + }; + + assert(in_shapes.size() == exp_shapes.size()); + for (size_t i = 0; i < in_shapes.size(); i++) { + test_argwhere_empty_array(legate::int32(), in_shapes[i], exp_shapes[i]); + } +} + +TEST(Argwhere, Scalar) +{ + std::vector exp_shape1 = {0, 0}; + auto A1 = zeros({}, legate::int32()); + auto B1 = argwhere(A1); + EXPECT_EQ(B1.size(), 0); + EXPECT_EQ(B1.type(), legate::int64()); + EXPECT_EQ(B1.shape(), exp_shape1); + + std::vector exp_shape2 = {1, 0}; + auto A2 = zeros({}, legate::float64()); + A2.fill(legate::Scalar(static_cast(1))); + auto B2 = cunumeric::argwhere(A2); + EXPECT_EQ(B2.size(), 0); + EXPECT_EQ(B2.type(), legate::int64()); + EXPECT_EQ(B2.shape(), exp_shape2); +} + +} // namespace diff --git a/tests/cpp/integration/test_bincount.cc b/tests/cpp/integration/test_bincount.cc new file mode 100644 index 000000000..d4f4dc7f2 --- /dev/null +++ b/tests/cpp/integration/test_bincount.cc @@ -0,0 +1,106 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +void bincount_test() +{ + // case: x, no w, min_length=0. 
out NDArray type is int64_t if no weights + std::array exp1 = {0, 1, 1, 2, 0, 1, 1}; + std::array in_x1 = {1, 2, 3, 3, 5, 6}; + auto A1 = cunumeric::zeros({6}, legate::int32()); + assign_values_to_array(A1, in_x1.data(), in_x1.size()); + auto B1 = cunumeric::bincount(A1); + check_array_eq(B1, exp1.data(), exp1.size()); + + // case: x, w, min_length=0. + std::array exp2 = {0, 1, 1.2, 2, 0, 1, 0.1}; + std::array in_w2 = {1, 1.2, 1, 1, 1, 0.1}; + auto w2 = cunumeric::zeros({6}, legate::float64()); + assign_values_to_array(w2, in_w2.data(), in_w2.size()); + auto B2 = cunumeric::bincount(A1, w2); + check_array_eq(B2, exp2.data(), exp2.size()); + + // case: x, no w, min_length=8. out NDArray type is int64_t if no weights + std::array exp3 = {0, 1, 1, 2, 0, 1, 1, 0}; + auto B3 = cunumeric::bincount(A1, std::nullopt, 8); + check_array_eq(B3, exp3.data(), exp3.size()); + + // case: x of length 1, no w, min_length=0 + std::array exp4 = {0, 0, 0, 0, 0, 1}; + auto A4 = cunumeric::full({1}, cunumeric::Scalar(5)); + // If we use another way to initialize A4 of length 1 as below, it would rasie error. Seems a lock + // issue. In this way, if A4 is not of length 1, it pass. int64_t in_x4[1] = {5}; auto A4 = + // cunumeric::zeros({1}, legate::int64()); assign_values_to_array(A4, (void *)in_x4, + // sizeof(in_x4)/sizeof(int64_t)); cpp_tests: legion/runtime/realm/runtime_impl.cc:2755: + // Realm::RegionInstanceImpl* Realm::RuntimeImpl::get_instance_impl(Realm::ID): Assertion `0 && + // "invalid instance handle"' failed. 
+ auto B4 = cunumeric::bincount(A4); + check_array_eq(B4, exp4.data(), exp4.size()); + + // case: x of length 1, w of length 1, min_length=0 + std::array exp5 = {0, 0, 0, 0, 0, 1.3}; + auto w5 = cunumeric::full({1}, cunumeric::Scalar(1.3)); + auto B5 = cunumeric::bincount(A4, w5); + check_array_eq(B5, exp5.data(), exp5.size()); + + // case: x of length 1, w of length 1, min_length=8 + std::array exp6 = {0, 0, 0, 0, 0, 1.3, 0, 0}; + auto B6 = cunumeric::bincount(A4, w5, 8); + check_array_eq(B6, exp6.data(), exp6.size()); +} + +void bincount_negative_test() +{ + // case: x.size() == 0 + auto A1 = cunumeric::full({0}, cunumeric::Scalar(5)); + EXPECT_THROW(cunumeric::bincount(A1), std::invalid_argument); + + // case: x.dim() != 1 + auto A2 = cunumeric::full({1, 1}, cunumeric::Scalar(5)); + EXPECT_THROW(cunumeric::bincount(A2), std::invalid_argument); + + // case: x.type() is not int + auto A3 = cunumeric::full({3}, cunumeric::Scalar(1.3)); + EXPECT_THROW(cunumeric::bincount(A3), std::invalid_argument); + + // case: x.shape() != w.shape() + auto A4 = cunumeric::zeros({6}, legate::int32()); + auto w4 = cunumeric::zeros({4}, legate::int32()); + EXPECT_THROW(cunumeric::bincount(A4, w4), std::invalid_argument); + + // case: w.type() is not convertible to float64 + auto w5 = cunumeric::zeros({6}, legate::complex64()); + EXPECT_THROW(cunumeric::bincount(A4, w5), std::invalid_argument); + + // case: x is negative + std::array in_x = {1, 2, -3, 4, 5, 6}; + auto A7 = cunumeric::zeros({6}, legate::int32()); + assign_values_to_array(A7, in_x.data(), in_x.size()); + EXPECT_THROW(cunumeric::bincount(A7), std::invalid_argument); +} + +// void cpp_test() +TEST(Bincount, Normal) { bincount_test(); } + +TEST(Bincount, Negative) { bincount_negative_test(); } diff --git a/tests/cpp/integration/test_convolve.cc b/tests/cpp/integration/test_convolve.cc new file mode 100644 index 000000000..8aa374480 --- /dev/null +++ b/tests/cpp/integration/test_convolve.cc @@ -0,0 +1,135 @@ +/* Copyright 
2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "common_utils.h" +#include + +using namespace cunumeric; + +namespace { + +TEST(Convolve, test_dtype) +{ + auto x = mk_array({1, 2, 3}); + auto y = mk_array({0, 1, 0.5}); + auto out1 = convolve(x, y); + auto out2 = convolve(y, x); + debug_array(out1); + debug_array(out2); + // out1 = [1, 2, 3], out2 = [1, 2.5, 4] + // It is a bug. + // It violates the "NumPy type promotion rules". +} + +TEST(Convolve, test_empty) +{ + auto a = mk_array({}, {0}); + auto v = mk_array({}, {0}); + debug_array(a); + debug_array(v); + // An exception should be thrown, but it doesn't. 
+ auto out = convolve(a, v); + debug_array(out); +} + +TEST(Convolve, test_diff_dims) +{ + auto a = zeros({5, 5, 5}); + auto v = zeros({5, 5}); + EXPECT_ANY_THROW(convolve(a, v)); +} + +std::vector, + std::vector, + std::vector, + std::vector, + std::vector>> + test_data{ + {{0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, + {1, 1, 0, 1, 0, 0, 1}, + {0, 0, 1, 2, 1, 1, 1, 0, 2, 3, 2, 2, 1, 2, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 0, 2, 0, 0, 1, 0}, + {30}, + {7}}, + {{0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, + 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, + 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, + 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0}, + {1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1}, + {2, 3, 3, 3, 4, 4, 3, 5, 2, 3, 4, 3, 6, 5, 7, 4, 5, 4, 2, 2, 4, 4, 6, 5, 6, + 4, 5, 3, 3, 1, 3, 4, 6, 6, 5, 7, 4, 6, 3, 2, 4, 1, 4, 5, 4, 5, 5, 6, 2, 3, + 3, 1, 4, 3, 3, 4, 3, 3, 2, 1, 4, 5, 3, 5, 3, 4, 3, 3, 2, 2, 4, 5, 4, 6, 2, + 3, 1, 5, 2, 3, 3, 4, 4, 5, 5, 5, 3, 4, 1, 2, 0, 2, 3, 2, 2, 2, 3, 1, 0, 0}, + {10, 10}, + {3, 5}}, + {{1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, + 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, + 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, + 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, + 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0}, + {1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0}, + {2, 1, 3, 3, 2, 1, 4, 2, 3, 4, 5, 6, 3, 4, 2, 3, 4, 3, 4, 1, 1, 2, 3, 2, 1, + 1, 3, 3, 5, 2, 5, 7, 6, 5, 2, 5, 7, 4, 4, 4, 4, 6, 6, 5, 3, 2, 3, 2, 2, 1, + 4, 3, 6, 5, 4, 1, 7, 7, 7, 3, 6, 8, 8, 6, 3, 3, 5, 4, 5, 3, 2, 2, 3, 4, 2, + 2, 3, 2, 5, 2, 4, 5, 9, 4, 3, 2, 7, 7, 4, 1, 4, 7, 7, 8, 1, 0, 4, 3, 4, 3, + 3, 3, 3, 4, 2, 1, 
5, 2, 4, 2, 2, 4, 5, 3, 2, 1, 4, 5, 4, 2, 2, 3, 3, 3, 0}, + {5, 5, 5}, + {3, 3, 3}}}; + +TEST(Convolve, test_int) +{ + for (auto [a_in, v_in, out_gt, shape_a, shape_v] : test_data) { + auto a = mk_array(a_in, shape_a); + auto v = mk_array(v_in, shape_v); + auto out = convolve(a, v); + check_array(out, out_gt, shape_a); + debug_array(out, false); + } +} + +TEST(Convolve, test_double) +{ + for (auto [a_in, v_in, out_gt, shape_a, shape_v] : test_data) { + auto a = mk_array(as_type_vector(a_in), shape_a); + auto v = mk_array(as_type_vector(v_in), shape_v); + auto out = convolve(a, v); + check_array(out, as_type_vector(out_gt), shape_a); + debug_array(out, false); + } +} + +TEST(Convolve, test_ndim) +{ + std::vector shape; + std::vector filter_shape; + for (int32_t ndim = 1; ndim <= LEGATE_MAX_DIM; ++ndim) { + shape.emplace_back(5); + filter_shape.emplace_back(3); + auto a_in = mk_seq_vector(shape); + auto v_in = mk_seq_vector(filter_shape, 0, 0); + v_in[v_in.size() / 2] = 1; + auto a = mk_array(a_in, shape); + auto v = mk_array(v_in, filter_shape); + if (ndim <= 3) { + auto out = convolve(a, v); + check_array(out, a_in, shape); + debug_array(out, false); + } else { + EXPECT_ANY_THROW(convolve(a, v)); + } + } +} + +} // namespace diff --git a/tests/cpp/integration/test_diagonal.cc b/tests/cpp/integration/test_diagonal.cc new file mode 100644 index 000000000..09232fcc4 --- /dev/null +++ b/tests/cpp/integration/test_diagonal.cc @@ -0,0 +1,311 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include "cunumeric.h" +#include "util.inl" + +template +void diagonal_test(std::array input, + std::array exp, + std::vector in_shape, + int32_t offset = 0, + int32_t axis1 = 0, + int32_t axis2 = 1, + bool extract = true) +{ + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::diagonal(a_input, offset, axis1, axis2, extract); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Diagonal, Singleton) +{ + const size_t in_size = 6; + const size_t in_dim = 1; + const size_t exp_size = 36; + const size_t exp_dim = 2; + std::array input = {1.3, 2, 3.6, 4, 5, 6}; + std::array exp = {1.3, 0., 0., 0., 0., 0., 0., 2., 0., 0., 0., 0., + 0., 0., 3.6, 0., 0., 0., 0., 0., 0., 4., 0., 0., + 0., 0., 0., 0., 5., 0., 0., 0., 0., 0., 0., 6.}; + std::vector in_shape = {6}; + + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::diagonal(a_input, 0, std::nullopt, std::nullopt, false); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Diagonal, SingletonExtract) +{ + std::vector in_shape = {6}; + + auto a_input = cunumeric::zeros(in_shape); + EXPECT_THROW(cunumeric::diagonal(a_input, 0, std::nullopt, std::nullopt, true), + std::invalid_argument); +} + +TEST(Diagonal, SingletonAxes) +{ + std::vector in_shape = {6}; + + auto a_input = cunumeric::zeros(in_shape); + EXPECT_THROW(cunumeric::diagonal(a_input, 0, 0, 1, false), std::invalid_argument); +} + +TEST(Diagonal, Defaults) +{ + const size_t in_size = 9; + const size_t in_dim = 2; + const size_t exp_size = 3; + const size_t exp_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5, 6}; + std::array exp = {9, 2, 6}; + std::vector in_shape = {3, 3}; + + auto a_input = cunumeric::zeros(in_shape); + 
assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::diagonal(a_input); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Diagonal, EmptyArray) +{ + const size_t exp_size = 0; + const size_t exp_dim = 2; + std::array exp = {}; + + auto a_input = cunumeric::array({0}, legate::int32()); + auto a_output = cunumeric::diagonal(a_input, 0, std::nullopt, std::nullopt, false); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Diagonal, Simple) +{ + const size_t in_size = 9; + const size_t in_dim = 2; + const size_t exp_size = 3; + const size_t exp_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5, 6}; + std::array exp = {9, 2, 6}; + std::vector in_shape = {3, 3}; + + diagonal_test(input, exp, in_shape); +} + +TEST(Diagonal, Offset) +{ + const size_t in_size = 9; + const size_t in_dim = 3; + const size_t exp_size = 1; + const size_t exp_dim = 2; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5, 6}; + std::array exp = {0.5}; + std::vector in_shape = {3, 3, 1}; + + diagonal_test(input, exp, in_shape, 2); +} + +TEST(Diagonal, Axes) +{ + const size_t in_size = 6; + const size_t in_dim = 2; + const size_t exp_size = 2; + const size_t exp_dim = 1; + std::array input = {1.3, 2, 3.6, 4, 5, 6}; + std::array exp = {1.3, 5}; + std::vector in_shape = {2, 3}; + + diagonal_test(input, exp, in_shape, 0, 1, 0); +} + +TEST(Diagonal, InvalidAxes) +{ + const size_t in_size = 6; + const size_t in_dim = 2; + std::array input = {1.3, 2, 3.6, 4, 5, 6}; + std::vector in_shape = {2, 3}; + + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + EXPECT_THROW(cunumeric::diagonal(a_input, 0, 2, 6, true), std::invalid_argument); + EXPECT_THROW(cunumeric::diagonal(a_input, 0, 1, 1, true), std::invalid_argument); +} + +TEST(Diagonal, InvalidOffset) +{ + const size_t in_size = 6; + const size_t in_dim = 2; + std::array input = {1.3, 2, 3.6, 4, 5, 6}; + std::vector in_shape = {2, 
3}; + + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + EXPECT_THROW(cunumeric::diagonal(a_input, 3), std::invalid_argument); +} + +TEST(Diagonal, IntArray) +{ + const size_t in_size = 6; + const size_t in_dim = 2; + const size_t exp_size = 2; + const size_t exp_dim = 1; + std::array input = {1, 2, 3, 4, 5, 6}; + std::array exp = {1, 5}; + std::vector in_shape = {2, 3}; + + auto a_input = cunumeric::zeros(in_shape, legate::int32()); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::diagonal(a_input); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Diagonal, MaxDim) +{ + // Only test int type for max dim + const size_t in_size = 16; + std::array input = {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}; +#if LEGATE_MAX_DIM >= 4 + diagonal_test(input, {14, 6, 10, 11, 3, 1, 12, 15}, {2, 2, 1, 4}); +#endif + +#if LEGATE_MAX_DIM >= 5 + diagonal_test(input, {14, 10, 3, 12, 5, 13, 2, 4}, {1, 2, 2, 1, 4}); +#endif + +#if LEGATE_MAX_DIM >= 6 + diagonal_test(input, {14, 10, 3, 12, 5, 13, 2, 4}, {2, 1, 1, 2, 2, 2}); +#endif + +#if LEGATE_MAX_DIM >= 7 + diagonal_test(input, {14, 6, 10, 11, 3, 1, 12, 15}, {2, 2, 1, 1, 2, 1, 2}); +#endif +} + +template +void trace_test(std::array input, + std::array exp, + std::vector in_shape, + int32_t offset = 0, + int32_t axis1 = 0, + int32_t axis2 = 1, + std::optional type = std::nullopt, + std::optional out = std::nullopt) +{ + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::trace(a_input, offset, axis1, axis2, type, out); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Trace, Simple) +{ + const size_t in_size = 8; + const size_t in_dim = 3; + const size_t exp_size = 4; + const size_t exp_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5}; + std::array exp = {9, 7, 0.5, 1.3}; + std::vector in_shape = {2, 1, 
4}; + trace_test(input, exp, in_shape); +} + +TEST(Trace, Offset) +{ + const size_t in_size = 8; + const size_t in_dim = 3; + const size_t exp_size = 1; + const size_t exp_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5}; + std::array exp = {5.5}; + std::vector in_shape = {2, 4, 1}; + trace_test(input, exp, in_shape, 2); +} + +TEST(Trace, Axes) +{ + const size_t in_size = 8; + const size_t in_dim = 3; + const size_t exp_size = 2; + const size_t exp_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5}; + std::array exp = {9, 2}; + std::vector in_shape = {2, 4, 1}; + trace_test(input, exp, in_shape, 0, 2, 1); +} + +TEST(Trace, IntArray) +{ + const size_t in_size = 8; + const size_t in_dim = 3; + const size_t exp_size = 1; + const size_t exp_dim = 1; + std::array input = {9, 7, 5, 3, 2, 6, 4, 1}; + std::array exp = {15}; + std::vector in_shape = {2, 4, 1}; + auto a_input = cunumeric::zeros(in_shape, legate::int32()); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::trace(a_input, 0, 0, 1); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Trace, TypeInt) +{ + const size_t in_size = 8; + const size_t in_dim = 3; + const size_t exp_size = 1; + const size_t exp_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5}; + std::array exp = {12}; + std::vector in_shape = {2, 4, 1}; + + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::trace(a_input, 0, 0, 1, legate::int32()); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Trace, OutType) +{ + const size_t in_size = 8; + const size_t in_dim = 3; + const size_t exp_size = 1; + const size_t exp_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5}; + std::array exp = {12}; + std::vector in_shape = {2, 4, 1}; + std::vector out_shape = {1}; + + auto a_input = cunumeric::zeros(in_shape); + auto a_output = cunumeric::zeros(out_shape, 
legate::int32()); + assign_values_to_array(a_input, input.data(), input.size()); + cunumeric::trace(a_input, 0, 0, 1, std::nullopt, a_output); + check_array_eq(a_output, exp.data(), exp.size()); +} + +TEST(Trace, InvalidArray) +{ + const size_t in_size = 8; + const size_t in_dim = 1; + std::array input = {9, 7, 0.5, 1.3, 2, 3.6, 4, 5}; + std::vector in_shape = {8}; + + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + EXPECT_THROW(cunumeric::trace(a_input), std::invalid_argument); +} diff --git a/tests/cpp/integration/test_eye.cc b/tests/cpp/integration/test_eye.cc new file mode 100644 index 000000000..438c4ae42 --- /dev/null +++ b/tests/cpp/integration/test_eye.cc @@ -0,0 +1,296 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +template +auto get_eye_expect_result_3_2() +{ + std::map> expect_result = {{-30, {0, 0, 0, 0, 0, 0}}, + {-3, {0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 1, 0}}, + {-1, {0, 0, 1, 0, 0, 1}}, + {0, {1, 0, 0, 1, 0, 0}}, + {1, {0, 1, 0, 0, 0, 0}}, + {2, {0, 0, 0, 0, 0, 0}}, + {3, {0, 0, 0, 0, 0, 0}}, + {30, {0, 0, 0, 0, 0, 0}}}; + return expect_result; +} + +template +auto get_eye_expect_result_3_3() +{ + std::map> expect_result = {{-30, {0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 1, 0, 0}}, + {-1, {0, 0, 0, 1, 0, 0, 0, 1, 0}}, + {0, {1, 0, 0, 0, 1, 0, 0, 0, 1}}, + {1, {0, 1, 0, 0, 0, 1, 0, 0, 0}}, + {2, {0, 0, 1, 0, 0, 0, 0, 0, 0}}, + {3, {0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {30, {0, 0, 0, 0, 0, 0, 0, 0, 0}}}; + return expect_result; +} + +template +auto get_eye_expect_result_3_4() +{ + std::map> expect_result = { + {-30, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-3, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + {-2, {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0}}, + {-1, {0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0}}, + {0, {1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0}}, + {1, {0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1}}, + {2, {0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0}}, + {3, {0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0}}, + {30, {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + }; + return expect_result; +} + +template +auto test_eye_3_2(std::vector& k_vals, std::optional type = std::nullopt) +{ + auto expect_result = get_eye_expect_result_3_2(); + std::vector expect_shape = {3, 2}; + for (auto k : k_vals) { + if (type.has_value()) { + auto result = cunumeric::eye(3, 2, k, type.value()); + EXPECT_EQ(result.type(), type.value()); + EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } else { + auto result = cunumeric::eye(3, 2, k); + EXPECT_EQ(result.type(), legate::float64()); + 
EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } + } +} + +template +auto test_eye_3_3(std::vector& k_vals, std::optional type = std::nullopt) +{ + auto expect_result = get_eye_expect_result_3_3(); + std::vector expect_shape = {3, 3}; + for (auto k : k_vals) { + if (type.has_value()) { + auto result = cunumeric::eye(3, 3, k, type.value()); + EXPECT_EQ(result.type(), type.value()); + EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } else { + auto result = cunumeric::eye(3, 3, k); + EXPECT_EQ(result.type(), legate::float64()); + EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } + } +} + +template +auto test_eye_3_4(std::vector& k_vals, std::optional type = std::nullopt) +{ + auto expect_result = get_eye_expect_result_3_4(); + std::vector expect_shape = {3, 4}; + for (auto k : k_vals) { + if (type.has_value()) { + auto result = cunumeric::eye(3, 4, k, type.value()); + EXPECT_EQ(result.type(), type.value()); + EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } else { + auto result = cunumeric::eye(3, 4, k); + EXPECT_EQ(result.type(), legate::float64()); + EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } + } +} + +template +auto test_eye_square_3(std::optional> k_vals = std::nullopt, + std::optional type = std::nullopt) +{ + auto expect_result = get_eye_expect_result_3_3(); + std::vector expect_shape = {3, 3}; + if (k_vals.has_value()) { + for (auto k : k_vals.value()) { + if (type.has_value()) { + auto result = cunumeric::eye(3, std::nullopt, k, type.value()); + EXPECT_EQ(result.type(), type.value()); + EXPECT_EQ(result.shape(), expect_shape); + auto 
expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } else { + auto result = cunumeric::eye(3, std::nullopt, k); + EXPECT_EQ(result.type(), legate::float64()); + auto expect = expect_result[k]; + check_array_eq(result, expect.data(), expect.size()); + } + } + } else { + if (type.has_value()) { + auto result = cunumeric::eye(3, std::nullopt, 0, type.value()); + EXPECT_EQ(result.type(), type.value()); + EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[0]; + check_array_eq(result, expect.data(), expect.size()); + } else { + auto result = cunumeric::eye(3); + EXPECT_EQ(result.type(), legate::float64()); + EXPECT_EQ(result.shape(), expect_shape); + auto expect = expect_result[0]; + check_array_eq(result, expect.data(), expect.size()); + } + } +} + +void eye_basic() +{ + std::vector k_vals = {-30, -3, -2, -1, 0, 1, 2, 3, 30}; + + // Test default data type + test_eye_3_2(k_vals); + test_eye_3_3(k_vals); + test_eye_3_4(k_vals); + + // Test int type + test_eye_3_2(k_vals, legate::int32()); + test_eye_3_3(k_vals, legate::int32()); + test_eye_3_4(k_vals, legate::int32()); + + // Test complex type + test_eye_3_2>(k_vals, legate::complex64()); + test_eye_3_3>(k_vals, legate::complex64()); + test_eye_3_4>(k_vals, legate::complex64()); +} + +void eye_square() +{ + std::vector k_vals = {-30, -3, -2, -1, 0, 1, 2, 3, 30}; + + // Test default parameter + test_eye_square_3(); + + // Test with k input + test_eye_square_3(k_vals); + + // Test with datatype input + test_eye_square_3(std::nullopt, legate::int32()); + + // Test with k and datatype input + test_eye_square_3>(k_vals, legate::complex64()); +} + +void eye_input_zero() +{ + // Test n=0 + auto result1 = cunumeric::eye(0); + std::vector expect_shape1 = {0, 0}; + EXPECT_EQ(result1.type(), legate::float64()); + EXPECT_EQ(result1.size(), 0); + EXPECT_EQ(result1.shape(), expect_shape1); + + // Test m=0 + auto result2 = cunumeric::eye(3, 0); + std::vector expect_shape2 = {3, 0}; + 
EXPECT_EQ(result2.type(), legate::float64()); + EXPECT_EQ(result2.size(), 0); + EXPECT_EQ(result2.shape(), expect_shape2); +} + +void eye_large_array() +{ + const size_t n_or_m = 1000; + + // Test 1000 * 1000 array + auto result1 = cunumeric::eye(n_or_m); + std::vector expect_shape1 = {n_or_m, n_or_m}; + std::array expect_result1; + expect_result1.fill(0); + for (size_t i = 0; i < n_or_m; i++) { + expect_result1[i * n_or_m + i] = 1; + } + EXPECT_EQ(result1.type(), legate::float64()); + EXPECT_EQ(result1.shape(), expect_shape1); + check_array_eq(result1, expect_result1.data(), expect_result1.size()); + + // Test 3 * 1000 array + const size_t n = 3; + auto result2 = cunumeric::eye(n, n_or_m, 0, legate::int32()); + std::vector expect_shape2 = {n, n_or_m}; + std::array expect_result2; + expect_result2.fill(0); + for (size_t i = 0; i < n; i++) { + expect_result2[i * n_or_m + i] = 1; + } + EXPECT_EQ(result2.type(), legate::int32()); + EXPECT_EQ(result2.shape(), expect_shape2); + check_array_eq(result2, expect_result2.data(), expect_result2.size()); + + // Test 1000 * 3 array + const size_t m = 3; + auto result3 = cunumeric::eye(n_or_m, m, 0, legate::complex64()); + std::vector expect_shape3 = {n_or_m, m}; + std::array, n_or_m * m> expect_result3; + expect_result3.fill(0); + for (size_t i = 0; i < n_or_m; i++) { + if (i < m) { + expect_result3[i * m + i] = 1; + } + } + EXPECT_EQ(result3.type(), legate::complex64()); + EXPECT_EQ(result3.shape(), expect_shape3); + check_array_eq, 2>(result3, expect_result3.data(), expect_result3.size()); +} + +void eye_negative() +{ + // Test bad n + EXPECT_THROW(cunumeric::eye(-1), std::invalid_argument); + EXPECT_THROW(cunumeric::eye(-1, 3), std::invalid_argument); + + // Test bad m + EXPECT_THROW(cunumeric::eye(3, -1), std::invalid_argument); + EXPECT_THROW(cunumeric::eye(-1, -1), std::invalid_argument); + + // Test bad dtype + EXPECT_THROW(cunumeric::eye(3, std::nullopt, 0, legate::binary_type(2)), std::invalid_argument); + 
EXPECT_THROW(cunumeric::eye(3, std::nullopt, 0, legate::point_type(2)), std::invalid_argument); +} + +// void cpp_test() +TEST(Eye, Basic) { eye_basic(); } +TEST(Eye, Square) { eye_square(); } +TEST(Eye, InputZero) { eye_input_zero(); } +TEST(Eye, LargeArray) { eye_large_array(); } +TEST(Eye, Negative) { eye_negative(); } diff --git a/tests/cpp/integration/test_fill.cc b/tests/cpp/integration/test_fill.cc new file mode 100644 index 000000000..fdbc1500d --- /dev/null +++ b/tests/cpp/integration/test_fill.cc @@ -0,0 +1,91 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "common_utils.h" +#include +#include + +using namespace cunumeric; + +namespace { + +TEST(Fill, test_fill_empty_array) +{ + auto x = mk_array({}, {0}); + x.fill(Scalar(int32_t(1))); + check_array(x, {}, {0}); +} + +TEST(Fill, test_fill_float_with_nan) +{ + auto x = zeros({6}, legate::float32()); + float val_nan = std::numeric_limits::quiet_NaN(); + x.fill(Scalar(val_nan)); + auto accessor = x.get_read_accessor(); + for (size_t i = 0; i < x.size(); ++i) { + ASSERT_TRUE(std::isnan(accessor[i])); + } +} + +TEST(Fill, test_fill_inf_to_float) +{ + float val_inf = std::numeric_limits::infinity(); + std::vector INF_VALUES = {val_inf, -val_inf}; + for (auto value : INF_VALUES) { + auto x = zeros({6}, legate::float32()); + std::vector x_gt(6, value); + x.fill(Scalar(value)); + check_array(x, x_gt); + } +} + +TEST(Fill, test_fill_float_to_float) +{ + std::vector FLOAT_FILL_VALUES{-2.4e120, -1.3, 8.9e-130, 0.0, 5.7e-150, 0.6, 3.7e160}; + for (auto value : FLOAT_FILL_VALUES) { + auto x = zeros({6}, legate::float64()); + std::vector x_gt(6, value); + x.fill(Scalar(value)); + check_array(x, x_gt); + } +} + +TEST(Fill, test_fill_ndim) +{ + std::vector shape; + for (int32_t ndim = 1; ndim <= LEGATE_MAX_DIM; ++ndim) { + shape.push_back(ndim); + int32_t value = ndim * 10; + auto x = zeros(shape, legate::int32()); + auto x_gt = mk_seq_vector(shape, 0, value); + x.fill(Scalar(value)); + check_array(x, x_gt, shape); + } +} + +TEST(Fill, test_full_ndim) +{ + std::vector shape; + for (int32_t ndim = 1; ndim <= LEGATE_MAX_DIM; ++ndim) { + shape.push_back(ndim); + int32_t value = ndim * 10; + auto x = full(shape, Scalar(value)); + auto x_gt = mk_seq_vector(shape, 0, value); + check_array(x, x_gt, shape); + } +} + +} // namespace diff --git a/tests/cpp/integration/test_flip.cc b/tests/cpp/integration/test_flip.cc new file mode 100644 index 000000000..ff844fdc3 --- /dev/null +++ b/tests/cpp/integration/test_flip.cc @@ -0,0 +1,666 @@ +/* Copyright 2024 NVIDIA Corporation 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +auto get_flip_expect_result_int() +{ + std::vector>> expect_result = { + {{0, {1, 11, 6, 7, 9, 8, 4, 2, 5, 12, 3, 10}}}, + {{0, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, {1, {1, 11, 6, 7, 9, 8, 4, 2, 5, 12, 3, 10}}}, + {{0, {1, 11, 6, 7, 9, 8, 4, 2, 5, 12, 3, 10}}, {1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}}, + {{0, {7, 6, 11, 1, 2, 4, 8, 9, 10, 3, 12, 5}}, {1, {5, 12, 3, 10, 9, 8, 4, 2, 1, 11, 6, 7}}}, + {{0, {1, 11, 6, 7, 9, 8, 4, 2, 5, 12, 3, 10}}, + {1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {2, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}}, + {{0, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {1, {1, 11, 6, 7, 9, 8, 4, 2, 5, 12, 3, 10}}, + {2, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}}, + {{0, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {2, {1, 11, 6, 7, 9, 8, 4, 2, 5, 12, 3, 10}}}, + {{0, {8, 9, 7, 6, 11, 1, 10, 3, 12, 5, 2, 4}}, + {1, {5, 2, 4, 10, 3, 12, 6, 11, 1, 8, 9, 7}}, + {2, {12, 3, 10, 4, 2, 5, 7, 9, 8, 1, 11, 6}}}}; + return expect_result; +} + +auto get_flip_expect_result_double() +{ + std::vector>> expect_result = { + {{0, {4, 9, 12, 7.9, 11, 8, 10.5, 2.2, 5.98, 6, 3.66, 1.5}}}, + {{0, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {1, {4, 9, 12, 7.9, 11, 8, 10.5, 2.2, 5.98, 6, 3.66, 1.5}}}, + {{0, {4, 9, 12, 7.9, 
11, 8, 10.5, 2.2, 5.98, 6, 3.66, 1.5}}, + {1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}}, + {{0, {7.9, 12, 9, 4, 2.2, 10.5, 8, 11, 1.5, 3.66, 6, 5.98}}, + {1, {5.98, 6, 3.66, 1.5, 11, 8, 10.5, 2.2, 4, 9, 12, 7.9}}}, + {{0, {4, 9, 12, 7.9, 11, 8, 10.5, 2.2, 5.98, 6, 3.66, 1.5}}, + {1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {2, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}}, + {{0, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {1, {4, 9, 12, 7.9, 11, 8, 10.5, 2.2, 5.98, 6, 3.66, 1.5}}, + {2, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}}, + {{0, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {2, {4, 9, 12, 7.9, 11, 8, 10.5, 2.2, 5.98, 6, 3.66, 1.5}}}, + {{0, {8, 11, 7.9, 12, 9, 4, 1.5, 3.66, 6, 5.98, 2.2, 10.5}}, + {1, {5.98, 2.2, 10.5, 1.5, 3.66, 6, 12, 9, 4, 8, 11, 7.9}}, + {2, {6, 3.66, 1.5, 10.5, 2.2, 5.98, 7.9, 11, 8, 4, 9, 12}}}}; + return expect_result; +} + +auto get_flip_expect_result_complex() +{ + std::vector, 12>>> expect_result = { + {{0, + {complex(6, 4), + complex(7.9, 12), + complex(8, 11), + complex(2.2, 10.5), + complex(6, 5.98), + complex(1.5, 3.66), + complex(11, 1), + complex(7, 6), + complex(8, 9), + complex(2, 4), + complex(12, 5), + complex(10, 3)}}}, + {{0, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {1, + {complex(6, 4), + complex(7.9, 12), + complex(8, 11), + complex(2.2, 10.5), + complex(6, 5.98), + complex(1.5, 3.66), + complex(11, 1), + complex(7, 6), + complex(8, 9), + complex(2, 4), + complex(12, 5), + complex(10, 3)}}}, + {{0, + {complex(6, 4), + complex(7.9, 12), + complex(8, 11), + complex(2.2, 10.5), + complex(6, 5.98), + complex(1.5, 3.66), + complex(11, 1), + complex(7, 6), + complex(8, 9), + complex(2, 4), + 
complex(12, 5), + complex(10, 3)}}, + {1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}}, + {{0, + {complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9)}}, + {1, + {complex(8, 9), + complex(2, 4), + complex(12, 5), + complex(10, 3), + complex(6, 5.98), + complex(1.5, 3.66), + complex(11, 1), + complex(7, 6), + complex(6, 4), + complex(7.9, 12), + complex(8, 11), + complex(2.2, 10.5)}}}, + {{0, + {complex(6, 4), + complex(7.9, 12), + complex(8, 11), + complex(2.2, 10.5), + complex(6, 5.98), + complex(1.5, 3.66), + complex(11, 1), + complex(7, 6), + complex(8, 9), + complex(2, 4), + complex(12, 5), + complex(10, 3)}}, + {1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {2, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}}, + {{0, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {1, + {complex(6, 4), + complex(7.9, 12), + complex(8, 11), + complex(2.2, 10.5), + complex(6, 5.98), + complex(1.5, 3.66), + complex(11, 1), + complex(7, 6), + complex(8, 9), + complex(2, 4), + complex(12, 5), + complex(10, 3)}}, + {2, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), 
+ complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}}, + {{0, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {2, + {complex(6, 4), + complex(7.9, 12), + complex(8, 11), + complex(2.2, 10.5), + complex(6, 5.98), + complex(1.5, 3.66), + complex(11, 1), + complex(7, 6), + complex(8, 9), + complex(2, 4), + complex(12, 5), + complex(10, 3)}}}, + {{0, + {complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4), + complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1)}}, + {1, + {complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 11), + complex(7.9, 12), + complex(6, 4), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5)}}, + {2, + {complex(2, 4), + complex(12, 5), + complex(10, 3), + complex(11, 1), + complex(7, 6), + complex(8, 9), + complex(2.2, 10.5), + complex(6, 5.98), + complex(1.5, 3.66), + complex(6, 4), + complex(7.9, 12), + complex(8, 11)}}}}; + return expect_result; +} + +template +void test_flip(std::array& in_array, + std::array& expect, + legate::Type leg_type, + std::vector shape, + std::optional> axis = std::nullopt) +{ + auto A1 = cunumeric::zeros(shape, leg_type); + if (in_array.size() != 0) { + if (in_array.size() == 1) { + A1.fill(legate::Scalar(in_array[0])); + } else { + assign_values_to_array(A1, in_array.data(), in_array.size()); + } + } + + auto B1 = cunumeric::flip(A1, axis); 
+ check_array_eq(B1, expect.data(), expect.size()); +} + +template +void test_flip_none_axis(std::vector>& test_shapes, + std::array& in_array, + std::array& expect_result, + legate::Type leg_type) +{ + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + if (dim == 1) { + test_flip(in_array, expect_result, leg_type, test_shape); + } else if (dim == 2) { + test_flip(in_array, expect_result, leg_type, test_shape); + } else if (dim == 3) { + test_flip(in_array, expect_result, leg_type, test_shape); + } else if (dim == 4) { +#if LEGATE_MAX_DIM >= 4 + test_flip(in_array, expect_result, leg_type, test_shape); +#endif + } else if (dim == 5) { +#if LEGATE_MAX_DIM >= 5 + test_flip(in_array, expect_result, leg_type, test_shape); +#endif + } else if (dim == 6) { +#if LEGATE_MAX_DIM >= 6 + test_flip(in_array, expect_result, leg_type, test_shape); +#endif + } else if (dim == 7) { +#if LEGATE_MAX_DIM >= 7 + test_flip(in_array, expect_result, leg_type, test_shape); +#endif + } + } +} + +template +void test_flip_each_axis(std::vector>& test_shapes, + std::array& in_array, + std::vector>>& expect_result, + legate::Type leg_type) +{ + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + auto index = axis < 0 ? 
axis + dim : axis; + auto expect_val = expect_result[i][index]; + auto axes = {axis}; + if (dim == 1) { + test_flip(in_array, expect_val, leg_type, test_shape, axes); + } else if (dim == 2) { + test_flip(in_array, expect_val, leg_type, test_shape, axes); + } else if (dim == 3) { + test_flip(in_array, expect_val, leg_type, test_shape, axes); + } else if (dim == 4) { +#if LEGATE_MAX_DIM >= 4 + test_flip(in_array, expect_val, leg_type, test_shape, axes); +#endif + } else if (dim == 5) { +#if LEGATE_MAX_DIM >= 5 + test_flip(in_array, expect_val, leg_type, test_shape, axes); +#endif + } else if (dim == 6) { +#if LEGATE_MAX_DIM >= 6 + test_flip(in_array, expect_val, leg_type, test_shape, axes); +#endif + } else if (dim == 7) { +#if LEGATE_MAX_DIM >= 7 + test_flip(in_array, expect_val, leg_type, test_shape, axes); +#endif + } + } + } +} + +void flip_basic() +{ + // If no axis is input, the expect result would equal reverse result of the input array, no matter + // what's the array shape is. + std::vector> test_shapes = { + {12}, {1, 12}, {12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + + // Test int type + std::array in_array1 = {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + std::array expect_result1; + std::reverse_copy(in_array1.begin(), in_array1.end(), expect_result1.begin()); + test_flip_none_axis(test_shapes, in_array1, expect_result1, legate::int32()); + + // Test float type + std::array int_array2 = {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}; + std::array expect_result2; + std::reverse_copy(int_array2.begin(), int_array2.end(), expect_result2.begin()); + test_flip_none_axis(test_shapes, int_array2, expect_result2, legate::float64()); + + // Test complex type + std::array, 12> in_array3 = {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}; + std::array, 12> 
expect_result3; + std::reverse_copy(in_array3.begin(), in_array3.end(), expect_result3.begin()); + test_flip_none_axis, 12>( + test_shapes, in_array3, expect_result3, legate::complex64()); +} + +void flip_single_axis() +{ + std::vector> test_shapes = { + {12}, {1, 12}, {12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + + // Test int type + std::array in_array1 = {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + auto expect_result1 = get_flip_expect_result_int(); + test_flip_each_axis(test_shapes, in_array1, expect_result1, legate::int32()); + + // Test float type + std::array int_array2 = {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}; + auto expect_result2 = get_flip_expect_result_double(); + test_flip_each_axis(test_shapes, int_array2, expect_result2, legate::float64()); + + // Test complex type + std::array, 12> in_array3 = {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}; + auto expect_result3 = get_flip_expect_result_complex(); + test_flip_each_axis, 12>( + test_shapes, in_array3, expect_result3, legate::complex64()); +} + +void flip_multi_axis() +{ + // Test float type + std::vector test_shape = {2, 2, 3}; + std::array in_array = {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}; + + auto axes1 = {-1, 0}; + std::array expect_result1 = {7.9, 11, 8, 4, 9, 12, 6, 3.66, 1.5, 10.5, 2.2, 5.98}; + test_flip(in_array, expect_result1, legate::float64(), test_shape, axes1); + + auto axes2 = {-1, 1}; + std::array expect_result2 = {10.5, 2.2, 5.98, 6, 3.66, 1.5, 4, 9, 12, 7.9, 11, 8}; + test_flip(in_array, expect_result2, legate::float64(), test_shape, axes2); + + auto axes3 = {0, 1}; + std::array expect_result3 = {12, 9, 4, 8, 11, 7.9, 5.98, 2.2, 10.5, 1.5, 3.66, 6}; + test_flip(in_array, expect_result3, legate::float64(), test_shape, axes3); + + auto axes4 = {-1, 0, 1}; + 
std::array expect_result4 = {4, 9, 12, 7.9, 11, 8, 10.5, 2.2, 5.98, 6, 3.66, 1.5}; + test_flip(in_array, expect_result4, legate::float64(), test_shape, axes4); +} + +void flip_max_dim() +{ + std::array in_array = {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}; + std::array expect_result = {15, 1, 11, 6, 7, 9, 8, 16, 4, 2, 13, 5, 12, 3, 10, 14}; +#if LEGATE_MAX_DIM >= 4 + std::vector test_shape_4d = {2, 2, 2, 2}; + // Flip with none axis + test_flip(in_array, expect_result, legate::int32(), test_shape_4d); + // Flip with axis + auto axes_4d = {2, 1, 3}; + std::array expect_result_4d = { + 4, 2, 13, 5, 12, 3, 10, 14, 15, 1, 11, 6, 7, 9, 8, 16}; + test_flip(in_array, expect_result_4d, legate::int32(), test_shape_4d, axes_4d); +#endif + +#if LEGATE_MAX_DIM >= 5 + std::vector test_shape_5d = {1, 2, 2, 1, 4}; + // Flip with none axis + test_flip(in_array, expect_result, legate::int32(), test_shape_5d); + // Flip with axis + auto axes_5d = {4}; + std::array expect_result_5d = { + 12, 3, 10, 14, 4, 2, 13, 5, 7, 9, 8, 16, 15, 1, 11, 6}; + test_flip(in_array, expect_result_5d, legate::int32(), test_shape_5d, axes_5d); +#endif + +#if LEGATE_MAX_DIM >= 6 + std::vector test_shape_6d = {2, 1, 1, 2, 2, 2}; + // Flip with none axis + test_flip(in_array, expect_result, legate::int32(), test_shape_6d); + // Flip with axis + auto axes_6d = {-1, -3, 0, 1}; + std::array expect_result_6d = { + 11, 6, 15, 1, 8, 16, 7, 9, 13, 5, 4, 2, 10, 14, 12, 3}; + test_flip(in_array, expect_result_6d, legate::int32(), test_shape_6d, axes_6d); +#endif + +#if LEGATE_MAX_DIM >= 7 + std::vector test_shape_7d = {1, 16, 1, 1, 1, 1, 1}; + // Flip with none axis + test_flip(in_array, expect_result, legate::int32(), test_shape_7d); + // Flip with axis + auto axes_7d = {0, 2, 3, 4, 5, 6}; + std::array expect_result_7d = { + 14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}; + test_flip(in_array, expect_result_7d, legate::int32(), test_shape_7d, axes_7d); +#endif +} + +void flip_large_array() +{ + 
const int32_t count = 10000; + std::vector test_shape = {count}; + + // Test int type for large array + std::array in_array1; + for (int32_t i = 0; i < count; i++) { + in_array1[i] = count - i; + } + std::array expect_val1; + for (int32_t j = 0; j < count; j++) { + expect_val1[j] = j + 1; + } + test_flip(in_array1, expect_val1, legate::int32(), test_shape); + + // Test float type + std::array in_array2; + for (int32_t i = 0; i < count; i++) { + in_array2[i] = count * 1.0 - i; + } + std::array expect_val2; + for (int32_t j = 0; j < count; j++) { + expect_val2[j] = (j + 1) * 1.0; + } + test_flip(in_array2, expect_val2, legate::float64(), test_shape); + + // Test complex type + std::array, count> in_array3; + for (int32_t i = 0; i < count; i++) { + in_array3[i] = complex(count - i, count - i); + } + std::array, count> expect_val3; + for (int32_t j = 0; j < count; j++) { + expect_val3[j] = complex(j + 1, j + 1); + } + test_flip, count, 1>(in_array3, expect_val3, legate::complex64(), test_shape); +} + +void flip_empty_array() +{ + std::array in_array = {}; + std::vector test_shape = {0}; + + // Without axis input + test_flip(in_array, in_array, legate::int32(), test_shape); + + // With axis input + auto axes = {0}; + test_flip(in_array, in_array, legate::int32(), test_shape, axes); +} + +void flip_single_item_array() +{ + std::vector test_shape = {1, 1, 1}; + std::array in_array = {12}; + + // Without axis input + test_flip(in_array, in_array, legate::int32(), test_shape); + + // With axis input + auto axes1 = {1}; + test_flip(in_array, in_array, legate::int32(), test_shape, axes1); + + auto axes2 = {-1, 1}; + test_flip(in_array, in_array, legate::int32(), test_shape, axes2); + + auto axes3 = {-1, 0, 1}; + test_flip(in_array, in_array, legate::int32(), test_shape, axes3); +} + +void flip_negative_test() +{ + auto in_array = cunumeric::zeros({2, 3}, legate::int32()); + + // Test axis out-of-bound + auto axes1 = {12}; + EXPECT_THROW(cunumeric::flip(in_array, axes1), 
std::invalid_argument); + + // Test axis out-of-bound negative + auto axes2 = {-12}; + EXPECT_THROW(cunumeric::flip(in_array, axes2), std::invalid_argument); + + // Test axis repeated axis + auto axes3 = {1, 1}; + EXPECT_THROW(cunumeric::flip(in_array, axes3), std::invalid_argument); + + // Test axis out-of-bound multiple + auto axes4 = {1, 2}; + EXPECT_THROW(cunumeric::flip(in_array, axes4), std::invalid_argument); +} + +// void cpp_test() +TEST(Flip, Basic) { flip_basic(); } +TEST(Flip, Single_Axis) { flip_single_axis(); } +TEST(Flip, Multi_Axis) { flip_multi_axis(); } +TEST(Flip, MaxDim) { flip_max_dim(); } +TEST(Flip, LargeArray) { flip_large_array(); } +TEST(Flip, EmptyArray) { flip_empty_array(); } +TEST(Flip, SingleItemArray) { flip_single_item_array(); } +TEST(Flip, Negative) { flip_negative_test(); } diff --git a/tests/cpp/integration/test_logical.cc b/tests/cpp/integration/test_logical.cc new file mode 100644 index 000000000..1a003c699 --- /dev/null +++ b/tests/cpp/integration/test_logical.cc @@ -0,0 +1,451 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +template +void test_all(std::array& in_array, + std::array& expect_result, + legate::Type leg_type, + std::vector shape, + std::optional> axis = std::nullopt, + std::optional out = std::nullopt, + std::optional keepdims = std::nullopt, + std::optional where = std::nullopt) +{ + auto A1 = cunumeric::zeros(shape, leg_type); + if (in_array.size() != 0) { + if (in_array.size() == 1) { + A1.fill(legate::Scalar(in_array[0])); + } else { + assign_values_to_array(A1, in_array.data(), in_array.size()); + } + } + + if (!out.has_value()) { + auto B1 = cunumeric::all(A1, axis, std::nullopt, keepdims, where); + check_array_eq(B1, expect_result.data(), expect_result.size()); + } else { + cunumeric::all(A1, axis, out, keepdims, where); + check_array_eq(out.value(), expect_result.data(), expect_result.size()); + } +} + +template +void test_all_each_axis(std::array& in_array, + std::map>& expect_result, + legate::Type leg_type, + std::vector shape, + bool keepdims = false) +{ + int32_t dim = shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + auto index = axis < 0 ? 
axis + dim : axis; + auto expect_val = expect_result[index]; + auto axes = {axis}; + if (keepdims) { + if (dim == 1) { + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); + } else if (dim == 2) { + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); + } else if (dim == 3) { + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); + } else if (dim == 4) { +#if LEGATE_MAX_DIM >= 4 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } else if (dim == 5) { +#if LEGATE_MAX_DIM >= 5 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } else if (dim == 6) { +#if LEGATE_MAX_DIM >= 6 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } else if (dim == 7) { +#if LEGATE_MAX_DIM >= 7 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } + } else { + if (dim == 1) { + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); + } else if (dim == 2) { + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); + } else if (dim == 3) { + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); + } else if (dim == 4) { +#if LEGATE_MAX_DIM >= 4 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } else if (dim == 5) { +#if LEGATE_MAX_DIM >= 5 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } else if (dim == 6) { +#if LEGATE_MAX_DIM >= 6 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } else if (dim == 7) { +#if LEGATE_MAX_DIM >= 7 + test_all( + in_array, expect_val, leg_type, shape, axes, std::nullopt, keepdims); +#endif + } + } + } +} + +void test_all_basic() +{ + // Test int type + std::array in_array1 = {-1, 4, 5}; + 
std::vector shape1 = {3}; + std::map> expect_result1 = {{0, {true}}}; + test_all_each_axis(in_array1, expect_result1, legate::int32(), shape1); + test_all_each_axis(in_array1, expect_result1, legate::int32(), shape1, true); + + std::array in_array2 = {5, 10, 0, 100}; + std::map> expect_result2 = {{0, {false}}}; + std::vector shape2 = {4}; + test_all_each_axis(in_array2, expect_result2, legate::int32(), shape2); + test_all_each_axis(in_array2, expect_result2, legate::int32(), shape2, true); + + std::array in_array3 = {0, 0, 0, 0}; + std::map> expect_result3 = {{0, {false, false}}, + {1, {false, false}}}; + std::vector shape3 = {2, 2}; + test_all_each_axis(in_array3, expect_result3, legate::int32(), shape3); + test_all_each_axis(in_array3, expect_result3, legate::int32(), shape3, true); + + std::array in_array4 = {0, 1, 2, 3, 4, 0, 6, 7}; + std::map> expect_result4 = {{0, {false, false, true, true}}, + {1, {false, true, true, false}}, + {2, {false, true, false, true}}}; + std::vector shape4 = {2, 2, 2}; + test_all_each_axis(in_array4, expect_result4, legate::int32(), shape4); + test_all_each_axis(in_array4, expect_result4, legate::int32(), shape4, true); + + // Test bool type + std::array in_array5 = {true, true, false, true, true, true, true, true, false}; + std::map> expect_result5 = {{0, {true, true, false}}, + {1, {false, true, false}}}; + std::vector shape5 = {3, 3}; + test_all_each_axis(in_array5, expect_result5, legate::bool_(), shape5); + test_all_each_axis(in_array5, expect_result5, legate::bool_(), shape5, true); + + // Test float type + std::array in_array6 = {0.0, 1.0, 0.0, 5.0, 2.0, 1.0, 1.0, 2.0, 3.0}; + std::map> expect_result6 = {{0, {false, true, false}}, + {1, {false, true, true}}}; + std::vector shape6 = {3, 3}; + test_all_each_axis(in_array6, expect_result6, legate::float64(), shape6); + test_all_each_axis(in_array6, expect_result6, legate::float64(), shape6, true); + + // Test complex type + std::array, 4> in_array7 = { + complex(0, 1), 
complex(1, 1), complex(1, 0), complex(0, 0)}; + std::map> expect_result7 = {{0, {true, false}}, {1, {true, false}}}; + std::vector shape7 = {2, 2}; + test_all_each_axis, 4, 2>(in_array7, expect_result7, legate::complex64(), shape7); + test_all_each_axis, 4, 2>( + in_array7, expect_result7, legate::complex64(), shape7, true); + + std::array, 1> in_array8 = {complex(0, 1)}; + std::map> expect_result8 = {{0, {true}}}; + std::vector shape8 = {1}; + test_all_each_axis, 1, 1>( + in_array8, expect_result8, legate::complex128(), shape8); + test_all_each_axis, 1, 1>( + in_array8, expect_result8, legate::complex128(), shape8, true); +} + +void test_all_axis_input() +{ + std::array in_array = {5, 10, 0, 100}; + std::vector shape = {1, 2, 2}; + + std::vector axis1 = {0}; + std::array expect_val1 = {true, true, false, true}; + test_all(in_array, expect_val1, legate::int32(), shape, axis1); + + std::vector axis2 = {1, 2}; + std::array expect_val2 = {false}; + test_all(in_array, expect_val2, legate::int32(), shape, axis2); + + std::vector axis3 = {-1, 0, 1}; + std::array expect_val3 = {false}; + test_all(in_array, expect_val3, legate::int32(), shape, axis3); +} + +void test_all_where_input() +{ + std::array in_array = {true, false, true, true}; + std::vector shape = {2, 2}; + + // Test where with multiple bool values + std::array where_in1 = {true, false}; + auto where_array1 = cunumeric::zeros({2}, legate::bool_()); + assign_values_to_array(where_array1, where_in1.data(), where_in1.size()); + + std::array expect_val1 = {true}; + test_all( + in_array, expect_val1, legate::bool_(), shape, std::nullopt, std::nullopt, false, where_array1); + + // Test where with single bool value + std::array where_in2 = {true}; + auto where_array2 = cunumeric::zeros({1}, legate::bool_()); + assign_values_to_array(where_array2, where_in2.data(), where_in2.size()); + + std::array expect_val2 = {false}; + test_all( + in_array, expect_val2, legate::bool_(), shape, std::nullopt, std::nullopt, false, 
where_array2); + + std::array where_in3 = {false}; + auto where_array3 = cunumeric::zeros({1}, legate::bool_()); + assign_values_to_array(where_array3, where_in3.data(), where_in3.size()); + + std::array expect_val3 = {true}; + test_all( + in_array, expect_val3, legate::bool_(), shape, std::nullopt, std::nullopt, false, where_array3); +} + +void test_all_out_input() +{ + std::array in_array = {0, 1, 2, 3, 4, 5, 6, 7}; + std::vector shape = {2, 2, 2}; + std::vector out_shape = {2, 2}; + std::vector axis = {0}; + + auto out1 = cunumeric::zeros(out_shape, legate::int32()); + std::array expect_val1 = {0, 1, 1, 1}; + test_all(in_array, expect_val1, legate::int32(), shape, axis, out1); + + auto out2 = cunumeric::zeros(out_shape, legate::float64()); + std::array expect_val2 = {0.0, 1.0, 1.0, 1.0}; + test_all(in_array, expect_val2, legate::int32(), shape, axis, out2); + + auto out3 = cunumeric::zeros(out_shape, legate::complex64()); + std::array, 4> expect_val3 = { + complex(0, 0), complex(1, 0), complex(1, 0), complex(1, 0)}; + test_all, 8, 4, 3, 2>( + in_array, expect_val3, legate::int32(), shape, axis, out3); + + auto out4 = cunumeric::zeros(out_shape, legate::bool_()); + std::array expect_val4 = {false, true, true, true}; + test_all(in_array, expect_val4, legate::int32(), shape, axis, out4); +} + +template +void test_all_max_dim(int32_t dim) +{ + std::array in_array; + for (int32_t i = 0; i < IN_SIZE; i++) { + in_array[i] = i; + } + + int32_t count = IN_SIZE / OUT_SIZE; + std::vector shapes; + for (int32_t i = 0; i < dim; i++) { + shapes.push_back(count); + } + + std::array expect_val; + expect_val[0] = false; + for (int32_t i = 1; i < OUT_SIZE; i++) { + expect_val[i] = true; + } + + std::map> expect_result; + for (int32_t i = 0; i < dim; i++) { + expect_result[i] = expect_val; + } + + test_all_each_axis(in_array, expect_result, legate::int32(), shapes); + test_all_each_axis( + in_array, expect_result, legate::int32(), shapes, true); +} + +void test_all_max_dim() +{ 
+#if LEGATE_MAX_DIM >= 4 + const int32_t count_4d = 81; + const int32_t count_expect_4d = 27; + const int32_t dim_4d = 4; + test_all_max_dim(dim_4d); +#endif + +#if LEGATE_MAX_DIM >= 5 + const int32_t count_5d = 243; + const int32_t count_expect_5d = 81; + const int32_t dim_5d = 5; + test_all_max_dim(dim_5d); +#endif + +#if LEGATE_MAX_DIM >= 6 + const int32_t count_6d = 729; + const int32_t count_expect_6d = 243; + const int32_t dim_6d = 6; + test_all_max_dim(dim_6d); +#endif + +#if LEGATE_MAX_DIM >= 7 + const int32_t count_7d = 2187; + const int32_t count_expect_7d = 729; + const int32_t dim_7d = 7; + test_all_max_dim(dim_7d); +#endif +} + +void test_all_empty_array() +{ + std::array in_array = {}; + std::vector shape = {0}; + std::array expect_val = {true}; + + test_all(in_array, expect_val, legate::int32(), shape); +} + +void test_all_large_array() +{ + const int32_t count = 100000; + std::vector shape = {count}; + std::array expect_val = {true}; + + // Test int type for large array + std::array in_array1; + for (int32_t i = 0; i < count; i++) { + in_array1[i] = i + 1; + } + test_all(in_array1, expect_val, legate::int32(), shape); + + // Test float type + std::array in_array2; + for (int32_t i = 0; i < count; i++) { + in_array2[i] = i + 1.1; + } + test_all(in_array2, expect_val, legate::float64(), shape); + + // Test complex type + std::array, count> in_array3; + for (int32_t i = 0; i < count; i++) { + in_array3[i] = complex(i + 1, i + 1); + } + test_all, bool, count, 1, 1, 1>(in_array3, expect_val, legate::complex64(), shape); +} + +void test_all_invalid_axis() +{ + std::array in_array = {5, 10, 0, 100}; + std::vector shape = {1, 2, 2}; + auto array = cunumeric::zeros(shape, legate::int32()); + assign_values_to_array(array, in_array.data(), in_array.size()); + + // Test out-of-bound + std::vector axis1 = {-4, 3}; + EXPECT_THROW(cunumeric::all(array, axis1), std::invalid_argument); + + std::vector axis2 = {0, 3}; + EXPECT_THROW(cunumeric::all(array, axis2), 
std::invalid_argument); + + // Test repeated axes + std::vector axis3 = {1, 1}; + EXPECT_THROW(cunumeric::all(array, axis3), std::invalid_argument); + + std::vector axis4 = {-1, 2}; + EXPECT_THROW(cunumeric::all(array, axis4), std::invalid_argument); +} + +void test_all_invalid_shape() +{ + std::array in_array = {5, 10, 0, 100}; + std::vector shape = {1, 2, 2}; + auto array = cunumeric::zeros(shape, legate::int32()); + assign_values_to_array(array, in_array.data(), in_array.size()); + + std::vector out_shape1 = {1}; + auto out1 = cunumeric::zeros(out_shape1, legate::int32()); + EXPECT_THROW(cunumeric::all(array, std::nullopt, out1), std::invalid_argument); + + std::vector out_shape2 = {2}; + std::vector axis2 = {1}; + auto out2 = cunumeric::zeros(out_shape2, legate::int32()); + EXPECT_THROW(cunumeric::all(array, axis2, out2), std::invalid_argument); + + std::vector out_shape3 = {2, 2}; + std::vector axis3 = {1}; + auto out3 = cunumeric::zeros(out_shape3, legate::int32()); + EXPECT_THROW(cunumeric::all(array, axis3, out3), std::invalid_argument); +} + +void test_all_invalid_where() +{ + std::array in_array = {5, 10, 0, 100}; + std::vector shape = {1, 2, 2}; + auto array = cunumeric::zeros(shape, legate::int32()); + assign_values_to_array(array, in_array.data(), in_array.size()); + + // Test where with invalid type + std::array in_where1 = {0, 1, 0, 1}; + auto where1 = cunumeric::zeros(shape, legate::int32()); + assign_values_to_array(where1, in_where1.data(), in_where1.size()); + EXPECT_THROW(cunumeric::all(array, std::nullopt, std::nullopt, false, where1), + std::invalid_argument); + + // Test where with invalid shape + std::vector where_shape = {2, 2, 1}; + std::array in_where2 = {false, true, false, true}; + auto where2 = cunumeric::zeros(where_shape, legate::bool_()); + assign_values_to_array(where2, in_where2.data(), in_where2.size()); + EXPECT_THROW(cunumeric::all(array, std::nullopt, std::nullopt, false, where2), std::exception); +} + +// void cpp_test() 
+TEST(Logical, AllBasicTest) { test_all_basic(); } +TEST(Logical, AllAxisInput) { test_all_axis_input(); } +TEST(Logical, AllOutInput) { test_all_out_input(); } +// TODO - after where is supported +// TEST(Logical, AllWhereInput) { test_all_where_input(); } +TEST(Logical, AllEmptyArray) { test_all_empty_array(); } +TEST(Logical, AllLargeArray) { test_all_large_array(); } +TEST(Logical, AllMaxDim) { test_all_max_dim(); } +TEST(Logical, AllInvalidAxis) { test_all_invalid_axis(); } +TEST(Logical, AllInvalidShape) { test_all_invalid_shape(); } +TEST(Logical, AllInvalidWhere) { test_all_invalid_where(); } diff --git a/tests/cpp/integration/test_moveaxis.cc b/tests/cpp/integration/test_moveaxis.cc new file mode 100644 index 000000000..c02af1d81 --- /dev/null +++ b/tests/cpp/integration/test_moveaxis.cc @@ -0,0 +1,150 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +template +static void moveaxis_int32_test(std::vector input, + std::vector exp, + std::vector in_shape, + std::vector out_shape, + std::vector source, + std::vector destination) +{ + auto a_input = cunumeric::zeros(in_shape, legate::int32()); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::moveaxis(a_input, source, destination); + check_array_eq(a_output, exp.data(), exp.size()); + EXPECT_EQ(a_output.shape(), out_shape); +} + +static void moveaxis_int32_test_2(std::vector in_shape, + std::vector out_shape, + std::vector source, + std::vector destination) +{ + auto a_input = cunumeric::zeros(in_shape, legate::int32()); + auto a_output = cunumeric::moveaxis(a_input, source, destination); + EXPECT_EQ(a_output.shape(), out_shape); +} + +TEST(MoveAxis, Normal) +{ + moveaxis_int32_test<2>({1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {2, 3}, {2, 3}, {0}, {0}); + moveaxis_int32_test<2>({1, 2, 3, 4, 5, 6}, {1, 4, 2, 5, 3, 6}, {2, 3}, {3, 2}, {0}, {-1}); + moveaxis_int32_test<3>( + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}, + {0, 12, 1, 13, 2, 14, 3, 15, 4, 16, 5, 17, 6, 18, 7, 19, 8, 20, 9, 21, 10, 22, 11, 23}, + {2, 3, 4}, + {3, 4, 2}, + {0}, + {-1}); +} + +TEST(MoveAxis, SpecialArrays) +{ + // test single element array + { + std::vector input{99}; + auto a = cunumeric::zeros({1}, legate::int32()); + a.fill(legate::Scalar(input[0])); + auto a_out = cunumeric::moveaxis(a, {0}, {-1}); + check_array_eq(a_out, input.data(), input.size()); + EXPECT_EQ(a_out.shape(), a.shape()); + } + { + std::vector input{-100}; + auto a = cunumeric::zeros({1, 1}, legate::int32()); + a.fill(legate::Scalar(input[0])); + auto a_out = cunumeric::moveaxis(a, {0, 1}, {-1, -2}); + check_array_eq(a_out, input.data(), input.size()); + EXPECT_EQ(a_out.shape(), a.shape()); + } + + // test empty array + 
{ + auto a = cunumeric::zeros({0}, legate::int32()); + auto a_out = cunumeric::moveaxis(a, {0}, {-1}); + EXPECT_EQ(a_out.shape(), a.shape()); + } +} + +TEST(MoveAxis, Shape) +{ + moveaxis_int32_test_2({3, 4, 5}, {4, 5, 3}, {0}, {-1}); + moveaxis_int32_test_2({3, 4, 5}, {5, 3, 4}, {-1}, {0}); + moveaxis_int32_test_2({3, 4, 5}, {5, 4, 3}, {0, 1}, {-1, -2}); + moveaxis_int32_test_2({3, 4, 5}, {5, 4, 3}, {0, 1, 2}, {-1, -2, -3}); +} + +TEST(MoveAxis, Shape7D) +{ + moveaxis_int32_test_2({3, 2, 2, 2}, {2, 2, 2, 3}, {0}, {-1}); + +#if LEGATE_MAX_DIM >= 5 + moveaxis_int32_test_2({3, 2, 2, 2, 2}, {2, 2, 2, 2, 3}, {0}, {-1}); +#endif + +#if LEGATE_MAX_DIM >= 6 + moveaxis_int32_test_2({3, 4, 2, 2, 2, 2}, {2, 2, 2, 2, 4, 3}, {0, 1}, {-1, -2}); +#endif + +#if LEGATE_MAX_DIM >= 7 + moveaxis_int32_test_2({3, 4, 5, 2, 2, 2, 2}, {2, 2, 2, 2, 3, 4, 5}, {2, 1, 0}, {-1, -2, -3}); +#endif +} + +TEST(MoveAxis, EmptyShape) +{ + moveaxis_int32_test_2({0, 1, 2}, {1, 2, 0}, {0}, {-1}); + moveaxis_int32_test_2({1, 0, 7}, {7, 1, 0}, {-1}, {0}); + moveaxis_int32_test_2({4, 0, 9, 0}, {0, 4, 0, 9}, {2, 0}, {3, 1}); +} + +TEST(MoveAxis, With_empty_array) +{ + moveaxis_int32_test_2({3, 4}, {3, 4}, {}, {}); + moveaxis_int32_test_2({3, 4, 5}, {3, 4, 5}, {}, {}); +} + +TEST(MoveAxisErrors, Repeated_axis) +{ + auto x = cunumeric::zeros({3, 4, 5}, legate::int32()); + EXPECT_THROW(cunumeric::moveaxis(x, {0, 0}, {1, 0}), std::invalid_argument); + EXPECT_THROW(cunumeric::moveaxis(x, {0, 1}, {0, -3}), std::invalid_argument); +} + +TEST(MoveAxisErrors, Axis_out_of_bound) +{ + auto x = cunumeric::zeros({3, 4, 5}, legate::int32()); + EXPECT_THROW(cunumeric::moveaxis(x, {0, 3}, {0, 1}), std::invalid_argument); + EXPECT_THROW(cunumeric::moveaxis(x, {0, 1}, {0, -4}), std::invalid_argument); + EXPECT_THROW(cunumeric::moveaxis(x, {4}, {0}), std::invalid_argument); + EXPECT_THROW(cunumeric::moveaxis(x, {0}, {-4}), std::invalid_argument); +} + +TEST(MoveAxisErrors, Axis_with_different_length) +{ + auto x = 
cunumeric::zeros({3, 4, 5}, legate::int32()); + EXPECT_THROW(cunumeric::moveaxis(x, {0}, {1, 0}), std::invalid_argument); +} diff --git a/tests/cpp/integration/test_msort.cc b/tests/cpp/integration/test_msort.cc new file mode 100644 index 000000000..aa923c06b --- /dev/null +++ b/tests/cpp/integration/test_msort.cc @@ -0,0 +1,398 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +auto get_msort_expect_result_int() +{ + std::vector> expect_result = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {2, 3, 8, 1, 7, 4, 11, 5, 10, 6, 12, 9}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}, + {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}, + {8, 3, 7, 5, 2, 1, 10, 9, 12, 6, 11, 4}}; + return expect_result; +} + +auto get_msort_expect_result_int_4d() +{ + std::vector> expect_result = { + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {14, 8, 3, 7, 5, 11, 1, 4, 16, 10, 9, 12, 6, 13, 2, 15}}; + return expect_result; +} + +auto get_msort_expect_result_int_5d() +{ + std::vector> expect_result = { + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {14, 10, 3, 12, 5, 
13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}}; + return expect_result; +} + +auto get_msort_expect_result_int_6d() +{ + std::vector> expect_result = { + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {14, 8, 3, 7, 5, 11, 1, 4, 16, 10, 9, 12, 6, 13, 2, 15}}; + return expect_result; +} + +auto get_msort_expect_result_int_7d() +{ + std::vector> expect_result = { + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {5, 8, 1, 4, 6, 10, 2, 7, 14, 11, 3, 12, 16, 13, 9, 15}, + {14, 8, 3, 7, 5, 11, 1, 4, 16, 10, 9, 12, 6, 13, 2, 15}}; + return expect_result; +} + +auto get_msort_expect_result_double() +{ + std::vector> expect_result = { + {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}, + {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}, + {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}, + {1.5, 3.66, 6, 4, 2.2, 10.5, 8, 5.98, 7.9, 12, 9, 11}, + {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}, + {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}, + {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}, + {1.5, 3.66, 6, 5.98, 2.2, 4, 8, 11, 7.9, 12, 9, 10.5}}; + return expect_result; +} + +auto get_msort_expect_result_complex() +{ + std::vector, 12>> expect_result = {{complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}, + {complex(2.2, 10.5), + complex(8, 11), + 
complex(1.5, 3.66), + complex(6, 4), + complex(7, 6), + complex(11, 1), + complex(2, 4), + complex(6, 5.98), + complex(10, 3), + complex(12, 5), + complex(7.9, 12), + complex(8, 9)}, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}, + {complex(1.5, 3.66), + complex(6, 5.98), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(6, 4), + complex(10, 3), + complex(12, 5), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(11, 1)}}; + return expect_result; +} + +template +void test_msort(std::array& in_array, + std::array& expect, + legate::Type leg_type, + std::vector shape) +{ + auto A1 = cunumeric::zeros(shape, leg_type); + if (in_array.size() != 0) { + if (in_array.size() == 1) { + A1.fill(legate::Scalar(in_array[0])); + } else { + assign_values_to_array(A1, in_array.data(), in_array.size()); + } + print_array(A1); + } + + auto B1 = cunumeric::msort(A1); + check_array_eq(B1, expect.data(), expect.size()); +} + +template +void msort_basic_impl(std::vector>& test_shapes, + std::array in_array, + std::vector>& expect_result, + legate::Type leg_type) +{ + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + auto expect_val = expect_result[i]; + if (dim == 1) { + test_msort(in_array, expect_val, leg_type, 
test_shape); + } else if (dim == 2) { + test_msort(in_array, expect_val, leg_type, test_shape); + } else if (dim == 3) { + test_msort(in_array, expect_val, leg_type, test_shape); + } else if (dim == 4) { +#if LEGATE_MAX_DIM >= 4 + test_msort(in_array, expect_val, leg_type, test_shape); +#endif + } else if (dim == 5) { +#if LEGATE_MAX_DIM >= 5 + test_msort(in_array, expect_val, leg_type, test_shape); +#endif + } else if (dim == 6) { +#if LEGATE_MAX_DIM >= 6 + test_msort(in_array, expect_val, leg_type, test_shape); +#endif + } else if (dim == 7) { +#if LEGATE_MAX_DIM >= 7 + test_msort(in_array, expect_val, leg_type, test_shape); +#endif + } + } +} + +void msort_basic() +{ + std::vector> test_shapes = { + {12}, {1, 12}, {12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + + // Test int type + std::array in_array1 = {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + auto expect_result1 = get_msort_expect_result_int(); + msort_basic_impl(test_shapes, in_array1, expect_result1, legate::int32()); + + // Test float type + std::array in_array2 = {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}; + auto expect_result2 = get_msort_expect_result_double(); + msort_basic_impl(test_shapes, in_array2, expect_result2, legate::float64()); + + // Test complex type + std::array, 12> in_array3 = {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}; + auto expect_result3 = get_msort_expect_result_complex(); + msort_basic_impl, 12>(test_shapes, in_array3, expect_result3, legate::complex64()); +} + +void msort_basic_max_dim() +{ + // Only test int type for max dim + std::array in_array = {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}; +#if LEGATE_MAX_DIM >= 4 + std::vector> test_shapes_4d = {{1, 1, 1, 16}, {16, 1, 1, 1}, {2, 2, 1, 4}}; + auto expect_result_4d = get_msort_expect_result_int_4d(); + 
msort_basic_impl(test_shapes_4d, in_array, expect_result_4d, legate::int32()); +#endif + +#if LEGATE_MAX_DIM >= 5 + std::vector> test_shapes_5d = { + {16, 1, 1, 1, 1}, {1, 16, 1, 1, 1}, {1, 2, 2, 1, 4}}; + auto expect_result_5d = get_msort_expect_result_int_5d(); + msort_basic_impl(test_shapes_5d, in_array, expect_result_5d, legate::int32()); +#endif + +#if LEGATE_MAX_DIM >= 6 + std::vector> test_shapes_6d = { + {16, 1, 1, 1, 1, 1}, {1, 1, 16, 1, 1, 1}, {2, 1, 1, 2, 2, 2}}; + auto expect_result_6d = get_msort_expect_result_int_6d(); + msort_basic_impl(test_shapes_6d, in_array, expect_result_6d, legate::int32()); +#endif + +#if LEGATE_MAX_DIM >= 7 + std::vector> test_shapes_7d = { + {1, 16, 1, 1, 1, 1, 1}, {4, 1, 2, 2, 1, 1, 1}, {2, 2, 1, 1, 2, 1, 2}}; + auto expect_result_7d = get_msort_expect_result_int_7d(); + msort_basic_impl(test_shapes_7d, in_array, expect_result_7d, legate::int32()); +#endif +} + +void msort_large_array() +{ + const int32_t count = 10000; + std::vector> test_shapes = {{count}}; + + // Test int type for large array + std::array in_array1; + for (int32_t i = 0; i < count; i++) { + in_array1[i] = count - i; + } + std::array expect_val1; + for (int32_t j = 0; j < count; j++) { + expect_val1[j] = j + 1; + } + std::vector> expect_result1 = {expect_val1}; + msort_basic_impl(test_shapes, in_array1, expect_result1, legate::int32()); + + // Test float type + std::array in_array2; + for (int32_t i = 0; i < count; i++) { + in_array2[i] = count * 1.0 - i; + } + std::array expect_val2; + for (int32_t j = 0; j < count; j++) { + expect_val2[j] = (j + 1) * 1.0; + } + std::vector> expect_result2 = {expect_val2}; + msort_basic_impl(test_shapes, in_array2, expect_result2, legate::float64()); + + // Test complex type + std::array, count> in_array3; + for (int32_t i = 0; i < count; i++) { + in_array3[i] = complex(count - i, count - i); + } + std::array, count> expect_val3; + for (int32_t j = 0; j < count; j++) { + expect_val3[j] = complex(j + 1, j + 1); + } + 
std::vector, count>> expect_result3 = {expect_val3}; + msort_basic_impl, count>( + test_shapes, in_array3, expect_result3, legate::complex64()); +} + +void msort_empty_array() +{ + std::vector> test_shapes = { + {0}, {0, 1}, {1, 0}, {1, 0, 0}, {1, 1, 0}, {1, 0, 1}}; + + std::array in_array = {}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + if (dim == 1) { + test_msort(in_array, in_array, legate::int32(), test_shape); + } else if (dim == 2) { + test_msort(in_array, in_array, legate::int32(), test_shape); + } else { + test_msort(in_array, in_array, legate::int32(), test_shape); + } + } +} + +void msort_single_item_array() +{ + std::vector> test_shapes = {{1}, {1, 1}, {1, 1, 1}}; + + std::array in_array = {12}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + if (dim == 1) { + test_msort(in_array, in_array, legate::int32(), test_shape); + } else if (dim == 2) { + test_msort(in_array, in_array, legate::int32(), test_shape); + } else { + test_msort(in_array, in_array, legate::int32(), test_shape); + } + } +} + +// void cpp_test() +TEST(Msort, Basic) { msort_basic(); } +TEST(Msort, BasicMaxDim) { msort_basic_max_dim(); } +TEST(Msort, LargeArray) { msort_large_array(); } +TEST(Msort, EmptyArray) { msort_empty_array(); } +TEST(Msort, SingleItemArray) { msort_single_item_array(); } diff --git a/tests/cpp/integration/test_nonzero.cc b/tests/cpp/integration/test_nonzero.cc new file mode 100644 index 000000000..5dfd23896 --- /dev/null +++ b/tests/cpp/integration/test_nonzero.cc @@ -0,0 +1,273 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ +#include "common_utils.h" + +auto get_nonzero_expect_result() +{ + std::vector>> expect_result = { + {{1, 2, 4, 5, 6, 8, 10}}, + {{0, 0, 0, 0, 0, 0, 0}, {1, 2, 4, 5, 6, 8, 10}}, + {{1, 2, 4, 5, 6, 8, 10}, {0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 1, 1, 1, 2, 2}, {1, 2, 0, 1, 2, 0, 2}}, + {{1, 2, 4, 5, 6, 8, 10}, {0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 0, 0}, {1, 2, 4, 5, 6, 8, 10}, {0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0, 0}, {1, 2, 4, 5, 6, 8, 10}}, + {{0, 0, 0, 0, 1, 1, 1}, {0, 0, 1, 1, 0, 0, 1}, {1, 2, 1, 2, 0, 2, 1}}}; + return expect_result; +} + +auto get_nonzero_expect_result_4d() +{ + std::vector>> expect_result = {{{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 5, 7, 9, 11, 14}}, + {{0, 2, 3, 5, 7, 9, 11, 14}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 1, 1, 1}, + {0, 0, 0, 1, 1, 0, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 1, 3, 1, 3, 2}}}; + return expect_result; +} + +auto get_nonzero_expect_result_5d() +{ + std::vector>> expect_result = {{{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 5, 7, 9, 11, 14}, + {0, 0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 5, 7, 9, 11, 14}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 1, 1}, + {0, 0, 0, 1, 1, 0, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 1, 3, 1, 3, 2}}}; + return 
expect_result; +} + +auto get_nonzero_expect_result_6d() +{ + std::vector>> expect_result = {{{0, 2, 3, 5, 7, 9, 11, 14}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 5, 7, 9, 11, 14}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 1, 0, 0, 1}, + {0, 1, 1, 0, 1, 0, 1, 1}, + {0, 0, 1, 1, 1, 1, 1, 0}}}; + return expect_result; +} + +auto get_nonzero_expect_result_7d() +{ + std::vector>> expect_result = {{{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 5, 7, 9, 11, 14}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 1, 1}, + {0, 0, 0, 1, 1, 0, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 3, 1, 3, 1, 3, 2}, + {0, 0, 0, 0, 0, 0, 0, 0}}, + {{0, 0, 0, 0, 0, 1, 1, 1}, + {0, 0, 0, 1, 1, 0, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 0, 1, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 1, 1, 0}}}; + return expect_result; +} + +template +void test_nonzero(const std::vector& in_array, + const std::vector>& expect, + const std::vector& shape) +{ + auto array = cunumeric::mk_array(in_array, shape); + auto result_vec = cunumeric::nonzero(array); + size_t result_size = result_vec.size(); + ASSERT_EQ(result_size, expect.size()); + std::vector expect_shape = {}; + if (shape.size() > 0) { + if (result_vec[0].size() == 0) { + expect_shape.push_back(0); + } else if (result_vec[0].size() == 1) { + expect_shape.push_back(1); + } + } + for (size_t i = 0; i < result_size; ++i) { + cunumeric::check_array(result_vec[i], expect[i], expect_shape); + } +} + +template +void nonzero_basic_impl(const 
std::vector>& test_shapes, + const std::vector& in_array, + const std::vector>>& expect_result) +{ + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + test_nonzero(in_array, expect_result[i], test_shapes[i]); + } +} + +void nonzero_basic() +{ + std::vector> test_shapes = { + {12}, {1, 12}, {12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + auto expect_result = get_nonzero_expect_result(); + + // Test int type + std::vector in_array1 = {0, 3, 12, 0, 2, 4, 8, 0, 7, 0, 11, 0}; + nonzero_basic_impl(test_shapes, in_array1, expect_result); + + // Test float type + std::vector in_array2 = {0.0, 3.5, 11.0, 0, 2.2, 6.5, 8, 0.0, 7.9, 0.0, 0.0011, 0}; + nonzero_basic_impl(test_shapes, in_array2, expect_result); + + // Test complex type + std::vector> in_array3 = {complex(0, 0), + complex(2.2, 0), + complex(12, 5), + complex(0), + complex(2, 4), + complex(6, 4), + complex(8, 9), + complex(0, 0), + complex(7.9, 12), + complex(0), + complex(0, 0.001), + complex(0, 0)}; + nonzero_basic_impl>(test_shapes, in_array3, expect_result); +} + +void nonzero_basic_max_dim() +{ + // Only test int type for max dim + std::vector in_array = {14, 0, 3, 12, 0, 13, 0, 4, 0, 8, 0, 7, 0, 0, 1, 0}; + +#if LEGATE_MAX_DIM >= 4 + std::vector> test_shapes_4d = {{1, 1, 1, 16}, {16, 1, 1, 1}, {2, 2, 1, 4}}; + auto expect_result_4d = get_nonzero_expect_result_4d(); + nonzero_basic_impl(test_shapes_4d, in_array, expect_result_4d); +#endif + +#if LEGATE_MAX_DIM >= 5 + std::vector> test_shapes_5d = { + {1, 1, 1, 16, 1}, {1, 16, 1, 1, 1}, {1, 2, 2, 1, 4}}; + auto expect_result_5d = get_nonzero_expect_result_5d(); + nonzero_basic_impl(test_shapes_5d, in_array, expect_result_5d); +#endif + +#if LEGATE_MAX_DIM >= 6 + std::vector> test_shapes_6d = { + {16, 1, 1, 1, 1, 1}, {1, 1, 16, 1, 1, 1}, {1, 2, 1, 2, 2, 2}}; + auto expect_result_6d = get_nonzero_expect_result_6d(); + nonzero_basic_impl(test_shapes_6d, in_array, expect_result_6d); +#endif + +#if 
LEGATE_MAX_DIM >= 7 + std::vector> test_shapes_7d = { + {1, 16, 1, 1, 1, 1, 1}, {1, 1, 2, 2, 1, 4, 1}, {2, 2, 1, 1, 2, 1, 2}}; + auto expect_result_7d = get_nonzero_expect_result_7d(); + nonzero_basic_impl(test_shapes_7d, in_array, expect_result_7d); +#endif +} + +void nonzero_large_array() +{ + const int32_t count = 10000; + std::vector test_shape = {count}; + std::vector> expect_result = {{0, 9999}}; + + // Test int type for large array + std::vector in_array1(count); + in_array1.assign(count, 0); + in_array1[0] = 1; + in_array1[9999] = 1; + test_nonzero(in_array1, expect_result, test_shape); + + // Test float type for large array + std::vector in_array2(count); + in_array2.assign(count, 0.0); + in_array2[0] = 0.0001; + in_array2[9999] = 0.0001; + test_nonzero(in_array2, expect_result, test_shape); + + // Test complex type for large array + std::vector> in_array3(count); + in_array3.assign(count, complex(0.0)); + in_array3[0] = complex(0.0001, 0.0); + in_array3[9999] = complex(0.0, 0.0001); + test_nonzero>(in_array3, expect_result, test_shape); +} + +void nonzero_empty_array() +{ + std::vector> test_shapes = { + {0}, {0, 1}, {1, 0}, {1, 0, 0}, {1, 1, 0}, {1, 0, 1}}; + + std::vector in_array = {}; + std::vector>> expect_result = { + {{}}, {{}, {}}, {{}, {}}, {{}, {}, {}}, {{}, {}, {}}, {{}, {}, {}}}; + + nonzero_basic_impl(test_shapes, in_array, expect_result); +} + +void single_item_array() +{ + std::vector> test_shapes = {{1}, {1, 1}, {1, 1, 1}}; + + std::vector in_array1 = {1}; + std::vector>> expect_result1 = { + {{0}}, {{0}, {0}}, {{0}, {0}, {0}}}; + nonzero_basic_impl(test_shapes, in_array1, expect_result1); + + std::vector in_array2 = {0}; + std::vector>> expect_result2 = {{{}}, {{}, {}}, {{}, {}, {}}}; + nonzero_basic_impl(test_shapes, in_array2, expect_result2); +} + +// void cpp_test() +TEST(Nonzero, Basic) { nonzero_basic(); } +TEST(Nonzero, BasicMaxDim) { nonzero_basic_max_dim(); } +TEST(Nonzero, LargeArray) { nonzero_large_array(); } +TEST(Nonzero, 
EmptyArray) { nonzero_empty_array(); } +TEST(Nonzero, SingleItemArray) { single_item_array(); } diff --git a/tests/cpp/integration/test_put.cc b/tests/cpp/integration/test_put.cc new file mode 100644 index 000000000..2339fc0bd --- /dev/null +++ b/tests/cpp/integration/test_put.cc @@ -0,0 +1,292 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "common_utils.h" +#include + +using namespace cunumeric; +namespace { + +template +std::vector put_result(std::vector const& a, + std::vector const& indices, + std::vector const& values, + std::string mode = "raise") +{ + if (a.size() == 0 || indices.size() == 0 || values.size() == 0) { + return a; + } + std::vector out(a); + int64_t size = static_cast(a.size()); + + for (size_t i = 0; i < indices.size(); ++i) { + auto val = static_cast(values[i % values.size()]); + auto ind = static_cast(indices[i]); + if (mode == "wrap") { + ind %= size; + } else if (mode == "clip") { + if (ind < 0) { + ind = 0; + } + if (ind >= size) { + ind = size - 1; + } + } + if (ind < 0) { + ind += size; + } + if (!(ind >= 0 && ind < size)) { + throw std::invalid_argument("vector index out of bounds"); + } + out[ind] = val; + } + return out; +} + +TEST(Put, test_scalar_indices_values) +{ + { + auto x = mk_array({1, 2, 3, 4, 5, 6}, {2, 3}); + auto indices = mk_array({0}); + auto values = mk_array({10}); + put(x, indices, values); // put(x, 0, 10) + check_array(x, {10, 2, 3, 4, 5, 6}, 
{2, 3}); + } + { + auto x = mk_array({1, 2, 3, 4, 5, 6}, {2, 3}); + auto indices = mk_array({0}); + auto values = mk_array({10, 20, 30}); + put(x, indices, values); // put(x, 0, [10, 20, 30]) + check_array(x, {10, 2, 3, 4, 5, 6}, {2, 3}); + } + { + auto x = mk_array({1, 2, 3, 4, 5, 6}, {2, 3}); + auto indices = mk_array({0}, {1}); + auto values = mk_array({10}); + put(x, indices, values); // put(x, [0], 10) + check_array(x, {10, 2, 3, 4, 5, 6}, {2, 3}); + } + { + auto x = mk_array({1, 2, 3, 4, 5, 6}, {2, 3}); + auto indices = mk_array({0, 1, 2.5, 1}); + auto values = mk_array({10.5}); + put(x, indices, values); // put(x, [0, 1, 2.5, 1], 10) + check_array(x, {10, 10, 10, 4, 5, 6}, {2, 3}); + } +} + +TEST(Put, test_scalar_indices_values_mode) +{ + std::vector mode_list{"wrap", "clip"}; + std::vector> values_list{{10}, {10, 20}}; + std::vector> indices_list{{100}, {-100}}; + + std::vector shape{3, 4, 5}; + auto x_in = mk_seq_vector(shape); + + for (auto indices : indices_list) { + for (auto values : values_list) { + for (auto mode : mode_list) { + auto x = mk_array(x_in, shape); + auto v = mk_array(values); + auto ind = mk_array(indices); + put(x, ind, v, mode); + auto x_gt = put_result(x_in, indices, values, mode); + check_array(x, x_gt, shape); + } + } + } +} + +TEST(Put, test_scalar_arr) +{ + std::vector, std::vector>> values_list{ + {{10}, {}}, {{10}, {1}}, {{10, 20}, {}}}; + std::vector, std::vector>> indices_list{ + {{0}, {}}, {{0}, {1}}, {{-1}, {}}, {{-1}, {1}}}; + std::vector x_in{0}; + for (auto [indices, shape_ind] : indices_list) { + for (auto [values, shape_val] : values_list) { + auto x = mk_array(x_in); + auto v = mk_array(values, shape_val); + auto ind = mk_array(indices, shape_ind); + put(x, ind, v); + auto x_gt = put_result(x_in, indices, values); + check_array(x, x_gt); + } + } +} + +TEST(Put, test_scalar_arr_mode) +{ + std::vector mode_list{"wrap", "clip"}; + std::vector> indices_list{{-1}, {1}, {-1, 0}, {-1, 0, 1, 2}}; + std::vector values{10}; + 
std::vector x_in{0}; + + for (auto indices : indices_list) { + for (auto mode : mode_list) { + auto x = mk_array(x_in); + auto v = mk_array(values); + auto ind = mk_array(indices); + put(x, ind, v, mode); + auto x_gt = put_result(x_in, indices, values, mode); + check_array(x, x_gt); + } + } +} + +TEST(Put, test_indices_type_convert) +{ + std::vector shape{3, 4, 5}; + auto x_in = mk_seq_vector(shape); + auto values = mk_seq_vector({6}, 10); + std::vector indices{-2, 2}; + auto x = mk_array(x_in); + auto v = mk_array(values); + auto ind = mk_array(indices); + put(x, ind, v); + auto x_gt = put_result(x_in, indices, values); + check_array(x, x_gt); +} + +TEST(Put, test_indices_array_and_shape_array) +{ + std::vector, std::vector>> INDICES_VALUES_SHAPE{ + {{0}, {1}}, + {{2}, {0}}, + {{2}, {1}}, + {{2}, {2}}, + {{2}, {3}}, + {{2}, {2, 1}}, + {{2}, {3, 2}}, + {{2, 2}, {1}}, + {{2, 2}, {4}}, + {{2, 2}, {5}}, + {{2, 2}, {2, 1}}, + {{2, 2}, {2, 2}}, + {{2, 2}, {3, 3}}, + }; + std::vector> shape_list{{2, 3, 4}, {6}}; + + for (auto shape : shape_list) { + for (auto [shape_ind, shape_val] : INDICES_VALUES_SHAPE) { + auto x_in = mk_seq_vector(shape); + auto indices = mk_seq_vector(shape_ind); + auto values = mk_seq_vector(shape_val, 10); + auto x = mk_array(x_in, shape); + auto v = mk_array(values, shape_val); + auto ind = mk_array(indices, shape_ind); + put(x, ind, v); + auto x_gt = put_result(x_in, indices, values); + check_array(x, x_gt, shape); + } + } +} + +TEST(Put, test_ndim_default_mode) +{ + std::vector shape, shape_ind, shape_val; + + for (int ndim = 1; ndim <= LEGATE_MAX_DIM; ++ndim) { + shape.push_back(5); + shape_ind.push_back(3); + shape_val.push_back(2); + auto x_in = mk_seq_vector(shape); + auto indices = mk_seq_vector(shape_ind); + auto values = mk_seq_vector(shape_val, 10); + auto x = mk_array(x_in, shape); + auto v = mk_array(values, shape_val); + auto ind = mk_array(indices, shape_ind); + put(x, ind, v); + auto x_gt = put_result(x_in, indices, values); + 
check_array(x, x_gt, shape); + } +} + +TEST(Put, test_ndim_mode) +{ + std::vector mode_list{"wrap", "clip"}; + std::vector, std::vector>> INDICES = { + {{1, 2, 3.2, 100}, {}}, {{2, 1, 3, 100}, {2, 2}}, {{1}, {1}}, {{100}, {1}}}; + + std::vector shape, shape_val; + for (int ndim = 1; ndim <= LEGATE_MAX_DIM; ++ndim) { + shape.push_back(5); + shape_val.push_back(2); + auto x_in = mk_seq_vector(shape); + auto values = mk_seq_vector(shape_val, 10); + for (auto [indices, shape_ind] : INDICES) { + for (auto mode : mode_list) { + auto x = mk_array(x_in, shape); + auto v = mk_array(values, shape_val); + auto ind = mk_array(indices, shape_ind); + put(x, ind, v, mode); + auto x_gt = put_result(x_in, indices, values, mode); + check_array(x, x_gt, shape); + } + } + } +} + +TEST(Put, test_empty_array) +{ + auto x = mk_array({}, {0}); + auto values = mk_array({10}); + auto indices = mk_array({}, {0}); + put(x, indices, values); + check_array(x, {}, {0}); +} + +TEST(Put, test_indices_out_of_bound) +{ + std::vector> indices_list{{-13}, {12}, {0, 1, 12}}; + std::vector shape{3, 4}; + auto x_in = mk_seq_vector(shape); + auto x = mk_array(x_in, shape); + auto v = mk_array({10}); + for (auto indices : indices_list) { + auto ind = mk_array(indices); + EXPECT_ANY_THROW(put(x, ind, v)); + EXPECT_ANY_THROW(put(x, ind, v, "raise")); + } +} + +TEST(Put, test_indices_out_of_bound_arr_is_scalar) +{ + std::vector, std::vector>> indices_list = { + {{-2}, {}}, {{1}, {}}, {{1}, {1}}}; + auto x = mk_array({0}); + auto v = mk_array({10}); + for (auto [indices, shape_ind] : indices_list) { + auto ind = mk_array(indices, shape_ind); + EXPECT_ANY_THROW(put(x, ind, v)); + EXPECT_ANY_THROW(put(x, ind, v, "raise")); + } +} + +TEST(Put, test_invalid_mode) +{ + std::string mode = "unknown"; + std::vector shape{3, 4}; + auto x_in = mk_seq_vector(shape); + auto x = mk_array(x_in, shape); + auto ind = mk_array({0}); + auto v = mk_array({10}); + EXPECT_THROW(put(x, ind, v, mode), std::invalid_argument); +} + +} 
// namespace diff --git a/tests/cpp/integration/test_repartition.cc b/tests/cpp/integration/test_repartition.cc new file mode 100644 index 000000000..8d0011cee --- /dev/null +++ b/tests/cpp/integration/test_repartition.cc @@ -0,0 +1,453 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" +#include "cunumeric/utilities/repartition.h" + +namespace repartition_test { + +constexpr const char* library_name = "test_repartition"; + +constexpr bool debug = false; + +enum TaskIDs { + CHECK_REPARTITION_TASK = 0, +}; + +template +struct CheckRepartitionTask + : public legate::LegateTask> { + static const std::int32_t TASK_ID = CHECK_REPARTITION_TASK + I_ROW_MAJOR * 2 + O_ROW_MAJOR; + static void gpu_variant(legate::TaskContext context); +}; + +class RepartitionLayoutMapper : public legate::mapping::Mapper { + void set_machine(const legate::mapping::MachineQueryInterface* /*machine*/) override {} + legate::mapping::TaskTarget task_target( + const legate::mapping::Task& /*task*/, + const std::vector& options) override + { + return options.front(); + } + std::vector store_mappings( + const legate::mapping::Task& task, + const std::vector& options) override + { + auto task_id = task.task_id(); + bool out_row_major = task_id % 2 == 1; + bool in_row_major = task_id > 1; + + std::vector mappings; + auto inputs = task.inputs(); + auto 
outputs = task.outputs(); + for (auto& input : inputs) { + mappings.push_back(legate::mapping::StoreMapping::default_mapping( + input.data(), options.front(), true /*exact*/)); + if (in_row_major) { + mappings.back().policy().ordering.set_c_order(); + } else { + mappings.back().policy().ordering.set_fortran_order(); + } + } + for (auto& output : outputs) { + mappings.push_back(legate::mapping::StoreMapping::default_mapping( + output.data(), options.front(), true /*exact*/)); + if (out_row_major) { + mappings.back().policy().ordering.set_c_order(); + } else { + mappings.back().policy().ordering.set_fortran_order(); + } + } + return mappings; + } + legate::Scalar tunable_value(legate::TunableID /*tunable_id*/) override + { + return legate::Scalar{}; + } +}; + +int get_rank_row_major(legate::Domain domain, legate::DomainPoint index_point) +{ + int domain_index = 0; + auto hi = domain.hi(); + auto lo = domain.lo(); + for (int i = 0; i < domain.get_dim(); ++i) { + if (i > 0) { + domain_index *= hi[i] - lo[i] + 1; + } + domain_index += index_point[i]; + } + return domain_index; +} + +void repartition_2dbc_test(legate::AccessorRO input, + legate::Rect<2> in_rect, + bool in_row_major, + legate::AccessorWO output, + legate::Rect<2> out_rect, + bool out_row_major, + int32_t proc_r, + int32_t proc_c, + int32_t tile_r, + int32_t tile_c, + int32_t local_rank, + legate::comm::Communicator comm) +{ + const int32_t* input_ptr = input.ptr(in_rect.lo); + size_t input_volume = in_rect.volume(); + size_t input_offset_r = in_rect.lo[0]; + size_t input_offset_c = in_rect.lo[1]; + size_t input_lld = + in_rect.empty() ? 1 : (in_rect.hi[in_row_major ? 1 : 0] - in_rect.lo[in_row_major ? 
1 : 0] + 1); + + auto [buffer_2dbc, volume_2dbc, lld_2dbc] = cunumeric::repartition_matrix_2dbc(input_ptr, + input_volume, + in_row_major, + input_offset_r, + input_offset_c, + input_lld, + proc_r, + proc_c, + tile_r, + tile_c, + comm); + + int32_t* output_ptr = output.ptr(out_rect.lo); + size_t output_volume = out_rect.volume(); + size_t output_offset_r = out_rect.lo[0]; + size_t output_offset_c = out_rect.lo[1]; + size_t num_rows = out_rect.hi[0] < out_rect.lo[0] ? 0 : out_rect.hi[0] - out_rect.lo[0] + 1; + size_t num_cols = out_rect.hi[1] < out_rect.lo[1] ? 0 : out_rect.hi[1] - out_rect.lo[1] + 1; + size_t output_lld = out_rect.empty() ? 1 : (out_row_major ? num_cols : num_rows); + + if (debug) { + std::ostringstream stringStream; + stringStream << "DEBUG: volume_2dbc = " << volume_2dbc << ", lld_2dbc = " << lld_2dbc + << ", in_row_major = " << in_row_major << ", out_row_major = " << out_row_major + << ", num_rows = " << num_rows << ", num_cols = " << num_cols + << ", output_offset_r = " << output_offset_r + << ", output_offset_c = " << output_offset_c << ", output_lld = " << output_lld + << ", rank = " << local_rank << std::endl; + std::cerr << stringStream.str(); + } + + cunumeric::repartition_matrix_block(buffer_2dbc, + volume_2dbc, + lld_2dbc, + local_rank, + proc_r, + proc_c, + tile_r, + tile_c, + output_ptr, + output_volume, + output_lld, + num_rows, + num_cols, + out_row_major, + output_offset_r, + output_offset_c, + comm); +} + +void register_tasks() +{ + static bool prepared = false; + if (prepared) { + return; + } + prepared = true; + auto runtime = legate::Runtime::get_runtime(); + auto library = runtime->create_library( + library_name, legate::ResourceConfig{}, std::make_unique()); + + CheckRepartitionTask::register_variants(library); + CheckRepartitionTask::register_variants(library); + CheckRepartitionTask::register_variants(library); + CheckRepartitionTask::register_variants(library); +} + +template +/*static*/ void 
CheckRepartitionTask::gpu_variant( + legate::TaskContext context) +{ + auto input = context.input(0); + auto output = context.output(0); + auto shape_in = input.shape<2>(); + auto shape_out = output.shape<2>(); + + size_t tile_r = context.scalar(0).value(); + size_t tile_c = context.scalar(1).value(); + + auto total_ranks = context.get_launch_domain().get_volume(); + auto local_rank = get_rank_row_major(context.get_launch_domain(), context.get_task_index()); + + if (total_ranks == 1) { + std::cerr << "Error: aborting due to single task launch. Ensure LEGATE_TEST=1 to force " + "parallel execution for small test dimensions." + << std::endl; + return; + } + + int32_t pr = total_ranks; + int32_t pc = 1; + while (pc * 2 <= pr && pr % 2 == 0) { + pr /= 2; + pc *= 2; + } + + auto input_acc = input.data().read_accessor(shape_in); + auto output_acc = output.data().write_accessor(shape_out); + + bool in_row_major = shape_in.empty() || input_acc.accessor.is_dense_row_major(shape_in); + bool in_col_major = shape_in.empty() || input_acc.accessor.is_dense_col_major(shape_in); + bool out_row_major = shape_out.empty() || output_acc.accessor.is_dense_row_major(shape_out); + bool out_col_major = shape_out.empty() || output_acc.accessor.is_dense_col_major(shape_out); + + if (debug) { + std::ostringstream stringStream; + stringStream << "DEBUG: Domain = " << context.get_launch_domain() + << ", index = " << context.get_task_index() << ", I_ROW_MAJOR = " << I_ROW_MAJOR + << ", O_ROW_MAJOR = " << O_ROW_MAJOR << ", shape_in = " << shape_in + << "(order=" << in_row_major << "," << in_col_major << ")" + << ", shape_out = " << shape_out << "(order=" << out_row_major << ", " + << out_col_major << ")" + << ", communicators = " << context.num_communicators() << ", rank = " << local_rank + << ", tile = (" << tile_r << "," << tile_c << ")" + << ", procs_2dbc = (" << pr << "," << pc << ")" << std::endl; + std::cerr << stringStream.str(); + } + + EXPECT_EQ(true, I_ROW_MAJOR ? 
in_row_major : in_col_major); + EXPECT_EQ(true, O_ROW_MAJOR ? out_row_major : out_col_major); + + repartition_2dbc_test(input_acc, + shape_in, + I_ROW_MAJOR, + output_acc, + shape_out, + O_ROW_MAJOR, + pr, + pc, + tile_r, + tile_c, + local_rank, + context.communicator(0)); +} + +template +void run_test_aligned_default_launch(std::vector& data_shape, + std::vector& tile_shape) +{ + auto runtime = legate::Runtime::get_runtime(); + auto library = runtime->find_library(library_name); + auto machine = runtime->get_machine(); + auto num_gpus = machine.count(legate::mapping::TaskTarget::GPU); + if (num_gpus < 2) { + GTEST_SKIP(); + } + + // generate data + size_t volume = data_shape[0] * data_shape[1]; + auto data_input = cunumeric::zeros(data_shape, legate::int32()); + auto data_output = cunumeric::zeros(data_shape, legate::int32()); + if (volume != 0) { + if (volume == 1) { + data_input.fill(legate::Scalar(0)); + } else { + std::vector numbers(volume); + std::iota(numbers.data(), numbers.data() + volume, 0); + assign_values_to_array(data_input, numbers.data(), numbers.size()); + } + } + + // start custom test-task with aligned in/out + auto task = runtime->create_task(library, CHECK_REPARTITION_TASK + I_ROW_MAJOR * 2 + O_ROW_MAJOR); + auto part_in = task.add_input(data_input.get_store()); + auto part_out = task.add_output(data_output.get_store()); + task.add_scalar_arg(legate::Scalar{tile_shape[0]}); + task.add_scalar_arg(legate::Scalar{tile_shape[1]}); + task.add_constraint(legate::align(part_in, part_out)); + task.add_communicator("nccl"); + runtime->submit(std::move(task)); + + check_array_eq(data_input, data_output); +} + +void run_tests_with_shape(std::vector& data_shape, std::vector& tile_shape) +{ + auto machine = legate::Runtime::get_runtime()->get_machine(); + auto num_gpus = machine.count(legate::mapping::TaskTarget::GPU); + if (num_gpus < 2) { + GTEST_SKIP(); + } + + run_test_aligned_default_launch(data_shape, tile_shape); + 
run_test_aligned_default_launch(data_shape, tile_shape); + run_test_aligned_default_launch(data_shape, tile_shape); + run_test_aligned_default_launch(data_shape, tile_shape); +} + +std::vector> NICE_SHAPES = {{64, 64}, {64, 32}, {256, 256}, {512, 1}}; +std::vector> NICE_TILESIZE = {{4, 4}, {32, 32}, {64, 64}, {256, 256}}; + +TEST(Repartition, NiceValues_C_C) +{ + register_tasks(); + + for (size_t shape_idx = 0; shape_idx < NICE_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < NICE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(NICE_SHAPES[shape_idx], NICE_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, NiceValues_F_F) +{ + register_tasks(); + + for (size_t shape_idx = 0; shape_idx < NICE_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < NICE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(NICE_SHAPES[shape_idx], + NICE_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, NiceValues_C_F) +{ + register_tasks(); + + for (size_t shape_idx = 0; shape_idx < NICE_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < NICE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(NICE_SHAPES[shape_idx], NICE_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, NiceValues_F_C) +{ + register_tasks(); + + for (size_t shape_idx = 0; shape_idx < NICE_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < NICE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(NICE_SHAPES[shape_idx], NICE_TILESIZE[tile_idx]); + } + } +} + +std::vector> ODD_SHAPES = { + {120, 257}, {148, 12}, {12, 2325}, {1112, 31}, {256, 256}, {12, 1}}; + +std::vector> ODD_TILESIZE = { + {2, 2}, {64, 32}, {255, 256}, {16, 5}, {1, 1}, {4, 4}}; + +TEST(Repartition, OddValues_C_C) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < ODD_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < ODD_TILESIZE.size(); ++tile_idx) { + 
run_test_aligned_default_launch(ODD_SHAPES[shape_idx], ODD_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, OddValues_F_F) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < ODD_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < ODD_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(ODD_SHAPES[shape_idx], ODD_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, OddValues_C_F) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < ODD_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < ODD_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(ODD_SHAPES[shape_idx], ODD_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, OddValues_F_C) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < ODD_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < ODD_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(ODD_SHAPES[shape_idx], ODD_TILESIZE[tile_idx]); + } + } +} + +std::vector> STRANGE_SHAPES = { + {120, 257}, {148, 12}, {12, 2325}, {1112, 31}, {256, 256}, {12, 1}}; + +std::vector> STRANGE_TILESIZE = { + {2, 2}, {64, 32}, {255, 256}, {16, 5}, {1, 1}, {4, 4}}; + +TEST(Repartition, StrangeValues_C_C) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < STRANGE_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < STRANGE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(STRANGE_SHAPES[shape_idx], + STRANGE_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, StrangeValues_F_F) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < STRANGE_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < STRANGE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(STRANGE_SHAPES[shape_idx], + STRANGE_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, StrangeValues_C_F) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < STRANGE_SHAPES.size(); ++shape_idx) { + for (size_t 
tile_idx = 0; tile_idx < STRANGE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(STRANGE_SHAPES[shape_idx], + STRANGE_TILESIZE[tile_idx]); + } + } +} + +TEST(Repartition, StrangeValues_F_C) +{ + register_tasks(); + for (size_t shape_idx = 0; shape_idx < STRANGE_SHAPES.size(); ++shape_idx) { + for (size_t tile_idx = 0; tile_idx < STRANGE_TILESIZE.size(); ++tile_idx) { + run_test_aligned_default_launch(STRANGE_SHAPES[shape_idx], + STRANGE_TILESIZE[tile_idx]); + } + } +} + +} // namespace repartition_test diff --git a/tests/cpp/integration/test_sort.cc b/tests/cpp/integration/test_sort.cc new file mode 100644 index 000000000..0744c22b1 --- /dev/null +++ b/tests/cpp/integration/test_sort.cc @@ -0,0 +1,632 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +auto get_expect_result_int() +{ + std::vector>> expect_result = { + {{0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}}, + {{-1, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + {0, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {1, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}}, + {{-1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + {1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}}, + {{-1, {3, 5, 10, 12, 2, 4, 8, 9, 1, 6, 7, 11}}, + {0, {2, 3, 8, 1, 7, 4, 11, 5, 10, 6, 12, 9}}, + {1, {3, 5, 10, 12, 2, 4, 8, 9, 1, 6, 7, 11}}}, + {{-2, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {-1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {0, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + {1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {2, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}}, + {{-2, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + {-1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {0, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {1, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + {2, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}}, + {{-2, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {-1, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}, + {0, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {1, {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}}, + {2, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}}}, + {{-2, {5, 2, 4, 10, 3, 12, 6, 9, 1, 8, 11, 7}}, + {-1, {3, 10, 12, 2, 4, 5, 7, 8, 9, 1, 6, 11}}, + {0, {8, 3, 7, 5, 2, 1, 10, 9, 12, 6, 11, 4}}, + {1, {5, 2, 4, 10, 3, 12, 6, 9, 1, 8, 11, 7}}, + {2, {3, 10, 12, 2, 4, 5, 7, 8, 9, 1, 6, 11}}}}; + return expect_result; +} + +auto get_expect_result_double() +{ + std::vector>> expect_result = { + {{0, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}}, + {{-1, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}, + {0, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {1, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 
11, 12}}}, + {{-1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {0, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}, + {1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}}, + {{-1, {1.5, 3.66, 5.98, 6, 2.2, 8, 10.5, 11, 4, 7.9, 9, 12}}, + {0, {1.5, 3.66, 6, 4, 2.2, 10.5, 8, 5.98, 7.9, 12, 9, 11}}, + {1, {1.5, 3.66, 5.98, 6, 2.2, 8, 10.5, 11, 4, 7.9, 9, 12}}}, + {{-2, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {-1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {0, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}, + {1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {2, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}}, + {{-2, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}, + {-1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {0, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {1, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}, + {2, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}}, + {{-2, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {-1, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}, + {0, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {1, {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}}, + {2, {1.5, 2.2, 3.66, 4, 5.98, 6, 7.9, 8, 9, 10.5, 11, 12}}}, + {{-2, {1.5, 2.2, 6, 5.98, 3.66, 10.5, 8, 9, 4, 12, 11, 7.9}}, + {-1, {1.5, 3.66, 6, 2.2, 5.98, 10.5, 7.9, 8, 11, 4, 9, 12}}, + {0, {1.5, 3.66, 6, 5.98, 2.2, 4, 8, 11, 7.9, 12, 9, 10.5}}, + {1, {1.5, 2.2, 6, 5.98, 3.66, 10.5, 8, 9, 4, 12, 11, 7.9}}, + {2, {1.5, 3.66, 6, 2.2, 5.98, 10.5, 7.9, 8, 11, 4, 9, 12}}}}; + return expect_result; +} + +auto get_expect_result_complex() +{ + std::vector, 12>>> expect_result = { + {{0, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}}, + {{-1, + 
{complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}, + {0, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {1, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}}, + {{-1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {0, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}, + {1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}}, + {{-1, + {complex(2, 4), + complex(8, 9), + complex(10, 3), + complex(12, 5), + complex(1.5, 3.66), + complex(6, 5.98), + complex(7, 6), + complex(11, 1), + complex(2.2, 10.5), + complex(6, 4), + complex(7.9, 12), + complex(8, 11)}}, + {0, + {complex(2.2, 10.5), + complex(8, 11), + complex(1.5, 3.66), + complex(6, 4), + complex(7, 6), + complex(11, 1), + complex(2, 4), + complex(6, 5.98), + complex(10, 3), + complex(12, 5), + complex(7.9, 12), + complex(8, 9)}}, + {1, + {complex(2, 4), + complex(8, 9), + complex(10, 3), + complex(12, 5), + complex(1.5, 3.66), + complex(6, 5.98), + complex(7, 6), + 
complex(11, 1), + complex(2.2, 10.5), + complex(6, 4), + complex(7.9, 12), + complex(8, 11)}}}, + {{-2, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {-1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {0, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}, + {1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {2, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}}, + {{-2, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}, + {-1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {0, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {1, + {complex(1.5, 3.66), + 
complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}, + {2, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}}, + {{-2, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {-1, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}, + {0, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {1, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {2, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}}}, + {{-2, + {complex(8, 9), + complex(7, 6), + complex(2, 4), + complex(10, 3), + complex(12, 5), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {-1, + {complex(2, 4), + complex(10, 3), + complex(12, 5), + complex(7, 6), + complex(8, 9), + complex(11, 1), + complex(1.5, 3.66), + complex(2.2, 10.5), + 
complex(6, 5.98), + complex(6, 4), + complex(7.9, 12), + complex(8, 11)}}, + {0, + {complex(1.5, 3.66), + complex(6, 5.98), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(6, 4), + complex(10, 3), + complex(12, 5), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(11, 1)}}, + {1, + {complex(8, 9), + complex(7, 6), + complex(2, 4), + complex(10, 3), + complex(12, 5), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}}, + {2, + {complex(2, 4), + complex(10, 3), + complex(12, 5), + complex(7, 6), + complex(8, 9), + complex(11, 1), + complex(1.5, 3.66), + complex(2.2, 10.5), + complex(6, 5.98), + complex(6, 4), + complex(7.9, 12), + complex(8, 11)}}}}; + return expect_result; +} + +template +void test_sort(std::array& in_array, + std::array& expect, + legate::Type leg_type, + std::vector shape, + std::optional axis) +{ + auto A1 = cunumeric::zeros(shape, leg_type); + if (in_array.size() != 0) { + if (in_array.size() == 1) { + A1.fill(legate::Scalar(in_array[0])); + } else { + assign_values_to_array(A1, in_array.data(), in_array.size()); + } + } + std::vector algos = {"quicksort", "mergesort", "heapsort", "stable"}; + for (auto algo = algos.begin(); algo < algos.end(); ++algo) { + auto B1 = cunumeric::sort(A1, axis, *algo); + if (in_array.size() != 0) { + check_array_eq(B1, expect.data(), expect.size()); + } + } +} + +template +void sort_basic_axis_impl(std::vector>& test_shapes, + std::array in_array, + std::vector>>& expect_result, + legate::Type leg_type) +{ + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + auto expect_val = expect_result[i][axis]; + if (dim == 1) { + test_sort(in_array, expect_val, leg_type, test_shape, axis); + } else if (dim == 2) { + test_sort(in_array, 
expect_val, leg_type, test_shape, axis); + } else { + test_sort(in_array, expect_val, leg_type, test_shape, axis); + } + } + } +} + +void sort_basic_axis() +{ + std::vector> test_shapes = { + {12}, {1, 12}, {12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + + // Test int type + std::array in_array1 = {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + auto expect_result1 = get_expect_result_int(); + sort_basic_axis_impl(test_shapes, in_array1, expect_result1, legate::int32()); + + // Test float type + std::array in_array2 = {1.5, 3.66, 6, 5.98, 2.2, 10.5, 8, 11, 7.9, 12, 9, 4}; + auto expect_result2 = get_expect_result_double(); + sort_basic_axis_impl(test_shapes, in_array2, expect_result2, legate::float64()); + + // Test complex type + std::array, 12> in_array3 = {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}; + auto expect_result3 = get_expect_result_complex(); + sort_basic_axis_impl>(test_shapes, in_array3, expect_result3, legate::complex64()); +} + +void sort_empty_array() +{ + std::vector> test_shapes = { + {0}, {0, 1}, {1, 0}, {1, 0, 0}, {1, 1, 0}, {1, 0, 1}}; + + std::array in_array = {}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + if (dim == 1) { + test_sort(in_array, in_array, legate::int32(), test_shape, axis); + } else if (dim == 2) { + test_sort(in_array, in_array, legate::int32(), test_shape, axis); + } else { + test_sort(in_array, in_array, legate::int32(), test_shape, axis); + } + } + } +} + +void sort_single_item_array() +{ + std::vector> test_shapes = {{1}, {1, 1}, {1, 1, 1}}; + + std::array in_array = {12}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < 
test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + for (int32_t axis = -dim + 1; axis < dim; ++axis) { + if (dim == 1) { + test_sort(in_array, in_array, legate::int32(), test_shape, axis); + } else if (dim == 2) { + test_sort(in_array, in_array, legate::int32(), test_shape, axis); + } else { + test_sort(in_array, in_array, legate::int32(), test_shape, axis); + } + } + } +} + +void sort_negative_test() +{ + auto in_ar1 = cunumeric::zeros({2, 3}, legate::int32()); + + // Test invalid input sort axis + EXPECT_THROW(cunumeric::sort(in_ar1, 2, "quicksort"), std::invalid_argument); + EXPECT_THROW(cunumeric::sort(in_ar1, -3, "quicksort"), std::invalid_argument); + + // Test invalid input algorithm + EXPECT_THROW(cunumeric::sort(in_ar1, 0, "negative"), std::invalid_argument); +} + +// void cpp_test() +TEST(Sort, BasicAxis) { sort_basic_axis(); } +TEST(Sort, EmptyArray) { sort_empty_array(); } +TEST(Sort, SingleItemArray) { sort_single_item_array(); } +TEST(Sort, Negative) { sort_negative_test(); } diff --git a/tests/cpp/integration/test_sort_complex.cc b/tests/cpp/integration/test_sort_complex.cc new file mode 100644 index 000000000..60eab5ac1 --- /dev/null +++ b/tests/cpp/integration/test_sort_complex.cc @@ -0,0 +1,441 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +template +auto get_sort_complex_expect_result() +{ + std::vector, 12>> expect_result = {{complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}, + {complex(2, 4), + complex(8, 9), + complex(10, 3), + complex(12, 5), + complex(1.5, 3.66), + complex(6, 5.98), + complex(7, 6), + complex(11, 1), + complex(2.2, 10.5), + complex(6, 4), + complex(7.9, 12), + complex(8, 11)}, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}, + {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}, + {complex(1.5, 3.66), + complex(2, 4), + complex(2.2, 10.5), + complex(6, 4), + complex(6, 5.98), + complex(7, 6), + complex(7.9, 12), + complex(8, 9), + complex(8, 11), + complex(10, 3), + complex(11, 1), + complex(12, 5)}, + {complex(2, 4), + complex(10, 3), + complex(12, 5), + complex(7, 6), + complex(8, 9), + complex(11, 1), + complex(1.5, 3.66), + complex(2.2, 10.5), + complex(6, 5.98), + complex(6, 4), + complex(7.9, 12), + 
complex(8, 11)}}; + return expect_result; +} + +template +auto change_int_to_complex(const std::vector>& input) +{ + std::vector, SIZE>> results; + for (size_t i = 0; i < input.size(); i++) { + std::array, SIZE> result; + for (size_t j = 0; j < input[i].size(); j++) { + result[j] = complex(input[i][j], 0); + } + results.push_back(result); + } + return results; +} + +template +auto get_sort_complex_expect_result_from_int() +{ + std::vector> expect_result = {{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}, + {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}, + {3, 5, 10, 12, 2, 4, 8, 9, 1, 6, 7, 11}, + {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}, + {3, 10, 12, 2, 4, 5, 7, 8, 9, 1, 6, 11}}; + + return change_int_to_complex(expect_result); +} + +auto get_sort_complex_expect_result_4d() +{ + std::vector> expect_result = { + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {3, 10, 12, 14, 2, 4, 5, 13, 7, 8, 9, 16, 1, 6, 11, 15}}; + + return change_int_to_complex(expect_result); +} + +auto get_sort_complex_expect_result_5d() +{ + std::vector> expect_result = { + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {3, 10, 12, 14, 2, 4, 5, 13, 7, 8, 9, 16, 1, 6, 11, 15}}; + + return change_int_to_complex(expect_result); +} + +auto get_sort_complex_expect_result_6d() +{ + std::vector> expect_result = { + {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {10, 14, 3, 12, 5, 13, 2, 4, 8, 16, 7, 9, 6, 11, 1, 15}}; + + return change_int_to_complex(expect_result); +} + +auto get_sort_complex_expect_result_7d() +{ + std::vector> expect_result = { + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}, + {10, 14, 3, 12, 5, 13, 2, 4, 8, 16, 7, 9, 6, 11, 1, 15}}; + + return change_int_to_complex(expect_result); +} + +template +void 
test_sort_complex(std::array& in_array, + std::array& expect, + legate::Type leg_type, + std::vector shape) +{ + auto A1 = cunumeric::zeros(shape, leg_type); + if (in_array.size() != 0) { + if (in_array.size() == 1) { + A1.fill(legate::Scalar(in_array[0])); + } else { + assign_values_to_array(A1, in_array.data(), in_array.size()); + } + } + auto B1 = cunumeric::sort_complex(A1); + if (in_array.size() != 0) { + check_array_eq(B1, expect.data(), expect.size()); + } +} + +template +void sort_complex_basic_impl(std::vector>& test_shapes, + std::array in_array, + std::vector>& expect_result, + legate::Type leg_type) +{ + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + auto expect_val = expect_result[i]; + if (dim == 1) { + test_sort_complex(in_array, expect_val, leg_type, test_shape); + } else if (dim == 2) { + test_sort_complex(in_array, expect_val, leg_type, test_shape); + } else if (dim == 3) { + test_sort_complex(in_array, expect_val, leg_type, test_shape); + } else if (dim == 4) { +#if LEGATE_MAX_DIM >= 4 + test_sort_complex(in_array, expect_val, leg_type, test_shape); +#endif + } else if (dim == 5) { +#if LEGATE_MAX_DIM >= 5 + test_sort_complex(in_array, expect_val, leg_type, test_shape); +#endif + } else if (dim == 6) { +#if LEGATE_MAX_DIM >= 6 + test_sort_complex(in_array, expect_val, leg_type, test_shape); +#endif + } else if (dim == 7) { +#if LEGATE_MAX_DIM >= 7 + test_sort_complex(in_array, expect_val, leg_type, test_shape); +#endif + } + } +} + +void sort_complex_basic() +{ + // Test int8 type + std::vector> test_shapes_int = {{12}, {12, 1}, {3, 4}, {12, 1, 1}, {2, 2, 3}}; + std::array in_array1 = {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + auto expect_result1 = get_sort_complex_expect_result_from_int(); + sort_complex_basic_impl, 12>( + test_shapes_int, in_array1, expect_result1, legate::int8()); + + // Test int16 type + std::array in_array2 
= {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + auto expect_result2 = get_sort_complex_expect_result_from_int(); + sort_complex_basic_impl, 12>( + test_shapes_int, in_array2, expect_result2, legate::int16()); + + // Test int32 type + std::array int_array3 = {10, 3, 12, 5, 2, 4, 8, 9, 7, 6, 11, 1}; + auto expect_result3 = get_sort_complex_expect_result_from_int(); + sort_complex_basic_impl, 12>( + test_shapes_int, int_array3, expect_result3, legate::int32()); + + // Test complex type + std::vector> test_shapes = { + {12}, {1, 12}, {12, 1}, {3, 4}, {12, 1, 1}, {1, 12, 1}, {1, 1, 12}, {2, 2, 3}}; + + std::array, 12> in_array4 = {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}; + auto expect_result4 = get_sort_complex_expect_result(); + sort_complex_basic_impl, complex, 12>( + test_shapes, in_array4, expect_result4, legate::complex64()); + + std::array, 12> in_array5 = {complex(10, 3), + complex(12, 5), + complex(2, 4), + complex(8, 9), + complex(7, 6), + complex(11, 1), + complex(1.5, 3.66), + complex(6, 5.98), + complex(2.2, 10.5), + complex(8, 11), + complex(7.9, 12), + complex(6, 4)}; + auto expect_result5 = get_sort_complex_expect_result(); + sort_complex_basic_impl, complex, 12>( + test_shapes, in_array5, expect_result5, legate::complex128()); +} + +void sort_complex_basic_max_dim() +{ + // Only test int type for max dim + std::array in_array = {14, 10, 3, 12, 5, 13, 2, 4, 16, 8, 9, 7, 6, 11, 1, 15}; +#if LEGATE_MAX_DIM >= 4 + std::vector> test_shapes_4d = {{1, 1, 1, 16}, {16, 1, 1, 1}, {2, 2, 1, 4}}; + auto expect_result_4d = get_sort_complex_expect_result_4d(); + sort_complex_basic_impl, 16>( + test_shapes_4d, in_array, expect_result_4d, legate::int16()); +#endif + +#if LEGATE_MAX_DIM >= 5 + std::vector> test_shapes_5d = { + {1, 1, 1, 1, 16}, {1, 16, 1, 1, 1}, {1, 2, 2, 1, 4}}; + auto 
expect_result_5d = get_sort_complex_expect_result_5d(); + sort_complex_basic_impl, 16>( + test_shapes_5d, in_array, expect_result_5d, legate::int16()); +#endif + +#if LEGATE_MAX_DIM >= 6 + std::vector> test_shapes_6d = { + {1, 1, 1, 1, 1, 16}, {1, 1, 16, 1, 1, 1}, {2, 1, 1, 2, 2, 2}}; + auto expect_result_6d = get_sort_complex_expect_result_6d(); + sort_complex_basic_impl, 16>( + test_shapes_6d, in_array, expect_result_6d, legate::int16()); +#endif + +#if LEGATE_MAX_DIM >= 7 + std::vector> test_shapes_7d = { + {1, 16, 1, 1, 1, 1, 1}, {4, 1, 2, 2, 1, 1, 1}, {2, 2, 1, 1, 2, 1, 2}}; + auto expect_result_7d = get_sort_complex_expect_result_7d(); + sort_complex_basic_impl, 16>( + test_shapes_7d, in_array, expect_result_7d, legate::int16()); +#endif +} + +void sort_complex_large_array() +{ + const int32_t count = 10000; + std::vector> test_shapes = {{count}}; + + // Test int16 type for large array + std::array in_array1; + for (int16_t i = 0; i < count; i++) { + in_array1[i] = count - i; + } + std::array, count> expect_val1; + for (int32_t j = 0; j < count; j++) { + expect_val1[j] = complex(j + 1, 0); + } + std::vector, count>> expect_result1 = {expect_val1}; + sort_complex_basic_impl, count>( + test_shapes, in_array1, expect_result1, legate::int16()); + + // Test int32 type for large array + std::array in_array2; + for (int32_t i = 0; i < count; i++) { + in_array2[i] = count - i; + } + std::array, count> expect_val2; + for (int32_t j = 0; j < count; j++) { + expect_val2[j] = complex(j + 1, 0); + } + std::vector, count>> expect_result2 = {expect_val2}; + sort_complex_basic_impl, count>( + test_shapes, in_array2, expect_result2, legate::int32()); + + // Test complex type + std::array, count> in_array3; + for (int32_t i = 0; i < count; i++) { + in_array3[i] = complex(count - i, count - i); + } + std::array, count> expect_val3; + for (int32_t j = 0; j < count; j++) { + expect_val3[j] = complex(j + 1, j + 1); + } + std::vector, count>> expect_result3 = {expect_val3}; + 
sort_complex_basic_impl, complex, count>( + test_shapes, in_array3, expect_result3, legate::complex64()); +} + +void sort_complex_empty_array() +{ + std::vector> test_shapes = { + {0}, {0, 1}, {1, 0}, {1, 0, 0}, {1, 1, 0}, {1, 0, 1}}; + + std::array, 0> in_array = {}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + if (dim == 1) { + test_sort_complex, complex, 0, 1>( + in_array, in_array, legate::complex64(), test_shape); + } else if (dim == 2) { + test_sort_complex, complex, 0, 2>( + in_array, in_array, legate::complex64(), test_shape); + } else { + test_sort_complex, complex, 0, 3>( + in_array, in_array, legate::complex64(), test_shape); + } + } +} + +void sort_complex_single_item_array() +{ + std::vector> test_shapes = {{1}, {1, 1}, {1, 1, 1}}; + + std::array in_array = {12}; + std::array, 1> expect_result = {complex(12, 0)}; + size_t test_shape_size = test_shapes.size(); + for (size_t i = 0; i < test_shape_size; ++i) { + auto test_shape = test_shapes[i]; + int32_t dim = test_shape.size(); + if (dim == 1) { + test_sort_complex, 1, 1>( + in_array, expect_result, legate::float64(), test_shape); + } else if (dim == 2) { + test_sort_complex, 1, 2>( + in_array, expect_result, legate::float64(), test_shape); + } else { + test_sort_complex, 1, 3>( + in_array, expect_result, legate::float64(), test_shape); + } + } +} + +// void cpp_test() +TEST(SortComplex, Basic) { sort_complex_basic(); } +TEST(SortComplex, BasicMaxDim) { sort_complex_basic_max_dim(); } +TEST(SortComplex, LargeArray) { sort_complex_large_array(); } +TEST(SortComplex, EmptyArray) { sort_complex_empty_array(); } +TEST(SortComplex, SingleItemArray) { sort_complex_single_item_array(); } diff --git a/tests/cpp/integration/test_swapaxes.cc b/tests/cpp/integration/test_swapaxes.cc new file mode 100644 index 000000000..6b68d6302 --- /dev/null +++ b/tests/cpp/integration/test_swapaxes.cc @@ 
-0,0 +1,103 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +void swapaxes_test() +{ + // Test small + { + auto A = cunumeric::zeros({3, 3}, legate::int32()); + EXPECT_EQ(A.shape(), (std::vector{3, 3})); + auto B = cunumeric::swapaxes(A, 0, 1); + EXPECT_EQ(B.shape(), (std::vector{3, 3})); + } + + // Test tall + { + auto A_tall = cunumeric::zeros({300, 3}, legate::int32()); + EXPECT_EQ(A_tall.shape(), (std::vector{300, 3})); + auto B_tall = cunumeric::swapaxes(A_tall, 0, 1); + EXPECT_EQ(B_tall.shape(), (std::vector{3, 300})); + } + + // Test wide + { + auto A_wide = cunumeric::zeros({3, 300}, legate::int32()); + EXPECT_EQ(A_wide.shape(), (std::vector{3, 300})); + auto B_wide = cunumeric::swapaxes(A_wide, 0, 1); + EXPECT_EQ(B_wide.shape(), (std::vector{300, 3})); + } + + // Test big + { + auto A_big = cunumeric::zeros({300, 300}, legate::int32()); + EXPECT_EQ(A_big.shape(), (std::vector{300, 300})); + auto B_big = cunumeric::swapaxes(A_big, 0, 1); + EXPECT_EQ(B_big.shape(), (std::vector{300, 300})); + } + + // Test 3-dim array with different swap axes + { + auto A = cunumeric::zeros({3, 4, 5}, legate::int32()); + EXPECT_EQ(A.shape(), (std::vector{3, 4, 5})); + + auto B1 = cunumeric::swapaxes(A, 0, 0); + EXPECT_EQ(B1.shape(), (std::vector{3, 4, 5})); + + auto B2 = cunumeric::swapaxes(A, -3, 1); + 
EXPECT_EQ(B2.shape(), (std::vector{4, 3, 5})); + + auto B3 = cunumeric::swapaxes(A, 0, 2); + EXPECT_EQ(B3.shape(), (std::vector{5, 4, 3})); + + auto B4 = cunumeric::swapaxes(A, -3, -2); + EXPECT_EQ(B4.shape(), (std::vector{4, 3, 5})); + } + + // Test empty array + { + auto A = cunumeric::zeros({0}, legate::int32()); + EXPECT_EQ(A.shape(), (std::vector{0})); + + auto B = cunumeric::swapaxes(A, 0, 0); + EXPECT_EQ(B.shape(), (std::vector{0})); + } +} + +void swapaxes_negative_test() +{ + // Test out-of-bound1 + auto A = cunumeric::zeros({3, 3}, legate::int32()); + EXPECT_THROW(cunumeric::swapaxes(A, 3, 0), std::invalid_argument); + EXPECT_THROW(cunumeric::swapaxes(A, 0, 3), std::invalid_argument); + + // Test out-of-bound2 + EXPECT_THROW(cunumeric::swapaxes(A, -4, 0), std::invalid_argument); + EXPECT_THROW(cunumeric::swapaxes(A, 0, -4), std::invalid_argument); +} + +// void cpp_test() +TEST(Swapaxes, Normal) { swapaxes_test(); } + +TEST(Swapaxes, Negative) { swapaxes_negative_test(); } \ No newline at end of file diff --git a/tests/cpp/integration/test_transpose.cc b/tests/cpp/integration/test_transpose.cc new file mode 100644 index 000000000..87af9cf2e --- /dev/null +++ b/tests/cpp/integration/test_transpose.cc @@ -0,0 +1,138 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include "legate.h" +#include "cunumeric.h" +#include "util.inl" + +template +void transpose_int32_test(std::array input, + std::array exp, + std::vector in_shape, + std::vector out_shape, + std::optional> axes = std::nullopt) +{ + auto a_input = cunumeric::zeros(in_shape, legate::int32()); + assign_values_to_array(a_input, input.data(), input.size()); + + auto a_output = cunumeric::array(out_shape, legate::int32()); + + if (axes) { + a_output = cunumeric::transpose(a_input, axes.value()); + } else { + a_output = cunumeric::transpose(a_input); + } + check_array_eq(a_output, exp.data(), exp.size()); + EXPECT_EQ(a_output.shape(), out_shape); +} + +TEST(Transpose, Dim) +{ + const size_t size = 6; + const int32_t dim = 2; + std::array input = {1, 2, 3, 4, 5, 6}; + std::array exp = {1, 4, 2, 5, 3, 6}; + std::vector in_shape = {2, 3}; + std::vector out_shape = {3, 2}; + auto axes = std::nullopt; + + transpose_int32_test(input, exp, in_shape, out_shape, axes); +} + +TEST(Transpose, Axes) +{ + const size_t size = 12; + const int32_t dim = 3; + std::array input = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}; + std::array exp = {1, 7, 4, 10, 2, 8, 5, 11, 3, 9, 6, 12}; + std::vector in_shape = {2, 2, 3}; + std::vector out_shape = {3, 2, 2}; + auto axes = {2, 1, 0}; + + transpose_int32_test(input, exp, in_shape, out_shape, axes); +} + +TEST(Transpose, EmptyArray) +{ + const size_t size = 0; + const int32_t dim = 1; + std::array input = {}; + std::array exp = input; + std::vector in_shape = {0}; + std::vector out_shape = in_shape; + auto axes = std::nullopt; + + transpose_int32_test(input, exp, in_shape, out_shape, axes); +} + +TEST(Transpose, SingletonAxes) +{ + const size_t size = 6; + const int32_t dim = 1; + std::array input = {1, 2, 3, 4, 5, 6}; + std::array exp = input; + std::vector in_shape = {6}; + std::vector out_shape = in_shape; + auto axes = {1}; + + transpose_int32_test(input, exp, in_shape, out_shape, axes); +} + +TEST(Transpose, Singleton) +{ + 
const size_t size = 6; + const int32_t dim = 1; + std::array input = {1, 2, 3, 4, 5, 6}; + std::array exp = input; + std::vector in_shape = {6}; + std::vector out_shape = in_shape; + auto axes = std::nullopt; + + transpose_int32_test(input, exp, in_shape, out_shape, axes); +} + +TEST(Transpose, DefaultType) +{ + const size_t size = 6; + const int32_t dim = 2; + std::array input = {1.3, 2, 3.6, 4, 5, 6}; + std::array exp = {1.3, 4, 2, 5, 3.6, 6}; + std::vector in_shape = {2, 3}; + std::vector out_shape = {3, 2}; + + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + auto a_output = cunumeric::transpose(a_input); + check_array_eq(a_output, exp.data(), exp.size()); + EXPECT_EQ(a_output.shape(), out_shape); +} + +TEST(TransposeErrors, InvalidAxes) +{ + const size_t size = 6; + const int32_t dim = 2; + std::array input = {1.3, 2, 3.6, 4, 5, 6}; + std::vector in_shape = {2, 3}; + std::vector out_shape = {3, 2}; + + auto a_input = cunumeric::zeros(in_shape); + assign_values_to_array(a_input, input.data(), input.size()); + EXPECT_THROW(cunumeric::transpose(a_input, (std::vector){0, 1, 2}), + std::invalid_argument); + EXPECT_THROW(cunumeric::transpose(a_input, (std::vector){1}), std::invalid_argument); + EXPECT_THROW(cunumeric::transpose(a_input, (std::vector){3, 4}), std::invalid_argument); +} diff --git a/tests/cpp/integration/test_trilu.cc b/tests/cpp/integration/test_trilu.cc new file mode 100644 index 000000000..2d944106f --- /dev/null +++ b/tests/cpp/integration/test_trilu.cc @@ -0,0 +1,123 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include "common_utils.h" +#include + +using namespace cunumeric; + +namespace { + +template +std::tuple, std::vector> trilu_result(std::vector a, + std::vector shape, + int32_t k = 0, + bool lower = true) +{ + if (shape.empty()) { + throw std::invalid_argument("Array must be at least 1-D"); + } + + size_t size = std::accumulate(shape.begin(), shape.end(), size_t(1), std::multiplies()); + if (a.size() != size) { + throw std::invalid_argument("size and shape mismatch"); + } + + bool is_1D = false; + if (shape.size() == 1) { + is_1D = true; + shape.emplace_back(shape[0]); + size = shape[0] * shape[0]; + } + + if (a.size() == 0) { + return {a, shape}; + } + + int32_t ndim = static_cast(shape.size()); + size_t N = shape[ndim - 2]; + size_t M = shape[ndim - 1]; + std::vector out; + for (size_t idx = 0; idx < size; ++idx) { + int32_t j = static_cast(idx % M); + int32_t i = static_cast((idx / M) % N); + bool flag = lower ? j <= i + k : j >= i + k; + if (flag) { + if (is_1D) { + out.emplace_back(a[j]); + } else { + out.emplace_back(a[idx]); + } + } else { + out.emplace_back(0); + } + } + return {out, shape}; +} + +template +void _test(std::string func, std::vector x_in, std::vector shape, int32_t k) +{ + bool lower = (func == "tril") ? true : false; + auto num_f = (func == "tril") ? 
tril : triu; + auto x = mk_array(x_in, shape); + auto x_out = num_f(x, k); + auto [x_gt, shape_gt] = trilu_result(x_in, shape, k, lower); + check_array(x_out, x_gt, shape_gt); +} + +TEST(Trilu, test_trilu) +{ + std::vector func_list{"tril", "triu"}; + std::vector k_list{0, -1, 1, -2, 2, -10, 10}; + std::vector> shape_list{ + {0}, {1}, {10}, {1, 10}, {10, 10}, {1, 1, 10}, {1, 10, 10}, {10, 10, 10}}; + for (auto shape : shape_list) { + auto x_int32 = mk_seq_vector(shape, 0, 1); + auto x_float = mk_seq_vector(shape, 0, 1); + for (auto k : k_list) { + for (auto func : func_list) { + _test(func, x_int32, shape, k); + _test(func, x_float, shape, k); + } + } + } +} + +TEST(Trilu, test_ndim) +{ + std::vector func_list{"tril", "triu"}; + std::vector shape; + for (int32_t ndim = 1; ndim <= LEGATE_MAX_DIM; ++ndim) { + shape.push_back(ndim); + for (int32_t k = -ndim; k <= ndim; ++k) { + auto x_in = mk_seq_vector(shape); + for (auto func : func_list) { + _test(func, x_in, shape, k); + } + } + } +} + +class TriluErrors : public ::testing::Test {}; + +TEST_F(TriluErrors, test_m_scalar) +{ + auto x = mk_array({0}); + EXPECT_THROW(tril(x), std::invalid_argument); +} + +} // namespace diff --git a/tests/cpp/integration/test_zeros.cc b/tests/cpp/integration/test_zeros.cc new file mode 100644 index 000000000..cb880f85a --- /dev/null +++ b/tests/cpp/integration/test_zeros.cc @@ -0,0 +1,108 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include "common_utils.h" + +using namespace cunumeric; +using Code = legate::Type::Code; + +namespace { + +const size_t DIM = 4; +std::vector> shape_list{{0}, + {1}, + {DIM}, + {0, 1}, + {1, 0}, + {1, 1}, + {1, DIM}, + {DIM, 1}, + {DIM, DIM}, + {1, 0, 0}, + {1, 1, 0}, + {1, 0, 1}, + {1, 1, 1}, + {DIM, 1, 1}, + {1, DIM, 1}, + {1, 1, DIM}, + {DIM, DIM, DIM}}; + +std::vector code_list{Code::BOOL, + Code::INT8, + Code::INT16, + Code::INT32, + Code::INT64, + Code::UINT8, + Code::UINT16, + Code::UINT32, + Code::UINT64, + Code::FLOAT32, + Code::FLOAT64, + Code::COMPLEX64, + Code::COMPLEX128}; + +template +void _test(std::vector shape) +{ + auto x = zeros(shape, legate::primitive_type(CODE)); + using VAL = legate::type_of; + std::vector x_gt(x.size()); + check_array(x, x_gt, shape); + // debug_array(x, false); +} + +TEST(Zeros, test_basic_dtype) +{ + for (auto code : code_list) { + for (auto shape : shape_list) { + switch (code) { + case Code::BOOL: _test(shape); break; + case Code::INT8: _test(shape); break; + case Code::INT16: _test(shape); break; + case Code::INT32: _test(shape); break; + case Code::INT64: _test(shape); break; + case Code::UINT8: _test(shape); break; + case Code::UINT16: _test(shape); break; + case Code::UINT32: _test(shape); break; + case Code::UINT64: _test(shape); break; + case Code::FLOAT32: _test(shape); break; + case Code::FLOAT64: _test(shape); break; + case Code::COMPLEX64: _test(shape); break; + case Code::COMPLEX128: _test(shape); break; + default: FAIL() << "Unsupported data types."; break; + } + } + } +} + +TEST(Zeros, test_ndim) +{ + std::vector shape; + for (int32_t ndim = 1; ndim <= LEGATE_MAX_DIM; ++ndim) { + shape.push_back(ndim); + _test(shape); + _test(shape); + _test(shape); + _test(shape); + } +} + +TEST(Zeros, test_invalid_type) +{ + EXPECT_THROW(zeros({2, 2}, legate::primitive_type(Code::FIXED_ARRAY)), std::invalid_argument); +} + +} // namespace diff --git a/tests/cpp/integration/util.inl 
b/tests/cpp/integration/util.inl new file mode 100644 index 000000000..ccb203ea9 --- /dev/null +++ b/tests/cpp/integration/util.inl @@ -0,0 +1,254 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +namespace { +template +std::stringstream& print_value(std::stringstream& ss, T value) +{ + ss << value; + return ss; +} + +template <> +std::stringstream& print_value>(std::stringstream& ss, complex value) +{ + // operator<< missing for cuda::std::complex + // The issue is going to be fixed in the next cuda release. +#if CUDART_VERSION >= 12050 + ss << value; +#endif + return ss; +} + +template <> +std::stringstream& print_value>(std::stringstream& ss, complex value) +{ + // operator<< missing for cuda::std::complex + // The issue is going to be fixed in the next cuda release. 
+#if CUDART_VERSION >= 12050 + ss << value; +#endif + return ss; +} + +template +std::string to_string(legate::AccessorRO acc, + const std::vector& shape, + legate::Rect rect) +{ + std::stringstream ss; + auto size = static_cast(shape.size()); + + auto count = 0; + auto pro = 1; + std::vector item_count; + for (int32_t i = size - 1; i >= 0; --i) { + pro *= shape[i]; + item_count.push_back(pro); + } + + auto print_brackets_in_start_end = [&](bool start) { + if (start) { + for (int32_t i = 0; i < size; ++i) { + ss << "["; + } + } else { + for (int32_t i = 0; i < size; ++i) { + ss << "]"; + } + } + }; + + auto print_brackets_in_middle = [&]() -> bool { + for (int32_t i = size - 1; i >= 0; --i) { + if ((count % item_count[i]) == 0) { + for (int32_t j = i; j >= 0; --j) { + ss << "]"; + } + ss << ",\n"; + for (int32_t j = i; j >= 0; --j) { + ss << "["; + } + return true; + } + } + return false; + }; + + print_brackets_in_start_end(true); + for (legate::PointInRectIterator itr(rect, false); itr.valid(); ++itr) { + if (count > 0) { + if (!print_brackets_in_middle()) { + ss << ","; + } + } + ss << std::setw(9) << std::setprecision(3); + print_value(ss, acc[*itr]); + count += 1; + } + print_brackets_in_start_end(false); + + return ss.str(); +} + +template +std::string check_array_eq(legate::AccessorRO acc, + T* values_ptr, + const std::vector& shape, + legate::Rect rect) +{ + std::stringstream ss; + + auto index = 0; + auto size = shape.size(); + ss << "size: " << size << "\n"; + for (legate::PointInRectIterator itr(rect, false); itr.valid(); ++itr) { + auto q = *itr; + ss << std::left << std::setprecision(3); + ss << std::setw(13) << "Array value: " << std::setw(10); + print_value(ss, acc[q]) << ", "; + ss << std::setw(16) << "Expected value: " << std::setw(10); + print_value(ss, acc[q]) << ", "; + if (size > 0) { + ss << std::setw(8) << "index: ["; + for (uint32_t i = 0; i < size - 1; ++i) { + ss << q[i] << ","; + } + ss << q[size - 1] << "]\n"; + } + EXPECT_EQ(acc[q], 
values_ptr[index++]); + } + + return ss.str(); +} + +template +struct print_fn { + void operator()(legate::AccessorRO acc, + const std::vector& shape, + legate::Rect rect) + { + std::cerr << to_string(acc, shape, rect) << std::endl; + } +}; + +template +struct check_array_eq_fn { + void operator()(legate::AccessorRO acc, + T* values_ptr, + const std::vector& shape, + legate::Rect rect) + { + auto string_result = check_array_eq(acc, values_ptr, shape, rect); + if (rect.volume() <= 256) { + std::cerr << string_result << std::endl; + } + } +}; + +template +struct assign_array_fn { + void operator()(legate::AccessorWO acc, T* values_ptr, legate::Rect rect) + { + auto index = 0; + for (legate::PointInRectIterator itr(rect, false); itr.valid(); ++itr) { + acc[*itr] = values_ptr[index++]; + } + } +}; + +template +struct copy_array_fn { + void operator()(legate::AccessorRO acc, T* values_ptr, legate::Rect rect) + { + auto index = 0; + for (legate::PointInRectIterator itr(rect, false); itr.valid(); ++itr) { + values_ptr[index++] = acc[*itr]; + } + } +}; + +template +void print_array(cunumeric::NDArray array) +{ + auto acc = array.get_read_accessor(); + auto& shape = array.shape(); + auto logical_store = array.get_store(); + auto physical_store = logical_store.get_physical_store(); + auto rect = physical_store.shape(); + print_fn()(acc, shape, rect); +} + +template +void check_array_eq(cunumeric::NDArray array, T* values_ptr, size_t length) +{ + assert(array.size() == length); + if (length == 0) { + return; + } + assert(values_ptr != nullptr); + auto acc = array.get_read_accessor(); + auto& shape = array.shape(); + auto logical_store = array.get_store(); + auto physical_store = logical_store.get_physical_store(); + auto rect = physical_store.shape(); + check_array_eq_fn()(acc, values_ptr, shape, rect); +} + +template +void assign_values_to_array(cunumeric::NDArray array, T* values_ptr, size_t length) +{ + assert(array.size() == length); + if (length == 0) { + return; + } + 
assert(values_ptr != nullptr); + auto acc = array.get_write_accessor(); + auto logical_store = array.get_store(); + auto physical_store = logical_store.get_physical_store(); + auto rect = physical_store.shape(); + assign_array_fn()(acc, values_ptr, rect); +} + +template +std::vector assign_array_to_values(cunumeric::NDArray array) +{ + std::vector result(array.size()); + if (array.size() > 0) { + T* values_ptr = result.data(); + assert(values_ptr != nullptr); + auto acc = array.get_read_accessor(); + auto logical_store = array.get_store(); + auto physical_store = logical_store.get_physical_store(); + auto rect = physical_store.shape(); + copy_array_fn()(acc, values_ptr, rect); + } + return std::move(result); +} + +template +void check_array_eq(cunumeric::NDArray array1, cunumeric::NDArray array2) +{ + assert(array1.size() == array2.size()); + if (array1.size() == 0) { + return; + } + + std::vector data2 = assign_array_to_values(array2); + check_array_eq(array1, data2.data(), data2.size()); +} + +} // namespace diff --git a/tests/cpp/main.cc b/tests/cpp/main.cc new file mode 100644 index 000000000..97211a77c --- /dev/null +++ b/tests/cpp/main.cc @@ -0,0 +1,43 @@ +/* Copyright 2024 NVIDIA Corporation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include "legate.h" +#include "cunumeric.h" + +class Environment : public ::testing::Environment { + public: + Environment(int argc, char** argv) : argc_(argc), argv_(argv) {} + + void SetUp() override + { + EXPECT_EQ(legate::start(argc_, argv_), 0); + cunumeric::initialize(argc_, argv_); + } + void TearDown() override { EXPECT_EQ(legate::finish(), 0); } + + private: + int argc_; + char** argv_; +}; + +int main(int argc, char** argv) +{ + ::testing::InitGoogleTest(&argc, argv); + ::testing::AddGlobalTestEnvironment(new Environment(argc, argv)); + + return RUN_ALL_TESTS(); +} diff --git a/tests/cpp/run.py b/tests/cpp/run.py new file mode 100755 index 000000000..e3c775e0a --- /dev/null +++ b/tests/cpp/run.py @@ -0,0 +1,169 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import argparse +import os +import subprocess +import sys +from pathlib import Path + +LAUNCHER_VAR_PREFIXES = ( + "CONDA_", + "LEGATE_", + "LEGION_", + "LG_", + "REALM_", + "GASNET_", + "PYTHON", + "UCX_", + "NCCL_", + "CUNUMERIC_", + "NVIDIA_", +) + +test_args_dict = { + # Example of usage + # "Alignment.Basic" : ["-logfile", "build/example_file.log"] +} + + +def fetch_test_names(binary_path): + list_command = [binary_path] + ["--gtest_list_tests"] + + result = subprocess.check_output(list_command, stderr=subprocess.STDOUT) + result = result.decode(sys.stdout.encoding).split("\n") + + test_group = "" + test_names = [] + for line in result: + # Skip empty entry + if not line.strip(): + continue + + # Check if this is a test group + if line[0] != " ": + test_group = line.strip() + continue + + # Assign test to test group + test_names += [test_group + line.strip()] + + return test_names + + +def run_test(config, test_name, log, extra_args): + test_command = [] + if config.ranks != 0: + test_command += ["mpirun", "-n", str(config.ranks)] + test_command += ["--output-filename", "build/mpi_result"] + test_command += ["--merge-stderr-to-stdout"] + + def is_launcher_var(name: str) -> bool: + # Whether an environment variable name is relevant for the laucher + return name.endswith("PATH") or any( + name.startswith(prefix) for prefix in LAUNCHER_VAR_PREFIXES + ) + + for var in dict(os.environ): + if is_launcher_var(var): + test_command += ["-x", var] + + test_command += [config.binary_path] + test_command += [f"--gtest_filter={test_name}"] + test_command += ["-ll:cpu", str(config.cpus)] + test_command += extra_args + + if test_name in test_args_dict: + test_command += test_args_dict[test_name] + + task = subprocess.Popen(test_command, stdout=log, stderr=subprocess.STDOUT) + task.communicate() + + return task.returncode + + +def main(): + CUNUMERIC_DIR = Path(__file__).resolve().parent.parent.parent + parser = argparse.ArgumentParser(description="Run Legate cpp 
tests.") + parser.add_argument( + "--binary-path", + dest="binary_path", + required=False, + default=str( + CUNUMERIC_DIR / "build" / "tests" / "cpp" / "bin" / "cpp_tests" + ), + help="Path to binary under test.", + ) + parser.add_argument( + "--log-path", + dest="log_path", + required=False, + default=str(CUNUMERIC_DIR / "build" / "results.log"), + help="Path to output log file.", + ) + parser.add_argument( + "--ranks", + dest="ranks", + required=False, + type=int, + default=0, + help="Runs mpirun with rank if non-zero.", + ) + parser.add_argument( + "--cpus", + dest="cpus", + required=False, + type=int, + default=4, + help="Legion cmd argument for CPU processors to create per process.", + ) + config, extra_args = parser.parse_known_args() + + # Get names + test_names = fetch_test_names(config.binary_path) + + # Run each test with popen + total_count = len(test_names) + failed_count = 0 + failed_tests = [] + with open(config.log_path, "w") as log: + for count, test_name in enumerate(test_names): + return_code = run_test(config, test_name, log, extra_args) + + # Record test result + if return_code: + failed_tests += [test_name] + failed_count += 1 + print( + f"{count + 1:3d}/{total_count}: {test_name} ".ljust(50, "."), + "Failed" if return_code else "Passed", + ) + + # Summarize results + print( + f"\n{int((total_count - failed_count) / total_count * 100)}% " + f"tests passed, {failed_count} tests failed out of {total_count}" + ) + if failed_tests: + print("\nThe following tests FAILED:") + for test in failed_tests: + print(f" - {test} (Failed)") + print(f"\nLog file generated: {config.log_path}") + return 1 + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tests/cpp/run.sh b/tests/cpp/run.sh new file mode 100755 index 000000000..6e012b101 --- /dev/null +++ b/tests/cpp/run.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +if [ $# -eq 0 ] + then + REALM_BACKTRACE=1 LEGATE_TEST=1 python run.py +elif [ $# -eq 1 ] && [ "$1" = "ctest" ] + then + echo "Using 
ctest" + cd build + REALM_BACKTRACE=1 LEGATE_TEST=1 LEGION_DEFAULT_ARGS="-ll:cpu 4" ctest --output-on-failure "$@" +else + echo "Invalid arguments" +fi diff --git a/tests/integration/test_0d_store.py b/tests/integration/test_0d_store.py index 1701983f4..a2d22fab8 100644 --- a/tests/integration/test_0d_store.py +++ b/tests/integration/test_0d_store.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_advanced_indexing.py b/tests/integration/test_advanced_indexing.py index d7da99381..90751500d 100644 --- a/tests/integration/test_advanced_indexing.py +++ b/tests/integration/test_advanced_indexing.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM -from pytest_lazyfixture import lazy_fixture from utils.generators import mk_seq_array import cunumeric as num @@ -49,60 +48,63 @@ def arr_empty1d(): val_future_0d = num.full((3,), -1).max() -# We use fixtures for `arr` because the `set_item` tests modify -# their input. -ARRS = (lazy_fixture("arr_region"), lazy_fixture("arr_future")) +# We need fixtures for `arr` because the `set_item` tests modify their inputs. 
+# However, pytest_lazy_fixture is no longer supported, so we pass the fixture +# names and rely on request.getfixturevalue to retrieve the computed values +ARRS_FIXTURES = ("arr_region", "arr_future") +ARRS_EMPTY_1D_FIXTURES = ("arr_empty1d", "arr_region", "arr_future") IDXS_0D = (idx_future_0d,) # TODO: idx_region_0d fails VALS_0D = (val_future_0d,) # TODO: val_region_0d fails IDXS_1D = (idx_region_1d, idx_future_1d) VALS_1D = (val_region_1d, val_future_1d) -ARRS_EMPTY_1D = ( - lazy_fixture("arr_empty1d"), - lazy_fixture("arr_region"), - lazy_fixture("arr_future"), -) IDXS_EMPTY_1D = (idx_empty_1d,) VALS_EMPTY_1D = (num.array([]),) @pytest.mark.parametrize("idx", IDXS_0D) # idx = 0 -@pytest.mark.parametrize("arr", ARRS) # arr = [42] -def test_getitem_scalar_0d(arr, idx): +@pytest.mark.parametrize("arr", ARRS_FIXTURES) # arr = [42] +def test_getitem_scalar_0d(arr, idx, request): + arr = request.getfixturevalue(arr) assert np.array_equal(arr[idx], 42) @pytest.mark.parametrize("val", VALS_0D) # val = -1 @pytest.mark.parametrize("idx", IDXS_0D) # idx = 0 -@pytest.mark.parametrize("arr", ARRS) # arr = [42] -def test_setitem_scalar_0d(arr, idx, val): +@pytest.mark.parametrize("arr", ARRS_FIXTURES) # arr = [42] +def test_setitem_scalar_0d(arr, idx, val, request): + arr = request.getfixturevalue(arr) arr[idx] = val assert np.array_equal(arr, [-1]) @pytest.mark.parametrize("idx", IDXS_1D) # idx = [0] -@pytest.mark.parametrize("arr", ARRS) # arr = [42] -def test_getitem_scalar_1d(arr, idx): +@pytest.mark.parametrize("arr", ARRS_FIXTURES) # arr = [42] +def test_getitem_scalar_1d(arr, idx, request): + arr = request.getfixturevalue(arr) assert np.array_equal(arr[idx], [42]) @pytest.mark.parametrize("val", VALS_1D) # val = [-1] @pytest.mark.parametrize("idx", IDXS_1D) # idx = [0] -@pytest.mark.parametrize("arr", ARRS) # arr = [42] -def test_setitem_scalar_1d(arr, idx, val): +@pytest.mark.parametrize("arr", ARRS_FIXTURES) # arr = [42] +def test_setitem_scalar_1d(arr, idx, val, 
request): + arr = request.getfixturevalue(arr) arr[idx] = val assert np.array_equal(arr, [-1]) @pytest.mark.parametrize("idx", IDXS_EMPTY_1D) # idx = [] -@pytest.mark.parametrize("arr", ARRS_EMPTY_1D) # arr = [42], [5], [] -def test_getitem_empty_1d(arr, idx): +@pytest.mark.parametrize("arr", ARRS_EMPTY_1D_FIXTURES) # arr = [42], [5], [] +def test_getitem_empty_1d(arr, idx, request): + arr = request.getfixturevalue(arr) assert np.array_equal(arr[idx], []) @pytest.mark.parametrize("idx", IDXS_EMPTY_1D) # idx = [] -@pytest.mark.parametrize("arr", ARRS_EMPTY_1D) # arr = [] +@pytest.mark.parametrize("arr", ARRS_EMPTY_1D_FIXTURES) # arr = [] @pytest.mark.parametrize("val", VALS_EMPTY_1D) # val = [] -def test_setitem_empty_1d(arr, idx, val): +def test_setitem_empty_1d(arr, idx, val, request): + arr = request.getfixturevalue(arr) arr[idx] = val assert np.array_equal(arr[idx], []) diff --git a/tests/integration/test_allclose.py b/tests/integration/test_allclose.py index b6b1d8d6d..9270c77d3 100755 --- a/tests/integration/test_allclose.py +++ b/tests/integration/test_allclose.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_amax_amin.py b/tests/integration/test_amax_amin.py index f21217e43..ee85b2e2b 100755 --- a/tests/integration/test_amax_amin.py +++ b/tests/integration/test_amax_amin.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_append.py b/tests/integration/test_append.py index 353027546..bece5e7f8 100644 --- a/tests/integration/test_append.py +++ b/tests/integration/test_append.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_arg_reduce.py b/tests/integration/test_arg_reduce.py index 4fa422726..17c491fbe 100644 --- a/tests/integration/test_arg_reduce.py +++ b/tests/integration/test_arg_reduce.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM +from utils.utils import AxisError import cunumeric as num @@ -57,7 +58,7 @@ def test_axis_outofbound(self, func_name): func = getattr(num, func_name) msg = r"out of bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): func(in_num, axis=ndim + 1) @pytest.mark.parametrize("func_name", ARG_FUNCS) @@ -68,7 +69,7 @@ def test_axis_negative(self, func_name): func = getattr(num, func_name) msg = r"out of bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): func(in_num, axis=-(ndim + 1)) @pytest.mark.parametrize("func_name", ARG_FUNCS) diff --git a/tests/integration/test_argsort.py b/tests/integration/test_argsort.py index b36de0d16..07d165eef 100644 --- a/tests/integration/test_argsort.py +++ b/tests/integration/test_argsort.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -68,7 +68,7 @@ class TestArgSort(object): def test_arr_none(self): res_np = np.argsort( None - ) # numpy.AxisError: axis -1 is out of bounds for array of dimension 0 + ) # AxisError: axis -1 is out of bounds for array of dimension 0 res_num = num.argsort( None ) # AttributeError: 'NoneType' object has no attribute 'shape' @@ -98,7 +98,7 @@ def test_structured_array_order(self): # if self.deferred is None: # if self.parent is None: # - # > assert self.runtime.is_supported_type(self.array.dtype) + # > assert self.runtime.is_supported_dtype(self.array.dtype) # E # AssertionError # diff --git a/tests/integration/test_array.py b/tests/integration/test_array.py index 6e03e98df..43854a42d 100755 --- a/tests/integration/test_array.py +++ b/tests/integration/test_array.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_array_creation.py b/tests/integration/test_array_creation.py index 7e9745cfa..65e9b3c82 100644 --- a/tests/integration/test_array_creation.py +++ b/tests/integration/test_array_creation.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
# - +import copy from itertools import product import numpy as np @@ -28,12 +28,36 @@ def test_array(): assert np.array_equal(x, z) assert x.dtype == z.dtype + assert x.data == y.data + assert x.itemsize == y.itemsize + assert x.nbytes == y.nbytes + assert x.strides == y.strides + assert isinstance(x.ctypes, type(y.ctypes)) + x = num.array([1, 2, 3]) y = num.array(x) assert num.array_equal(x, y) assert x.dtype == y.dtype +def test_array_deepcopy() -> None: + x = num.array([1, 2, 3]) + y = np.array([1, 2, 3]) + copy_x = copy.deepcopy(x) + copy_y = copy.deepcopy(y) + x[1] = 0 + y[1] = 0 + assert not np.array_equal(x, copy_x) + assert not np.array_equal(y, copy_y) + assert np.array_equal(copy_x, copy_y) + + +def test_array_float() -> None: + p = num.array(2) + q = np.array(2) + assert p.__float__() == q.__float__() + + CREATION_FUNCTIONS = ("zeros", "ones") FILLED_VALUES = [0, 1, 1000, 123.456] SIZES = (0, 1, 2) @@ -90,19 +114,18 @@ def test_full(value): class TestCreationErrors: - def setup_method(self): - self.bad_type_shape = (2, 3.0) + bad_type_shape = (2, 3.0) + + @pytest.mark.parametrize("fn", ("empty", "zeros", "ones")) + @pytest.mark.parametrize("shape", SHAPES_NEGATIVE, ids=str) + def test_creation_negative_shape(self, shape, fn): + with pytest.raises(ValueError): + getattr(num, fn)(shape) @pytest.mark.parametrize("shape", SHAPES_NEGATIVE, ids=str) - class TestNegativeShape: - @pytest.mark.parametrize("fn", ("empty", "zeros", "ones")) - def test_creation(self, shape, fn): - with pytest.raises(ValueError): - getattr(num, fn)(shape) - - def test_full(self, shape): - with pytest.raises(ValueError): - num.full(shape, 10) + def test_full_negative_shape(self, shape): + with pytest.raises(ValueError): + num.full(shape, 10) @pytest.mark.parametrize("fn", ("empty", "zeros", "ones")) def test_creation_bad_type(self, fn): diff --git a/tests/integration/test_array_dunders.py b/tests/integration/test_array_dunders.py index 1c52d5ebf..83e4c2a5e 100644 --- 
a/tests/integration/test_array_dunders.py +++ b/tests/integration/test_array_dunders.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_array_equal.py b/tests/integration/test_array_equal.py index ef013d0d7..bb298eec5 100755 --- a/tests/integration/test_array_equal.py +++ b/tests/integration/test_array_equal.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_array_fallback.py b/tests/integration/test_array_fallback.py index d4fe47aa0..46e74a8fa 100644 --- a/tests/integration/test_array_fallback.py +++ b/tests/integration/test_array_fallback.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_array_split.py b/tests/integration/test_array_split.py index 0a9e660eb..6b73c1681 100644 --- a/tests/integration/test_array_split.py +++ b/tests/integration/test_array_split.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_astype.py b/tests/integration/test_astype.py index 725bab21b..8bc96de12 100644 --- a/tests/integration/test_astype.py +++ b/tests/integration/test_astype.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_atleast_nd.py b/tests/integration/test_atleast_nd.py index da67e2de9..cac98ad72 100644 --- a/tests/integration/test_atleast_nd.py +++ b/tests/integration/test_atleast_nd.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_average.py b/tests/integration/test_average.py new file mode 100644 index 000000000..e8ff4934d --- /dev/null +++ b/tests/integration/test_average.py @@ -0,0 +1,103 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from utils.comparisons import allclose + +import cunumeric as num + +axes = [None, 0, 1, 2, (0, 1, 2)] + + +input = [[[12, 3, 1, 2], [9, 1, 6, 1]], [[7, 9, 11, 50], [31, 5, 3, 2]]] +in_num = num.array(input) +in_np = np.array(input) + + +@pytest.mark.parametrize("axis", axes) +def test_no_mask(axis): + out_num, scl_num = num.average(in_num, axis=axis, returned=True) + out_num_no_scl = num.average(in_num, axis=axis, returned=False) + out_np, scl_np = np.average(in_np, axis=axis, returned=True) + assert allclose(out_num, out_np) + assert allclose(scl_num, scl_np) + assert allclose(out_num, out_num_no_scl) + + +@pytest.mark.parametrize("axis", axes) +def test_full_weights(axis): + weight_input = [[[1, 2, 3, 4], [3, 3, 7, 1]], [[2, 2, 3, 3], [4, 1, 0, 1]]] + weights_np = np.array(weight_input) + weights_num = num.array(weight_input) + + out_num, scl_num = num.average( + in_num, weights=weights_num, axis=axis, returned=True + ) + out_num_no_scl = num.average( + in_num, weights=weights_num, axis=axis, returned=False + ) + out_np, scl_np = np.average( + in_np, weights=weights_np, axis=axis, returned=True + ) + assert allclose(out_num, out_np) + assert allclose(scl_num, scl_np) + assert allclose(out_num, out_num_no_scl) + + +single_dimension_weights = [ + [3, 4], + [1, 2], + [4, 1, 2, 1], +] +single_dimension_axis = [0, 1, 2] + + +@pytest.mark.parametrize( + "weights,axis", zip(single_dimension_weights, single_dimension_axis) +) +def test_single_axis_weights(weights, axis): + weights_np = np.array(weights) + weights_num = num.array(weights) + + out_num, scl_num = num.average( + in_num, weights=weights_num, axis=axis, returned=True + ) + out_num_no_scl = num.average( + in_num, weights=weights_num, axis=axis, returned=False + ) + out_np, scl_np = np.average( + in_np, weights=weights_np, axis=axis, returned=True + ) + assert allclose(out_num, out_np) + assert allclose(scl_num, scl_np) + assert allclose(out_num, out_num_no_scl) + + +def 
test_exception_raising(): + with pytest.raises(ValueError): + num.average(in_num, weights=[0, 2]) + with pytest.raises(ValueError): + num.average(in_num, axis=2, weights=[0, 2]) + with pytest.raises(ValueError): + num.average(in_num, axis=0, weights=[[0, 2]]) + with pytest.raises(ZeroDivisionError): + num.average(in_num, axis=0, weights=[0, 0]) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_binary_op_broadcast.py b/tests/integration/test_binary_op_broadcast.py index 25d1a0757..d779d5c34 100644 --- a/tests/integration/test_binary_op_broadcast.py +++ b/tests/integration/test_binary_op_broadcast.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_binary_op_complex.py b/tests/integration/test_binary_op_complex.py index 1eaebc951..b9263bd17 100644 --- a/tests/integration/test_binary_op_complex.py +++ b/tests/integration/test_binary_op_complex.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_binary_op_typing.py b/tests/integration/test_binary_op_typing.py index 5cb7ee8f5..446ee393d 100644 --- a/tests/integration/test_binary_op_typing.py +++ b/tests/integration/test_binary_op_typing.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -65,8 +65,8 @@ def generate_array_array_cases(): for lhs_value, rhs_value in product(SCALAR_VALUES, SCALAR_VALUES): try: - lhs_np = np.array(lhs_value, dtype=lhs_type) - rhs_np = np.array(rhs_value, dtype=rhs_type) + lhs_np = np.array(lhs_value).astype(lhs_type) + rhs_np = np.array(rhs_value).astype(rhs_type) lhs_num = num.array(lhs_np) rhs_num = num.array(rhs_np) yield (lhs_np, rhs_np, lhs_num, rhs_num) @@ -94,8 +94,8 @@ def generate_array_scalar_cases(): for rhs_type in TYPES[idx:]: for array, scalar in product(ARRAY_VALUES, SCALAR_VALUES): try: - lhs_np = np.array(array, dtype=lhs_type) - rhs_np = np.array(scalar, dtype=rhs_type) + lhs_np = np.array(array).astype(lhs_type) + rhs_np = np.array(scalar).astype(rhs_type) lhs_num = num.array(lhs_np) rhs_num = num.array(rhs_np) yield (lhs_np, rhs_np, lhs_num, rhs_num) @@ -103,8 +103,8 @@ def generate_array_scalar_cases(): pass try: - lhs_np = np.array(scalar, dtype=lhs_type) - rhs_np = np.array(array, dtype=rhs_type) + lhs_np = np.array(scalar).astype(lhs_type) + rhs_np = np.array(array).astype(rhs_type) lhs_num = num.array(lhs_np) rhs_num = num.array(rhs_np) yield (lhs_np, rhs_np, lhs_num, rhs_num) @@ -121,12 +121,12 @@ def test_array_array(lhs_np, rhs_np, lhs_num, rhs_num): out_np = np.add(lhs_np, rhs_np) out_num = num.add(lhs_num, rhs_num) - assert out_np.dtype == out_num.dtype - print(f"LHS {lhs_np}") print(f"RHS {rhs_np}") print(f"NumPy type: {out_np.dtype}, cuNumeric type: {out_num.dtype}") + assert out_np.dtype == out_num.dtype + @pytest.mark.parametrize( "lhs_np, rhs_np, lhs_num, rhs_num", generate_array_scalar_cases(), ids=str @@ -137,12 +137,12 @@ def test_array_scalar(lhs_np, rhs_np, lhs_num, rhs_num): out_np = np.add(lhs_np, rhs_np) out_num = num.add(lhs_num, rhs_num) - assert out_np.dtype == out_num.dtype - print(f"LHS {lhs_np}") print(f"RHS {rhs_np}") print(f"NumPy type: {out_np.dtype}, cuNumeric type: {out_num.dtype}") + assert out_np.dtype == out_num.dtype + if __name__ == "__main__": import sys 
diff --git a/tests/integration/test_binary_ufunc.py b/tests/integration/test_binary_ufunc.py index cf30a1f4d..516321871 100644 --- a/tests/integration/test_binary_ufunc.py +++ b/tests/integration/test_binary_ufunc.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,6 @@ # import argparse -from itertools import product import numpy as np import pytest @@ -43,196 +42,253 @@ def check_result(op, in_np, out_np, out_num): assert False -def check_ops(ops, in_np, out_dtype="D"): +def check_op(op, in_np, out_dtype="D"): in_num = tuple(num.array(arr) for arr in in_np) - for op in ops: - if op.isidentifier(): - op_np = getattr(np, op) - op_num = getattr(num, op) - assert op_np.nout == 1 + if op.isidentifier(): + op_np = getattr(np, op) + op_num = getattr(num, op) + assert op_np.nout == 1 - out_np = op_np(*in_np) - out_num = op_num(*in_num) + out_np = op_np(*in_np) + out_num = op_num(*in_num) - check_result(op, in_np, out_np, out_num) + check_result(op, in_np, out_np, out_num) - out_np = np.empty(out_np.shape, dtype=out_dtype) - out_num = num.empty(out_num.shape, dtype=out_dtype) - op_np(*in_np, out=out_np) - op_num(*in_num, out=out_num) + out_np = np.empty(out_np.shape, dtype=out_dtype) + out_num = num.empty(out_num.shape, dtype=out_dtype) + op_np(*in_np, out=out_np) + op_num(*in_num, out=out_num) - check_result(op, in_np, out_np, out_num) + check_result(op, in_np, out_np, out_num) - # Ask cuNumeric to produce outputs to NumPy ndarrays - out_num = np.empty(out_np.shape, dtype=out_dtype) - op_num(*in_num, out=out_num) + # Ask cuNumeric to produce outputs to NumPy ndarrays + out_num = np.empty(out_np.shape, dtype=out_dtype) + op_num(*in_num, out=out_num) - check_result(op, in_np, out_np, out_num) + check_result(op, in_np, out_np, out_num) - else: - # Doing it this way instead of invoking the 
dunders directly, to - # avoid having to select the right version, __add__ vs __radd__, - # when one isn't supported, e.g. for scalar.__add__(array) - - out_np = eval(f"in_np[0] {op} in_np[1]") - out_num = eval(f"in_num[0] {op} in_num[1]") - - check_result(op, in_np, out_np, out_num) - - out_np = np.ones_like(out_np) - out_num = num.ones_like(out_num) - exec(f"out_np {op}= in_np[0]") - exec(f"out_num {op}= in_num[0]") - - check_result(op, in_np, out_np, out_num) - - out_num = np.ones_like(out_np) - exec(f"out_num {op}= in_num[0]") - - check_result(op, in_np, out_np, out_num) - - -def test_all(): - # TODO: right now we will simply check if the operations work - # for some boring inputs. For some of these, we will want to - # test corner cases in the future. - - # TODO: matmul, @ - - # Math operations - ops = [ - "*", - "+", - "-", - "/", - "add", - # "divmod", - "equal", - "fmax", - "fmin", - "greater", - "greater_equal", - # "heaviside", - # "ldexp", - "less", - "less_equal", - "logical_and", - "logical_or", - "logical_xor", - "maximum", - "minimum", - "multiply", - "not_equal", - "subtract", - "true_divide", - ] - - # We want to test array-array, array-scalar, and scalar-array cases - arrs = ( - np.random.randint(3, 10, size=(4, 5)).astype("I"), - np.random.uniform(size=(4, 5)).astype("e"), - np.random.uniform(size=(4, 5)).astype("f"), - np.random.uniform(size=(4, 5)).astype("d"), - np.random.uniform(size=(4, 5)).astype("F"), - ) + else: + # Doing it this way instead of invoking the dunders directly, to + # avoid having to select the right version, __add__ vs __radd__, + # when one isn't supported, e.g. 
for scalar.__add__(array) + + out_np = eval(f"in_np[0] {op} in_np[1]") + out_num = eval(f"in_num[0] {op} in_num[1]") + + check_result(op, in_np, out_np, out_num) + + out_np = np.ones_like(out_np) + out_num = num.ones_like(out_num) + exec(f"out_np {op}= in_np[0]") + exec(f"out_num {op}= in_num[0]") + + check_result(op, in_np, out_np, out_num) + + out_num = np.ones_like(out_np) + exec(f"out_num {op}= in_num[0]") + + check_result(op, in_np, out_np, out_num) + + +# TODO: right now we will simply check if the operations work +# for some boring inputs. For some of these, we will want to +# test corner cases in the future. + +# TODO: matmul, @ + +# Math operations +math_ops = [ + "*", + "+", + "-", + "/", + "add", + # "divmod", + "equal", + "fmax", + "fmin", + "greater", + "greater_equal", + # "heaviside", + # "ldexp", + "less", + "less_equal", + "logical_and", + "logical_or", + "logical_xor", + "maximum", + "minimum", + "multiply", + "not_equal", + "subtract", + "true_divide", +] + +# We want to test array-array, array-scalar, and scalar-array cases +arrs = ( + np.random.randint(3, 10, size=(4, 5)).astype("I"), + np.random.uniform(size=(4, 5)).astype("e"), + np.random.uniform(size=(4, 5)).astype("f"), + np.random.uniform(size=(4, 5)).astype("d"), + np.random.uniform(size=(4, 5)).astype("F"), +) + +scalars = ( + np.uint64(2), + np.int64(-3), + np.random.randn(1)[0], + np.complex64(1 + 1j), +) + + +@pytest.mark.parametrize("op", math_ops) +@pytest.mark.parametrize("arr1", arrs) +@pytest.mark.parametrize("arr2", arrs) +def test_math_ops_arr_arr(op, arr1, arr2) -> None: + check_op(op, (arr1, arr2)) + + +@pytest.mark.parametrize("op", math_ops) +@pytest.mark.parametrize("arr", arrs) +@pytest.mark.parametrize("scalar", scalars) +def test_math_ops_arr_scalar(op, arr, scalar) -> None: + check_op(op, (arr, scalar)) + check_op(op, (scalar, arr)) + + +@pytest.mark.parametrize("op", math_ops) +@pytest.mark.parametrize("scalar1", scalars) +@pytest.mark.parametrize("scalar2", scalars) 
+def test_math_ops_scalar_scalar(op, scalar1, scalar2) -> None: + check_op(op, (scalar1, scalar2)) + + +trig_ops = [ + "//", + "arctan2", + "copysign", + "floor_divide", + "mod", + "fmod", + "hypot", + "logaddexp", + "logaddexp2", + "nextafter", +] + + +@pytest.mark.parametrize("op", trig_ops) +@pytest.mark.parametrize("arr1", arrs[:-1]) +@pytest.mark.parametrize("arr2", arrs[:-1]) +def test_trig_ops_arr_arr(op, arr1, arr2) -> None: + check_op(op, (arr1, arr2)) + + +@pytest.mark.parametrize("op", trig_ops) +@pytest.mark.parametrize("arr", arrs[:-1]) +@pytest.mark.parametrize("scalar", scalars[:-1]) +def test_trig_ops_arr_scalar(op, arr, scalar) -> None: + check_op(op, (arr, scalar)) + check_op(op, (scalar, arr)) + + +@pytest.mark.parametrize("op", trig_ops) +@pytest.mark.parametrize("scalar1", scalars[:-1]) +@pytest.mark.parametrize("scalar2", scalars[:-1]) +def test_trig_ops_scalar_scalar(op, scalar1, scalar2) -> None: + check_op(op, (scalar1, scalar2)) + + +power_ops = [ + "**", + "power", + "float_power", +] + + +@pytest.mark.parametrize("op", power_ops) +@pytest.mark.parametrize("arr1", arrs[:-1]) +@pytest.mark.parametrize("arr2", arrs[:-1]) +def test_power_ops_arr_arr(op, arr1, arr2) -> None: + check_op(op, (arr1, arr2)) + + +@pytest.mark.parametrize("op", power_ops) +@pytest.mark.parametrize("arr", arrs[:-1]) +def test_power_ops_arr_scalar(op, arr) -> None: + check_op(op, (arr, scalars[0])) + check_op(op, (scalars[0], arr)) + check_op(op, (arr, scalars[3])) + check_op(op, (scalars[3], scalars[3])) + + +@pytest.mark.parametrize("op", power_ops) +def test_power_ops_scalar_scalar(op) -> None: + check_op(op, (scalars[0], scalars[3])) + check_op(op, (scalars[3], scalars[0])) + + +div_ops = [ + "%", + "remainder", +] + + +@pytest.mark.parametrize("op", div_ops) +@pytest.mark.parametrize("arr1", arrs[:-1]) +@pytest.mark.parametrize("arr2", arrs[:-1]) +def test_div_ops_arr_arr(op, arr1, arr2) -> None: + check_op(op, (arr1, arr2)) + + +@pytest.mark.parametrize("op", 
div_ops) +@pytest.mark.parametrize("arr", arrs[:-1]) +@pytest.mark.parametrize("scalar", scalars[:-2]) +def test_div_ops_arr_scalar(op, arr, scalar) -> None: + check_op(op, (arr, scalar)) + check_op(op, (scalar, arr)) + + +@pytest.mark.parametrize("op", div_ops) +@pytest.mark.parametrize("scalar1", scalars[:-2]) +@pytest.mark.parametrize("scalar2", scalars[:-2]) +def test_div_ops_scalar_scalar(op, scalar1, scalar2) -> None: + check_op(op, (scalar1, scalar2)) + + +bit_ops = [ + "&", + "<<", + ">>", + "^", + "|", + "bitwise_and", + "bitwise_or", + "bitwise_xor", + "gcd", + "lcm", + "left_shift", + "right_shift", +] + + +@pytest.mark.parametrize("op", math_ops) +def test_bit_ops_arr_arr(op) -> None: + check_op(op, (arrs[0], arrs[0])) + + +@pytest.mark.parametrize("op", math_ops) +def test_bit_ops_arr_scalar(op) -> None: + check_op(op, (arrs[0], scalars[0])) + check_op(op, (arrs[0], scalars[1])) + check_op(op, (scalars[0], arrs[0])) + check_op(op, (scalars[1], arrs[0])) - scalars = ( - np.uint64(2), - np.int64(-3), - np.random.randn(1)[0], - np.complex64(1 + 1j), - ) - for arr1, arr2 in product(arrs, arrs): - check_ops(ops, (arr1, arr2)) - - for arr, scalar in product(arrs, scalars): - check_ops(ops, (arr, scalar)) - check_ops(ops, (scalar, arr)) - - for scalar1, scalar2 in product(scalars, scalars): - check_ops(ops, (scalar1, scalar2)) - - ops = [ - "//", - "arctan2", - "copysign", - "floor_divide", - "mod", - "fmod", - "hypot", - "logaddexp", - "logaddexp2", - "nextafter", - ] - - for arr1, arr2 in product(arrs[:-1], arrs[:-1]): - check_ops(ops, (arr1, arr2)) - - for arr, scalar in product(arrs[:-1], scalars[:-1]): - check_ops(ops, (arr, scalar)) - check_ops(ops, (scalar, arr)) - - for scalar1, scalar2 in product(scalars[:-1], scalars[:-1]): - check_ops(ops, (scalar1, scalar2)) - - ops = [ - "**", - "power", - "float_power", - ] - - for arr1, arr2 in product(arrs, arrs): - check_ops(ops, (arr1, arr2)) - - for arr in arrs: - check_ops(ops, (arr, scalars[0])) - 
check_ops(ops, (scalars[0], arr)) - check_ops(ops, (arr, scalars[3])) - check_ops(ops, (scalars[3], scalars[3])) - - check_ops(ops, (scalars[0], scalars[3])) - check_ops(ops, (scalars[3], scalars[0])) - - ops = [ - "%", - "remainder", - ] - - for arr1, arr2 in product(arrs[:1], arrs[:1]): - check_ops(ops, (arr1, arr2)) - - for arr, scalar in product(arrs[:1], scalars[:-2]): - check_ops(ops, (arr, scalar)) - check_ops(ops, (scalar, arr)) - - for scalar1, scalar2 in product(scalars[:-2], scalars[:-2]): - check_ops(ops, (scalar1, scalar2)) - - ops = [ - "&", - "<<", - ">>", - "^", - "|", - "bitwise_and", - "bitwise_or", - "bitwise_xor", - "gcd", - "lcm", - "left_shift", - "right_shift", - ] - - check_ops(ops, (arr1[0], arr2[0])) - - check_ops(ops, (arrs[0], scalars[0])) - check_ops(ops, (arrs[0], scalars[1])) - check_ops(ops, (scalars[0], arrs[0])) - check_ops(ops, (scalars[1], arrs[0])) - - check_ops(ops, (scalars[0], scalars[0])) +@pytest.mark.parametrize("op", math_ops) +def test_bit_ops_scalar_scalar(op) -> None: + check_op(op, (scalars[0], scalars[0])) def parse_inputs(in_str, dtype_str): @@ -276,6 +332,6 @@ def parse_inputs(in_str, dtype_str): if args.op is not None: in_np = parse_inputs(args.inputs, args.dtypes) - check_ops([args.op], in_np) + check_op(args.op, in_np) else: sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_bincount.py b/tests/integration/test_bincount.py index c57f89584..d382d1a70 100644 --- a/tests/integration/test_bincount.py +++ b/tests/integration/test_bincount.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_bits.py b/tests/integration/test_bits.py index 7f5e67cea..40882706e 100644 --- a/tests/integration/test_bits.py +++ b/tests/integration/test_bits.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_block.py b/tests/integration/test_block.py index cae10ea7c..326b18e51 100644 --- a/tests/integration/test_block.py +++ b/tests/integration/test_block.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_broadcast.py b/tests/integration/test_broadcast.py index 433dc6bb0..821ddd909 100644 --- a/tests/integration/test_broadcast.py +++ b/tests/integration/test_broadcast.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_cholesky.py b/tests/integration/test_cholesky.py index c4b52754b..e0f9d260e 100644 --- a/tests/integration/test_cholesky.py +++ b/tests/integration/test_cholesky.py @@ -1,4 +1,4 @@ -# Copyright 2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,7 +19,7 @@ import cunumeric as num -SIZES = [8, 9, 255, 512] +SIZES = [8, 9, 255, 512, 1024] def test_matrix(): diff --git a/tests/integration/test_clip.py b/tests/integration/test_clip.py index f8431fda2..f583398db 100644 --- a/tests/integration/test_clip.py +++ b/tests/integration/test_clip.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_complex_ops.py b/tests/integration/test_complex_ops.py index 626b9c781..e00de22e2 100644 --- a/tests/integration/test_complex_ops.py +++ b/tests/integration/test_complex_ops.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_compress.py b/tests/integration/test_compress.py index 7247685e6..af466f7bf 100644 --- a/tests/integration/test_compress.py +++ b/tests/integration/test_compress.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_concatenate_stack.py b/tests/integration/test_concatenate_stack.py index 754766fff..d59fd47ce 100644 --- a/tests/integration/test_concatenate_stack.py +++ b/tests/integration/test_concatenate_stack.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_contains.py b/tests/integration/test_contains.py index 23811ba74..08ab23dc8 100644 --- a/tests/integration/test_contains.py +++ b/tests/integration/test_contains.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_convolve.py b/tests/integration/test_convolve.py index 997a9d6f7..687f11d62 100644 --- a/tests/integration/test_convolve.py +++ b/tests/integration/test_convolve.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_copy.py b/tests/integration/test_copy.py index 17b2b0aa6..76efb4f83 100644 --- a/tests/integration/test_copy.py +++ b/tests/integration/test_copy.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_data_interface.py b/tests/integration/test_data_interface.py index a3329a1b6..7214aa0f1 100644 --- a/tests/integration/test_data_interface.py +++ b/tests/integration/test_data_interface.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ import pytest import cunumeric as num -from cunumeric.utils import SUPPORTED_DTYPES +from cunumeric._utils.array import SUPPORTED_DTYPES DTYPES = SUPPORTED_DTYPES.keys() diff --git a/tests/integration/test_diag_indices.py b/tests/integration/test_diag_indices.py index 567011417..03659d44b 100644 --- a/tests/integration/test_diag_indices.py +++ b/tests/integration/test_diag_indices.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_diff.py b/tests/integration/test_diff.py new file mode 100644 index 000000000..0644f5c9c --- /dev/null +++ b/tests/integration/test_diff.py @@ -0,0 +1,66 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from utils.comparisons import allclose + +import cunumeric as num + + +@pytest.mark.parametrize( + "args", + [ + ((100,), 1, -1, None, None), + ((100,), 2, -1, None, None), + ((100,), 3, -1, None, None), + ((100,), 2, 0, None, None), + ((10, 10), 2, -1, None, None), + ((10, 10), 2, 0, None, None), + ((10, 10), 2, 1, None, None), + ((100,), 3, -1, [1.0, 2.0], None), + ((100,), 3, -1, None, [1.0, 2.0]), + ((100,), 3, -1, [1.0, 2.0], [1.0, 2.0]), + ((5,), 5, -1, None, None), + ((5,), 6, 0, None, None), + ((5, 5), 5, 1, None, None), + ((5, 5), 6, 1, None, None), + ], +) +def test_diff(args): + shape, n, axis, prepend, append = args + nparr = np.random.random(shape) + cnarr = num.array(nparr) + + # We are not adopting the np._NoValue default arguments + # for this function, as no special behavior is needed on None. + n_prepend = np._NoValue if prepend is None else prepend + n_append = np._NoValue if append is None else append + res_np = np.diff(nparr, n=n, axis=axis, prepend=n_prepend, append=n_append) + res_cn = num.diff(cnarr, n=n, axis=axis, prepend=prepend, append=append) + + assert allclose(res_np, res_cn) + + +def test_diff_nzero(): + a = num.ones(100) + ad = num.diff(a, n=0) + assert a is ad + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_digitize.py b/tests/integration/test_digitize.py new file mode 100644 index 000000000..f7d524f2c --- /dev/null +++ b/tests/integration/test_digitize.py @@ -0,0 +1,166 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import math + +import numpy as np +import pytest + +import cunumeric as num + +DTYPES = ( + np.uint32, + np.uint64, + np.float32, + np.float64, +) + +SHAPES = ( + (10,), + (2, 5), + (3, 7, 10), +) + + +class TestDigitizeErrors(object): + def test_complex_array(self): + a = np.array([2, 3, 10, 9], dtype=np.complex64) + bins = [0, 3, 5] + expected_exc = TypeError + with pytest.raises(expected_exc): + num.digitize(a, bins) + with pytest.raises(expected_exc): + np.digitize(a, bins) + + @pytest.mark.xfail + def test_bad_array(self): + bins = [0, 5, 3] + expected_exc = ValueError + with pytest.raises(expected_exc): + # cunumeric raises TypeError + num.digitize(None, bins) + with pytest.raises(expected_exc): + np.digitize(None, bins) + + @pytest.mark.xfail + def test_bad_bins(self): + a = [2, 3, 10, 9] + expected_exc = ValueError + with pytest.raises(expected_exc): + # cunumeric raises TypeError + num.digitize(a, None) + with pytest.raises(expected_exc): + np.digitize(a, None) + + def test_bins_non_monotonic(self): + a = [2, 3, 10, 9] + bins = [0, 5, 3] + expected_exc = ValueError + with pytest.raises(expected_exc): + num.digitize(a, bins) + with pytest.raises(expected_exc): + np.digitize(a, bins) + + def test_bins_ndim(self): + a = [2, 3, 10, 9] + bins = np.array([[0], [5], [3]]) + expected_exc = ValueError + with pytest.raises(expected_exc): + num.digitize(a, bins) + with pytest.raises(expected_exc): + np.digitize(a, bins) + + +def generate_random(shape, dtype): + a_np = None + size = math.prod(shape) + if np.issubdtype(dtype, np.integer): + a_np = 
np.array( + np.random.randint( + np.iinfo(dtype).min, + np.iinfo(dtype).max, + size=size, + dtype=dtype, + ), + dtype=dtype, + ) + elif np.issubdtype(dtype, np.floating): + a_np = np.array(np.random.random(size=size), dtype=dtype) + elif np.issubdtype(dtype, np.complexfloating): + a_np = np.array( + np.random.random(size=size) + np.random.random(size=size) * 1j, + dtype=dtype, + ) + else: + assert False + return a_np.reshape(shape) + + +@pytest.mark.parametrize("right", (True, False)) +def test_empty(right): + bins = [0, 3, 5] + assert len(num.digitize([], bins, right=right)) == 0 + + +@pytest.mark.parametrize("shape", SHAPES, ids=str) +@pytest.mark.parametrize("dtype", DTYPES, ids=str) +@pytest.mark.parametrize("right", (True, False)) +def test_increasing_bins(shape, dtype, right): + a = generate_random(shape, dtype) + bins = [0, 3, 5] + + a_num = num.array(a) + bins_num = num.array(bins) + + res_np = np.digitize(a, bins, right=right) + res_num = num.digitize(a, bins, right=right) + assert num.array_equal(res_np, res_num) + + res_np = np.digitize(a, bins, right=right) + res_num = num.digitize(a_num, bins, right=right) + assert num.array_equal(res_np, res_num) + + res_np = np.digitize(a, bins, right=right) + res_num = num.digitize(a_num, bins_num, right=right) + assert num.array_equal(res_np, res_num) + + +@pytest.mark.parametrize("shape", SHAPES, ids=str) +@pytest.mark.parametrize("dtype", DTYPES, ids=str) +@pytest.mark.parametrize("right", (True, False)) +def test_decreasing_bins(shape, dtype, right): + a = generate_random(shape, dtype) + bins = [5, 3, 0] + + a_num = num.array(a) + bins_num = num.array(bins) + + res_np = np.digitize(a, bins, right=right) + res_num = num.digitize(a, bins, right=right) + assert num.array_equal(res_np, res_num) + + res_np = np.digitize(a, bins, right=right) + res_num = num.digitize(a_num, bins, right=right) + assert num.array_equal(res_np, res_num) + + res_np = np.digitize(a, bins, right=right) + res_num = num.digitize(a_num, 
bins_num, right=right) + assert num.array_equal(res_np, res_num) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_dot.py b/tests/integration/test_dot.py index 40769c354..e3b775145 100644 --- a/tests/integration/test_dot.py +++ b/tests/integration/test_dot.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from utils.generators import mk_0to1_array import cunumeric as num -from cunumeric.utils import dot_modes +from cunumeric._utils.linalg import dot_modes @pytest.mark.parametrize("b_ndim", range(LEGATE_MAX_DIM + 1)) diff --git a/tests/integration/test_einsum.py b/tests/integration/test_einsum.py index 4fcdd2402..f79033b1f 100644 --- a/tests/integration/test_einsum.py +++ b/tests/integration/test_einsum.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,6 @@ from functools import lru_cache from itertools import permutations, product -from typing import List, Optional, Set, Tuple import numpy as np import pytest @@ -44,7 +43,7 @@ def gen_operand( used_modes: int, dim_lim: int, mode_lim: int, - op: Optional[List[int]] = None, + op: list[int] | None = None, ): if op is None: op = [] @@ -77,8 +76,8 @@ def gen_operand( # Exhaustively generate all (normalized) expressions within some limits. These # limits are set low by default, to keep the unit test running time low. 
def gen_expr( - opers: Optional[List[List[int]]] = None, - cache: Optional[Set[Tuple[Tuple[int]]]] = None, + opers: list[list[int]] | None = None, + cache: set[tuple[tuple[int]]] | None = None, ): if opers is None: opers = [] diff --git a/tests/integration/test_einsum_path.py b/tests/integration/test_einsum_path.py index 6efda00b1..675ae4450 100644 --- a/tests/integration/test_einsum_path.py +++ b/tests/integration/test_einsum_path.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_exp.py b/tests/integration/test_exp.py index 7eff590ae..1b0fe195a 100644 --- a/tests/integration/test_exp.py +++ b/tests/integration/test_exp.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_extract.py b/tests/integration/test_extract.py index 007d93fe0..5268a51f8 100644 --- a/tests/integration/test_extract.py +++ b/tests/integration/test_extract.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -47,7 +47,7 @@ [11, 12, 13], [True, False, False, True], [42.3, 42.3, 42.3, 42.3, 42.3], - [np.inf, np.Inf], + [np.inf], ] diff --git a/tests/integration/test_eye.py b/tests/integration/test_eye.py index 3b2b8acfd..b79c6dde1 100644 --- a/tests/integration/test_eye.py +++ b/tests/integration/test_eye.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_fallback.py b/tests/integration/test_fallback.py index 4e312d0bb..885762993 100644 --- a/tests/integration/test_fallback.py +++ b/tests/integration/test_fallback.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -28,8 +28,15 @@ def test_ufunc(): in_num = num.array([0, 1, 2, 3]) in_np = in_num.__array__() - out_num = np.logical_and.reduce(in_num) - out_np = np.logical_and.reduce(in_np) + # This test uses logical_and.accumulate because it is currently + # unimplemented, and we want to verify a behaviour of unimplemented ufunc + # methods. 
If logical_and.accumulate becomes implemented in the future, + # this assertion will start to fail, and a new (unimplemented) ufunc method + # should be found to replace it + assert not num.logical_and.accumulate._cunumeric.implemented + + out_num = num.logical_and.accumulate(in_num) + out_np = np.logical_and.accumulate(in_np) assert np.array_equal(out_num, out_np) diff --git a/tests/integration/test_fft_c2c.py b/tests/integration/test_fft_c2c.py index 35d192104..16f459894 100644 --- a/tests/integration/test_fft_c2c.py +++ b/tests/integration/test_fft_c2c.py @@ -1,4 +1,4 @@ -# Copyright 2021 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,8 +13,6 @@ # limitations under the License. # -import warnings - import numpy as np import pytest from utils.comparisons import allclose as _allclose @@ -55,12 +53,11 @@ def check_1d_c2c(N, dtype=np.float64): assert allclose(out, out_num) # Odd types - warnings.filterwarnings(action="ignore", category=np.ComplexWarning) - out = np.fft.rfft(Z) - out_num = num.fft.rfft(Z_num) + out = np.fft.rfft(Z.real) + out_num = num.fft.rfft(Z_num.real) assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) @@ -106,11 +103,11 @@ def check_2d_c2c(N, dtype=np.float64): assert allclose(out, out_num) # Odd types - out = np.fft.rfft2(Z) - out_num = num.fft.rfft2(Z_num) + out = np.fft.rfft2(Z.real) + out_num = num.fft.rfft2(Z_num.real) assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) @@ -155,11 +152,11 @@ def check_3d_c2c(N, dtype=np.float64): assert allclose(out, out_num) # Odd types - out = 
np.fft.rfftn(Z) - out_num = num.fft.rfftn(Z_num) + out = np.fft.rfftn(Z.real) + out_num = num.fft.rfftn(Z_num.real) assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) @@ -214,8 +211,8 @@ def check_4d_c2c(N, dtype=np.float64): # Odd types assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) diff --git a/tests/integration/test_fft_c2r.py b/tests/integration/test_fft_c2r.py index 30e5c9b34..861977b59 100644 --- a/tests/integration/test_fft_c2r.py +++ b/tests/integration/test_fft_c2r.py @@ -1,4 +1,4 @@ -# Copyright 2021 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -49,11 +49,11 @@ def check_1d_c2r(N, dtype=np.float64): assert allclose(out, out_num) # Odd types - out = np.fft.rfft(Z) - out_num = num.fft.rfft(Z_num) + out = np.fft.rfft(Z.real) + out_num = num.fft.rfft(Z_num.real) assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) @@ -90,11 +90,11 @@ def check_2d_c2r(N, dtype=np.float64): assert allclose(out, out_num) # Odd types - out = np.fft.rfft2(Z) - out_num = num.fft.rfft2(Z_num) + out = np.fft.rfft2(Z.real) + out_num = num.fft.rfft2(Z_num.real) assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) @@ -134,11 +134,11 @@ def check_3d_c2r(N, dtype=np.float64): assert allclose(out, out_num) # Odd types - out = np.fft.rfftn(Z) - out_num = num.fft.rfftn(Z_num) + out = np.fft.rfftn(Z.real) + out_num = num.fft.rfftn(Z_num.real) assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) @@ -182,11 +182,11 @@ def check_4d_c2r(N, dtype=np.float64): assert allclose(out, out_num) # Odd types - out = np.fft.rfftn(Z) - out_num = num.fft.rfftn(Z_num) + out = np.fft.rfftn(Z.real) + out_num = num.fft.rfftn(Z_num.real) assert allclose(out, out_num) - out = np.fft.ihfft(Z) - out_num = num.fft.ihfft(Z_num) + out = np.fft.ihfft(Z.real) + out_num = num.fft.ihfft(Z_num.real) assert allclose(out, out_num) assert allclose(Z, Z_num) diff --git a/tests/integration/test_fft_hermitian.py b/tests/integration/test_fft_hermitian.py index 997821895..623e6ec31 100644 --- a/tests/integration/test_fft_hermitian.py +++ b/tests/integration/test_fft_hermitian.py @@ -1,4 +1,4 @@ -# Copyright 2021 
NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_fft_r2c.py b/tests/integration/test_fft_r2c.py index e7b829fef..408b0de2f 100644 --- a/tests/integration/test_fft_r2c.py +++ b/tests/integration/test_fft_r2c.py @@ -1,4 +1,4 @@ -# Copyright 2021 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_fill.py b/tests/integration/test_fill.py index 134e209f2..89cfde7a5 100644 --- a/tests/integration/test_fill.py +++ b/tests/integration/test_fill.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import cunumeric as num -INF_VALUES = [np.NINF, np.inf] +INF_VALUES = [-np.inf, np.inf] FLOAT_FILL_VALUES = (-2.4e120, -1.3, 8.9e-130, 0.0, 5.7e-150, 0.6, 3.7e160) FLOAT_BIG_VALUES = (-2.4e120, 3.7e160) diff --git a/tests/integration/test_fill_diagonal.py b/tests/integration/test_fill_diagonal.py index f50309607..7482fc412 100644 --- a/tests/integration/test_fill_diagonal.py +++ b/tests/integration/test_fill_diagonal.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_flags.py b/tests/integration/test_flags.py index f7e8f16ac..ae63f117e 100644 --- a/tests/integration/test_flags.py +++ b/tests/integration/test_flags.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_flatten.py b/tests/integration/test_flatten.py index f83ad2704..f143597e9 100644 --- a/tests/integration/test_flatten.py +++ b/tests/integration/test_flatten.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_flip.py b/tests/integration/test_flip.py index e4032174a..97e57ef26 100644 --- a/tests/integration/test_flip.py +++ b/tests/integration/test_flip.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM +from utils.utils import AxisError import cunumeric as num @@ -48,13 +49,13 @@ def test_axis_float(self): def test_axis_outofbound(self): axis = 12 msg = r"out of bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.flip(a, axis=axis) def test_axis_outofbound_negative(self): axis = -12 msg = r"out of bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.flip(a, axis=axis) def test_repeated_axis(self): @@ -66,7 +67,7 @@ def test_repeated_axis(self): def test_axis_outofbound_tuple(self): axis = (1, 5) msg = r"out of bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.flip(a, axis=axis) diff --git a/tests/integration/test_floating.py b/tests/integration/test_floating.py index 36304949e..5b72bcc03 100644 --- a/tests/integration/test_floating.py +++ b/tests/integration/test_floating.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_get_item.py b/tests/integration/test_get_item.py index 724fe850f..629373706 100644 --- a/tests/integration/test_get_item.py +++ b/tests/integration/test_get_item.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_histogram.py b/tests/integration/test_histogram.py index 92824f0da..13c14470c 100644 --- a/tests/integration/test_histogram.py +++ b/tests/integration/test_histogram.py @@ -1,4 +1,4 @@ -# Copyright 2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_identity.py b/tests/integration/test_identity.py index 716eae725..f84e000d1 100644 --- a/tests/integration/test_identity.py +++ b/tests/integration/test_identity.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_index_routines.py b/tests/integration/test_index_routines.py index d1c42805f..b7aeb4158 100644 --- a/tests/integration/test_index_routines.py +++ b/tests/integration/test_index_routines.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -19,9 +19,10 @@ import pytest from legate.core import LEGATE_MAX_DIM from utils.generators import mk_seq_array +from utils.utils import AxisError import cunumeric as num -from cunumeric.eager import diagonal_reference +from cunumeric._thunk.eager import diagonal_reference class TestChoose1d: @@ -530,11 +531,11 @@ def test_axes_same(self, axes): "axes", ((0, -4), (3, 0)), ids=lambda axes: f"(axes={axes})" ) def test_axes_out_of_bound(self, axes): - # In Numpy, it raises numpy.AxisError: is out of bounds + # In Numpy, it raises AxisError: is out of bounds # In cuNumeric, it raises ValueError: # axes must be the same size as ndim for transpose axis1, axis2 = axes - with pytest.raises(np.AxisError): + with pytest.raises(AxisError): num.diagonal(self.a, 0, axis1, axis2) @pytest.mark.xfail diff --git a/tests/integration/test_indices.py b/tests/integration/test_indices.py index 55a369975..5a8346bb1 100644 --- a/tests/integration/test_indices.py +++ b/tests/integration/test_indices.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_ingest.py b/tests/integration/test_ingest.py deleted file mode 100644 index 8ca6bfbcd..000000000 --- a/tests/integration/test_ingest.py +++ /dev/null @@ -1,101 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -import numpy as np -import pytest -from legate.core import ( - CustomSplit, - Rect, - TiledSplit, - float64, - get_legion_context, - get_legion_runtime, - ingest, - legion, -) - -import cunumeric as num - -tile_shape = (4, 7) -colors = (5, 3) -shape = tuple(ci * di for (ci, di) in zip(colors, tile_shape)) - - -def get_subdomain(color): - return Rect( - lo=[ci * di for (ci, di) in zip(color, tile_shape)], - hi=[(ci + 1) * di for (ci, di) in zip(color, tile_shape)], - ) - - -def get_buffer(color): - arr = np.zeros(tile_shape) - base = float( - color[0] * tile_shape[0] * shape[1] + color[1] * tile_shape[1] - ) - for i in range(tile_shape[0]): - for j in range(tile_shape[1]): - arr[i, j] = base + shape[1] * i + j - return arr.data - - -def get_local_colors(): - num_shards = legion.legion_runtime_total_shards( - get_legion_runtime(), get_legion_context() - ) - shard = legion.legion_runtime_local_shard( - get_legion_runtime(), get_legion_context() - ) - res = [] - i = 0 - for color in Rect(colors): - if i % num_shards == shard: - res.append(color) - i += 1 - return res - - -def _ingest(custom_partitioning, custom_sharding): - data_split = ( - CustomSplit(get_subdomain) - if custom_partitioning - else TiledSplit(tile_shape) - ) - tab = ingest( - float64, - shape, - colors, - data_split, - get_buffer, - get_local_colors if custom_sharding else None, - ) - return num.array(tab) - - -@pytest.mark.parametrize("custom_sharding", [True, False]) -@pytest.mark.parametrize("custom_partitioning", [True, False]) -def test(custom_partitioning, custom_sharding): - size = 1 - for d in shape: - size *= d - a_np = np.arange(size).reshape(shape) - a_num = _ingest(custom_partitioning, custom_sharding) - assert np.array_equal(a_np, a_num) - assert np.array_equal(a_np, a_num * 1.0) # force a copy - - -if __name__ == "__main__": - import sys - - sys.exit(pytest.main(sys.argv)) diff 
--git a/tests/integration/test_inlinemap-keeps-region-alive.py b/tests/integration/test_inlinemap-keeps-region-alive.py index 15de386aa..6f6bbf92c 100644 --- a/tests/integration/test_inlinemap-keeps-region-alive.py +++ b/tests/integration/test_inlinemap-keeps-region-alive.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_inner.py b/tests/integration/test_inner.py index 24904a07c..d1f27a12f 100644 --- a/tests/integration/test_inner.py +++ b/tests/integration/test_inner.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from utils.generators import mk_0to1_array import cunumeric as num -from cunumeric.utils import inner_modes +from cunumeric._utils.linalg import inner_modes @pytest.mark.parametrize("b_ndim", range(LEGATE_MAX_DIM + 1)) diff --git a/tests/integration/test_input_output.py b/tests/integration/test_input_output.py index 9f25de50c..2e2b2f594 100644 --- a/tests/integration/test_input_output.py +++ b/tests/integration/test_input_output.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_intra_array_copy.py b/tests/integration/test_intra_array_copy.py index c783e2bb7..2f3ed31ba 100644 --- a/tests/integration/test_intra_array_copy.py +++ b/tests/integration/test_intra_array_copy.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_item.py b/tests/integration/test_item.py index 09d506214..a80bc070b 100644 --- a/tests/integration/test_item.py +++ b/tests/integration/test_item.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_itemset.py b/tests/integration/test_itemset.py index 0329be4d3..283d976a8 100644 --- a/tests/integration/test_itemset.py +++ b/tests/integration/test_itemset.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -18,6 +18,11 @@ from utils.generators import generate_item import cunumeric as num +from cunumeric._utils import is_np2 + +# itemset was removed in numpy 2.0, skip the entire module +if is_np2: + pytestmark = pytest.mark.skip @pytest.mark.xfail diff --git a/tests/integration/test_jacobi.py b/tests/integration/test_jacobi.py index bdca2e3cc..82b4ff0b6 100644 --- a/tests/integration/test_jacobi.py +++ b/tests/integration/test_jacobi.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_length.py b/tests/integration/test_length.py index cb79b642a..c00157eeb 100644 --- a/tests/integration/test_length.py +++ b/tests/integration/test_length.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_linspace.py b/tests/integration/test_linspace.py index 89b0ee727..0937ac10f 100644 --- a/tests/integration/test_linspace.py +++ b/tests/integration/test_linspace.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_logic.py b/tests/integration/test_logic.py index f969eb168..c4b8a33b4 100644 --- a/tests/integration/test_logic.py +++ b/tests/integration/test_logic.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -141,16 +141,11 @@ def test_isscalar_array(): ) -@pytest.mark.xfail @pytest.mark.parametrize( ("a", "b"), SCALAR_PAIRS, ) def test_isclose_scalars(a, b): - # for all cases, - # In Numpy, it pass - # In cuNumeric, it raises IndexError: too many indices for array: - # array is 0-dimensional, but 1 were indexed out_np = np.isclose(a, b) out_num = num.isclose(a, b) assert np.array_equal(out_np, out_num) diff --git a/tests/integration/test_logical.py b/tests/integration/test_logical.py index b0f83aaa6..dac2de22a 100644 --- a/tests/integration/test_logical.py +++ b/tests/integration/test_logical.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_logical_reduction.py b/tests/integration/test_logical_reduction.py new file mode 100644 index 000000000..3bd1f3e99 --- /dev/null +++ b/tests/integration/test_logical_reduction.py @@ -0,0 +1,40 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest + +import cunumeric as num + + +@pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 1, 2)]) +def test_logical_reductions(axis): + input = [[[12, 0, 1, 2], [9, 0, 0, 1]], [[0, 0, 0, 5], [1, 1, 1, 1]]] + in_num = num.array(input) + in_np = np.array(input) + + out_num = num.logical_and.reduce(in_num, axis=axis) + out_np = np.logical_and.reduce(in_np, axis=axis) + assert num.array_equal(out_num, out_np) + + out_num = num.logical_or.reduce(in_num, axis=axis) + out_np = np.logical_or.reduce(in_np, axis=axis) + assert num.array_equal(out_num, out_np) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_lstm_backward_test.py b/tests/integration/test_lstm_backward_test.py index ae9428f22..aa8c5bfb2 100644 --- a/tests/integration/test_lstm_backward_test.py +++ b/tests/integration/test_lstm_backward_test.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_lstm_simple_forward.py b/tests/integration/test_lstm_simple_forward.py index 2e2936ad8..629d1ef76 100644 --- a/tests/integration/test_lstm_simple_forward.py +++ b/tests/integration/test_lstm_simple_forward.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_map_reduce.py b/tests/integration/test_map_reduce.py index 70379460f..15ac5c7d0 100644 --- a/tests/integration/test_map_reduce.py +++ b/tests/integration/test_map_reduce.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_mask.py b/tests/integration/test_mask.py index afe4987e3..648d54e5e 100644 --- a/tests/integration/test_mask.py +++ b/tests/integration/test_mask.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_mask_indices.py b/tests/integration/test_mask_indices.py index 2bcc88212..bd4587916 100644 --- a/tests/integration/test_mask_indices.py +++ b/tests/integration/test_mask_indices.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_matmul.py b/tests/integration/test_matmul.py index 66f6ad89a..b46ddd875 100644 --- a/tests/integration/test_matmul.py +++ b/tests/integration/test_matmul.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ import numpy as np import pytest from legate.core import LEGATE_MAX_DIM +from utils.comparisons import allclose from utils.contractions import ( check_default, check_permutations, @@ -24,12 +25,12 @@ ) import cunumeric as num -from cunumeric.utils import matmul_modes +from cunumeric._utils.linalg import matmul_modes @pytest.mark.parametrize("a_ndim", range(1, LEGATE_MAX_DIM + 1)) @pytest.mark.parametrize("b_ndim", range(1, LEGATE_MAX_DIM + 1)) -def test(a_ndim, b_ndim): +def test_function(a_ndim, b_ndim): name = f"matmul({a_ndim} x {b_ndim})" modes = matmul_modes(a_ndim, b_ndim) @@ -43,6 +44,57 @@ def operation(lib, *args, **kwargs): check_types(name, modes, operation) +@pytest.mark.parametrize( + "a_shape", + ( + (3, 4, 5), + (4, 5), + (5,), + ), +) +@pytest.mark.parametrize( + "b_shape", + ( + (3, 5, 6), + (5, 6), + (5,), + ), +) +def test_operator(a_shape, b_shape): + np_a = np.random.random(a_shape) + np_b = np.random.random(b_shape) + num_a = num.array(np_a) + num_b = num.array(np_b) + assert allclose(np_a @ np_b, num_a @ num_b) + + +@pytest.mark.parametrize( + "a_shape", + ( + (3, 4, 5), + (4, 5), + (5,), + ), +) +@pytest.mark.parametrize( + "b_shape", + ( + (3, 5, 5), + (5, 5), + ), +) +def test_inplace_operator(a_shape, b_shape): + if len(a_shape) < len(b_shape): + return + np_a = np.random.random(a_shape) + np_b = np.random.random(b_shape) + num_a = num.array(np_a) + num_b = num.array(np_b) + np_a @= np_b + num_a @= num_b + assert allclose(np_a, num_a) + + class TestMatmulErrors: @pytest.mark.parametrize( "shapesAB", diff --git a/tests/integration/test_matrix_power.py b/tests/integration/test_matrix_power.py index 192d01638..58508897c 100644 --- a/tests/integration/test_matrix_power.py +++ b/tests/integration/test_matrix_power.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with 
the License. diff --git a/tests/integration/test_mean.py b/tests/integration/test_mean.py index 40092455c..2065b6f75 100755 --- a/tests/integration/test_mean.py +++ b/tests/integration/test_mean.py @@ -1,4 +1,4 @@ -# Copyright 2021-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_min_on_gpu.py b/tests/integration/test_min_on_gpu.py index 2a5345c4f..ccb09aa82 100644 --- a/tests/integration/test_min_on_gpu.py +++ b/tests/integration/test_min_on_gpu.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_moveaxis.py b/tests/integration/test_moveaxis.py index d52a98f3d..f8a58f799 100644 --- a/tests/integration/test_moveaxis.py +++ b/tests/integration/test_moveaxis.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ import pytest from legate.core import LEGATE_MAX_DIM from utils.generators import mk_0to1_array +from utils.utils import AxisError import cunumeric as num @@ -87,16 +88,16 @@ def test_repeated_axis(self): def test_axis_out_of_bound(self): msg = "out of bound" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.moveaxis(self.x, [0, 3], [0, 1]) - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.moveaxis(self.x, [0, 1], [0, -4]) - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.moveaxis(self.x, 4, 0) - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.moveaxis(self.x, 0, -4) def test_axis_with_different_length(self): diff --git a/tests/integration/test_msort.py b/tests/integration/test_msort.py index fbb1cdac0..cc99a3fbe 100644 --- a/tests/integration/test_msort.py +++ b/tests/integration/test_msort.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,6 +17,7 @@ import pytest import cunumeric as num +from cunumeric._utils import is_np2 # cunumeric.msort(a: ndarray) → ndarray @@ -42,12 +43,13 @@ ] +@pytest.mark.skipif(is_np2, reason="numpy 2.0") class TestmSort(object): @pytest.mark.xfail def test_arr_none(self): res_np = np.msort( None - ) # numpy.AxisError: axis 0 is out of bounds for array of dimension 0 + ) # AxisError: axis 0 is out of bounds for array of dimension 0 res_num = num.msort( None ) # AttributeError: 'NoneType' object has no attribute 'shape' diff --git a/tests/integration/test_multi_dot.py b/tests/integration/test_multi_dot.py index 1c4ca3d05..399f70e23 100644 --- a/tests/integration/test_multi_dot.py +++ b/tests/integration/test_multi_dot.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_nan_reduction.py b/tests/integration/test_nan_reduction.py index e57a46d0b..b97b6d012 100644 --- a/tests/integration/test_nan_reduction.py +++ b/tests/integration/test_nan_reduction.py @@ -1,4 +1,4 @@ -# Copyright 2022-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -281,6 +281,18 @@ def test_all_nans_nanprod(self, ndim): assert out_num == 1.0 + def test_dtype_nanprod(self) -> None: + in_np = np.arange(1, 10, step=1, dtype=np.int64) + out_np = np.nanprod(in_np) + in_num = num.arange(1, 10, 1, dtype=np.int64) + out_num = num.nanprod(in_num) + assert out_np == out_num + + def test_dtype_nansum(self) -> None: + arr_num = num.array([1, 2, 3]) + arr_np = np.array([1, 2, 3]) + assert num.nansum(arr_num) == np.nansum(arr_np) + @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) def test_all_nans_nansum(self, ndim): shape = (3,) * ndim diff --git a/tests/integration/test_nanarg_reduction.py b/tests/integration/test_nanarg_reduction.py index 9956244a2..c5f2d66d3 100644 --- a/tests/integration/test_nanarg_reduction.py +++ b/tests/integration/test_nanarg_reduction.py @@ -1,4 +1,4 @@ -# Copyright 2022-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -225,6 +225,21 @@ def test_slice_nan_no_numpy_compat(self, identity, func_name): settings.numpy_compat.unset_value() + @pytest.mark.parametrize("func_name", NAN_ARG_FUNCS) + def test_empty_arr(self, func_name: str) -> None: + a = [] + in_np = np.array(a) + in_num = num.array(a) + func_np = getattr(np, func_name) + func_num = getattr(num, func_name) + with pytest.raises(ValueError): + func_np(in_np) + # ValueError: All-NaN slice encountered + with pytest.raises(ValueError): + func_num(in_num) + # ValueError: attempt to get nanargmax of an empty sequence + # ValueError: attempt to get nanargmin of an empty sequence + class TestXFail: """ @@ -244,8 +259,7 @@ def test_disallowed_dtypes(self, func_name, ndim, disallowed_dtype): func_num = getattr(num, func_name) expected_exp = ValueError - msg = r"operation is not supported for complex-type arrays" - with pytest.raises(expected_exp, match=msg): + with pytest.raises(expected_exp): func_num(in_num) @pytest.mark.xfail diff --git a/tests/integration/test_nanmean.py b/tests/integration/test_nanmean.py index 98962842b..f02487d11 100755 --- a/tests/integration/test_nanmean.py +++ b/tests/integration/test_nanmean.py @@ -1,4 +1,4 @@ -# Copyright 2021-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_ndim.py b/tests/integration/test_ndim.py index c9bba7f07..d520928d3 100644 --- a/tests/integration/test_ndim.py +++ b/tests/integration/test_ndim.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_nonzero.py b/tests/integration/test_nonzero.py index 8d525446c..9c4c44b97 100644 --- a/tests/integration/test_nonzero.py +++ b/tests/integration/test_nonzero.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ import numpy as np import pytest +from utils.utils import AxisError import cunumeric as num @@ -82,7 +83,7 @@ def test_basic(size): def test_axis_out_bound(): arr = [-1, 0, 1, 2, 10] - with pytest.raises(np.AxisError): + with pytest.raises(AxisError): num.count_nonzero(arr, axis=2) diff --git a/tests/integration/test_norm.py b/tests/integration/test_norm.py index add546fd6..51f9b4a5a 100644 --- a/tests/integration/test_norm.py +++ b/tests/integration/test_norm.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_ones.py b/tests/integration/test_ones.py index 918045dd5..6e5f047e3 100644 --- a/tests/integration/test_ones.py +++ b/tests/integration/test_ones.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_outer.py b/tests/integration/test_outer.py index 7ce71a68f..850cb0e83 100644 --- a/tests/integration/test_outer.py +++ b/tests/integration/test_outer.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_overwrite_slice.py b/tests/integration/test_overwrite_slice.py index 5ae4070fa..005daff0e 100644 --- a/tests/integration/test_overwrite_slice.py +++ b/tests/integration/test_overwrite_slice.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_partition.py b/tests/integration/test_partition.py index bd40c2dd1..2f75aeb1a 100644 --- a/tests/integration/test_partition.py +++ b/tests/integration/test_partition.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_percentiles.py b/tests/integration/test_percentiles.py index 95f3e0c09..9699d462f 100644 --- a/tests/integration/test_percentiles.py +++ b/tests/integration/test_percentiles.py @@ -1,4 +1,4 @@ -# Copyright 2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_prod.py b/tests/integration/test_prod.py index c004c95a3..9fecf6c4f 100644 --- a/tests/integration/test_prod.py +++ b/tests/integration/test_prod.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,8 +15,10 @@ import numpy as np import pytest from utils.comparisons import allclose +from utils.utils import AxisError import cunumeric as num +from cunumeric._utils import is_np2 # numpy.prod(a, axis=None, dtype=None, out=None, keepdims=, # initial=, where=) @@ -74,7 +76,7 @@ (DIM, DIM, DIM), ] -ARR = ([], [[]], [[], []], np.inf, np.Inf, -10.3, 0, 200, 5 + 8j) +ARR = ([], [[]], [[], []], np.inf, -10.3, 0, 200, 5 + 8j) DTYPE = ("l", "L", "f", "e", "d") INTEGER_DTYPE = ("h", "i", "H", "I", "?", "b", "B") @@ -94,7 +96,7 @@ def test_array(self, arr): assert allclose(np.prod(arr), num.prod(arr)) def test_axis_out_bound(self): - expected_exc = np.AxisError + expected_exc = AxisError arr = [-1, 0, 1, 2, 10] with pytest.raises(expected_exc): np.prod(arr, axis=2) @@ -122,19 +124,12 @@ def test_keepdims(self): out_num = num.prod(arr_num, axis=2, keepdims=True) assert allclose(out_np, out_num) - @pytest.mark.parametrize( - "initial", - ([2, 3], pytest.param([3], marks=pytest.mark.xfail)), - ids=str, - ) + @pytest.mark.parametrize("initial", ([2, 3], [3]), ids=str) def test_initial_list(self, initial): - expected_exc = ValueError + expected_exc = TypeError if is_np2 else ValueError arr = [[1, 2], [3, 4]] - # Numpy raises ValueError: - # Input object to FillWithScalar is not a scalar with pytest.raises(expected_exc): np.prod(arr, initial=initial) - # when LEGATE_TEST=1, cuNumeric casts list to scalar and proceeds with pytest.raises(expected_exc): num.prod(arr, initial=initial) @@ -222,7 +217,7 @@ def test_dtype_complex(self, dtype): # allclose hits assertion error: # File "/legate/cunumeric/cunumeric/eager.py", line 293, # in to_deferred_array - # assert self.runtime.is_supported_type(self.array.dtype) + # assert self.runtime.is_supported_dtype(self.array.dtype) # AssertionError assert allclose(out_np, out_num) diff --git a/tests/integration/test_put.py b/tests/integration/test_put.py index aced0ce64..ef8c593b9 100644 --- a/tests/integration/test_put.py +++ 
b/tests/integration/test_put.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -154,7 +154,7 @@ def test_ndim_default_mode(ndim): assert np.array_equal(np_arr, num_arr) -INDICES = ([1, 2, 3.2, 100], [[2, 2], [3, 100]], [1], [100]) +INDICES = ([1, 2, 3.2, 100], [[2, 1], [3, 100]], [1], [100]) @pytest.mark.parametrize("ndim", range(1, LEGATE_MAX_DIM + 1)) diff --git a/tests/integration/test_put_along_axis.py b/tests/integration/test_put_along_axis.py index 9386f8d92..89cb3725a 100644 --- a/tests/integration/test_put_along_axis.py +++ b/tests/integration/test_put_along_axis.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_putmask.py b/tests/integration/test_putmask.py index 26926b1be..fa40f7fb0 100644 --- a/tests/integration/test_putmask.py +++ b/tests/integration/test_putmask.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_qr.py b/tests/integration/test_qr.py new file mode 100644 index 000000000..80bcea47e --- /dev/null +++ b/tests/integration/test_qr.py @@ -0,0 +1,110 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest +from utils.comparisons import allclose + +import cunumeric as num + +SIZES = (8, 9, 255) + +RTOL = { + np.dtype(np.float32): 1e-1, + np.dtype(np.complex64): 1e-1, + np.dtype(np.float64): 1e-5, + np.dtype(np.complex128): 1e-5, +} + +ATOL = { + np.dtype(np.float32): 1e-3, + np.dtype(np.complex64): 1e-3, + np.dtype(np.float64): 1e-8, + np.dtype(np.complex128): 1e-8, +} + + +@pytest.mark.parametrize("m", SIZES) +@pytest.mark.parametrize("n", SIZES) +@pytest.mark.parametrize( + "a_dtype", (np.float32, np.float64, np.complex64, np.complex128) +) +def test_qr(m, n, a_dtype): + a = np.random.rand(m, n).astype(a_dtype) + + q, r = num.linalg.qr(a) + + rtol = RTOL[a.dtype] + atol = ATOL[a.dtype] + assert allclose( + a, num.matmul(q, r), rtol=rtol, atol=atol, check_dtype=False + ) + + +def test_qr_corner_cases(): + a = num.random.rand(1, 1) + + q, r = num.linalg.qr(a) + assert allclose(a, num.matmul(q, r)) + + +@pytest.mark.parametrize("dtype", (np.int32, np.int64)) +def test_qr_dtype_int(dtype): + a_array = [[1, 4, 5], [2, 3, 1], [9, 5, 2]] + a = num.array(a_array).astype(dtype) + + q, r = num.linalg.qr(a) + + rtol = RTOL[q.dtype] + atol = ATOL[q.dtype] + assert allclose( + a, num.matmul(q, r), rtol=rtol, atol=atol, check_dtype=False + ) + + +class TestQrErrors: + def setup_method(self): + self.n = 3 + self.a = num.random.rand(self.n, self.n).astype(np.float64) + self.b = num.random.rand(self.n).astype(np.float64) + + def test_a_bad_dim(self): + a = num.random.rand(self.n).astype(np.float64) + msg = "Array must be at least 
two-dimensional" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.qr(a) + + a = 10 + msg = "Array must be at least two-dimensional" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.qr(a) + + def test_a_dim_greater_than_two(self): + a = num.random.rand(self.n, self.n, self.n).astype(np.float64) + with pytest.raises(NotImplementedError): + num.linalg.qr(a) + + def test_a_bad_dtype_float16(self): + a = self.a.astype(np.float16) + msg = "array type float16 is unsupported in linalg" + with pytest.raises(TypeError, match=msg): + num.linalg.qr(a) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_quantiles.py b/tests/integration/test_quantiles.py index 3458a2ea0..26ea3dfb6 100644 --- a/tests/integration/test_quantiles.py +++ b/tests/integration/test_quantiles.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -39,7 +39,7 @@ @pytest.mark.parametrize("str_method", ALL_METHODS) -@pytest.mark.parametrize("axes", (0, 1, (0, 1), (0, 2))) +@pytest.mark.parametrize("axes", (0, 1, (0,), (0, 1), (0, 2))) @pytest.mark.parametrize( "qin_arr", (0.5, [0.001, 0.37, 0.42, 0.67, 0.83, 0.99, 0.39, 0.49, 0.5]) ) @@ -410,6 +410,26 @@ def test_non_ndarray_input(str_method, qs_arr, arr): assert allclose(np_q_out, q_out, atol=eps) +@pytest.mark.parametrize("str_method", ALL_METHODS) +@pytest.mark.parametrize( + "qs_arr", + ( + 0.5, + np.ndarray( + shape=(2, 3), buffer=np.array([x / 6.0 for x in range(0, 6)]) + ), + ), +) +@pytest.mark.parametrize("arr", (3 + 3.0j, [2 + 2.0j, 1 + 1.0j])) +def test_complex_ndarray_input(str_method, qs_arr, arr): + expected_msg = "input array cannot be of complex type" + with pytest.raises(TypeError, match=expected_msg): + num.quantile(arr, qs_arr, method=str_method) + expected_msg = "array of real numbers" + with pytest.raises(TypeError, match=expected_msg): + np.quantile(arr, qs_arr, method=str_method) + + @pytest.mark.parametrize("str_method", ALL_METHODS) @pytest.mark.parametrize( "qs_arr", @@ -455,6 +475,28 @@ def test_output_conversion(str_method, qs_arr, keepdims): assert abs(q_out - np_q_out) < eps +@pytest.mark.parametrize("str_method", ALL_METHODS) +@pytest.mark.parametrize( + "qs_arr", + ( + np.ndarray( + shape=(2, 3), buffer=np.array([x / 6.0 for x in range(0, 6)]) + ), + ), +) +@pytest.mark.parametrize("arr", (3, [2, 1])) +def test_wrong_shape(str_method, qs_arr, arr): + q_out = num.zeros((3, 1), dtype=np.dtype("float32")) + q_np_out = np.zeros((3, 1), dtype=np.dtype("float32")) + expected_msg = "wrong shape on output array" + with pytest.raises(ValueError, match=expected_msg): + num.quantile(arr, qs_arr, method=str_method, keepdims=False, out=q_out) + with pytest.raises(ValueError): + np.quantile( + arr, qs_arr, method=str_method, keepdims=False, out=q_np_out + ) + + if __name__ == "__main__": import sys diff --git 
a/tests/integration/test_randint.py b/tests/integration/test_randint.py index 84bd4778c..de8311687 100644 --- a/tests/integration/test_randint.py +++ b/tests/integration/test_randint.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_random.py b/tests/integration/test_random.py new file mode 100644 index 000000000..34041ff16 --- /dev/null +++ b/tests/integration/test_random.py @@ -0,0 +1,99 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import numpy as np +import pytest + +import cunumeric as num + + +def test_basic_num() -> None: + num.random.seed(10) + L1 = num.random.randn(3, 3) + num.random.seed(10) + L2 = num.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + num.random.seed(10) + L1 = num.random.randn(3, 3) + L2 = num.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + +def test_basic_np() -> None: + np.random.seed(10) + L1 = np.random.randn(3, 3) + np.random.seed(10) + L2 = np.random.randn(3, 3) + assert np.array_equal(L1, L2) + + np.random.seed(10) + L1 = np.random.randn(3, 3) + L2 = np.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + +def test_none_num() -> None: + num.random.seed() + L1 = num.random.randn(3, 3) + num.random.seed() + L2 = num.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + num.random.seed() + L1 = num.random.randn(3, 3) + L2 = num.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + +def test_none_np() -> None: + np.random.seed() + L1 = np.random.randn(3, 3) + np.random.seed() + L2 = np.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + np.random.seed() + L1 = np.random.randn(3, 3) + L2 = np.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + +def test_basic_num_np() -> None: + np.random.seed(10) + L1 = np.random.randn(3, 3) + num.random.seed(10) + L2 = num.random.randn(3, 3) + assert not np.array_equal(L1, L2) + + +def test_RandomState() -> None: + rdm_num = num.random.RandomState(10) + L1 = rdm_num.randn(3, 3) + rdm_np = np.random.RandomState(10) + L2 = rdm_np.randn(3, 3) + assert np.array_equal(L1, L2) + + +def test_float() -> None: + with pytest.raises(TypeError): + np.random.seed(10.5) + # TypeError: 'float' object cannot be interpreted as an integer + num.random.seed(10.5) + # cuNumeric passed with float + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_random_advanced.py b/tests/integration/test_random_advanced.py index 72d6861c2..decc8d01e 
100644 --- a/tests/integration/test_random_advanced.py +++ b/tests/integration/test_random_advanced.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_random_beta.py b/tests/integration/test_random_beta.py index 8abb4f1a7..6d2783b50 100644 --- a/tests/integration/test_random_beta.py +++ b/tests/integration/test_random_beta.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_random_bitgenerator.py b/tests/integration/test_random_bitgenerator.py index 7cc6c91f3..864fbb965 100644 --- a/tests/integration/test_random_bitgenerator.py +++ b/tests/integration/test_random_bitgenerator.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_random_creation.py b/tests/integration/test_random_creation.py index f4bdecec7..e4391507a 100644 --- a/tests/integration/test_random_creation.py +++ b/tests/integration/test_random_creation.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,7 +14,7 @@ # import os -from typing import Any, Tuple +from typing import Any import numpy as np import pytest @@ -41,14 +41,14 @@ def test_randn(): def reseed_and_gen_random( func: str, seed: Any, *args: Any, **kwargs: Any -) -> Tuple[Any, Any]: +) -> tuple[Any, Any]: """Reseeed singleton rng and generate random in NumPy and cuNumeric.""" return gen_random_from_both(func, *args, **kwargs) def gen_random_from_both( func: str, *args: Any, **kwargs: Any -) -> Tuple[Any, Any]: +) -> tuple[Any, Any]: """Call the same random function from both NumPy and cuNumeric.""" return ( getattr(np.random, func)(*args, **kwargs), @@ -150,6 +150,7 @@ def test_default_rng_bitgenerator(): EAGER_TEST, reason="cuNumeric does not respect seed in Eager mode", ) +@pytest.mark.xfail(reason="cunumeric.internal#135") def test_default_rng_generator(): steps = 3 seed = 12345 @@ -183,7 +184,7 @@ def test_rand(shape): LARGE_RNG_SIZES = [10000, (20, 50, 4)] ALL_RNG_SIZES = SMALL_RNG_SIZES + LARGE_RNG_SIZES + [None] INT_DTYPES = [np.int64, np.int32, np.int16] -UINT_DTYPES = [np.uint64, np.uint16, np.uint0] +UINT_DTYPES = [np.uint64, np.uint16, np.uintp] FLOAT_DTYPES = [np.float16, np.float128, np.float64] diff --git a/tests/integration/test_random_gamma.py b/tests/integration/test_random_gamma.py index 4aa03d194..6e6edd3c7 100644 --- a/tests/integration/test_random_gamma.py +++ b/tests/integration/test_random_gamma.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_random_straightforward.py b/tests/integration/test_random_straightforward.py index 9ab7d2499..8c3363e47 100644 --- a/tests/integration/test_random_straightforward.py +++ b/tests/integration/test_random_straightforward.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_reduction.py b/tests/integration/test_reduction.py index f3379265b..2039f815d 100644 --- a/tests/integration/test_reduction.py +++ b/tests/integration/test_reduction.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ import numpy as np import pytest from utils.comparisons import allclose +from utils.utils import AxisError import cunumeric as num @@ -56,7 +57,7 @@ (DIM, DIM, DIM), ] -ARR = ([], [[]], [[], []], np.inf, np.Inf, -10.3, 0, 200, 5 + 8j) +ARR = ([], [[]], [[], []], np.inf, -10.3, 0, 200, 5 + 8j) DTYPE = ["l", "L", "f", "d"] COMPLEX_TYPE = ["F", "D"] @@ -92,7 +93,7 @@ def test_dtype_negative(self, dtype): def test_axis_out_bound(self): arr = [-1, 0, 1, 2, 10] msg = r"bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.sum(arr, axis=2) @pytest.mark.xfail diff --git a/tests/integration/test_repeat.py b/tests/integration/test_repeat.py index 3023f97c8..0df92ff17 100644 --- a/tests/integration/test_repeat.py +++ b/tests/integration/test_repeat.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,6 +16,7 @@ import pytest from legate.core import LEGATE_MAX_DIM from utils.generators import mk_seq_array +from utils.utils import AxisError import cunumeric as num @@ -186,7 +187,7 @@ def test_axis_string(arr, repeats, axis): def test_array_axis_out_bound(): anp = np.array([1, 2, 3, 4, 5]) - expected_exc = np.AxisError + expected_exc = AxisError with pytest.raises(expected_exc): np.repeat(anp, 4, 2) with pytest.raises(expected_exc): diff --git a/tests/integration/test_reshape.py b/tests/integration/test_reshape.py index ee39c6d5f..696ca0f41 100644 --- a/tests/integration/test_reshape.py +++ b/tests/integration/test_reshape.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_scan.py b/tests/integration/test_scan.py index 1d99f8e7a..4b102f39c 100644 --- a/tests/integration/test_scan.py +++ b/tests/integration/test_scan.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_searchsorted.py b/tests/integration/test_searchsorted.py index c3d1461af..8ef77f944 100644 --- a/tests/integration/test_searchsorted.py +++ b/tests/integration/test_searchsorted.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -83,7 +83,7 @@ def test_val_none(self): # cuNumeric raises AssertionError # if self.deferred is None: # if self.parent is None: - # > assert self.runtime.is_supported_type + # > assert self.runtime.is_supported_dtype # (self.array.dtype) # E AssertionError # cunumeric/cunumeric/eager.py:to_deferred_array() diff --git a/tests/integration/test_set_item.py b/tests/integration/test_set_item.py index 8f9b4a1ac..bfbda631d 100644 --- a/tests/integration/test_set_item.py +++ b/tests/integration/test_set_item.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_setflags.py b/tests/integration/test_setflags.py index 561c6a2cf..a3bc81699 100644 --- a/tests/integration/test_setflags.py +++ b/tests/integration/test_setflags.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_shape.py b/tests/integration/test_shape.py index 03656b6c5..e134f64ae 100644 --- a/tests/integration/test_shape.py +++ b/tests/integration/test_shape.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_singleton_access.py b/tests/integration/test_singleton_access.py index 118d2828f..a719f42bf 100644 --- a/tests/integration/test_singleton_access.py +++ b/tests/integration/test_singleton_access.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -64,38 +64,24 @@ def array_gen(lib): yield arr for arr in nonscalar_gen(lib): idx_tuple = arr.ndim * (2,) - flat_idx = 0 - for i, x in enumerate(idx_tuple): - flat_idx *= arr.shape[i] - flat_idx += x - arr.itemset(flat_idx, -1) - yield arr - for arr in nonscalar_gen(lib): - idx_tuple = arr.ndim * (2,) - arr.itemset(idx_tuple, -1) + arr[idx_tuple] = -1 yield arr for arr in nonscalar_gen(lib): idx_tuple = arr.ndim * (2,) - arr.itemset(*idx_tuple, -1) + arr[idx_tuple] = -1 yield arr # set single item on scalar array for arr in scalar_gen(lib, 42): idx_tuple = arr.ndim * (0,) arr[idx_tuple] = -1 yield arr - for arr in scalar_gen(lib, 42): - arr.itemset(-1) - yield arr - for arr in scalar_gen(lib, 42): - arr.itemset(0, -1) - yield arr for arr in scalar_gen(lib, 42): idx_tuple = arr.ndim * (0,) - arr.itemset(idx_tuple, -1) + arr[idx_tuple] = -1 yield arr for arr in scalar_gen(lib, 42): idx_tuple = arr.ndim * (0,) - arr.itemset(*idx_tuple, -1) + arr[idx_tuple] = -1 yield arr # set "multiple" items on scalar array for arr in scalar_gen(lib, 42): diff --git a/tests/integration/test_slicing.py b/tests/integration/test_slicing.py index 73b4a4ee4..38e4a5f51 100644 --- a/tests/integration/test_slicing.py +++ b/tests/integration/test_slicing.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_solve.py b/tests/integration/test_solve.py index e9b0e2015..574769300 100644 --- a/tests/integration/test_solve.py +++ b/tests/integration/test_solve.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -51,6 +51,9 @@ def test_solve_1d(n, a_dtype, b_dtype): rtol = RTOL[out.dtype] atol = ATOL[out.dtype] + if n > 1024: + atol *= 20.0 + assert allclose( b, num.matmul(a, out), rtol=rtol, atol=atol, check_dtype=False ) @@ -71,6 +74,9 @@ def test_solve_2d(n, a_dtype, b_dtype): rtol = RTOL[out.dtype] atol = ATOL[out.dtype] + if n > 1024: + atol *= 20.0 + assert allclose( b, num.matmul(a, out), rtol=rtol, atol=atol, check_dtype=False ) @@ -115,7 +121,7 @@ def test_solve_with_output(): n = 8 a = np.random.rand(n, n).astype(np.float32) b = np.random.rand(n).astype(np.float32) - output = np.zeros((n,)).astype(np.float32) + output = num.zeros((n,)).astype(np.float32) out = num.linalg.solve(a, b, out=output) diff --git a/tests/integration/test_sort.py b/tests/integration/test_sort.py index 1fdfc2f13..2618d0491 100644 --- a/tests/integration/test_sort.py +++ b/tests/integration/test_sort.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -52,7 +52,7 @@ class TestSort(object): def test_arr_none(self): res_np = np.sort( None - ) # numpy.AxisError: axis -1 is out of bounds for array of dimension 0 + ) # AxisError: axis -1 is out of bounds for array of dimension 0 res_num = num.sort( None ) # AttributeError: 'NoneType' object has no attribute 'shape' diff --git a/tests/integration/test_sort_complex.py b/tests/integration/test_sort_complex.py index d2ff93c77..eeb0bc85d 100644 --- a/tests/integration/test_sort_complex.py +++ b/tests/integration/test_sort_complex.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -48,7 +48,7 @@ class TestSortComplex(object): def test_arr_none(self): res_np = np.sort_complex( None - ) # numpy.AxisError: axis 0 is out of bounds for array of dimension 0 + ) # AxisError: axis 0 is out of bounds for array of dimension 0 res_num = num.sort_complex( None ) # AttributeError: 'NoneType' object has no attribute 'shape' diff --git a/tests/integration/test_split.py b/tests/integration/test_split.py index 7646d478b..3fcf6aa66 100644 --- a/tests/integration/test_split.py +++ b/tests/integration/test_split.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_squeeze.py b/tests/integration/test_squeeze.py index 14c2fda0d..c68cae279 100644 --- a/tests/integration/test_squeeze.py +++ b/tests/integration/test_squeeze.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -15,6 +15,7 @@ import numpy as np import pytest +from utils.utils import AxisError import cunumeric as num @@ -74,7 +75,7 @@ def test_num_axis_out_bound(): size = (1, 2, 1) a = num.random.randint(low=-10, high=10, size=size) msg = r"bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): num.squeeze(a, axis=3) @@ -82,7 +83,7 @@ def test_array_axis_out_bound(): size = (1, 2, 1) a = num.random.randint(-10, 10, size=size) msg = r"bounds" - with pytest.raises(np.AxisError, match=msg): + with pytest.raises(AxisError, match=msg): a.squeeze(axis=3) diff --git a/tests/integration/test_stats.py b/tests/integration/test_stats.py index 256a6b77f..7f3970f3c 100644 --- a/tests/integration/test_stats.py +++ b/tests/integration/test_stats.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -215,6 +215,102 @@ def test_var_xfail(dtype, ddof, axis, shape): check_op(op_np, op_num, np_in, dtype, negative_test=True) +@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("rowvar", [True, False]) +@pytest.mark.parametrize("ddof", [None, 0, 1]) +def test_cov(dtype, rowvar, ddof): + np_in = get_op_input(astype=dtype) + num_in = num.array(np_in) + + np_out = np.cov(np_in, rowvar=rowvar, ddof=ddof) + num_out = num.cov(num_in, rowvar=rowvar, ddof=ddof) + if dtype == dtypes[0]: + assert allclose(np_out, num_out, atol=1e-2) + else: + assert allclose(np_out, num_out) + + +fweights_base = [[9, 2, 1, 2, 3], [1, 1, 3, 2, 4], None] +np_aweights_base = [ + np.abs(get_op_input(astype=dtype, shape=(5,))) for dtype in dtypes +] + [[0.03, 0.04, 01.01, 0.02, 0.08], None] + + +@pytest.mark.parametrize("dtype", dtypes) +@pytest.mark.parametrize("bias", [True, False]) +@pytest.mark.parametrize("ddof", [None, 0, 1]) +@pytest.mark.parametrize("fweights", fweights_base) +@pytest.mark.parametrize("np_aweights", np_aweights_base) +def test_cov_full(dtype, bias, ddof, fweights, np_aweights): + np_in = get_op_input(astype=dtype, shape=(4, 5)) + num_in = num.array(np_in) + if fweights is not None: + np_fweights = np.array(fweights) + num_fweights = num.array(fweights) + else: + np_fweights = None + num_fweights = None + if isinstance(np_aweights, np.ndarray): + num_aweights = num.array(np_aweights) + else: + num_aweights = np_aweights + # num_aweights = None + # np_aweights = None + + np_out = np.cov( + np_in, bias=bias, ddof=ddof, fweights=np_fweights, aweights=np_aweights + ) + num_out = num.cov( + num_in, + bias=bias, + ddof=ddof, + fweights=num_fweights, + aweights=num_aweights, + ) + # if dtype == dtypes[0]: + # assert allclose(np_out, num_out, atol=1e-2) + # else: + # assert allclose(np_out, num_out) + assert allclose(np_out, num_out, atol=1e-2) + + +@pytest.mark.parametrize("ddof", [None, 0, 1]) +@pytest.mark.parametrize("fweights", fweights_base) 
+@pytest.mark.parametrize("np_aweights", np_aweights_base) +def test_cov_dtype_scaling(ddof, fweights, np_aweights): + np_in = np.array( + [ + [1 + 3j, 1 - 1j, 2 + 2j, 4 + 3j, -1 + 2j], + [1 + 3j, 1 - 1j, 2 + 2j, 4 + 3j, -1 + 2j], + ] + ) + num_in = num.array(np_in) + if fweights is not None: + np_fweights = np.array(fweights) + num_fweights = num.array(fweights) + else: + np_fweights = None + num_fweights = None + if isinstance(np_aweights, np.ndarray): + num_aweights = num.array(np_aweights) + else: + num_aweights = np_aweights + + np_out = np.cov( + np_in, + ddof=ddof, + fweights=np_fweights, + aweights=np_aweights, + ) + num_out = num.cov( + num_in, + ddof=ddof, + fweights=num_fweights, + aweights=num_aweights, + ) + assert allclose(np_out, num_out, atol=1e-2) + + if __name__ == "__main__": import sys diff --git a/tests/integration/test_svd.py b/tests/integration/test_svd.py new file mode 100644 index 000000000..e7394d923 --- /dev/null +++ b/tests/integration/test_svd.py @@ -0,0 +1,131 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import numpy as np +import pytest +from utils.comparisons import allclose + +import cunumeric as num + +SIZES = (8, 9, 255) + +RTOL = { + np.dtype(np.int32): 1e-1, + np.dtype(np.int64): 1e-1, + np.dtype(np.float32): 1e-1, + np.dtype(np.complex64): 1e-1, + np.dtype(np.float64): 1e-5, + np.dtype(np.complex128): 1e-5, +} + +ATOL = { + np.dtype(np.int32): 1e-3, + np.dtype(np.int64): 1e-3, + np.dtype(np.float32): 1e-3, + np.dtype(np.complex64): 1e-3, + np.dtype(np.float64): 1e-8, + np.dtype(np.complex128): 1e-8, +} + + +def assert_result(a, u, s, vh): + # (u * s) @ vh + m = a.shape[0] + n = a.shape[1] + k = min(m, n) + + if k < m: + u = u[:, :k] + + a2 = num.matmul(u * s, vh) + + rtol = RTOL[a.dtype] + atol = ATOL[a.dtype] + assert allclose(a, a2, rtol=rtol, atol=atol, check_dtype=False) + + +@pytest.mark.parametrize("m", SIZES) +@pytest.mark.parametrize("n", SIZES) +@pytest.mark.parametrize( + "a_dtype", (np.float32, np.float64, np.complex64, np.complex128) +) +def test_svd(m, n, a_dtype): + if m < n: + pytest.skip() + + if np.issubdtype(a_dtype, np.complexfloating): + a = np.random.rand(m, n) + np.random.rand(m, n) * 1j + else: + a = np.random.rand(m, n) + + a = a.astype(a_dtype) + + u, s, vh = num.linalg.svd(a) + + assert_result(a, u, s, vh) + + +def test_svd_corner_cases(): + a = num.random.rand(1, 1) + + u, s, vh = num.linalg.svd(a) + + assert_result(a, u, s, vh) + + +@pytest.mark.parametrize("dtype", (np.int32, np.int64)) +def test_svd_dtype_int(dtype): + a_array = [[1, 4, 5], [2, 3, 1], [9, 5, 2]] + a = num.array(a_array).astype(dtype) + + u, s, vh = num.linalg.svd(a) + + assert_result(a, u, s, vh) + + +class TestSvdErrors: + def setup_method(self): + self.n = 3 + self.a = num.random.rand(self.n, self.n).astype(np.float64) + self.b = num.random.rand(self.n).astype(np.float64) + + def test_a_bad_dim(self): + a = num.random.rand(self.n).astype(np.float64) + msg = "Array must be at least two-dimensional" + with pytest.raises(num.linalg.LinAlgError, match=msg): 
+ num.linalg.svd(a) + + a = 10 + msg = "Array must be at least two-dimensional" + with pytest.raises(num.linalg.LinAlgError, match=msg): + num.linalg.svd(a) + + def test_a_dim_greater_than_two(self): + a = num.random.rand(self.n, self.n, self.n).astype(np.float64) + with pytest.raises(NotImplementedError): + num.linalg.svd(a) + + def test_a_bad_dtype_float16(self): + a = self.a.astype(np.float16) + msg = "array type float16 is unsupported in linalg" + with pytest.raises(TypeError, match=msg): + num.linalg.svd(a) + + +if __name__ == "__main__": + import sys + + np.random.seed(12345) + sys.exit(pytest.main(sys.argv)) diff --git a/tests/integration/test_swapaxes.py b/tests/integration/test_swapaxes.py index 0217019c9..ded45a663 100644 --- a/tests/integration/test_swapaxes.py +++ b/tests/integration/test_swapaxes.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_take.py b/tests/integration/test_take.py index afa997a14..07e34567a 100644 --- a/tests/integration/test_take.py +++ b/tests/integration/test_take.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_take_along_axis.py b/tests/integration/test_take_along_axis.py index 1597b2ff6..a2c930e10 100644 --- a/tests/integration/test_take_along_axis.py +++ b/tests/integration/test_take_along_axis.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_tensordot.py b/tests/integration/test_tensordot.py index dd2873be3..0091a4627 100644 --- a/tests/integration/test_tensordot.py +++ b/tests/integration/test_tensordot.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from utils.generators import mk_0to1_array import cunumeric as num -from cunumeric.utils import tensordot_modes +from cunumeric._utils.linalg import tensordot_modes def gen_axes(a_ndim, b_ndim): diff --git a/tests/integration/test_tile.py b/tests/integration/test_tile.py index 1bfc1dcf8..72ce9ee61 100644 --- a/tests/integration/test_tile.py +++ b/tests/integration/test_tile.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_trace.py b/tests/integration/test_trace.py index aad02ca28..4423f83a9 100644 --- a/tests/integration/test_trace.py +++ b/tests/integration/test_trace.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_transpose.py b/tests/integration/test_transpose.py index 4162df713..f97fa3b19 100644 --- a/tests/integration/test_transpose.py +++ b/tests/integration/test_transpose.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -104,7 +104,7 @@ def test_axes_1d_int(self, size, axes): # For cunumeric, if array.dim==1, it returns the array itself directly, # no matter what the axes value is. # For numpy, it raises - # "numpy.AxisError: axis * is out of bounds for array of dimension 1". + # "AxisError: axis * is out of bounds for array of dimension 1". a = np.random.randint(low=-10, high=10, size=size) b = num.array(a) res_np = np.transpose(a, axes=axes) @@ -215,7 +215,7 @@ def test_axes_1d_int(self, size, axes): # For cunumeric, if array.dim==1, it returns the array itself directly, # no matter what the axes value is. # For Numpy, it raises - # "numpy.AxisError: axis * is out of bounds for array of dimension 1". + # "AxisError: axis * is out of bounds for array of dimension 1". a = np.random.randint(low=-10, high=10, size=size) b = num.array(a) res_np = a.transpose(axes) diff --git a/tests/integration/test_tri.py b/tests/integration/test_tri.py index 127180064..194ed2b12 100644 --- a/tests/integration/test_tri.py +++ b/tests/integration/test_tri.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_trilu.py b/tests/integration/test_trilu.py index 10e5f22ca..30b22c51c 100644 --- a/tests/integration/test_trilu.py +++ b/tests/integration/test_trilu.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_trilu_indices.py b/tests/integration/test_trilu_indices.py index a069fda6f..8f1604e45 100644 --- a/tests/integration/test_trilu_indices.py +++ b/tests/integration/test_trilu_indices.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_unary_functions_2d_complex.py b/tests/integration/test_unary_functions_2d_complex.py index bd0b92174..df3a89603 100644 --- a/tests/integration/test_unary_functions_2d_complex.py +++ b/tests/integration/test_unary_functions_2d_complex.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_unary_ufunc.py b/tests/integration/test_unary_ufunc.py index 9d0021613..5e264c677 100644 --- a/tests/integration/test_unary_ufunc.py +++ b/tests/integration/test_unary_ufunc.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_unique.py b/tests/integration/test_unique.py index 28374586c..a6c701330 100644 --- a/tests/integration/test_unique.py +++ b/tests/integration/test_unique.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_update.py b/tests/integration/test_update.py index 49ea59bff..3f4d76b8e 100644 --- a/tests/integration/test_update.py +++ b/tests/integration/test_update.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_vdot.py b/tests/integration/test_vdot.py index 2cc380587..0b9a20197 100644 --- a/tests/integration/test_vdot.py +++ b/tests/integration/test_vdot.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_view.py b/tests/integration/test_view.py index ab27ed19f..1894dc60a 100644 --- a/tests/integration/test_view.py +++ b/tests/integration/test_view.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/test_where.py b/tests/integration/test_where.py index cd66c0ce7..a8193f9bf 100644 --- a/tests/integration/test_where.py +++ b/tests/integration/test_where.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/integration/test_window.py b/tests/integration/test_window.py index 16afeec9a..71503b8f9 100644 --- a/tests/integration/test_window.py +++ b/tests/integration/test_window.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/utils/__init__.py b/tests/integration/utils/__init__.py index 6cb8191ee..9353a8a60 100644 --- a/tests/integration/utils/__init__.py +++ b/tests/integration/utils/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/utils/comparisons.py b/tests/integration/utils/comparisons.py index 65571b38c..a8dd4a1f6 100644 --- a/tests/integration/utils/comparisons.py +++ b/tests/integration/utils/comparisons.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,7 +14,7 @@ # from itertools import islice -from typing import Any, Union +from typing import Any import numpy as np @@ -26,7 +26,7 @@ def allclose( atol: float = 1e-8, equal_nan: bool = False, *, - diff_limit: Union[int, None] = 5, # None means no limit at all + diff_limit: int | None = 5, # None means no limit at all check_dtype: bool = True, ) -> bool: if np.shape(a) != np.shape(b): diff --git a/tests/integration/utils/contractions.py b/tests/integration/utils/contractions.py index c590adbe4..641020b4f 100644 --- a/tests/integration/utils/contractions.py +++ b/tests/integration/utils/contractions.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/utils/generators.py b/tests/integration/utils/generators.py index 6227a66ce..f96a3cf1c 100644 --- a/tests/integration/utils/generators.py +++ b/tests/integration/utils/generators.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/integration/utils/random.py b/tests/integration/utils/random.py index 89d21c20d..afca73c31 100644 --- a/tests/integration/utils/random.py +++ b/tests/integration/utils/random.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -26,11 +26,11 @@ def __init__( self.bit_generator = num.random.XORWOW(seed) def random_raw(self, shape): - gen = num.random.generator.get_static_generator() + gen = num.random._generator.get_static_generator() return gen.bit_generator.random_raw(shape) def integers(self, low, high, size, dtype, endpoint): - return num.random.generator.get_static_generator().integers( + return num.random._generator.get_static_generator().integers( low, high, size, dtype, endpoint ) diff --git a/tests/integration/utils/utils.py b/tests/integration/utils/utils.py index 892154d45..ee885b157 100644 --- a/tests/integration/utils/utils.py +++ b/tests/integration/utils/utils.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,6 +16,12 @@ import numpy as np import cunumeric as num +from cunumeric._utils import is_np2 + +if is_np2: + from numpy.exceptions import AxisError # noqa: F401 +else: + from numpy import AxisError # noqa: F401 def compare_array(a, b, check_type=True): diff --git a/tests/todo/2d_reduction_complex.py b/tests/todo/2d_reduction_complex.py index 5b209ee2d..0ed704be4 100644 --- a/tests/todo/2d_reduction_complex.py +++ b/tests/todo/2d_reduction_complex.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/todo/assign_slice.py b/tests/todo/assign_slice.py index dfc14f514..75fa19af6 100644 --- a/tests/todo/assign_slice.py +++ b/tests/todo/assign_slice.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/todo/complex_test.py b/tests/todo/complex_test.py index a1be533ec..22f1d667c 100644 --- a/tests/todo/complex_test.py +++ b/tests/todo/complex_test.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/todo/dot.py b/tests/todo/dot.py index 394ecf34c..e90bef46d 100644 --- a/tests/todo/dot.py +++ b/tests/todo/dot.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/todo/indirect.py b/tests/todo/indirect.py index 338813557..4e2449421 100644 --- a/tests/todo/indirect.py +++ b/tests/todo/indirect.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/todo/kmeans_test.py b/tests/todo/kmeans_test.py index 4e5d19385..f9d0a6c8c 100644 --- a/tests/todo/kmeans_test.py +++ b/tests/todo/kmeans_test.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/todo/lstm_batch.py b/tests/todo/lstm_batch.py index aea17ce01..97b57a1d2 100644 --- a/tests/todo/lstm_batch.py +++ b/tests/todo/lstm_batch.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
diff --git a/tests/todo/lstm_simple_backward.py b/tests/todo/lstm_simple_backward.py index 716fe0148..772af1a91 100644 --- a/tests/todo/lstm_simple_backward.py +++ b/tests/todo/lstm_simple_backward.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 000000000..08e567f98 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations diff --git a/tests/unit/cunumeric/__init__.py b/tests/unit/cunumeric/__init__.py new file mode 100644 index 000000000..08e567f98 --- /dev/null +++ b/tests/unit/cunumeric/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations diff --git a/tests/unit/cunumeric/_array/__init__.py b/tests/unit/cunumeric/_array/__init__.py new file mode 100644 index 000000000..08e567f98 --- /dev/null +++ b/tests/unit/cunumeric/_array/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations diff --git a/tests/unit/cunumeric/_array/test_util.py b/tests/unit/cunumeric/_array/test_util.py new file mode 100644 index 000000000..1f5c26e79 --- /dev/null +++ b/tests/unit/cunumeric/_array/test_util.py @@ -0,0 +1,236 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pytest +from mock import MagicMock +from pytest_mock import MockerFixture + +import cunumeric._array.util as m # module under test + +from ...util import powerset + + +@m.add_boilerplate() +def _out_implicit(a, b, out): + pass + + +@m.add_boilerplate("out") +def _out_explicit(a, b, out): + pass + + +@m.add_boilerplate() +def _where_implicit(a, b, where): + pass + + +@m.add_boilerplate("where") +def _where_explicit(a, b, where): + pass + + +@pytest.fixture(autouse=True) +def mock_convert(mocker: MockerFixture) -> MagicMock: + return mocker.patch("cunumeric._array.util.convert_to_cunumeric_ndarray") + + +class Test_add_boilerplate_bad: + def test_bad_repeat(self) -> None: + with pytest.raises(AssertionError): + + @m.add_boilerplate("a", "a") + def _bad_repeat(a, b): + pass + + def test_bad_extra(self) -> None: + with pytest.raises(AssertionError): + + @m.add_boilerplate("c") + def _bad_repeat(a, b): + pass + + +class Test_add_boilerplate_args: + @pytest.mark.parametrize("args", powerset("abc")) + def test_args_positional_None(self, args, mock_convert: MagicMock) -> None: + @m.add_boilerplate(*args) + def func(a, b, c): + pass + + func(None, None, None) + + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("abc")) + def test_args_positional_value( + self, args, mock_convert: MagicMock + ) -> None: + @m.add_boilerplate(*args) + def func(a, b, c): + pass + + vals = (1, 2, 3) + + func(*vals) + + assert mock_convert.call_count == len(args) + expected = ( + val for (arg, val) in zip(tuple("abc"), vals) if arg in args + ) + for item in expected: + mock_convert.assert_any_call(item) + + @pytest.mark.parametrize("args", powerset("abc")) + def test_args_kwargs_None(self, args, mock_convert: MagicMock) -> None: + @m.add_boilerplate(*args) + def func(a, b, c): + pass + + func(a=None, b=None, c=None) + + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("abc")) + def test_args_kwargs_value(self, args, 
mock_convert: MagicMock) -> None: + @m.add_boilerplate(*args) + def func(a, b, c): + pass + + vals = (1, 2, 3) + + func(**dict(zip(tuple("abc"), vals))) + + assert mock_convert.call_count == len(args) + expected = ( + val for (arg, val) in zip(tuple("abc"), vals) if arg in args + ) + for item in expected: + mock_convert.assert_any_call(item) + + +class Test_add_boilerplate_out: + def test_implicit_positional_None(self, mock_convert: MagicMock) -> None: + _out_implicit(None, None, None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_implicit_positional_value( + self, args, mock_convert: MagicMock + ) -> None: + _out_implicit(None, None, 10) + mock_convert.assert_called_once_with(10, share=True) + + def test_implicit_kwargs_None(self, mock_convert: MagicMock) -> None: + _out_implicit(None, None, out=None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_implicit_kwargs_value( + self, args, mock_convert: MagicMock + ) -> None: + _out_implicit(None, None, out=10) + mock_convert.assert_called_once_with(10, share=True) + + def test_explicit_positional_None(self, mock_convert: MagicMock) -> None: + _out_explicit(None, None, None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_explicit_positional_value( + self, args, mock_convert: MagicMock + ) -> None: + _out_explicit(None, None, 10) + mock_convert.assert_called_once_with(10, share=True) + + def test_explicit_kwargs_None(self, mock_convert: MagicMock) -> None: + _out_explicit(None, None, out=None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_explicit_kwargs_value( + self, args, mock_convert: MagicMock + ) -> None: + _out_explicit(None, None, out=10) + mock_convert.assert_called_once_with(10, share=True) + + +class Test_add_boilerplate_where: + def test_implicit_positional_None(self, mock_convert: MagicMock) -> None: + 
_where_implicit(None, None, None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_implicit_positional_value( + self, args, mock_convert: MagicMock + ) -> None: + _where_implicit(None, None, 10) + mock_convert.assert_called_once_with(10) + + def test_implicit_kwargs_None(self, mock_convert: MagicMock) -> None: + _where_implicit(None, None, where=None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_implicit_kwargs_value( + self, args, mock_convert: MagicMock + ) -> None: + _where_implicit(None, None, where=10) + mock_convert.assert_called_once_with(10) + + def test_explicit_positional_None(self, mock_convert: MagicMock) -> None: + _where_explicit(None, None, None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_explicit_positional_value( + self, args, mock_convert: MagicMock + ) -> None: + _where_explicit(None, None, 10) + mock_convert.assert_called_once_with(10) + + def test_explicit_kwargs_None(self, mock_convert: MagicMock) -> None: + _where_explicit(None, None, where=None) + assert not mock_convert.called + + @pytest.mark.parametrize("args", powerset("ab")) + def test_explicit_kwargs_value( + self, args, mock_convert: MagicMock + ) -> None: + _where_explicit(None, None, where=10) + mock_convert.assert_called_once_with(10) + + +def test_add_boilerplate_mixed(mock_convert: MagicMock) -> None: + @m.add_boilerplate( + "a", + "b", + "c", + ) + def func(a, b=2, c=None, d=None, e=5, out=None, where=None): + pass + + func(1, c=3, out=4, where=None) + + assert mock_convert.call_count == 3 + mock_convert.assert_any_call(1) + mock_convert.assert_any_call(3) + mock_convert.assert_any_call(4, share=True) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/unit/cunumeric/_sphinxext/__init__.py b/tests/unit/cunumeric/_sphinxext/__init__.py new file mode 100644 index 
000000000..350b6bdfe --- /dev/null +++ b/tests/unit/cunumeric/_sphinxext/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# + +from __future__ import annotations diff --git a/tests/unit/cunumeric/_sphinxext/test__comparison_util.py b/tests/unit/cunumeric/_sphinxext/test__comparison_util.py index d22c9451a..c36e69ffb 100644 --- a/tests/unit/cunumeric/_sphinxext/test__comparison_util.py +++ b/tests/unit/cunumeric/_sphinxext/test__comparison_util.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/cunumeric/_utils/__init__.py b/tests/unit/cunumeric/_utils/__init__.py new file mode 100644 index 000000000..08e567f98 --- /dev/null +++ b/tests/unit/cunumeric/_utils/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations diff --git a/tests/unit/cunumeric/_utils/test_array.py b/tests/unit/cunumeric/_utils/test_array.py new file mode 100644 index 000000000..34e124c47 --- /dev/null +++ b/tests/unit/cunumeric/_utils/test_array.py @@ -0,0 +1,101 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import numpy as np +import pytest + +import cunumeric._utils.array as m # module under test + +EXPECTED_SUPPORTED_DTYPES = set( + [ + np.bool_, + np.int8, + np.int16, + np.int32, + np.int64, + np.uint8, + np.uint16, + np.uint32, + np.uint64, + np.float16, + np.float32, + np.float64, + np.complex64, + np.complex128, + ] +) + + +class Test_is_advanced_indexing: + def test_Ellipsis(self): + assert not m.is_advanced_indexing(...) 
+ + def test_None(self): + assert not m.is_advanced_indexing(None) + + @pytest.mark.parametrize("typ", EXPECTED_SUPPORTED_DTYPES) + def test_np_scalar(self, typ): + assert not m.is_advanced_indexing(typ(10)) + + def test_slice(self): + assert not m.is_advanced_indexing(slice(None, 10)) + assert not m.is_advanced_indexing(slice(1, 10)) + assert not m.is_advanced_indexing(slice(None, 10, 2)) + + def test_tuple_False(self): + assert not m.is_advanced_indexing((..., None, np.int32())) + + def test_tuple_True(self): + assert m.is_advanced_indexing(([1, 2, 3], np.array([1, 2]))) + + def test_advanced(self): + assert m.is_advanced_indexing([1, 2, 3]) + assert m.is_advanced_indexing(np.array([1, 2, 3])) + + +def test__SUPPORTED_DTYPES(): + assert set(m.SUPPORTED_DTYPES.keys()) == set( + np.dtype(ty) for ty in EXPECTED_SUPPORTED_DTYPES + ) + + +class Test_is_supported_dtype: + @pytest.mark.parametrize("value", ["foo", 10, 10.2, (), set()]) + def test_type_bad(self, value) -> None: + with pytest.raises(TypeError): + m.to_core_type(value) + + @pytest.mark.parametrize("value", EXPECTED_SUPPORTED_DTYPES) + def test_supported(self, value) -> None: + m.to_core_type(value) + + # This is just a representative sample, not exhasutive + @pytest.mark.parametrize("value", [np.float128, np.datetime64, [], {}]) + def test_unsupported(self, value) -> None: + with pytest.raises(TypeError): + m.to_core_type(value) + + +@pytest.mark.parametrize( + "shape, volume", [[(), 0], [(10,), 10], [(1, 2, 3), 6]] +) +def test_calculate_volume(shape, volume) -> None: + assert m.calculate_volume(shape) == volume + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/unit/cunumeric/test_coverage.py b/tests/unit/cunumeric/_utils/test_coverage.py similarity index 99% rename from tests/unit/cunumeric/test_coverage.py rename to tests/unit/cunumeric/_utils/test_coverage.py index ca683b51c..5c6bf1aee 100644 --- a/tests/unit/cunumeric/test_coverage.py +++ 
b/tests/unit/cunumeric/_utils/test_coverage.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ from mock import MagicMock, patch import cunumeric -import cunumeric.coverage as m # module under test +import cunumeric._utils.coverage as m # module under test from cunumeric.settings import settings diff --git a/tests/unit/cunumeric/test_utils.py b/tests/unit/cunumeric/_utils/test_linalg.py similarity index 60% rename from tests/unit/cunumeric/test_utils.py rename to tests/unit/cunumeric/_utils/test_linalg.py index 3b3da8bc5..f863f55af 100644 --- a/tests/unit/cunumeric/test_utils.py +++ b/tests/unit/cunumeric/_utils/test_linalg.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,133 +13,10 @@ # limitations under the License. # -import inspect -from typing import List, Tuple, Union - import numpy as np import pytest -import cunumeric.utils as m # module under test - -EXPECTED_SUPPORTED_DTYPES = set( - [ - np.bool_, - np.int8, - np.int16, - np.int32, - np.int64, - np.uint8, - np.uint16, - np.uint32, - np.uint64, - np.float16, - np.float32, - np.float64, - np.complex64, - np.complex128, - ] -) - - -class Test_is_advanced_indexing: - def test_Ellipsis(self): - assert not m.is_advanced_indexing(...) 
- - def test_None(self): - assert not m.is_advanced_indexing(None) - - @pytest.mark.parametrize("typ", EXPECTED_SUPPORTED_DTYPES) - def test_np_scalar(self, typ): - assert not m.is_advanced_indexing(typ(10)) - - def test_slice(self): - assert not m.is_advanced_indexing(slice(None, 10)) - assert not m.is_advanced_indexing(slice(1, 10)) - assert not m.is_advanced_indexing(slice(None, 10, 2)) - - def test_tuple_False(self): - assert not m.is_advanced_indexing((..., None, np.int32())) - - def test_tuple_True(self): - assert m.is_advanced_indexing(([1, 2, 3], np.array([1, 2]))) - - def test_advanced(self): - assert m.is_advanced_indexing([1, 2, 3]) - assert m.is_advanced_indexing(np.array([1, 2, 3])) - - -def test_find_last_user_stacklevel() -> None: - n = m.find_last_user_stacklevel() - assert isinstance(n, int) - assert n == 1 - - -def test_get_line_number_from_frame() -> None: - frame = inspect.currentframe() - result = m.get_line_number_from_frame(frame) - assert isinstance(result, str) - filename, lineno = result.split(":") - - # NOTE: this will break if this test filename is changed - assert filename.endswith("test_utils.py") - - # it would be too fragile to compare more specific than this - assert int(lineno) > 0 - - -class Test_find_last_user_frames: - def check_default_top_only(self) -> None: - result = m.find_last_user_frames(top_only=True) - assert isinstance(result, str) - assert "|" not in result - assert "\n" not in result - assert len(result.split(":")) == 2 - - def test_top_only_True(self) -> None: - result = m.find_last_user_frames(top_only=True) - assert isinstance(result, str) - assert "|" not in result - assert "\n" not in result - assert len(result.split(":")) == 2 - - def test_top_only_False(self) -> None: - result = m.find_last_user_frames(top_only=False) - assert isinstance(result, str) - assert "|" in result - - # it would be too fragile to compare more specific than this - assert len(result.split("|")) > 1 - assert all(len(x.split(":")) == 2 
for x in result.split("|")) - - -def test__SUPPORTED_DTYPES(): - assert set(m.SUPPORTED_DTYPES.keys()) == set( - np.dtype(ty) for ty in EXPECTED_SUPPORTED_DTYPES - ) - - -class Test_is_supported_dtype: - @pytest.mark.parametrize("value", ["foo", 10, 10.2, (), set()]) - def test_type_bad(self, value) -> None: - with pytest.raises(TypeError): - m.to_core_dtype(value) - - @pytest.mark.parametrize("value", EXPECTED_SUPPORTED_DTYPES) - def test_supported(self, value) -> None: - m.to_core_dtype(value) - - # This is just a representative sample, not exhasutive - @pytest.mark.parametrize("value", [np.float128, np.datetime64, [], {}]) - def test_unsupported(self, value) -> None: - with pytest.raises(TypeError): - m.to_core_dtype(value) - - -@pytest.mark.parametrize( - "shape, volume", [[(), 0], [(10,), 10], [(1, 2, 3), 6]] -) -def test_calculate_volume(shape, volume) -> None: - assert m.calculate_volume(shape) == volume +import cunumeric._utils.linalg as m # module under test def _dot_modes_oracle(a_ndim: int, b_ndim: int) -> bool: @@ -219,7 +96,7 @@ def test_matmul_modes(a: int, b: int) -> None: assert _matmul_modes_oracle(a, b) -AxesType = Union[int, Tuple[int, int], Tuple[List[int], List[int]]] +AxesType = int | tuple[int, int] | tuple[list[int], list[int]] def _tensordot_modes_oracle(a_ndim: int, b_ndim: int, axes: AxesType) -> bool: diff --git a/tests/unit/cunumeric/_utils/test_stack.py b/tests/unit/cunumeric/_utils/test_stack.py new file mode 100644 index 000000000..ca4a2ed8b --- /dev/null +++ b/tests/unit/cunumeric/_utils/test_stack.py @@ -0,0 +1,70 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import inspect + +import pytest + +import cunumeric._utils.stack as m # module under test + + +def test_find_last_user_stacklevel() -> None: + n = m.find_last_user_stacklevel() + assert isinstance(n, int) + assert n == 1 + + +def test_get_line_number_from_frame() -> None: + frame = inspect.currentframe() + result = m.get_line_number_from_frame(frame) + assert isinstance(result, str) + filename, lineno = result.split(":") + + # NOTE: this will break if this test filename is changed + assert filename.endswith("test_stack.py") + + # it would be too fragile to compare more specific than this + assert int(lineno) > 0 + + +class Test_find_last_user_frames: + def check_default_top_only(self) -> None: + result = m.find_last_user_frames(top_only=True) + assert isinstance(result, str) + assert "|" not in result + assert "\n" not in result + assert len(result.split(":")) == 2 + + def test_top_only_True(self) -> None: + result = m.find_last_user_frames(top_only=True) + assert isinstance(result, str) + assert "|" not in result + assert "\n" not in result + assert len(result.split(":")) == 2 + + def test_top_only_False(self) -> None: + result = m.find_last_user_frames(top_only=False) + assert isinstance(result, str) + assert "|" in result + + # it would be too fragile to compare more specific than this + assert len(result.split("|")) > 1 + assert all(len(x.split(":")) == 2 for x in result.split("|")) + + +if __name__ == "__main__": + import sys + + sys.exit(pytest.main(sys.argv)) diff --git a/tests/unit/cunumeric/random/__init__.py 
b/tests/unit/cunumeric/random/__init__.py new file mode 100644 index 000000000..08e567f98 --- /dev/null +++ b/tests/unit/cunumeric/random/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations diff --git a/tests/unit/cunumeric/random/test_bitgenerator.py b/tests/unit/cunumeric/random/test_bitgenerator.py index 895a49ccc..d9b83ab1a 100644 --- a/tests/unit/cunumeric/random/test_bitgenerator.py +++ b/tests/unit/cunumeric/random/test_bitgenerator.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ import pytest from mock import patch -import cunumeric.random.bitgenerator as m # module under test +import cunumeric.random._bitgenerator as m # module under test from cunumeric.config import BitGeneratorType diff --git a/tests/unit/cunumeric/test_config.py b/tests/unit/cunumeric/test_config.py index f829c0279..678c05f3e 100644 --- a/tests/unit/cunumeric/test_config.py +++ b/tests/unit/cunumeric/test_config.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,31 +14,14 @@ # import pytest -from legate.core import Library -from legate.core.context import Context -from mock import patch import cunumeric.config as m # module under test -from cunumeric import runtime - - -class _FakeSO: - CUNUMERIC_MAX_TASKS = 10 - CUNUMERIC_MAX_MAPPERS = 20 - CUNUMERIC_MAX_REDOPS = 30 class TestCuNumericLib: def test___init__(self) -> None: lib = m.CuNumericLib("foo") - assert isinstance(lib, Library) assert lib.name == "foo" - assert lib.shared_object is None - assert lib.runtime is None - - def test_get_name(self) -> None: - lib = m.CuNumericLib("foo") - assert lib.get_name() == "foo" def test_get_shared_library(self) -> None: lib = m.CuNumericLib("foo") @@ -60,45 +43,6 @@ def test_get_c_header(self) -> None: assert lib.get_c_header() == header - def test_get_registration_callback(self) -> None: - lib = m.CuNumericLib("foo") - assert ( - lib.get_registration_callback() == "cunumeric_perform_registration" - ) - - def test_initialize(self) -> None: - lib = m.CuNumericLib("foo") - lib.initialize(_FakeSO) - assert lib.shared_object == _FakeSO - - # error if runtime already set - lib.runtime = runtime - with pytest.raises(AssertionError): - lib.initialize(_FakeSO) - - def test_set_runtine(self) -> None: - lib = m.CuNumericLib("foo") - - # error if not initialized - with pytest.raises(AssertionError): - lib.set_runtime(runtime) - - lib.initialize(_FakeSO) - lib.set_runtime(runtime) - assert lib.runtime == runtime - - # error if runtime already set - with pytest.raises(AssertionError): - lib.set_runtime(runtime) - - @patch("cunumeric.runtime.destroy") - def test_destroy(self, mock_destroy) -> None: - lib = m.CuNumericLib("foo") - lib.initialize(_FakeSO) - lib.set_runtime(runtime) - lib.destroy() - mock_destroy.assert_called_once_with() - def test_CUNUMERIC_LIB_NAME() -> None: assert m.CUNUMERIC_LIB_NAME == "cunumeric" @@ -108,10 +52,6 @@ def test_cunumeric_lib() -> None: assert isinstance(m.cunumeric_lib, m.CuNumericLib) -def 
test_cunumeric_context() -> None: - assert isinstance(m.cunumeric_context, Context) - - def test_CuNumericOpCode() -> None: assert set(m.CuNumericOpCode.__members__) == { "ADVANCED_INDEXING", @@ -137,10 +77,13 @@ def test_CuNumericOpCode() -> None: "LOAD_CUDALIBS", "MATMUL", "MATVECMUL", + "MP_POTRF", + "MP_SOLVE", "NONZERO", "PACKBITS", "POTRF", "PUTMASK", + "QR", "RAND", "READ", "REPEAT", @@ -151,6 +94,7 @@ def test_CuNumericOpCode() -> None: "SOLVE", "SORT", "SEARCHSORTED", + "SVD", "SYRK", "TILE", "TRANSPOSE_COPY_2D", diff --git a/tests/unit/cunumeric/test_patch.py b/tests/unit/cunumeric/test_patch.py index 51911c563..41a5107e8 100644 --- a/tests/unit/cunumeric/test_patch.py +++ b/tests/unit/cunumeric/test_patch.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/cunumeric/test_settings.py b/tests/unit/cunumeric/test_settings.py index b51570699..e83070086 100644 --- a/tests/unit/cunumeric/test_settings.py +++ b/tests/unit/cunumeric/test_settings.py @@ -1,4 +1,4 @@ -# Copyright 2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/tests/unit/util.py b/tests/unit/util.py new file mode 100644 index 000000000..a6bb0a49e --- /dev/null +++ b/tests/unit/util.py @@ -0,0 +1,34 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from __future__ import annotations + +from itertools import chain, combinations +from typing import Any, Iterable, Iterator + +import pytest +from typing_extensions import TypeAlias + +Capsys: TypeAlias = pytest.CaptureFixture[str] + + +# ref: https://docs.python.org/3/library/itertools.html +def powerset(iterable: Iterable[Any]) -> Iterator[Any]: + s = list(iterable) + return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1)) + + +def powerset_nonempty(iterable: Iterable[Any]) -> Iterator[Any]: + return (x for x in powerset(iterable) if len(x)) diff --git a/versioneer.py b/versioneer.py index 159ace09b..4470e3562 100644 --- a/versioneer.py +++ b/versioneer.py @@ -11,7 +11,7 @@ * https://github.com/python-versioneer/python-versioneer * Brian Warner * License: Public Domain -* Compatible with: Python 3.6, 3.7, 3.8, 3.9, 3.10 and pypy3 +* Compatible with: Python 3.10, 3.11, 3.12 and pypy3 * [![Latest Version][pypi-image]][pypi-url] * [![Build Status][travis-image]][travis-url]