From b198f33df14ff63c7859bb31b6d14241bc823b40 Mon Sep 17 00:00:00 2001 From: Manolis Papadakis Date: Sat, 16 Nov 2024 19:22:53 -0500 Subject: [PATCH] 24.11.00 release --- .clang-format | 3 +- .gitattributes | 2 +- .github/CODEOWNERS | 4 + .github/ISSUE_TEMPLATE/bug_report.yml | 33 +- .github/actions/download-artifacts/action.yml | 48 - .github/workflows/ci-gh.yml | 28 - .github/workflows/gh-build-and-test.yml | 92 - .github/workflows/gh-build.yml | 101 - .github/workflows/gh-test.yml | 91 - .github/workflows/require-labels.yml | 15 - .gitignore | 8 +- .pre-commit-config.yaml | 11 +- CMakeLists.txt | 36 +- CONTRIBUTING.md | 12 +- LICENSES_bundled.txt | 39 - MANIFEST.in | 4 +- README.md | 154 +- cmake/Modules/cpm_helpers.cmake | 4 +- cmake/Modules/cuda_arch_helpers.cmake | 8 +- cmake/Modules/set_cpu_arch_flags.cmake | 2 +- cmake/generate_install_info_py.cmake | 10 +- cmake/thirdparty/get_cutensor.cmake | 2 +- cmake/thirdparty/get_legate.cmake | 104 + cmake/thirdparty/get_legate_core.cmake | 103 - cmake/thirdparty/get_nccl.cmake | 2 +- cmake/thirdparty/get_openblas.cmake | 38 +- cmake/thirdparty/get_tblis.cmake | 30 +- cmake/versions.json | 13 +- conda/conda-build/build.sh | 39 +- conda/conda-build/conda_build_config.yaml | 7 +- conda/conda-build/meta.yaml | 124 +- continuous_integration/dot-gitconfig | 3 - .../scripts/build-cunumeric-all | 38 - .../scripts/build-cunumeric-conda | 85 - .../scripts/build-cunumeric-cpp | 33 - .../scripts/build-cunumeric-wheel | 27 - continuous_integration/scripts/entrypoint | 46 - continuous_integration/scripts/test-cunumeric | 61 - cunumeric/_ufunc/comparison.py | 122 - cunumeric/config.py | 788 -- cunumeric/install_info.py.in | 43 - cunumeric/linalg/cholesky.py | 272 - cunumeric/linalg/solve.py | 62 - cunumeric/module.py | 8284 ----------------- cunumeric/random/legacy.py | 203 - cunumeric_cpp.cmake | 505 - {cunumeric => cupynumeric}/__init__.py | 21 +- cupynumeric/_array/__init__.py | 15 + {cunumeric => cupynumeric/_array}/array.py | 
1502 +-- cupynumeric/_array/flags.py | 82 + cupynumeric/_array/thunk.py | 356 + cupynumeric/_array/util.py | 221 + cupynumeric/_module/__init__.py | 146 + .../_module}/_unary_red_utils.py | 4 +- cupynumeric/_module/array_basic.py | 81 + cupynumeric/_module/array_dimension.py | 443 + cupynumeric/_module/array_joining.py | 702 ++ cupynumeric/_module/array_rearrange.py | 289 + cupynumeric/_module/array_shape.py | 119 + cupynumeric/_module/array_splitting.py | 246 + cupynumeric/_module/array_tiling.py | 232 + cupynumeric/_module/array_transpose.py | 136 + .../_module/binary_bit_packing.py | 22 +- cupynumeric/_module/creation_data.py | 177 + cupynumeric/_module/creation_matrices.py | 192 + cupynumeric/_module/creation_ranges.py | 355 + cupynumeric/_module/creation_shape.py | 406 + cupynumeric/_module/indexing.py | 1243 +++ cupynumeric/_module/io_numpy.py | 75 + cupynumeric/_module/linalg_mvp.py | 935 ++ cupynumeric/_module/logic_array_contents.py | 114 + .../_module/logic_array_type.py | 121 +- cupynumeric/_module/logic_comparison.py | 201 + cupynumeric/_module/logic_truth.py | 138 + cupynumeric/_module/math_complex.py | 122 + cupynumeric/_module/math_extrema.py | 179 + cupynumeric/_module/math_misc.py | 174 + cupynumeric/_module/math_rounding.py | 57 + cupynumeric/_module/math_sum_prod_diff.py | 1338 +++ cupynumeric/_module/sets_making.py | 104 + cupynumeric/_module/ssc_counting.py | 57 + cupynumeric/_module/ssc_searching.py | 357 + cupynumeric/_module/ssc_sorting.py | 291 + cupynumeric/_module/stats_avgs_vars.py | 518 ++ cupynumeric/_module/stats_correlating.py | 187 + cupynumeric/_module/stats_histograms.py | 388 + cupynumeric/_module/stats_order.py | 1063 +++ {cunumeric => cupynumeric/_module}/window.py | 8 +- .../_sphinxext/__init__.py | 2 +- .../_sphinxext/_comparison_config.py | 7 +- .../_sphinxext/_comparison_util.py | 18 +- .../_sphinxext/_cupynumeric_directive.py | 8 +- .../_sphinxext/_templates.py | 2 +- .../_templates/comparison_table.rst | 6 +- 
.../_sphinxext/comparison_table.py | 6 +- .../_sphinxext/implemented_index.py | 12 +- .../_sphinxext/missing_refs.py | 44 +- .../_sphinxext/ufunc_formatter.py | 4 +- cupynumeric/_thunk/__init__.py | 15 + .../sort.py => cupynumeric/_thunk/_sort.py | 51 +- {cunumeric => cupynumeric/_thunk}/deferred.py | 1391 +-- {cunumeric => cupynumeric/_thunk}/eager.py | 454 +- {cunumeric => cupynumeric/_thunk}/thunk.py | 174 +- {cunumeric => cupynumeric}/_ufunc/__init__.py | 2 +- .../_ufunc/bit_twiddling.py | 5 +- cupynumeric/_ufunc/comparison.py | 212 + {cunumeric => cupynumeric}/_ufunc/floating.py | 5 +- {cunumeric => cupynumeric}/_ufunc/math.py | 5 +- .../_ufunc/trigonometric.py | 5 +- {cunumeric => cupynumeric}/_ufunc/ufunc.py | 227 +- cupynumeric/_utils/__init__.py | 19 + cupynumeric/_utils/array.py | 113 + {cunumeric => cupynumeric/_utils}/coverage.py | 69 +- .../utils.py => cupynumeric/_utils/linalg.py | 133 +- cupynumeric/_utils/stack.py | 52 + cupynumeric/_utils/structure.py | 37 + {cunumeric => cupynumeric}/_version.py | 4 +- cupynumeric/config.py | 835 ++ {cunumeric => cupynumeric}/fft/__init__.py | 8 +- {cunumeric => cupynumeric}/fft/fft.py | 156 +- cupynumeric/install_info.py.in | 47 + {cunumeric => cupynumeric}/linalg/__init__.py | 9 +- cupynumeric/linalg/_cholesky.py | 305 + .../linalg/_exception.py | 2 +- cupynumeric/linalg/_qr.py | 50 + cupynumeric/linalg/_solve.py | 108 + cupynumeric/linalg/_svd.py | 60 + {cunumeric => cupynumeric}/linalg/linalg.py | 237 +- {cunumeric => cupynumeric}/ma/__init__.py | 8 +- .../ma/_masked_array.py | 16 +- {cunumeric => cupynumeric}/patch.py | 12 +- {cunumeric => cupynumeric}/py.typed | 0 {cunumeric => cupynumeric}/random/__init__.py | 18 +- .../random/_bitgenerator.py | 82 +- .../random/_generator.py | 90 +- .../random/_random.py | 189 +- {cunumeric => cupynumeric}/runtime.py | 401 +- {cunumeric => cupynumeric}/settings.py | 56 +- {cunumeric => cupynumeric}/types.py | 12 +- cupynumeric_cpp.cmake | 565 ++ ...c_python.cmake => 
cupynumeric_python.cmake | 52 +- docs/cunumeric/source/api/settings.rst | 8 - .../source/developer/CONTRIBUTING.md | 1 - docs/cunumeric/source/developer/building.rst | 71 - docs/cunumeric/source/index.rst | 39 - docs/cunumeric/source/user/configuration.rst | 108 - docs/cunumeric/source/user/installation.rst | 19 - docs/cunumeric/source/user/notebooks.rst | 9 - docs/cunumeric/source/user/usage.rst | 143 - docs/cunumeric/source/versions.rst | 14 - docs/{cunumeric => cupynumeric}/Makefile | 9 +- docs/{cunumeric => cupynumeric}/make.bat | 0 .../source/_images/developer-build.png | Bin .../source/_implemented.rst | 2 +- .../source/_static/.keep | 0 .../cupynumeric/source/_templates/layout.html | 7 + .../source/api/_bitgenerator.rst | 4 +- .../source/api/_generator.rst | 4 +- .../source/api}/_grouped.rst | 0 .../source/api/_ndarray.rst | 5 +- .../source/api/binary.rst | 2 +- .../source/api/broadcast.rst | 4 +- .../source/api/classes.rst | 0 docs/cupynumeric/source/api/comparison.rst | 12 + .../source/api/creation.rst | 3 +- docs/cupynumeric/source/api/datatype.rst | 12 + .../source/api/fft.rst | 16 +- .../source/api/index.rst | 3 +- .../source/api/indexing.rst | 4 +- docs/cupynumeric/source/api/io.rst | 11 + .../source/api/linalg.rst | 8 +- .../source/api/logic.rst | 2 +- .../source/api/manipulation.rst | 7 +- .../source/api/math.rst | 6 +- .../source/api/ndarray.rst | 7 +- .../source/api/random.rst | 4 +- .../source/api/routines.rst | 2 + .../source/api/set.rst | 2 +- docs/cupynumeric/source/api/settings.rst | 8 + .../source/api/sorting.rst | 2 +- .../source/api/statistics.rst | 34 +- .../source/api/window.rst | 2 +- .../{cunumeric => cupynumeric}/source/conf.py | 52 +- .../source/developer/CONTRIBUTING.md | 72 + .../cupynumeric/source/developer/building.rst | 71 + .../source/developer/index.rst | 0 .../source/developer/testing.rst | 2 +- .../source/examples}/black_scholes.ipynb | 8 +- .../source/examples}/cholesky.ipynb | 12 +- 
.../examples/compact_finite_difference.ipynb | 336 + .../source/examples/edge_detection.ipynb | 210 + docs/cupynumeric/source/examples/image.png | Bin 0 -> 305442 bytes docs/cupynumeric/source/examples/index.rst | 14 + docs/cupynumeric/source/examples/kmeans.ipynb | 402 + .../source/examples/newton_raphson_2d.ipynb | 264 + .../source/examples}/stencil.ipynb | 6 +- .../source/examples/torchswe.ipynb | 219 + docs/cupynumeric/source/faqs.rst | 206 + docs/cupynumeric/source/index.rst | 34 + docs/cupynumeric/source/installation.rst | 63 + docs/cupynumeric/source/oss-licenses.rst | 123 + docs/cupynumeric/source/user/advanced.rst | 42 + docs/cupynumeric/source/user/differences.rst | 67 + .../source/user/howtos/benchmarking.rst | 56 + docs/cupynumeric/source/user/howtos/index.rst | 9 + .../source/user/howtos/measuring.rst} | 44 +- .../source/user/howtos/patching.rst | 35 + .../source/user/index.rst | 6 +- .../source/user/practices.rst | 164 +- docs/cupynumeric/source/user/usage.rst | 50 + docs/cupynumeric/switcher.json | 7 + examples/benchmark.py | 19 +- examples/black_scholes.py | 2 +- examples/black_scholes_greeks.py | 2 +- examples/cg.py | 2 +- examples/cholesky.py | 74 +- examples/cpp/stencil/CMakeLists.txt | 31 + examples/cpp/stencil/build.sh | 22 + examples/cpp/stencil/stencil.cc | 101 + examples/einsum.py | 2 +- examples/gemm.py | 2 +- examples/indexing_routines.py | 2 +- examples/ingest.py | 96 - examples/jacobi.py | 2 +- examples/kmeans.py | 2 +- examples/kmeans_slow.py | 2 +- examples/kmeans_sort.py | 228 - examples/linreg.py | 2 +- examples/logreg.py | 2 +- examples/lstm_backward.py | 2 +- examples/lstm_forward.py | 2 +- examples/lstm_full.py | 419 - examples/qr.py | 94 + examples/quantiles.py | 2 +- examples/richardson_lucy.py | 21 +- examples/scan.py | 4 +- examples/solve.py | 69 +- examples/sort.py | 2 +- examples/stencil.py | 2 +- examples/svd.py | 131 + examples/wgrad.py | 107 - install.py | 183 +- pyproject.toml | 6 +- scripts/api_compare.py | 20 +- 
scripts/build-install.sh | 37 - scripts/build-no-install.sh | 36 - scripts/build-separately-no-install.sh | 53 - scripts/build-with-legate-no-install.sh | 37 - ...build-with-legate-separately-no-install.sh | 54 - scripts/conda-build.sh | 13 +- scripts/hooks/enforce_boilerplate.py | 2 +- scripts/hooks/legate_defined.sh | 43 + scripts/util/build-caching.sh | 17 - scripts/util/compiler-flags.sh | 12 - scripts/util/read-legate-core-root.sh | 16 - ...global-legion-legate-core-and-cunumeric.sh | 10 - setup.cfg | 14 +- setup.py | 14 +- src/cunumeric/arg_redop_register.h | 56 - src/cunumeric/binary/binary_op.h | 46 - src/cunumeric/binary/binary_red.h | 46 - src/cunumeric/cunumeric.cc | 62 - src/cunumeric/cunumeric_c.h | 345 - src/cunumeric/index/zip.h | 67 - src/cunumeric/mapper.cc | 251 - src/cunumeric/random/bitgenerator_util.h | 98 - src/cunumeric/random/curand_help.h | 46 - .../random/randutil/generator_create.inl | 50 - .../random/randutil/generator_integers.inl | 59 - src/cunumeric/random/randutil/randutil.h | 230 - src/cunumeric/scan/scan_global_util.h | 58 - src/cunumeric/stat/bincount.h | 43 - src/cupynumeric.h | 19 + src/{cunumeric => cupynumeric}/arg.h | 8 +- src/{cunumeric => cupynumeric}/arg.inl | 6 +- .../arg_redop_register.cc | 27 +- .../arg_redop_register.cu | 10 +- src/cupynumeric/arg_redop_register.h | 50 + .../binary/binary_op.cc | 18 +- .../binary/binary_op.cu | 26 +- src/cupynumeric/binary/binary_op.h | 46 + .../binary/binary_op_omp.cc | 18 +- .../binary/binary_op_template.inl | 30 +- src/cupynumeric/binary/binary_op_util.cc | 49 + .../binary/binary_op_util.h | 300 +- .../binary/binary_red.cc | 17 +- .../binary/binary_red.cu | 34 +- src/cupynumeric/binary/binary_red.h | 46 + .../binary/binary_red_omp.cc | 25 +- .../binary/binary_red_template.inl | 34 +- .../bits/bits_util.h | 12 +- .../bits/packbits.cc | 14 +- .../bits/packbits.cu | 22 +- .../bits/packbits.h | 24 +- .../bits/packbits_omp.cc | 14 +- .../bits/packbits_template.inl | 35 +- 
.../bits/unpackbits.cc | 12 +- .../bits/unpackbits.cu | 20 +- .../bits/unpackbits.h | 24 +- .../bits/unpackbits_omp.cc | 12 +- .../bits/unpackbits_template.inl | 30 +- .../cephes/chbevl.cc | 0 src/{cunumeric => cupynumeric}/cephes/i0.cc | 6 +- .../convolution/convolve.cc | 86 +- .../convolution/convolve.cu | 415 +- .../convolution/convolve.h | 29 +- .../convolution/convolve_omp.cc | 90 +- .../convolution/convolve_template.inl | 180 +- src/{cunumeric => cupynumeric}/cuda_help.h | 332 +- src/{cunumeric => cupynumeric}/cudalibs.cu | 211 +- src/{cunumeric => cupynumeric}/cudalibs.h | 23 +- src/cupynumeric/cupynumeric.cc | 82 + src/cupynumeric/cupynumeric.cu | 45 + src/cupynumeric/cupynumeric_c.h | 355 + .../cupynumeric_task.h} | 17 +- .../device_scalar_reduction_buffer.h | 18 +- src/{cunumeric => cupynumeric}/divmod.h | 18 +- .../indexing/parallel_loop.cuh | 22 +- .../execution_policy/indexing/parallel_loop.h | 12 +- .../indexing/parallel_loop_omp.h | 16 +- .../reduction/scalar_reduction.cuh | 20 +- .../reduction/scalar_reduction.h | 12 +- .../reduction/scalar_reduction_omp.h | 22 +- src/{cunumeric => cupynumeric}/fft/fft.cu | 102 +- src/{cunumeric => cupynumeric}/fft/fft.h | 26 +- .../fft/fft_template.inl | 45 +- src/{cunumeric => cupynumeric}/fft/fft_util.h | 50 +- .../index/advanced_indexing.cc | 44 +- .../index/advanced_indexing.cu | 47 +- .../index/advanced_indexing.h | 28 +- .../index/advanced_indexing_omp.cc | 44 +- .../index/advanced_indexing_template.inl | 23 +- .../index/choose.cc | 22 +- .../index/choose.cu | 34 +- src/cupynumeric/index/choose.h | 42 + .../index/choose_omp.cc | 18 +- .../index/choose_template.inl | 26 +- .../index/putmask.cc | 12 +- .../index/putmask.cu | 14 +- src/cupynumeric/index/putmask.h | 43 + .../index/putmask_omp.cc | 14 +- .../index/putmask_template.inl | 40 +- .../index/repeat.cc | 33 +- .../index/repeat.cu | 57 +- src/{cunumeric => cupynumeric}/index/repeat.h | 28 +- .../index/repeat_omp.cc | 38 +- .../index/repeat_template.inl | 
38 +- .../index/select.cc | 28 +- .../index/select.cu | 51 +- src/{cunumeric => cupynumeric}/index/select.h | 26 +- .../index/select_omp.cc | 28 +- .../index/select_template.inl | 32 +- src/{cunumeric => cupynumeric}/index/wrap.cc | 20 +- src/{cunumeric => cupynumeric}/index/wrap.cu | 38 +- src/{cunumeric => cupynumeric}/index/wrap.h | 47 +- .../index/wrap_omp.cc | 30 +- .../index/wrap_template.inl | 40 +- src/{cunumeric => cupynumeric}/index/zip.cc | 18 +- src/{cunumeric => cupynumeric}/index/zip.cu | 52 +- src/cupynumeric/index/zip.h | 69 + .../index/zip_omp.cc | 34 +- .../index/zip_template.inl | 43 +- src/{cunumeric => cupynumeric}/item/read.cc | 12 +- src/{cunumeric => cupynumeric}/item/read.cu | 16 +- .../putmask.h => cupynumeric/item/read.h} | 28 +- .../item/read_template.inl | 18 +- src/{cunumeric => cupynumeric}/item/write.cc | 20 +- src/{cunumeric => cupynumeric}/item/write.cu | 30 +- .../item/read.h => cupynumeric/item/write.h} | 22 +- .../item/write_template.inl | 27 +- src/cupynumeric/mapper.cc | 229 + src/{cunumeric => cupynumeric}/mapper.h | 31 +- .../matrix/batched_cholesky.cc | 24 +- .../matrix/batched_cholesky.cu | 24 +- .../matrix/batched_cholesky.h | 24 +- .../matrix/batched_cholesky_omp.cc | 20 +- .../matrix/batched_cholesky_template.inl | 51 +- .../matrix/contract.cc | 14 +- .../matrix/contract.cu | 142 +- .../matrix/contract.h | 28 +- .../matrix/contract_omp.cc | 14 +- .../matrix/contract_template.inl | 56 +- src/{cunumeric => cupynumeric}/matrix/diag.cc | 26 +- src/{cunumeric => cupynumeric}/matrix/diag.cu | 34 +- src/{cunumeric => cupynumeric}/matrix/diag.h | 26 +- .../matrix/diag_omp.cc | 22 +- .../matrix/diag_template.inl | 36 +- src/{cunumeric => cupynumeric}/matrix/dot.cc | 16 +- src/{cunumeric => cupynumeric}/matrix/dot.cu | 21 +- src/{cunumeric => cupynumeric}/matrix/dot.h | 28 +- .../matrix/dot_omp.cc | 24 +- .../matrix/dot_template.inl | 20 +- src/{cunumeric => cupynumeric}/matrix/gemm.cc | 14 +- src/{cunumeric => 
cupynumeric}/matrix/gemm.cu | 18 +- src/{cunumeric => cupynumeric}/matrix/gemm.h | 22 +- .../matrix/gemm_omp.cc | 12 +- .../matrix/gemm_template.inl | 28 +- .../matrix/matmul.cc | 18 +- .../matrix/matmul.cu | 24 +- .../matrix/matmul.h | 28 +- .../matrix/matmul_cpu.inl | 22 +- .../matrix/matmul_omp.cc | 14 +- .../matrix/matmul_template.inl | 84 +- .../matrix/matvecmul.cc | 18 +- .../matrix/matvecmul.cu | 44 +- .../matrix/matvecmul.h | 28 +- .../matrix/matvecmul_cpu.inl | 12 +- .../matrix/matvecmul_omp.cc | 14 +- .../matrix/matvecmul_template.inl | 26 +- src/cupynumeric/matrix/mp_potrf.cu | 146 + .../matrix/mp_potrf.h} | 17 +- src/cupynumeric/matrix/mp_potrf_template.inl | 167 + src/cupynumeric/matrix/mp_solve.cu | 247 + src/cupynumeric/matrix/mp_solve.h | 33 + src/cupynumeric/matrix/mp_solve_template.inl | 195 + .../matrix/potrf.cc | 30 +- .../matrix/potrf.cu | 22 +- src/{cunumeric => cupynumeric}/matrix/potrf.h | 22 +- .../matrix/potrf_omp.cc | 28 +- .../matrix/potrf_template.inl | 22 +- src/cupynumeric/matrix/qr.cc | 40 + src/cupynumeric/matrix/qr.cu | 190 + .../solve.h => cupynumeric/matrix/qr.h} | 22 +- src/cupynumeric/matrix/qr_cpu.inl | 131 + src/cupynumeric/matrix/qr_omp.cc | 31 + src/cupynumeric/matrix/qr_template.inl | 102 + .../matrix/solve.cc | 16 +- .../matrix/solve.cu | 24 +- src/cupynumeric/matrix/solve.h | 38 + .../matrix/solve_cpu.inl | 22 +- .../matrix/solve_omp.cc | 14 +- .../matrix/solve_template.inl | 38 +- src/cupynumeric/matrix/svd.cc | 40 + src/cupynumeric/matrix/svd.cu | 194 + .../choose.h => cupynumeric/matrix/svd.h} | 28 +- src/cupynumeric/matrix/svd_cpu.inl | 257 + src/cupynumeric/matrix/svd_omp.cc | 31 + src/cupynumeric/matrix/svd_template.inl | 137 + src/{cunumeric => cupynumeric}/matrix/syrk.cc | 14 +- src/{cunumeric => cupynumeric}/matrix/syrk.cu | 16 +- .../write.h => cupynumeric/matrix/syrk.h} | 22 +- .../matrix/syrk_omp.cc | 12 +- .../matrix/syrk_template.inl | 24 +- src/{cunumeric => cupynumeric}/matrix/tile.cc | 12 +- 
src/{cunumeric => cupynumeric}/matrix/tile.cu | 20 +- src/{cunumeric => cupynumeric}/matrix/tile.h | 26 +- .../matrix/tile_omp.cc | 12 +- .../matrix/tile_template.inl | 21 +- .../matrix/transpose.cc | 25 +- .../matrix/transpose.cu | 36 +- .../matrix/transpose.h | 26 +- .../matrix/transpose_omp.cc | 21 +- .../matrix/transpose_template.inl | 20 +- .../matrix/trilu.cc | 29 +- .../matrix/trilu.cu | 32 +- src/{cunumeric => cupynumeric}/matrix/trilu.h | 26 +- .../matrix/trilu_omp.cc | 24 +- .../matrix/trilu_template.inl | 41 +- src/{cunumeric => cupynumeric}/matrix/trsm.cc | 14 +- src/{cunumeric => cupynumeric}/matrix/trsm.cu | 16 +- src/{cunumeric => cupynumeric}/matrix/trsm.h | 22 +- .../matrix/trsm_omp.cc | 12 +- .../matrix/trsm_template.inl | 24 +- src/{cunumeric => cupynumeric}/matrix/util.cc | 57 +- src/{cunumeric => cupynumeric}/matrix/util.h | 10 +- src/cupynumeric/ndarray.cc | 1903 ++++ src/cupynumeric/ndarray.h | 175 + src/cupynumeric/ndarray.inl | 33 + .../nullary/arange.cc | 15 +- .../nullary/arange.cu | 20 +- .../nullary/arange.h | 29 +- .../nullary/arange_omp.cc | 15 +- .../nullary/arange_template.inl | 27 +- src/{cunumeric => cupynumeric}/nullary/eye.cc | 16 +- src/{cunumeric => cupynumeric}/nullary/eye.cu | 20 +- src/{cunumeric => cupynumeric}/nullary/eye.h | 24 +- .../nullary/eye_omp.cc | 16 +- .../nullary/eye_template.inl | 22 +- .../nullary/fill.cc | 16 +- .../nullary/fill.cu | 24 +- src/{cunumeric => cupynumeric}/nullary/fill.h | 27 +- .../nullary/fill_omp.cc | 16 +- .../nullary/fill_template.inl | 41 +- .../nullary/window.cc | 20 +- .../nullary/window.cu | 24 +- .../nullary/window.h | 22 +- .../nullary/window_omp.cc | 20 +- .../nullary/window_template.inl | 27 +- .../nullary/window_util.h | 18 +- src/{cunumeric => cupynumeric}/omp_help.h | 6 +- src/cupynumeric/operators.cc | 614 ++ src/cupynumeric/operators.h | 200 + src/cupynumeric/operators.inl | 38 + src/{cunumeric => cupynumeric}/pitches.h | 21 +- .../random/bitgenerator.cc | 58 +- 
.../random/bitgenerator.cu | 43 +- .../random/bitgenerator.h | 34 +- .../random/bitgenerator_curand.inl | 294 +- .../random/bitgenerator_template.inl | 40 +- src/cupynumeric/random/bitgenerator_util.h | 98 + src/cupynumeric/random/curand_help.h | 64 + .../random/philox.h | 6 +- src/{cunumeric => cupynumeric}/random/rand.cc | 16 +- src/{cunumeric => cupynumeric}/random/rand.cu | 24 +- src/{cunumeric => cupynumeric}/random/rand.h | 28 +- .../random/rand_omp.cc | 16 +- .../random/rand_template.inl | 38 +- .../random/rand_util.h | 32 +- .../random/randutil/generator.cuh | 43 +- .../random/randutil/generator.h | 92 +- .../random/randutil/generator_beta.inl | 2 +- .../random/randutil/generator_binomial.inl | 2 +- .../random/randutil/generator_cauchy.inl | 18 +- .../random/randutil/generator_chisquare.inl | 2 +- .../random/randutil/generator_create.inl | 51 + .../random/randutil/generator_device.cu | 2 +- .../randutil/generator_device_advanced.cu | 2 +- .../generator_device_straightforward.cu | 2 +- .../random/randutil/generator_exponential.inl | 8 +- .../random/randutil/generator_f.inl | 2 +- .../random/randutil/generator_gamma.inl | 2 +- .../random/randutil/generator_geometric.inl | 2 +- .../random/randutil/generator_gumbel.inl | 18 +- .../random/randutil/generator_host.cc | 83 +- .../randutil/generator_host_advanced.cc | 168 +- .../generator_host_straightforward.cc | 126 +- .../randutil/generator_hypergeometric.inl | 2 +- .../random/randutil/generator_integers.inl | 56 + .../random/randutil/generator_laplace.inl | 26 +- .../random/randutil/generator_logistic.inl | 18 +- .../random/randutil/generator_lognormal.inl | 8 +- .../random/randutil/generator_logseries.inl | 2 +- .../randutil/generator_negative_binomial.inl | 6 +- .../random/randutil/generator_normal.inl | 8 +- .../random/randutil/generator_pareto.inl | 12 +- .../random/randutil/generator_poisson.inl | 6 +- .../random/randutil/generator_power.inl | 8 +- .../random/randutil/generator_raw.inl | 6 +- 
.../random/randutil/generator_rayleigh.inl | 8 +- .../random/randutil/generator_standard_t.inl | 2 +- .../random/randutil/generator_triangular.inl | 36 +- .../random/randutil/generator_uniform.inl | 10 +- .../random/randutil/generator_vonmises.inl | 2 +- .../random/randutil/generator_wald.inl | 22 +- .../random/randutil/generator_weibull.inl | 20 +- .../random/randutil/generator_zipf.inl | 2 +- .../random/randutil/random_distributions.h | 160 +- src/cupynumeric/random/randutil/randomizer.h | 143 + src/cupynumeric/random/randutil/randutil.h | 232 + .../random/randutil/randutil_curand.h | 9 +- .../random/randutil/randutil_impl.h | 2 +- src/cupynumeric/random/rnd_aliases.h | 75 + src/cupynumeric/random/rnd_types.h | 63 + src/cupynumeric/runtime.cc | 178 + src/cupynumeric/runtime.h | 73 + .../scan/scan_global.cc | 14 +- .../scan/scan_global.cu | 24 +- .../scan/scan_global.h | 28 +- .../scan/scan_global_omp.cc | 14 +- .../scan/scan_global_template.inl | 19 +- .../scan/scan_local.cc | 24 +- .../scan/scan_local.cu | 36 +- .../scan/scan_local.h | 30 +- .../scan/scan_local_omp.cc | 24 +- .../scan/scan_local_template.inl | 24 +- .../scan/scan_local_util.h | 16 +- src/cupynumeric/scan/scan_util.h | 90 + .../search/argwhere.cc | 22 +- .../search/argwhere.cu | 32 +- .../search/argwhere.h | 26 +- .../search/argwhere_omp.cc | 34 +- .../search/argwhere_template.inl | 14 +- .../search/nonzero.cc | 25 +- .../search/nonzero.cu | 37 +- .../search/nonzero.cuh | 15 +- .../search/nonzero.h | 26 +- .../search/nonzero_omp.cc | 39 +- .../search/nonzero_template.inl | 22 +- src/{cunumeric => cupynumeric}/set/unique.cc | 20 +- src/{cunumeric => cupynumeric}/set/unique.cu | 49 +- src/{cunumeric => cupynumeric}/set/unique.h | 22 +- .../set/unique_omp.cc | 20 +- .../set/unique_reduce.cc | 12 +- .../set/unique_reduce.h} | 21 +- .../set/unique_reduce_omp.cc | 12 +- .../set/unique_reduce_template.inl | 24 +- .../set/unique_template.inl | 24 +- src/cupynumeric/slice.h | 27 + .../sort/cub_sort.cuh | 
24 +- .../sort/cub_sort.h | 6 +- .../sort/cub_sort_bool.cu | 8 +- .../sort/cub_sort_double.cu | 8 +- .../sort/cub_sort_float.cu | 8 +- .../sort/cub_sort_half.cu | 8 +- .../sort/cub_sort_int16.cu | 8 +- .../sort/cub_sort_int32.cu | 8 +- .../sort/cub_sort_int64.cu | 8 +- .../sort/cub_sort_int8.cu | 8 +- .../sort/cub_sort_uint16.cu | 8 +- .../sort/cub_sort_uint32.cu | 8 +- .../sort/cub_sort_uint64.cu | 8 +- .../sort/cub_sort_uint8.cu | 8 +- .../sort/searchsorted.cc | 28 +- .../sort/searchsorted.cu | 40 +- .../sort/searchsorted.h | 28 +- .../sort/searchsorted_omp.cc | 28 +- .../sort/searchsorted_template.inl | 30 +- src/{cunumeric => cupynumeric}/sort/sort.cc | 21 +- src/{cunumeric => cupynumeric}/sort/sort.cu | 324 +- src/{cunumeric => cupynumeric}/sort/sort.h | 30 +- .../sort/sort_cpu.inl | 261 +- .../sort/sort_omp.cc | 21 +- .../sort/sort_template.inl | 42 +- .../sort/thrust_sort.cuh | 18 +- .../sort/thrust_sort.h | 6 +- .../sort/thrust_sort_bool.cu | 8 +- .../sort/thrust_sort_complex128.cu | 8 +- .../sort/thrust_sort_complex64.cu | 8 +- .../sort/thrust_sort_double.cu | 8 +- .../sort/thrust_sort_float.cu | 8 +- .../sort/thrust_sort_half.cu | 8 +- .../sort/thrust_sort_int16.cu | 8 +- .../sort/thrust_sort_int32.cu | 8 +- .../sort/thrust_sort_int64.cu | 8 +- .../sort/thrust_sort_int8.cu | 8 +- .../sort/thrust_sort_uint16.cu | 8 +- .../sort/thrust_sort_uint32.cu | 8 +- .../sort/thrust_sort_uint64.cu | 8 +- .../sort/thrust_sort_uint8.cu | 8 +- .../stat/bincount.cc | 18 +- .../stat/bincount.cu | 48 +- src/cupynumeric/stat/bincount.h | 44 + .../stat/bincount_omp.cc | 34 +- .../stat/bincount_template.inl | 32 +- .../stat/histogram.cc | 18 +- .../stat/histogram.cu | 22 +- .../stat/histogram.cuh | 12 +- .../stat/histogram.h | 30 +- .../stat/histogram_cpu.h | 17 +- .../stat/histogram_gen.h | 6 +- .../stat/histogram_impl.h | 35 +- .../stat/histogram_omp.cc | 18 +- .../stat/histogram_template.inl | 18 +- .../ternary/where.cc | 17 +- .../ternary/where.cu | 26 +- 
.../ternary/where.h | 30 +- .../ternary/where_omp.cc | 17 +- .../ternary/where_template.inl | 22 +- .../transform/flip.cc | 17 +- .../transform/flip.cu | 30 +- .../transform/flip.h | 26 +- .../transform/flip_omp.cc | 17 +- .../transform/flip_template.inl | 22 +- src/cupynumeric/typedefs.h | 28 + .../unary/convert.cc | 20 +- .../unary/convert.cu | 28 +- .../unary/convert.h | 28 +- .../unary/convert_omp.cc | 20 +- .../unary/convert_template.inl | 25 +- .../unary/convert_util.h | 77 +- src/{cunumeric => cupynumeric}/unary/isnan.h | 8 +- .../unary/scalar_unary_red.cc | 12 +- .../unary/scalar_unary_red.cu | 16 +- .../unary/scalar_unary_red.h | 32 +- .../unary/scalar_unary_red_omp.cc | 14 +- .../unary/scalar_unary_red_template.inl | 115 +- .../unary/unary_op.cc | 24 +- .../unary/unary_op.cu | 44 +- .../unary/unary_op.h | 56 +- .../unary/unary_op_omp.cc | 24 +- .../unary/unary_op_template.inl | 55 +- .../unary/unary_op_util.h | 436 +- .../unary/unary_red.cc | 18 +- .../unary/unary_red.cu | 74 +- .../unary/unary_red.h | 30 +- .../unary/unary_red_omp.cc | 29 +- .../unary/unary_red_template.inl | 42 +- .../unary/unary_red_util.h | 102 +- src/cupynumeric/utilities/repartition.cc | 71 + src/cupynumeric/utilities/repartition.cu | 1357 +++ src/cupynumeric/utilities/repartition.h | 95 + .../utilities/thrust_allocator.h | 6 +- .../utilities/thrust_util.h | 2 +- src/env_defaults.h | 8 +- test.py | 34 +- tests/cpp/.gitignore | 1 + tests/cpp/CMakeLists.txt | 75 + tests/cpp/cmake/thirdparty/get_nccl.cmake | 34 + tests/cpp/integration/common_utils.cc | 171 + tests/cpp/integration/common_utils.h | 196 + tests/cpp/integration/test_amax.cc | 339 + tests/cpp/integration/test_amin.cc | 339 + tests/cpp/integration/test_arange.cc | 91 + tests/cpp/integration/test_argsort.cc | 477 + tests/cpp/integration/test_argwhere.cc | 298 + tests/cpp/integration/test_bincount.cc | 106 + tests/cpp/integration/test_convolve.cc | 135 + tests/cpp/integration/test_diagonal.cc | 311 + 
tests/cpp/integration/test_dot.cc | 73 + tests/cpp/integration/test_eye.cc | 296 + tests/cpp/integration/test_fill.cc | 91 + tests/cpp/integration/test_flip.cc | 666 ++ tests/cpp/integration/test_logical.cc | 450 + tests/cpp/integration/test_moveaxis.cc | 150 + tests/cpp/integration/test_msort.cc | 398 + tests/cpp/integration/test_nonzero.cc | 273 + tests/cpp/integration/test_put.cc | 292 + tests/cpp/integration/test_repartition.cc | 459 + tests/cpp/integration/test_repeat.cc | 352 + tests/cpp/integration/test_reshape.cc | 232 + tests/cpp/integration/test_sort.cc | 632 ++ tests/cpp/integration/test_sort_complex.cc | 442 + tests/cpp/integration/test_squeeze.cc | 212 + tests/cpp/integration/test_swapaxes.cc | 103 + tests/cpp/integration/test_transpose.cc | 139 + tests/cpp/integration/test_trilu.cc | 123 + tests/cpp/integration/test_unique.cc | 96 + tests/cpp/integration/test_where.cc | 273 + tests/cpp/integration/test_window.cc | 196 + tests/cpp/integration/test_zeros.cc | 108 + tests/cpp/integration/util.inl | 247 + tests/cpp/main.cc | 43 + tests/cpp/run.py | 169 + tests/cpp/run.sh | 13 + tests/integration/test_0d_store.py | 4 +- tests/integration/test_advanced_indexing.py | 48 +- tests/integration/test_allclose.py | 12 +- tests/integration/test_amax_amin.py | 12 +- tests/integration/test_angle.py | 104 + tests/integration/test_append.py | 4 +- tests/integration/test_arg_reduce.py | 9 +- tests/integration/test_argsort.py | 20 +- tests/integration/test_array.py | 8 +- tests/integration/test_array_creation.py | 54 +- tests/integration/test_array_dunders.py | 4 +- tests/integration/test_array_equal.py | 8 +- tests/integration/test_array_fallback.py | 8 +- tests/integration/test_array_split.py | 4 +- tests/integration/test_astype.py | 16 +- tests/integration/test_atleast_nd.py | 12 +- tests/integration/test_average.py | 103 + tests/integration/test_binary_op_broadcast.py | 4 +- tests/integration/test_binary_op_complex.py | 4 +- tests/integration/test_binary_op_typing.py 
| 38 +- tests/integration/test_binary_ufunc.py | 498 +- tests/integration/test_bincount.py | 4 +- tests/integration/test_bits.py | 12 +- tests/integration/test_block.py | 26 +- tests/integration/test_broadcast.py | 20 +- tests/integration/test_cholesky.py | 6 +- tests/integration/test_clip.py | 12 +- tests/integration/test_complex_ops.py | 8 +- tests/integration/test_compress.py | 14 +- tests/integration/test_concatenate_stack.py | 10 +- tests/integration/test_contains.py | 4 +- tests/integration/test_convolve.py | 28 +- tests/integration/test_copy.py | 4 +- tests/integration/test_corner_quantiles.py | 379 + tests/integration/test_data_interface.py | 6 +- tests/integration/test_diag_indices.py | 4 +- tests/integration/test_diff.py | 66 + tests/integration/test_digitize.py | 166 + tests/integration/test_dot.py | 8 +- tests/integration/test_einsum.py | 24 +- tests/integration/test_einsum_path.py | 8 +- tests/integration/test_exp.py | 10 +- tests/integration/test_expand_dims.py | 92 + tests/integration/test_extract.py | 12 +- tests/integration/test_eye.py | 14 +- tests/integration/test_fallback.py | 15 +- tests/integration/test_fft_c2c.py | 37 +- tests/integration/test_fft_c2r.py | 36 +- tests/integration/test_fft_hermitian.py | 4 +- tests/integration/test_fft_r2c.py | 4 +- tests/integration/test_fftshift.py | 70 + tests/integration/test_file.py | 48 + tests/integration/test_fill.py | 16 +- tests/integration/test_fill_diagonal.py | 7 +- tests/integration/test_flags.py | 4 +- tests/integration/test_flatten.py | 6 +- tests/integration/test_flip.py | 11 +- tests/integration/test_floating.py | 9 +- tests/integration/test_get_item.py | 4 +- tests/integration/test_gradient.py | 164 + tests/integration/test_histogram.py | 171 +- tests/integration/test_identity.py | 4 +- tests/integration/test_index_routines.py | 68 +- tests/integration/test_indices.py | 6 +- tests/integration/test_ingest.py | 101 - .../test_inlinemap-keeps-region-alive.py | 4 +- 
tests/integration/test_inner.py | 6 +- tests/integration/test_input_output.py | 4 +- tests/integration/test_intra_array_copy.py | 6 +- tests/integration/test_item.py | 10 +- tests/integration/test_itemset.py | 17 +- tests/integration/test_jacobi.py | 4 +- tests/integration/test_length.py | 4 +- tests/integration/test_linspace.py | 16 +- tests/integration/test_logic.py | 13 +- tests/integration/test_logical.py | 8 +- tests/integration/test_logical_reduction.py | 61 + tests/integration/test_lstm_backward_test.py | 4 +- tests/integration/test_lstm_simple_forward.py | 4 +- tests/integration/test_map_reduce.py | 4 +- tests/integration/test_mask.py | 4 +- tests/integration/test_mask_indices.py | 10 +- tests/integration/test_matmul.py | 95 +- tests/integration/test_matrix_power.py | 8 +- tests/integration/test_mean.py | 6 +- tests/integration/test_median.py | 223 + tests/integration/test_meshgrid.py | 71 + tests/integration/test_min_on_gpu.py | 4 +- tests/integration/test_moveaxis.py | 13 +- tests/integration/test_msort.py | 10 +- tests/integration/test_multi_dot.py | 8 +- tests/integration/test_nan_reduction.py | 65 +- tests/integration/test_nanarg_reduction.py | 36 +- tests/integration/test_nanmean.py | 6 +- tests/integration/test_nanpercentiles.py | 102 + tests/integration/test_nanquantiles.py | 102 + tests/integration/test_nd_convolve.py | 85 + tests/integration/test_ndim.py | 4 +- tests/integration/test_negaxes_quantiles.py | 112 + tests/integration/test_nonzero.py | 53 +- tests/integration/test_norm.py | 12 +- tests/integration/test_ones.py | 10 +- tests/integration/test_outer.py | 4 +- tests/integration/test_overlap.py | 14 +- tests/integration/test_overwrite_slice.py | 4 +- tests/integration/test_partition.py | 8 +- tests/integration/test_percentiles.py | 8 +- tests/integration/test_prod.py | 41 +- tests/integration/test_put.py | 6 +- tests/integration/test_put_along_axis.py | 6 +- tests/integration/test_putmask.py | 8 +- tests/integration/test_qr.py | 110 + 
tests/integration/test_quantiles.py | 306 +- tests/integration/test_randint.py | 4 +- tests/integration/test_random.py | 109 + tests/integration/test_random_advanced.py | 27 +- tests/integration/test_random_beta.py | 22 +- tests/integration/test_random_bitgenerator.py | 26 +- tests/integration/test_random_creation.py | 104 +- tests/integration/test_random_gamma.py | 22 +- .../test_random_straightforward.py | 33 +- tests/integration/test_reduction.py | 33 +- tests/integration/test_repeat.py | 11 +- tests/integration/test_reshape.py | 12 +- tests/integration/test_roll.py | 103 + tests/integration/test_rot90.py | 76 + tests/integration/test_round.py | 131 + tests/integration/test_scan.py | 12 +- tests/integration/test_searchsorted.py | 14 +- tests/integration/test_set_item.py | 4 +- tests/integration/test_setflags.py | 8 +- tests/integration/test_shape.py | 4 +- tests/integration/test_singleton_access.py | 30 +- tests/integration/test_slicing.py | 4 +- tests/integration/test_solve.py | 12 +- tests/integration/test_sort.py | 20 +- tests/integration/test_sort_complex.py | 8 +- tests/integration/test_split.py | 24 +- tests/integration/test_squeeze.py | 9 +- tests/integration/test_stack.py | 70 + tests/integration/test_stats.py | 255 +- tests/integration/test_svd.py | 132 + tests/integration/test_swapaxes.py | 4 +- tests/integration/test_take.py | 10 +- tests/integration/test_take_along_axis.py | 6 +- tests/integration/test_tensordot.py | 8 +- tests/integration/test_tile.py | 4 +- tests/integration/test_trace.py | 10 +- tests/integration/test_transpose.py | 32 +- tests/integration/test_tri.py | 20 +- tests/integration/test_trilu.py | 8 +- tests/integration/test_trilu_indices.py | 20 +- .../test_unary_functions_2d_complex.py | 4 +- tests/integration/test_unary_ufunc.py | 10 +- tests/integration/test_unique.py | 6 +- tests/integration/test_unravel_index.py | 166 + tests/integration/test_update.py | 4 +- tests/integration/test_vdot.py | 8 +- tests/integration/test_view.py | 
4 +- tests/integration/test_where.py | 12 +- tests/integration/test_window.py | 4 +- tests/integration/utils/__init__.py | 2 +- tests/integration/utils/comparisons.py | 12 +- tests/integration/utils/contractions.py | 4 +- tests/integration/utils/generators.py | 2 +- tests/integration/utils/random.py | 8 +- tests/integration/utils/utils.py | 20 +- tests/todo/2d_reduction_complex.py | 4 +- tests/todo/assign_slice.py | 4 +- tests/todo/complex_test.py | 4 +- tests/todo/dot.py | 4 +- tests/todo/indirect.py | 4 +- tests/todo/kmeans_test.py | 4 +- tests/todo/lstm_batch.py | 4 +- tests/todo/lstm_simple_backward.py | 4 +- tests/unit/__init__.py | 16 + tests/unit/cupynumeric/__init__.py | 16 + tests/unit/cupynumeric/_array/__init__.py | 16 + tests/unit/cupynumeric/_array/test_util.py | 238 + tests/unit/cupynumeric/_sphinxext/__init__.py | 17 + .../_sphinxext/test__comparison_util.py | 6 +- tests/unit/cupynumeric/_utils/__init__.py | 16 + tests/unit/cupynumeric/_utils/test_array.py | 101 + .../_utils}/test_coverage.py | 76 +- .../_utils/test_linalg.py} | 129 +- tests/unit/cupynumeric/random/__init__.py | 16 + .../random/test_bitgenerator.py | 6 +- .../{cunumeric => cupynumeric}/test_config.py | 104 +- tests/unit/cupynumeric/test_nptest.py | 39 + .../{cunumeric => cupynumeric}/test_patch.py | 6 +- .../test_settings.py | 12 +- tests/unit/util.py | 34 + versioneer.py | 2 +- 917 files changed, 50029 insertions(+), 24643 deletions(-) create mode 100644 .github/CODEOWNERS delete mode 100644 .github/actions/download-artifacts/action.yml delete mode 100644 .github/workflows/ci-gh.yml delete mode 100644 .github/workflows/gh-build-and-test.yml delete mode 100644 .github/workflows/gh-build.yml delete mode 100644 .github/workflows/gh-test.yml delete mode 100644 .github/workflows/require-labels.yml delete mode 100644 LICENSES_bundled.txt create mode 100644 cmake/thirdparty/get_legate.cmake delete mode 100644 cmake/thirdparty/get_legate_core.cmake delete mode 100644 
continuous_integration/dot-gitconfig delete mode 100755 continuous_integration/scripts/build-cunumeric-all delete mode 100755 continuous_integration/scripts/build-cunumeric-conda delete mode 100755 continuous_integration/scripts/build-cunumeric-cpp delete mode 100755 continuous_integration/scripts/build-cunumeric-wheel delete mode 100755 continuous_integration/scripts/entrypoint delete mode 100755 continuous_integration/scripts/test-cunumeric delete mode 100644 cunumeric/_ufunc/comparison.py delete mode 100644 cunumeric/config.py delete mode 100644 cunumeric/install_info.py.in delete mode 100644 cunumeric/linalg/cholesky.py delete mode 100644 cunumeric/linalg/solve.py delete mode 100644 cunumeric/module.py delete mode 100644 cunumeric/random/legacy.py delete mode 100644 cunumeric_cpp.cmake rename {cunumeric => cupynumeric}/__init__.py (69%) create mode 100644 cupynumeric/_array/__init__.py rename {cunumeric => cupynumeric/_array}/array.py (72%) create mode 100644 cupynumeric/_array/flags.py create mode 100644 cupynumeric/_array/thunk.py create mode 100644 cupynumeric/_array/util.py create mode 100644 cupynumeric/_module/__init__.py rename {cunumeric => cupynumeric/_module}/_unary_red_utils.py (95%) create mode 100644 cupynumeric/_module/array_basic.py create mode 100644 cupynumeric/_module/array_dimension.py create mode 100644 cupynumeric/_module/array_joining.py create mode 100644 cupynumeric/_module/array_rearrange.py create mode 100644 cupynumeric/_module/array_shape.py create mode 100644 cupynumeric/_module/array_splitting.py create mode 100644 cupynumeric/_module/array_tiling.py create mode 100644 cupynumeric/_module/array_transpose.py rename cunumeric/bits.py => cupynumeric/_module/binary_bit_packing.py (92%) create mode 100644 cupynumeric/_module/creation_data.py create mode 100644 cupynumeric/_module/creation_matrices.py create mode 100644 cupynumeric/_module/creation_ranges.py create mode 100644 cupynumeric/_module/creation_shape.py create mode 100644 
cupynumeric/_module/indexing.py create mode 100644 cupynumeric/_module/io_numpy.py create mode 100644 cupynumeric/_module/linalg_mvp.py create mode 100644 cupynumeric/_module/logic_array_contents.py rename cunumeric/logic.py => cupynumeric/_module/logic_array_type.py (51%) create mode 100644 cupynumeric/_module/logic_comparison.py create mode 100644 cupynumeric/_module/logic_truth.py create mode 100644 cupynumeric/_module/math_complex.py create mode 100644 cupynumeric/_module/math_extrema.py create mode 100644 cupynumeric/_module/math_misc.py create mode 100644 cupynumeric/_module/math_rounding.py create mode 100644 cupynumeric/_module/math_sum_prod_diff.py create mode 100644 cupynumeric/_module/sets_making.py create mode 100644 cupynumeric/_module/ssc_counting.py create mode 100644 cupynumeric/_module/ssc_searching.py create mode 100644 cupynumeric/_module/ssc_sorting.py create mode 100644 cupynumeric/_module/stats_avgs_vars.py create mode 100644 cupynumeric/_module/stats_correlating.py create mode 100644 cupynumeric/_module/stats_histograms.py create mode 100644 cupynumeric/_module/stats_order.py rename {cunumeric => cupynumeric/_module}/window.py (96%) rename {cunumeric => cupynumeric}/_sphinxext/__init__.py (95%) rename {cunumeric => cupynumeric}/_sphinxext/_comparison_config.py (98%) rename {cunumeric => cupynumeric}/_sphinxext/_comparison_util.py (87%) rename cunumeric/_sphinxext/_cunumeric_directive.py => cupynumeric/_sphinxext/_cupynumeric_directive.py (86%) rename {cunumeric => cupynumeric}/_sphinxext/_templates.py (95%) rename {cunumeric => cupynumeric}/_sphinxext/_templates/comparison_table.rst (69%) rename {cunumeric => cupynumeric}/_sphinxext/comparison_table.py (92%) rename {cunumeric => cupynumeric}/_sphinxext/implemented_index.py (86%) rename {cunumeric => cupynumeric}/_sphinxext/missing_refs.py (69%) rename {cunumeric => cupynumeric}/_sphinxext/ufunc_formatter.py (94%) create mode 100644 cupynumeric/_thunk/__init__.py rename cunumeric/sort.py => 
cupynumeric/_thunk/_sort.py (72%) rename {cunumeric => cupynumeric/_thunk}/deferred.py (75%) rename {cunumeric => cupynumeric/_thunk}/eager.py (84%) rename {cunumeric => cupynumeric/_thunk}/thunk.py (81%) rename {cunumeric => cupynumeric}/_ufunc/__init__.py (95%) rename {cunumeric => cupynumeric}/_ufunc/bit_twiddling.py (94%) create mode 100644 cupynumeric/_ufunc/comparison.py rename {cunumeric => cupynumeric}/_ufunc/floating.py (97%) rename {cunumeric => cupynumeric}/_ufunc/math.py (98%) rename {cunumeric => cupynumeric}/_ufunc/trigonometric.py (96%) rename {cunumeric => cupynumeric}/_ufunc/ufunc.py (77%) create mode 100644 cupynumeric/_utils/__init__.py create mode 100644 cupynumeric/_utils/array.py rename {cunumeric => cupynumeric/_utils}/coverage.py (87%) rename cunumeric/utils.py => cupynumeric/_utils/linalg.py (51%) create mode 100644 cupynumeric/_utils/stack.py create mode 100644 cupynumeric/_utils/structure.py rename {cunumeric => cupynumeric}/_version.py (99%) create mode 100644 cupynumeric/config.py rename {cunumeric => cupynumeric}/fft/__init__.py (80%) rename {cunumeric => cupynumeric}/fft/fft.py (91%) create mode 100644 cupynumeric/install_info.py.in rename {cunumeric => cupynumeric}/linalg/__init__.py (77%) create mode 100644 cupynumeric/linalg/_cholesky.py rename cunumeric/linalg/exception.py => cupynumeric/linalg/_exception.py (94%) create mode 100644 cupynumeric/linalg/_qr.py create mode 100644 cupynumeric/linalg/_solve.py create mode 100644 cupynumeric/linalg/_svd.py rename {cunumeric => cupynumeric}/linalg/linalg.py (78%) rename {cunumeric => cupynumeric}/ma/__init__.py (80%) rename {cunumeric => cupynumeric}/ma/_masked_array.py (87%) rename {cunumeric => cupynumeric}/patch.py (80%) rename {cunumeric => cupynumeric}/py.typed (100%) rename {cunumeric => cupynumeric}/random/__init__.py (67%) rename cunumeric/random/bitgenerator.py => cupynumeric/random/_bitgenerator.py (89%) rename cunumeric/random/generator.py => cupynumeric/random/_generator.py 
(83%) rename cunumeric/random/random.py => cupynumeric/random/_random.py (90%) rename {cunumeric => cupynumeric}/runtime.py (60%) rename {cunumeric => cupynumeric}/settings.py (75%) rename {cunumeric => cupynumeric}/types.py (83%) create mode 100644 cupynumeric_cpp.cmake rename cunumeric_python.cmake => cupynumeric_python.cmake (68%) delete mode 100644 docs/cunumeric/source/api/settings.rst delete mode 120000 docs/cunumeric/source/developer/CONTRIBUTING.md delete mode 100644 docs/cunumeric/source/developer/building.rst delete mode 100644 docs/cunumeric/source/index.rst delete mode 100644 docs/cunumeric/source/user/configuration.rst delete mode 100644 docs/cunumeric/source/user/installation.rst delete mode 100644 docs/cunumeric/source/user/notebooks.rst delete mode 100644 docs/cunumeric/source/user/usage.rst delete mode 100644 docs/cunumeric/source/versions.rst rename docs/{cunumeric => cupynumeric}/Makefile (87%) rename docs/{cunumeric => cupynumeric}/make.bat (100%) rename docs/{cunumeric => cupynumeric}/source/_images/developer-build.png (100%) rename docs/{cunumeric => cupynumeric}/source/_implemented.rst (73%) rename docs/{cunumeric => cupynumeric}/source/_static/.keep (100%) create mode 100644 docs/cupynumeric/source/_templates/layout.html rename docs/{cunumeric => cupynumeric}/source/api/_bitgenerator.rst (65%) rename docs/{cunumeric => cupynumeric}/source/api/_generator.rst (65%) rename docs/{cunumeric/source/comparison => cupynumeric/source/api}/_grouped.rst (100%) rename docs/{cunumeric => cupynumeric}/source/api/_ndarray.rst (95%) rename docs/{cunumeric => cupynumeric}/source/api/binary.rst (90%) rename docs/{cunumeric => cupynumeric}/source/api/broadcast.rst (52%) rename docs/{cunumeric => cupynumeric}/source/api/classes.rst (100%) create mode 100644 docs/cupynumeric/source/api/comparison.rst rename docs/{cunumeric => cupynumeric}/source/api/creation.rst (93%) create mode 100644 docs/cupynumeric/source/api/datatype.rst rename docs/{cunumeric => 
cupynumeric}/source/api/fft.rst (67%) rename docs/{cunumeric => cupynumeric}/source/api/index.rst (67%) rename docs/{cunumeric => cupynumeric}/source/api/indexing.rst (92%) create mode 100644 docs/cupynumeric/source/api/io.rst rename docs/{cunumeric => cupynumeric}/source/api/linalg.rst (82%) rename docs/{cunumeric => cupynumeric}/source/api/logic.rst (95%) rename docs/{cunumeric => cupynumeric}/source/api/manipulation.rst (91%) rename docs/{cunumeric => cupynumeric}/source/api/math.rst (96%) rename docs/{cunumeric => cupynumeric}/source/api/ndarray.rst (97%) rename docs/{cunumeric => cupynumeric}/source/api/random.rst (93%) rename docs/{cunumeric => cupynumeric}/source/api/routines.rst (91%) rename docs/{cunumeric => cupynumeric}/source/api/set.rst (79%) create mode 100644 docs/cupynumeric/source/api/settings.rst rename docs/{cunumeric => cupynumeric}/source/api/sorting.rst (93%) rename docs/{cunumeric => cupynumeric}/source/api/statistics.rst (64%) rename docs/{cunumeric => cupynumeric}/source/api/window.rst (85%) rename docs/{cunumeric => cupynumeric}/source/conf.py (59%) create mode 100644 docs/cupynumeric/source/developer/CONTRIBUTING.md create mode 100644 docs/cupynumeric/source/developer/building.rst rename docs/{cunumeric => cupynumeric}/source/developer/index.rst (100%) rename docs/{cunumeric => cupynumeric}/source/developer/testing.rst (97%) rename docs/{cunumeric/source/user/notebooks => cupynumeric/source/examples}/black_scholes.ipynb (99%) rename docs/{cunumeric/source/user/notebooks => cupynumeric/source/examples}/cholesky.ipynb (86%) create mode 100644 docs/cupynumeric/source/examples/compact_finite_difference.ipynb create mode 100644 docs/cupynumeric/source/examples/edge_detection.ipynb create mode 100644 docs/cupynumeric/source/examples/image.png create mode 100644 docs/cupynumeric/source/examples/index.rst create mode 100644 docs/cupynumeric/source/examples/kmeans.ipynb create mode 100644 docs/cupynumeric/source/examples/newton_raphson_2d.ipynb 
rename docs/{cunumeric/source/user/notebooks => cupynumeric/source/examples}/stencil.ipynb (99%) create mode 100644 docs/cupynumeric/source/examples/torchswe.ipynb create mode 100644 docs/cupynumeric/source/faqs.rst create mode 100644 docs/cupynumeric/source/index.rst create mode 100644 docs/cupynumeric/source/installation.rst create mode 100644 docs/cupynumeric/source/oss-licenses.rst create mode 100644 docs/cupynumeric/source/user/advanced.rst create mode 100644 docs/cupynumeric/source/user/differences.rst create mode 100644 docs/cupynumeric/source/user/howtos/benchmarking.rst create mode 100644 docs/cupynumeric/source/user/howtos/index.rst rename docs/{cunumeric/source/comparison/index.rst => cupynumeric/source/user/howtos/measuring.rst} (50%) create mode 100644 docs/cupynumeric/source/user/howtos/patching.rst rename docs/{cunumeric => cupynumeric}/source/user/index.rst (62%) rename docs/{cunumeric => cupynumeric}/source/user/practices.rst (71%) create mode 100644 docs/cupynumeric/source/user/usage.rst create mode 100644 docs/cupynumeric/switcher.json create mode 100644 examples/cpp/stencil/CMakeLists.txt create mode 100755 examples/cpp/stencil/build.sh create mode 100644 examples/cpp/stencil/stencil.cc delete mode 100644 examples/ingest.py delete mode 100644 examples/kmeans_sort.py delete mode 100644 examples/lstm_full.py create mode 100644 examples/qr.py create mode 100644 examples/svd.py delete mode 100644 examples/wgrad.py delete mode 100755 scripts/build-install.sh delete mode 100755 scripts/build-no-install.sh delete mode 100644 scripts/build-separately-no-install.sh delete mode 100644 scripts/build-with-legate-no-install.sh delete mode 100755 scripts/build-with-legate-separately-no-install.sh create mode 100755 scripts/hooks/legate_defined.sh delete mode 100755 scripts/util/build-caching.sh delete mode 100755 scripts/util/compiler-flags.sh delete mode 100755 scripts/util/read-legate-core-root.sh delete mode 100755 
scripts/util/uninstall-global-legion-legate-core-and-cunumeric.sh delete mode 100644 src/cunumeric/arg_redop_register.h delete mode 100644 src/cunumeric/binary/binary_op.h delete mode 100644 src/cunumeric/binary/binary_red.h delete mode 100644 src/cunumeric/cunumeric.cc delete mode 100644 src/cunumeric/cunumeric_c.h delete mode 100644 src/cunumeric/index/zip.h delete mode 100644 src/cunumeric/mapper.cc delete mode 100644 src/cunumeric/random/bitgenerator_util.h delete mode 100644 src/cunumeric/random/curand_help.h delete mode 100644 src/cunumeric/random/randutil/generator_create.inl delete mode 100644 src/cunumeric/random/randutil/generator_integers.inl delete mode 100644 src/cunumeric/random/randutil/randutil.h delete mode 100644 src/cunumeric/scan/scan_global_util.h delete mode 100644 src/cunumeric/stat/bincount.h create mode 100644 src/cupynumeric.h rename src/{cunumeric => cupynumeric}/arg.h (95%) rename src/{cunumeric => cupynumeric}/arg.inl (98%) rename src/{cunumeric => cupynumeric}/arg_redop_register.cc (71%) rename src/{cunumeric => cupynumeric}/arg_redop_register.cu (64%) create mode 100644 src/cupynumeric/arg_redop_register.h rename src/{cunumeric => cupynumeric}/binary/binary_op.cc (81%) rename src/{cunumeric => cupynumeric}/binary/binary_op.cu (85%) create mode 100644 src/cupynumeric/binary/binary_op.h rename src/{cunumeric => cupynumeric}/binary/binary_op_omp.cc (80%) rename src/{cunumeric => cupynumeric}/binary/binary_op_template.inl (78%) create mode 100644 src/cupynumeric/binary/binary_op_util.cc rename src/{cunumeric => cupynumeric}/binary/binary_op_util.h (77%) rename src/{cunumeric => cupynumeric}/binary/binary_red.cc (83%) rename src/{cunumeric => cupynumeric}/binary/binary_red.cu (80%) create mode 100644 src/cupynumeric/binary/binary_red.h rename src/{cunumeric => cupynumeric}/binary/binary_red_omp.cc (75%) rename src/{cunumeric => cupynumeric}/binary/binary_red_template.inl (80%) rename src/{cunumeric => cupynumeric}/bits/bits_util.h (74%) 
rename src/{cunumeric => cupynumeric}/bits/packbits.cc (87%) rename src/{cunumeric => cupynumeric}/bits/packbits.cu (88%) rename src/{cunumeric => cupynumeric}/bits/packbits.h (85%) rename src/{cunumeric => cupynumeric}/bits/packbits_omp.cc (87%) rename src/{cunumeric => cupynumeric}/bits/packbits_template.inl (79%) rename src/{cunumeric => cupynumeric}/bits/unpackbits.cc (84%) rename src/{cunumeric => cupynumeric}/bits/unpackbits.cu (83%) rename src/{cunumeric => cupynumeric}/bits/unpackbits.h (75%) rename src/{cunumeric => cupynumeric}/bits/unpackbits_omp.cc (83%) rename src/{cunumeric => cupynumeric}/bits/unpackbits_template.inl (71%) rename src/{cunumeric => cupynumeric}/cephes/chbevl.cc (100%) rename src/{cunumeric => cupynumeric}/cephes/i0.cc (99%) rename src/{cunumeric => cupynumeric}/convolution/convolve.cc (85%) rename src/{cunumeric => cupynumeric}/convolution/convolve.cu (84%) rename src/{cunumeric => cupynumeric}/convolution/convolve.h (59%) rename src/{cunumeric => cupynumeric}/convolution/convolve_omp.cc (82%) rename src/{cunumeric => cupynumeric}/convolution/convolve_template.inl (69%) rename src/{cunumeric => cupynumeric}/cuda_help.h (68%) rename src/{cunumeric => cupynumeric}/cudalibs.cu (69%) rename src/{cunumeric => cupynumeric}/cudalibs.h (67%) create mode 100644 src/cupynumeric/cupynumeric.cc create mode 100644 src/cupynumeric/cupynumeric.cu create mode 100644 src/cupynumeric/cupynumeric_c.h rename src/{cunumeric/cunumeric.h => cupynumeric/cupynumeric_task.h} (72%) rename src/{cunumeric => cupynumeric}/device_scalar_reduction_buffer.h (74%) rename src/{cunumeric => cupynumeric}/divmod.h (97%) rename src/{cunumeric => cupynumeric}/execution_policy/indexing/parallel_loop.cuh (78%) rename src/{cunumeric => cupynumeric}/execution_policy/indexing/parallel_loop.h (80%) rename src/{cunumeric => cupynumeric}/execution_policy/indexing/parallel_loop_omp.h (73%) rename src/{cunumeric => cupynumeric}/execution_policy/reduction/scalar_reduction.cuh (88%) 
rename src/{cunumeric => cupynumeric}/execution_policy/reduction/scalar_reduction.h (86%) rename src/{cunumeric => cupynumeric}/execution_policy/reduction/scalar_reduction_omp.h (68%) rename src/{cunumeric => cupynumeric}/fft/fft.cu (81%) rename src/{cunumeric => cupynumeric}/fft/fft.h (56%) rename src/{cunumeric => cupynumeric}/fft/fft_template.inl (68%) rename src/{cunumeric => cupynumeric}/fft/fft_util.h (52%) rename src/{cunumeric => cupynumeric}/index/advanced_indexing.cc (75%) rename src/{cunumeric => cupynumeric}/index/advanced_indexing.cu (86%) rename src/{cunumeric => cupynumeric}/index/advanced_indexing.h (59%) rename src/{cunumeric => cupynumeric}/index/advanced_indexing_omp.cc (78%) rename src/{cunumeric => cupynumeric}/index/advanced_indexing_template.inl (81%) rename src/{cunumeric => cupynumeric}/index/choose.cc (78%) rename src/{cunumeric => cupynumeric}/index/choose.cu (80%) create mode 100644 src/cupynumeric/index/choose.h rename src/{cunumeric => cupynumeric}/index/choose_omp.cc (81%) rename src/{cunumeric => cupynumeric}/index/choose_template.inl (78%) rename src/{cunumeric => cupynumeric}/index/putmask.cc (76%) rename src/{cunumeric => cupynumeric}/index/putmask.cu (66%) create mode 100644 src/cupynumeric/index/putmask.h rename src/{cunumeric => cupynumeric}/index/putmask_omp.cc (66%) rename src/{cunumeric => cupynumeric}/index/putmask_template.inl (76%) rename src/{cunumeric => cupynumeric}/index/repeat.cc (82%) rename src/{cunumeric => cupynumeric}/index/repeat.cu (82%) rename src/{cunumeric => cupynumeric}/index/repeat.h (53%) rename src/{cunumeric => cupynumeric}/index/repeat_omp.cc (78%) rename src/{cunumeric => cupynumeric}/index/repeat_template.inl (65%) rename src/{cunumeric => cupynumeric}/index/select.cc (78%) rename src/{cunumeric => cupynumeric}/index/select.cu (77%) rename src/{cunumeric => cupynumeric}/index/select.h (54%) rename src/{cunumeric => cupynumeric}/index/select_omp.cc (78%) rename src/{cunumeric => 
cupynumeric}/index/select_template.inl (74%) rename src/{cunumeric => cupynumeric}/index/wrap.cc (83%) rename src/{cunumeric => cupynumeric}/index/wrap.cu (89%) rename src/{cunumeric => cupynumeric}/index/wrap.h (57%) rename src/{cunumeric => cupynumeric}/index/wrap_omp.cc (76%) rename src/{cunumeric => cupynumeric}/index/wrap_template.inl (74%) rename src/{cunumeric => cupynumeric}/index/zip.cc (88%) rename src/{cunumeric => cupynumeric}/index/zip.cu (88%) create mode 100644 src/cupynumeric/index/zip.h rename src/{cunumeric => cupynumeric}/index/zip_omp.cc (80%) rename src/{cunumeric => cupynumeric}/index/zip_template.inl (74%) rename src/{cunumeric => cupynumeric}/item/read.cc (80%) rename src/{cunumeric => cupynumeric}/item/read.cu (76%) rename src/{cunumeric/index/putmask.h => cupynumeric/item/read.h} (53%) rename src/{cunumeric => cupynumeric}/item/read_template.inl (73%) rename src/{cunumeric => cupynumeric}/item/write.cc (63%) rename src/{cunumeric => cupynumeric}/item/write.cu (59%) rename src/{cunumeric/item/read.h => cupynumeric/item/write.h} (55%) rename src/{cunumeric => cupynumeric}/item/write_template.inl (58%) create mode 100644 src/cupynumeric/mapper.cc rename src/{cunumeric => cupynumeric}/mapper.h (50%) rename src/{cunumeric => cupynumeric}/matrix/batched_cholesky.cc (81%) rename src/{cunumeric => cupynumeric}/matrix/batched_cholesky.cu (85%) rename src/{cunumeric => cupynumeric}/matrix/batched_cholesky.h (52%) rename src/{cunumeric => cupynumeric}/matrix/batched_cholesky_omp.cc (84%) rename src/{cunumeric => cupynumeric}/matrix/batched_cholesky_template.inl (81%) rename src/{cunumeric => cupynumeric}/matrix/contract.cc (97%) rename src/{cunumeric => cupynumeric}/matrix/contract.cu (67%) rename src/{cunumeric => cupynumeric}/matrix/contract.h (56%) rename src/{cunumeric => cupynumeric}/matrix/contract_omp.cc (96%) rename src/{cunumeric => cupynumeric}/matrix/contract_template.inl (88%) rename src/{cunumeric => cupynumeric}/matrix/diag.cc (81%) 
rename src/{cunumeric => cupynumeric}/matrix/diag.cu (86%) rename src/{cunumeric => cupynumeric}/matrix/diag.h (54%) rename src/{cunumeric => cupynumeric}/matrix/diag_omp.cc (85%) rename src/{cunumeric => cupynumeric}/matrix/diag_template.inl (82%) rename src/{cunumeric => cupynumeric}/matrix/dot.cc (83%) rename src/{cunumeric => cupynumeric}/matrix/dot.cu (88%) rename src/{cunumeric => cupynumeric}/matrix/dot.h (52%) rename src/{cunumeric => cupynumeric}/matrix/dot_omp.cc (79%) rename src/{cunumeric => cupynumeric}/matrix/dot_template.inl (84%) rename src/{cunumeric => cupynumeric}/matrix/gemm.cc (92%) rename src/{cunumeric => cupynumeric}/matrix/gemm.cu (90%) rename src/{cunumeric => cupynumeric}/matrix/gemm.h (55%) rename src/{cunumeric => cupynumeric}/matrix/gemm_omp.cc (92%) rename src/{cunumeric => cupynumeric}/matrix/gemm_template.inl (78%) rename src/{cunumeric => cupynumeric}/matrix/matmul.cc (71%) rename src/{cunumeric => cupynumeric}/matrix/matmul.cu (94%) rename src/{cunumeric => cupynumeric}/matrix/matmul.h (52%) rename src/{cunumeric => cupynumeric}/matrix/matmul_cpu.inl (95%) rename src/{cunumeric => cupynumeric}/matrix/matmul_omp.cc (71%) rename src/{cunumeric => cupynumeric}/matrix/matmul_template.inl (50%) rename src/{cunumeric => cupynumeric}/matrix/matvecmul.cc (71%) rename src/{cunumeric => cupynumeric}/matrix/matvecmul.cu (93%) rename src/{cunumeric => cupynumeric}/matrix/matvecmul.h (51%) rename src/{cunumeric => cupynumeric}/matrix/matvecmul_cpu.inl (95%) rename src/{cunumeric => cupynumeric}/matrix/matvecmul_omp.cc (71%) rename src/{cunumeric => cupynumeric}/matrix/matvecmul_template.inl (88%) create mode 100644 src/cupynumeric/matrix/mp_potrf.cu rename src/{cunumeric/set/unique_reduce.h => cupynumeric/matrix/mp_potrf.h} (61%) create mode 100644 src/cupynumeric/matrix/mp_potrf_template.inl create mode 100644 src/cupynumeric/matrix/mp_solve.cu create mode 100644 src/cupynumeric/matrix/mp_solve.h create mode 100644 
src/cupynumeric/matrix/mp_solve_template.inl rename src/{cunumeric => cupynumeric}/matrix/potrf.cc (79%) rename src/{cunumeric => cupynumeric}/matrix/potrf.cu (86%) rename src/{cunumeric => cupynumeric}/matrix/potrf.h (55%) rename src/{cunumeric => cupynumeric}/matrix/potrf_omp.cc (79%) rename src/{cunumeric => cupynumeric}/matrix/potrf_template.inl (85%) create mode 100644 src/cupynumeric/matrix/qr.cc create mode 100644 src/cupynumeric/matrix/qr.cu rename src/{cunumeric/matrix/solve.h => cupynumeric/matrix/qr.h} (57%) create mode 100644 src/cupynumeric/matrix/qr_cpu.inl create mode 100644 src/cupynumeric/matrix/qr_omp.cc create mode 100644 src/cupynumeric/matrix/qr_template.inl rename src/{cunumeric => cupynumeric}/matrix/solve.cc (74%) rename src/{cunumeric => cupynumeric}/matrix/solve.cu (87%) create mode 100644 src/cupynumeric/matrix/solve.h rename src/{cunumeric => cupynumeric}/matrix/solve_cpu.inl (82%) rename src/{cunumeric => cupynumeric}/matrix/solve_omp.cc (71%) rename src/{cunumeric => cupynumeric}/matrix/solve_template.inl (74%) create mode 100644 src/cupynumeric/matrix/svd.cc create mode 100644 src/cupynumeric/matrix/svd.cu rename src/{cunumeric/index/choose.h => cupynumeric/matrix/svd.h} (53%) create mode 100644 src/cupynumeric/matrix/svd_cpu.inl create mode 100644 src/cupynumeric/matrix/svd_omp.cc create mode 100644 src/cupynumeric/matrix/svd_template.inl rename src/{cunumeric => cupynumeric}/matrix/syrk.cc (90%) rename src/{cunumeric => cupynumeric}/matrix/syrk.cu (88%) rename src/{cunumeric/item/write.h => cupynumeric/matrix/syrk.h} (55%) rename src/{cunumeric => cupynumeric}/matrix/syrk_omp.cc (91%) rename src/{cunumeric => cupynumeric}/matrix/syrk_template.inl (79%) rename src/{cunumeric => cupynumeric}/matrix/tile.cc (86%) rename src/{cunumeric => cupynumeric}/matrix/tile.cu (84%) rename src/{cunumeric => cupynumeric}/matrix/tile.h (53%) rename src/{cunumeric => cupynumeric}/matrix/tile_omp.cc (85%) rename src/{cunumeric => 
cupynumeric}/matrix/tile_template.inl (87%) rename src/{cunumeric => cupynumeric}/matrix/transpose.cc (74%) rename src/{cunumeric => cupynumeric}/matrix/transpose.cu (80%) rename src/{cunumeric => cupynumeric}/matrix/transpose.h (52%) rename src/{cunumeric => cupynumeric}/matrix/transpose_omp.cc (75%) rename src/{cunumeric => cupynumeric}/matrix/transpose_template.inl (77%) rename src/{cunumeric => cupynumeric}/matrix/trilu.cc (77%) rename src/{cunumeric => cupynumeric}/matrix/trilu.cu (79%) rename src/{cunumeric => cupynumeric}/matrix/trilu.h (54%) rename src/{cunumeric => cupynumeric}/matrix/trilu_omp.cc (78%) rename src/{cunumeric => cupynumeric}/matrix/trilu_template.inl (73%) rename src/{cunumeric => cupynumeric}/matrix/trsm.cc (90%) rename src/{cunumeric => cupynumeric}/matrix/trsm.cu (88%) rename src/{cunumeric => cupynumeric}/matrix/trsm.h (55%) rename src/{cunumeric => cupynumeric}/matrix/trsm_omp.cc (91%) rename src/{cunumeric => cupynumeric}/matrix/trsm_template.inl (79%) rename src/{cunumeric => cupynumeric}/matrix/util.cc (80%) rename src/{cunumeric => cupynumeric}/matrix/util.h (91%) create mode 100644 src/cupynumeric/ndarray.cc create mode 100644 src/cupynumeric/ndarray.h create mode 100644 src/cupynumeric/ndarray.inl rename src/{cunumeric => cupynumeric}/nullary/arange.cc (77%) rename src/{cunumeric => cupynumeric}/nullary/arange.cu (81%) rename src/{cunumeric => cupynumeric}/nullary/arange.h (52%) rename src/{cunumeric => cupynumeric}/nullary/arange_omp.cc (75%) rename src/{cunumeric => cupynumeric}/nullary/arange_template.inl (69%) rename src/{cunumeric => cupynumeric}/nullary/eye.cc (74%) rename src/{cunumeric => cupynumeric}/nullary/eye.cu (79%) rename src/{cunumeric => cupynumeric}/nullary/eye.h (55%) rename src/{cunumeric => cupynumeric}/nullary/eye_omp.cc (72%) rename src/{cunumeric => cupynumeric}/nullary/eye_template.inl (83%) rename src/{cunumeric => cupynumeric}/nullary/fill.cc (80%) rename src/{cunumeric => cupynumeric}/nullary/fill.cu 
(83%) rename src/{cunumeric => cupynumeric}/nullary/fill.h (53%) rename src/{cunumeric => cupynumeric}/nullary/fill_omp.cc (81%) rename src/{cunumeric => cupynumeric}/nullary/fill_template.inl (63%) rename src/{cunumeric => cupynumeric}/nullary/window.cc (71%) rename src/{cunumeric => cupynumeric}/nullary/window.cu (83%) rename src/{cunumeric => cupynumeric}/nullary/window.h (55%) rename src/{cunumeric => cupynumeric}/nullary/window_omp.cc (70%) rename src/{cunumeric => cupynumeric}/nullary/window_template.inl (69%) rename src/{cunumeric => cupynumeric}/nullary/window_util.h (90%) rename src/{cunumeric => cupynumeric}/omp_help.h (93%) create mode 100644 src/cupynumeric/operators.cc create mode 100644 src/cupynumeric/operators.h create mode 100644 src/cupynumeric/operators.inl rename src/{cunumeric => cupynumeric}/pitches.h (90%) rename src/{cunumeric => cupynumeric}/random/bitgenerator.cc (55%) rename src/{cunumeric => cupynumeric}/random/bitgenerator.cu (63%) rename src/{cunumeric => cupynumeric}/random/bitgenerator.h (70%) rename src/{cunumeric => cupynumeric}/random/bitgenerator_curand.inl (87%) rename src/{cunumeric => cupynumeric}/random/bitgenerator_template.inl (81%) create mode 100644 src/cupynumeric/random/bitgenerator_util.h create mode 100644 src/cupynumeric/random/curand_help.h rename src/{cunumeric => cupynumeric}/random/philox.h (97%) rename src/{cunumeric => cupynumeric}/random/rand.cc (80%) rename src/{cunumeric => cupynumeric}/random/rand.cu (78%) rename src/{cunumeric => cupynumeric}/random/rand.h (54%) rename src/{cunumeric => cupynumeric}/random/rand_omp.cc (79%) rename src/{cunumeric => cupynumeric}/random/rand_template.inl (72%) rename src/{cunumeric => cupynumeric}/random/rand_util.h (89%) rename src/{cunumeric => cupynumeric}/random/randutil/generator.cuh (80%) rename src/{cunumeric => cupynumeric}/random/randutil/generator.h (57%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_beta.inl (96%) rename src/{cunumeric => 
cupynumeric}/random/randutil/generator_binomial.inl (96%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_cauchy.inl (66%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_chisquare.inl (97%) create mode 100644 src/cupynumeric/random/randutil/generator_create.inl rename src/{cunumeric => cupynumeric}/random/randutil/generator_device.cu (98%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_device_advanced.cu (98%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_device_straightforward.cu (98%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_exponential.inl (85%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_f.inl (97%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_gamma.inl (96%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_geometric.inl (95%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_gumbel.inl (78%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_host.cc (71%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_host_advanced.cc (64%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_host_straightforward.cc (68%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_hypergeometric.inl (95%) create mode 100644 src/cupynumeric/random/randutil/generator_integers.inl rename src/{cunumeric => cupynumeric}/random/randutil/generator_laplace.inl (75%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_logistic.inl (76%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_lognormal.inl (84%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_logseries.inl (95%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_negative_binomial.inl (86%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_normal.inl (83%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_pareto.inl (70%) rename src/{cunumeric 
=> cupynumeric}/random/randutil/generator_poisson.inl (85%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_power.inl (81%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_raw.inl (86%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_rayleigh.inl (80%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_standard_t.inl (96%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_triangular.inl (67%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_uniform.inl (77%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_vonmises.inl (96%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_wald.inl (77%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_weibull.inl (75%) rename src/{cunumeric => cupynumeric}/random/randutil/generator_zipf.inl (95%) rename src/{cunumeric => cupynumeric}/random/randutil/random_distributions.h (89%) create mode 100644 src/cupynumeric/random/randutil/randomizer.h create mode 100644 src/cupynumeric/random/randutil/randutil.h rename src/{cunumeric => cupynumeric}/random/randutil/randutil_curand.h (90%) rename src/{cunumeric => cupynumeric}/random/randutil/randutil_impl.h (94%) create mode 100644 src/cupynumeric/random/rnd_aliases.h create mode 100644 src/cupynumeric/random/rnd_types.h create mode 100644 src/cupynumeric/runtime.cc create mode 100644 src/cupynumeric/runtime.h rename src/{cunumeric => cupynumeric}/scan/scan_global.cc (90%) rename src/{cunumeric => cupynumeric}/scan/scan_global.cu (86%) rename src/{cunumeric => cupynumeric}/scan/scan_global.h (53%) rename src/{cunumeric => cupynumeric}/scan/scan_global_omp.cc (90%) rename src/{cunumeric => cupynumeric}/scan/scan_global_template.inl (85%) rename src/{cunumeric => cupynumeric}/scan/scan_local.cc (87%) rename src/{cunumeric => cupynumeric}/scan/scan_local.cu (84%) rename src/{cunumeric => cupynumeric}/scan/scan_local.h (51%) rename src/{cunumeric => 
cupynumeric}/scan/scan_local_omp.cc (87%) rename src/{cunumeric => cupynumeric}/scan/scan_local_template.inl (85%) rename src/{cunumeric => cupynumeric}/scan/scan_local_util.h (86%) create mode 100644 src/cupynumeric/scan/scan_util.h rename src/{cunumeric => cupynumeric}/search/argwhere.cc (76%) rename src/{cunumeric => cupynumeric}/search/argwhere.cu (75%) rename src/{cunumeric => cupynumeric}/search/argwhere.h (53%) rename src/{cunumeric => cupynumeric}/search/argwhere_omp.cc (71%) rename src/{cunumeric => cupynumeric}/search/argwhere_template.inl (83%) rename src/{cunumeric => cupynumeric}/search/nonzero.cc (77%) rename src/{cunumeric => cupynumeric}/search/nonzero.cu (77%) rename src/{cunumeric => cupynumeric}/search/nonzero.cuh (92%) rename src/{cunumeric => cupynumeric}/search/nonzero.h (52%) rename src/{cunumeric => cupynumeric}/search/nonzero_omp.cc (70%) rename src/{cunumeric => cupynumeric}/search/nonzero_template.inl (74%) rename src/{cunumeric => cupynumeric}/set/unique.cc (80%) rename src/{cunumeric => cupynumeric}/set/unique.cu (85%) rename src/{cunumeric => cupynumeric}/set/unique.h (55%) rename src/{cunumeric => cupynumeric}/set/unique_omp.cc (85%) rename src/{cunumeric => cupynumeric}/set/unique_reduce.cc (75%) rename src/{cunumeric/matrix/syrk.h => cupynumeric/set/unique_reduce.h} (57%) rename src/{cunumeric => cupynumeric}/set/unique_reduce_omp.cc (72%) rename src/{cunumeric => cupynumeric}/set/unique_reduce_template.inl (74%) rename src/{cunumeric => cupynumeric}/set/unique_template.inl (78%) create mode 100644 src/cupynumeric/slice.h rename src/{cunumeric => cupynumeric}/sort/cub_sort.cuh (94%) rename src/{cunumeric => cupynumeric}/sort/cub_sort.h (97%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_bool.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_double.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_float.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_half.cu (88%) rename src/{cunumeric => 
cupynumeric}/sort/cub_sort_int16.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_int32.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_int64.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_int8.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_uint16.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_uint32.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_uint64.cu (88%) rename src/{cunumeric => cupynumeric}/sort/cub_sort_uint8.cu (88%) rename src/{cunumeric => cupynumeric}/sort/searchsorted.cc (77%) rename src/{cunumeric => cupynumeric}/sort/searchsorted.cu (82%) rename src/{cunumeric => cupynumeric}/sort/searchsorted.h (52%) rename src/{cunumeric => cupynumeric}/sort/searchsorted_omp.cc (77%) rename src/{cunumeric => cupynumeric}/sort/searchsorted_template.inl (82%) rename src/{cunumeric => cupynumeric}/sort/sort.cc (83%) rename src/{cunumeric => cupynumeric}/sort/sort.cu (90%) rename src/{cunumeric => cupynumeric}/sort/sort.h (76%) rename src/{cunumeric => cupynumeric}/sort/sort_cpu.inl (83%) rename src/{cunumeric => cupynumeric}/sort/sort_omp.cc (82%) rename src/{cunumeric => cupynumeric}/sort/sort_template.inl (79%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort.cuh (92%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort.h (98%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_bool.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_complex128.cu (89%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_complex64.cu (89%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_double.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_float.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_half.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_int16.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_int32.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_int64.cu (88%) rename src/{cunumeric 
=> cupynumeric}/sort/thrust_sort_int8.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_uint16.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_uint32.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_uint64.cu (88%) rename src/{cunumeric => cupynumeric}/sort/thrust_sort_uint8.cu (88%) rename src/{cunumeric => cupynumeric}/stat/bincount.cc (79%) rename src/{cunumeric => cupynumeric}/stat/bincount.cu (90%) create mode 100644 src/cupynumeric/stat/bincount.h rename src/{cunumeric => cupynumeric}/stat/bincount_omp.cc (85%) rename src/{cunumeric => cupynumeric}/stat/bincount_template.inl (76%) rename src/{cunumeric => cupynumeric}/stat/histogram.cc (83%) rename src/{cunumeric => cupynumeric}/stat/histogram.cu (81%) rename src/{cunumeric => cupynumeric}/stat/histogram.cuh (92%) rename src/{cunumeric => cupynumeric}/stat/histogram.h (50%) rename src/{cunumeric => cupynumeric}/stat/histogram_cpu.h (87%) rename src/{cunumeric => cupynumeric}/stat/histogram_gen.h (97%) rename src/{cunumeric => cupynumeric}/stat/histogram_impl.h (89%) rename src/{cunumeric => cupynumeric}/stat/histogram_omp.cc (90%) rename src/{cunumeric => cupynumeric}/stat/histogram_template.inl (88%) rename src/{cunumeric => cupynumeric}/ternary/where.cc (83%) rename src/{cunumeric => cupynumeric}/ternary/where.cu (84%) rename src/{cunumeric => cupynumeric}/ternary/where.h (50%) rename src/{cunumeric => cupynumeric}/ternary/where_omp.cc (83%) rename src/{cunumeric => cupynumeric}/ternary/where_template.inl (82%) rename src/{cunumeric => cupynumeric}/transform/flip.cc (79%) rename src/{cunumeric => cupynumeric}/transform/flip.cu (76%) rename src/{cunumeric => cupynumeric}/transform/flip.h (54%) rename src/{cunumeric => cupynumeric}/transform/flip_omp.cc (79%) rename src/{cunumeric => cupynumeric}/transform/flip_template.inl (76%) create mode 100644 src/cupynumeric/typedefs.h rename src/{cunumeric => cupynumeric}/unary/convert.cc (79%) rename src/{cunumeric => 
cupynumeric}/unary/convert.cu (82%) rename src/{cunumeric => cupynumeric}/unary/convert.h (52%) rename src/{cunumeric => cupynumeric}/unary/convert_omp.cc (78%) rename src/{cunumeric => cupynumeric}/unary/convert_template.inl (86%) rename src/{cunumeric => cupynumeric}/unary/convert_util.h (73%) rename src/{cunumeric => cupynumeric}/unary/isnan.h (89%) rename src/{cunumeric => cupynumeric}/unary/scalar_unary_red.cc (74%) rename src/{cunumeric => cupynumeric}/unary/scalar_unary_red.cu (63%) rename src/{cunumeric => cupynumeric}/unary/scalar_unary_red.h (51%) rename src/{cunumeric => cupynumeric}/unary/scalar_unary_red_omp.cc (65%) rename src/{cunumeric => cupynumeric}/unary/scalar_unary_red_template.inl (66%) rename src/{cunumeric => cupynumeric}/unary/unary_op.cc (84%) rename src/{cunumeric => cupynumeric}/unary/unary_op.cu (90%) rename src/{cunumeric => cupynumeric}/unary/unary_op.h (62%) rename src/{cunumeric => cupynumeric}/unary/unary_op_omp.cc (85%) rename src/{cunumeric => cupynumeric}/unary/unary_op_template.inl (82%) rename src/{cunumeric => cupynumeric}/unary/unary_op_util.h (72%) rename src/{cunumeric => cupynumeric}/unary/unary_red.cc (82%) rename src/{cunumeric => cupynumeric}/unary/unary_red.cu (89%) rename src/{cunumeric => cupynumeric}/unary/unary_red.h (51%) rename src/{cunumeric => cupynumeric}/unary/unary_red_omp.cc (84%) rename src/{cunumeric => cupynumeric}/unary/unary_red_template.inl (73%) rename src/{cunumeric => cupynumeric}/unary/unary_red_util.h (88%) create mode 100644 src/cupynumeric/utilities/repartition.cc create mode 100644 src/cupynumeric/utilities/repartition.cu create mode 100644 src/cupynumeric/utilities/repartition.h rename src/{cunumeric => cupynumeric}/utilities/thrust_allocator.h (91%) rename src/{cunumeric => cupynumeric}/utilities/thrust_util.h (95%) create mode 100644 tests/cpp/.gitignore create mode 100644 tests/cpp/CMakeLists.txt create mode 100644 tests/cpp/cmake/thirdparty/get_nccl.cmake create mode 100644 
tests/cpp/integration/common_utils.cc create mode 100644 tests/cpp/integration/common_utils.h create mode 100644 tests/cpp/integration/test_amax.cc create mode 100644 tests/cpp/integration/test_amin.cc create mode 100644 tests/cpp/integration/test_arange.cc create mode 100644 tests/cpp/integration/test_argsort.cc create mode 100644 tests/cpp/integration/test_argwhere.cc create mode 100644 tests/cpp/integration/test_bincount.cc create mode 100644 tests/cpp/integration/test_convolve.cc create mode 100644 tests/cpp/integration/test_diagonal.cc create mode 100644 tests/cpp/integration/test_dot.cc create mode 100644 tests/cpp/integration/test_eye.cc create mode 100644 tests/cpp/integration/test_fill.cc create mode 100644 tests/cpp/integration/test_flip.cc create mode 100644 tests/cpp/integration/test_logical.cc create mode 100644 tests/cpp/integration/test_moveaxis.cc create mode 100644 tests/cpp/integration/test_msort.cc create mode 100644 tests/cpp/integration/test_nonzero.cc create mode 100644 tests/cpp/integration/test_put.cc create mode 100644 tests/cpp/integration/test_repartition.cc create mode 100644 tests/cpp/integration/test_repeat.cc create mode 100644 tests/cpp/integration/test_reshape.cc create mode 100644 tests/cpp/integration/test_sort.cc create mode 100644 tests/cpp/integration/test_sort_complex.cc create mode 100644 tests/cpp/integration/test_squeeze.cc create mode 100644 tests/cpp/integration/test_swapaxes.cc create mode 100644 tests/cpp/integration/test_transpose.cc create mode 100644 tests/cpp/integration/test_trilu.cc create mode 100644 tests/cpp/integration/test_unique.cc create mode 100644 tests/cpp/integration/test_where.cc create mode 100644 tests/cpp/integration/test_window.cc create mode 100644 tests/cpp/integration/test_zeros.cc create mode 100644 tests/cpp/integration/util.inl create mode 100644 tests/cpp/main.cc create mode 100755 tests/cpp/run.py create mode 100755 tests/cpp/run.sh create mode 100644 tests/integration/test_angle.py create 
mode 100644 tests/integration/test_average.py create mode 100644 tests/integration/test_corner_quantiles.py create mode 100644 tests/integration/test_diff.py create mode 100644 tests/integration/test_digitize.py create mode 100644 tests/integration/test_expand_dims.py create mode 100644 tests/integration/test_fftshift.py create mode 100644 tests/integration/test_file.py create mode 100644 tests/integration/test_gradient.py delete mode 100644 tests/integration/test_ingest.py create mode 100644 tests/integration/test_logical_reduction.py create mode 100644 tests/integration/test_median.py create mode 100644 tests/integration/test_meshgrid.py create mode 100644 tests/integration/test_nanpercentiles.py create mode 100644 tests/integration/test_nanquantiles.py create mode 100644 tests/integration/test_nd_convolve.py create mode 100644 tests/integration/test_negaxes_quantiles.py create mode 100644 tests/integration/test_qr.py create mode 100644 tests/integration/test_random.py create mode 100644 tests/integration/test_roll.py create mode 100644 tests/integration/test_rot90.py create mode 100644 tests/integration/test_round.py create mode 100644 tests/integration/test_stack.py create mode 100644 tests/integration/test_svd.py create mode 100644 tests/integration/test_unravel_index.py create mode 100644 tests/unit/__init__.py create mode 100644 tests/unit/cupynumeric/__init__.py create mode 100644 tests/unit/cupynumeric/_array/__init__.py create mode 100644 tests/unit/cupynumeric/_array/test_util.py create mode 100644 tests/unit/cupynumeric/_sphinxext/__init__.py rename tests/unit/{cunumeric => cupynumeric}/_sphinxext/test__comparison_util.py (92%) create mode 100644 tests/unit/cupynumeric/_utils/__init__.py create mode 100644 tests/unit/cupynumeric/_utils/test_array.py rename tests/unit/{cunumeric => cupynumeric/_utils}/test_coverage.py (86%) rename tests/unit/{cunumeric/test_utils.py => cupynumeric/_utils/test_linalg.py} (60%) create mode 100644 
tests/unit/cupynumeric/random/__init__.py rename tests/unit/{cunumeric => cupynumeric}/random/test_bitgenerator.py (95%) rename tests/unit/{cunumeric => cupynumeric}/test_config.py (55%) create mode 100644 tests/unit/cupynumeric/test_nptest.py rename tests/unit/{cunumeric => cupynumeric}/test_patch.py (81%) rename tests/unit/{cunumeric => cupynumeric}/test_settings.py (91%) create mode 100644 tests/unit/util.py diff --git a/.clang-format b/.clang-format index 262238254..6d5353f99 100644 --- a/.clang-format +++ b/.clang-format @@ -8,7 +8,7 @@ AlignEscapedNewlines: Left AlignOperands: true AlignTrailingComments: true AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: true +AllowShortBlocksOnASingleLine: Empty AllowShortCaseLabelsOnASingleLine: true AllowShortFunctionsOnASingleLine: All AllowShortIfStatementsOnASingleLine: true @@ -72,6 +72,7 @@ IndentCaseLabels: true IndentPPDirectives: None IndentWidth: 2 IndentWrappedFunctionNames: false +InsertBraces: true JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: false diff --git a/.gitattributes b/.gitattributes index 8ae3c8012..1215d42fc 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1 @@ -cunumeric/_version.py export-subst +cupynumeric/_version.py export-subst diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 000000000..5ac9b710d --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,4 @@ +# Code Ownership +.github @marcinz @m3vaz @sandeepd-nv @mag1cp1n +continuous_integration @marcinz @m3vaz @sandeepd-nv @mag1cp1n +conda @marcinz @m3vaz @sandeepd-nv @mag1cp1n diff --git a/.github/ISSUE_TEMPLATE/bug_report.yml b/.github/ISSUE_TEMPLATE/bug_report.yml index 8bf716ed1..74fb1d45b 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yml +++ b/.github/ISSUE_TEMPLATE/bug_report.yml @@ -8,7 +8,7 @@ body: value: "# Bug report" - type: markdown attributes: - value: Thank you for reporting a bug and helping us improve Cunumeric! 
+ value: Thank you for reporting a bug and helping us improve cuPyNumeric! - type: markdown attributes: value: > @@ -25,19 +25,19 @@ body: description: >- Run `legate-issue` and paste the output here. placeholder: | - Python : 3.10.11 | packaged by conda-forge | (main, May 10 2023, 18:58:44) [GCC 11.3.0] - Platform : Linux-5.14.0-1042-oem-x86_64-with-glibc2.31 - Legion : v23.11.00.dev-16-g2499f878 - Legate : 23.11.00.dev+17.gb7b50313 - Cunumeric : (ImportError: cannot import name 'LogicalArray' from 'legate.core') - Numpy : 1.24.4 - Scipy : 1.10.1 - Numba : (not installed) - CTK package : cuda-version-11.8-h70ddcb2_2 (conda-forge) - GPU Driver : 515.65.01 - GPU Devices : - GPU 0: Quadro RTX 8000 - GPU 1: Quadro RTX 8000 + Python : 3.12.4 | packaged by conda-forge | (main, Jun 17 2024, 10:23:07) [GCC 12.3.0] + Platform : Linux-6.8.0-40-generic-x86_64-with-glibc2.35 + Legion : (failed to detect) + Legate : 24.05.00+255.g2656afbd + cuPynumeric : 24.05.00+132.gc4741d57 + Numpy : 1.26.4 + Scipy : 1.13.1 + Numba : (failed to detect) + CTK package : cuda-version-12.0-hffde075_3 (conda-forge) + GPU driver : 535.183.06 + GPU devices : + GPU 0: NVIDIA RTX A5000 + GPU 1: NVIDIA RTX A5000 validations: required: true - type: input @@ -76,10 +76,9 @@ body: attributes: label: Example code or instructions description: > - Please provide detailed instructions to reproduce the issue. Ideally this includes a + Please provide detailed instructions to reproduce the issue. Ideally this includes a [Complete, minimal, self-contained example code](https://stackoverflow.com/help/minimal-reproducible-example) - given here or as a link to code in another repository. - render: Python + given here or as a link to code in another repository. 
validations: required: true - type: markdown diff --git a/.github/actions/download-artifacts/action.yml b/.github/actions/download-artifacts/action.yml deleted file mode 100644 index 640dc143a..000000000 --- a/.github/actions/download-artifacts/action.yml +++ /dev/null @@ -1,48 +0,0 @@ -name: setup-legate-conda - -description: Download dependencies (artifacts) - -inputs: - device: {type: string, required: true} - git_sha: {type: string, required: true} - -runs: - using: composite - steps: - - - id: cache - name: Cache conda artifacts - uses: actions/cache@v3 - with: - key: "nv-legate/legate.core@${{ inputs.git_sha }}-${{ inputs.device }}" - path: .artifacts - - - if: steps.cache.outputs.cache-hit != 'true' - name: Download conda artifacts - uses: dawidd6/action-download-artifact@v2 - with: - path: .artifacts-dl - repo: nv-legate/legate.core - commit: ${{ inputs.git_sha }} - workflow_conclusion: success - workflow: "ci-gh.yml" - name: "legate.core-${{ inputs.device }}-[0-9a-z]{40}" - name_is_regexp: true - - - if: steps.cache.outputs.cache-hit != 'true' - name: Move conda artifacts into cached dir - shell: bash --noprofile --norc -xeo pipefail {0} - run: | - mkdir -p .artifacts; - find .artifacts-dl/legate.core-${{ inputs.device }}-*/ \ - -maxdepth 2 -type d -name legate_core -exec mv {} .artifacts/ \; - find .artifacts-dl/legate.core-${{ inputs.device }}-*/ \ - -maxdepth 2 -type f -name "environment*.yaml" -exec mv {} .artifacts/ \; - - - name: Copy and change cache dir ownership - shell: bash --noprofile --norc -xeo pipefail {0} - run: | - # Copy and change directory ownership - cp -ar .artifacts /home/coder/.artifacts; - chown -R coder:coder /home/coder/.artifacts; - ls -R /home/coder/.artifacts diff --git a/.github/workflows/ci-gh.yml b/.github/workflows/ci-gh.yml deleted file mode 100644 index ffb77c10e..000000000 --- a/.github/workflows/ci-gh.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Build and test cunumeric on GH - -concurrency: - group: 
ci-build-and-test-on-${{ github.event_name }}-from-${{ github.ref_name }} - cancel-in-progress: true - -on: - push: - branches: - - "pull-request/[0-9]+" - - "branch-*" - -jobs: - build-and-test: - strategy: - fail-fast: false - matrix: - include: - - device: "gpu" - image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04" - - - device: "cpu" - image: "rapidsai/devcontainers:23.06-cpp-mambaforge-ubuntu22.04" - uses: - ./.github/workflows/gh-build-and-test.yml - with: - device: ${{ matrix.device }} - image: ${{ matrix.image }} diff --git a/.github/workflows/gh-build-and-test.yml b/.github/workflows/gh-build-and-test.yml deleted file mode 100644 index 3766a07ee..000000000 --- a/.github/workflows/gh-build-and-test.yml +++ /dev/null @@ -1,92 +0,0 @@ -on: - workflow_call: - inputs: - image: - type: string - required: true - device: - type: string - required: true - - -jobs: - build: - name: "Build cunumeric (with ${{ inputs.device }} legate) on GH" - uses: - ./.github/workflows/gh-build.yml - with: - device: ${{ inputs.device }} - image: ${{ inputs.image }} - runs-on: ${{ github.repository_owner == 'nv-legate' && 'linux-amd64-32cpu' || 'ubuntu-latest' }} - - test: - needs: - - build - strategy: - fail-fast: false - matrix: - include: - - name: 1 CPU test - options: test --cpus 1 --unit --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }} - has-gpu: false - enabled: true - - - name: 2 CPUs test - options: test --cpus 2 --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu8' }} - has-gpu: false - enabled: true - - - name: GPU test - options: test --use cuda --gpus 1 --debug - runner: linux-amd64-gpu-v100-latest-1 - has-gpu: true - enabled: ${{ inputs.device == 'gpu' }} - - - name: 2 GPUs test - options: test --use cuda --gpus 2 --debug - runner: linux-amd64-2gpu - has-gpu: true - enabled: ${{ inputs.device == 'gpu' }} - - - name: OpenMP test - options: test --use 
openmp --omps 1 --ompthreads 2 --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} - has-gpu: ${{ inputs.device == 'gpu' }} - enabled: true - - - name: 2 NUMA OpenMPs test - options: test --use openmp --omps 2 --ompthreads 2 --numamem 2048 --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-32cpu' }} - has-gpu: ${{ inputs.device == 'gpu' }} - enabled: true - - - name: Eager execution test - options: test --use eager --debug - runner: ${{ inputs.device == 'gpu' && 'linux-amd64-gpu-v100-latest-1' || 'linux-amd64-cpu4' }} - has-gpu: ${{ inputs.device == 'gpu' }} - enabled: true - - - name: mypy - options: mypy - runner: linux-amd64-cpu4 - has-gpu: false - enabled: true - - - name: documentation - options: docs - runner: linux-amd64-32cpu - has-gpu: false - enabled: ${{ inputs.device == 'gpu' }} - - uses: - ./.github/workflows/gh-test.yml - with: - name: ${{ matrix.name }} - device: ${{ inputs.device }} - image: ${{ inputs.image }} - runs-on: ${{ matrix.runner }} - has-gpu: ${{ matrix.has-gpu }} - test-options: ${{ matrix.options }} - enabled: ${{ matrix.enabled }} diff --git a/.github/workflows/gh-build.yml b/.github/workflows/gh-build.yml deleted file mode 100644 index 030dad1ad..000000000 --- a/.github/workflows/gh-build.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: Build - -on: - workflow_call: - inputs: - image: - type: string - required: true - device: - required: true - type: string - runs-on: - required: true - type: string - -jobs: - build: - name: build-${{ inputs.device }}-sub-workflow - - permissions: - id-token: write # This is required for configure-aws-credentials - contents: read # This is required for actions/checkout - - runs-on: ${{ inputs.runs-on }} - - container: - options: -u root - image: "${{ inputs.image }}" - env: - CUDA_VERSION: "12.2" - CUDA_VERSION_MAJOR: "12" - CUDA_VERSION_MINOR: "2" - SCCACHE_REGION: "us-east-2" - SCCACHE_BUCKET: 
"rapids-sccache-devs" - SCCACHE_S3_KEY_PREFIX: "legate-cunumeric-dev" - USE_CUDA: "${{ inputs.device == 'gpu' && 'ON' || 'OFF' }}" - GH_TOKEN: "${{ env.GH_TOKEN }}" - GITHUB_TOKEN: "${{ env.GITHUB_TOKEN }}" - VAULT_HOST: "${{ github.repository_owner != 'nv-legate' && 'https://vault.ops.k8s.rapids.ai' || '' }}" - defaults: - run: - shell: su coder {0} - working-directory: /home/coder - - steps: - - name: Checkout cunumeric (= this repo) - uses: actions/checkout@v3 - with: - fetch-depth: 0 - path: cunumeric - persist-credentials: false - - - name: Dump environment - run: | - env - - - name: Copy source folder - run: | - set -x - pwd - cp -r $GITHUB_WORKSPACE/cunumeric . - chown -R coder:coder cunumeric; - ls -R - - - name: Copy .gitconfig - run: cp ~/cunumeric/continuous_integration/dot-gitconfig ~/.gitconfig - - - id: legate_core_info - name: Read legate.core SHA - shell: bash --noprofile --norc -xeo pipefail {0} - run: | - git_tag="$(jq -r '.packages.legate_core.git_tag' cunumeric/cmake/versions.json)"; - - echo "git_tag=$git_tag" | tee -a "${GITHUB_OUTPUT}"; - - - name: Download dependencies (artifacts) - uses: ./cunumeric/.github/actions/download-artifacts - with: - device: "${{ inputs.device }}" - git_sha: "${{ steps.legate_core_info.outputs.git_tag }}" - - - if: github.repository_owner == 'nv-legate' - name: Get AWS credentials for sccache bucket - uses: aws-actions/configure-aws-credentials@v2 - with: - aws-region: us-east-2 - role-duration-seconds: 28800 # 8 hours - role-to-assume: arn:aws:iam::279114543810:role/gha-oidc-nv-legate - - - name: Build cunumeric - run: | - export PATH="/home/coder/cunumeric/continuous_integration/scripts:$PATH" - build-cunumeric-all - - - name: Upload build artifacts - uses: actions/upload-artifact@v3 - with: - name: "cunumeric-${{ inputs.device }}-${{ github.sha }}" - path: | - /tmp/out - /tmp/conda-build diff --git a/.github/workflows/gh-test.yml b/.github/workflows/gh-test.yml deleted file mode 100644 index 
675f27e9b..000000000 --- a/.github/workflows/gh-test.yml +++ /dev/null @@ -1,91 +0,0 @@ -name: Test cunumeric on GH - -on: - workflow_call: - inputs: - name: - required: true - type: string - image: - type: string - required: true - device: - required: true - type: string - runs-on: - required: true - type: string - has-gpu: - required: true - type: boolean - description: "The runner has GPU(s)." - test-options: - required: true - type: string - enabled: - required: true - type: boolean - -env: - build_artifact_name: "cunumeric-${{ inputs.device }}-${{ github.sha }}" - -jobs: - test: - name: ${{ inputs.name }} - if: inputs.enabled && github.repository_owner == 'nv-legate' - runs-on: ${{ inputs.runs-on }} - - container: - options: -u root - image: "${{ inputs.image }}" - env: - # CUDA_VERSION: "${{ inputs.CUDA }}" - NVIDIA_VISIBLE_DEVICES: ${{ env.NVIDIA_VISIBLE_DEVICES }} - - defaults: - run: - shell: su coder {0} - working-directory: /home/coder - - steps: - - if: inputs.has-gpu - name: Run nvidia-smi to make sure GPU is working - run: nvidia-smi - - - name: Install numactl - run: | - export DEBIAN_FRONTEND=noninteractive && \ - sudo apt-get update && \ - sudo apt-get install -y numactl - - - name: Checkout cunumeric - uses: actions/checkout@v3 - with: - fetch-depth: 0 - path: cunumeric - persist-credentials: false - - - name: Copy source folder - run: | - set -x - pwd - cp -r $GITHUB_WORKSPACE/cunumeric . 
- chown -R coder:coder cunumeric; - ls -R - - - name: Download build artifacts - uses: actions/download-artifact@v3 - with: - name: ${{ env.build_artifact_name }} - path: /home/coder/.artifacts - - - name: Run cunumeric test / analysis - shell: su coder {0} - run: | - set -x - sudo chown -R coder:coder /home/coder/.artifacts - - export PATH="/home/coder/cunumeric/continuous_integration/scripts:$PATH" - - set -eo pipefail - test-cunumeric ${{ inputs.test-options }} diff --git a/.github/workflows/require-labels.yml b/.github/workflows/require-labels.yml deleted file mode 100644 index 9b2704f70..000000000 --- a/.github/workflows/require-labels.yml +++ /dev/null @@ -1,15 +0,0 @@ -name: Pull Request Labels -on: - pull_request: - types: [opened, labeled, unlabeled, synchronize] -jobs: - label: - runs-on: ubuntu-latest - steps: - - name: Check Labels - uses: mheap/github-action-required-labels@v3 - with: - mode: exactly - count: 1 - labels: "category:new-feature, category:improvement, category:bug-fix, category:task, category:documentation" - \ No newline at end of file diff --git a/.gitignore b/.gitignore index 84244ce82..d4ccc950a 100644 --- a/.gitignore +++ b/.gitignore @@ -27,11 +27,11 @@ legion gasnet* legion_defines.h realm_defines.h -cunumeric/install_info.py +cupynumeric/install_info.py /build/* -/docs/cunumeric/build -/docs/cunumeric/source/api/generated -/docs/cunumeric/source/comparison/comparison_table.rst.inc +/docs/cupynumeric/build +/docs/cupynumeric/source/api/generated +/docs/cupynumeric/source/comparison/comparison_table.rst.inc *.egg-info .cache .vscode diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 03cfc8b1c..8c9820450 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -5,7 +5,7 @@ repos: - id: mypy language: system pass_filenames: false - args: ['cunumeric'] + args: ['cupynumeric'] - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: @@ -32,6 +32,15 @@ repos: entry: python 
scripts/hooks/enforce_boilerplate.py language: python pass_filenames: false + - id: legate-defined + name: legate-defined + description: 'Find uses of ifdef LEGATE_ that should be using LegateDefined()' + entry: ./scripts/hooks/legate_defined.sh + language: script + 'types_or': [c++, c, cuda] + require_serial: false + stages: [pre-commit] + exclude: '^src/cupynumeric/cupynumeric_c\.h$' ci: skip: [mypy] diff --git a/CMakeLists.txt b/CMakeLists.txt index 18b121f50..b13fe7d60 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -41,11 +41,19 @@ if(POLICY CMP0132) set(CMAKE_POLICY_DEFAULT_CMP0132 NEW) endif() +set(CMAKE_CXX_STANDARD 17 CACHE STRING "" FORCE) +set(CMAKE_CXX_STANDARD_REQUIRED ON CACHE STRING "" FORCE) + +set(CMAKE_CUDA_STANDARD 17 CACHE STRING "" FORCE) +set(CMAKE_CUDA_STANDARD_REQUIRED ON CACHE STRING "" FORCE) + ############################################################################## # - Download and initialize RAPIDS CMake helpers ----------------------------- +set(rapids-cmake-version 24.04) +set(rapids-cmake-sha "365322aca32fd6ecd7027f5d7ec7be50b7f3cc2a") if(NOT EXISTS ${CMAKE_BINARY_DIR}/RAPIDS.cmake) - file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-23.08/RAPIDS.cmake + file(DOWNLOAD https://raw.githubusercontent.com/rapidsai/rapids-cmake/branch-${rapids-cmake-version}/RAPIDS.cmake ${CMAKE_BINARY_DIR}/RAPIDS.cmake) endif() include(${CMAKE_BINARY_DIR}/RAPIDS.cmake) @@ -55,7 +63,7 @@ include(rapids-cuda) include(rapids-export) include(rapids-find) -set(cunumeric_version 24.01.00) +set(cupynumeric_version 24.11.00) # For now we want the optimization flags to match on both normal make and cmake # builds so we override 
the cmake defaults here for release, this changes @@ -70,40 +78,40 @@ set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO "-O2 -g") if(NOT SKBUILD) - project(cunumeric VERSION ${cunumeric_version} LANGUAGES C CXX) - include(cunumeric_cpp.cmake) + project(cupynumeric VERSION ${cupynumeric_version} LANGUAGES C CXX) + include(cupynumeric_cpp.cmake) else() project( - cunumeric_python - VERSION ${cunumeric_version} + cupynumeric_python + VERSION ${cupynumeric_version} LANGUAGES # TODO: Building Python extension modules via the python_extension_module requires the C # language to be enabled here. The test project that is built in scikit-build to verify # various linking options for the python library is hardcoded to build with C, so until # that is fixed we need to keep C. C CXX) - include(cunumeric_python.cmake) + include(cupynumeric_python.cmake) endif() if(CMAKE_GENERATOR STREQUAL "Ninja") - function(add_touch_cunumeric_ninja_build_target) + function(add_touch_cupynumeric_ninja_build_target) set(_suf ) if(SKBUILD) set(_suf "_python") endif() - add_custom_target("touch_cunumeric${_suf}_ninja_build" ALL + add_custom_target("touch_cupynumeric${_suf}_ninja_build" ALL COMMAND ${CMAKE_COMMAND} -E touch_nocreate "${CMAKE_CURRENT_BINARY_DIR}/build.ninja" COMMENT "touch build.ninja so ninja doesn't re-run CMake on rebuild" VERBATIM ) - foreach(_dep IN ITEMS cunumeric cunumeric_python - legion_core legion_core_python + foreach(_dep IN ITEMS cupynumeric cupynumeric_python + legate legate_python Legion LegionRuntime Realm RealmRuntime Regent) if(TARGET ${_dep}) - add_dependencies("touch_cunumeric${_suf}_ninja_build" ${_dep}) + add_dependencies("touch_cupynumeric${_suf}_ninja_build" ${_dep}) endif() endforeach() endfunction() - add_touch_cunumeric_ninja_build_target() + add_touch_cupynumeric_ninja_build_target() endif() diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index e083cc3c0..b4ac11a6a 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,16 
+1,16 @@ -# Contributing to cuNumeric +# Contributing to cuPyNumeric -CuNumeric is an open-source project released under the [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0). We welcome any and all contributions, and we hope that you can help us develop a strong community. +CuPyNumeric is an open-source project released under the [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0). We welcome any and all contributions, and we hope that you can help us develop a strong community. ## How to begin -Most of the time, the best thing is to begin by [opening an issue](https://github.com/nv-legate/cunumeric/issues). This gives us a chance to discuss the contribution and to define the problem or feature that it addresses. Often, opening of the issue first may help prevent you from doing unnecessary work or to enhance and further develop your idea. +Most of the time, the best thing is to begin by [opening an issue](https://github.com/nv-legate/cupynumeric/issues). This gives us a chance to discuss the contribution and to define the problem or feature that it addresses. Often, opening of the issue first may help prevent you from doing unnecessary work or to enhance and further develop your idea. Once you are ready to start development, we ask you to work on a [fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) of our repository. The next step is to create a (pull request)[https://help.github.com/en/articles/about-pull-requests]. Feel free to open the pull request as soon as you begin your development (just mark it [as a draft](https://github.blog/2019-02-14-introducing-draft-pull-requests/)) or when you are ready to have your contribution merged. ## The Legalese: Developer Certificate of Origin -CuNumeric is released under the open-source [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0), and is free to use, modify, and redistribute. 
To ensure that the license can be exercised without encumbrance, we ask you that you only contribute your own work or work to which you have the intellectual rights. To that end, we employ the Developer's Certificate of Origin (DCO), which is the lightweight mechanism for you to certify that you are legally able to make your contribution. Here is the full text of the certificate (also available at [DeveloperCertificate.org](https://developercertificate.org/): +CuPyNumeric is released under the open-source [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0), and is free to use, modify, and redistribute. To ensure that the license can be exercised without encumbrance, we ask you that you only contribute your own work or work to which you have the intellectual rights. To that end, we employ the Developer's Certificate of Origin (DCO), which is the lightweight mechanism for you to certify that you are legally able to make your contribution. Here is the full text of the certificate (also available at [DeveloperCertificate.org](https://developercertificate.org/)): ```` Developer Certificate of Origin @@ -61,12 +61,12 @@ Please use your real name and a valid email address at which you can be reached. ## Review Process -We are really grateful that you are thinking of contributing to cuNumeric. We will make every effort to review your contributions as soon as possible. +We are really grateful that you are thinking of contributing to cuPyNumeric. We will make every effort to review your contributions as soon as possible. As we suggested at the beginning of this document, it will be really helpful to start with an issue unless your proposed change is really trivial. An issue will help to save work in the review process (e.g., maybe somebody is already working on exactly the same thing you want to work on).
After you open your pull request (PR), there usually will be a community feedback that often will require further changes to your contribution (the usual open-source process). Usually, this will conclude in the PR being merged by a maintainer, but on rare occasions a PR may be rejected. This may happen, for example, if the PR appears abandoned (no response to the community feedback) or if the PR does not seem to be approaching community acceptance in a reasonable time frame. In any case, an explanation will always be given why a PR is closed. Even if a PR is closed for some reason, it may always be reopened if the situation evolves (feel free to comment on closed PRs to discuss reopening them). ## Code Formatting Requirements -CuNumeric has a set of coding standards that are expected from all the code merged into the project. The coding standards are defined by the set of tools we use to format our code. We use the [pre-commit](https://pre-commit.com/) framework to run our formatting tools. The easiest way to meet the coding standards is to simply use the pre-commit framework to run all the checks for you. Please visit the [pre-commit project page](https://pre-commit.com/) for pre-commit installation and usage instructions. Once pre-commit is installed in the cuNumeric repo, all the checks and formatting will be run on every commit, but one can also run the checks explicitly as detailed in pre-commit documentation. +CuPyNumeric has a set of coding standards that are expected from all the code merged into the project. The coding standards are defined by the set of tools we use to format our code. We use the [pre-commit](https://pre-commit.com/) framework to run our formatting tools. The easiest way to meet the coding standards is to simply use the pre-commit framework to run all the checks for you. Please visit the [pre-commit project page](https://pre-commit.com/) for pre-commit installation and usage instructions. 
Once pre-commit is installed in the cuPyNumeric repo, all the checks and formatting will be run on every commit, but one can also run the checks explicitly as detailed in pre-commit documentation. We hope that the automation of our formatting checks will make it easy to comply with our coding standards. If you encounter problems with code formatting, however, please let us know in a comment on your PR, and we will do our best to help. diff --git a/LICENSES_bundled.txt b/LICENSES_bundled.txt deleted file mode 100644 index d18691fd7..000000000 --- a/LICENSES_bundled.txt +++ /dev/null @@ -1,39 +0,0 @@ -The cuNumeric repository and source distributions bundle several libraries that are -compatibly licensed. We list these here. - - -Name: Cephes -Files: src/cunumeric/cephes/* -License: 3-clause BSD - Distributed under 3-clause BSD license with permission from the author, - see https://lists.debian.org/debian-legal/2004/12/msg00295.html - - Cephes Math Library Release 2.8: June, 2000 - Copyright 1984, 1995, 2000 by Stephen L. Moshier - - This software is derived from the Cephes Math Library and is - incorporated herein by permission of the author. - - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND - ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY - DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in index 8f77ed200..3eb2279b7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,3 @@ include versioneer.py -include cunumeric/_version.py -include cunumeric/py.typed +include cupynumeric/_version.py +include cupynumeric/py.typed diff --git a/README.md b/README.md index cec00b052..945124602 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ -# cuNumeric +# cuPyNumeric -cuNumeric is a [Legate](https://github.com/nv-legate/legate.core) library +cuPyNumeric is a [Legate](https://github.com/nv-legate/legate.core) library that aims to provide a distributed and accelerated drop-in replacement for the [NumPy API](https://numpy.org/doc/stable/reference/) on top of the -[Legion](https://legion.stanford.edu) runtime. Using cuNumeric you do things like run +[Legion](https://legion.stanford.edu) runtime. Using cuPyNumeric you can do things like run [the final example of the Python CFD course](https://github.com/barbagroup/CFDPython/blob/master/lessons/15_Step_12.ipynb) -completely unmodified on 2048 A100 GPUs in a [DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/) and achieve good weak scaling. 
+completely unmodified on 2048 A100 GPUs in a +[DGX SuperPOD](https://www.nvidia.com/en-us/data-center/dgx-superpod/) +and achieve good weak scaling. drawing -cuNumeric works best for programs that have very large arrays of data +cuPyNumeric works best for programs that have very large arrays of data that cannot fit in the memory of a single GPU or a single node and need to span multiple nodes and GPUs. While our implementation of the current NumPy API is still incomplete, programs that use unimplemented features will still work (assuming enough memory) by falling back to the canonical NumPy implementation. -If you have questions, please contact us at legate(at)nvidia.com. - ## Installation -cuNumeric is available [on conda](https://anaconda.org/legate/cunumeric). -Create a new environment containing cuNumeric: - -``` -mamba create -n myenv -c nvidia -c conda-forge -c legate cunumeric -``` - -or install it into an existing environment: - -``` -mamba install -c nvidia -c conda-forge -c legate cunumeric -``` - -Only linux-64 packages are available at the moment. - -The default package contains GPU support, and is compatible with CUDA >= 11.8 -(CUDA driver version >= r520), and Volta or later GPU architectures. There are -also CPU-only packages available, and will be automatically selected when -installing on a machine without GPUs. You can force installation of a CPU-only -package by requesting it as follows: - -``` -mamba ... cunumeric=*=*_cpu -``` - -See the build instructions at https://nv-legate.github.io/cunumeric for details -about building cuNumeric from source. - -## Usage and Execution - -Using cuNumeric as a replacement for NumPy is easy. Users only need -to replace: - -``` -import numpy as np -``` - -with: - -``` -import cunumeric as np -``` - -These programs can then be run by the Legate driver script described in the -[Legate Core](https://github.com/nv-legate/legate.core) documentation. 
- -``` -legate cunumeric_program.py -``` - -For execution with multiple nodes (assuming Legate Core is installed with networking support) -users can supply the `--nodes` option. For execution with GPUs, users can use the -`--gpus` flags to specify the number of GPUs to use per node. We encourage all users -to familiarize themselves with these resource flags as described in the Legate Core -documentation or simply by passing `--help` to the `legate` driver script. - -You can use `test.py` to run the test suite. Invoke the script directly or through -standard `python`; the script will invoke the `legate` driver script internally. -Check out `test.py --help` for further options. - -## Supported and Planned Features - -cuNumeric is currently a work in progress and we are gradually adding support for -additional NumPy operators. Unsupported NumPy operations will provide a -warning that we are falling back to canonical NumPy. Please report unimplemented -features that are necessary for attaining good performance so that we can triage -them and prioritize implementation appropriately. The more users that report an -unimplemented feature, the more we will prioritize it. Please include a pointer -to your code if possible too so we can see how you are using the feature in context. - -## Supported Types and Dimensions - -cuNumeric currently supports the following NumPy types: `float16`, `float32`, -`float64`, `int16`, `int32`, `int64`, `uint16`, `uint32`, `uint64`, `bool`, -`complex64`, and `complex128`. - -cuNumeric supports up to 4D arrays by default, you can adjust this setting by -installing legate.core with a larger `--max-dim`. +cuPyNumeric is available from [conda](https://docs.conda.io/projects/conda/en/latest/index.html) +on the [legate channel](https://anaconda.org/legate/cupynumeric). +See https://docs.nvidia.com/cupynumeric/latest/installation.html for +details about different install configurations, or building +cuPyNumeric from source. 
## Documentation -The cuNumeric documentation can be found -[here](https://nv-legate.github.io/cunumeric). +The cuPyNumeric documentation can be found +[here](https://docs.nvidia.com/cupynumeric). -## Future Directions +## Contributing -There are three primary directions that we plan to investigate -with cuNumeric going forward: +See the discussion on contributing in [CONTRIBUTING.md](CONTRIBUTING.md). -* More features: we plan to identify a few key lighthouse applications - and use the demands of these applications to drive the addition of - new features to cuNumeric. -* We plan to add support for sharded file I/O for loading and - storing large data sets that could never be loaded on a single node. - Initially this will begin with native support for hdf5 and zarr, - but will grow to accommodate other formats needed by our lighthouse - applications. -* Strong scaling: while cuNumeric is currently implemented in a way that - enables weak scaling of codes on larger data sets, we would also like - to make it possible to strong-scale Legate applications for a single - problem size. This will require leveraging some of the more advanced - features of Legion from inside the Python interpreter. +## Contact -We are open to comments, suggestions, and ideas. +For technical questions about cuPyNumeric and Legate-based tools, please visit +the [community discussion forum](https://github.com/nv-legate/discussion). -## Contributing +If you have other questions, please contact us at legate(at)nvidia.com. -See the discussion of contributing in [CONTRIBUTING.md](CONTRIBUTING.md). - -## Known Issues - - * When using certain operations with high scratch space requirements (e.g. - `einsum` or `convolve`) you might run into the following error: - ``` - LEGION ERROR: Failed to allocate DeferredBuffer/Value/Reduction in task [some task] because [some memory] is full. This is an eager allocation ... 
- ``` - Currently, Legion splits its memory reservations between two pools: the - "deferred" pool, used for allocating cuNumeric `ndarray`s, and the "eager" - pool, used for allocating scratch memory for operations. The above error - message signifies that not enough memory was available for an operation's - scratch space requirements. You can work around this by allocating more - memory overall to cuNumeric (e.g. adjusting `--sysmem`, `--numamem` or - `--fbmem`), and/or by adjusting the split between the two pools (e.g. by - passing `-lg:eager_alloc_percentage 60` on the command line to allocate 60% - of memory to the eager pool, up from the default of 50%). - * cuNumeric can exercise a bug in OpenBLAS when it is run with - [multiple OpenMP processors](https://github.com/xianyi/OpenBLAS/issues/2146) - * On Mac OSX, cuNumeric can trigger a bug in Apple's implementation of libc++. - The [bug](https://bugs.llvm.org/show_bug.cgi?id=43764) has since been fixed but - likely will not show up on most Apple machines for quite some time. You may have - to manually patch your implementation of libc++. If you have trouble doing this - please contact us and we will be able to help you. +## Note +*This project, i.e., cuPyNumeric, is separate and independent of the CuPy project. CuPy is a registered trademark of Preferred Networks.* diff --git a/cmake/Modules/cpm_helpers.cmake b/cmake/Modules/cpm_helpers.cmake index 9fc28633d..acdb5d0d5 100644 --- a/cmake/Modules/cpm_helpers.cmake +++ b/cmake/Modules/cpm_helpers.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -40,7 +40,7 @@ function(get_cpm_git_args _out_var) endif() if(GIT_REPOSITORY MATCHES "github\.com") # If retrieving from github use `.zip` URL to download faster - set(cpm_git_args URL "${GIT_REPOSITORY}/archive/refs/${gh_tag_prefix}/${repo_tag}.zip") + set(cpm_git_args URL "${GIT_REPOSITORY}/archive/${repo_tag}.zip") elseif(GIT_REPOSITORY MATCHES "gitlab\.com") # GitLab archive URIs replace slashes with dashes string(REPLACE "/" "-" archive_tag "${repo_tag}") diff --git a/cmake/Modules/cuda_arch_helpers.cmake b/cmake/Modules/cuda_arch_helpers.cmake index 9a2206f69..01a192a43 100644 --- a/cmake/Modules/cuda_arch_helpers.cmake +++ b/cmake/Modules/cuda_arch_helpers.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -44,6 +44,9 @@ function(set_cuda_arch_from_names) if(CMAKE_CUDA_ARCHITECTURES MATCHES "ampere") list(APPEND cuda_archs 80) endif() + if(CMAKE_CUDA_ARCHITECTURES MATCHES "ada") + list(APPEND cuda_archs 89) + endif() if(CMAKE_CUDA_ARCHITECTURES MATCHES "hopper") list(APPEND cuda_archs 90) endif() @@ -65,7 +68,7 @@ function(set_cuda_arch_from_names) endfunction() function(add_cuda_architecture_defines defs) - message(VERBOSE "legate.core: CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") + message(VERBOSE "legate: CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}") set(_defs ${${defs}}) @@ -86,6 +89,7 @@ function(add_cuda_architecture_defines defs) add_def_if_arch_enabled("70" "VOLTA_ARCH") add_def_if_arch_enabled("75" "TURING_ARCH") add_def_if_arch_enabled("80" "AMPERE_ARCH") + add_def_if_arch_enabled("89" "ADA_ARCH") add_def_if_arch_enabled("90" "HOPPER_ARCH") set(${defs} ${_defs} PARENT_SCOPE) diff --git a/cmake/Modules/set_cpu_arch_flags.cmake b/cmake/Modules/set_cpu_arch_flags.cmake 
index ff3e35ca3..3ee6953e8 100644 --- a/cmake/Modules/set_cpu_arch_flags.cmake +++ b/cmake/Modules/set_cpu_arch_flags.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cmake/generate_install_info_py.cmake b/cmake/generate_install_info_py.cmake index 2fb14cbcb..724640cbb 100644 --- a/cmake/generate_install_info_py.cmake +++ b/cmake/generate_install_info_py.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,8 +17,8 @@ execute_process( COMMAND ${CMAKE_C_COMPILER} -E -DLEGATE_USE_PYTHON_CFFI - -I "${CMAKE_CURRENT_LIST_DIR}/../src/cunumeric" - -P "${CMAKE_CURRENT_LIST_DIR}/../src/cunumeric/cunumeric_c.h" + -I "${CMAKE_CURRENT_LIST_DIR}/../src/cupynumeric" + -P "${CMAKE_CURRENT_LIST_DIR}/../src/cupynumeric/cupynumeric_c.h" ECHO_ERROR_VARIABLE OUTPUT_VARIABLE header COMMAND_ERROR_IS_FATAL ANY @@ -26,6 +26,6 @@ execute_process( set(libpath "") configure_file( - "${CMAKE_CURRENT_LIST_DIR}/../cunumeric/install_info.py.in" - "${CMAKE_CURRENT_LIST_DIR}/../cunumeric/install_info.py" + "${CMAKE_CURRENT_LIST_DIR}/../cupynumeric/install_info.py.in" + "${CMAKE_CURRENT_LIST_DIR}/../cupynumeric/install_info.py" @ONLY) diff --git a/cmake/thirdparty/get_cutensor.cmake b/cmake/thirdparty/get_cutensor.cmake index e1829d429..d271d8a7c 100644 --- a/cmake/thirdparty/get_cutensor.cmake +++ b/cmake/thirdparty/get_cutensor.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# 
Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cmake/thirdparty/get_legate.cmake b/cmake/thirdparty/get_legate.cmake new file mode 100644 index 000000000..68db8207b --- /dev/null +++ b/cmake/thirdparty/get_legate.cmake @@ -0,0 +1,104 @@ +#============================================================================= +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_or_configure_legate) + set(oneValueArgs VERSION REPOSITORY BRANCH EXCLUDE_FROM_ALL) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) + + include("${rapids-cmake-dir}/export/detail/parse_version.cmake") + rapids_export_parse_version(${PKG_VERSION} legate PKG_VERSION) + + include("${rapids-cmake-dir}/cpm/detail/package_details.cmake") + rapids_cpm_package_details(legate version git_repo git_branch shallow exclude_from_all) + + set(version ${PKG_VERSION}) + set(exclude_from_all ${PKG_EXCLUDE_FROM_ALL}) + if(PKG_BRANCH) + set(git_branch "${PKG_BRANCH}") + endif() + if(PKG_REPOSITORY) + set(git_repo "${PKG_REPOSITORY}") + endif() + + set(FIND_PKG_ARGS + GLOBAL_TARGETS legate::legate + BUILD_EXPORT_SET cupynumeric-exports + INSTALL_EXPORT_SET cupynumeric-exports) + + # First try to find legate via find_package() + # so the `Legion_USE_*` variables are visible + # Use QUIET find by default. + set(_find_mode QUIET) + # If legate_DIR/legate_ROOT are defined as something other than empty or NOTFOUND + # use a REQUIRED find so that the build does not silently download legate. 
+ if(legate_DIR OR legate_ROOT) + set(_find_mode REQUIRED) + endif() + rapids_find_package(legate ${version} EXACT CONFIG ${_find_mode} ${FIND_PKG_ARGS}) + + if(legate_FOUND) + message(STATUS "CPM: using local package legate@${version}") + else() + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/cpm_helpers.cmake) + get_cpm_git_args(legate_cpm_git_args REPOSITORY ${git_repo} BRANCH ${git_branch}) + + message(VERBOSE "cupynumeric: legate version: ${version}") + message(VERBOSE "cupynumeric: legate git_repo: ${git_repo}") + message(VERBOSE "cupynumeric: legate git_branch: ${git_branch}") + message(VERBOSE "cupynumeric: legate exclude_from_all: ${exclude_from_all}") + message(VERBOSE "cupynumeric: legate legate_cpm_git_args: ${legate_cpm_git_args}") + + rapids_cpm_find(legate ${version} ${FIND_PKG_ARGS} + CPM_ARGS + ${legate_cpm_git_args} + FIND_PACKAGE_ARGUMENTS EXACT + EXCLUDE_FROM_ALL ${exclude_from_all} + ) + endif() + + set(Legion_USE_CUDA ${Legion_USE_CUDA} PARENT_SCOPE) + set(Legion_USE_OpenMP ${Legion_USE_OpenMP} PARENT_SCOPE) + set(Legion_BOUNDS_CHECKS ${Legion_BOUNDS_CHECKS} PARENT_SCOPE) + + message(VERBOSE "Legion_USE_CUDA=${Legion_USE_CUDA}") + message(VERBOSE "Legion_USE_OpenMP=${Legion_USE_OpenMP}") + message(VERBOSE "Legion_BOUNDS_CHECKS=${Legion_BOUNDS_CHECKS}") +endfunction() + +foreach(_var IN ITEMS "cupynumeric_LEGATE_VERSION" + "cupynumeric_LEGATE_BRANCH" + "cupynumeric_LEGATE_REPOSITORY" + "cupynumeric_EXCLUDE_LEGATE_FROM_ALL") + if(DEFINED ${_var}) + # Create a cupynumeric_LEGATE_BRANCH variable in the current scope either from the existing + # current-scope variable, or the cache variable. + set(${_var} "${${_var}}") + # Remove cupynumeric_LEGATE_BRANCH from the CMakeCache.txt. This ensures reconfiguring the same + # build dir without passing `-Dcupynumeric_LEGATE_BRANCH=` reverts to the value in versions.json + # instead of reusing the previous `-Dcupynumeric_LEGATE_BRANCH=` value. 
+ unset(${_var} CACHE) + endif() +endforeach() + +if(NOT DEFINED cupynumeric_LEGATE_VERSION) + set(cupynumeric_LEGATE_VERSION "${cupynumeric_VERSION}") +endif() + +find_or_configure_legate(VERSION ${cupynumeric_LEGATE_VERSION} + REPOSITORY ${cupynumeric_LEGATE_REPOSITORY} + BRANCH ${cupynumeric_LEGATE_BRANCH} + EXCLUDE_FROM_ALL ${cupynumeric_EXCLUDE_LEGATE_FROM_ALL} +) diff --git a/cmake/thirdparty/get_legate_core.cmake b/cmake/thirdparty/get_legate_core.cmake deleted file mode 100644 index 6bd7dea04..000000000 --- a/cmake/thirdparty/get_legate_core.cmake +++ /dev/null @@ -1,103 +0,0 @@ -#============================================================================= -# Copyright 2022-2023 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#============================================================================= - -function(find_or_configure_legate_core) - set(oneValueArgs VERSION REPOSITORY BRANCH EXCLUDE_FROM_ALL) - cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) - - include("${rapids-cmake-dir}/export/detail/parse_version.cmake") - rapids_export_parse_version(${PKG_VERSION} legate_core PKG_VERSION) - - include("${rapids-cmake-dir}/cpm/detail/package_details.cmake") - rapids_cpm_package_details(legate_core version git_repo git_branch shallow exclude_from_all) - - set(version ${PKG_VERSION}) - set(exclude_from_all ${PKG_EXCLUDE_FROM_ALL}) - if(PKG_BRANCH) - set(git_branch "${PKG_BRANCH}") - endif() - if(PKG_REPOSITORY) - set(git_repo "${PKG_REPOSITORY}") - endif() - - set(FIND_PKG_ARGS - GLOBAL_TARGETS legate::core - BUILD_EXPORT_SET cunumeric-exports - INSTALL_EXPORT_SET cunumeric-exports) - - # First try to find legate_core via find_package() - # so the `Legion_USE_*` variables are visible - # Use QUIET find by default. - set(_find_mode QUIET) - # If legate_core_DIR/legate_core_ROOT are defined as something other than empty or NOTFOUND - # use a REQUIRED find so that the build does not silently download legate.core. 
- if(legate_core_DIR OR legate_core_ROOT) - set(_find_mode REQUIRED) - endif() - rapids_find_package(legate_core ${version} EXACT CONFIG ${_find_mode} ${FIND_PKG_ARGS}) - - if(legate_core_FOUND) - message(STATUS "CPM: using local package legate_core@${version}") - else() - include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/cpm_helpers.cmake) - get_cpm_git_args(legate_core_cpm_git_args REPOSITORY ${git_repo} BRANCH ${git_branch}) - - message(VERBOSE "cunumeric: legate.core version: ${version}") - message(VERBOSE "cunumeric: legate.core git_repo: ${git_repo}") - message(VERBOSE "cunumeric: legate.core git_branch: ${git_branch}") - message(VERBOSE "cunumeric: legate.core exclude_from_all: ${exclude_from_all}") - - rapids_cpm_find(legate_core ${version} ${FIND_PKG_ARGS} - CPM_ARGS - ${legate_core_cpm_git_args} - FIND_PACKAGE_ARGUMENTS EXACT - EXCLUDE_FROM_ALL ${exclude_from_all} - ) - endif() - - set(Legion_USE_CUDA ${Legion_USE_CUDA} PARENT_SCOPE) - set(Legion_USE_OpenMP ${Legion_USE_OpenMP} PARENT_SCOPE) - set(Legion_BOUNDS_CHECKS ${Legion_BOUNDS_CHECKS} PARENT_SCOPE) - - message(VERBOSE "Legion_USE_CUDA=${Legion_USE_CUDA}") - message(VERBOSE "Legion_USE_OpenMP=${Legion_USE_OpenMP}") - message(VERBOSE "Legion_BOUNDS_CHECKS=${Legion_BOUNDS_CHECKS}") -endfunction() - -foreach(_var IN ITEMS "cunumeric_LEGATE_CORE_VERSION" - "cunumeric_LEGATE_CORE_BRANCH" - "cunumeric_LEGATE_CORE_REPOSITORY" - "cunumeric_EXCLUDE_LEGATE_CORE_FROM_ALL") - if(DEFINED ${_var}) - # Create a cunumeric_LEGATE_CORE_BRANCH variable in the current scope either from the existing - # current-scope variable, or the cache variable. - set(${_var} "${${_var}}") - # Remove cunumeric_LEGATE_CORE_BRANCH from the CMakeCache.txt. This ensures reconfiguring the same - # build dir without passing `-Dcunumeric_LEGATE_CORE_BRANCH=` reverts to the value in versions.json - # instead of reusing the previous `-Dcunumeric_LEGATE_CORE_BRANCH=` value. 
- unset(${_var} CACHE) - endif() -endforeach() - -if(NOT DEFINED cunumeric_LEGATE_CORE_VERSION) - set(cunumeric_LEGATE_CORE_VERSION "${cunumeric_VERSION}") -endif() - -find_or_configure_legate_core(VERSION ${cunumeric_LEGATE_CORE_VERSION} - REPOSITORY ${cunumeric_LEGATE_CORE_REPOSITORY} - BRANCH ${cunumeric_LEGATE_CORE_BRANCH} - EXCLUDE_FROM_ALL ${cunumeric_EXCLUDE_LEGATE_CORE_FROM_ALL} -) diff --git a/cmake/thirdparty/get_nccl.cmake b/cmake/thirdparty/get_nccl.cmake index 1aee52b6f..3208de846 100644 --- a/cmake/thirdparty/get_nccl.cmake +++ b/cmake/thirdparty/get_nccl.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cmake/thirdparty/get_openblas.cmake b/cmake/thirdparty/get_openblas.cmake index 82bef489d..aa7030ca5 100644 --- a/cmake/thirdparty/get_openblas.cmake +++ b/cmake/thirdparty/get_openblas.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,7 +22,7 @@ function(find_or_configure_OpenBLAS) set(BLAS_name "OpenBLAS") set(BLAS_target "openblas") - # cuNumeric presently requires OpenBLAS + # cuPyNumeric presently requires OpenBLAS set(BLA_VENDOR OpenBLAS) # TODO: should we find (or build) 64-bit BLAS? 
@@ -35,8 +35,8 @@ function(find_or_configure_OpenBLAS) set(FIND_PKG_ARGS ${PKG_VERSION} GLOBAL_TARGETS ${BLAS_target} - BUILD_EXPORT_SET cunumeric-exports - INSTALL_EXPORT_SET cunumeric-exports) + BUILD_EXPORT_SET cupynumeric-exports + INSTALL_EXPORT_SET cupynumeric-exports) include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/cpm_helpers.cmake) if(PKG_BRANCH) @@ -105,35 +105,35 @@ function(find_or_configure_OpenBLAS) FINAL_CODE_BLOCK code_string) # Do `CPMFindPackage(BLAS)` in build dir - rapids_export_package(BUILD BLAS cunumeric-exports + rapids_export_package(BUILD BLAS cupynumeric-exports VERSION ${PKG_VERSION} GLOBAL_TARGETS ${BLAS_target}) # Tell cmake where it can find the generated blas-config.cmake include("${rapids-cmake-dir}/export/find_package_root.cmake") - rapids_export_find_package_root(BUILD BLAS [=[${CMAKE_CURRENT_LIST_DIR}]=] cunumeric-exports) + rapids_export_find_package_root(BUILD BLAS [=[${CMAKE_CURRENT_LIST_DIR}]=] EXPORT_SET cupynumeric-exports) endif() endfunction() -if(NOT DEFINED cunumeric_OPENBLAS_VERSION) +if(NOT DEFINED cupynumeric_OPENBLAS_VERSION) # Before v0.3.18, OpenBLAS's throws CMake errors when configuring - set(cunumeric_OPENBLAS_VERSION "0.3.20") + set(cupynumeric_OPENBLAS_VERSION "0.3.20") endif() -if(NOT DEFINED cunumeric_OPENBLAS_BRANCH) - set(cunumeric_OPENBLAS_BRANCH "") +if(NOT DEFINED cupynumeric_OPENBLAS_BRANCH) + set(cupynumeric_OPENBLAS_BRANCH "") endif() -if(NOT DEFINED cunumeric_OPENBLAS_TAG) - set(cunumeric_OPENBLAS_TAG v${cunumeric_OPENBLAS_VERSION}) +if(NOT DEFINED cupynumeric_OPENBLAS_TAG) + set(cupynumeric_OPENBLAS_TAG v${cupynumeric_OPENBLAS_VERSION}) endif() -if(NOT DEFINED cunumeric_OPENBLAS_REPOSITORY) - set(cunumeric_OPENBLAS_REPOSITORY https://github.com/xianyi/OpenBLAS.git) +if(NOT DEFINED cupynumeric_OPENBLAS_REPOSITORY) + set(cupynumeric_OPENBLAS_REPOSITORY https://github.com/xianyi/OpenBLAS.git) endif() -find_or_configure_OpenBLAS(VERSION ${cunumeric_OPENBLAS_VERSION} - REPOSITORY 
${cunumeric_OPENBLAS_REPOSITORY} - BRANCH ${cunumeric_OPENBLAS_BRANCH} - PINNED_TAG ${cunumeric_OPENBLAS_TAG} - EXCLUDE_FROM_ALL ${cunumeric_EXCLUDE_OPENBLAS_FROM_ALL} +find_or_configure_OpenBLAS(VERSION ${cupynumeric_OPENBLAS_VERSION} + REPOSITORY ${cupynumeric_OPENBLAS_REPOSITORY} + BRANCH ${cupynumeric_OPENBLAS_BRANCH} + PINNED_TAG ${cupynumeric_OPENBLAS_TAG} + EXCLUDE_FROM_ALL ${cupynumeric_EXCLUDE_OPENBLAS_FROM_ALL} ) diff --git a/cmake/thirdparty/get_tblis.cmake b/cmake/thirdparty/get_tblis.cmake index b02afbd7d..164923601 100644 --- a/cmake/thirdparty/get_tblis.cmake +++ b/cmake/thirdparty/get_tblis.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -34,14 +34,14 @@ function(find_or_configure_tblis) HEADER_NAMES "tblis/tblis.h" LIBRARY_NAMES "libtblis${lib_suffix}" NO_CONFIG - BUILD_EXPORT_SET cunumeric-exports - INSTALL_EXPORT_SET cunumeric-exports + BUILD_EXPORT_SET cupynumeric-exports + INSTALL_EXPORT_SET cupynumeric-exports ) rapids_cpm_find(tblis ${PKG_VERSION} GLOBAL_TARGETS tblis::tblis - BUILD_EXPORT_SET cunumeric-exports - INSTALL_EXPORT_SET cunumeric-exports + BUILD_EXPORT_SET cupynumeric-exports + INSTALL_EXPORT_SET cupynumeric-exports CPM_ARGS ${tblis_cpm_git_args} EXCLUDE_FROM_ALL ${PKG_EXCLUDE_FROM_ALL} @@ -95,8 +95,8 @@ function(find_or_configure_tblis) set(ENV{CC} "${_CC}") set(ENV{CXX} "${_CXX}") - message(VERBOSE "cunumeric: ENV{CC}=\"$ENV{CC}\"") - message(VERBOSE "cunumeric: ENV{CXX}=\"$ENV{CXX}\"") + message(VERBOSE "cupynumeric: ENV{CC}=\"$ENV{CC}\"") + message(VERBOSE "cupynumeric: ENV{CXX}=\"$ENV{CXX}\"") set(tblis_verbosity "--enable-silent-rules") if(CMAKE_VERSION VERSION_GREATER_EQUAL "3.25") @@ -167,20 +167,20 @@ function(find_or_configure_tblis) endif() 
set(tblis_BINARY_DIR ${tblis_BINARY_DIR} PARENT_SCOPE) - set(cunumeric_INSTALL_TBLIS ${should_build_tblis} PARENT_SCOPE) + set(cupynumeric_INSTALL_TBLIS ${should_build_tblis} PARENT_SCOPE) endfunction() -if(NOT DEFINED cunumeric_TBLIS_BRANCH) - set(cunumeric_TBLIS_BRANCH master) +if(NOT DEFINED cupynumeric_TBLIS_BRANCH) + set(cupynumeric_TBLIS_BRANCH arm-build) endif() -if(NOT DEFINED cunumeric_TBLIS_REPOSITORY) - set(cunumeric_TBLIS_REPOSITORY https://github.com/devinamatthews/tblis.git) +if(NOT DEFINED cupynumeric_TBLIS_REPOSITORY) + set(cupynumeric_TBLIS_REPOSITORY https://github.com/nv-legate/tblis.git) endif() find_or_configure_tblis(VERSION 1.2.0 - REPOSITORY ${cunumeric_TBLIS_REPOSITORY} - BRANCH ${cunumeric_TBLIS_BRANCH} - EXCLUDE_FROM_ALL ${cunumeric_EXCLUDE_TBLIS_FROM_ALL} + REPOSITORY ${cupynumeric_TBLIS_REPOSITORY} + BRANCH ${cupynumeric_TBLIS_BRANCH} + EXCLUDE_FROM_ALL ${cupynumeric_EXCLUDE_TBLIS_FROM_ALL} USE_OPENMP ${Legion_USE_OpenMP} ) diff --git a/cmake/versions.json b/cmake/versions.json index 80128714e..b96665e2c 100644 --- a/cmake/versions.json +++ b/cmake/versions.json @@ -1,11 +1,16 @@ { "packages" : { - "legate_core" : { - "version": "24.01.00", - "git_url" : "https://github.com/nv-legate/legate.core.git", + "legate" : { + "repo": "legate.core.internal", + "artifact_name": "${{ inputs.platform }}-${{ inputs.build-type }}-<>-python${{ inputs.python-version }}-${{ inputs.target-device }}-release-with_tests-${{ inputs.network }}-<>", + "org": "nv-legate", + "artifact_workflow": "ci-gh.yml", + "nightly_workflow": "ci-gh-nightly-release.yml", + "version": "24.11.00", + "git_url" : "git@github.com:nv-legate/legate.core.internal.git", "git_shallow": false, "always_download": false, - "git_tag" : "b84e86e9e3518d7102cdf531b5b733dc09c0e8d9" + "git_tag" : "583cbc0bae357de2eb88b57a88e911750b82ca4f" } } } diff --git a/conda/conda-build/build.sh b/conda/conda-build/build.sh index b78740090..2a7b6589f 100644 --- a/conda/conda-build/build.sh +++ 
b/conda/conda-build/build.sh @@ -1,5 +1,9 @@ #!/bin/bash +echo -e "\n\n--------------------- CONDA/CONDA-BUILD/BUILD.SH -----------------------\n" + +set -xeo pipefail; + # Rewrite conda's -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=ONLY to # -DCMAKE_FIND_ROOT_PATH_MODE_INCLUDE=BOTH CMAKE_ARGS="$(echo "$CMAKE_ARGS" | sed -r "s@_INCLUDE=ONLY@_INCLUDE=BOTH@g")" @@ -7,40 +11,42 @@ CMAKE_ARGS="$(echo "$CMAKE_ARGS" | sed -r "s@_INCLUDE=ONLY@_INCLUDE=BOTH@g")" # Add our options to conda's CMAKE_ARGS CMAKE_ARGS+=" --log-level=VERBOSE --DBUILD_MARCH=haswell" +-DBUILD_SHARED_LIBS=ON +-DBUILD_MARCH=${BUILD_MARCH} +-DCMAKE_BUILD_TYPE=Release +-DCMAKE_VERBOSE_MAKEFILE=ON +-DCMAKE_BUILD_PARALLEL_LEVEL=${JOBS:-$(nproc --ignore=1)}" # We rely on an environment variable to determine if we need to build cpu-only bits if [ -z "$CPU_ONLY" ]; then # cutensor, relying on the conda cutensor package CMAKE_ARGS+=" -Dcutensor_DIR=$PREFIX --DCMAKE_CUDA_ARCHITECTURES:LIST=60-real;70-real;75-real;80-real;90 -" +-DCMAKE_CUDA_ARCHITECTURES=all-major" else # When we build without cuda, we need to provide the location of curand CMAKE_ARGS+=" --Dcunumeric_cuRAND_INCLUDE_DIR=$PREFIX -" +-Dcupynumeric_cuRAND_INCLUDE_DIR=$PREFIX/targets/x86_64-linux/include" fi -# Do not compile with NDEBUG until Legion handles it without warnings -export CFLAGS="-UNDEBUG" -export CXXFLAGS="-UNDEBUG" -export CPPFLAGS="-UNDEBUG" -export CUDAFLAGS="-UNDEBUG" export CMAKE_GENERATOR=Ninja export CUDAHOSTCXX=${CXX} +export OPENSSL_DIR="$PREFIX" + +echo "Environment" +env echo "Build starting on $(date)" +CUDAFLAGS="-isystem ${PREFIX}/include -L${PREFIX}/lib" +export CUDAFLAGS cmake -S . 
-B build ${CMAKE_ARGS} -DCMAKE_BUILD_PARALLEL_LEVEL=$CPU_COUNT -cmake --build build -j$CPU_COUNT +cmake --build build -j$CPU_COUNT --verbose cmake --install build CMAKE_ARGS=" --DFIND_CUNUMERIC_CPP=ON --Dcunumeric_ROOT=$PREFIX -" +-DFIND_CUPYNUMERIC_CPP=ON +-Dcupynumeric_ROOT=$PREFIX" SKBUILD_BUILD_OPTIONS=-j$CPU_COUNT \ $PYTHON -m pip install \ @@ -48,12 +54,9 @@ $PYTHON -m pip install \ --no-deps \ --prefix "$PREFIX" \ --no-build-isolation \ + --upgrade \ --cache-dir "$PIP_CACHE_DIR" \ --disable-pip-version-check \ . -vv echo "Build ending on $(date)" - -# Legion leaves an egg-info file which will confuse conda trying to pick up the information -# Remove it so the legate-core is the only egg-info file added -rm -rf $SP_DIR/legion*egg-info diff --git a/conda/conda-build/conda_build_config.yaml b/conda/conda-build/conda_build_config.yaml index a508a6ed1..4ec5498e9 100644 --- a/conda/conda-build/conda_build_config.yaml +++ b/conda/conda-build/conda_build_config.yaml @@ -2,13 +2,16 @@ gpu_enabled: - true - false +upload_build: + - true + - false + python: - - 3.9 - 3.10 - 3.11 numpy_version: - - ">=1.22" + - ">=1.22,<2" cmake_version: - ">=3.20.1,!=3.23.0" diff --git a/conda/conda-build/meta.yaml b/conda/conda-build/meta.yaml index 85ea88d74..2aaddb22b 100644 --- a/conda/conda-build/meta.yaml +++ b/conda/conda-build/meta.yaml @@ -1,4 +1,4 @@ -{% set name = "cunumeric" %} +{% set name = "cupynumeric" %} {% if gpu_enabled == "true" %} {% set gpu_enabled_bool = true %} {% elif gpu_enabled == "false" %} @@ -7,10 +7,18 @@ {# We need to have a default value for the initial pass over the recipe #} {% set gpu_enabled_bool = false %} {% endif %} +{% if upload_build == "true" %} + {% set upload_build_bool = true %} +{% elif upload_build == "false" %} + {% set upload_build_bool = false %} +{% else %} + {# We need to have a default value for the initial pass over the recipe #} + {% set upload_build_bool = false %} +{% endif %} ## The placeholder version is strictly for making 
two-pass conda build process. ## It should not be used for any other purpose, and this is not a default version. {% set placeholder_version = '0.0.0.dev' %} -{% set default_cuda_version = '12.0' %} +{% set default_cuda_version = '12.2.2' %} {% set cuda_version='.'.join(environ.get('CUDA', default_cuda_version).split('.')[:2]) %} {% set cuda_major=cuda_version.split('.')[0]|int %} {% set py_version=environ.get('CONDA_PY', '') %} @@ -21,7 +29,7 @@ {% endif %} {% if package_version is defined %} {% set version = package_version %} - {% set core_version = version %} + {% set legate_version = version %} ## The tag must be in one of the two formats (where N is a digit): ## vNN.NN.NN ## vNN.NN.NN.dev @@ -29,10 +37,10 @@ ## Note: default values are only given to make conda build work. They should not be necessary in principle. {% elif 'dev' in environ.get('GIT_DESCRIBE_TAG', placeholder_version) %} {% set version = (environ.get('GIT_DESCRIBE_TAG', placeholder_version) ~ environ.get('GIT_DESCRIBE_NUMBER', '')).lstrip('v') %} - {% set core_version = (version.rsplit('.',1)[0] ~ ".dev" ~ "|>=" ~ version.rsplit('.',1)[0]) %} + {% set legate_version = (version.rsplit('.',1)[0] ~ ".dev" ~ "|>=" ~ version.rsplit('.',1)[0]) %} {% else %} {% set version = environ.get('GIT_DESCRIBE_TAG', placeholder_version).lstrip('v') %} - {% set core_version = version %} + {% set legate_version = version %} {% endif %} package: @@ -53,20 +61,18 @@ build: number: {{ build_number }} missing_dso_whitelist: - '*libcuda.so*' -{% if use_local_path is not defined %} -# use git hash -{% if not gpu_enabled_bool %} - string: "cuda{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ PKG_BUILDNUM }}_cpu" +{% if gpu_enabled_bool %} +{% set cpu_gpu_tag='_gpu' %} {% else %} - string: "cuda{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ PKG_BUILDNUM }}" +{% set cpu_gpu_tag='_cpu' %} {% endif %} +{% set upload_tag='' if upload_build_bool else '_with_tests' %} +{% if use_local_path is not 
defined %} +# use git hash + string: "cuda{{ cuda_major }}_py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ PKG_BUILDNUM }}{{ cpu_gpu_tag }}{{ upload_tag }}" {% else %} # do not use git hash -{% if not gpu_enabled_bool %} - string: "cuda{{ cuda_major }}_py{{ py_version }}_{{ PKG_BUILDNUM }}_cpu" -{% else %} - string: "cuda{{ cuda_major }}_py{{ py_version }}_{{ PKG_BUILDNUM }}" -{% endif %} + string: "cuda{{ cuda_major }}_py{{ py_version }}_{{ PKG_BUILDNUM }}{{ cpu_gpu_tag }}{{ upload_tag }}" {% endif %} script_env: - SCCACHE_BUCKET @@ -76,22 +82,21 @@ build: - SCCACHE_S3_KEY_PREFIX - AWS_ACCESS_KEY_ID - AWS_SECRET_ACCESS_KEY - - CMAKE_C_COMPILER_LAUNCHER - - CMAKE_CXX_COMPILER_LAUNCHER - - CMAKE_CUDA_COMPILER_LAUNCHER {% if not gpu_enabled_bool %} - CPU_ONLY=1 + # The CPU-only packages having more track_features than the GPU builds helps + # the solver to prefer the GPU builds when both are viable candidates. + # ref: https://docs.conda.io/projects/conda-build/en/latest/resources/define-metadata.html#track-features track_features: - cpu_only -{% else %} -# prevent nccl from pulling in cudatoolkit - ignore_run_exports: - - cudatoolkit - ignore_run_exports_from: - - cuda-nvcc - - legate-core {% endif %} +ignore_run_exports_from: + # scikit-build should really be a part of the build env, but then it installs its own Python. Conda build stacks + # the build environment on the host environment, and the build python takes over causing paths havoc. So, we put + # scikit-build into the host env, but we ignore any exports it may bring. + - scikit-build + requirements: build: - make @@ -99,73 +104,60 @@ requirements: - cmake {{ cmake_version }} - {{ compiler('c') }} =11.2 - {{ compiler('cxx') }} =11.2 - host: # the nvcc requirement is necessary because it contains crt/host_config.h used by cuda runtime. This is a packaging bug that has been reported. 
- - cuda-nvcc ={{ cuda_version }} - # libcurand is used both in CPU and GPU builds - - libcurand-dev + - cuda-nvcc # cudart needed for CPU and GPU builds because of curand - - cuda-cudart-dev ={{ cuda_version }} + - cuda-cudart-dev + - cuda-version ={{ cuda_version }} + - libcurand-dev + + host: - python - scikit-build - openblas =* =*openmp* -{% if not gpu_enabled_bool %} - - legate-core ={{ core_version }} =*_cpu -{% else %} - - legate-core ={{ core_version }} - - cuda-nvcc ={{ cuda_version }} - - cuda-cccl ={{ cuda_version }} - - cuda-cudart ={{ cuda_version }} - - cuda-cudart-static ={{ cuda_version }} - - cuda-driver-dev ={{ cuda_version }} - - cuda-cudart-dev ={{ cuda_version }} - - cuda-nvtx ={{ cuda_version }} - # - libcutensor-dev >=1.3 - - cutensor >=1.3,<2.0 =*_* +{% if gpu_enabled_bool %} + - legate >={{ legate_version }} =*_gpu* + - cuda-cccl + - cutensor >=2.0 =*_* - libcublas-dev - libcusolver-dev - libcufft-dev - - nccl + - libcurand-dev + - libcufile-dev + - cuda-version ={{ cuda_version }} +{% else %} + - legate >={{ legate_version }} =*_cpu* {% endif %} run: - numpy {{ numpy_version }} - - libopenblas =* =*openmp* -{% if not gpu_enabled_bool %} - - legate-core ={{ core_version }} =*_cpu -{% else %} - - legate-core ={{ core_version }} - - cuda-cudart >={{ cuda_version }},<{{ cuda_major+1 }} - - cuda-version >={{ cuda_version }},<{{ cuda_major+1 }} - - cutensor >=1.3 =*_* - - libcublas - - libcusolver >=11.4.1.48-0 - - libcufft + - opt_einsum >=3.3 + - scipy + - openblas =* =*openmp* +{% if gpu_enabled_bool %} - libnvjitlink - libcusparse + - cutensor >=2.0 =*_* + # Pin to all minor versions of CUDA newer than the one built against, within the same major version. 
+ # cuda-version constrains the CUDA runtime version and ensures a compatible driver is available + - {{ pin_compatible('cuda-version', min_pin='x.x', max_pin='x') }} + - __cuda >={{ cuda_version }} {% endif %} - - opt_einsum >=3.3 - - scipy - - typing_extensions run_constrained: - __glibc >=2.17 # [linux] - - python != 3.9.7 -{% if gpu_enabled_bool %} - - __cuda >={{ cuda_version }} -{% endif %} about: - home: https://github.com/nv-legate/cunumeric + home: https://github.com/nv-legate/cupynumeric license: Apache-2.0 license_file: LICENSE summary: 'Drop-in Replacment for NumPy' description: | - cuNumeric is a Legate library that aims to provide + cuPyNumeric is a Legate library that aims to provide a distributed and accelerated drop-in replacement for the NumPy API on top of the Legion runtime. - doc_url: https://github.com/nv-legate/cunumeric - dev_url: https://github.com/nv-legate/cunumeric + doc_url: https://github.com/nv-legate/cupynumeric + dev_url: https://github.com/nv-legate/cupynumeric extra: recipe-maintainers: diff --git a/continuous_integration/dot-gitconfig b/continuous_integration/dot-gitconfig deleted file mode 100644 index 91ac79c70..000000000 --- a/continuous_integration/dot-gitconfig +++ /dev/null @@ -1,3 +0,0 @@ -[user] - email = users.noreply.github.com - name = anon \ No newline at end of file diff --git a/continuous_integration/scripts/build-cunumeric-all b/continuous_integration/scripts/build-cunumeric-all deleted file mode 100755 index 66f5ccb6e..000000000 --- a/continuous_integration/scripts/build-cunumeric-all +++ /dev/null @@ -1,38 +0,0 @@ -#!/usr/bin/env bash - -setup_env() { - yaml_file=$(find ~/.artifacts -name "environment*.yaml" | head -n 1) - - echo "YAML file..." 
- cat "${yaml_file}" - - mkdir -p /tmp/out; - - cp "${yaml_file}" /tmp/out - - mamba env create -n legate -f "$yaml_file" - - mamba uninstall -yn legate numpy - - mamba install -yn legate -c ~/.artifacts/legate_core -c conda-forge -c nvidia legate-core - - mamba activate legate -} - -build_cunumeric_all() { - set -xeo pipefail - - setup_env; - cd ~/cunumeric; - conda info; - - set -xeuo pipefail; - printf "\n\n\n\n********* BUILDING CUNUMERIC CPP *********\n" - build-cunumeric-cpp; - printf "\n\n\n\n********* BUILDING CUNUMERIC WHEEL *********\n" - build-cunumeric-wheel; - printf "\n\n\n\n********* BUILDING CUNUMERIC CONDA *********\n" - build-cunumeric-conda; -} - -(build_cunumeric_all "$@"); diff --git a/continuous_integration/scripts/build-cunumeric-conda b/continuous_integration/scripts/build-cunumeric-conda deleted file mode 100755 index ee4efefcb..000000000 --- a/continuous_integration/scripts/build-cunumeric-conda +++ /dev/null @@ -1,85 +0,0 @@ -#!/usr/bin/env bash - -build_cunumeric_conda_package() { - set -xeuo pipefail; - - local python_version="${PYTHON_VERSION:-}"; - - if [ -z "${python_version}" ]; then - python_version="$(python3 --version 2>&1 | cut -d' ' -f2 | cut -d'.' 
-f3 --complement)"; - fi - - mkdir -p /tmp/conda-build /tmp/out - cp -r ~/.artifacts/legate_core /tmp/conda-build/ - - local conda_build_args=(); - conda_build_args+=(--override-channels); - conda_build_args+=(-c conda-forge); - conda_build_args+=(-c nvidia); - conda_build_args+=(-c file:///tmp/conda-build/legate_core); - conda_build_args+=(--croot /tmp/conda-build/cunumeric); - conda_build_args+=(--numpy 1.22); - conda_build_args+=(--python ${python_version}); - conda_build_args+=(--no-test); - conda_build_args+=(--no-verify); - conda_build_args+=(--no-build-id); - conda_build_args+=("--build-id-pat=''"); - conda_build_args+=(--no-include-recipe); - conda_build_args+=(--no-anaconda-upload); - - GPU_ENABLED=true - [ "${USE_CUDA:-}" = "OFF" ] && GPU_ENABLED=false - - conda_build_args+=(--variants "{gpu_enabled:${GPU_ENABLED},python:${python_version}}"); - - rm -rf /tmp/conda-build/cunumeric; - mkdir -p /tmp/conda-build/cunumeric; - - # Synthesize new cunumeric conda-build build.sh script - - cat < ~/cunumeric/conda/conda-build/conda_build_config.yaml -gpu_enabled: - - "${GPU_ENABLED}" - -python: - - "${python_version}" - -numpy_version: - - ">=1.22" - -cmake_version: - - ">=3.20.1,!=3.23.0" - -use_local_path: - - "true" - -numpy: - - 1.22 - -package_version: - - "$(git -C ~/cunumeric describe --abbrev=0 --tags | sed 's/[a-zA-Z]//g' | cut -d '.' 
-f -2).00" -EOF - - cat <<"EOF" > ~/cunumeric/conda/conda-build/build.sh -# Install cunumeric C++ libs -tar -C "$PREFIX" --exclude="*.a" --strip-components=1 -xvf /tmp/out/cunumeric-*-Linux.tar.gz; - -# Install cunumeric Python wheel -pip install --no-deps --root / --prefix "$PREFIX" /tmp/out/cunumeric-*.whl; -EOF - - git -C ~/cunumeric add .; - git -C ~/cunumeric commit --allow-empty --allow-empty-message -n -m ""; - - # Build cuNumeric conda package - CUDA=${CUDA_VERSION_MAJOR}.${CUDA_VERSION_MINOR} \ - conda mambabuild ${conda_build_args[@]} ~/cunumeric/conda/conda-build; - - git -C ~/cunumeric reset --hard HEAD~1; - - cp /tmp/conda-build/cunumeric/linux-64/cunumeric-*.tar.bz2 /tmp/out/; - - { set +x; } 2>/dev/null; -} - -(build_cunumeric_conda_package "$@"); diff --git a/continuous_integration/scripts/build-cunumeric-cpp b/continuous_integration/scripts/build-cunumeric-cpp deleted file mode 100755 index fd08ceac2..000000000 --- a/continuous_integration/scripts/build-cunumeric-cpp +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env bash - -build_cunumeric_cpp() { - set -xeuo pipefail; - - # Build + package cuNumeric C++ libs - local cmake_args=(${CMAKE_ARGS:-}); - cmake_args+=(-DBUILD_SHARED_LIBS=ON); - cmake_args+=(-DBUILD_MARCH=${BUILD_MARCH:-haswell}); - cmake_args+=(-DCMAKE_BUILD_TYPE=Release); - cmake_args+=(-DCMAKE_CUDA_ARCHITECTURES=RAPIDS); - cmake_args+=(-DCMAKE_BUILD_PARALLEL_LEVEL=${JOBS:-$(nproc --ignore=1)}); - cmake_args+=(${@}); - - cmake -S ~/cunumeric -B ~/cunumeric/build ${cmake_args[@]} -GNinja; - - sccache --show-stats; - - time cmake --build ~/cunumeric/build --verbose --parallel ${JOBS:-$(nproc --ignore=1)}; - - sccache --show-stats; - - ( - mkdir -p /tmp/out; - cd ~/cunumeric/build; - cpack -G TGZ; - cp ./*-Linux.tar.gz /tmp/out/; - ); - - { set +x; } 2>/dev/null; -} - -(build_cunumeric_cpp "$@"); diff --git a/continuous_integration/scripts/build-cunumeric-wheel b/continuous_integration/scripts/build-cunumeric-wheel deleted file mode 100755 
index 5f55136eb..000000000 --- a/continuous_integration/scripts/build-cunumeric-wheel +++ /dev/null @@ -1,27 +0,0 @@ -#!/usr/bin/env bash - -build_cunumeric_wheel() { - set -xeuo pipefail; - - mkdir -p /tmp/out; - - local pip_args=(-vv); - pip_args+=(--wheel-dir /tmp/out); - - if type conda 2>&1 >/dev/null; then - pip_args+=(--no-deps); - pip_args+=(--no-build-isolation); - fi - - local cmake_args=(${CMAKE_ARGS:-}); - cmake_args+=("-DFIND_CUNUMERIC_CPP=ON"); - cmake_args+=("-Dcunumeric_ROOT=$HOME/cunumeric/build"); - - # Build + package cuNumeric Python wheel - CMAKE_ARGS="${cmake_args[@]}" \ - pip wheel ${pip_args[@]} ~/cunumeric; - - { set +x; } 2>/dev/null; -} - -(build_cunumeric_wheel "$@"); diff --git a/continuous_integration/scripts/entrypoint b/continuous_integration/scripts/entrypoint deleted file mode 100755 index 298fc1c7a..000000000 --- a/continuous_integration/scripts/entrypoint +++ /dev/null @@ -1,46 +0,0 @@ -#!/usr/bin/env bash - -sccache_stop_server_and_show_stats() { - sccache --stop-server || true && sccache --show-stats; -} - -init_devcontainer() { - # disable xtrace and history - local xtrace_enabled=$(echo "${SHELLOPTS:-}" | grep -q 'xtrace'; echo $?); - local history_enabled=$(echo "${SHELLOPTS:-}" | grep -q 'history'; echo $?); - { set +xo history; } 2>/dev/null; - eval "export $(find /run/secrets/ -type f -exec bash -c 'echo ${0/\/run\/secrets\//}=$(<${0})' {} \;)"; - if [ "${history_enabled}" -eq "0" ]; then { set -o history; } 2>/dev/null; fi; - if [ "${xtrace_enabled}" -eq "0" ]; then { set -o xtrace; } 2>/dev/null; fi; - - set -xeo pipefail - - . devcontainer-utils-post-attach-command; - - sleep 10; - . devcontainer-utils-vault-s3-test; - . 
devcontainer-utils-vault-s3-export 0; -} - -entrypoint() { - set -x - - mkdir -p /home/coder/.cache; - - local secrets_dir=/run/secrets - - if [ -d "$secrets_dir" ] && [ "$(ls -A $secrets_dir)" ]; then - init_devcontainer - else - sccache_stop_server_and_show_stats - fi - - echo AWS_REGION=${AWS_REGION:-} - echo AWS_SESSION_TOKEN=${AWS_SESSION_TOKEN:-} - echo AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID:-} - echo AWS_SECRET_ACCESS_KEY=${AWS_SECRET_ACCESS_KEY:-} - - exec "$@"; -} - -entrypoint "$@"; diff --git a/continuous_integration/scripts/test-cunumeric b/continuous_integration/scripts/test-cunumeric deleted file mode 100755 index 698179b31..000000000 --- a/continuous_integration/scripts/test-cunumeric +++ /dev/null @@ -1,61 +0,0 @@ -#!/usr/bin/env bash - -setup_env() { - mamba create -yn legate -c ~/.artifacts/conda-build/legate_core -c ~/.artifacts/conda-build/cunumeric -c conda-forge -c "nvidia/label/cuda-12.0.0" legate-core cunumeric -} - -setup_test_env() { - mamba install -y "clang-tools>=8" "clang>=8" colorama coverage mock pre-commit pytest-cov pytest-lazy-fixture pytest-mock "pytest<8" types-docutils pynvml - - pip install tifffile -} - -setup_docs_env() { - mamba install -y pandoc doxygen - pip install ipython jinja2 "markdown<3.4.0" "pydata-sphinx-theme>=0.13" myst-parser nbsphinx sphinx-copybutton "sphinx>=4.4.0" - -} - -setup_mypy_env() { - mamba install -y "mypy>=0.961" jinja2 nbsphinx sphinx-copybutton "sphinx>=4.4.0" types-docutils -} - -test-cunumeric() { - set -xeo pipefail - - setup_env; - - set +u - mamba activate legate; - conda info; - - cd ~/cunumeric; - - case "$1" in - "test") - echo "Executing tests..." - shift; - setup_test_env; - ./test.py --verbose "$@" - ;; - "mypy") - echo "Installing and executing mypy..." - shift; - setup_mypy_env; - mypy cunumeric - ;; - "docs") - echo "Building docs..." 
- shift; - setup_docs_env; - cd docs/cunumeric - make clean html - ;; - *) - echo "Invalid command: $1" - return 1 - ;; - esac -} - -(test-cunumeric "$@"); diff --git a/cunumeric/_ufunc/comparison.py b/cunumeric/_ufunc/comparison.py deleted file mode 100644 index d1df4e8de..000000000 --- a/cunumeric/_ufunc/comparison.py +++ /dev/null @@ -1,122 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from cunumeric.config import BinaryOpCode, UnaryOpCode, UnaryRedCode - -from .ufunc import ( - all_dtypes, - create_binary_ufunc, - create_unary_ufunc, - float_dtypes, - integer_dtypes, - predicate_types_of, - relation_types_of, -) - -greater = create_binary_ufunc( - "Return the truth value of (x1 > x2) element-wise.", - "greater", - BinaryOpCode.GREATER, - relation_types_of(all_dtypes), -) - -greater_equal = create_binary_ufunc( - "Return the truth value of (x1 >= x2) element-wise.", - "greater_equal", - BinaryOpCode.GREATER_EQUAL, - relation_types_of(all_dtypes), -) - -less = create_binary_ufunc( - "Return the truth value of (x1 < x2) element-wise.", - "less", - BinaryOpCode.LESS, - relation_types_of(all_dtypes), -) - -less_equal = create_binary_ufunc( - "Return the truth value of (x1 =< x2) element-wise.", - "less", - BinaryOpCode.LESS_EQUAL, - relation_types_of(all_dtypes), -) - -not_equal = create_binary_ufunc( - "Return (x1 != x2) element-wise.", - "not_equal", - BinaryOpCode.NOT_EQUAL, - 
relation_types_of(all_dtypes), -) - -equal = create_binary_ufunc( - "Return (x1 == x2) element-wise.", - "equal", - BinaryOpCode.EQUAL, - relation_types_of(all_dtypes), -) - -logical_and = create_binary_ufunc( - "Compute the truth value of x1 AND x2 element-wise.", - "logical_and", - BinaryOpCode.LOGICAL_AND, - relation_types_of(all_dtypes), -) - -logical_or = create_binary_ufunc( - "Compute the truth value of x1 OR x2 element-wise.", - "logical_or", - BinaryOpCode.LOGICAL_OR, - relation_types_of(all_dtypes), -) - -logical_xor = create_binary_ufunc( - "Compute the truth value of x1 XOR x2, element-wise.", - "logical_xor", - BinaryOpCode.LOGICAL_XOR, - relation_types_of(all_dtypes), -) - -logical_not = create_unary_ufunc( - "Compute bit-wise inversion, or bit-wise NOT, element-wise.", - "invert", - UnaryOpCode.LOGICAL_NOT, - ( - ["??"] - + predicate_types_of(integer_dtypes) - + predicate_types_of(float_dtypes) - ), - overrides={"?": UnaryOpCode.LOGICAL_NOT}, -) - -maximum = create_binary_ufunc( - "Element-wise maximum of array elements.", - "maximum", - BinaryOpCode.MAXIMUM, - all_dtypes, - red_code=UnaryRedCode.MAX, -) - -fmax = maximum - -minimum = create_binary_ufunc( - "Element-wise minimum of array elements.", - "minimum", - BinaryOpCode.MINIMUM, - all_dtypes, - red_code=UnaryRedCode.MIN, -) - -fmin = minimum diff --git a/cunumeric/config.py b/cunumeric/config.py deleted file mode 100644 index c18d36f4b..000000000 --- a/cunumeric/config.py +++ /dev/null @@ -1,788 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -import os -from abc import abstractmethod -from enum import IntEnum, unique -from typing import TYPE_CHECKING, Union, cast - -import numpy as np -from legate.core import Library, get_legate_runtime - -if TYPE_CHECKING: - import numpy.typing as npt - - from .runtime import Runtime - - -class _CunumericSharedLib: - CUNUMERIC_ADVANCED_INDEXING: int - CUNUMERIC_ARANGE: int - CUNUMERIC_ARGWHERE: int - CUNUMERIC_BATCHED_CHOLESKY: int - CUNUMERIC_BINARY_OP: int - CUNUMERIC_BINARY_RED: int - CUNUMERIC_BINCOUNT: int - CUNUMERIC_BINOP_ADD: int - CUNUMERIC_BINOP_ARCTAN2: int - CUNUMERIC_BINOP_BITWISE_AND: int - CUNUMERIC_BINOP_BITWISE_OR: int - CUNUMERIC_BINOP_BITWISE_XOR: int - CUNUMERIC_BINOP_COPYSIGN: int - CUNUMERIC_BINOP_DIVIDE: int - CUNUMERIC_BINOP_EQUAL: int - CUNUMERIC_BINOP_FLOAT_POWER: int - CUNUMERIC_BINOP_FLOOR_DIVIDE: int - CUNUMERIC_BINOP_FMOD: int - CUNUMERIC_BINOP_GCD: int - CUNUMERIC_BINOP_GREATER: int - CUNUMERIC_BINOP_GREATER_EQUAL: int - CUNUMERIC_BINOP_HYPOT: int - CUNUMERIC_BINOP_ISCLOSE: int - CUNUMERIC_BINOP_LCM: int - CUNUMERIC_BINOP_LDEXP: int - CUNUMERIC_BINOP_LEFT_SHIFT: int - CUNUMERIC_BINOP_LESS: int - CUNUMERIC_BINOP_LESS_EQUAL: int - CUNUMERIC_BINOP_LOGADDEXP2: int - CUNUMERIC_BINOP_LOGADDEXP: int - CUNUMERIC_BINOP_LOGICAL_AND: int - CUNUMERIC_BINOP_LOGICAL_OR: int - CUNUMERIC_BINOP_LOGICAL_XOR: int - CUNUMERIC_BINOP_MAXIMUM: int - CUNUMERIC_BINOP_MINIMUM: int - CUNUMERIC_BINOP_MOD: int - CUNUMERIC_BINOP_MULTIPLY: int - CUNUMERIC_BINOP_NEXTAFTER: int - CUNUMERIC_BINOP_NOT_EQUAL: int - CUNUMERIC_BINOP_POWER: int - CUNUMERIC_BINOP_RIGHT_SHIFT: int - CUNUMERIC_BINOP_SUBTRACT: int - CUNUMERIC_BITGENERATOR: int - CUNUMERIC_BITGENOP_DISTRIBUTION: int - CUNUMERIC_BITGENTYPE_DEFAULT: int - CUNUMERIC_BITGENTYPE_XORWOW: int - CUNUMERIC_BITGENTYPE_MRG32K3A: int - CUNUMERIC_BITGENTYPE_MTGP32: int - 
CUNUMERIC_BITGENTYPE_MT19937: int - CUNUMERIC_BITGENTYPE_PHILOX4_32_10: int - CUNUMERIC_BITGENDIST_INTEGERS_16: int - CUNUMERIC_BITGENDIST_INTEGERS_32: int - CUNUMERIC_BITGENDIST_INTEGERS_64: int - CUNUMERIC_BITGENDIST_UNIFORM_32: int - CUNUMERIC_BITGENDIST_UNIFORM_64: int - CUNUMERIC_BITGENDIST_LOGNORMAL_32: int - CUNUMERIC_BITGENDIST_LOGNORMAL_64: int - CUNUMERIC_BITGENDIST_NORMAL_32: int - CUNUMERIC_BITGENDIST_NORMAL_64: int - CUNUMERIC_BITGENDIST_POISSON: int - CUNUMERIC_BITGENDIST_EXPONENTIAL_32: int - CUNUMERIC_BITGENDIST_EXPONENTIAL_64: int - CUNUMERIC_BITGENDIST_GUMBEL_32: int - CUNUMERIC_BITGENDIST_GUMBEL_64: int - CUNUMERIC_BITGENDIST_LAPLACE_32: int - CUNUMERIC_BITGENDIST_LAPLACE_64: int - CUNUMERIC_BITGENDIST_LOGISTIC_32: int - CUNUMERIC_BITGENDIST_LOGISTIC_64: int - CUNUMERIC_BITGENDIST_PARETO_32: int - CUNUMERIC_BITGENDIST_PARETO_64: int - CUNUMERIC_BITGENDIST_POWER_32: int - CUNUMERIC_BITGENDIST_POWER_64: int - CUNUMERIC_BITGENDIST_RAYLEIGH_32: int - CUNUMERIC_BITGENDIST_RAYLEIGH_64: int - CUNUMERIC_BITGENDIST_CAUCHY_32: int - CUNUMERIC_BITGENDIST_CAUCHY_64: int - CUNUMERIC_BITGENDIST_TRIANGULAR_32: int - CUNUMERIC_BITGENDIST_TRIANGULAR_64: int - CUNUMERIC_BITGENDIST_WEIBULL_32: int - CUNUMERIC_BITGENDIST_WEIBULL_64: int - CUNUMERIC_BITGENDIST_BYTES: int - CUNUMERIC_BITGENDIST_BETA_32: int - CUNUMERIC_BITGENDIST_BETA_64: int - CUNUMERIC_BITGENDIST_F_32: int - CUNUMERIC_BITGENDIST_F_64: int - CUNUMERIC_BITGENDIST_LOGSERIES: int - CUNUMERIC_BITGENDIST_NONCENTRAL_F_32: int - CUNUMERIC_BITGENDIST_NONCENTRAL_F_64: int - CUNUMERIC_BITGENDIST_CHISQUARE_32: int - CUNUMERIC_BITGENDIST_CHISQUARE_64: int - CUNUMERIC_BITGENDIST_GAMMA_32: int - CUNUMERIC_BITGENDIST_GAMMA_64: int - CUNUMERIC_BITGENDIST_STANDARD_T_32: int - CUNUMERIC_BITGENDIST_STANDARD_T_64: int - CUNUMERIC_BITGENDIST_HYPERGEOMETRIC: int - CUNUMERIC_BITGENDIST_VONMISES_32: int - CUNUMERIC_BITGENDIST_VONMISES_64: int - CUNUMERIC_BITGENDIST_ZIPF: int - CUNUMERIC_BITGENDIST_GEOMETRIC: int - 
CUNUMERIC_BITGENDIST_WALD_32: int - CUNUMERIC_BITGENDIST_WALD_64: int - CUNUMERIC_BITGENDIST_BINOMIAL: int - CUNUMERIC_BITGENDIST_NEGATIVE_BINOMIAL: int - CUNUMERIC_BITGENOP_CREATE: int - CUNUMERIC_BITGENOP_DESTROY: int - CUNUMERIC_BITGENOP_RAND_RAW: int - CUNUMERIC_BITORDER_BIG: int - CUNUMERIC_BITORDER_LITTLE: int - CUNUMERIC_CHOOSE: int - CUNUMERIC_CONTRACT: int - CUNUMERIC_CONVERT: int - CUNUMERIC_CONVERT_NAN_NOOP: int - CUNUMERIC_CONVERT_NAN_PROD: int - CUNUMERIC_CONVERT_NAN_SUM: int - CUNUMERIC_CONVOLVE: int - CUNUMERIC_DIAG: int - CUNUMERIC_DOT: int - CUNUMERIC_EYE: int - CUNUMERIC_FFT: int - CUNUMERIC_FFT_C2C: int - CUNUMERIC_FFT_C2R: int - CUNUMERIC_FFT_D2Z: int - CUNUMERIC_FFT_FORWARD: int - CUNUMERIC_FFT_INVERSE: int - CUNUMERIC_FFT_R2C: int - CUNUMERIC_FFT_Z2D: int - CUNUMERIC_FFT_Z2Z: int - CUNUMERIC_FILL: int - CUNUMERIC_FLIP: int - CUNUMERIC_GEMM: int - CUNUMERIC_HISTOGRAM: int - CUNUMERIC_LOAD_CUDALIBS: int - CUNUMERIC_MATMUL: int - CUNUMERIC_MATVECMUL: int - CUNUMERIC_MAX_MAPPERS: int - CUNUMERIC_MAX_REDOPS: int - CUNUMERIC_MAX_TASKS: int - CUNUMERIC_NONZERO: int - CUNUMERIC_PACKBITS: int - CUNUMERIC_POTRF: int - CUNUMERIC_PUTMASK: int - CUNUMERIC_RAND: int - CUNUMERIC_READ: int - CUNUMERIC_RED_ALL: int - CUNUMERIC_RED_ANY: int - CUNUMERIC_RED_ARGMAX: int - CUNUMERIC_RED_ARGMIN: int - CUNUMERIC_RED_CONTAINS: int - CUNUMERIC_RED_COUNT_NONZERO: int - CUNUMERIC_RED_MAX: int - CUNUMERIC_RED_MIN: int - CUNUMERIC_RED_NANARGMAX: int - CUNUMERIC_RED_NANARGMIN: int - CUNUMERIC_RED_NANMAX: int - CUNUMERIC_RED_NANMIN: int - CUNUMERIC_RED_NANPROD: int - CUNUMERIC_RED_NANSUM: int - CUNUMERIC_RED_PROD: int - CUNUMERIC_RED_SUM: int - CUNUMERIC_RED_SUM_SQUARES: int - CUNUMERIC_RED_VARIANCE: int - CUNUMERIC_REPEAT: int - CUNUMERIC_SCALAR_UNARY_RED: int - CUNUMERIC_SCAN_GLOBAL: int - CUNUMERIC_SCAN_LOCAL: int - CUNUMERIC_SCAN_PROD: int - CUNUMERIC_SCAN_SUM: int - CUNUMERIC_SEARCHSORTED: int - CUNUMERIC_SELECT: int - CUNUMERIC_SOLVE: int - CUNUMERIC_SORT: int - 
CUNUMERIC_SYRK: int - CUNUMERIC_TILE: int - CUNUMERIC_TRANSPOSE_COPY_2D: int - CUNUMERIC_TRILU: int - CUNUMERIC_TRSM: int - CUNUMERIC_TUNABLE_MAX_EAGER_VOLUME: int - CUNUMERIC_TUNABLE_NUM_GPUS: int - CUNUMERIC_TUNABLE_NUM_PROCS: int - CUNUMERIC_UNARY_OP: int - CUNUMERIC_UNARY_RED: int - CUNUMERIC_UNIQUE: int - CUNUMERIC_UNIQUE_REDUCE: int - CUNUMERIC_UNLOAD_CUDALIBS: int - CUNUMERIC_UNPACKBITS: int - CUNUMERIC_UOP_ABSOLUTE: int - CUNUMERIC_UOP_ARCCOS: int - CUNUMERIC_UOP_ARCCOSH: int - CUNUMERIC_UOP_ARCSIN: int - CUNUMERIC_UOP_ARCSINH: int - CUNUMERIC_UOP_ARCTAN: int - CUNUMERIC_UOP_ARCTANH: int - CUNUMERIC_UOP_CBRT: int - CUNUMERIC_UOP_CEIL: int - CUNUMERIC_UOP_CLIP: int - CUNUMERIC_UOP_CONJ: int - CUNUMERIC_UOP_COPY: int - CUNUMERIC_UOP_COS: int - CUNUMERIC_UOP_COSH: int - CUNUMERIC_UOP_DEG2RAD: int - CUNUMERIC_UOP_EXP2: int - CUNUMERIC_UOP_EXP: int - CUNUMERIC_UOP_EXPM1: int - CUNUMERIC_UOP_FLOOR: int - CUNUMERIC_UOP_FREXP: int - CUNUMERIC_UOP_GETARG: int - CUNUMERIC_UOP_IMAG: int - CUNUMERIC_UOP_INVERT: int - CUNUMERIC_UOP_ISFINITE: int - CUNUMERIC_UOP_ISINF: int - CUNUMERIC_UOP_ISNAN: int - CUNUMERIC_UOP_LOG10: int - CUNUMERIC_UOP_LOG1P: int - CUNUMERIC_UOP_LOG2: int - CUNUMERIC_UOP_LOG: int - CUNUMERIC_UOP_LOGICAL_NOT: int - CUNUMERIC_UOP_MODF: int - CUNUMERIC_UOP_NEGATIVE: int - CUNUMERIC_UOP_POSITIVE: int - CUNUMERIC_UOP_RAD2DEG: int - CUNUMERIC_UOP_REAL: int - CUNUMERIC_UOP_RECIPROCAL: int - CUNUMERIC_UOP_RINT: int - CUNUMERIC_UOP_SIGN: int - CUNUMERIC_UOP_SIGNBIT: int - CUNUMERIC_UOP_SIN: int - CUNUMERIC_UOP_SINH: int - CUNUMERIC_UOP_SQRT: int - CUNUMERIC_UOP_SQUARE: int - CUNUMERIC_UOP_TAN: int - CUNUMERIC_UOP_TANH: int - CUNUMERIC_UOP_TRUNC: int - CUNUMERIC_WHERE: int - CUNUMERIC_WINDOW: int - CUNUMERIC_WINDOW_BARLETT: int - CUNUMERIC_WINDOW_BLACKMAN: int - CUNUMERIC_WINDOW_HAMMING: int - CUNUMERIC_WINDOW_HANNING: int - CUNUMERIC_WINDOW_KAISER: int - CUNUMERIC_WRAP: int - CUNUMERIC_WRITE: int - CUNUMERIC_ZIP: int - - @abstractmethod - def 
cunumeric_has_curand(self) -> int: - ... - - @abstractmethod - def cunumeric_register_reduction_op( - self, type_uid: int, elem_type_code: int - ) -> None: - ... - - -# Load the cuNumeric library first so we have a shard object that -# we can use to initialize all these configuration enumerations -class CuNumericLib(Library): - def __init__(self, name: str) -> None: - self.name = name - self.runtime: Union[Runtime, None] = None - self.shared_object: Union[_CunumericSharedLib, None] = None - - def get_name(self) -> str: - return self.name - - def get_shared_library(self) -> str: - from cunumeric.install_info import libpath - - return os.path.join( - libpath, "libcunumeric" + self.get_library_extension() - ) - - def get_c_header(self) -> str: - from cunumeric.install_info import header - - return header - - def get_registration_callback(self) -> str: - return "cunumeric_perform_registration" - - def initialize(self, shared_object: _CunumericSharedLib) -> None: - assert self.runtime is None - self.shared_object = shared_object - - def set_runtime(self, runtime: Runtime) -> None: - assert self.runtime is None - assert self.shared_object is not None - self.runtime = runtime - - def destroy(self) -> None: - if self.runtime is not None: - self.runtime.destroy() - - -CUNUMERIC_LIB_NAME = "cunumeric" -cunumeric_lib = CuNumericLib(CUNUMERIC_LIB_NAME) -cunumeric_context = get_legate_runtime().register_library(cunumeric_lib) -_cunumeric = cast(_CunumericSharedLib, cunumeric_lib.shared_object) - - -# Match these to CuNumericOpCode in cunumeric_c.h -@unique -class CuNumericOpCode(IntEnum): - ADVANCED_INDEXING = _cunumeric.CUNUMERIC_ADVANCED_INDEXING - ARANGE = _cunumeric.CUNUMERIC_ARANGE - ARGWHERE = _cunumeric.CUNUMERIC_ARGWHERE - BATCHED_CHOLESKY = _cunumeric.CUNUMERIC_BATCHED_CHOLESKY - BINARY_OP = _cunumeric.CUNUMERIC_BINARY_OP - BINARY_RED = _cunumeric.CUNUMERIC_BINARY_RED - BINCOUNT = _cunumeric.CUNUMERIC_BINCOUNT - BITGENERATOR = _cunumeric.CUNUMERIC_BITGENERATOR - CHOOSE 
= _cunumeric.CUNUMERIC_CHOOSE - CONTRACT = _cunumeric.CUNUMERIC_CONTRACT - CONVERT = _cunumeric.CUNUMERIC_CONVERT - CONVOLVE = _cunumeric.CUNUMERIC_CONVOLVE - DIAG = _cunumeric.CUNUMERIC_DIAG - DOT = _cunumeric.CUNUMERIC_DOT - EYE = _cunumeric.CUNUMERIC_EYE - FFT = _cunumeric.CUNUMERIC_FFT - FILL = _cunumeric.CUNUMERIC_FILL - FLIP = _cunumeric.CUNUMERIC_FLIP - GEMM = _cunumeric.CUNUMERIC_GEMM - HISTOGRAM = _cunumeric.CUNUMERIC_HISTOGRAM - LOAD_CUDALIBS = _cunumeric.CUNUMERIC_LOAD_CUDALIBS - MATMUL = _cunumeric.CUNUMERIC_MATMUL - MATVECMUL = _cunumeric.CUNUMERIC_MATVECMUL - NONZERO = _cunumeric.CUNUMERIC_NONZERO - PACKBITS = _cunumeric.CUNUMERIC_PACKBITS - POTRF = _cunumeric.CUNUMERIC_POTRF - PUTMASK = _cunumeric.CUNUMERIC_PUTMASK - RAND = _cunumeric.CUNUMERIC_RAND - READ = _cunumeric.CUNUMERIC_READ - REPEAT = _cunumeric.CUNUMERIC_REPEAT - SCALAR_UNARY_RED = _cunumeric.CUNUMERIC_SCALAR_UNARY_RED - SCAN_GLOBAL = _cunumeric.CUNUMERIC_SCAN_GLOBAL - SCAN_LOCAL = _cunumeric.CUNUMERIC_SCAN_LOCAL - SEARCHSORTED = _cunumeric.CUNUMERIC_SEARCHSORTED - SELECT = _cunumeric.CUNUMERIC_SELECT - SOLVE = _cunumeric.CUNUMERIC_SOLVE - SORT = _cunumeric.CUNUMERIC_SORT - SYRK = _cunumeric.CUNUMERIC_SYRK - TILE = _cunumeric.CUNUMERIC_TILE - TRANSPOSE_COPY_2D = _cunumeric.CUNUMERIC_TRANSPOSE_COPY_2D - TRILU = _cunumeric.CUNUMERIC_TRILU - TRSM = _cunumeric.CUNUMERIC_TRSM - UNARY_OP = _cunumeric.CUNUMERIC_UNARY_OP - UNARY_RED = _cunumeric.CUNUMERIC_UNARY_RED - UNIQUE = _cunumeric.CUNUMERIC_UNIQUE - UNIQUE_REDUCE = _cunumeric.CUNUMERIC_UNIQUE_REDUCE - UNLOAD_CUDALIBS = _cunumeric.CUNUMERIC_UNLOAD_CUDALIBS - UNPACKBITS = _cunumeric.CUNUMERIC_UNPACKBITS - WHERE = _cunumeric.CUNUMERIC_WHERE - WINDOW = _cunumeric.CUNUMERIC_WINDOW - WRAP = _cunumeric.CUNUMERIC_WRAP - WRITE = _cunumeric.CUNUMERIC_WRITE - ZIP = _cunumeric.CUNUMERIC_ZIP - - -# Match these to CuNumericUnaryOpCode in cunumeric_c.h -@unique -class UnaryOpCode(IntEnum): - ABSOLUTE = _cunumeric.CUNUMERIC_UOP_ABSOLUTE - ARCCOS = 
_cunumeric.CUNUMERIC_UOP_ARCCOS - ARCCOSH = _cunumeric.CUNUMERIC_UOP_ARCCOSH - ARCSIN = _cunumeric.CUNUMERIC_UOP_ARCSIN - ARCSINH = _cunumeric.CUNUMERIC_UOP_ARCSINH - ARCTAN = _cunumeric.CUNUMERIC_UOP_ARCTAN - ARCTANH = _cunumeric.CUNUMERIC_UOP_ARCTANH - CBRT = _cunumeric.CUNUMERIC_UOP_CBRT - CEIL = _cunumeric.CUNUMERIC_UOP_CEIL - CLIP = _cunumeric.CUNUMERIC_UOP_CLIP - CONJ = _cunumeric.CUNUMERIC_UOP_CONJ - COPY = _cunumeric.CUNUMERIC_UOP_COPY - COS = _cunumeric.CUNUMERIC_UOP_COS - COSH = _cunumeric.CUNUMERIC_UOP_COSH - DEG2RAD = _cunumeric.CUNUMERIC_UOP_DEG2RAD - EXP = _cunumeric.CUNUMERIC_UOP_EXP - EXP2 = _cunumeric.CUNUMERIC_UOP_EXP2 - EXPM1 = _cunumeric.CUNUMERIC_UOP_EXPM1 - FLOOR = _cunumeric.CUNUMERIC_UOP_FLOOR - FREXP = _cunumeric.CUNUMERIC_UOP_FREXP - GETARG = _cunumeric.CUNUMERIC_UOP_GETARG - IMAG = _cunumeric.CUNUMERIC_UOP_IMAG - INVERT = _cunumeric.CUNUMERIC_UOP_INVERT - ISFINITE = _cunumeric.CUNUMERIC_UOP_ISFINITE - ISINF = _cunumeric.CUNUMERIC_UOP_ISINF - ISNAN = _cunumeric.CUNUMERIC_UOP_ISNAN - LOG = _cunumeric.CUNUMERIC_UOP_LOG - LOG10 = _cunumeric.CUNUMERIC_UOP_LOG10 - LOG1P = _cunumeric.CUNUMERIC_UOP_LOG1P - LOG2 = _cunumeric.CUNUMERIC_UOP_LOG2 - LOGICAL_NOT = _cunumeric.CUNUMERIC_UOP_LOGICAL_NOT - MODF = _cunumeric.CUNUMERIC_UOP_MODF - NEGATIVE = _cunumeric.CUNUMERIC_UOP_NEGATIVE - POSITIVE = _cunumeric.CUNUMERIC_UOP_POSITIVE - RAD2DEG = _cunumeric.CUNUMERIC_UOP_RAD2DEG - REAL = _cunumeric.CUNUMERIC_UOP_REAL - RECIPROCAL = _cunumeric.CUNUMERIC_UOP_RECIPROCAL - RINT = _cunumeric.CUNUMERIC_UOP_RINT - SIGN = _cunumeric.CUNUMERIC_UOP_SIGN - SIGNBIT = _cunumeric.CUNUMERIC_UOP_SIGNBIT - SIN = _cunumeric.CUNUMERIC_UOP_SIN - SINH = _cunumeric.CUNUMERIC_UOP_SINH - SQRT = _cunumeric.CUNUMERIC_UOP_SQRT - SQUARE = _cunumeric.CUNUMERIC_UOP_SQUARE - TAN = _cunumeric.CUNUMERIC_UOP_TAN - TANH = _cunumeric.CUNUMERIC_UOP_TANH - TRUNC = _cunumeric.CUNUMERIC_UOP_TRUNC - - -# Match these to CuNumericUnaryRedCode in cunumeric_c.h -@unique -class UnaryRedCode(IntEnum): 
- ALL = _cunumeric.CUNUMERIC_RED_ALL - ANY = _cunumeric.CUNUMERIC_RED_ANY - ARGMAX = _cunumeric.CUNUMERIC_RED_ARGMAX - ARGMIN = _cunumeric.CUNUMERIC_RED_ARGMIN - CONTAINS = _cunumeric.CUNUMERIC_RED_CONTAINS - COUNT_NONZERO = _cunumeric.CUNUMERIC_RED_COUNT_NONZERO - MAX = _cunumeric.CUNUMERIC_RED_MAX - MIN = _cunumeric.CUNUMERIC_RED_MIN - NANARGMAX = _cunumeric.CUNUMERIC_RED_NANARGMAX - NANARGMIN = _cunumeric.CUNUMERIC_RED_NANARGMIN - NANMAX = _cunumeric.CUNUMERIC_RED_NANMAX - NANMIN = _cunumeric.CUNUMERIC_RED_NANMIN - NANPROD = _cunumeric.CUNUMERIC_RED_NANPROD - NANSUM = _cunumeric.CUNUMERIC_RED_NANSUM - PROD = _cunumeric.CUNUMERIC_RED_PROD - SUM = _cunumeric.CUNUMERIC_RED_SUM - SUM_SQUARES = _cunumeric.CUNUMERIC_RED_SUM_SQUARES - VARIANCE = _cunumeric.CUNUMERIC_RED_VARIANCE - - -# Match these to CuNumericBinaryOpCode in cunumeric_c.h -@unique -class BinaryOpCode(IntEnum): - ADD = _cunumeric.CUNUMERIC_BINOP_ADD - ARCTAN2 = _cunumeric.CUNUMERIC_BINOP_ARCTAN2 - BITWISE_AND = _cunumeric.CUNUMERIC_BINOP_BITWISE_AND - BITWISE_OR = _cunumeric.CUNUMERIC_BINOP_BITWISE_OR - BITWISE_XOR = _cunumeric.CUNUMERIC_BINOP_BITWISE_XOR - COPYSIGN = _cunumeric.CUNUMERIC_BINOP_COPYSIGN - DIVIDE = _cunumeric.CUNUMERIC_BINOP_DIVIDE - EQUAL = _cunumeric.CUNUMERIC_BINOP_EQUAL - FLOAT_POWER = _cunumeric.CUNUMERIC_BINOP_FLOAT_POWER - FLOOR_DIVIDE = _cunumeric.CUNUMERIC_BINOP_FLOOR_DIVIDE - FMOD = _cunumeric.CUNUMERIC_BINOP_FMOD - GCD = _cunumeric.CUNUMERIC_BINOP_GCD - GREATER = _cunumeric.CUNUMERIC_BINOP_GREATER - GREATER_EQUAL = _cunumeric.CUNUMERIC_BINOP_GREATER_EQUAL - HYPOT = _cunumeric.CUNUMERIC_BINOP_HYPOT - ISCLOSE = _cunumeric.CUNUMERIC_BINOP_ISCLOSE - LCM = _cunumeric.CUNUMERIC_BINOP_LCM - LDEXP = _cunumeric.CUNUMERIC_BINOP_LDEXP - LEFT_SHIFT = _cunumeric.CUNUMERIC_BINOP_LEFT_SHIFT - LESS = _cunumeric.CUNUMERIC_BINOP_LESS - LESS_EQUAL = _cunumeric.CUNUMERIC_BINOP_LESS_EQUAL - LOGADDEXP = _cunumeric.CUNUMERIC_BINOP_LOGADDEXP - LOGADDEXP2 = _cunumeric.CUNUMERIC_BINOP_LOGADDEXP2 - 
LOGICAL_AND = _cunumeric.CUNUMERIC_BINOP_LOGICAL_AND - LOGICAL_OR = _cunumeric.CUNUMERIC_BINOP_LOGICAL_OR - LOGICAL_XOR = _cunumeric.CUNUMERIC_BINOP_LOGICAL_XOR - MAXIMUM = _cunumeric.CUNUMERIC_BINOP_MAXIMUM - MINIMUM = _cunumeric.CUNUMERIC_BINOP_MINIMUM - MOD = _cunumeric.CUNUMERIC_BINOP_MOD - MULTIPLY = _cunumeric.CUNUMERIC_BINOP_MULTIPLY - NEXTAFTER = _cunumeric.CUNUMERIC_BINOP_NEXTAFTER - NOT_EQUAL = _cunumeric.CUNUMERIC_BINOP_NOT_EQUAL - POWER = _cunumeric.CUNUMERIC_BINOP_POWER - RIGHT_SHIFT = _cunumeric.CUNUMERIC_BINOP_RIGHT_SHIFT - SUBTRACT = _cunumeric.CUNUMERIC_BINOP_SUBTRACT - - -@unique -class WindowOpCode(IntEnum): - BARLETT = _cunumeric.CUNUMERIC_WINDOW_BARLETT - BLACKMAN = _cunumeric.CUNUMERIC_WINDOW_BLACKMAN - HAMMING = _cunumeric.CUNUMERIC_WINDOW_HAMMING - HANNING = _cunumeric.CUNUMERIC_WINDOW_HANNING - KAISER = _cunumeric.CUNUMERIC_WINDOW_KAISER - - -# Match these to RandGenCode in rand_util.h -@unique -class RandGenCode(IntEnum): - UNIFORM = 1 - NORMAL = 2 - INTEGER = 3 - - -# Match these to CuNumericTunable in cunumeric_c.h -@unique -class CuNumericTunable(IntEnum): - NUM_GPUS = _cunumeric.CUNUMERIC_TUNABLE_NUM_GPUS - NUM_PROCS = _cunumeric.CUNUMERIC_TUNABLE_NUM_PROCS - MAX_EAGER_VOLUME = _cunumeric.CUNUMERIC_TUNABLE_MAX_EAGER_VOLUME - - -# Match these to CuNumericScanCode in cunumeric_c.h -@unique -class ScanCode(IntEnum): - PROD = _cunumeric.CUNUMERIC_SCAN_PROD - SUM = _cunumeric.CUNUMERIC_SCAN_SUM - - -# Match these to CuNumericConvertCode in cunumeric_c.h -@unique -class ConvertCode(IntEnum): - NOOP = _cunumeric.CUNUMERIC_CONVERT_NAN_NOOP - PROD = _cunumeric.CUNUMERIC_CONVERT_NAN_PROD - SUM = _cunumeric.CUNUMERIC_CONVERT_NAN_SUM - - -# Match these to BitGeneratorOperation in cunumeric_c.h -@unique -class BitGeneratorOperation(IntEnum): - CREATE = _cunumeric.CUNUMERIC_BITGENOP_CREATE - DESTROY = _cunumeric.CUNUMERIC_BITGENOP_DESTROY - RAND_RAW = _cunumeric.CUNUMERIC_BITGENOP_RAND_RAW - DISTRIBUTION = _cunumeric.CUNUMERIC_BITGENOP_DISTRIBUTION 
- - -# Match these to BitGeneratorType in cunumeric_c.h -@unique -class BitGeneratorType(IntEnum): - DEFAULT = _cunumeric.CUNUMERIC_BITGENTYPE_DEFAULT - XORWOW = _cunumeric.CUNUMERIC_BITGENTYPE_XORWOW - MRG32K3A = _cunumeric.CUNUMERIC_BITGENTYPE_MRG32K3A - MTGP32 = _cunumeric.CUNUMERIC_BITGENTYPE_MTGP32 - MT19937 = _cunumeric.CUNUMERIC_BITGENTYPE_MT19937 - PHILOX4_32_10 = _cunumeric.CUNUMERIC_BITGENTYPE_PHILOX4_32_10 - - -# Match these to BitGeneratorDistribution in cunumeric_c.h -@unique -class BitGeneratorDistribution(IntEnum): - INTEGERS_16 = _cunumeric.CUNUMERIC_BITGENDIST_INTEGERS_16 - INTEGERS_32 = _cunumeric.CUNUMERIC_BITGENDIST_INTEGERS_32 - INTEGERS_64 = _cunumeric.CUNUMERIC_BITGENDIST_INTEGERS_64 - UNIFORM_32 = _cunumeric.CUNUMERIC_BITGENDIST_UNIFORM_32 - UNIFORM_64 = _cunumeric.CUNUMERIC_BITGENDIST_UNIFORM_64 - LOGNORMAL_32 = _cunumeric.CUNUMERIC_BITGENDIST_LOGNORMAL_32 - LOGNORMAL_64 = _cunumeric.CUNUMERIC_BITGENDIST_LOGNORMAL_64 - NORMAL_32 = _cunumeric.CUNUMERIC_BITGENDIST_NORMAL_32 - NORMAL_64 = _cunumeric.CUNUMERIC_BITGENDIST_NORMAL_64 - POISSON = _cunumeric.CUNUMERIC_BITGENDIST_POISSON - EXPONENTIAL_32 = _cunumeric.CUNUMERIC_BITGENDIST_EXPONENTIAL_32 - EXPONENTIAL_64 = _cunumeric.CUNUMERIC_BITGENDIST_EXPONENTIAL_64 - GUMBEL_32 = _cunumeric.CUNUMERIC_BITGENDIST_GUMBEL_32 - GUMBEL_64 = _cunumeric.CUNUMERIC_BITGENDIST_GUMBEL_64 - LAPLACE_32 = _cunumeric.CUNUMERIC_BITGENDIST_LAPLACE_32 - LAPLACE_64 = _cunumeric.CUNUMERIC_BITGENDIST_LAPLACE_64 - LOGISTIC_32 = _cunumeric.CUNUMERIC_BITGENDIST_LOGISTIC_32 - LOGISTIC_64 = _cunumeric.CUNUMERIC_BITGENDIST_LOGISTIC_64 - PARETO_32 = _cunumeric.CUNUMERIC_BITGENDIST_PARETO_32 - PARETO_64 = _cunumeric.CUNUMERIC_BITGENDIST_PARETO_64 - POWER_32 = _cunumeric.CUNUMERIC_BITGENDIST_POWER_32 - POWER_64 = _cunumeric.CUNUMERIC_BITGENDIST_POWER_64 - RAYLEIGH_32 = _cunumeric.CUNUMERIC_BITGENDIST_RAYLEIGH_32 - RAYLEIGH_64 = _cunumeric.CUNUMERIC_BITGENDIST_RAYLEIGH_64 - CAUCHY_32 = _cunumeric.CUNUMERIC_BITGENDIST_CAUCHY_32 - 
CAUCHY_64 = _cunumeric.CUNUMERIC_BITGENDIST_CAUCHY_64 - TRIANGULAR_32 = _cunumeric.CUNUMERIC_BITGENDIST_TRIANGULAR_32 - TRIANGULAR_64 = _cunumeric.CUNUMERIC_BITGENDIST_TRIANGULAR_64 - WEIBULL_32 = _cunumeric.CUNUMERIC_BITGENDIST_WEIBULL_32 - WEIBULL_64 = _cunumeric.CUNUMERIC_BITGENDIST_WEIBULL_64 - BYTES = _cunumeric.CUNUMERIC_BITGENDIST_BYTES - BETA_32 = _cunumeric.CUNUMERIC_BITGENDIST_BETA_32 - BETA_64 = _cunumeric.CUNUMERIC_BITGENDIST_BETA_64 - F_32 = _cunumeric.CUNUMERIC_BITGENDIST_F_32 - F_64 = _cunumeric.CUNUMERIC_BITGENDIST_F_64 - LOGSERIES = _cunumeric.CUNUMERIC_BITGENDIST_LOGSERIES - NONCENTRAL_F_32 = _cunumeric.CUNUMERIC_BITGENDIST_NONCENTRAL_F_32 - NONCENTRAL_F_64 = _cunumeric.CUNUMERIC_BITGENDIST_NONCENTRAL_F_64 - CHISQUARE_32 = _cunumeric.CUNUMERIC_BITGENDIST_CHISQUARE_32 - CHISQUARE_64 = _cunumeric.CUNUMERIC_BITGENDIST_CHISQUARE_64 - GAMMA_32 = _cunumeric.CUNUMERIC_BITGENDIST_GAMMA_32 - GAMMA_64 = _cunumeric.CUNUMERIC_BITGENDIST_GAMMA_64 - STANDARD_T_32 = _cunumeric.CUNUMERIC_BITGENDIST_STANDARD_T_32 - STANDARD_T_64 = _cunumeric.CUNUMERIC_BITGENDIST_STANDARD_T_64 - HYPERGEOMETRIC = _cunumeric.CUNUMERIC_BITGENDIST_HYPERGEOMETRIC - VONMISES_32 = _cunumeric.CUNUMERIC_BITGENDIST_VONMISES_32 - VONMISES_64 = _cunumeric.CUNUMERIC_BITGENDIST_VONMISES_64 - ZIPF = _cunumeric.CUNUMERIC_BITGENDIST_ZIPF - GEOMETRIC = _cunumeric.CUNUMERIC_BITGENDIST_GEOMETRIC - WALD_32 = _cunumeric.CUNUMERIC_BITGENDIST_WALD_32 - WALD_64 = _cunumeric.CUNUMERIC_BITGENDIST_WALD_64 - BINOMIAL = _cunumeric.CUNUMERIC_BITGENDIST_BINOMIAL - NEGATIVE_BINOMIAL = _cunumeric.CUNUMERIC_BITGENDIST_NEGATIVE_BINOMIAL - - -# Match these to fftType in fft_util.h -class FFTType: - def __init__( - self, - name: str, - type_id: int, - input_dtype: npt.DTypeLike, - output_dtype: npt.DTypeLike, - single_precision: bool, - complex_type: Union[FFTType, None] = None, - ) -> None: - self._name = name - self._type_id = type_id - self._complex_type = self if complex_type is None else complex_type - 
self._input_dtype = input_dtype - self._output_dtype = output_dtype - self._single_precision = single_precision - - def __str__(self) -> str: - return self._name - - def __repr__(self) -> str: - return str(self) - - @property - def type_id(self) -> int: - return self._type_id - - @property - def complex(self) -> FFTType: - return self._complex_type - - @property - def input_dtype(self) -> npt.DTypeLike: - return self._input_dtype - - @property - def output_dtype(self) -> npt.DTypeLike: - return self._output_dtype - - @property - def is_single_precision(self) -> bool: - return self._single_precision - - -FFT_C2C = FFTType( - "C2C", - _cunumeric.CUNUMERIC_FFT_C2C, - np.complex64, - np.complex64, - True, -) - -FFT_Z2Z = FFTType( - "Z2Z", - _cunumeric.CUNUMERIC_FFT_Z2Z, - np.complex128, - np.complex128, - False, -) - -FFT_R2C = FFTType( - "R2C", - _cunumeric.CUNUMERIC_FFT_R2C, - np.float32, - np.complex64, - True, - FFT_C2C, -) - -FFT_C2R = FFTType( - "C2R", - _cunumeric.CUNUMERIC_FFT_C2R, - np.complex64, - np.float32, - True, - FFT_C2C, -) - -FFT_D2Z = FFTType( - "D2Z", - _cunumeric.CUNUMERIC_FFT_D2Z, - np.float64, - np.complex128, - False, - FFT_Z2Z, -) - -FFT_Z2D = FFTType( - "Z2D", - _cunumeric.CUNUMERIC_FFT_Z2D, - np.complex128, - np.float64, - False, - FFT_Z2Z, -) - - -class FFTCode: - @staticmethod - def real_to_complex_code(dtype: npt.DTypeLike) -> FFTType: - if dtype == np.float64: - return FFT_D2Z - elif dtype == np.float32: - return FFT_R2C - else: - raise TypeError( - ( - "Data type for FFT not supported " - "(supported types are float32 and float64)" - ) - ) - - @staticmethod - def complex_to_real_code(dtype: npt.DTypeLike) -> FFTType: - if dtype == np.complex128: - return FFT_Z2D - elif dtype == np.complex64: - return FFT_C2R - else: - raise TypeError( - ( - "Data type for FFT not supported " - "(supported types are complex64 and complex128)" - ) - ) - - -@unique -class FFTDirection(IntEnum): - FORWARD = _cunumeric.CUNUMERIC_FFT_FORWARD - INVERSE = 
_cunumeric.CUNUMERIC_FFT_INVERSE - - -# Match these to CuNumericBitorder in cunumeric_c.h -@unique -class Bitorder(IntEnum): - BIG = _cunumeric.CUNUMERIC_BITORDER_BIG - LITTLE = _cunumeric.CUNUMERIC_BITORDER_LITTLE - - -@unique -class FFTNormalization(IntEnum): - FORWARD = 1 - INVERSE = 2 - ORTHOGONAL = 3 - - @staticmethod - def from_string(in_string: str) -> Union[FFTNormalization, None]: - if in_string == "forward": - return FFTNormalization.FORWARD - elif in_string == "ortho": - return FFTNormalization.ORTHOGONAL - elif in_string == "backward" or in_string is None: - return FFTNormalization.INVERSE - else: - return None - - @staticmethod - def reverse(in_string: Union[str, None]) -> str: - if in_string == "forward": - return "backward" - elif in_string == "backward" or in_string is None: - return "forward" - else: - return in_string diff --git a/cunumeric/install_info.py.in b/cunumeric/install_info.py.in deleted file mode 100644 index 74f1cf994..000000000 --- a/cunumeric/install_info.py.in +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2020-2022, NVIDIA CORPORATION. All rights reserved. -# -# NVIDIA CORPORATION and its licensors retain all intellectual property -# and proprietary rights in and to this software, related documentation -# and any modifications thereto. Any use, reproduction, disclosure or -# distribution of this software and related documentation without an express -# license agreement from NVIDIA CORPORATION is strictly prohibited. -# -# See the LICENSE file for details. 
-# - -# IMPORTANT: -# * install_info.py is a generated file and should not be modified by hand - -def get_libpath(): - import os, sys, platform - join = os.path.join - exists = os.path.exists - dirname = os.path.dirname - cn_path = dirname(dirname(__file__)) - so_ext = { - "": "", - "Java": ".jar", - "Linux": ".so", - "Darwin": ".dylib", - "Windows": ".dll" - }[platform.system()] - - def find_libcunumeric(libdir): - if exists(join(libdir, f"libcunumeric{so_ext}")): - return libdir - return None - - return ( - find_libcunumeric(join(cn_path, "build", "lib")) or - find_libcunumeric(join(dirname(dirname(dirname(cn_path))), "lib")) or - find_libcunumeric(join(dirname(dirname(sys.executable)), "lib")) or - "" - ) - - -libpath: str = get_libpath() -header: str = """@header@""" diff --git a/cunumeric/linalg/cholesky.py b/cunumeric/linalg/cholesky.py deleted file mode 100644 index 8eccd2944..000000000 --- a/cunumeric/linalg/cholesky.py +++ /dev/null @@ -1,272 +0,0 @@ -# Copyright 2023 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -from __future__ import annotations - -from typing import TYPE_CHECKING - -from legate.core import Rect, types as ty -from legate.core.shape import Shape -from legate.settings import settings - -from cunumeric.config import CuNumericOpCode - -from .exception import LinAlgError - -if TYPE_CHECKING: - from legate.core.context import Context - from legate.core.store import Store, StorePartition - - from ..deferred import DeferredArray - from ..runtime import Runtime - - -def transpose_copy_single( - context: Context, input: Store, output: Store -) -> None: - task = context.create_auto_task(CuNumericOpCode.TRANSPOSE_COPY_2D) - task.add_output(output) - task.add_input(input) - # Output has the same shape as input, but is mapped - # to a column major instance - - task.add_broadcast(output) - task.add_broadcast(input) - - task.execute() - - -def transpose_copy( - context: Context, - launch_domain: Rect, - p_input: StorePartition, - p_output: StorePartition, -) -> None: - task = context.create_manual_task( - CuNumericOpCode.TRANSPOSE_COPY_2D, - launch_domain=launch_domain, - ) - task.add_output(p_output) - task.add_input(p_input) - # Output has the same shape as input, but is mapped - # to a column major instance - - task.execute() - - -def potrf_single(context: Context, output: Store) -> None: - task = context.create_auto_task(CuNumericOpCode.POTRF) - task.throws_exception(LinAlgError) - task.add_output(output) - task.add_input(output) - task.execute() - - -def potrf(context: Context, p_output: StorePartition, i: int) -> None: - launch_domain = Rect(lo=(i, i), hi=(i + 1, i + 1)) - task = context.create_manual_task( - CuNumericOpCode.POTRF, launch_domain=launch_domain - ) - task.throws_exception(LinAlgError) - task.add_output(p_output) - task.add_input(p_output) - task.execute() - - -def trsm( - context: Context, p_output: StorePartition, i: int, lo: int, hi: int -) -> None: - if lo >= hi: - return - - rhs = p_output.get_child_store(i, i) - lhs = p_output - - 
launch_domain = Rect(lo=(lo, i), hi=(hi, i + 1)) - task = context.create_manual_task( - CuNumericOpCode.TRSM, launch_domain=launch_domain - ) - task.add_output(lhs) - task.add_input(rhs) - task.add_input(lhs) - task.execute() - - -def syrk(context: Context, p_output: StorePartition, k: int, i: int) -> None: - rhs = p_output.get_child_store(k, i) - lhs = p_output - - launch_domain = Rect(lo=(k, k), hi=(k + 1, k + 1)) - task = context.create_manual_task( - CuNumericOpCode.SYRK, launch_domain=launch_domain - ) - task.add_output(lhs) - task.add_input(rhs) - task.add_input(lhs) - task.execute() - - -def gemm( - context: Context, - p_output: StorePartition, - k: int, - i: int, - lo: int, - hi: int, -) -> None: - if lo >= hi: - return - - rhs2 = p_output.get_child_store(k, i) - lhs = p_output - rhs1 = p_output - - launch_domain = Rect(lo=(lo, k), hi=(hi, k + 1)) - task = context.create_manual_task( - CuNumericOpCode.GEMM, launch_domain=launch_domain - ) - task.add_output(lhs) - task.add_input(rhs1, proj=lambda p: (p[0], i)) - task.add_input(rhs2) - task.add_input(lhs) - task.execute() - - -MIN_CHOLESKY_TILE_SIZE = 2048 -MIN_CHOLESKY_MATRIX_SIZE = 8192 - - -# TODO: We need a better cost model -def choose_color_shape(runtime: Runtime, shape: Shape) -> Shape: - if settings.test(): - num_tiles = runtime.num_procs * 2 - return Shape((num_tiles, num_tiles)) - - extent = shape[0] - # If there's only one processor or the matrix is too small, - # don't even bother to partition it at all - if runtime.num_procs == 1 or extent <= MIN_CHOLESKY_MATRIX_SIZE: - return Shape((1, 1)) - - # If the matrix is big enough to warrant partitioning, - # pick the granularity that the tile size is greater than a threshold - num_tiles = runtime.num_procs - max_num_tiles = runtime.num_procs * 4 - while ( - (extent + num_tiles - 1) // num_tiles > MIN_CHOLESKY_TILE_SIZE - and num_tiles * 2 <= max_num_tiles - ): - num_tiles *= 2 - - return Shape((num_tiles, num_tiles)) - - -def tril_single(context: 
Context, output: Store) -> None: - task = context.create_auto_task(CuNumericOpCode.TRILU) - task.add_output(output) - task.add_input(output) - task.add_scalar_arg(True, ty.bool_) - task.add_scalar_arg(0, ty.int32) - # Add a fake task argument to indicate that this is for Cholesky - task.add_scalar_arg(True, ty.bool_) - - task.execute() - - -def tril(context: Context, p_output: StorePartition, n: int) -> None: - launch_domain = Rect((n, n)) - task = context.create_manual_task( - CuNumericOpCode.TRILU, launch_domain=launch_domain - ) - - task.add_output(p_output) - task.add_input(p_output) - task.add_scalar_arg(True, ty.bool_) - task.add_scalar_arg(0, ty.int32) - # Add a fake task argument to indicate that this is for Cholesky - task.add_scalar_arg(True, ty.bool_) - - task.execute() - - -def _batched_cholesky(output: DeferredArray, input: DeferredArray) -> None: - # the only feasible implementation for right now is that - # each cholesky submatrix fits on a single proc. We will have - # wildly varying memory available depending on the system. - # Just use a fixed cutoff to provide some sensible warning. 
- # TODO: find a better way to inform the user dims are too big - context: Context = output.context # type: ignore - task = context.create_auto_task(CuNumericOpCode.BATCHED_CHOLESKY) - task.add_input(input.base) - task.add_output(output.base) - ndim = input.base.ndim - task.add_broadcast(input.base, (ndim - 2, ndim - 1)) - task.add_broadcast(output.base, (ndim - 2, ndim - 1)) - task.add_alignment(input.base, output.base) - task.throws_exception(LinAlgError) - task.execute() - - -def cholesky( - output: DeferredArray, input: DeferredArray, no_tril: bool -) -> None: - runtime = output.runtime - context: Context = output.context - if len(input.base.shape) > 2: - if no_tril: - raise NotImplementedError( - "batched cholesky expects to only " - "produce the lower triangular matrix" - ) - size = input.base.shape[-1] - # Choose 32768 as dimension cutoff for warning - # so that for float64 anything larger than - # 8 GiB produces a warning - if size > 32768: - runtime.warn( - "batched cholesky is only valid" - " when the square submatrices fit" - f" on a single proc, n > {size} may be too large", - category=UserWarning, - ) - return _batched_cholesky(output, input) - - if runtime.num_procs == 1: - transpose_copy_single(context, input.base, output.base) - potrf_single(context, output.base) - if not no_tril: - tril_single(context, output.base) - return - - shape = output.base.shape - initial_color_shape = choose_color_shape(runtime, shape) - tile_shape = (shape + initial_color_shape - 1) // initial_color_shape - color_shape = (shape + tile_shape - 1) // tile_shape - n = color_shape[0] - - p_input = input.base.partition_by_tiling(tile_shape) - p_output = output.base.partition_by_tiling(tile_shape) - transpose_copy(context, Rect(hi=color_shape), p_input, p_output) - - for i in range(n): - potrf(context, p_output, i) - trsm(context, p_output, i, i + 1, n) - for k in range(i + 1, n): - syrk(context, p_output, k, i) - gemm(context, p_output, k, i, k + 1, n) - - if no_tril: - return 
- - tril(context, p_output, n) diff --git a/cunumeric/linalg/solve.py b/cunumeric/linalg/solve.py deleted file mode 100644 index cec277c4e..000000000 --- a/cunumeric/linalg/solve.py +++ /dev/null @@ -1,62 +0,0 @@ -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from typing import TYPE_CHECKING, cast - -from cunumeric.config import CuNumericOpCode - -from .cholesky import transpose_copy_single -from .exception import LinAlgError - -if TYPE_CHECKING: - from legate.core.context import Context - from legate.core.store import Store - - from ..deferred import DeferredArray - - -def solve_single(context: Context, a: Store, b: Store) -> None: - task = context.create_auto_task(CuNumericOpCode.SOLVE) - task.throws_exception(LinAlgError) - task.add_input(a) - task.add_input(b) - task.add_output(a) - task.add_output(b) - - task.add_broadcast(a) - task.add_broadcast(b) - - task.execute() - - -def solve(output: DeferredArray, a: DeferredArray, b: DeferredArray) -> None: - from ..deferred import DeferredArray - - runtime = output.runtime - context = output.context - - a_copy = cast( - DeferredArray, - runtime.create_empty_thunk(a.shape, dtype=a.base.type, inputs=(a,)), - ) - transpose_copy_single(context, a.base, a_copy.base) - - if b.ndim > 1: - transpose_copy_single(context, b.base, output.base) - else: - output.copy(b) - - solve_single(context, a_copy.base, output.base) diff --git a/cunumeric/module.py 
b/cunumeric/module.py deleted file mode 100644 index 0a8132a81..000000000 --- a/cunumeric/module.py +++ /dev/null @@ -1,8284 +0,0 @@ -# Copyright 2021-2023 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -import math -import operator -import re -from collections import Counter -from itertools import chain -from typing import ( - TYPE_CHECKING, - Any, - Iterable, - Literal, - Optional, - Sequence, - Tuple, - Union, - cast, -) - -import numpy as np -import opt_einsum as oe # type: ignore [import] -from numpy.core.multiarray import ( # type: ignore [attr-defined] - normalize_axis_index, -) -from numpy.core.numeric import ( # type: ignore [attr-defined] - normalize_axis_tuple, -) - -from cunumeric.coverage import is_implemented - -from ._ufunc.comparison import maximum, minimum -from ._ufunc.floating import floor, isnan -from ._ufunc.math import add, multiply -from ._unary_red_utils import get_non_nan_unary_red_code -from .array import ( - add_boilerplate, - check_writeable, - convert_to_cunumeric_ndarray, - ndarray, -) -from .config import BinaryOpCode, ScanCode, UnaryRedCode -from .runtime import runtime -from .settings import settings as cunumeric_settings -from .types import NdShape, NdShapeLike, OrderType, SortSide -from .utils import AxesPairLike, inner_modes, matmul_modes, tensordot_modes - -if TYPE_CHECKING: - from typing import Callable - - import numpy.typing as npt - - from ._ufunc.ufunc import 
CastingKind - from .types import BoundsMode, ConvolveMode, SelectKind, SortType - -_builtin_abs = abs -_builtin_all = all -_builtin_any = any -_builtin_max = max -_builtin_min = min -_builtin_sum = sum -_builtin_range = range - -casting_kinds: tuple[CastingKind, ...] = ( - "no", - "equiv", - "safe", - "same_kind", - "unsafe", -) - -######################### -# Array creation routines -######################### - -# From shape or value - - -def empty(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray: - """ - empty(shape, dtype=float) - - Return a new array of given shape and type, without initializing entries. - - Parameters - ---------- - shape : int or tuple[int] - Shape of the empty array. - dtype : data-type, optional - Desired output data-type for the array. Default is `cunumeric.float64`. - - Returns - ------- - out : ndarray - Array of uninitialized (arbitrary) data of the given shape and dtype. - - See Also - -------- - numpy.empty - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return ndarray(shape=shape, dtype=dtype) - - -@add_boilerplate("a") -def empty_like( - a: ndarray, - dtype: Optional[npt.DTypeLike] = None, - shape: Optional[NdShapeLike] = None, -) -> ndarray: - """ - - empty_like(prototype, dtype=None) - - Return a new array with the same shape and type as a given array. - - Parameters - ---------- - prototype : array_like - The shape and data-type of `prototype` define these same attributes - of the returned array. - dtype : data-type, optional - Overrides the data type of the result. - shape : int or tuple[int], optional - Overrides the shape of the result. - - Returns - ------- - out : ndarray - Array of uninitialized (arbitrary) data with the same shape and type as - `prototype`. 
- - See Also - -------- - numpy.empty_like - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - shape = a.shape if shape is None else shape - if dtype is not None: - dtype = np.dtype(dtype) - else: - dtype = a.dtype - return ndarray(shape, dtype=dtype, inputs=(a,)) - - -def eye( - N: int, - M: Optional[int] = None, - k: int = 0, - dtype: Optional[npt.DTypeLike] = np.float64, -) -> ndarray: - """ - - Return a 2-D array with ones on the diagonal and zeros elsewhere. - - Parameters - ---------- - N : int - Number of rows in the output. - M : int, optional - Number of columns in the output. If None, defaults to `N`. - k : int, optional - Index of the diagonal: 0 (the default) refers to the main diagonal, - a positive value refers to an upper diagonal, and a negative value - to a lower diagonal. - dtype : data-type, optional - Data-type of the returned array. - - Returns - ------- - I : ndarray - An array of shape (N, M) where all elements are equal to zero, except - for the `k`-th diagonal, whose values are equal to one. - - See Also - -------- - numpy.eye - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if dtype is not None: - dtype = np.dtype(dtype) - if M is None: - M = N - k = operator.index(k) - result = ndarray((N, M), dtype) - result._thunk.eye(k) - return result - - -def identity(n: int, dtype: npt.DTypeLike = float) -> ndarray: - """ - - Return the identity array. - - The identity array is a square array with ones on - the main diagonal. - - Parameters - ---------- - n : int - Number of rows (and columns) in `n` x `n` output. - dtype : data-type, optional - Data-type of the output. Defaults to ``float``. - - Returns - ------- - out : ndarray - `n` x `n` array with its main diagonal set to one, and all other - elements 0. 
- - See Also - -------- - numpy.identity - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return eye(N=n, M=n, dtype=dtype) - - -def ones(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray: - """ - - Return a new array of given shape and type, filled with ones. - - Parameters - ---------- - shape : int or tuple[int] - Shape of the new array. - dtype : data-type, optional - The desired data-type for the array. Default is `cunumeric.float64`. - - Returns - ------- - out : ndarray - Array of ones with the given shape and dtype. - - See Also - -------- - numpy.ones - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return full(shape, 1, dtype=dtype) - - -def ones_like( - a: ndarray, - dtype: Optional[npt.DTypeLike] = None, - shape: Optional[NdShapeLike] = None, -) -> ndarray: - """ - - Return an array of ones with the same shape and type as a given array. - - Parameters - ---------- - a : array_like - The shape and data-type of `a` define these same attributes of the - returned array. - dtype : data-type, optional - Overrides the data type of the result. - shape : int or tuple[int], optional - Overrides the shape of the result. - - Returns - ------- - out : ndarray - Array of ones with the same shape and type as `a`. - - See Also - -------- - numpy.ones_like - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - usedtype = a.dtype - if dtype is not None: - usedtype = np.dtype(dtype) - return full_like(a, 1, dtype=usedtype, shape=shape) - - -def zeros(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray: - """ - zeros(shape, dtype=float) - - Return a new array of given shape and type, filled with zeros. - - Parameters - ---------- - shape : int or tuple[int] - Shape of the new array. - dtype : data-type, optional - The desired data-type for the array. Default is `cunumeric.float64`. - - Returns - ------- - out : ndarray - Array of zeros with the given shape and dtype. 
- - See Also - -------- - numpy.zeros - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if dtype is not None: - dtype = np.dtype(dtype) - return full(shape, 0, dtype=dtype) - - -def zeros_like( - a: ndarray, - dtype: Optional[npt.DTypeLike] = None, - shape: Optional[NdShapeLike] = None, -) -> ndarray: - """ - - Return an array of zeros with the same shape and type as a given array. - - Parameters - ---------- - a : array_like - The shape and data-type of `a` define these same attributes of - the returned array. - dtype : data-type, optional - Overrides the data type of the result. - shape : int or tuple[int], optional - Overrides the shape of the result. - - Returns - ------- - out : ndarray - Array of zeros with the same shape and type as `a`. - - See Also - -------- - numpy.zeros_like - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - usedtype = a.dtype - if dtype is not None: - usedtype = np.dtype(dtype) - return full_like(a, 0, dtype=usedtype, shape=shape) - - -def full( - shape: NdShapeLike, - value: Any, - dtype: Optional[npt.DTypeLike] = None, -) -> ndarray: - """ - - Return a new array of given shape and type, filled with `fill_value`. - - Parameters - ---------- - shape : int or tuple[int] - Shape of the new array. - fill_value : scalar - Fill value. - dtype : data-type, optional - The desired data-type for the array The default, None, means - `cunumeric.array(fill_value).dtype`. - - Returns - ------- - out : ndarray - Array of `fill_value` with the given shape and dtype. 
- - See Also - -------- - numpy.full - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if dtype is None: - val = np.array(value) - else: - dtype = np.dtype(dtype) - val = np.array(value, dtype=dtype) - result = empty(shape, dtype=val.dtype) - result._thunk.fill(val) - return result - - -def full_like( - a: ndarray, - value: Union[int, float], - dtype: Optional[npt.DTypeLike] = None, - shape: Optional[NdShapeLike] = None, -) -> ndarray: - """ - - Return a full array with the same shape and type as a given array. - - Parameters - ---------- - a : array_like - The shape and data-type of `a` define these same attributes of - the returned array. - fill_value : scalar - Fill value. - dtype : data-type, optional - Overrides the data type of the result. - shape : int or tuple[int], optional - Overrides the shape of the result. - - Returns - ------- - out : ndarray - Array of `fill_value` with the same shape and type as `a`. - - See Also - -------- - numpy.full_like - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if dtype is not None: - dtype = np.dtype(dtype) - else: - dtype = a.dtype - result = empty_like(a, dtype=dtype, shape=shape) - val = np.array(value, dtype=result.dtype) - result._thunk.fill(val) - return result - - -# From existing data - - -def array( - obj: Any, - dtype: Optional[np.dtype[Any]] = None, - copy: bool = True, - order: Union[OrderType, Literal["K"]] = "K", - subok: bool = False, - ndmin: int = 0, -) -> ndarray: - """ - array(object, dtype=None, copy=True) - - Create an array. - - Parameters - ---------- - object : array_like - An array, any object exposing the array interface, an object whose - __array__ method returns an array, or any (nested) sequence. - dtype : data-type, optional - The desired data-type for the array. If not given, then the type will - be determined as the minimum type required to hold the objects in the - sequence. - copy : bool, optional - If true (default), then the object is copied. 
Otherwise, a copy will - only be made if __array__ returns a copy, if obj is a nested sequence, - or if a copy is needed to satisfy any of the other requirements - (`dtype`, `order`, etc.). - order : ``{'K', 'A', 'C', 'F'}``, optional - Specify the memory layout of the array. If object is not an array, the - newly created array will be in C order (row major) unless 'F' is - specified, in which case it will be in Fortran order (column major). - If object is an array the following holds. - - ===== ========= =================================================== - order no copy copy=True - ===== ========= =================================================== - 'K' unchanged F & C order preserved, otherwise most similar order - 'A' unchanged F order if input is F and not C, otherwise C order - 'C' C order C order - 'F' F order F order - ===== ========= =================================================== - - When ``copy=False`` and a copy is made for other reasons, the result is - the same as if ``copy=True``, with some exceptions for 'A', see the - Notes section. The default order is 'K'. - subok : bool, optional - If True, then sub-classes will be passed-through, otherwise - the returned array will be forced to be a base-class array (default). - ndmin : int, optional - Specifies the minimum number of dimensions that the resulting - array should have. Ones will be pre-pended to the shape as - needed to meet this requirement. - - Returns - ------- - out : ndarray - An array object satisfying the specified requirements. 
- - See Also - -------- - numpy.array - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - if not isinstance(obj, ndarray): - thunk = runtime.get_numpy_thunk(obj, share=(not copy), dtype=dtype) - result = ndarray(shape=None, thunk=thunk) - else: - result = obj - if dtype is not None and result.dtype != dtype: - result = result.astype(dtype) - elif copy and obj is result: - result = result.copy() - if result.ndim < ndmin: - shape = (1,) * (ndmin - result.ndim) + result.shape - result = result.reshape(shape) - return result - - -def asarray(a: Any, dtype: Optional[np.dtype[Any]] = None) -> ndarray: - """ - Convert the input to an array. - - Parameters - ---------- - a : array_like - Input data, in any form that can be converted to an array. This - includes lists, lists of tuples, tuples, tuples of tuples, tuples - of lists and ndarrays. - dtype : data-type, optional - By default, the data-type is inferred from the input data. - - Returns - ------- - out : ndarray - Array interpretation of `a`. No copy is performed if the input is - already an ndarray with matching dtype. If `a` is a subclass of - ndarray, a base class ndarray is returned. - - See Also - -------- - numpy.asarray - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if not isinstance(a, ndarray): - thunk = runtime.get_numpy_thunk(a, share=True, dtype=dtype) - writeable = a.flags.writeable if isinstance(a, np.ndarray) else True - array = ndarray(shape=None, thunk=thunk, writeable=writeable) - else: - array = a - if dtype is not None and array.dtype != dtype: - array = array.astype(dtype) - return array - - -@add_boilerplate("a") -def copy(a: ndarray) -> ndarray: - """ - - Return an array copy of the given object. - - Parameters - ---------- - a : array_like - Input data. - - Returns - ------- - arr : ndarray - Array interpretation of `a`. 
- - See Also - -------- - numpy.copy - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - result = empty_like(a, dtype=a.dtype) - result._thunk.copy(a._thunk, deep=True) - return result - - -# Numerical ranges - - -def arange( - start: Union[int, float] = 0, - stop: Optional[Union[int, float]] = None, - step: Optional[Union[int, float]] = 1, - dtype: Optional[npt.DTypeLike] = None, -) -> ndarray: - """ - arange([start,] stop[, step,], dtype=None) - - Return evenly spaced values within a given interval. - - Values are generated within the half-open interval ``[start, stop)`` - (in other words, the interval including `start` but excluding `stop`). - For integer arguments the function is equivalent to the Python built-in - `range` function, but returns an ndarray rather than a list. - - When using a non-integer step, such as 0.1, the results will often not - be consistent. It is better to use `cunumeric.linspace` for these cases. - - Parameters - ---------- - start : int or float, optional - Start of interval. The interval includes this value. The default - start value is 0. - stop : int or float - End of interval. The interval does not include this value, except - in some cases where `step` is not an integer and floating point - round-off affects the length of `out`. - step : int or float, optional - Spacing between values. For any output `out`, this is the distance - between two adjacent values, ``out[i+1] - out[i]``. The default - step size is 1. If `step` is specified as a position argument, - `start` must also be given. - dtype : data-type - The type of the output array. If `dtype` is not given, infer the data - type from the other input arguments. - - Returns - ------- - arange : ndarray - Array of evenly spaced values. - - For floating point arguments, the length of the result is - ``ceil((stop - start)/step)``. Because of floating point overflow, - this rule may result in the last element of `out` being greater - than `stop`. 
- - See Also - -------- - numpy.arange - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if stop is None: - stop = start - start = 0 - - if step is None: - step = 1 - - if dtype is None: - dtype = np.result_type(start, stop, step) - else: - dtype = np.dtype(dtype) - - N = math.ceil((stop - start) / step) - result = ndarray((_builtin_max(0, N),), dtype) - result._thunk.arange(start, stop, step) - return result - - -@add_boilerplate("start", "stop") -def linspace( - start: ndarray, - stop: ndarray, - num: int = 50, - endpoint: bool = True, - retstep: bool = False, - dtype: Optional[npt.DTypeLike] = None, - axis: int = 0, -) -> Union[ndarray, tuple[ndarray, Union[float, ndarray]]]: - """ - - Return evenly spaced numbers over a specified interval. - - Returns `num` evenly spaced samples, calculated over the - interval [`start`, `stop`]. - - The endpoint of the interval can optionally be excluded. - - Parameters - ---------- - start : array_like - The starting value of the sequence. - stop : array_like - The end value of the sequence, unless `endpoint` is set to False. - In that case, the sequence consists of all but the last of ``num + 1`` - evenly spaced samples, so that `stop` is excluded. Note that the step - size changes when `endpoint` is False. - num : int, optional - Number of samples to generate. Default is 50. Must be non-negative. - endpoint : bool, optional - If True, `stop` is the last sample. Otherwise, it is not included. - Default is True. - retstep : bool, optional - If True, return (`samples`, `step`), where `step` is the spacing - between samples. - dtype : data-type, optional - The type of the output array. If `dtype` is not given, infer the data - type from the other input arguments. - axis : int, optional - The axis in the result to store the samples. Relevant only if start - or stop are array-like. By default (0), the samples will be along a - new axis inserted at the beginning. Use -1 to get an axis at the end. 
- - Returns - ------- - samples : ndarray - There are `num` equally spaced samples in the closed interval - ``[start, stop]`` or the half-open interval ``[start, stop)`` - (depending on whether `endpoint` is True or False). - step : float or ndarray, optional - Only returned if `retstep` is True - - Size of spacing between samples. - - See Also - -------- - numpy.linspace - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if num < 0: - raise ValueError("Number of samples, %s, must be non-negative." % num) - div = (num - 1) if endpoint else num - - common_kind = np.result_type(start.dtype, stop.dtype).kind - dt = np.complex128 if common_kind == "c" else np.float64 - if dtype is None: - dtype = dt - - delta = stop - start - y = arange(0, num, dtype=dt) - - out: tuple[Any, ...] # EllipsisType not even in typing_extensions yet - - # Reshape these arrays into dimensions that allow them to broadcast - if delta.ndim > 0: - if axis is None or axis == 0: - # First dimension - y = y.reshape((-1,) + (1,) * delta.ndim) - # Nothing else needs to be reshaped here because - # they should all broadcast correctly with y - if endpoint and num > 1: - out = (-1,) - elif axis == -1 or axis == delta.ndim: - # Last dimension - y = y.reshape((1,) * delta.ndim + (-1,)) - if endpoint and num > 1: - out = (Ellipsis, -1) - # Extend everything else with extra dimensions of 1 at the end - # so that they can broadcast with y - delta = delta.reshape(delta.shape + (1,)) - start = start.reshape(start.shape + (1,)) - elif axis < delta.ndim: - # Somewhere in the middle - y = y.reshape((1,) * axis + (-1,) + (1,) * (delta.ndim - axis)) - # Start array might be smaller than delta because of broadcast - startax = start.ndim - len(delta.shape[axis:]) - start = start.reshape( - start.shape[0:startax] + (1,) + start.shape[startax:] - ) - if endpoint and num > 1: - out = (Ellipsis, -1) + (slice(None, None, None),) * len( - delta.shape[axis:] - ) - delta = delta.reshape( - delta.shape[0:axis] 
+ (1,) + delta.shape[axis:] - ) - else: - raise ValueError( - "axis " - + str(axis) - + " is out of bounds for array of dimension " - + str(delta.ndim + 1) - ) - else: - out = (-1,) - # else delta is a scalar so start must be also - # therefore it will trivially broadcast correctly - - step: Union[float, ndarray] - if div > 0: - step = delta / div - if delta.ndim == 0: - y *= step - else: - y = y * step - else: - # sequences with 0 items or 1 item with endpoint=True (i.e. div <= 0) - # have an undefined step - step = np.NaN - if delta.ndim == 0: - y *= delta - else: - y = y * delta - - y += start.astype(y.dtype, copy=False) - - if endpoint and num > 1: - y[out] = stop.astype(y.dtype, copy=False) - - if np.issubdtype(dtype, np.integer): - floor(y, out=y) - - if retstep: - return y.astype(dtype, copy=False), step - else: - return y.astype(dtype, copy=False) - - -# Building matrices - - -@add_boilerplate("v") -def diag(v: ndarray, k: int = 0) -> ndarray: - """ - - Extract a diagonal or construct a diagonal array. - - See the more detailed documentation for ``cunumeric.diagonal`` if you use - this function to extract a diagonal and wish to write to the resulting - array; whether it returns a copy or a view depends on what version of numpy - you are using. - - Parameters - ---------- - v : array_like - If `v` is a 2-D array, return a copy of its `k`-th diagonal. - If `v` is a 1-D array, return a 2-D array with `v` on the `k`-th - diagonal. - k : int, optional - Diagonal in question. The default is 0. Use `k>0` for diagonals - above the main diagonal, and `k<0` for diagonals below the main - diagonal. - - Returns - ------- - out : ndarray - The extracted diagonal or constructed diagonal array. 
- - See Also - -------- - numpy.diag - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if v.ndim == 0: - raise ValueError("Input must be 1- or 2-d") - elif v.ndim == 1: - return v.diagonal(offset=k, axis1=0, axis2=1, extract=False) - elif v.ndim == 2: - return v.diagonal(offset=k, axis1=0, axis2=1, extract=True) - else: - raise ValueError("diag requires 1- or 2-D array, use diagonal instead") - - -def tri( - N: int, - M: Optional[int] = None, - k: int = 0, - dtype: npt.DTypeLike = float, - *, - like: Optional[ndarray] = None, -) -> ndarray: - """ - An array with ones at and below the given diagonal and zeros elsewhere. - - Parameters - ---------- - N : int - Number of rows in the array. - M : int, optional - Number of columns in the array. - By default, `M` is taken equal to `N`. - k : int, optional - The sub-diagonal at and below which the array is filled. - `k` = 0 is the main diagonal, while `k` < 0 is below it, - and `k` > 0 is above. The default is 0. - dtype : dtype, optional - Data type of the returned array. The default is float. - like : array_like - Reference object to allow the creation of arrays which are not NumPy - arrays. If an array-like passed in as `like` supports the - `__array_function__` protocol, the result will be defined by it. In - this case it ensures the creation of an array object compatible with - that passed in via this argument. - - Returns - ------- - tri : ndarray of shape (N, M) - Array with its lower triangle filled with ones and zero elsewhere; - in other words ``T[i,j] == 1`` for ``j <= i + k``, 0 otherwise. 
- - See Also - -------- - numpy.tri - - Notes - ----- - `like` argument is currently not supported - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - # TODO: add support for `like` (see issue #418) - if like is not None: - raise ValueError("like parameter is currently not supported") - - if M is None: - M = N - - out = ones((N, M), dtype=dtype) - return tril(out, k) - - -@add_boilerplate("m") -def trilu(m: ndarray, k: int, lower: bool) -> ndarray: - if m.ndim < 1: - raise TypeError("Array must be at least 1-D") - shape = m.shape if m.ndim >= 2 else m.shape * 2 - result = ndarray(shape, dtype=m.dtype, inputs=(m,)) - result._thunk.trilu(m._thunk, k, lower) - return result - - -def tril(m: ndarray, k: int = 0) -> ndarray: - """ - - Lower triangle of an array. - - Return a copy of an array with elements above the `k`-th diagonal zeroed. - - Parameters - ---------- - m : array_like - Input array of shape (M, N). - k : int, optional - Diagonal above which to zero elements. `k = 0` (the default) is the - main diagonal, `k < 0` is below it and `k > 0` is above. - - Returns - ------- - tril : ndarray - Lower triangle of `m`, of same shape and data-type as `m`. - - See Also - -------- - numpy.tril - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return trilu(m, k, True) - - -def triu(m: ndarray, k: int = 0) -> ndarray: - """ - - Upper triangle of an array. - - Return a copy of a matrix with the elements below the `k`-th diagonal - zeroed. - - Please refer to the documentation for `tril` for further details. - - See Also - -------- - numpy.triu - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return trilu(m, k, False) - - -############################# -# Array manipulation routines -############################# - -# Basic operations - - -@add_boilerplate("a") -def ndim(a: ndarray) -> int: - """ - - Return the number of dimensions of an array. - - Parameters - ---------- - a : array_like - Input array. 
If it is not already an ndarray, a conversion is - attempted. - - Returns - ------- - number_of_dimensions : int - The number of dimensions in `a`. Scalars are zero-dimensional. - - See Also - -------- - ndarray.ndim : equivalent method - shape : dimensions of array - ndarray.shape : dimensions of array - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return 0 if a is None else a.ndim - - -@add_boilerplate("a") -def shape(a: ndarray) -> NdShape: - """ - - Return the shape of an array. - - Parameters - ---------- - a : array_like - Input array. - - Returns - ------- - shape : tuple[int, ...] - The elements of the shape tuple give the lengths of the - corresponding array dimensions. - - See Also - -------- - numpy.shape - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.shape - - -# Changing array shape - - -@add_boilerplate("a") -def ravel(a: ndarray, order: OrderType = "C") -> ndarray: - """ - Return a contiguous flattened array. - - A 1-D array, containing the elements of the input, is returned. A copy is - made only if needed. - - Parameters - ---------- - a : array_like - Input array. The elements in `a` are read in the order specified by - `order`, and packed as a 1-D array. - order : ``{'C','F', 'A', 'K'}``, optional - The elements of `a` are read using this index order. 'C' means - to index the elements in row-major, C-style order, - with the last axis index changing fastest, back to the first - axis index changing slowest. 'F' means to index the elements - in column-major, Fortran-style order, with the - first index changing fastest, and the last index changing - slowest. Note that the 'C' and 'F' options take no account of - the memory layout of the underlying array, and only refer to - the order of axis indexing. 'A' means to read the elements in - Fortran-like index order if `a` is Fortran *contiguous* in - memory, C-like order otherwise. 
'K' means to read the - elements in the order they occur in memory, except for - reversing the data when strides are negative. By default, 'C' - index order is used. - - Returns - ------- - y : array_like - y is an array of the same subtype as `a`, with shape ``(a.size,)``. - Note that matrices are special cased for backward compatibility, if `a` - is a matrix, then y is a 1-D ndarray. - - See Also - -------- - numpy.ravel - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.ravel(order=order) - - -@add_boilerplate("a") -def reshape( - a: ndarray, newshape: NdShapeLike, order: OrderType = "C" -) -> ndarray: - """ - - Gives a new shape to an array without changing its data. - - Parameters - ---------- - a : array_like - Array to be reshaped. - newshape : int or tuple[int] - The new shape should be compatible with the original shape. If - an integer, then the result will be a 1-D array of that length. - One shape dimension can be -1. In this case, the value is - inferred from the length of the array and remaining dimensions. - order : ``{'C', 'F', 'A'}``, optional - Read the elements of `a` using this index order, and place the - elements into the reshaped array using this index order. 'C' - means to read / write the elements using C-like index order, - with the last axis index changing fastest, back to the first - axis index changing slowest. 'F' means to read / write the - elements using Fortran-like index order, with the first index - changing fastest, and the last index changing slowest. Note that - the 'C' and 'F' options take no account of the memory layout of - the underlying array, and only refer to the order of indexing. - 'A' means to read / write the elements in Fortran-like index - order if `a` is Fortran *contiguous* in memory, C-like order - otherwise. - - Returns - ------- - reshaped_array : ndarray - This will be a new view object if possible; otherwise, it will - be a copy. 
Note there is no guarantee of the *memory layout* (C- or - Fortran- contiguous) of the returned array. - - See Also - -------- - numpy.reshape - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.reshape(newshape, order=order) - - -# Transpose-like operations - - -@add_boilerplate("a") -def swapaxes(a: ndarray, axis1: int, axis2: int) -> ndarray: - """ - - Interchange two axes of an array. - - Parameters - ---------- - a : array_like - Input array. - axis1 : int - First axis. - axis2 : int - Second axis. - - Returns - ------- - a_swapped : ndarray - If `a` is an ndarray, then a view of `a` is returned; otherwise a new - array is created. - - See Also - -------- - numpy.swapaxes - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.swapaxes(axis1, axis2) - - -@add_boilerplate("a") -def transpose(a: ndarray, axes: Optional[list[int]] = None) -> ndarray: - """ - - Permute the dimensions of an array. - - Parameters - ---------- - a : array_like - Input array. - axes : list[int], optional - By default, reverse the dimensions, otherwise permute the axes - according to the values given. - - Returns - ------- - p : ndarray - `a` with its axes permuted. A view is returned whenever - possible. - - See Also - -------- - numpy.transpose - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.transpose(axes=axes) - - -@add_boilerplate("a") -def moveaxis( - a: ndarray, source: Sequence[int], destination: Sequence[int] -) -> ndarray: - """ - Move axes of an array to new positions. - Other axes remain in their original order. - - Parameters - ---------- - a : ndarray - The array whose axes should be reordered. - source : int or Sequence[int] - Original positions of the axes to move. These must be unique. - destination : int or Sequence[int] - Destination positions for each of the original axes. These must also be - unique. - - Returns - ------- - result : ndarray - Array with moved axes. 
This array is a view of the input array. - - See Also - -------- - numpy.moveaxis - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - source = normalize_axis_tuple(source, a.ndim, "source") - destination = normalize_axis_tuple(destination, a.ndim, "destination") - if len(source) != len(destination): - raise ValueError( - "`source` and `destination` arguments must have the same number " - "of elements" - ) - order = [n for n in range(a.ndim) if n not in source] - for dest, src in sorted(zip(destination, source)): - order.insert(dest, src) - return a.transpose(order) - - -# Changing number of dimensions - - -def _reshape_recur(ndim: int, arr: ndarray) -> tuple[int, ...]: - if arr.ndim < ndim: - cur_shape: tuple[int, ...] = _reshape_recur(ndim - 1, arr) - if ndim == 2: - cur_shape = (1,) + cur_shape - else: - cur_shape = cur_shape + (1,) - else: - cur_shape = arr.shape - return cur_shape - - -def _atleast_nd( - ndim: int, arys: Sequence[ndarray] -) -> Union[list[ndarray], ndarray]: - inputs = list(convert_to_cunumeric_ndarray(arr) for arr in arys) - # 'reshape' change the shape of arrays - # only when arr.shape != _reshape_recur(ndim,arr) - result = list(arr.reshape(_reshape_recur(ndim, arr)) for arr in inputs) - # if the number of arrays in `arys` is 1, - # the return value is a single array - if len(result) == 1: - return result[0] - return result - - -def atleast_1d(*arys: ndarray) -> Union[list[ndarray], ndarray]: - """ - - Convert inputs to arrays with at least one dimension. - Scalar inputs are converted to 1-dimensional arrays, - whilst higher-dimensional inputs are preserved. - - Parameters - ---------- - *arys : array_like - One or more input arrays. - - Returns - ------- - ret : ndarray - An array, or list of arrays, each with a.ndim >= 1. - Copies are made only if necessary. 
- - See Also - -------- - numpy.atleast_1d - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return _atleast_nd(1, arys) - - -def atleast_2d(*arys: ndarray) -> Union[list[ndarray], ndarray]: - """ - - View inputs as arrays with at least two dimensions. - - Parameters - ---------- - *arys : array_like - One or more array-like sequences. - Non-array inputs are converted to arrays. - Arrays that already have two or more dimensions are preserved. - - Returns - ------- - res, res2, … : ndarray - An array, or list of arrays, each with a.ndim >= 2. - Copies are avoided where possible, and - views with two or more dimensions are returned. - - See Also - -------- - numpy.atleast_2d - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return _atleast_nd(2, arys) - - -def atleast_3d(*arys: ndarray) -> Union[list[ndarray], ndarray]: - """ - - View inputs as arrays with at least three dimensions. - - Parameters - ---------- - *arys : array_like - One or more array-like sequences. - Non-array inputs are converted to arrays. - Arrays that already have three or more dimensions are preserved. - - Returns - ------- - res, res2, … : ndarray - An array, or list of arrays, each with a.ndim >= 3. - Copies are avoided where possible, and - views with three or more dimensions are returned. - For example, a 1-D array of shape (N,) becomes - a view of shape (1, N, 1), and a 2-D array of shape (M, N) - becomes a view of shape (M, N, 1). - - See Also - -------- - numpy.atleast_3d - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return _atleast_nd(3, arys) - - -@add_boilerplate("a") -def squeeze(a: ndarray, axis: Optional[NdShapeLike] = None) -> ndarray: - """ - - Remove single-dimensional entries from the shape of an array. - - Parameters - ---------- - a : array_like - Input data. - axis : None or int or tuple[int], optional - Selects a subset of the single-dimensional entries in the - shape. 
If an axis is selected with shape entry greater than - one, an error is raised. - - Returns - ------- - squeezed : ndarray - The input array, but with all or a subset of the - dimensions of length 1 removed. This is always `a` itself - or a view into `a`. - - Raises - ------ - ValueError - If `axis` is not None, and an axis being squeezed is not of length 1 - - See Also - -------- - numpy.squeeze - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.squeeze(axis=axis) - - -def broadcast_shapes( - *args: Union[NdShapeLike, Sequence[NdShapeLike]] -) -> NdShape: - """ - - Broadcast the input shapes into a single shape. - - Parameters - ---------- - `*args` : tuples of ints, or ints - The shapes to be broadcast against each other. - - Returns - ------- - tuple : Broadcasted shape. - - See Also - -------- - numpy.broadcast_shapes - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - # TODO: expected "Union[SupportsIndex, Sequence[SupportsIndex]]" - return np.broadcast_shapes(*args) # type: ignore [arg-type] - - -def _broadcast_to( - arr: ndarray, - shape: NdShapeLike, - subok: bool = False, - broadcasted: bool = False, -) -> ndarray: - # create an array object w/ options passed from 'broadcast' routines - arr = array(arr, copy=False, subok=subok) - # 'broadcast_to' returns a read-only view of the original array - out_shape = broadcast_shapes(arr.shape, shape) - if out_shape != shape: - raise ValueError( - f"cannot broadcast an array of shape {arr.shape} to {shape}" - ) - result = ndarray( - shape=out_shape, - thunk=arr._thunk.broadcast_to(out_shape), - writeable=False, - ) - return result - - -@add_boilerplate("arr") -def broadcast_to( - arr: ndarray, shape: NdShapeLike, subok: bool = False -) -> ndarray: - """ - - Broadcast an array to a new shape. - - Parameters - ---------- - arr : array_like - The array to broadcast. - shape : tuple or int - The shape of the desired array. - A single integer i is interpreted as (i,). 
- subok : bool, optional - This option is ignored by cuNumeric. - - Returns - ------- - broadcast : array - A readonly view on the original array with the given shape. - It is typically not contiguous. - Furthermore, more than one element of a broadcasted array - may refer to a single memory location. - - See Also - -------- - numpy.broadcast_to - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - return _broadcast_to(arr, shape, subok) - - -def _broadcast_arrays( - arrs: list[ndarray], - subok: bool = False, -) -> list[ndarray]: - # create an arry object w/ options passed from 'broadcast' routines - arrays = [array(arr, copy=False, subok=subok) for arr in arrs] - # check if the broadcast can happen in the input list of arrays - shapes = [arr.shape for arr in arrays] - out_shape = broadcast_shapes(*shapes) - # broadcast to the final shape - arrays = [_broadcast_to(arr, out_shape, subok) for arr in arrays] - return arrays - - -def broadcast_arrays( - *args: Sequence[Any], subok: bool = False -) -> list[ndarray]: - """ - - Broadcast any number of arrays against each other. - - Parameters - ---------- - `*args` : array_likes - The arrays to broadcast. - - subok : bool, optional - This option is ignored by cuNumeric - - Returns - ------- - broadcasted : list of arrays - These arrays are views on the original arrays. - They are typically not contiguous. - Furthermore, more than one element of a broadcasted array - may refer to a single memory location. - If you need to write to the arrays, make copies first. - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - arrs = [convert_to_cunumeric_ndarray(arr) for arr in args] - return _broadcast_arrays(arrs, subok=subok) - - -class broadcast: - """Produce an object that broadcasts input parameters against one another. - It has shape and nd properties and may be used as an iterator. - - Parameters - ---------- - `*arrays` : array_likes - The arrays to broadcast. 
- - Returns - ------- - b: broadcast - Broadcast the input parameters against one another, and return an - object that encapsulates the result. Amongst others, it has shape - and nd properties, and may be used as an iterator. - - """ - - def __init__(self, *arrays: Sequence[Any]) -> None: - arrs = [convert_to_cunumeric_ndarray(arr) for arr in arrays] - broadcasted = _broadcast_arrays(arrs) - self._iters = tuple(arr.flat for arr in broadcasted) - self._index = 0 - self._shape = broadcasted[0].shape - self._size = np.prod(self.shape, dtype=int) - - def __iter__(self) -> broadcast: - self._index = 0 - return self - - def __next__(self) -> Any: - if self._index < self.size: - result = tuple(each[self._index] for each in self._iters) - self._index += 1 - return result - - def reset(self) -> None: - """Reset the broadcasted result's iterator(s).""" - self._index = 0 - - @property - def index(self) -> int: - """current index in broadcasted result""" - return self._index - - @property - def iters(self) -> Tuple[Iterable[Any], ...]: - """tuple of iterators along self’s "components." 
""" - return self._iters - - @property - def numiter(self) -> int: - """Number of iterators possessed by the broadcasted result.""" - return len(self._iters) - - @property - def nd(self) -> int: - """Number of dimensions of broadcasted result.""" - return self.ndim - - @property - def ndim(self) -> int: - """Number of dimensions of broadcasted result.""" - return len(self.shape) - - @property - def shape(self) -> NdShape: - """Shape of broadcasted result.""" - return self._shape - - @property - def size(self) -> int: - """Total size of broadcasted result.""" - return self._size - - -# Joining arrays - - -class ArrayInfo: - def __init__( - self, ndim: int, shape: NdShape, dtype: np.dtype[Any] - ) -> None: - self.ndim = ndim - self.shape = shape - self.dtype = dtype - - -def convert_to_array_form(indices: Sequence[int]) -> str: - return "".join(f"[{coord}]" for coord in indices) - - -def check_list_depth(arr: Any, prefix: NdShape = (0,)) -> int: - if not isinstance(arr, list): - return 0 - elif len(arr) == 0: - raise ValueError( - f"List at arrays{convert_to_array_form(prefix)} cannot be empty" - ) - - depths = list( - check_list_depth(each, prefix + (idx,)) for idx, each in enumerate(arr) - ) - - if len(set(depths)) != 1: # this should be one - # If we're here elements don't have the same depth - first_depth = depths[0] - for idx, other_depth in enumerate(depths[1:]): - if other_depth != first_depth: - raise ValueError( - "List depths are mismatched. 
First element was at depth " - f"{first_depth}, but there is an element at" - f" depth {other_depth}, " - f"arrays{convert_to_array_form(prefix+(idx+1,))}" - ) - - return depths[0] + 1 - - -def check_shape_with_axis( - inputs: list[ndarray], - func_name: str, - axis: int, -) -> None: - ndim = inputs[0].ndim - shape = inputs[0].shape - - axis = normalize_axis_index(axis, ndim) - if ndim >= 1: - if _builtin_any( - shape[:axis] != inp.shape[:axis] - or shape[axis + 1 :] != inp.shape[axis + 1 :] - for inp in inputs - ): - raise ValueError( - f"All arguments to {func_name} " - "must have the same " - "dimension size in all dimensions " - "except the target axis" - ) - return - - -def check_shape_dtype_without_axis( - inputs: Sequence[ndarray], - func_name: str, - dtype: Optional[npt.DTypeLike] = None, - casting: CastingKind = "same_kind", -) -> tuple[list[ndarray], ArrayInfo]: - if len(inputs) == 0: - raise ValueError("need at least one array to concatenate") - - inputs = list(convert_to_cunumeric_ndarray(inp) for inp in inputs) - ndim = inputs[0].ndim - shape = inputs[0].shape - - if _builtin_any(ndim != inp.ndim for inp in inputs): - raise ValueError( - f"All arguments to {func_name} " - "must have the same number of dimensions" - ) - - # Cast arrays with the passed arguments (dtype, casting) - if dtype is None: - dtype = np.result_type(*[inp.dtype for inp in inputs]) - else: - dtype = np.dtype(dtype) - - converted = list(inp.astype(dtype, casting=casting) for inp in inputs) - return converted, ArrayInfo(ndim, shape, dtype) - - -def _block_collect_slices( - arr: Union[ndarray, Sequence[ndarray]], cur_depth: int, depth: int -) -> tuple[list[Any], list[tuple[slice, ...]], Sequence[ndarray]]: - # collects slices for each array in `arr` - # the outcome will be slices on every dimension of the output array - # for each array in `arr` - if cur_depth < depth: - sublist_results = list( - _block_collect_slices(each, cur_depth + 1, depth) for each in arr - ) - # 
'sublist_results' contains a list of 3-way tuples, - # for arrays, out_shape of the sublist, and slices - arrays, outshape_list, slices = zip(*sublist_results) - max_ndim = _builtin_max( - 1 + (depth - cur_depth), *(len(each) for each in outshape_list) - ) - outshape_list = list( - ((1,) * (max_ndim - len(each)) + tuple(each)) - for each in outshape_list - ) - leading_dim = _builtin_sum( - each[-1 + (cur_depth - depth)] for each in outshape_list - ) - # flatten array lists from sublists into a single list - arrays = list(chain(*arrays)) - # prepares the out_shape of the current list - out_shape = list(outshape_list[0]) - out_shape[-1 + cur_depth - depth] = leading_dim - offset = 0 - updated_slices = [] - # update the dimension in each slice for the current axis - for shape, slice_list in zip(outshape_list, slices): - cur_dim = shape[-1 + cur_depth - depth] - updated_slices.append( - list( - (slice(offset, offset + cur_dim),) + each - for each in slice_list - ) - ) - offset += cur_dim - # flatten lists of slices into a single list - slices = list(chain(*updated_slices)) - else: - arrays = list(convert_to_cunumeric_ndarray(inp) for inp in arr) - common_shape = arrays[0].shape - if len(arr) > 1: - arrays, common_info = check_shape_dtype_without_axis( - arrays, block.__name__ - ) - common_shape = common_info.shape - check_shape_with_axis(arrays, block.__name__, axis=-1) - # the initial slices for each arr on arr.shape[-1] - out_shape, slices, arrays = _collect_outshape_slices( - arrays, common_shape, axis=-1 + len(common_shape) - ) - - return arrays, out_shape, slices - - -def _block_slicing(arrays: Sequence[ndarray], depth: int) -> ndarray: - # collects the final slices of input arrays and assign them at once - arrays, out_shape, slices = _block_collect_slices(arrays, 1, depth) - out_array = ndarray(shape=out_shape, inputs=arrays) - - for dest, inp in zip(slices, arrays): - out_array[(Ellipsis,) + tuple(dest)] = inp - - return out_array - - -def 
_collect_outshape_slices( - inputs: Sequence[ndarray], common_shape: NdShape, axis: int -) -> tuple[list[Any], list[tuple[slice, ...]], Sequence[ndarray]]: - leading_dim = _builtin_sum(arr.shape[axis] for arr in inputs) - out_shape = list(common_shape) - out_shape[axis] = leading_dim - post_idx = (slice(None),) * len(out_shape[axis + 1 :]) - slices = [] - offset = 0 - # collect slices for arrays in `inputs` - inputs = list(inp for inp in inputs if inp.size > 0) - for inp in inputs: - slices.append((slice(offset, offset + inp.shape[axis]),) + post_idx) - offset += inp.shape[axis] - - return out_shape, slices, inputs - - -def _concatenate( - inputs: Sequence[ndarray], - common_info: ArrayInfo, - axis: int = 0, - out: Optional[ndarray] = None, - dtype: Optional[npt.DTypeLike] = None, - casting: CastingKind = "same_kind", -) -> ndarray: - if axis < 0: - axis += len(common_info.shape) - out_shape, slices, inputs = _collect_outshape_slices( - inputs, common_info.shape, axis - ) - - if out is None: - out_array = ndarray( - shape=out_shape, dtype=common_info.dtype, inputs=inputs - ) - else: - out = convert_to_cunumeric_ndarray(out) - if not isinstance(out, ndarray): - raise TypeError("out should be ndarray") - elif list(out.shape) != out_shape: - raise ValueError( - f"out.shape({out.shape}) is not matched " - f"to the result shape of concatenation ({out_shape})" - ) - out_array = out - - for dest, src in zip(slices, inputs): - out_array[(Ellipsis,) + dest] = src - - return out_array - - -def append( - arr: ndarray, values: ndarray, axis: Optional[int] = None -) -> ndarray: - """ - - Append values to the end of an array. - - Parameters - ---------- - arr : array_like - Values are appended to a copy of this array. - values : array_like - These values are appended to a copy of arr. It must be of the correct - shape (the same shape as arr, excluding axis). If axis is not - specified, values can be any shape and will be flattened before use. 
- axis : int, optional - The axis along which values are appended. If axis is not given, both - `arr` and `values` are flattened before use. - - Returns - ------- - res : ndarray - A copy of arr with values appended to axis. - - See Also - -------- - numpy.append - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - # Check to see if we can build a new tuple of cuNumeric arrays - inputs = list(convert_to_cunumeric_ndarray(inp) for inp in [arr, values]) - return concatenate(inputs, axis) - - -def block(arrays: Sequence[Any]) -> ndarray: - """ - Assemble an nd-array from nested lists of blocks. - - Blocks in the innermost lists are concatenated (see concatenate) - along the last dimension (-1), then these are concatenated along - the second-last dimension (-2), and so on until the outermost - list is reached. - - Blocks can be of any dimension, but will not be broadcasted using - the normal rules. Instead, leading axes of size 1 are inserted, - to make block.ndim the same for all blocks. This is primarily useful - for working with scalars, and means that code like np.block([v, 1]) - is valid, where v.ndim == 1. - - When the nested list is two levels deep, this allows block matrices - to be constructed from their components. - - Parameters - ---------- - arrays : nested list of array_like or scalars - If passed a single ndarray or scalar (a nested list of depth 0), - this is returned unmodified (and not copied). - - Elements shapes must match along the appropriate axes (without - broadcasting), but leading 1s will be prepended to the shape as - necessary to make the dimensions match. - - Returns - ------- - block_array : ndarray - The array assembled from the given blocks. 
- The dimensionality of the output is equal to the greatest of: * the - dimensionality of all the inputs * the depth to which the input list - is nested - - Raises - ------ - ValueError - If list depths are mismatched - for instance, [[a, b], c] is - illegal, and should be spelt [[a, b], [c]] - If lists are empty - for instance, [[a, b], []] - - See Also - -------- - numpy.block - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - # arrays should concatenate from innermost subarrays - # the 'arrays' should be balanced tree - # check if the 'arrays' is a balanced tree - depth = check_list_depth(arrays) - - result = _block_slicing(arrays, depth) - return result - - -def concatenate( - inputs: Sequence[ndarray], - axis: Union[int, None] = 0, - out: Optional[ndarray] = None, - dtype: Optional[npt.DTypeLike] = None, - casting: CastingKind = "same_kind", -) -> ndarray: - """ - - concatenate((a1, a2, ...), axis=0, out=None, dtype=None, - casting="same_kind") - - Join a sequence of arrays along an existing axis. - - Parameters - ---------- - a1, a2, ... : Sequence[array_like] - The arrays must have the same shape, except in the dimension - corresponding to `axis` (the first, by default). - axis : int, optional - The axis along which the arrays will be joined. If axis is None, - arrays are flattened before use. Default is 0. - out : ndarray, optional - If provided, the destination to place the result. The shape must be - correct, matching that of what concatenate would have returned if no - out argument were specified. - dtype : str or data-type - If provided, the destination array will have this dtype. Cannot be - provided together with `out`. - casting : ``{'no', 'equiv', 'safe', 'same_kind', 'unsafe'}``, optional - Controls what kind of data casting may occur. Defaults to 'same_kind'. - - Returns - ------- - res : ndarray - The concatenated array. 
- - See Also - -------- - numpy.concatenate - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if dtype is not None and out is not None: - raise TypeError( - "concatenate() only takes `out` or `dtype` as an argument," - "but both were provided." - ) - - if casting not in casting_kinds: - raise ValueError( - "casting must be one of 'no', 'equiv', " - "'safe', 'same_kind', or 'unsafe'" - ) - - # flatten arrays if axis == None and concatenate arrays on the first axis - if axis is None: - # Reshape arrays in the `array_list` to handle scalars - reshaped = _atleast_nd(1, inputs) - if not isinstance(reshaped, list): - reshaped = [reshaped] - inputs = list(inp.ravel() for inp in reshaped) - axis = 0 - - # Check to see if we can build a new tuple of cuNumeric arrays - cunumeric_inputs, common_info = check_shape_dtype_without_axis( - inputs, concatenate.__name__, dtype, casting - ) - check_shape_with_axis(cunumeric_inputs, concatenate.__name__, axis) - - return _concatenate( - cunumeric_inputs, - common_info, - axis, - out, - dtype, - casting, - ) - - -def stack( - arrays: Sequence[ndarray], axis: int = 0, out: Optional[ndarray] = None -) -> ndarray: - """ - - Join a sequence of arrays along a new axis. - - The ``axis`` parameter specifies the index of the new axis in the - dimensions of the result. For example, if ``axis=0`` it will be the first - dimension and if ``axis=-1`` it will be the last dimension. - - Parameters - ---------- - arrays : Sequence[array_like] - Each array must have the same shape. - - axis : int, optional - The axis in the result array along which the input arrays are stacked. - - out : ndarray, optional - If provided, the destination to place the result. The shape must be - correct, matching that of what stack would have returned if no - out argument were specified. - - Returns - ------- - stacked : ndarray - The stacked array has one more dimension than the input arrays. 
- - See Also - -------- - numpy.stack - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if type(axis) is not int: - raise TypeError("The target axis should be an integer") - - arrays, common_info = check_shape_dtype_without_axis( - arrays, stack.__name__ - ) - shapes = {inp.shape for inp in arrays} - if len(shapes) != 1: - raise ValueError("all input arrays must have the same shape for stack") - - axis = normalize_axis_index(axis, common_info.ndim + 1) - shape = common_info.shape[:axis] + (1,) + common_info.shape[axis:] - arrays = [arr.reshape(shape) for arr in arrays] - common_info.shape = tuple(shape) - return _concatenate(arrays, common_info, axis, out=out) - - -def vstack(tup: Sequence[ndarray]) -> ndarray: - """ - - Stack arrays in sequence vertically (row wise). - - This is equivalent to concatenation along the first axis after 1-D arrays - of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by - `vsplit`. - - This function makes most sense for arrays with up to 3 dimensions. For - instance, for pixel-data with a height (first axis), width (second axis), - and r/g/b channels (third axis). The functions `concatenate`, `stack` and - `block` provide more general stacking and concatenation operations. - - Parameters - ---------- - tup : Sequence[ndarray] - The arrays must have the same shape along all but the first axis. - 1-D arrays must have the same length. - - Returns - ------- - stacked : ndarray - The array formed by stacking the given arrays, will be at least 2-D. 
- - See Also - -------- - numpy.vstack - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - # Reshape arrays in the `array_list` if needed before concatenation - reshaped = _atleast_nd(2, tup) - if not isinstance(reshaped, list): - reshaped = [reshaped] - tup, common_info = check_shape_dtype_without_axis( - reshaped, vstack.__name__ - ) - check_shape_with_axis(tup, vstack.__name__, 0) - return _concatenate( - tup, - common_info, - axis=0, - dtype=common_info.dtype, - ) - - -def hstack(tup: Sequence[ndarray]) -> ndarray: - """ - - Stack arrays in sequence horizontally (column wise). - - This is equivalent to concatenation along the second axis, except for 1-D - arrays where it concatenates along the first axis. Rebuilds arrays divided - by `hsplit`. - - This function makes most sense for arrays with up to 3 dimensions. For - instance, for pixel-data with a height (first axis), width (second axis), - and r/g/b channels (third axis). The functions `concatenate`, `stack` and - `block` provide more general stacking and concatenation operations. - - Parameters - ---------- - tup : Sequence[ndarray] - The arrays must have the same shape along all but the second axis, - except 1-D arrays which can be any length. - - Returns - ------- - stacked : ndarray - The array formed by stacking the given arrays. 
- - See Also - -------- - numpy.hstack - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - # Reshape arrays in the `array_list` to handle scalars - reshaped = _atleast_nd(1, tup) - if not isinstance(reshaped, list): - reshaped = [reshaped] - - tup, common_info = check_shape_dtype_without_axis( - reshaped, hstack.__name__ - ) - check_shape_with_axis( - tup, hstack.__name__, axis=(0 if common_info.ndim == 1 else 1) - ) - # When ndim == 1, hstack concatenates arrays along the first axis - return _concatenate( - tup, - common_info, - axis=(0 if common_info.ndim == 1 else 1), - dtype=common_info.dtype, - ) - - -def dstack(tup: Sequence[ndarray]) -> ndarray: - """ - - Stack arrays in sequence depth wise (along third axis). - - This is equivalent to concatenation along the third axis after 2-D arrays - of shape `(M,N)` have been reshaped to `(M,N,1)` and 1-D arrays of shape - `(N,)` have been reshaped to `(1,N,1)`. Rebuilds arrays divided by - `dsplit`. - - This function makes most sense for arrays with up to 3 dimensions. For - instance, for pixel-data with a height (first axis), width (second axis), - and r/g/b channels (third axis). The functions `concatenate`, `stack` and - `block` provide more general stacking and concatenation operations. - - Parameters - ---------- - tup : Sequence[ndarray] - The arrays must have the same shape along all but the third axis. - 1-D or 2-D arrays must have the same shape. - - Returns - ------- - stacked : ndarray - The array formed by stacking the given arrays, will be at least 3-D. 
- - See Also - -------- - numpy.dstack - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - # Reshape arrays to (1,N,1) for ndim ==1 or (M,N,1) for ndim == 2: - reshaped = _atleast_nd(3, tup) - if not isinstance(reshaped, list): - reshaped = [reshaped] - tup, common_info = check_shape_dtype_without_axis( - reshaped, dstack.__name__ - ) - check_shape_with_axis(tup, dstack.__name__, 2) - return _concatenate( - tup, - common_info, - axis=2, - dtype=common_info.dtype, - ) - - -def column_stack(tup: Sequence[ndarray]) -> ndarray: - """ - - Stack 1-D arrays as columns into a 2-D array. - - Take a sequence of 1-D arrays and stack them as columns - to make a single 2-D array. 2-D arrays are stacked as-is, - just like with `hstack`. 1-D arrays are turned into 2-D columns - first. - - Parameters - ---------- - tup : Sequence[ndarray] - 1-D or 2-D arrays to stack. All of them must have the same - first dimension. - - Returns - ------- - stacked : ndarray - The 2-D array formed by stacking the given arrays. - - See Also - -------- - numpy.column_stack - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - # Reshape arrays in the `array_list` to handle scalars - reshaped = _atleast_nd(1, tup) - if not isinstance(reshaped, list): - reshaped = [reshaped] - - tup, common_info = check_shape_dtype_without_axis( - reshaped, column_stack.__name__ - ) - - if common_info.ndim == 1: - tup = list(inp.reshape((inp.shape[0], 1)) for inp in tup) - common_info.shape = tup[0].shape - check_shape_with_axis(tup, column_stack.__name__, 1) - return _concatenate( - tup, - common_info, - axis=1, - dtype=common_info.dtype, - ) - - -row_stack = vstack - - -# Splitting arrays - - -def split( - a: ndarray, indices: Union[int, ndarray], axis: int = 0 -) -> list[ndarray]: - """ - - Split an array into multiple sub-arrays as views into `ary`. - - Parameters - ---------- - ary : ndarray - Array to be divided into sub-arrays. 
- indices_or_sections : int or ndarray - If `indices_or_sections` is an integer, N, the array will be divided - into N equal arrays along `axis`. If such a split is not possible, - an error is raised. - - If `indices_or_sections` is a 1-D array of sorted integers, the entries - indicate where along `axis` the array is split. For example, - ``[2, 3]`` would, for ``axis=0``, result in - - - ary[:2] - - ary[2:3] - - ary[3:] - - If an index exceeds the dimension of the array along `axis`, - an empty sub-array is returned correspondingly. - axis : int, optional - The axis along which to split, default is 0. - - Returns - ------- - sub-arrays : list[ndarray] - A list of sub-arrays as views into `ary`. - - Raises - ------ - ValueError - If `indices_or_sections` is given as an integer, but - a split does not result in equal division. - - See Also - -------- - numpy.split - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return array_split(a, indices, axis, equal=True) - - -def array_split( - a: ndarray, - indices: Union[int, tuple[int], ndarray, npt.NDArray[Any]], - axis: int = 0, - equal: bool = False, -) -> list[ndarray]: - """ - - Split an array into multiple sub-arrays. - - Please refer to the ``split`` documentation. The only difference - between these functions is that ``array_split`` allows - `indices_or_sections` to be an integer that does *not* equally - divide the axis. For an array of length l that should be split - into n sections, it returns l % n sub-arrays of size l//n + 1 - and the rest of size l//n. 
- - See Also - -------- - numpy.array_split - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - array = convert_to_cunumeric_ndarray(a) - split_pts = [] - if axis >= array.ndim: - raise ValueError( - f"array({array.shape}) has less dimensions than axis({axis})" - ) - - if isinstance(indices, int): - if indices <= 0: - raise ValueError("number sections must be larger than 0.") - res = array.shape[axis] % indices - if equal and res != 0: - raise ValueError("array split does not result in an equal divison") - - len_subarr = array.shape[axis] // indices - end_idx = array.shape[axis] - first_idx = len_subarr - - # the requested # of subarray is larger than the size of array - # -> size of 1 subarrays + empty subarrays - if len_subarr == 0: - len_subarr = 1 - first_idx = len_subarr - end_idx = indices - else: - if res != 0: - # The first 'res' groups have len_subarr+1 elements - split_pts = list( - range( - len_subarr + 1, (len_subarr + 1) * res, len_subarr + 1 - ) - ) - first_idx = (len_subarr + 1) * res - split_pts.extend(range(first_idx, end_idx + 1, len_subarr)) - - elif isinstance(indices, (list, tuple)) or ( - isinstance(indices, (ndarray, np.ndarray)) and indices.dtype == int - ): - split_pts = list(indices) - # adding the size of the target dimension. 
- # This helps create dummy or last subarray correctly - split_pts.append(array.shape[axis]) - - else: - raise ValueError("Integer or array for split should be provided") - - result = [] - start_idx = 0 - end_idx = 0 - out_shape = [] - in_shape: list[Union[int, slice]] = [] - - for i in range(array.ndim): - if i != axis: - in_shape.append(slice(array.shape[i])) - out_shape.append(array.shape[i]) - else: - in_shape.append(1) - out_shape.append(1) - - for pts in split_pts: - if type(pts) is not int: - raise ValueError( - "Split points in the passed `indices` should be integer" - ) - end_idx = pts - # For a split point, which is larger than the dimension for splitting, - # The last non-empty subarray should be copied from - # array[last_elem:array.shape[axis]] - if pts > array.shape[axis]: - end_idx = array.shape[axis] - out_shape[axis] = (end_idx - start_idx) + 1 - in_shape[axis] = slice(start_idx, end_idx) - new_subarray = None - if start_idx < array.shape[axis] and start_idx < end_idx: - new_subarray = array[tuple(in_shape)].view() - else: - out_shape[axis] = 0 - new_subarray = ndarray( - tuple(out_shape), dtype=array.dtype, writeable=array._writeable - ) - result.append(new_subarray) - start_idx = pts - - return result - - -def dsplit(a: ndarray, indices: Union[int, ndarray]) -> list[ndarray]: - """ - - Split array into multiple sub-arrays along the 3rd axis (depth). - - Please refer to the `split` documentation. `dsplit` is equivalent - to `split` with ``axis=2``, the array is always split along the third - axis provided the array dimension is greater than or equal to 3. - - See Also - -------- - numpy.dsplit - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return split(a, indices, axis=2) - - -def hsplit(a: ndarray, indices: Union[int, ndarray]) -> list[ndarray]: - """ - - Split an array into multiple sub-arrays horizontally (column-wise). - - Please refer to the `split` documentation. 
`hsplit` is equivalent - to `split` with ``axis=1``, the array is always split along the second - axis regardless of the array dimension. - - See Also - -------- - numpy.hsplit - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return split(a, indices, axis=1) - - -def vsplit(a: ndarray, indices: Union[int, ndarray]) -> list[ndarray]: - """ - - Split an array into multiple sub-arrays vertically (row-wise). - - Please refer to the ``split`` documentation. ``vsplit`` is equivalent - to ``split`` with `axis=0` (default), the array is always split along the - first axis regardless of the array dimension. - - See Also - -------- - numpy.vsplit - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return split(a, indices, axis=0) - - -# Tiling arrays - - -@add_boilerplate("A") -def tile( - A: ndarray, reps: Union[int, Sequence[int], npt.NDArray[np.int_]] -) -> ndarray: - """ - Construct an array by repeating A the number of times given by reps. - - If `reps` has length ``d``, the result will have dimension of ``max(d, - A.ndim)``. - - If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new - axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication, - or shape (1, 1, 3) for 3-D replication. If this is not the desired - behavior, promote `A` to d-dimensions manually before calling this - function. - - If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it. - Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as - (1, 1, 2, 2). - - Parameters - ---------- - A : array_like - The input array. - reps : 1d array_like - The number of repetitions of `A` along each axis. - - Returns - ------- - c : ndarray - The tiled output array. - - See Also - -------- - numpy.tile - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - computed_reps: tuple[int, ...] 
- if isinstance(reps, int): - computed_reps = (reps,) - else: - if np.ndim(reps) > 1: - raise TypeError("`reps` must be a 1d sequence") - computed_reps = tuple(reps) - # Figure out the shape of the destination array - out_dims = _builtin_max(A.ndim, len(computed_reps)) - # Prepend ones until the dimensions match - while len(computed_reps) < out_dims: - computed_reps = (1,) + computed_reps - out_shape: NdShape = () - # Prepend dimensions if necessary - for dim in range(out_dims - A.ndim): - out_shape += (computed_reps[dim],) - offset = len(out_shape) - for dim in range(A.ndim): - out_shape += (A.shape[dim] * computed_reps[offset + dim],) - assert len(out_shape) == out_dims - result = ndarray(out_shape, dtype=A.dtype, inputs=(A,)) - result._thunk.tile(A._thunk, computed_reps) - return result - - -def repeat(a: ndarray, repeats: Any, axis: Optional[int] = None) -> ndarray: - """ - Repeat elements of an array. - - Parameters - ---------- - a : array_like - Input array. - repeats : int or ndarray[int] - The number of repetitions for each element. repeats is - broadcasted to fit the shape of the given axis. - axis : int, optional - The axis along which to repeat values. By default, use the - flattened input array, and return a flat output array. - - Returns - ------- - repeated_array : ndarray - Output array which has the same shape as a, except along the - given axis. 
- - Notes - ----- - Currently, repeat operations supports only 1D arrays - - See Also - -------- - numpy.repeat - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - if repeats is None: - raise TypeError( - "int() argument must be a string, a bytes-like object or a number," - " not 'NoneType'" - ) - - if np.ndim(repeats) > 1: - raise ValueError("`repeats` should be scalar or 1D array") - - # axes should be integer type - if axis is not None and not isinstance(axis, int): - raise TypeError("Axis should be of integer type") - - # when array is a scalar - if np.ndim(a) == 0: - if axis is not None and axis != 0 and axis != -1: - raise np.AxisError( - f"axis {axis} is out of bounds for array of dimension 0" - ) - if np.ndim(repeats) == 0: - if not isinstance(repeats, int): - runtime.warn( - "converting repeats to an integer type", - category=UserWarning, - ) - repeats = np.int64(repeats) - return full((repeats,), cast(Union[int, float], a)) - elif np.ndim(repeats) == 1 and len(repeats) == 1: - if not isinstance(repeats, int): - runtime.warn( - "converting repeats to an integer type", - category=UserWarning, - ) - repeats = np.int64(repeats) - return full((repeats[0],), cast(Union[int, float], a)) - else: - raise ValueError( - "`repeat` with a scalar parameter `a` is only " - "implemented for scalar values of the parameter `repeats`." - ) - - # array is an array - array = convert_to_cunumeric_ndarray(a) - if np.ndim(repeats) == 1: - repeats = convert_to_cunumeric_ndarray(repeats) - - # if no axes specified, flatten array - if axis is None: - array = array.ravel() - axis = 0 - - axis_int: int = normalize_axis_index(axis, array.ndim) - - # If repeats is on a zero sized axis_int, then return the array. - if array.shape[axis_int] == 0: - return array.copy() - - if np.ndim(repeats) == 1: - if repeats.shape[0] == 1 and repeats.shape[0] != array.shape[axis_int]: - repeats = repeats[0] - - # repeats is a scalar. 
- if np.ndim(repeats) == 0: - # repeats is 0 - if repeats == 0: - empty_shape = list(array.shape) - empty_shape[axis_int] = 0 - return ndarray(shape=tuple(empty_shape), dtype=array.dtype) - # repeats should be integer type - if not isinstance(repeats, int): - runtime.warn( - "converting repeats to an integer type", - category=UserWarning, - ) - result = array._thunk.repeat( - repeats=np.int64(repeats), - axis=axis_int, - scalar_repeats=True, - ) - # repeats is an array - else: - # repeats should be integer type - repeats = repeats._warn_and_convert(np.int64) - if repeats.shape[0] != array.shape[axis_int]: - raise ValueError("incorrect shape of repeats array") - result = array._thunk.repeat( - repeats=repeats._thunk, axis=axis_int, scalar_repeats=False - ) - return ndarray(shape=result.shape, thunk=result) - - -# Rearranging elements - - -@add_boilerplate("m") -def flip(m: ndarray, axis: Optional[NdShapeLike] = None) -> ndarray: - """ - Reverse the order of elements in an array along the given axis. - - The shape of the array is preserved, but the elements are reordered. - - Parameters - ---------- - m : array_like - Input array. - axis : None or int or tuple[int], optional - Axis or axes along which to flip over. The default, axis=None, will - flip over all of the axes of the input array. If axis is negative it - counts from the last to the first axis. - - If axis is a tuple of ints, flipping is performed on all of the axes - specified in the tuple. - - Returns - ------- - out : array_like - A new array that is constructed from `m` with the entries of axis - reversed. - - See Also - -------- - numpy.flip - - Availability - -------- - Single GPU, Single CPU - - Notes - ----- - cuNumeric implementation doesn't return a view, it returns a new array - """ - return m.flip(axis=axis) - - -@add_boilerplate("m") -def flipud(m: ndarray) -> ndarray: - """ - Reverse the order of elements along axis 0 (up/down). 
- - For a 2-D array, this flips the entries in each column in the up/down - direction. Rows are preserved, but appear in a different order than before. - - Parameters - ---------- - m : array_like - Input array. - - Returns - ------- - out : array_like - A new array that is constructed from `m` with rows reversed. - - See Also - -------- - numpy.flipud - - Availability - -------- - Single GPU, Single CPU - - Notes - ----- - cuNumeric implementation doesn't return a view, it returns a new array - """ - if m.ndim < 1: - raise ValueError("Input must be >= 1-d.") - return flip(m, axis=0) - - -@add_boilerplate("m") -def fliplr(m: ndarray) -> ndarray: - """ - Reverse the order of elements along axis 1 (left/right). - - For a 2-D array, this flips the entries in each row in the left/right - direction. Columns are preserved, but appear in a different order than - before. - - Parameters - ---------- - m : array_like - Input array, must be at least 2-D. - - Returns - ------- - f : ndarray - A new array that is constructed from `m` with the columns reversed. - - See Also - -------- - numpy.fliplr - - Availability - -------- - Single GPU, Single CPU - - Notes - ----- - cuNumeric implementation doesn't return a view, it returns a new array - """ - if m.ndim < 2: - raise ValueError("Input must be >= 2-d.") - return flip(m, axis=1) - - -################### -# Binary operations -################### - -# Elementwise bit operations - - -################### -# Indexing routines -################### - -# Generating index arrays - - -@add_boilerplate("arr", "mask", "vals") -def place(arr: ndarray, mask: ndarray, vals: ndarray) -> None: - """ - Change elements of an array based on conditional and input values. - - Parameters - ---------- - arr : array_like - Array to put data into. - mask : array_like - Mask array. Must have the same size as `arr`. - vals : 1-D sequence - Values to put into `arr`. Only the first N elements are used, - where N is the number of True values in mask. 
If vals is smaller - than N, it will be repeated, and if elements of a are to be masked, - this sequence must be non-empty. - - See Also - -------- - numpy.copyto, numpy.put, numpy.take, numpy.extract - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if arr.size == 0: - return - - check_writeable(arr) - - if mask.size != arr.size: - raise ValueError("arr array and condition array must be of same size") - - if vals.ndim != 1: - raise ValueError("vals array has to be 1-dimensional") - - if mask.shape != arr.shape: - mask_reshape = reshape(mask, arr.shape) - else: - mask_reshape = mask - - num_values = int(count_nonzero(mask_reshape)) - if num_values == 0: - return - - if vals.size == 0: - raise ValueError("vals array cannot be empty") - - if num_values != vals.size: - reps = (num_values + vals.size - 1) // vals.size - vals_resized = tile(A=vals, reps=reps) if reps > 1 else vals - vals_resized = vals_resized[:num_values] - else: - vals_resized = vals - - if mask_reshape.dtype == bool: - arr._thunk.set_item(mask_reshape._thunk, vals_resized._thunk) - else: - bool_mask = mask_reshape.astype(bool) - arr._thunk.set_item(bool_mask._thunk, vals_resized._thunk) - - -@add_boilerplate("condition", "arr") -def extract(condition: ndarray, arr: ndarray) -> ndarray: - """ - - Return the elements of an array that satisfy some condition. - - Parameters - ---------- - condition : array_like - An array whose nonzero or True entries indicate the elements - of `arr` to extract. - arr : array_like - Input array of the same size as `condition`. - - Returns - ------- - result : ndarray - Rank 1 array of values from arr where `condition` is True. 
- - See Also - -------- - numpy.extract - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if condition.size != arr.size: - raise ValueError("arr array and condition array must be of same size") - - if condition.shape != arr.shape: - condition_reshape = reshape(condition, arr.shape) - else: - condition_reshape = condition - - if condition_reshape.dtype == bool: - thunk = arr._thunk.get_item(condition_reshape._thunk) - else: - bool_condition = condition_reshape.astype(bool) - thunk = arr._thunk.get_item(bool_condition._thunk) - - return ndarray(shape=thunk.shape, thunk=thunk) - - -@add_boilerplate("a") -def nonzero(a: ndarray) -> tuple[ndarray, ...]: - """ - - Return the indices of the elements that are non-zero. - - Returns a tuple of arrays, one for each dimension of `a`, - containing the indices of the non-zero elements in that - dimension. - - Parameters - ---------- - a : array_like - Input array. - - Returns - ------- - tuple_of_arrays : tuple - Indices of elements that are non-zero. - - See Also - -------- - numpy.nonzero - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.nonzero() - - -@add_boilerplate("a") -def flatnonzero(a: ndarray) -> ndarray: - """ - - Return indices that are non-zero in the flattened version of a. - - This is equivalent to `np.nonzero(np.ravel(a))[0]`. - - Parameters - ---------- - a : array_like - Input array. - - Returns - ------- - res : ndarray - Output array, containing the indices of the elements of - `a.ravel()` that are non-zero. - - See Also - -------- - numpy.flatnonzero - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return nonzero(ravel(a))[0] - - -@add_boilerplate("a", "x", "y") -def where( - a: ndarray, x: Optional[ndarray] = None, y: Optional[ndarray] = None -) -> Union[ndarray, tuple[ndarray, ...]]: - """ - where(condition, [x, y]) - - Return elements chosen from `x` or `y` depending on `condition`. 
- - Parameters - ---------- - condition : array_like, bool - Where True, yield `x`, otherwise yield `y`. - x, y : array_like - Values from which to choose. `x`, `y` and `condition` need to be - broadcastable to some shape. - - Returns - ------- - out : ndarray - An array with elements from `x` where `condition` is True, and elements - from `y` elsewhere. - - See Also - -------- - numpy.where - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if x is None or y is None: - if x is not None or y is not None: - raise ValueError( - "both 'x' and 'y' parameters must be specified together for" - " 'where'" - ) - return nonzero(a) - return ndarray._perform_where(a, x, y) - - -@add_boilerplate("a") -def argwhere(a: ndarray) -> ndarray: - """ - argwhere(a) - - Find the indices of array elements that are non-zero, grouped by element. - - Parameters - ---------- - a : array_like - Input data. - - Returns - ------- - index_array : ndarray - Indices of elements that are non-zero. Indices are grouped by element. - This array will have shape (N, a.ndim) where N is the number of - non-zero items. - - See Also - -------- - numpy.argwhere - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - thunk = a._thunk.argwhere() - return ndarray(shape=thunk.shape, thunk=thunk) - - -# Indexing-like operations -def indices( - dimensions: Sequence[int], dtype: npt.DTypeLike = int, sparse: bool = False -) -> Union[ndarray, tuple[ndarray, ...]]: - """ - Return an array representing the indices of a grid. - Compute an array where the subarrays contain index values 0, 1, ... - varying only along the corresponding axis. - - Parameters - ---------- - dimensions : Sequence[int] - The shape of the grid. - dtype : data-type, optional - Data type of the result. - sparse : bool, optional - Return a sparse representation of the grid instead of a dense - representation. Default is False. - - Returns - ------- - grid : ndarray or Tuple[ndarray, ...] 
- If sparse is False returns one array of grid indices, - ``grid.shape = (len(dimensions),) + tuple(dimensions)``. - If sparse is True returns a tuple of arrays, with - ``grid[i].shape = (1, ..., 1, dimensions[i], 1, ..., 1)`` with - dimensions[i] in the ith place - - See Also - -------- - numpy.indices - - Notes - ----- - The output shape in the dense case is obtained by prepending the number - of dimensions in front of the tuple of dimensions, i.e. if `dimensions` - is a tuple ``(r0, ..., rN-1)`` of length ``N``, the output shape is - ``(N, r0, ..., rN-1)``. - The subarrays ``grid[k]`` contains the N-D array of indices along the - ``k-th`` axis. Explicitly: - - grid[k, i0, i1, ..., iN-1] = ik - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - # implementation of indices routine is adapted from NumPy - dimensions = tuple(dimensions) - N = len(dimensions) - shape = (1,) * N - if sparse: - res_tuple: tuple[ndarray, ...] = () - for i, dim in enumerate(dimensions): - idx = arange(dim, dtype=dtype).reshape( - shape[:i] + (dim,) + shape[i + 1 :] - ) - res_tuple += (idx,) - return res_tuple - else: - out_shape = (N,) + dimensions - res_array: ndarray = empty(out_shape, dtype=dtype) - for i, dim in enumerate(dimensions): - idx = arange(dim, dtype=dtype).reshape( - shape[:i] + (dim,) + shape[i + 1 :] - ) - res_array[i] = idx - return res_array - - -def mask_indices( - n: int, mask_func: Callable[[ndarray, int], ndarray], k: int = 0 -) -> tuple[ndarray, ...]: - """ - Return the indices to access (n, n) arrays, given a masking function. - - Assume `mask_func` is a function that, for a square array a of size - ``(n, n)`` with a possible offset argument `k`, when called as - ``mask_func(a, k)`` returns a new array with zeros in certain locations - (functions like :func:`cunumeric.triu` or :func:`cunumeric.tril` - do precisely this). Then this function returns the indices where - the non-zero values would be located. 
- - Parameters - ---------- - n : int - The returned indices will be valid to access arrays of shape (n, n). - mask_func : callable - A function whose call signature is similar to that of - :func:`cunumeric.triu`, :func:`cunumeric.tril`. - That is, ``mask_func(x, k)`` returns a boolean array, shaped like `x`. - `k` is an optional argument to the function. - k : scalar - An optional argument which is passed through to `mask_func`. Functions - like :func:`cunumeric.triu`, :func:`cunumeric,tril` - take a second argument that is interpreted as an offset. - - Returns - ------- - indices : tuple of arrays. - The `n` arrays of indices corresponding to the locations where - ``mask_func(np.ones((n, n)), k)`` is True. - - See Also - -------- - numpy.mask_indices - - Notes - ----- - WARNING: `mask_indices` expects `mask_function` to call cuNumeric functions - for good performance. In case non-cuNumeric functions are called by - `mask_function`, cuNumeric will have to materialize all data on the host - which might result in running out of system memory. - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - # this implementation is based on the Cupy - a = ones((n, n), dtype=bool) - if not is_implemented(mask_func): - runtime.warn( - "Calling non-cuNumeric functions in mask_func can result in bad " - "performance", - category=UserWarning, - ) - return mask_func(a, k).nonzero() - - -def diag_indices(n: int, ndim: int = 2) -> tuple[ndarray, ...]: - """ - Return the indices to access the main diagonal of an array. - - This returns a tuple of indices that can be used to access the main - diagonal of an array a with a.ndim >= 2 dimensions and - shape (n, n, …, n). For a.ndim = 2 this is the usual diagonal, - for a.ndim > 2 this is the set of indices to - access a[i, i, ..., i] for i = [0..n-1]. - - Parameters - ---------- - n : int - The size, along each dimension, of the arrays for which the - returned indices can be used. 
- ndim : int, optional - The number of dimensions. - - See Also - -------- - numpy.diag_indices - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - idx = arange(n, dtype=int) - return (idx,) * ndim - - -@add_boilerplate("arr") -def diag_indices_from(arr: ndarray) -> tuple[ndarray, ...]: - """ - Return the indices to access the main diagonal of an n-dimensional array. - - See diag_indices for full details. - - Parameters - ---------- - arr : array_like - at least 2-D - - See Also - -------- - numpy.diag_indices_from, numpy.diag_indices - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if not arr.ndim >= 2: - raise ValueError("input array must be at least 2-d") - # For more than d=2, the strided formula is only valid for arrays with - # all dimensions equal, so we check first. - for i in range(1, arr.ndim): - if arr.shape[i] != arr.shape[0]: - raise ValueError("All dimensions of input must be of equal length") - - return diag_indices(arr.shape[0], arr.ndim) - - -def tril_indices( - n: int, k: int = 0, m: Optional[int] = None -) -> tuple[ndarray, ...]: - """ - Return the indices for the lower-triangle of an (n, m) array. - - Parameters - ---------- - n : int - The row dimension of the arrays for which the returned - indices will be valid. - k : int, optional - Diagonal offset (see :func:`cunumeric.tril` for details). - m : int, optional - The column dimension of the arrays for which the returned - indices will be valid. - By default `m` is taken equal to `n`. - - Returns - ------- - inds : tuple of arrays - The indices for the lower-triangle. The returned tuple contains two - arrays, each with the indices along one dimension of the array. 
- - See also - -------- - numpy.tril_indices - - Notes - ----- - - Availability - ------------ - Multiple GPUs, Multiple CPUs - """ - - tri_ = tri(n, m, k=k, dtype=bool) - return nonzero(tri_) - - -@add_boilerplate("arr") -def tril_indices_from(arr: ndarray, k: int = 0) -> tuple[ndarray, ...]: - """ - Return the indices for the lower-triangle of arr. - - See :func:`cunumeric.tril_indices` for full details. - - Parameters - ---------- - arr : array_like - The indices will be valid for arrays whose dimensions are - the same as arr. - k : int, optional - Diagonal offset (see :func:`cunumeric.tril` for details). - - Returns - ------- - inds : tuple of arrays - The indices for the lower-triangle. The returned tuple contains two - arrays, each with the indices along one dimension of the array. - - See Also - -------- - numpy.tril_indices_from - - Notes - ----- - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - # this implementation is taken from numpy - if arr.ndim != 2: - raise ValueError("input array must be 2-d") - return tril_indices(arr.shape[-2], k=k, m=arr.shape[-1]) - - -def triu_indices( - n: int, k: int = 0, m: Optional[int] = None -) -> tuple[ndarray, ...]: - """ - Return the indices for the upper-triangle of an (n, m) array. - - Parameters - ---------- - n : int - The size of the arrays for which the returned indices will - be valid. - k : int, optional - Diagonal offset (see :func:`cunumeric.triu` for details). - m : int, optional - The column dimension of the arrays for which the returned - arrays will be valid. - By default `m` is taken equal to `n`. - - Returns - ------- - inds : tuple of arrays - The indices for the upper-triangle. The returned tuple contains two - arrays, each with the indices along one dimension of the array. 
- - See also - -------- - numpy.triu_indices - - Notes - ----- - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - tri_ = ~tri(n, m, k=k - 1, dtype=bool) - return nonzero(tri_) - - -@add_boilerplate("arr") -def triu_indices_from(arr: ndarray, k: int = 0) -> tuple[ndarray, ...]: - """ - Return the indices for the upper-triangle of arr. - - See :func:`cunumeric.triu_indices` for full details. - - Parameters - ---------- - arr : ndarray, shape(N, N) - The indices will be valid for arrays whose dimensions are - the same as arr. - k : int, optional - Diagonal offset (see :func:`cunumeric.triu` for details). - - Returns - ------- - inds : tuple of arrays - The indices for the upper-triangle. The returned tuple contains two - arrays, each with the indices along one dimension of the array. - - See Also - -------- - numpy.triu_indices_from - - Notes - ----- - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - # this implementation is taken from numpy - if arr.ndim != 2: - raise ValueError("input array must be 2-d") - return triu_indices(arr.shape[-2], k=k, m=arr.shape[-1]) - - -@add_boilerplate("a") -def take( - a: ndarray, - indices: ndarray, - axis: Optional[int] = None, - out: Optional[ndarray] = None, - mode: BoundsMode = "raise", -) -> ndarray: - """ - Take elements from an array along an axis. - When axis is not None, this function does the same thing as “fancy” - indexing (indexing arrays using arrays); however, it can be easier - to use if you need elements along a given axis. A call such as - `np.take(arr, indices, axis=3)` is equivalent to `arr[:,:,:,indices,...]`. - - Parameters - ---------- - a : array_like `(Ni…, M, Nk…)` - The source array. - indices : array_like `(Nj…)` - The indices of the values to extract. - Also allow scalars for indices. - axis : int, optional - The axis over which to select values. By default, the flattened input - array is used. 
- out : ndarray, optional `(Ni…, Nj…, Nk…)` - If provided, the result will be placed in this array. It should be of - the appropriate shape and dtype. - mode : ``{'raise', 'wrap', 'clip'}``, optional - Specifies how out-of-bounds indices will behave. - 'raise' - raise an error (default) - 'wrap' - wrap around - 'clip' - clip to the range - 'clip' mode means that all indices that are too large are replaced by - the index that addresses the last element along that axis. - Note that this disables indexing with negative numbers. - - Returns - ------- - out : ndarray `(Ni…, Nj…, Nk…)` - The returned array has the same type as a. - - Raises - ------ - - See Also - -------- - numpy.take - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.take(indices=indices, axis=axis, out=out, mode=mode) - - -def _fill_fancy_index_for_along_axis_routines( - a_shape: NdShape, axis: int, indices: ndarray -) -> tuple[ndarray, ...]: - # the logic below is base on the cupy implementation of - # the *_along_axis routines - ndim = len(a_shape) - fancy_index = [] - for i, n in enumerate(a_shape): - if i == axis: - fancy_index.append(indices) - else: - ind_shape = (1,) * i + (-1,) + (1,) * (ndim - i - 1) - fancy_index.append(arange(n).reshape(ind_shape)) - return tuple(fancy_index) - - -@add_boilerplate("a", "indices") -def take_along_axis( - a: ndarray, indices: ndarray, axis: Union[int, None] -) -> ndarray: - """ - Take values from the input array by matching 1d index and data slices. - - This iterates over matching 1d slices oriented along the specified axis in - the index and data arrays, and uses the former to look up values in the - latter. These slices can be different lengths. - - Functions returning an index along an axis, like - :func:`cunumeric.argsort` and :func:`cunumeric.argpartition`, - produce suitable indices for this function. - - Parameters - ---------- - arr : ndarray (Ni..., M, Nk...) - Source array - indices : ndarray (Ni..., J, Nk...) 
- Indices to take along each 1d slice of `arr`. This must match the - dimension of arr, but dimensions Ni and Nj only need to broadcast - against `arr`. - axis : int - The axis to take 1d slices along. If axis is None, the input array is - treated as if it had first been flattened to 1d, for consistency with - :func:`cunumeric.sort` and :func:`cunumeric.argsort`. - - Returns - ------- - out: ndarray (Ni..., J, Nk...) - The indexed result. It is going to be a view to `arr` for most cases, - except the case when `axis=Null` and `arr.ndim>1`. - - See Also - -------- - numpy.take_along_axis - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if not np.issubdtype(indices.dtype, np.integer): - raise TypeError("`indices` must be an integer array") - - computed_axis = 0 - if axis is None: - if indices.ndim != 1: - raise ValueError("indices must be 1D if axis=None") - if a.ndim > 1: - a = a.ravel() - else: - computed_axis = normalize_axis_index(axis, a.ndim) - - if a.ndim != indices.ndim: - raise ValueError( - "`indices` and `a` must have the same number of dimensions" - ) - return a[ - _fill_fancy_index_for_along_axis_routines( - a.shape, computed_axis, indices - ) - ] - - -@add_boilerplate("a", "indices", "values") -def put_along_axis( - a: ndarray, indices: ndarray, values: ndarray, axis: Union[int, None] -) -> None: - """ - Put values into the destination array by matching 1d index and data slices. - - This iterates over matching 1d slices oriented along the specified axis in - the index and data arrays, and uses the former to place values into the - latter. These slices can be different lengths. - - Functions returning an index along an axis, like :func:`cunumeric.argsort` - and :func:`cunumeric.argpartition`, produce suitable indices for - this function. - - Parameters - ---------- - a : ndarray (Ni..., M, Nk...) - Destination array. - indices : ndarray (Ni..., J, Nk...) - Indices to change along each 1d slice of `arr`. 
This must match the - dimension of arr, but dimensions in Ni and Nj may be 1 to broadcast - against `arr`. - values : array_like (Ni..., J, Nk...) - values to insert at those indices. Its shape and dimension are - broadcast to match that of `indices`. - axis : int - The axis to take 1d slices along. If axis is None, the destination - array is treated as if a flattened 1d view had been created of it. - `axis=None` case is currently supported only for 1D input arrays. - - Note - ---- - Having duplicate entries in `indices` will result in undefined behavior - since operation performs asynchronous update of the `arr` entries. - - See Also - -------- - numpy.put_along_axis - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - - if a.size == 0: - return - - check_writeable(a) - - if not np.issubdtype(indices.dtype, np.integer): - raise TypeError("`indices` must be an integer array") - - computed_axis = 0 - if axis is None: - if indices.ndim != 1: - raise ValueError("indices must be 1D if axis=None") - if a.ndim > 1: - # TODO call a=a.flat when flat is implemented - raise ValueError("a.ndim>1 case is not supported when axis=None") - if (indices.size == 0) or (values.size == 0): - return - if values.shape != indices.shape: - values = values._wrap(indices.size) - else: - computed_axis = normalize_axis_index(axis, a.ndim) - - if a.ndim != indices.ndim: - raise ValueError( - "`indices` and `a` must have the same number of dimensions" - ) - ind = _fill_fancy_index_for_along_axis_routines( - a.shape, computed_axis, indices - ) - a[ind] = values - - -@add_boilerplate("a") -def choose( - a: ndarray, - choices: Sequence[ndarray], - out: Optional[ndarray] = None, - mode: BoundsMode = "raise", -) -> ndarray: - """ - Construct an array from an index array and a list of arrays to choose from. 
- - Given an "index" array (`a`) of integers and a sequence of ``n`` arrays - (`choices`), `a` and each choice array are first broadcast, as necessary, - to arrays of a common shape; calling these *Ba* and *Bchoices[i], i = - 0,...,n-1* we have that, necessarily, ``Ba.shape == Bchoices[i].shape`` - for each ``i``. Then, a new array with shape ``Ba.shape`` is created as - follows: - - * if ``mode='raise'`` (the default), then, first of all, each element of - ``a`` (and thus ``Ba``) must be in the range ``[0, n-1]``; now, suppose - that ``i`` (in that range) is the value at the ``(j0, j1, ..., jm)`` - position in ``Ba`` - then the value at the same position in the new array - is the value in ``Bchoices[i]`` at that same position; - - * if ``mode='wrap'``, values in `a` (and thus `Ba`) may be any (signed) - integer; modular arithmetic is used to map integers outside the range - `[0, n-1]` back into that range; and then the new array is constructed - as above; - - * if ``mode='clip'``, values in `a` (and thus ``Ba``) may be any (signed) - integer; negative integers are mapped to 0; values greater than ``n-1`` - are mapped to ``n-1``; and then the new array is constructed as above. - - Parameters - ---------- - a : ndarray[int] - This array must contain integers in ``[0, n-1]``, where ``n`` is the - number of choices, unless ``mode=wrap`` or ``mode=clip``, in which - cases any integers are permissible. - choices : Sequence[ndarray] - Choice arrays. `a` and all of the choices must be broadcastable to the - same shape. If `choices` is itself an array (not recommended), then - its outermost dimension (i.e., the one corresponding to - ``choices.shape[0]``) is taken as defining the "sequence". - out : ndarray, optional - If provided, the result will be inserted into this array. It should - be of the appropriate shape and dtype. Note that `out` is always - buffered if ``mode='raise'``; use other modes for better performance. 
- mode : ``{'raise', 'wrap', 'clip'}``, optional - Specifies how indices outside ``[0, n-1]`` will be treated: - - * 'raise' : an exception is raised (default) - * 'wrap' : value becomes value mod ``n`` - * 'clip' : values < 0 are mapped to 0, values > n-1 are mapped to n-1 - - Returns - ------- - merged_array : ndarray - The merged result. - - Raises - ------ - ValueError: shape mismatch - If `a` and each choice array are not all broadcastable to the same - shape. - - See Also - -------- - numpy.choose - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.choose(choices=choices, out=out, mode=mode) - - -def select( - condlist: Sequence[npt.ArrayLike | ndarray], - choicelist: Sequence[npt.ArrayLike | ndarray], - default: Any = 0, -) -> ndarray: - """ - Return an array drawn from elements in choicelist, depending on conditions. - - Parameters - ---------- - condlist : list of bool ndarrays - The list of conditions which determine from which array in `choicelist` - the output elements are taken. When multiple conditions are satisfied, - the first one encountered in `condlist` is used. - choicelist : list of ndarrays - The list of arrays from which the output elements are taken. It has - to be of the same length as `condlist`. - default : scalar, optional - The element inserted in `output` when all conditions evaluate to False. - - Returns - ------- - output : ndarray - The output at position m is the m-th element of the array in - `choicelist` where the m-th element of the corresponding array in - `condlist` is True. 
- - See Also - -------- - numpy.select - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - if len(condlist) != len(choicelist): - raise ValueError( - "list of cases must be same length as list of conditions" - ) - if len(condlist) == 0: - raise ValueError("select with an empty condition list is not possible") - - condlist_ = tuple(convert_to_cunumeric_ndarray(c) for c in condlist) - for i, c in enumerate(condlist_): - if c.dtype != bool: - raise TypeError( - f"invalid entry {i} in condlist: should be boolean ndarray" - ) - - choicelist_ = tuple(convert_to_cunumeric_ndarray(c) for c in choicelist) - common_type = np.result_type(*choicelist_, default) - args = condlist_ + choicelist_ - choicelist_ = tuple( - c._maybe_convert(common_type, args) for c in choicelist_ - ) - default_ = np.array(default, dtype=common_type) - - out_shape = np.broadcast_shapes( - *(c.shape for c in condlist_), - *(c.shape for c in choicelist_), - ) - out = ndarray(shape=out_shape, dtype=common_type, inputs=args) - out._thunk.select( - tuple(c._thunk for c in condlist_), - tuple(c._thunk for c in choicelist_), - default_, - ) - return out - - -@add_boilerplate("condition", "a") -def compress( - condition: ndarray, - a: ndarray, - axis: Optional[int] = None, - out: Optional[ndarray] = None, -) -> ndarray: - """ - Return selected slices of an array along given axis. - - When working along a given axis, a slice along that axis is returned - in output for each index where condition evaluates to True. - When working on a 1-D array, compress is equivalent to numpy.extract. - - Parameters - ---------- - condition, 1-D array of bools - Array that selects which entries to return. If `len(c)` is less than - the size of a along the given axis, then output is truncated to the - length of the condition array. - - a : array_like - Array from which to extract a part. - - axis: int, optional - Axis along which to take slices. If None (default), - work on the flattened array. 
- - out : ndarray, optional - Output array. Its type is preserved and it must be of the right - shape to hold the output. - - Returns - ------- - compressed_array : ndarray - A copy of `a` without the slices along `axis` for which condition - is false. - - Raises - ------ - ValueError : dimension mismatch - If condition is not 1D array - ValueError : shape mismatch - If condition contains entries that are out of bounds of array - ValueError : shape mismatch - If output array has a wrong shape - - See Also - -------- - numpy.compress, numpy.extract - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - return a.compress(condition, axis=axis, out=out) - - -@add_boilerplate("a") -def diagonal( - a: ndarray, - offset: int = 0, - axis1: int = 0, - axis2: int = 1, - extract: bool = True, -) -> ndarray: - """ - diagonal(a: ndarray, offset=0, axis1=None, axis2=None) - - Return specified diagonals. - - If `a` is 2-D, returns the diagonal of `a` with the given offset, - i.e., the collection of elements of the form ``a[i, i+offset]``. If - `a` has more than two dimensions, then the axes specified by `axis1` - and `axis2` are used to determine the 2-D sub-array whose diagonal is - returned. The shape of the resulting array can be determined by - removing `axis1` and `axis2` and appending an index to the right equal - to the size of the resulting diagonals. - - Parameters - ---------- - a : array_like - Array from which the diagonals are taken. - offset : int, optional - Offset of the diagonal from the main diagonal. Can be positive or - negative. Defaults to main diagonal (0). - axis1 : int, optional - Axis to be used as the first axis of the 2-D sub-arrays from which - the diagonals should be taken. Defaults to first axis (0). - axis2 : int, optional - Axis to be used as the second axis of the 2-D sub-arrays from - which the diagonals should be taken. Defaults to second axis (1). 
- - Returns - ------- - array_of_diagonals : ndarray - If `a` is 2-D, then a 1-D array containing the diagonal and of the - same type as `a` is returned unless `a` is a `matrix`, in which case - a 1-D array rather than a (2-D) `matrix` is returned in order to - maintain backward compatibility. - - If ``a.ndim > 2``, then the dimensions specified by `axis1` and `axis2` - are removed, and a new axis inserted at the end corresponding to the - diagonal. - - Raises - ------ - ValueError - If the dimension of `a` is less than 2. - - Notes - ----- - Unlike NumPy's, the cuNumeric implementation always returns a copy - - See Also - -------- - numpy.diagonal - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - return a.diagonal(offset=offset, axis1=axis1, axis2=axis2, extract=extract) - - -@add_boilerplate("a", "indices", "values") -def put( - a: ndarray, indices: ndarray, values: ndarray, mode: str = "raise" -) -> None: - """ - Replaces specified elements of an array with given values. - The indexing works as if the target array is first flattened. - - Parameters - ---------- - a : array_like - Array to put data into - indices : array_like - Target indices, interpreted as integers. - WARNING: In case there are repeated entries in the - indices array, Legate doesn't guarantee the order in - which values are updated. - - values : array_like - Values to place in `a` at target indices. If values array is shorter - than indices, it will be repeated as necessary. - mode : {'raise', 'wrap', 'clip'}, optional - Specifies how out-of-bounds indices will behave. - 'raise' : raise an error. - 'wrap' : wrap around. - 'clip' : clip to the range. 
- - See Also - -------- - numpy.put - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - a.put(indices=indices, values=values, mode=mode) - - -@add_boilerplate("a", "mask", "values") -def putmask(a: ndarray, mask: ndarray, values: ndarray) -> None: - """ - putmask(a, mask, values) - Changes elements of an array based on conditional and input values. - Sets ``a.flat[n] = values[n]`` for each n where ``mask.flat[n]==True``. - If `values` is not the same size as `a` and `mask` then it will repeat. - This gives behavior different from ``a[mask] = values``. - - Parameters - ---------- - a : ndarray - Target array. - mask : array_like - Boolean mask array. It has to be the same shape as `a`. - values : array_like - Values to put into `a` where `mask` is True. If `values` is smaller - than `a` it will be repeated. - - See Also - -------- - numpy.putmask - - Availability - ------------ - Multiple GPUs, Multiple CPUs - """ - if not a.shape == mask.shape: - raise ValueError("mask and data must be the same size") - - check_writeable(a) - - mask = mask._warn_and_convert(np.dtype(bool)) - - if a.dtype != values.dtype: - values = values._warn_and_convert(a.dtype) - - try: - np.broadcast_shapes(values.shape, a.shape) - except ValueError: - values = values._wrap(a.size) - values = values.reshape(a.shape) - - a._thunk.putmask(mask._thunk, values._thunk) - - -@add_boilerplate("a", "val") -def fill_diagonal(a: ndarray, val: ndarray, wrap: bool = False) -> None: - """ - Fill the main diagonal of the given array of any dimensionality. - - For an array a with a.ndim >= 2, the diagonal is the list of locations with - indices a[i, ..., i] all identical. This function modifies the input - array in-place, it does not return a value. - - Parameters - ---------- - - a : array, at least 2-D. - Array whose diagonal is to be filled, it gets modified in-place. - val : scalar or array_like - Value(s) to write on the diagonal. 
If val is scalar, the value is - written along the diagonal. - If array-like, the flattened val is written along - the diagonal, repeating if necessary to fill all diagonal entries. - wrap : bool - If true, the diagonal "wraps" after N columns, for tall 2d matrices. - - Raises - ------ - ValueError - If the dimension of `a` is less than 2. - - Notes - ----- - - See Also - -------- - numpy.fill_diagonal - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - if val.size == 0 or a.size == 0: - return - - check_writeable(a) - - if a.ndim < 2: - raise ValueError("array must be at least 2-d") - - n = _builtin_min(a.shape) - - if a.ndim > 2: - for s in a.shape: - if s != n: - raise ValueError( - "All dimensions of input must be of equal length" - ) - - len_val = n - - if a.ndim == 2 and wrap and a.shape[0] > a.shape[1]: - len_val = a.shape[0] - (a.shape[0] // (a.shape[1] + 1)) - - if (val.size != len_val and val.ndim > 0) or val.ndim > 1: - val = val._wrap(len_val) - - if a.ndim == 2 and wrap and a.shape[0] > a.shape[1]: - idx0_tmp = arange(a.shape[1], dtype=int) - idx0 = idx0_tmp.copy() - while idx0.size < len_val: - idx0_tmp = idx0_tmp + (a.shape[1] + 1) - idx0 = hstack((idx0, idx0_tmp)) - idx0 = idx0[0:len_val] - idx1 = arange(len_val, dtype=int) % a.shape[1] - a[idx0, idx1] = val - else: - idx = arange(n, dtype=int) - indices = (idx,) * a.ndim - - a[indices] = val - - -################ -# Linear algebra -################ - -# Matrix and vector products - - -@add_boilerplate("a", "b") -def inner(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: - """ - Inner product of two arrays. - - Ordinary inner product of vectors for 1-D arrays (without complex - conjugation), in higher dimensions a sum product over the last axes. - - Parameters - ---------- - a, b : array_like - out : ndarray, optional - Output argument. This must have the exact shape that would be returned - if it was not present. 
If its dtype is not what would be expected from - this operation, then the result will be (unsafely) cast to `out`. - - Returns - ------- - output : ndarray - If `a` and `b` are both - scalars or both 1-D arrays then a scalar is returned; otherwise - an array is returned. - ``output.shape = (*a.shape[:-1], *b.shape[:-1])`` - If `out` is given, then it is returned. - - Notes - ----- - The cuNumeric implementation is a little more liberal than NumPy in terms - of allowed broadcasting, e.g. ``inner(ones((1,)), ones((4,)))`` is allowed. - - See Also - -------- - numpy.inner - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if a.ndim == 0 or b.ndim == 0: - return multiply(a, b, out=out) - (a_modes, b_modes, out_modes) = inner_modes(a.ndim, b.ndim) - return _contract( - a_modes, - b_modes, - out_modes, - a, - b, - out=out, - casting="unsafe", - ) - - -@add_boilerplate("a", "b") -def dot(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: - """ - Dot product of two arrays. Specifically, - - - If both `a` and `b` are 1-D arrays, it is inner product of vectors - (without complex conjugation). - - - If both `a` and `b` are 2-D arrays, it is matrix multiplication, - but using ``a @ b`` is preferred. - - - If either `a` or `b` is 0-D (scalar), it is equivalent to - :func:`multiply` and using ``cunumeric.multiply(a, b)`` or ``a * b`` is - preferred. - - - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over - the last axis of `a` and `b`. - - - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a - sum product over the last axis of `a` and the second-to-last axis of - `b`:: - - dot(a: ndarray, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m]) - - Parameters - ---------- - a : array_like - First argument. - b : array_like - Second argument. - out : ndarray, optional - Output argument. This must have the exact shape and dtype that would be - returned if it was not present. 
- - Returns - ------- - output : ndarray - Returns the dot product of `a` and `b`. If `out` is given, then it is - returned. - - Notes - ----- - The cuNumeric implementation is a little more liberal than NumPy in terms - of allowed broadcasting, e.g. ``dot(ones((3,1)), ones((4,5)))`` is allowed. - - Except for the inner-product case, only floating-point types are supported. - - See Also - -------- - numpy.dot - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.dot(b, out=out) - - -@add_boilerplate("a", "b") -def matmul( - a: ndarray, - b: ndarray, - /, - out: Optional[ndarray] = None, - *, - casting: CastingKind = "same_kind", - dtype: Optional[np.dtype[Any]] = None, -) -> ndarray: - """ - Matrix product of two arrays. - - Parameters - ---------- - a, b : array_like - Input arrays, scalars not allowed. - out : ndarray, optional - A location into which the result is stored. If provided, it must have - a shape that matches the signature `(n,k),(k,m)->(n,m)`. - casting : ``{'no', 'equiv', 'safe', 'same_kind', 'unsafe'}``, optional - Controls what kind of data casting may occur. - - * 'no' means the data types should not be cast at all. - * 'equiv' means only byte-order changes are allowed. - * 'safe' means only casts which can preserve values are allowed. - * 'same_kind' means only safe casts or casts within a kind, - like float64 to float32, are allowed. - * 'unsafe' means any data conversions may be done. - - Default is 'same_kind'. - dtype : data-type, optional - If provided, forces the calculation to use the data type specified. - Note that you may have to also give a more liberal `casting` - parameter to allow the conversions. Default is None. - - Returns - ------- - output : ndarray - The matrix product of the inputs. - This is a scalar only when both a, b are 1-d vectors. - If `out` is given, then it is returned. - - Notes - ----- - The behavior depends on the arguments in the following way. 
- - - If both arguments are 2-D they are multiplied like conventional - matrices. - - If either argument is N-D, N > 2, it is treated as a stack of - matrices residing in the last two indexes and broadcast accordingly. - - If the first argument is 1-D, it is promoted to a matrix by - prepending a 1 to its dimensions. After matrix multiplication - the prepended 1 is removed. - - If the second argument is 1-D, it is promoted to a matrix by - appending a 1 to its dimensions. After matrix multiplication - the appended 1 is removed. - - ``matmul`` differs from ``dot`` in two important ways: - - - Multiplication by scalars is not allowed, use ``*`` instead. - - Stacks of matrices are broadcast together as if the matrices - were elements, respecting the signature ``(n,k),(k,m)->(n,m)``: - - >>> a = ones([9, 5, 7, 4]) - >>> c = ones([9, 5, 4, 3]) - >>> dot(a: ndarray, c).shape - (9, 5, 7, 9, 5, 3) - >>> matmul(a: ndarray, c).shape - (9, 5, 7, 3) - >>> # n is 7, k is 4, m is 3 - - The cuNumeric implementation is a little more liberal than NumPy in terms - of allowed broadcasting, e.g. ``matmul(ones((3,1)), ones((4,5)))`` is - allowed. - - Only floating-point types are supported. - - See Also - -------- - numpy.matmul - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if a.ndim == 0 or b.ndim == 0: - raise ValueError("Scalars not allowed in matmul") - - (a_modes, b_modes, out_modes) = matmul_modes(a.ndim, b.ndim) - - return _contract( - a_modes, - b_modes, - out_modes, - a, - b, - out=out, - casting=casting, - dtype=dtype, - ) - - -@add_boilerplate("a", "b") -def vdot(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: - """ - Return the dot product of two vectors. - - The vdot(`a`, `b`) function handles complex numbers differently than - dot(`a`, `b`). If the first argument is complex the complex conjugate - of the first argument is used for the calculation of the dot product. 
- - Note that `vdot` handles multidimensional arrays differently than `dot`: - it does *not* perform a matrix product, but flattens input arguments - to 1-D vectors first. Consequently, it should only be used for vectors. - - Parameters - ---------- - a : array_like - If `a` is complex the complex conjugate is taken before calculation - of the dot product. - b : array_like - Second argument to the dot product. - out : ndarray, optional - Output argument. This must have the exact shape that would be returned - if it was not present. If its dtype is not what would be expected from - this operation, then the result will be (unsafely) cast to `out`. - - Returns - ------- - output : ndarray - Dot product of `a` and `b`. If `out` is given, then it is returned. - - Notes - ----- - The cuNumeric implementation is a little more liberal than NumPy in terms - of allowed broadcasting, e.g. ``vdot(ones((1,)), ones((4,)))`` is allowed. - - See Also - -------- - numpy.vdot - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return inner(a.ravel().conj(), b.ravel(), out=out) - - -@add_boilerplate("a", "b") -def outer(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: - """ - Compute the outer product of two vectors. - - Given two vectors, ``a = [a0, a1, ..., aM]`` and ``b = [b0, b1, ..., bN]``, - the outer product is:: - - [[a0*b0 a0*b1 ... a0*bN ] - [a1*b0 . - [ ... . - [aM*b0 aM*bN ]] - - Parameters - ---------- - a : (M,) array_like - First input vector. Input is flattened if not already 1-dimensional. - b : (N,) array_like - Second input vector. Input is flattened if not already 1-dimensional. - out : (M, N) ndarray, optional - A location where the result is stored. If its dtype is not what would - be expected from this operation, then the result will be (unsafely) - cast to `out`. - - Returns - ------- - output : (M, N) ndarray - ``output[i, j] = a[i] * b[j]`` - If `out` is given, then it is returned. 
- - See Also - -------- - numpy.outer - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return multiply( - a.ravel()[:, np.newaxis], b.ravel()[np.newaxis, :], out=out - ) - - -@add_boilerplate("a", "b") -def tensordot( - a: ndarray, - b: ndarray, - axes: AxesPairLike = 2, - out: Optional[ndarray] = None, -) -> ndarray: - """ - Compute tensor dot product along specified axes. - - Given two tensors, `a` and `b`, and an array_like object containing - two array_like objects, ``(a_axes, b_axes)``, sum the products of - `a`'s and `b`'s elements (components) over the axes specified by - ``a_axes`` and ``b_axes``. The third argument can be a single non-negative - integer_like scalar, ``N``; if it is such, then the last ``N`` dimensions - of `a` and the first ``N`` dimensions of `b` are summed over. - - Parameters - ---------- - a, b : array_like - Tensors to "dot". - - axes : int or array_like - * integer_like - If an int N, sum over the last N axes of `a` and the first N axes - of `b` in order. - * (2,) array_like - Or, a list of axes to be summed over, first sequence applying to `a`, - second to `b`. Both elements array_like must be of the same length. - out : ndarray, optional - Output argument. This must have the exact shape that would be returned - if it was not present. If its dtype is not what would be expected from - this operation, then the result will be (unsafely) cast to `out`. - - Returns - ------- - output : ndarray - The tensor dot product of the inputs. If `out` is given, then it is - returned. - - Notes - ----- - The cuNumeric implementation is a little more liberal than NumPy in terms - of allowed broadcasting, e.g. ``tensordot(ones((3,1)), ones((1,4)))`` is - allowed. - - Except for the inner-product case, only floating-point types are supported. 
- - See Also - -------- - numpy.tensordot - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - (a_modes, b_modes, out_modes) = tensordot_modes(a.ndim, b.ndim, axes) - - return _contract( - a_modes, - b_modes, - out_modes, - a, - b, - out=out, - casting="unsafe", - ) - - -# Trivial multi-tensor contraction strategy: contract in input order -class NullOptimizer(oe.paths.PathOptimizer): # type: ignore [misc,no-any-unimported] # noqa - def __call__( - self, - inputs: list[set[str]], - outputs: set[str], - size_dict: dict[str, int], - memory_limit: Union[int, None] = None, - ) -> list[tuple[int, int]]: - return [(0, 1)] + [(0, -1)] * (len(inputs) - 2) - - -def _maybe_cast_input( - arr: ndarray, to_dtype: np.dtype[Any], casting: CastingKind -) -> ndarray: - if arr.dtype == to_dtype: - return arr - if not np.can_cast(arr.dtype, to_dtype, casting=casting): - raise TypeError( - f"Cannot cast input array of type {arr.dtype} to {to_dtype} with " - f"casting rule '{casting}'" - ) - return arr.astype(to_dtype) - - -# Generalized tensor contraction -def _contract( - a_modes: list[str], - b_modes: list[str], - out_modes: list[str], - a: ndarray, - b: Optional[ndarray] = None, - out: Optional[ndarray] = None, - casting: CastingKind = "same_kind", - dtype: Optional[np.dtype[Any]] = None, -) -> ndarray: - # Sanity checks - if len(a_modes) != a.ndim: - raise ValueError( - f"Expected {len(a_modes)}-d input array but got {a.ndim}-d" - ) - - if b is None: - if len(b_modes) != 0: - raise ValueError("Missing input array") - elif len(b_modes) != b.ndim: - raise ValueError( - f"Expected {len(b_modes)}-d input array but got {b.ndim}-d" - ) - - if out is not None and len(out_modes) != out.ndim: - raise ValueError( - f"Expected {len(out_modes)}-d output array but got {out.ndim}-d" - ) - - if len(set(out_modes)) != len(out_modes): - raise ValueError("Duplicate mode labels on output") - - if len(set(out_modes) - set(a_modes) - set(b_modes)) > 0: - raise ValueError("Unknown mode 
labels on output") - - makes_view = b is None and len(a_modes) == len(out_modes) - if dtype is not None and not makes_view: - c_dtype = dtype - elif out is not None: - c_dtype = out.dtype - elif b is None: - c_dtype = a.dtype - else: - c_dtype = ndarray.find_common_type(a, b) - - a = _maybe_cast_input(a, c_dtype, casting) - - if b is not None: - b = _maybe_cast_input(b, c_dtype, casting) - - out_dtype = out.dtype if out is not None else c_dtype - - # Handle duplicate modes on inputs - c_a_modes = Counter(a_modes) - for mode, count in c_a_modes.items(): - if count > 1: - axes = [i for (i, m) in enumerate(a_modes) if m == mode] - a = a._diag_helper(axes=axes) - # diagonal is stored on last axis - a_modes = [m for m in a_modes if m != mode] + [mode] - c_b_modes = Counter(b_modes) - for mode, count in c_b_modes.items(): - if count > 1: - axes = [i for (i, m) in enumerate(b_modes) if m == mode] - b = b._diag_helper(axes=axes) # type: ignore [union-attr] - # diagonal is stored on last axis - b_modes = [m for m in b_modes if m != mode] + [mode] - - # Drop modes corresponding to singleton dimensions. This handles cases of - # broadcasting. - for dim in reversed(range(a.ndim)): - if a.shape[dim] == 1: - a = a.squeeze(dim) - a_modes.pop(dim) - if b is not None: - for dim in reversed(range(b.ndim)): - if b.shape[dim] == 1: - b = b.squeeze(dim) - b_modes.pop(dim) - - # Sum-out modes appearing on one argument, and missing from the result - # TODO: If we supported sum on multiple axes we could do the full sum in a - # single operation, and avoid intermediates. - for dim, mode in reversed(list(enumerate(a_modes))): - if mode not in b_modes and mode not in out_modes: - a_modes.pop(dim) - a = a.sum(axis=dim) - - for dim, mode in reversed(list(enumerate(b_modes))): - if mode not in a_modes and mode not in out_modes: - b_modes.pop(dim) - b = b.sum(axis=dim) # type: ignore [union-attr] - - # Compute extent per mode. 
No need to consider broadcasting at this stage, - # since it has been handled above. - mode2extent: dict[str, int] = {} - for mode, extent in chain( - zip(a_modes, a.shape), zip(b_modes, b.shape) if b is not None else [] - ): - prev_extent = mode2extent.get(mode) - if prev_extent is not None and extent != prev_extent: - raise ValueError( - f"Incompatible sizes between matched dimensions: {extent} vs " - f"{prev_extent}" - ) - mode2extent[mode] = extent - - # Any modes appearing only on the result must have originally been present - # on one of the operands, but got dropped by the broadcast-handling code. - out_shape = ( - out.shape - if out is not None - else tuple(mode2extent.get(mode, 1) for mode in out_modes) - ) - c_modes = [] - c_shape: NdShape = () - c_bloated_shape: NdShape = () - for mode, extent in zip(out_modes, out_shape): - if mode not in a_modes and mode not in b_modes: - c_bloated_shape += (1,) - else: - assert extent > 1 - c_modes.append(mode) - c_shape += (extent,) - c_bloated_shape += (extent,) - - # Verify output array has the right shape (input arrays can be broadcasted - # up to match the output, but not the other way around). There should be no - # unknown or singleton modes on the result at this point. 
- for mode, extent in zip(c_modes, c_shape): - prev_extent = mode2extent[mode] - assert prev_extent != 1 - if extent != prev_extent: - raise ValueError("Wrong shape on output array") - - # Test for fallback to unary case - if b is not None: - if len(a_modes) == 0: - a = a * b - a_modes = b_modes - b = None - b_modes = [] - elif len(b_modes) == 0: - a = a * b - b = None - - if b is None: - # Unary contraction case - assert len(a_modes) == len(c_modes) and set(a_modes) == set(c_modes) - if len(a_modes) == 0: - # NumPy doesn't return a view in this case - c = copy(a) - elif a_modes == c_modes: - c = a - else: - # Shuffle input array according to mode labels - axes = [a_modes.index(mode) for mode in c_modes] - assert _builtin_all(ax >= 0 for ax in axes) - c = a.transpose(axes) - - else: - # Binary contraction case - # Create result array, if output array can't be directly targeted - if out is not None and out_dtype == c_dtype and out_shape == c_shape: - c = out - else: - c = ndarray( - shape=c_shape, - dtype=c_dtype, - inputs=(a, b), - ) - # Perform operation - c._thunk.contract( - c_modes, - a._thunk, - a_modes, - b._thunk, - b_modes, - mode2extent, - ) - - # Postprocess result before returning - if out is c: - # We already decided above to use the output array directly - return out - if out_dtype != c_dtype or out_shape != c_bloated_shape: - # We need to broadcast the result of the contraction or switch types - # before returning - if not np.can_cast(c_dtype, out_dtype, casting=casting): - raise TypeError( - f"Cannot cast intermediate result array of type {c_dtype} " - f"into output array of type {out_dtype} with casting rule " - f"'{casting}'" - ) - if out is None: - out = ndarray( - shape=out_shape, - dtype=out_dtype, - inputs=(c,), - ) - out[...] 
= c.reshape(c_bloated_shape) - return out - if out_shape != c_shape: - # We need to add missing dimensions, but they are all of size 1, so - # we don't need to broadcast - assert c_bloated_shape == out_shape - if out is None: - return c.reshape(out_shape) - else: - out[...] = c.reshape(out_shape) - return out - if out is not None: - # The output and result arrays are fully compatible, but we still - # need to copy - out[...] = c - return out - return c - - -def einsum( - expr: str, - *operands: ndarray, - out: Optional[ndarray] = None, - dtype: Optional[np.dtype[Any]] = None, - casting: CastingKind = "safe", - optimize: Union[bool, Literal["greedy", "optimal"]] = True, -) -> ndarray: - """ - Evaluates the Einstein summation convention on the operands. - - Using the Einstein summation convention, many common multi-dimensional, - linear algebraic array operations can be represented in a simple fashion. - In *implicit* mode `einsum` computes these values. - - In *explicit* mode, `einsum` provides further flexibility to compute - other array operations that might not be considered classical Einstein - summation operations, by disabling, or forcing summation over specified - subscript labels. - - Parameters - ---------- - subscripts : str - Specifies the subscripts for summation as comma separated list of - subscript labels. An implicit (classical Einstein summation) - calculation is performed unless the explicit indicator '->' is - included as well as subscript labels of the precise output form. - operands : list[array_like] - These are the arrays for the operation. - out : ndarray, optional - If provided, the calculation is done into this array. - dtype : data-type, optional - If provided, forces the calculation to use the data type specified. - Note that you may have to also give a more liberal `casting` - parameter to allow the conversions. Default is None. 
- casting : ``{'no', 'equiv', 'safe', 'same_kind', 'unsafe'}``, optional - Controls what kind of data casting may occur. - - * 'no' means the data types should not be cast at all. - * 'equiv' means only byte-order changes are allowed. - * 'safe' means only casts which can preserve values are allowed. - * 'same_kind' means only safe casts or casts within a kind, - like float64 to float32, are allowed. - * 'unsafe' means any data conversions may be done. - - Default is 'safe'. - optimize : ``{False, True, 'greedy', 'optimal'}``, optional - Controls if intermediate optimization should occur. If False then - arrays will be contracted in input order, one at a time. True (the - default) will use the 'greedy' algorithm. See ``cunumeric.einsum_path`` - for more information on the available optimization algorithms. - - Returns - ------- - output : ndarray - The calculation based on the Einstein summation convention. - - Notes - ----- - For most expressions, only floating-point types are supported. - - See Also - -------- - numpy.einsum - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - operands_list = [convert_to_cunumeric_ndarray(op) for op in operands] - - if out is not None: - out = convert_to_cunumeric_ndarray(out, share=True) - - if optimize is True: - optimize = "greedy" - elif optimize is False: - optimize = NullOptimizer() - - # This call normalizes the expression (adds the output part if it's - # missing, expands '...') and checks for some errors (mismatch on number - # of dimensions between operand and expression, wrong number of operands, - # unknown modes on output, a mode appearing under two different - # non-singleton extents). 
- computed_operands, contractions = oe.contract_path( - expr, *operands_list, einsum_call=True, optimize=optimize - ) - for indices, _, sub_expr, _, _ in contractions: - assert len(indices) == 1 or len(indices) == 2 - a = computed_operands.pop(indices[0]) - b = computed_operands.pop(indices[1]) if len(indices) == 2 else None - if b is None: - m = re.match(r"([a-zA-Z]*)->([a-zA-Z]*)", sub_expr) - if m is None: - raise NotImplementedError("Non-alphabetic mode labels") - a_modes = list(m.group(1)) - b_modes = [] - out_modes = list(m.group(2)) - else: - m = re.match(r"([a-zA-Z]*),([a-zA-Z]*)->([a-zA-Z]*)", sub_expr) - if m is None: - raise NotImplementedError("Non-alphabetic mode labels") - a_modes = list(m.group(1)) - b_modes = list(m.group(2)) - out_modes = list(m.group(3)) - sub_result = _contract( - a_modes, - b_modes, - out_modes, - a, - b, - out=(out if len(computed_operands) == 0 else None), - casting=casting, - dtype=dtype, - ) - computed_operands.append(sub_result) - - assert len(computed_operands) == 1 - return computed_operands[0] - - -def einsum_path( - expr: str, - *operands: ndarray, - optimize: Union[bool, list[Any], tuple[Any, ...], str] = "greedy", -) -> tuple[list[Union[str, int]], str]: - """ - Evaluates the lowest cost contraction order for an einsum expression by - considering the creation of intermediate arrays. - - Parameters - ---------- - expr : str - Specifies the subscripts for summation. - *operands : Sequence[array_like] - These are the arrays for the operation. - optimize : ``{bool, list, tuple, 'greedy', 'optimal'}`` - Choose the type of path. If a tuple is provided, the second argument is - assumed to be the maximum intermediate size created. If only a single - argument is provided the largest input or output array size is used - as a maximum intermediate size. 
- - * if a list is given that starts with ``einsum_path``, uses this as the - contraction path - * if False no optimization is taken - * if True defaults to the 'greedy' algorithm - * 'optimal' An algorithm that combinatorially explores all possible - ways of contracting the listed tensors and chooses the least costly - path. Scales exponentially with the number of terms in the - contraction. - * 'greedy' An algorithm that chooses the best pair contraction - at each step. Effectively, this algorithm searches the largest inner, - Hadamard, and then outer products at each step. Scales cubically with - the number of terms in the contraction. Equivalent to the 'optimal' - path for most contractions. - - Default is 'greedy'. - - Returns - ------- - path : list[Tuple[int,...]] - A list representation of the einsum path. - string_repr : str - A printable representation of the einsum path. - - Notes - ----- - The resulting path indicates which terms of the input contraction should be - contracted first, the result of this contraction is then appended to the - end of the contraction list. This list can then be iterated over until all - intermediate contractions are complete. 
- - See Also - -------- - numpy.einsum_path - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - computed_operands = [convert_to_cunumeric_ndarray(op) for op in operands] - memory_limit = _builtin_max(op.size for op in computed_operands) - if isinstance(optimize, tuple): - if len(optimize) != 2: - raise ValueError("einsum_path expects optimize tuples of size 2") - optimize, memory_limit = optimize - if optimize is True: - optimize = "greedy" - elif optimize is False: - optimize = [tuple(range(len(computed_operands)))] - elif optimize in ["greedy", "optimal"]: - pass - elif ( - isinstance(optimize, list) - and len(optimize) > 1 - and optimize[0] == "einsum_path" - ): - optimize = optimize[1:] - else: - raise ValueError( - f"einsum_path: unexpected value for optimize: {optimize}" - ) - path, info = oe.contract_path( - expr, *computed_operands, optimize=optimize, memory_limit=memory_limit - ) - return ["einsum_path"] + path, info - - -@add_boilerplate("a") -def trace( - a: ndarray, - offset: int = 0, - axis1: Optional[int] = None, - axis2: Optional[int] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, -) -> ndarray: - """ - Return the sum along diagonals of the array. - - If a is 2-D, the sum along its diagonal with the given offset is - returned, i.e., the sum of elements a[i,i+offset] for all i. - If a has more than two dimensions, then the axes specified by axis1 - and axis2 are used to determine the 2-D sub-arrays whose traces - are returned. The shape of the resulting array is the same as that - of a with axis1 and axis2 removed. - - Parameters - ---------- - a : array_like - Input array, from which the diagonals are taken. - offset : int, optional - Offset of the diagonal from the main diagonal. Can be both - positive and negative. Defaults to 0. - axis1, axis2 : int, optional - Axes to be used as the first and second axis of the 2-D sub-arrays - from which the diagonals should be taken. 
Defaults are the - first two axes of a. - dtype : data-type, optional - Determines the data-type of the returned array and of the - accumulator where the elements are summed. If dtype has the value - None and a is of integer type of precision less than the default - integer precision, then the default integer precision is used. - Otherwise, the precision is the same as that of a. - - out : ndarray, optional - Array into which the output is placed. Its type is preserved and - it must be of the right shape to hold the output. - - Returns - ------- - sum_along_diagonals : ndarray - If a is 2-D, the sum along the diagonal is returned. If a has - larger dimensions, then an array of sums along diagonals is returned. - - Raises - ------ - ValueError - If the dimension of `a` is less than 2. - - See Also - -------- - numpy.diagonal - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.trace( - offset=offset, axis1=axis1, axis2=axis2, dtype=dtype, out=out - ) - - -################# -# Logic functions -################# - -# Truth value testing - - -@add_boilerplate("a") -def all( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - where: Optional[ndarray] = None, -) -> ndarray: - """ - Test whether all array elements along a given axis evaluate to True. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - axis : None or int or tuple[int], optional - Axis or axes along which a logical AND reduction is performed. - The default (``axis=None``) is to perform a logical AND over all - the dimensions of the input array. `axis` may be negative, in - which case it counts from the last to the first axis. - - If this is a tuple of ints, a reduction is performed on multiple - axes, instead of a single axis or all the axes as before. - out : ndarray, optional - Alternate output array in which to place the result. 
- It must have the same shape as the expected output and its - type is preserved (e.g., if ``dtype(out)`` is float, the result - will consist of 0.0's and 1.0's). See `ufuncs-output-type` for more - details. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `all` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - - Returns - ------- - all : ndarray, bool - A new boolean or array is returned unless `out` is specified, - in which case a reference to `out` is returned. - - See Also - -------- - numpy.all - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.all(axis=axis, out=out, keepdims=keepdims, where=where) - - -@add_boilerplate("a") -def any( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - where: Optional[ndarray] = None, -) -> ndarray: - """ - Test whether any array element along a given axis evaluates to True. - - Returns single boolean unless `axis` is not ``None`` - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - axis : None or int or tuple[int], optional - Axis or axes along which a logical OR reduction is performed. - The default (``axis=None``) is to perform a logical OR over all - the dimensions of the input array. `axis` may be negative, in - which case it counts from the last to the first axis. - - If this is a tuple of ints, a reduction is performed on multiple - axes, instead of a single axis or all the axes as before. - out : ndarray, optional - Alternate output array in which to place the result. 
It must have - the same shape as the expected output and its type is preserved - (e.g., if it is of type float, then it will remain so, returning - 1.0 for True and 0.0 for False, regardless of the type of `a`). - See `ufuncs-output-type` for more details. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `any` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - - Returns - ------- - any : bool or ndarray - A new boolean or `ndarray` is returned unless `out` is specified, - in which case a reference to `out` is returned. - - See Also - -------- - numpy.any - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.any(axis=axis, out=out, keepdims=keepdims, where=where) - - -# Array contents - - -# Logic operations - - -# Comparison - - -@add_boilerplate("a", "b") -def allclose( - a: ndarray, - b: ndarray, - rtol: float = 1e-5, - atol: float = 1e-8, - equal_nan: bool = False, -) -> ndarray: - """ - - Returns True if two arrays are element-wise equal within a tolerance. - - The tolerance values are positive, typically very small numbers. The - relative difference (`rtol` * abs(`b`)) and the absolute difference - `atol` are added together to compare against the absolute difference - between `a` and `b`. - - NaNs are treated as equal if they are in the same place and if - ``equal_nan=True``. Infs are treated as equal if they are in the same - place and of the same sign in both arrays. - - Parameters - ---------- - a, b : array_like - Input arrays to compare. - rtol : float - The relative tolerance parameter (see Notes). - atol : float - The absolute tolerance parameter (see Notes). 
- equal_nan : bool - Whether to compare NaN's as equal. If True, NaN's in `a` will be - considered equal to NaN's in `b` in the output array. - - Returns - ------- - allclose : ndarray scalar - Returns True if the two arrays are equal within the given - tolerance; False otherwise. - - Notes - ----- - If the following equation is element-wise True, then allclose returns - True. - - absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) - - See Also - -------- - numpy.allclose - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if equal_nan: - raise NotImplementedError( - "cuNumeric does not support `equal_nan` yet for allclose" - ) - args = (np.array(rtol, dtype=np.float64), np.array(atol, dtype=np.float64)) - return ndarray._perform_binary_reduction( - BinaryOpCode.ISCLOSE, - a, - b, - dtype=np.dtype(bool), - extra_args=args, - ) - - -@add_boilerplate("a", "b") -def isclose( - a: ndarray, - b: ndarray, - rtol: float = 1e-5, - atol: float = 1e-8, - equal_nan: bool = False, -) -> ndarray: - """ - - Returns a boolean array where two arrays are element-wise equal within a - tolerance. - - Parameters - ---------- - a, b : array_like - Input arrays to compare. - rtol : float - The relative tolerance parameter (see Notes). - atol : float - The absolute tolerance parameter (see Notes). - equal_nan : bool - Whether to compare NaN's as equal. If True, NaN's in `a` will be - considered equal to NaN's in `b` in the output array. - - Returns - ------- - y : array_like - Returns a boolean array of where `a` and `b` are equal within the - given tolerance. If both `a` and `b` are scalars, returns a single - boolean value. - - Notes - ----- - For finite values, isclose uses the following equation to test whether - two floating point values are equivalent. 
- - absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) - - See Also - -------- - numpy.isclose - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if equal_nan: - raise NotImplementedError( - "cuNumeric does not support `equal_nan` yet for isclose" - ) - - out_shape = np.broadcast_shapes(a.shape, b.shape) - out = empty(out_shape, dtype=bool) - - common_type = ndarray.find_common_type(a, b) - a = a.astype(common_type) - b = b.astype(common_type) - - out._thunk.isclose(a._thunk, b._thunk, rtol, atol, equal_nan) - return out - - -@add_boilerplate("a1", "a2") -def array_equal( - a1: ndarray, a2: ndarray, equal_nan: bool = False -) -> Union[bool, ndarray]: - """ - - True if two arrays have the same shape and elements, False otherwise. - - Parameters - ---------- - a1, a2 : array_like - Input arrays. - equal_nan : bool - Whether to compare NaN's as equal. If the dtype of a1 and a2 is - complex, values will be considered equal if either the real or the - imaginary component of a given value is ``nan``. - - Returns - ------- - b : ndarray scalar - Returns True if the arrays are equal. 
- - See Also - -------- - numpy.array_equal - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if equal_nan: - raise NotImplementedError( - "cuNumeric does not support `equal_nan` yet for `array_equal`" - ) - - if a1.shape != a2.shape: - return False - return ndarray._perform_binary_reduction( - BinaryOpCode.EQUAL, a1, a2, dtype=np.dtype(np.bool_) - ) - - -######################## -# Mathematical functions -######################## - -# Trigonometric functions - - -# Hyperbolic functions - - -# Rounding - - -# Sums, products, differences - - -@add_boilerplate("a") -def prod( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - - Return the product of array elements over a given axis. - - Parameters - ---------- - a : array_like - Input data. - axis : None or int or tuple[int], optional - Axis or axes along which a product is performed. The default, - axis=None, will calculate the product of all the elements in the - input array. If axis is negative it counts from the last to the - first axis. - - If axis is a tuple of ints, a product is performed on all of the - axes specified in the tuple instead of a single axis or all the - axes as before. - dtype : data-type, optional - The type of the returned array, as well as of the accumulator in - which the elements are multiplied. The dtype of `a` is used by - default unless `a` has an integer dtype of less precision than the - default platform integer. In that case, if `a` is signed then the - platform integer is used while if `a` is unsigned then an unsigned - integer of the same precision as the platform integer is used. - out : ndarray, optional - Alternative output array in which to place the result. 
It must have - the same shape as the expected output, but the type of the output - values will be cast if necessary. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left in the - result as dimensions with size one. With this option, the result - will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `prod` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - initial : scalar, optional - The starting value for this product. See `~cunumeric.ufunc.reduce` for - details. - - where : array_like[bool], optional - Elements to include in the product. See `~cunumeric.ufunc.reduce` for - details. - - Returns - ------- - product_along_axis : ndarray, see `dtype` parameter above. - An array shaped as `a` but with the specified axis removed. - Returns a reference to `out` if specified. - - See Also - -------- - numpy.prod - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return multiply.reduce( - a, - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - -@add_boilerplate("a") -def sum( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - - Sum of array elements over a given axis. - - Parameters - ---------- - a : array_like - Elements to sum. - axis : None or int or tuple[int], optional - Axis or axes along which a sum is performed. The default, - axis=None, will sum all of the elements of the input array. If - axis is negative it counts from the last to the first axis. 
- - If axis is a tuple of ints, a sum is performed on all of the axes - specified in the tuple instead of a single axis or all the axes as - before. - dtype : data-type, optional - The type of the returned array and of the accumulator in which the - elements are summed. The dtype of `a` is used by default unless `a` - has an integer dtype of less precision than the default platform - integer. In that case, if `a` is signed then the platform integer - is used while if `a` is unsigned then an unsigned integer of the - same precision as the platform integer is used. - out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output, but the type of the output - values will be cast if necessary. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `sum` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - initial : scalar, optional - Starting value for the sum. See `~cunumeric.ufunc.reduce` for details. - - where : array_like[bool], optional - Elements to include in the sum. See `~cunumeric.ufunc.reduce` for - details. - - Returns - ------- - sum_along_axis : ndarray - An array with the same shape as `a`, with the specified - axis removed. If `a` is a 0-d array, or if `axis` is None, a scalar - is returned. If an output array is specified, a reference to - `out` is returned. 
- - See Also - -------- - numpy.sum - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return add.reduce( - a, - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - -@add_boilerplate("a") -def cumprod( - a: ndarray, - axis: Optional[int] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, -) -> ndarray: - """ - Return the cumulative product of the elements along a given axis. - - Parameters - ---------- - a : array_like - Input array. - - axis : int, optional - Axis along which the cumulative product is computed. The default (None) - is to compute the cumprod over the flattened array. - - dtype : dtype, optional - Type of the returned array and of the accumulator in which the elements - are multiplied. If dtype is not specified, it defaults to the dtype of - a, unless a has an integer dtype with a precision less than that of the - default platform integer. In that case, the default platform integer is - used. - out : ndarray, optional - Alternative output array in which to place the result. It must have the - same shape and buffer length as the expected output but the type will - be cast if necessary. See Output type determination for more details. - - Returns - ------- - cumprod : ndarray - A new array holding the result is returned unless out is specified, in - which case a reference to out is returned. The result has the same size - as a, and the same shape as a if axis is not None or a is a 1-d array. - - See Also - -------- - numpy.cumprod - - Notes - ----- - CuNumeric's parallel implementation may yield different results from NumPy - with floating point and complex types. For example, when boundary values - such as inf occur they may not propagate as expected. Consider the float32 - array ``[3e+37, 1, 100, 0.01]``. NumPy's cumprod will return a result of - ``[3e+37, 3e+37, inf, inf]``. 
However, cuNumeric might internally partition - the array such that partition 0 has ``[3e+37, 1]`` and partition 1 has - ``[100, 0.01]``, returning the result ``[3e+37, 3e+37, inf, 3e+37]``. - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return ndarray._perform_scan( - ScanCode.PROD, - a, - axis=axis, - dtype=dtype, - out=out, - nan_to_identity=False, - ) - - -@add_boilerplate("a") -def cumsum( - a: ndarray, - axis: Optional[int] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, -) -> ndarray: - """ - Return the cumulative sum of the elements along a given axis. - - Parameters - ---------- - a : array_like - Input array. - - axis : int, optional - Axis along which the cumulative sum is computed. The default (None) is - to compute the cumsum over the flattened array. - - dtype : dtype, optional - Type of the returned array and of the accumulator in which the elements - are summed. If dtype is not specified, it defaults to the dtype of a, - unless a has an integer dtype with a precision less than that of the - default platform integer. In that case, the default platform integer is - used. - out : ndarray, optional - Alternative output array in which to place the result. It must have the - same shape and buffer length as the expected output but the type will - be cast if necessary. See Output type determination for more details. - - Returns - ------- - cumsum : ndarray. - A new array holding the result is returned unless out is specified, in - which case a reference to out is returned. The result has the same size - as a, and the same shape as a if axis is not None or a is a 1-d array. - - See Also - -------- - numpy.cumsum - - Notes - ----- - CuNumeric's parallel implementation may yield different results from NumPy - with floating point and complex types. For example, when boundary values - such as inf occur they may not propagate as expected. For more explanation - check cunumeric.cumprod. 
- - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return ndarray._perform_scan( - ScanCode.SUM, a, axis=axis, dtype=dtype, out=out, nan_to_identity=False - ) - - -@add_boilerplate("a") -def nancumprod( - a: ndarray, - axis: Optional[int] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, -) -> ndarray: - """ - Return the cumulative product of the elements along a given axis treating - Not a Numbers (NaNs) as one. The cumulative product does not change when - NaNs are encountered and leading NaNs are replaced by ones. - - Ones are returned for slices that are all-NaN or empty. - - Parameters - ---------- - a : array_like - Input array. - - axis : int, optional - Axis along which the cumulative product is computed. The default (None) - is to compute the nancumprod over the flattened array. - - dtype : dtype, optional - Type of the returned array and of the accumulator in which the elements - are multiplied. If dtype is not specified, it defaults to the dtype of - a, unless a has an integer dtype with a precision less than that of the - default platform integer. In that case, the default platform integer is - used. - out : ndarray, optional - Alternative output array in which to place the result. It must have the - same shape and buffer length as the expected output but the type will - be cast if necessary. See Output type determination for more details. - - Returns - ------- - nancumprod : ndarray. - A new array holding the result is returned unless out is specified, in - which case a reference to out is returned. The result has the same size - as a, and the same shape as a if axis is not None or a is a 1-d array. - - See Also - -------- - numpy.nancumprod - - Notes - ----- - CuNumeric's parallel implementation may yield different results from NumPy - with floating point and complex types. For example, when boundary values - such as inf occur they may not propagate as expected. 
For more explanation - check cunumeric.cumprod. - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return ndarray._perform_scan( - ScanCode.PROD, a, axis=axis, dtype=dtype, out=out, nan_to_identity=True - ) - - -@add_boilerplate("a") -def nancumsum( - a: ndarray, - axis: Optional[int] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, -) -> ndarray: - """ - Return the cumulative sum of the elements along a given axis treating Not a - Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are - encountered and leading NaNs are replaced by zeros. - - Zeros are returned for slices that are all-NaN or empty. - - Parameters - ---------- - a : array_like - Input array. - - axis : int, optional - Axis along which the cumulative sum is computed. The default (None) is - to compute the nancumsum over the flattened array. - - dtype : dtype, optional - Type of the returned array and of the accumulator in which the elements - are summed. If dtype is not specified, it defaults to the dtype of a, - unless a has an integer dtype with a precision less than that of the - default platform integer. In that case, the default platform integer is - used. - out : ndarray, optional - Alternative output array in which to place the result. It must have the - same shape and buffer length as the expected output but the type will - be cast if necessary. See Output type determination for more details. - - Returns - ------- - nancumsum : ndarray. - A new array holding the result is returned unless out is specified, in - which case a reference to out is returned. The result has the same size - as a, and the same shape as a if axis is not None or a is a 1-d array. - - See Also - -------- - numpy.nancumsum - - Notes - ----- - CuNumeric's parallel implementation may yield different results from NumPy - with floating point and complex types. For example, when boundary values - such as inf occur they may not propagate as expected. 
For more explanation - check cunumeric.cumprod. - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return ndarray._perform_scan( - ScanCode.SUM, a, axis=axis, dtype=dtype, out=out, nan_to_identity=True - ) - - -@add_boilerplate("a") -def nanargmax( - a: ndarray, - axis: Any = None, - out: Union[ndarray, None] = None, - *, - keepdims: bool = False, -) -> ndarray: - """ - Return the indices of the maximum values in the specified axis ignoring - NaNs. For empty arrays, ValueError is raised. For all-NaN slices, - ValueError is raised only when CUNUMERIC_NUMPY_COMPATIBILITY - environment variable is set, otherwise identity is returned. - - Warning: results cannot be trusted if a slice contains only NaNs - and -Infs. - - Parameters - ---------- - a : array_like - Input array. - axis : int, optional - By default, the index corresponds to the flattened array, otherwise - along the specified axis. - out : ndarray, optional - If provided, the result will be inserted into this array. It should - be of the appropriate shape and dtype. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the array. - - Returns - ------- - index_array : ndarray[int] - Array of indices into the array. It has the same shape as `a.shape` - with the dimension along `axis` removed. 
- - See Also - -------- - numpy.nanargmin, numpy.nanargmax - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - if a.size == 0: - raise ValueError("attempt to get nanargmax of an empty sequence") - - if cunumeric_settings.numpy_compat() and a.dtype.kind == "f": - if any(all(isnan(a), axis=axis)): - raise ValueError("Array/Slice contains only NaNs") - - unary_red_code = get_non_nan_unary_red_code( - a.dtype.kind, UnaryRedCode.NANARGMAX - ) - - return a._perform_unary_reduction( - unary_red_code, - a, - axis=axis, - out=out, - keepdims=keepdims, - res_dtype=np.dtype(np.int64), - ) - - -@add_boilerplate("a") -def nanargmin( - a: ndarray, - axis: Any = None, - out: Union[ndarray, None] = None, - *, - keepdims: bool = False, -) -> ndarray: - """ - Return the indices of the minimum values in the specified axis ignoring - NaNs. For empty arrays, ValueError is raised. For all-NaN slices, - ValueError is raised only when CUNUMERIC_NUMPY_COMPATIBILITY - environment variable is set, otherwise identity is returned. - - Warning: results cannot be trusted if a slice contains only NaNs - and -Infs. - - Parameters - ---------- - a : array_like - Input array. - axis : int, optional - By default, the index corresponds to the flattened array, otherwise - along the specified axis. - out : ndarray, optional - If provided, the result will be inserted into this array. It should - be of the appropriate shape and dtype. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the array. - - Returns - ------- - index_array : ndarray[int] - Array of indices into the array. It has the same shape as `a.shape` - with the dimension along `axis` removed. 
- - See Also - -------- - numpy.nanargmin, numpy.nanargmax - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - if a.size == 0: - raise ValueError("attempt to get nanargmin of an empty sequence") - - if cunumeric_settings.numpy_compat() and a.dtype.kind == "f": - if any(all(isnan(a), axis=axis)): - raise ValueError("Array/Slice contains only NaNs") - - unary_red_code = get_non_nan_unary_red_code( - a.dtype.kind, UnaryRedCode.NANARGMIN - ) - - return a._perform_unary_reduction( - unary_red_code, - a, - axis=axis, - out=out, - keepdims=keepdims, - res_dtype=np.dtype(np.int64), - ) - - -@add_boilerplate("a") -def nanmin( - a: ndarray, - axis: Any = None, - out: Union[ndarray, None] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - Return minimum of an array or minimum along an axis, ignoring any - NaNs. When all-NaN slices are encountered, a NaN is returned - for that slice only when CUNUMERIC_NUMPY_COMPATIBILITY environment - variable is set, otherwise identity is returned. - Empty slices will raise a ValueError - - Parameters - ---------- - a : array_like - Array containing numbers whose minimum is desired. If a is not an - array, a conversion is attempted. - - axis : {int, tuple of int, None}, optional - Axis or axes along which the minimum is computed. The default is to - compute the minimum of the flattened array. - - out : ndarray, optional - Alternative output array in which to place the result. Must - be of the same shape and buffer length as the expected output. - See `ufuncs-output-type` for more details. - - keepdims : bool, Optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. 
- - If the default value is passed, then `keepdims` will not be - passed through to the `amin` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - - initial : scalar, optional - The maximum value of an output element. Must be present to allow - computation on empty slice. See `~cunumeric.ufunc.reduce` for details. - - where : array_like[bool], optional - Elements to compare for the minimum. See `~cunumeric.ufunc.reduce` - for details. - - Returns - ------- - nanmin : ndarray or scalar - Minimum of `a`. If `axis` is None, the result is a scalar value. - If `axis` is given, the result is an array of dimension - ``a.ndim - 1``. - - Notes - ----- - CuNumeric's implementation will not raise a Runtime Warning for - slices with all-NaNs - - See Also - -------- - numpy.nanmin, numpy.nanmax, numpy.min, numpy.max, numpy.isnan, - numpy.maximum - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - unary_red_code = get_non_nan_unary_red_code( - a.dtype.kind, UnaryRedCode.NANMIN - ) - - out_array = a._perform_unary_reduction( - unary_red_code, - a, - axis=axis, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - if cunumeric_settings.numpy_compat() and a.dtype.kind == "f": - all_nan = all(isnan(a), axis=axis, keepdims=keepdims, where=where) - putmask(out_array, all_nan, np.nan) # type: ignore - - return out_array - - -@add_boilerplate("a") -def nanmax( - a: ndarray, - axis: Any = None, - out: Union[ndarray, None] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - Return the maximum of an array or maximum along an axis, ignoring any - NaNs. When all-NaN slices are encountered, a NaN is returned - for that slice only when CUNUMERIC_NUMPY_COMPATIBILITY environment - variable is set, otherwise identity is returned. 
- Empty slices will raise a ValueError - - Parameters - ---------- - a : array_like - Array containing numbers whose maximum is desired. If a is not - an array, a conversion is attempted. - - axis : None or int or tuple[int], optional - Axis or axes along which to operate. By default, flattened input is - used. - - If this is a tuple of ints, the maximum is selected over multiple axes, - instead of a single axis or all the axes as before. - - out : ndarray, optional - Alternative output array in which to place the result. Must - be of the same shape and buffer length as the expected output. - See `ufuncs-output-type` for more details. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `amax` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - - initial : scalar, optional - The minimum value of an output element. Must be present to allow - computation on empty slice. See `~cunumeric.ufunc.reduce` for details. - - where : array_like[bool], optional - Elements to compare for the maximum. See `~cunumeric.ufunc.reduce` - for details. - - Returns - ------- - nanmax : ndarray or scalar - An array with the same shape as `a`, with the specified axis - removed. If `a` is 0-d array, of if axis is None, an ndarray - scalar is returned. The same dtype as `a` is returned. 
- - Notes - ----- - CuNumeric's implementation will not raise a Runtime Warning for - slices with all-NaNs - - See Also - -------- - numpy.nanmin, numpy.amax, numpy.isnan, numpy.fmax, numpy.maximum, - numpy.isfinite - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - unary_red_code = get_non_nan_unary_red_code( - a.dtype.kind, UnaryRedCode.NANMAX - ) - - out_array = a._perform_unary_reduction( - unary_red_code, - a, - axis=axis, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - if cunumeric_settings.numpy_compat() and a.dtype.kind == "f": - all_nan = all(isnan(a), axis=axis, keepdims=keepdims, where=where) - putmask(out_array, all_nan, np.nan) # type: ignore - - return out_array - - -@add_boilerplate("a") -def nanprod( - a: ndarray, - axis: Any = None, - dtype: Any = None, - out: Union[ndarray, None] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - Return the product of array elements over a given axis treating - Not a Numbers (NaNs) as ones. - - One is returned for slices that are all-NaN or empty. - - Parameters - ---------- - a : array_like - Input array. - axis : int, optional - Axis or axes along which the product is computed. The - default is to compute the product of the flattened array. - dtype : data-type, optional - The type of the returned array and of the accumulator in - which the elements are summed. By default, the dtype of a - is used. An exception is when a has an integer type with - less precision than the platform (u)intp. In that case, - the default will be either (u)int32 or (u)int64 depending - on whether the platform is 32 or 64 bits. For inexact - inputs, dtype must be inexact. - out : ndarray, optional - Alternate output array in which to place the result. The - default is None. If provided, it must have the same shape as - the expected output, but the type will be cast if necessary. 
- See Output type determination for more details. The casting of - NaN to integer can yield unexpected results. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left in the - result as dimensions with size one. With this option, the result - will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `prod` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - initial : scalar, optional - The starting value for this product. See `~cunumeric.ufunc.reduce` for - details. - where : array_like[bool], optional - Elements to include in the product. See `~cunumeric.ufunc.reduce` for - details. - - Returns - ------- - nanprod: ndarray, see `dtype` parameter above. - A new array holding the result is returned unless out is - specified, in which case it is returned. - - See Also - -------- - numpy.prod, numpy.isnan - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - - # Note: if the datatype of the input array is int and less - # than that of the platform int, then a convert task is launched - # in np.prod to take care of the type casting - - if a.dtype == np.complex128: - raise NotImplementedError( - "operation is not supported for complex128 arrays" - ) - - if a.dtype.kind in ("f", "c"): - unary_red_code = UnaryRedCode.NANPROD - else: - unary_red_code = UnaryRedCode.PROD - - return a._perform_unary_reduction( - unary_red_code, - a, - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - -@add_boilerplate("a") -def nansum( - a: ndarray, - axis: Any = None, - dtype: Any = None, - out: Union[ndarray, None] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - Return the sum of array elements over a given 
axis treating - Not a Numbers (NaNs) as ones. - - Zero is returned for slices that are all-NaN or empty. - - Parameters - ---------- - a : array_like - Array containing numbers whose product is desired. If a is not - an array, a conversion is attempted. - - axis : None or int or tuple[int], optional - Axis or axes along which a sum is performed. The default, - axis=None, will sum all of the elements of the input array. - If axis is negative it counts from the last to the first axis. - - If axis is a tuple of ints, a sum is performed on all of the - axes specified in the tuple instead of a single axis or all - the axes as before. - - dtype : data-type, optional - The type of the returned array and of the accumulator in which - the elements are summed. The dtype of `a` is used by default - unless `a` has an integer dtype of less precision than the - default platform integer. In that case, if `a` is signed then - the platform integer is used while if `a` is unsigned then an - unsigned integer of the same precision as the platform integer - is used. - - out : ndarray, optional - Alternative output array in which to place the result. It must - have the same shape as the expected output, but the type of - the output values will be cast if necessary. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - initial : scalar, optional - Starting value for the sum. See `~cunumeric.ufunc.reduce` for - details. - - where : array_like[bool], optional - Elements to include in the sum. See `~cunumeric.ufunc.reduce` for - details. - - Returns - ------- - nansum : ndarray, see `dtype` parameter above. - A new array holding the result is returned unless out is - specified, in which case it is returned. The result has the - same size as a, and the same shape as a if axis is not None or - a is a 1-d array. 
- - See Also - -------- - numpy.nansum, numpy.isnan, numpy.isfinite - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - return a._nansum( - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - -# Exponents and logarithms - - -# Arithmetic operations - - -# Handling complex numbers - - -@add_boilerplate("val") -def real(val: ndarray) -> ndarray: - """ - Return the real part of the complex argument. - - Parameters - ---------- - val : array_like - Input array. - - Returns - ------- - out : ndarray or scalar - The real component of the complex argument. If `val` is real, the type - of `val` is used for the output. If `val` has complex elements, the - returned type is float. - - See Also - -------- - numpy.real - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return val.real - - -@add_boilerplate("val") -def imag(val: ndarray) -> ndarray: - """ - - Return the imaginary part of the complex argument. - - Parameters - ---------- - val : array_like - Input array. - - Returns - ------- - out : ndarray or scalar - The imaginary component of the complex argument. If `val` is real, - the type of `val` is used for the output. If `val` has complex - elements, the returned type is float. - - See Also - -------- - numpy.imag - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return val.imag - - -# Extrema Finding - - -@add_boilerplate("a") -def amax( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - - Return the maximum of an array or maximum along an axis. - - Parameters - ---------- - a : array_like - Input data. - axis : None or int or tuple[int], optional - Axis or axes along which to operate. By default, flattened input is - used. 
- - If this is a tuple of ints, the maximum is selected over multiple axes, - instead of a single axis or all the axes as before. - out : ndarray, optional - Alternative output array in which to place the result. Must - be of the same shape and buffer length as the expected output. - See `ufuncs-output-type` for more details. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `amax` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - - initial : scalar, optional - The minimum value of an output element. Must be present to allow - computation on empty slice. See `~cunumeric.ufunc.reduce` for details. - - where : array_like[bool], optional - Elements to compare for the maximum. See `~cunumeric.ufunc.reduce` - for details. - - Returns - ------- - amax : ndarray or scalar - Maximum of `a`. If `axis` is None, the result is a scalar value. - If `axis` is given, the result is an array of dimension - ``a.ndim - 1``. - - See Also - -------- - numpy.amax - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return maximum.reduce( - a, - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - -max = amax - - -@add_boilerplate("a") -def amin( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, -) -> ndarray: - """ - - Return the minimum of an array or minimum along an axis. - - Parameters - ---------- - a : array_like - Input data. 
- axis : None or int or tuple[int], optional - Axis or axes along which to operate. By default, flattened input is - used. - - If this is a tuple of ints, the minimum is selected over multiple axes, - instead of a single axis or all the axes as before. - out : ndarray, optional - Alternative output array in which to place the result. Must - be of the same shape and buffer length as the expected output. - See `ufuncs-output-type` for more details. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `amin` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - - initial : scalar, optional - The maximum value of an output element. Must be present to allow - computation on empty slice. See `~cunumeric.ufunc.reduce` for details. - - where : array_like[bool], optional - Elements to compare for the minimum. See `~cunumeric.ufunc.reduce` - for details. - - Returns - ------- - amin : ndarray or scalar - Minimum of `a`. If `axis` is None, the result is a scalar value. - If `axis` is given, the result is an array of dimension - ``a.ndim - 1``. - - See Also - -------- - numpy.amin - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return minimum.reduce( - a, - axis=axis, - dtype=dtype, - out=out, - keepdims=keepdims, - initial=initial, - where=where, - ) - - -min = amin - -# Miscellaneous - - -@add_boilerplate("a", "v") -def convolve(a: ndarray, v: ndarray, mode: ConvolveMode = "full") -> ndarray: - """ - - Returns the discrete, linear convolution of two ndarrays. - - If `a` and `v` are both 1-D and `v` is longer than `a`, the two are - swapped before computation. 
For N-D cases, the arguments are never swapped. - - Parameters - ---------- - a : (N,) array_like - First input ndarray. - v : (M,) array_like - Second input ndarray. - mode : ``{'full', 'valid', 'same'}``, optional - 'same': - The output is the same size as `a`, centered with respect to - the 'full' output. (default) - - 'full': - The output is the full discrete linear convolution of the inputs. - - 'valid': - The output consists only of those elements that do not - rely on the zero-padding. In 'valid' mode, either `a` or `v` - must be at least as large as the other in every dimension. - - Returns - ------- - out : ndarray - Discrete, linear convolution of `a` and `v`. - - See Also - -------- - numpy.convolve - - Notes - ----- - The current implementation only supports the 'same' mode. - - Unlike `numpy.convolve`, `cunumeric.convolve` supports N-dimensional - inputs, but it follows NumPy's behavior for 1-D inputs. - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if mode != "same": - raise NotImplementedError("Need to implement other convolution modes") - - if a.ndim != v.ndim: - raise RuntimeError("Arrays should have the same dimensions") - elif a.ndim > 3: - raise NotImplementedError(f"{a.ndim}-D arrays are not yet supported") - - if a.ndim == 1 and a.size < v.size: - v, a = a, v - - if a.dtype != v.dtype: - v = v.astype(a.dtype) - out = ndarray( - shape=a.shape, - dtype=a.dtype, - inputs=(a, v), - ) - a._thunk.convolve(v._thunk, out._thunk, mode) - return out - - -@add_boilerplate("a") -def clip( - a: ndarray, - a_min: Union[int, float, npt.ArrayLike, None], - a_max: Union[int, float, npt.ArrayLike, None], - out: Union[npt.NDArray[Any], ndarray, None] = None, -) -> ndarray: - """ - - Clip (limit) the values in an array. - - Given an interval, values outside the interval are clipped to - the interval edges. For example, if an interval of ``[0, 1]`` - is specified, values smaller than 0 become 0, and values larger - than 1 become 1. 
- - Parameters - ---------- - a : array_like - Array containing elements to clip. - a_min : scalar or array_like or None - Minimum value. If None, clipping is not performed on lower - interval edge. Not more than one of `a_min` and `a_max` may be - None. - a_max : scalar or array_like or None - Maximum value. If None, clipping is not performed on upper - interval edge. Not more than one of `a_min` and `a_max` may be - None. If `a_min` or `a_max` are array_like, then the three - arrays will be broadcasted to match their shapes. - out : ndarray, optional - The results will be placed in this array. It may be the input - array for in-place clipping. `out` must be of the right shape - to hold the output. Its type is preserved. - **kwargs - For other keyword-only arguments, see the - :ref:`ufunc docs `. - - Returns - ------- - clipped_array : ndarray - An array with the elements of `a`, but where values - < `a_min` are replaced with `a_min`, and those > `a_max` - with `a_max`. - - See Also - -------- - numpy.clip - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.clip(a_min, a_max, out=out) - - -################################## -# Set routines -################################## - - -@add_boilerplate("ar") -def unique( - ar: ndarray, - return_index: bool = False, - return_inverse: bool = False, - return_counts: bool = False, - axis: Optional[int] = None, -) -> ndarray: - """ - - Find the unique elements of an array. - Returns the sorted unique elements of an array. There are three optional - outputs in addition to the unique elements: - * the indices of the input array that give the unique values - * the indices of the unique array that reconstruct the input array - * the number of times each unique value comes up in the input array - - Parameters - ---------- - ar : array_like - Input array. Unless `axis` is specified, this will be flattened if it - is not already 1-D. 
- return_index : bool, optional - If True, also return the indices of `ar` (along the specified axis, - if provided, or in the flattened array) that result in the unique - array. - Currently not supported. - return_inverse : bool, optional - If True, also return the indices of the unique array (for the specified - axis, if provided) that can be used to reconstruct `ar`. - Currently not supported. - return_counts : bool, optional - If True, also return the number of times each unique item appears - in `ar`. - Currently not supported. - axis : int or None, optional - The axis to operate on. If None, `ar` will be flattened. If an integer, - the subarrays indexed by the given axis will be flattened and treated - as the elements of a 1-D array with the dimension of the given axis, - see the notes for more details. Object arrays or structured arrays - that contain objects are not supported if the `axis` kwarg is used. The - default is None. - Currently not supported. - - Returns - ------- - unique : ndarray - The sorted unique values. - unique_indices : ndarray, optional - The indices of the first occurrences of the unique values in the - original array. Only provided if `return_index` is True. - unique_inverse : ndarray, optional - The indices to reconstruct the original array from the - unique array. Only provided if `return_inverse` is True. - unique_counts : ndarray, optional - The number of times each of the unique values comes up in the - original array. Only provided if `return_counts` is True. - - See Also - -------- - numpy.unique - - Availability - -------- - Multiple GPUs, Multiple CPUs - - Notes - -------- - Keyword arguments for optional outputs are not yet supported. - `axis` is also not handled currently. 
- - """ - if _builtin_any((return_index, return_inverse, return_counts, axis)): - raise NotImplementedError( - "Keyword arguments for `unique` are not yet supported" - ) - - return ar.unique() - - -################################## -# Sorting, searching, and counting -################################## - -# Sorting - - -@add_boilerplate("a") -def argsort( - a: ndarray, - axis: Union[int, None] = -1, - kind: SortType = "quicksort", - order: Optional[Union[str, list[str]]] = None, -) -> ndarray: - """ - - Returns the indices that would sort an array. - - Parameters - ---------- - a : array_like - Input array. - axis : int or None, optional - Axis to sort. By default, the index -1 (the last axis) is used. If - None, the flattened array is used. - kind : ``{'quicksort', 'mergesort', 'heapsort', 'stable'}``, optional - Default is 'quicksort'. The underlying sort algorithm might vary. - The code basically supports 'stable' or *not* 'stable'. - order : str or list[str], optional - Currently not supported - - Returns - ------- - index_array : ndarray[int] - Array of indices that sort a along the specified axis. It has the - same shape as `a.shape` or is flattened in case of `axis` is None. - - See Also - -------- - numpy.argsort - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - result = ndarray(a.shape, np.int64) - result._thunk.sort( - rhs=a._thunk, argsort=True, axis=axis, kind=kind, order=order - ) - return result - - -def msort(a: ndarray) -> ndarray: - """ - - Returns a sorted copy of an array sorted along the first axis. - - Parameters - ---------- - a : array_like - Input array. - - Returns - ------- - out : ndarray - Sorted array with same dtype and shape as `a`. 
- - See Also - -------- - numpy.msort - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return sort(a, axis=0) - - -@add_boilerplate("a") -def searchsorted( - a: ndarray, - v: Union[int, float, ndarray], - side: SortSide = "left", - sorter: Optional[ndarray] = None, -) -> Union[int, ndarray]: - """ - - Find the indices into a sorted array a such that, if the corresponding - elements in v were inserted before the indices, the order of a would be - preserved. - - Parameters - ---------- - a : 1-D array_like - Input array. If sorter is None, then it must be sorted in ascending - order, otherwise sorter must be an array of indices that sort it. - v : scalar or array_like - Values to insert into a. - side : ``{'left', 'right'}``, optional - If 'left', the index of the first suitable location found is given. - If 'right', return the last such index. If there is no suitable index, - return either 0 or N (where N is the length of a). - sorter : 1-D array_like, optional - Optional array of integer indices that sort array a into ascending - order. They are typically the result of argsort. - - Returns - ------- - indices : int or array_like[int] - Array of insertion points with the same shape as v, or an integer - if v is a scalar. - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.searchsorted(v, side, sorter) - - -@add_boilerplate("a") -def sort( - a: ndarray, - axis: Union[int, None] = -1, - kind: SortType = "quicksort", - order: Optional[Union[str, list[str]]] = None, -) -> ndarray: - """ - - Returns a sorted copy of an array. - - Parameters - ---------- - a : array_like - Input array. - axis : int or None, optional - Axis to sort. By default, the index -1 (the last axis) is used. If - None, the flattened array is used. - kind : ``{'quicksort', 'mergesort', 'heapsort', 'stable'}``, optional - Default is 'quicksort'. The underlying sort algorithm might vary. - The code basically supports 'stable' or *not* 'stable'. 
- order : str or list[str], optional - Currently not supported - - Returns - ------- - out : ndarray - Sorted array with same dtype and shape as `a`. In case `axis` is - None the result is flattened. - - - See Also - -------- - numpy.sort - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - result = ndarray(a.shape, a.dtype) - result._thunk.sort(rhs=a._thunk, axis=axis, kind=kind, order=order) - return result - - -@add_boilerplate("a") -def sort_complex(a: ndarray) -> ndarray: - """ - - Returns a sorted copy of an array sorted along the last axis. Sorts the - real part first, the imaginary part second. - - Parameters - ---------- - a : array_like - Input array. - - Returns - ------- - out : ndarray, complex - Sorted array with same shape as `a`. - - See Also - -------- - numpy.sort_complex - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - result = sort(a) - # force complex result upon return - if np.issubdtype(result.dtype, np.complexfloating): - return result - elif ( - np.issubdtype(result.dtype, np.integer) and result.dtype.itemsize <= 2 - ): - return result.astype(np.complex64, copy=True) - else: - return result.astype(np.complex128, copy=True) - - -# partition - - -@add_boilerplate("a") -def argpartition( - a: ndarray, - kth: Union[int, Sequence[int]], - axis: Union[int, None] = -1, - kind: SelectKind = "introselect", - order: Optional[Union[str, list[str]]] = None, -) -> ndarray: - """ - - Perform an indirect partition along the given axis. - - Parameters - ---------- - a : array_like - Input array. - kth : int or Sequence[int] - axis : int or None, optional - Axis to partition. By default, the index -1 (the last axis) is used. If - None, the flattened array is used. - kind : ``{'introselect'}``, optional - Currently not supported. - order : str or list[str], optional - Currently not supported. - - Returns - ------- - out : ndarray[int] - Array of indices that partitions a along the specified axis. 
It has the - same shape as `a.shape` or is flattened in case of `axis` is None. - - - Notes - ----- - The current implementation falls back to `cunumeric.argsort`. - - See Also - -------- - numpy.argpartition - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - result = ndarray(a.shape, np.int64) - result._thunk.partition( - rhs=a._thunk, - argpartition=True, - kth=kth, - axis=axis, - kind=kind, - order=order, - ) - return result - - -@add_boilerplate("a") -def partition( - a: ndarray, - kth: Union[int, Sequence[int]], - axis: Union[int, None] = -1, - kind: SelectKind = "introselect", - order: Optional[Union[str, list[str]]] = None, -) -> ndarray: - """ - - Returns a partitioned copy of an array. - - Parameters - ---------- - a : array_like - Input array. - kth : int or Sequence[int] - axis : int or None, optional - Axis to partition. By default, the index -1 (the last axis) is used. If - None, the flattened array is used. - kind : ``{'introselect'}``, optional - Currently not supported. - order : str or list[str], optional - Currently not supported. - - Returns - ------- - out : ndarray - Partitioned array with same dtype and shape as `a`. In case `axis` is - None the result is flattened. - - Notes - ----- - The current implementation falls back to `cunumeric.sort`. - - See Also - -------- - numpy.partition - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - result = ndarray(a.shape, a.dtype) - result._thunk.partition( - rhs=a._thunk, kth=kth, axis=axis, kind=kind, order=order - ) - return result - - -# Searching - - -@add_boilerplate("a") -def argmax( - a: ndarray, - axis: Optional[int] = None, - out: Optional[ndarray] = None, - *, - keepdims: bool = False, -) -> ndarray: - """ - - Returns the indices of the maximum values along an axis. - - Parameters - ---------- - a : array_like - Input array. - axis : int, optional - By default, the index is into the flattened array, otherwise - along the specified axis. 
- out : ndarray, optional - If provided, the result will be inserted into this array. It should - be of the appropriate shape and dtype. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the array. - - Returns - ------- - index_array : ndarray[int] - Array of indices into the array. It has the same shape as `a.shape` - with the dimension along `axis` removed. - - See Also - -------- - numpy.argmax - - Notes - ----- - CuNumeric's parallel implementation may yield different results from NumPy - when the array contains NaN(s). - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.argmax(axis=axis, out=out, keepdims=keepdims) - - -@add_boilerplate("a") -def argmin( - a: ndarray, - axis: Optional[int] = None, - out: Optional[ndarray] = None, - *, - keepdims: bool = False, -) -> ndarray: - """ - - Returns the indices of the minimum values along an axis. - - Parameters - ---------- - a : array_like - Input array. - axis : int, optional - By default, the index is into the flattened array, otherwise - along the specified axis. - out : ndarray, optional - If provided, the result will be inserted into this array. It should - be of the appropriate shape and dtype. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the array. - - Returns - ------- - index_array : ndarray[int] - Array of indices into the array. It has the same shape as `a.shape` - with the dimension along `axis` removed. - - See Also - -------- - numpy.argmin - - Notes - ----- - CuNumeric's parallel implementation may yield different results from NumPy - when the array contains NaN(s). 
- - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.argmin(axis=axis, out=out, keepdims=keepdims) - - -# Counting - - -@add_boilerplate("a") -def count_nonzero( - a: ndarray, axis: Optional[Union[int, tuple[int, ...]]] = None -) -> Union[int, ndarray]: - """ - - Counts the number of non-zero values in the array ``a``. - - Parameters - ---------- - a : array_like - The array for which to count non-zeros. - axis : int or tuple, optional - Axis or tuple of axes along which to count non-zeros. - Default is None, meaning that non-zeros will be counted - along a flattened version of ``a``. - - Returns - ------- - count : int or ndarray[int] - Number of non-zero values in the array along a given axis. - Otherwise, the total number of non-zero values in the array - is returned. - - See Also - -------- - numpy.count_nonzero - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a._count_nonzero(axis) - - -############ -# Statistics -############ - -# Averages and variances - - -@add_boilerplate("a") -def mean( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - where: Optional[ndarray] = None, -) -> ndarray: - """ - - Compute the arithmetic mean along the specified axis. - - Returns the average of the array elements. The average is taken over - the flattened array by default, otherwise over the specified axis. - `float64` intermediate and return values are used for integer inputs. - - Parameters - ---------- - a : array_like - Array containing numbers whose mean is desired. If `a` is not an - array, a conversion is attempted. - axis : None or int or tuple[int], optional - Axis or axes along which the means are computed. The default is to - compute the mean of the flattened array. - - If this is a tuple of ints, a mean is performed over multiple axes, - instead of a single axis or all the axes as before. 
- dtype : data-type, optional - Type to use in computing the mean. For integer inputs, the default - is `float64`; for floating point inputs, it is the same as the - input dtype. - out : ndarray, optional - Alternate output array in which to place the result. The default - is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `ufuncs-output-type` for more details. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - If the default value is passed, then `keepdims` will not be - passed through to the `mean` method of sub-classes of - `ndarray`, however any non-default value will be. If the - sub-class' method does not implement `keepdims` any - exceptions will be raised. - - where : array_like of bool, optional - Elements to include in the mean. - - Returns - ------- - m : ndarray - If `out is None`, returns a new array of the same dtype a above - containing the mean values, otherwise a reference to the output - array is returned. - - See Also - -------- - numpy.mean - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.mean( - axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where - ) - - -@add_boilerplate("a") -def nanmean( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, - keepdims: bool = False, - where: Optional[ndarray] = None, -) -> ndarray: - """ - - Compute the arithmetic mean along the specified axis, ignoring NaNs. - - Returns the average of the array elements. The average is taken over - the flattened array by default, otherwise over the specified axis. - `float64` intermediate and return values are used for integer inputs. 
- - Parameters - ---------- - a : array_like - Array containing numbers whose mean is desired. If `a` is not an - array, a conversion is attempted. - axis : None or int or tuple[int], optional - Axis or axes along which the means are computed. The default is to - compute the mean of the flattened array. - - If this is a tuple of ints, a mean is performed over multiple axes, - instead of a single axis or all the axes as before. - dtype : data-type, optional - Type to use in computing the mean. For integer inputs, the default - is `float64`; for floating point inputs, it is the same as the - input dtype. - out : ndarray, optional - Alternate output array in which to place the result. The default - is ``None``; if provided, it must have the same shape as the - expected output, but the type will be cast if necessary. - See `ufuncs-output-type` for more details. - - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - - - where : array_like of bool, optional - Elements to include in the mean. - - Returns - ------- - m : ndarray - If `out is None`, returns a new array of the same dtype as a above - containing the mean values, otherwise a reference to the output - array is returned. - - See Also - -------- - numpy.nanmean - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a._nanmean( - axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where - ) - - -@add_boilerplate("a") -def var( - a: ndarray, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, - ddof: int = 0, - keepdims: bool = False, - *, - where: Union[ndarray, None] = None, -) -> ndarray: - """ - Compute the variance along the specified axis. - - Returns the variance of the array elements, a measure of the spread of - a distribution. 
The variance is computed for the flattened array - by default, otherwise over the specified axis. - - Parameters - ---------- - a : array_like - Array containing numbers whose variance is desired. If `a` is not an - array, a conversion is attempted. - axis : None or int or tuple[int], optional - Axis or axes along which the variance is computed. The default is to - compute the variance of the flattened array. - - If this is a tuple of ints, a variance is performed over multiple axes, - instead of a single axis or all the axes as before. - dtype : data-type, optional - Type to use in computing the variance. For arrays of integer type - the default is float64; for arrays of float types - it is the same as the array type. - out : ndarray, optional - Alternate output array in which to place the result. It must have the - same shape as the expected output, but the type is cast if necessary. - ddof : int, optional - “Delta Degrees of Freedom”: the divisor used in the calculation is - N - ddof, where N represents the number of elements. By default - ddof is zero. - keepdims : bool, optional - If this is set to True, the axes which are reduced are left - in the result as dimensions with size one. With this option, - the result will broadcast correctly against the input array. - where : array_like of bool, optional - A boolean array which is broadcasted to match the dimensions of array, - and selects elements to include in the reduction. - - Returns - ------- - m : ndarray, see dtype parameter above - If `out=None`, returns a new array of the same dtype as above - containing the variance values, otherwise a reference to the output - array is returned. 
- - See Also - -------- - numpy.var - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - return a.var( - axis=axis, - dtype=dtype, - out=out, - ddof=ddof, - keepdims=keepdims, - where=where, - ) - - -# Histograms - - -@add_boilerplate("x", "weights") -def bincount( - x: ndarray, weights: Optional[ndarray] = None, minlength: int = 0 -) -> ndarray: - """ - bincount(x, weights=None, minlength=0) - - Count number of occurrences of each value in array of non-negative ints. - - The number of bins (of size 1) is one larger than the largest value in - `x`. If `minlength` is specified, there will be at least this number - of bins in the output array (though it will be longer if necessary, - depending on the contents of `x`). - Each bin gives the number of occurrences of its index value in `x`. - If `weights` is specified the input array is weighted by it, i.e. if a - value ``n`` is found at position ``i``, ``out[n] += weight[i]`` instead - of ``out[n] += 1``. - - Parameters - ---------- - x : array_like - 1-D input array of non-negative ints. - weights : array_like, optional - Weights, array of the same shape as `x`. - minlength : int, optional - A minimum number of bins for the output array. - - Returns - ------- - out : ndarray[int] - The result of binning the input array. - The length of `out` is equal to ``cunumeric.amax(x)+1``. - - Raises - ------ - ValueError - If the input is not 1-dimensional, or contains elements with negative - values, or if `minlength` is negative. - TypeError - If the type of the input is float or complex. 
- - See Also - -------- - numpy.bincount - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if x.ndim != 1: - raise ValueError("the input array must be 1-dimensional") - if weights is not None: - if weights.shape != x.shape: - raise ValueError("weights array must be same shape for bincount") - if weights.dtype.kind == "c": - raise ValueError("weights must be convertible to float64") - # Make sure the weights are float64 - weights = weights.astype(np.float64) - if not np.issubdtype(x.dtype, np.integer): - raise TypeError("input array for bincount must be integer type") - if minlength < 0: - raise ValueError("'minlength' must not be negative") - # Note that the following are non-blocking operations, - # though passing their results to `int` is blocking - max_val, min_val = amax(x), amin(x) - if int(min_val) < 0: - raise ValueError("the input array must have no negative elements") - minlength = _builtin_max(minlength, int(max_val) + 1) - if x.size == 1: - # Handle the special case of 0-D array - if weights is None: - out = zeros((minlength,), dtype=np.dtype(np.int64)) - # TODO: Remove this "type: ignore" once @add_boilerplate can - # propagate "ndarray -> ndarray | npt.ArrayLike" in wrapped sigs - out[x[0]] = 1 # type: ignore [assignment] - else: - out = zeros((minlength,), dtype=weights.dtype) - index = x[0] - out[index] = weights[0] - else: - # Normal case of bincount - if weights is None: - out = ndarray( - (minlength,), - dtype=np.dtype(np.int64), - inputs=(x, weights), - ) - out._thunk.bincount(x._thunk) - else: - out = ndarray( - (minlength,), - dtype=weights.dtype, - inputs=(x, weights), - ) - out._thunk.bincount(x._thunk, weights=weights._thunk) - return out - - -# Quantiles - - -# account for 0-based indexing -# there's no negative numbers -# arithmetic at this level, -# (pos, k) are always positive! 
-# -def floor_i(k: int | float) -> int: - j = k - 1 if k > 0 else 0 - return int(j) - - -# Generic rule: if `q` input value falls onto a node, then return that node - - -# Discontinuous methods: -# -# 'inverted_cdf' -# q = quantile input \in [0, 1] -# n = sizeof(array) -# -def inverted_cdf(q: float, n: int) -> tuple[float, int]: - pos = q * n - k = math.floor(pos) - - g = pos - k - gamma = 1.0 if g > 0 else 0.0 - - j = int(k) - 1 - if j < 0: - return (0.0, 0) - else: - return (gamma, j) - - -# 'averaged_inverted_cdf' -# -def averaged_inverted_cdf(q: float, n: int) -> tuple[float, int]: - pos = q * n - k = math.floor(pos) - - g = pos - k - gamma = 1.0 if g > 0 else 0.5 - - j = int(k) - 1 - if j < 0: - return (0.0, 0) - elif j >= n - 1: - return (1.0, n - 2) - else: - return (gamma, j) - - -# 'closest_observation' -# -def closest_observation(q: float, n: int) -> tuple[float, int]: - # p = q*n - 0.5 - # pos = 0 if p < 0 else p - - # weird departure from paper - # (bug?), but this fixes it: - # also, j even in original paper - # applied to 1-based indexing; we have 0-based! - # numpy impl. doesn't account that the original paper used - # 1-based indexing, 0-based j is still checked for evennes! 
- # (see proof in quantile_policies.py) - # - p0 = q * n - 0.5 - p = p0 - 1.0 - - pos = 0 if p < 0 else p0 - k = math.floor(pos) - - j = floor_i(k) - gamma = 1 if k < pos else (0 if j % 2 == 0 else 1) - - return (gamma, j) - - -# Continuous methods: -# -# Parzen method: -# 'interpolated_inverted_cdf' -# -def interpolated_inverted_cdf(q: float, n: int) -> tuple[float, int]: - pos = q * n - k = math.floor(pos) - # gamma = pos-k - # this fixes it: - # - gamma = 0.0 if k == 0 else pos - k - j = floor_i(k) - return (gamma, j) - - -# Hazen method: -# 'hazen' -# -def hazen(q: float, n: int) -> tuple[float, int]: - pos = q * n + 0.5 - k = math.floor(pos) - # gamma = pos-k - # - # this fixes it: - # (when pos > n: this actually selects the right point, - # which is the correct choice, because right = arr[n] - # gets invalidated) - # - gamma = 0.0 if (pos < 1 or pos > n) else pos - k - - j = floor_i(k) - return (gamma, j) - - -# Weibull method: -# 'weibull' -# -def weibull(q: float, n: int) -> tuple[float, int]: - pos = q * (n + 1) - - k = math.floor(pos) - # gamma = pos-k - # - # this fixes it: - # (when pos > n: this actually selects the right point, - # which is the correct choice, because right = arr[n] - # gets invalidated) - # - gamma = 0.0 if (pos < 1 or pos > n) else pos - k - - j = floor_i(k) - - if j >= n: - j = n - 1 - - return (gamma, j) - - -# Gumbel method: -# 'linear' -# -def linear(q: float, n: int) -> tuple[float, int]: - pos = q * (n - 1) + 1 - k = math.floor(pos) - # gamma = pos-k - # - # this fixes it: - # (when pos > n: this actually selects the right point, - # which is the correct choice, because right = arr[n] - # gets invalidated) - # - gamma = 0.0 if (pos < 1 or pos > n) else pos - k - - j = floor_i(k) - return (gamma, j) - - -# Johnson & Kotz method: -# 'median_unbiased' -# -def median_unbiased(q: float, n: int) -> tuple[float, int]: - fract = 1.0 / 3.0 - pos = q * (n + fract) + fract - k = math.floor(pos) - - # gamma = pos-k - # - # this fixes it: 
- # (when pos > n: this actually selects the right point, - # which is the correct choice, because right = arr[n] - # gets invalidated) - # - gamma = 0.0 if (pos < 1 or pos > n) else pos - k - - j = floor_i(k) - return (gamma, j) - - -# Blom method: -# 'normal_unbiased' -# -def normal_unbiased(q: float, n: int) -> tuple[float, int]: - fract1 = 0.25 - fract2 = 3.0 / 8.0 - pos = q * (n + fract1) + fract2 - k = math.floor(pos) - - # gamma = pos-k - # - # this fixes it: - # (when pos > n: this actually selects the right point, - # which is the correct choice, because right = arr[n] - # gets invalidated) - # - gamma = 0.0 if (pos < 1 or pos > n) else pos - k - - j = floor_i(k) - return (gamma, j) - - -# `lower` -# -def lower(q: float, n: int) -> tuple[float, int]: - gamma = 0.0 - pos = q * (n - 1) - k = math.floor(pos) - - j = int(k) - return (gamma, j) - - -# `higher` -# -def higher(q: float, n: int) -> tuple[float, int]: - pos = q * (n - 1) - k = math.floor(pos) - - # Generic rule: (k == pos) - gamma = 0.0 if (pos == 0 or k == pos) else 1.0 - - j = int(k) - return (gamma, j) - - -# `midpoint` -# -def midpoint(q: float, n: int) -> tuple[float, int]: - pos = q * (n - 1) - k = math.floor(pos) - - # Generic rule: (k == pos) - gamma = 0.0 if (pos == 0 or k == pos) else 0.5 - - j = int(k) - return (gamma, j) - - -# `nearest` -# -def nearest(q: float, n: int) -> tuple[float, int]: - pos = q * (n - 1) - - # k = floor(pos) - # gamma = 1.0 if pos - k >= 0.5 else 0.0 - - k = np.round(pos) - gamma = 0.0 - - j = int(k) - return (gamma, j) - - -# for the case when axis = tuple (non-singleton) -# reshuffling might have to be done (if tuple is non-consecutive) -# and the src array must be collapsed along that set of axes -# -# args: -# -# arr: [in] source nd-array on which quantiles are calculated; -# axes_set: [in] tuple or list of axes (indices less than arr dimension); -# -# return: pair: (minimal_index, reshuffled_and_collapsed source array) -def reshuffle_reshape( - arr: 
ndarray, axes_set: Iterable[int] -) -> tuple[int, ndarray]: - ndim = len(arr.shape) - - sorted_axes = tuple(sorted(axes_set)) - - min_dim_index = sorted_axes[0] - num_axes = len(sorted_axes) - reshuffled_axes = tuple(range(min_dim_index, min_dim_index + num_axes)) - - non_consecutive = sorted_axes != reshuffled_axes - if non_consecutive: - arr_shuffled = moveaxis(arr, sorted_axes, reshuffled_axes) - else: - arr_shuffled = arr - - # shape_reshuffled = arr_shuffled.shape # debug - collapsed_shape = np.prod([arr_shuffled.shape[i] for i in reshuffled_axes]) - - redimed = tuple(range(0, min_dim_index + 1)) + tuple( - range(min_dim_index + num_axes, ndim) - ) - reshaped = tuple( - [ - collapsed_shape if k == min_dim_index else arr_shuffled.shape[k] - for k in redimed - ] - ) - - arr_reshaped = arr_shuffled.reshape(reshaped) - return (min_dim_index, arr_reshaped) - - -# args: -# -# arr: [in] source nd-array on which quantiles are calculated; -# preccondition: assumed sorted! -# q_arr: [in] quantile input values nd-array; -# axis: [in] axis along which quantiles are calculated; -# method: [in] func(q, n) returning (gamma, j), -# where = array1D.size; -# keepdims: [in] boolean flag specifying whether collapsed axis -# should be kept as dim=1; -# to_dtype: [in] dtype to convert the result to; -# qs_all: [in/out] result pass through or created (returned) -# -def quantile_impl( - arr: ndarray, - q_arr: npt.NDArray[Any], - axis: Optional[int], - axes_set: Iterable[int], - original_shape: tuple[int, ...], - method: Callable[[float, int], tuple[float, int]], - keepdims: bool, - to_dtype: np.dtype[Any], - qs_all: Optional[ndarray], -) -> ndarray: - ndims = len(arr.shape) - - if axis is None: - n = arr.size - - if keepdims: - remaining_shape = (1,) * len(original_shape) - else: - remaining_shape = () # only `q_arr` dictates shape; - # quantile applied to `arr` seen as 1D; - else: - n = arr.shape[axis] - - # arr.shape -{axis}; if keepdims use 1 for arr.shape[axis]: - # (can be empty 
[]) - # - if keepdims: - remaining_shape = tuple( - 1 if k in axes_set else original_shape[k] - for k in range(0, len(original_shape)) - ) - else: - remaining_shape = tuple( - arr.shape[k] for k in range(0, ndims) if k != axis - ) - - # compose qarr.shape with arr.shape: - # - # result.shape = (q_arr.shape, arr.shape -{axis}): - # - qresult_shape = (*q_arr.shape, *remaining_shape) - - # construct result NdArray, non-flattening approach: - # - if qs_all is None: - qs_all = zeros(qresult_shape, dtype=to_dtype) - else: - # implicit conversion from to_dtype to qs_all.dtype assumed - # - if qs_all.shape != qresult_shape: - raise ValueError("wrong shape on output array") - - for index, q in np.ndenumerate(q_arr): - (gamma, j) = method(q, n) - (left_pos, right_pos) = (j, j + 1) - - # (N-1) dimensional ndarray of left, right - # neighbor values: - # - # non-flattening approach: - # - # extract values at index=left_pos; - arr_1D_lvals = arr.take(left_pos, axis) - arr_vals_shape = arr_1D_lvals.shape - - if right_pos >= n: - # some quantile methods may result in j==(n-1), - # hence (j+1) could surpass array boundary; - # - arr_1D_rvals = zeros(arr_vals_shape, dtype=arr_1D_lvals.dtype) - else: - # extract values at index=right_pos; - arr_1D_rvals = arr.take(right_pos, axis) - - # vectorized for axis != None; - # (non-flattening approach) - # - if len(index) == 0: - left = (1.0 - gamma) * arr_1D_lvals.reshape(qs_all.shape) - right = gamma * arr_1D_rvals.reshape(qs_all.shape) - qs_all[...] 
= left + right - else: - left = (1.0 - gamma) * arr_1D_lvals.reshape(qs_all[index].shape) - right = gamma * arr_1D_rvals.reshape(qs_all[index].shape) - qs_all[index] = left + right - - return qs_all - - -@add_boilerplate("a") -def quantile( - a: ndarray, - q: Union[float, Iterable[float], ndarray], - axis: Union[None, int, tuple[int, ...]] = None, - out: Optional[ndarray] = None, - overwrite_input: bool = False, - method: str = "linear", - keepdims: bool = False, -) -> ndarray: - """ - Compute the q-th quantile of the data along the specified axis. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - q : array_like of float - Quantile or sequence of quantiles to compute, which must be between - 0 and 1 inclusive. - axis : {int, tuple of int, None}, optional - Axis or axes along which the quantiles are computed. The default is - to compute the quantile(s) along a flattened version of the array. - out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output. - overwrite_input : bool, optional - If True, then allow the input array `a` to be modified by - intermediate calculations, to save memory. In this case, the - contents of the input `a` after this function completes is - undefined. - method : str, optional - This parameter specifies the method to use for estimating the - quantile. The options sorted by their R type - as summarized in the H&F paper [1]_ are: - 1. 'inverted_cdf' - 2. 'averaged_inverted_cdf' - 3. 'closest_observation' - 4. 'interpolated_inverted_cdf' - 5. 'hazen' - 6. 'weibull' - 7. 'linear' (default) - 8. 'median_unbiased' - 9. 'normal_unbiased' - The first three methods are discontinuous. NumPy further defines the - following discontinuous variations of the default 'linear' (7.) 
option: - * 'lower' - * 'higher', - * 'midpoint' - * 'nearest' - keepdims : bool, optional - If this is set to True, the axes which are reduced are left in - the result as dimensions with size one. With this option, the - result will broadcast correctly against the original array `a`. - - Returns - ------- - quantile : scalar or ndarray - If `q` is a single quantile and `axis=None`, then the result - is a scalar. If multiple quantiles are given, first axis of - the result corresponds to the quantiles. The other axes are - the axes that remain after the reduction of `a`. If the input - contains integers or floats smaller than ``float64``, the output - data-type is ``float64``. Otherwise, the output data-type is the - same as that of the input. If `out` is specified, that array is - returned instead. - - Raises - ------ - TypeError - If the type of the input is complex. - - See Also - -------- - numpy.quantile - - Availability - -------- - Multiple GPUs, Multiple CPUs - - References - ---------- - .. [1] R. J. Hyndman and Y. Fan, - "Sample quantiles in statistical packages," - The American Statistician, 50(4), pp. 361-365, 1996 - """ - - dict_methods = { - "inverted_cdf": inverted_cdf, - "averaged_inverted_cdf": averaged_inverted_cdf, - "closest_observation": closest_observation, - "interpolated_inverted_cdf": interpolated_inverted_cdf, - "hazen": hazen, - "weibull": weibull, - "linear": linear, - "median_unbiased": median_unbiased, - "normal_unbiased": normal_unbiased, - "lower": lower, - "higher": higher, - "midpoint": midpoint, - "nearest": nearest, - } - - real_axis: Optional[int] - axes_set: Iterable[int] = [] - original_shape = a.shape - - if axis is not None and isinstance(axis, Iterable): - if len(axis) == 1: - real_axis = axis[0] - a_rr = a - else: - (real_axis, a_rr) = reshuffle_reshape(a, axis) - # What happens with multiple axes and overwrite_input = True ? 
- # It seems overwrite_input is reset to False; - overwrite_input = False - axes_set = axis - else: - real_axis = axis - a_rr = a - if real_axis is not None: - axes_set = [real_axis] - - # covers both array-like and scalar cases: - # - q_arr = np.asarray(q) - - # in the future k-sort (partition) - # might be faster, for now it uses sort - # arr = partition(arr, k = floor(nq), axis = real_axis) - # but that would require a k-sort call for each `q`! - # too expensive for many `q` values... - # if no axis given then elements are sorted as a 1D array - # - if overwrite_input: - a_rr.sort(axis=real_axis) - arr = a_rr - else: - arr = sort(a_rr, axis=real_axis) - - if arr.dtype.kind == "c": - raise TypeError("input array cannot be of complex type") - - # return type dependency on arr.dtype: - # - # it depends on interpolation method; - # For discontinuous methods returning either end of the interval within - # which the quantile falls, or the other; arr.dtype is returned; - # else, logic below: - # - # if is_float(arr_dtype) && (arr.dtype >= dtype('float64')) then - # arr.dtype - # else - # dtype('float64') - # - # see https://github.com/numpy/numpy/issues/22323 - # - if method in [ - "inverted_cdf", - "closest_observation", - "lower", - "higher", - "nearest", - ]: - to_dtype = arr.dtype - else: - to_dtype = np.dtype("float64") - - # in case dtype("float128") becomes supported: - # - # to_dtype = ( - # arr.dtype - # if (arr.dtype == np.dtype("float128")) - # else np.dtype("float64") - # ) - - res = quantile_impl( - arr, - q_arr, - real_axis, - axes_set, - original_shape, - dict_methods[method], - keepdims, - to_dtype, - out, - ) - - if out is not None: - # out = res.astype(out.dtype) -- conversion done inside impl - return out - else: - return res - - -@add_boilerplate("a") -def percentile( - a: ndarray, - q: Union[float, Iterable[float], ndarray], - axis: Union[None, int, tuple[int, ...]] = None, - out: Optional[ndarray] = None, - overwrite_input: bool = False, - method: 
str = "linear", - keepdims: bool = False, -) -> ndarray: - """ - Compute the q-th percentile of the data along the specified axis. - - Parameters - ---------- - a : array_like - Input array or object that can be converted to an array. - q : array_like of float - Percentile or sequence of percentiles to compute, which must be between - 0 and 100 inclusive. - axis : {int, tuple of int, None}, optional - Axis or axes along which the percentiles are computed. The default is - to compute the percentile(s) along a flattened version of the array. - out : ndarray, optional - Alternative output array in which to place the result. It must have - the same shape as the expected output. - overwrite_input : bool, optional - If True, then allow the input array `a` to be modified by - intermediate calculations, to save memory. In this case, the - contents of the input `a` after this function completes is - undefined. - method : str, optional - This parameter specifies the method to use for estimating the - percentile. The options sorted by their R type - as summarized in the H&F paper [1]_ are: - 1. 'inverted_cdf' - 2. 'averaged_inverted_cdf' - 3. 'closest_observation' - 4. 'interpolated_inverted_cdf' - 5. 'hazen' - 6. 'weibull' - 7. 'linear' (default) - 8. 'median_unbiased' - 9. 'normal_unbiased' - The first three methods are discontinuous. NumPy further defines the - following discontinuous variations of the default 'linear' (7.) option: - * 'lower' - * 'higher', - * 'midpoint' - * 'nearest' - keepdims : bool, optional - If this is set to True, the axes which are reduced are left in - the result as dimensions with size one. With this option, the - result will broadcast correctly against the original array `a`. - - Returns - ------- - percentile : scalar or ndarray - If `q` is a single percentile and `axis=None`, then the result - is a scalar. If multiple percentiles are given, first axis of - the result corresponds to the percentiles. 
The other axes are - the axes that remain after the reduction of `a`. If the input - contains integers or floats smaller than ``float64``, the output - data-type is ``float64``. Otherwise, the output data-type is the - same as that of the input. If `out` is specified, that array is - returned instead. - - Raises - ------ - TypeError - If the type of the input is complex. - - See Also - -------- - numpy.percentile - - Availability - -------- - Multiple GPUs, Multiple CPUs - - References - ---------- - .. [1] R. J. Hyndman and Y. Fan, - "Sample quantiles in statistical packages," - The American Statistician, 50(4), pp. 361-365, 1996 - """ - - q_arr = np.asarray(q) - q01 = q_arr / 100.0 - - return quantile( - a, - q01, - axis, - out=out, - overwrite_input=overwrite_input, - method=method, - keepdims=keepdims, - ) - - -@add_boilerplate("x", "weights") -def histogram( - x: ndarray, - bins: Union[ndarray, npt.ArrayLike, int] = 10, - range: Optional[Union[tuple[int, int], tuple[float, float]]] = None, - weights: Optional[ndarray] = None, - density: bool = False, -) -> tuple[ndarray, ndarray]: - """ - Compute the histogram of a dataset. - - Parameters - ---------- - a : array_like - Input data. The histogram is computed over the flattened array. - bins : int or sequence of scalars, optional - If `bins` is an int, it defines the number of equal-width bins in the - given range (10, by default). If `bins` is a sequence, it defines a - monotonically increasing array of bin edges, including the rightmost - edge, allowing for non-uniform bin widths. - range : (float, float), optional - The lower and upper range of the bins. If not provided, range is simply - ``(a.min(), a.max())``. Values outside the range are ignored. The first - element of the range must be smaller than the second. This argument is - ignored when bin edges are provided explicitly. - weights : array_like, optional - An array of weights, of the same shape as `a`. 
Each value in `a` only - contributes its associated weight towards the bin count (instead of 1). - If `density` is True, the weights are normalized, so that the integral - of the density over the range remains 1. - density : bool, optional - If ``False``, the result will contain the number of samples in each - bin. If ``True``, the result is the value of the probability *density* - function at the bin, normalized such that the *integral* over the range - is 1. Note that the sum of the histogram values will not be equal to 1 - unless bins of unity width are chosen; it is not a probability *mass* - function. - - Returns - ------- - hist : array - The values of the histogram. See `density` and `weights` for a - description of the possible semantics. - bin_edges : array - Return the bin edges ``(length(hist)+1)``. - - See Also - -------- - numpy.histogram - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - result_type: np.dtype[Any] = np.dtype(np.int64) - - if np.ndim(bins) > 1: - raise ValueError("`bins` must be 1d, when an array") - - # check isscalar(bins): - # - if np.ndim(bins) == 0: - if not isinstance(bins, int): - raise TypeError("`bins` must be array or integer type") - - num_intervals = bins - - if range is not None: - assert isinstance(range, tuple) and len(range) == 2 - if range[0] >= range[1]: - raise ValueError( - "`range` must be a pair of increasing values." 
- ) - - lower_b = range[0] - higher_b = range[1] - elif x.size == 0: - lower_b = 0.0 - higher_b = 1.0 - else: - lower_b = float(min(x)) - higher_b = float(max(x)) - - step = (higher_b - lower_b) / num_intervals - - bins_array = asarray( - [lower_b + k * step for k in _builtin_range(0, num_intervals)] - + [higher_b], - dtype=np.dtype(np.float64), - ) - - bins_orig_type = bins_array.dtype - else: - bins_as_arr = asarray(bins) - bins_orig_type = bins_as_arr.dtype - - bins_array = bins_as_arr.astype(np.dtype(np.float64)) - num_intervals = bins_array.shape[0] - 1 - - if not all((bins_array[1:] - bins_array[:-1]) >= 0): - raise ValueError( - "`bins` must increase monotonically, when an array" - ) - - if x.ndim != 1: - x = x.flatten() - - if weights is not None: - if weights.shape != x.shape: - raise ValueError( - "`weights` array must be same shape for histogram" - ) - - result_type = weights.dtype - weights_array = weights.astype(np.dtype(np.float64)) - else: - # case weights == None cannot be handled inside _thunk.histogram, - # bc/ of hist ndarray inputs(), below; - # needs to be handled here: - # - weights_array = ones(x.shape, dtype=np.dtype(np.float64)) - - if x.size == 0: - return ( - zeros((num_intervals,), dtype=result_type), - bins_array.astype(bins_orig_type), - ) - - hist = ndarray( - (num_intervals,), - dtype=weights_array.dtype, - inputs=(x, bins_array, weights_array), - ) - hist._thunk.histogram( - x._thunk, bins_array._thunk, weights=weights_array._thunk - ) - - # handle (density = True): - # - if density: - result_type = np.dtype(np.float64) - hist /= sum(hist) - hist /= bins_array[1:] - bins_array[:-1] - - return hist.astype(result_type), bins_array.astype(bins_orig_type) diff --git a/cunumeric/random/legacy.py b/cunumeric/random/legacy.py deleted file mode 100644 index 4070653e8..000000000 --- a/cunumeric/random/legacy.py +++ /dev/null @@ -1,203 +0,0 @@ -# Copyright 2021-2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the 
"License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, Union - -import numpy as np -import numpy.random as nprandom - -from cunumeric.array import ndarray -from cunumeric.runtime import runtime - -if TYPE_CHECKING: - import numpy.typing as npt - - from ..types import NdShapeLike - - -def seed(init: Union[int, None] = None) -> None: - if init is None: - init = 0 - runtime.set_next_random_epoch(int(init)) - - -def rand(*shapeargs: int) -> Union[float, ndarray]: - """ - rand(d0, d1, ..., dn) - - Random values in a given shape. - - Create an array of the given shape and populate it with random samples from - a uniform distribution over ``[0, 1)``. - - Parameters - ---------- - d0, d1, ..., dn : int, optional - The dimensions of the returned array, must be non-negative. - If no argument is given a single Python float is returned. - - Returns - ------- - out : ndarray, shape ``(d0, d1, ..., dn)`` - Random values. - - See Also - -------- - numpy.random.rand - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - if shapeargs == (): - return nprandom.rand() - result = ndarray(shapeargs, dtype=np.dtype(np.float64)) - result._thunk.random_uniform() - return result - - -def randint( - low: int, - high: Union[int, None] = None, - size: Union[NdShapeLike, None] = None, - dtype: Union[np.dtype[Any], type, None] = int, -) -> Union[int, ndarray, npt.NDArray[Any]]: - """ - Return random integers from `low` (inclusive) to `high` (exclusive). 
- - Parameters - ---------- - low : int or array_like[int] - Lowest (signed) integers to be drawn from the distribution (unless - ``high=None``, in which case this parameter is one above the - *highest* such integer). - high : int or array_like[int], optional - If provided, one above the largest (signed) integer to be drawn - from the distribution (see above for behavior if ``high=None``). - If array-like, must contain integer values - size : int or tuple[int], optional - Output shape. If the given shape is, e.g., ``(m, n, k)``, then - ``m * n * k`` samples are drawn. Default is None, in which case a - single value is returned. - dtype : data-type, optional - Desired dtype of the result. Byteorder must be native. - The default value is int. - - Returns - ------- - out : int or ndarray[int] - `size`-shaped array of random integers from the appropriate - distribution, or a single such random int if `size` not provided. - - See Also - -------- - numpy.random.randint - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if not isinstance(low, int): - raise NotImplementedError("'low' must be an integer") - if high is not None and not isinstance(high, int): - raise NotImplementedError("'high' must be an integer or None") - - if size is None: - return nprandom.randint(low=low, high=high, size=size, dtype=dtype) - - if dtype is not None: - dtype = np.dtype(dtype) - else: - dtype = np.dtype(np.int64) - # TODO: randint must support unsigned integer dtypes as well - if dtype.kind != "i": - raise NotImplementedError( - "cunumeric.random.randint must be given an integer dtype" - ) - if isinstance(size, int): - size = (size,) - result = ndarray(size, dtype=dtype) - if high is None: - if low <= 0: - raise ValueError( - "bound must be strictly greater than 0 for randint" - ) - result._thunk.random_integer(low=0, high=low) - else: - if low >= high: - raise ValueError( - "'high' bound must be strictly greater than 'low' " - "bound for randint" - ) - 
result._thunk.random_integer(low=low, high=high) - return result - - -def randn(*shapeargs: int) -> Union[float, ndarray]: - """ - randn(d0, d1, ..., dn) - - Return a sample (or samples) from the "standard normal" distribution. - - Parameters - ---------- - d0, d1, ..., dn : int, optional - The dimensions of the returned array, must be non-negative. - If no argument is given a single Python float is returned. - - Returns - ------- - Z : ndarray or float - A ``(d0, d1, ..., dn)``-shaped array of floating-point samples from - the standard normal distribution, or a single such float if - no parameters were supplied. - - See Also - -------- - numpy.random.randn - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - - if shapeargs == (): - return nprandom.randn() - result = ndarray(shapeargs, dtype=np.dtype(np.float64)) - result._thunk.random_normal() - return result - - -def random(size: Union[NdShapeLike, None] = None) -> Union[float, ndarray]: - """ - random(size=None) - - Return random floats in the half-open interval [0.0, 1.0). - - See Also - -------- - numpy.random.random - - Availability - -------- - Multiple GPUs, Multiple CPUs - """ - if size is None: - return nprandom.random() - result = ndarray(size, dtype=np.dtype(np.float64)) - result._thunk.random_uniform() - return result diff --git a/cunumeric_cpp.cmake b/cunumeric_cpp.cmake deleted file mode 100644 index be5c0fbe6..000000000 --- a/cunumeric_cpp.cmake +++ /dev/null @@ -1,505 +0,0 @@ -#============================================================================= -# Copyright 2022 NVIDIA Corporation -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#============================================================================= - -############################################################################## -# - User Options ------------------------------------------------------------ - -option(BUILD_SHARED_LIBS "Build cuNumeric shared libraries" ON) -option(cunumeric_EXCLUDE_TBLIS_FROM_ALL "Exclude tblis targets from cuNumeric's 'all' target" OFF) -option(cunumeric_EXCLUDE_OPENBLAS_FROM_ALL "Exclude OpenBLAS targets from cuNumeric's 'all' target" OFF) -option(cunumeric_EXCLUDE_LEGATE_CORE_FROM_ALL "Exclude legate.core targets from cuNumeric's 'all' target" OFF) - -############################################################################## -# - Project definition ------------------------------------------------------- - -# Write the version header -rapids_cmake_write_version_file(include/cunumeric/version_config.hpp) - -# Needed to integrate with LLVM/clang tooling -set(CMAKE_EXPORT_COMPILE_COMMANDS ON) - -############################################################################## -# - Build Type --------------------------------------------------------------- - -# Set a default build type if none was specified -rapids_cmake_build_type(Release) - -############################################################################## -# - conda environment -------------------------------------------------------- - -rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) - -# We're building python extension libraries, which must always be installed -# under lib/, even if the system normally uses 
lib64/. Rapids-cmake currently -# doesn't realize this when we're going through scikit-build, see -# https://github.com/rapidsai/rapids-cmake/issues/426 -if(TARGET conda_env) - set(CMAKE_INSTALL_LIBDIR "lib") -endif() - -############################################################################## -# - Dependencies ------------------------------------------------------------- - -# add third party dependencies using CPM -rapids_cpm_init(OVERRIDE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/versions.json) - -find_package(OpenMP) - -option(Legion_USE_CUDA "Use CUDA" ON) -option(Legion_USE_OpenMP "Use OpenMP" ${OpenMP_FOUND}) -option(Legion_BOUNDS_CHECKS "Build cuNumeric with bounds checks (expensive)" OFF) - -### -# If we find legate.core already configured on the system, it will report -# whether it was compiled with bounds checking (Legion_BOUNDS_CHECKS), -# CUDA (Legion_USE_CUDA), and OpenMP (Legion_USE_OpenMP). -# -# We use the same variables as legate.core because we want to enable/disable -# each of these features based on how legate.core was configured (it doesn't -# make sense to build cuNumeric's CUDA bindings if legate.core wasn't built -# with CUDA support). 
-### -include(cmake/thirdparty/get_legate_core.cmake) - -if(Legion_USE_CUDA) - include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/cuda_arch_helpers.cmake) - # Needs to run before `rapids_cuda_init_architectures` - set_cuda_arch_from_names() - # Needs to run before `enable_language(CUDA)` - rapids_cuda_init_architectures(cunumeric) - enable_language(CUDA) - # Since cunumeric only enables CUDA optionally we need to manually include - # the file that rapids_cuda_init_architectures relies on `project` calling - if(CMAKE_PROJECT_cunumeric_INCLUDE) - include("${CMAKE_PROJECT_cunumeric_INCLUDE}") - endif() - - # Must come after enable_language(CUDA) - # Use `-isystem ` instead of `-isystem=` - # because the former works with clangd intellisense - set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA "-isystem ") - - rapids_find_package( - CUDAToolkit REQUIRED - BUILD_EXPORT_SET cunumeric-exports - INSTALL_EXPORT_SET cunumeric-exports - ) - - include(cmake/thirdparty/get_nccl.cmake) - include(cmake/thirdparty/get_cutensor.cmake) -endif() - -include(cmake/thirdparty/get_openblas.cmake) - -include(cmake/thirdparty/get_tblis.cmake) - -############################################################################## -# - cuNumeric ---------------------------------------------------------------- - -set(cunumeric_SOURCES "") -set(cunumeric_CXX_DEFS "") -set(cunumeric_CUDA_DEFS "") -set(cunumeric_CXX_OPTIONS "") -set(cunumeric_CUDA_OPTIONS "") - -include(cmake/Modules/set_cpu_arch_flags.cmake) -set_cpu_arch_flags(cunumeric_CXX_OPTIONS) - -# Add `src/cunumeric.mk` sources -list(APPEND cunumeric_SOURCES - src/cunumeric/ternary/where.cc - src/cunumeric/scan/scan_global.cc - src/cunumeric/scan/scan_local.cc - src/cunumeric/binary/binary_op.cc - src/cunumeric/binary/binary_red.cc - src/cunumeric/bits/packbits.cc - src/cunumeric/bits/unpackbits.cc - src/cunumeric/unary/scalar_unary_red.cc - src/cunumeric/unary/unary_op.cc - src/cunumeric/unary/unary_red.cc - src/cunumeric/unary/convert.cc - 
src/cunumeric/nullary/arange.cc - src/cunumeric/nullary/eye.cc - src/cunumeric/nullary/fill.cc - src/cunumeric/nullary/window.cc - src/cunumeric/index/advanced_indexing.cc - src/cunumeric/index/choose.cc - src/cunumeric/index/putmask.cc - src/cunumeric/index/repeat.cc - src/cunumeric/index/select.cc - src/cunumeric/index/wrap.cc - src/cunumeric/index/zip.cc - src/cunumeric/item/read.cc - src/cunumeric/item/write.cc - src/cunumeric/matrix/batched_cholesky.cc - src/cunumeric/matrix/contract.cc - src/cunumeric/matrix/diag.cc - src/cunumeric/matrix/gemm.cc - src/cunumeric/matrix/matmul.cc - src/cunumeric/matrix/matvecmul.cc - src/cunumeric/matrix/dot.cc - src/cunumeric/matrix/potrf.cc - src/cunumeric/matrix/solve.cc - src/cunumeric/matrix/syrk.cc - src/cunumeric/matrix/tile.cc - src/cunumeric/matrix/transpose.cc - src/cunumeric/matrix/trilu.cc - src/cunumeric/matrix/trsm.cc - src/cunumeric/matrix/util.cc - src/cunumeric/random/rand.cc - src/cunumeric/search/argwhere.cc - src/cunumeric/search/nonzero.cc - src/cunumeric/set/unique.cc - src/cunumeric/set/unique_reduce.cc - src/cunumeric/stat/bincount.cc - src/cunumeric/convolution/convolve.cc - src/cunumeric/transform/flip.cc - src/cunumeric/arg_redop_register.cc - src/cunumeric/mapper.cc - src/cunumeric/cephes/chbevl.cc - src/cunumeric/cephes/i0.cc - src/cunumeric/stat/histogram.cc -) - -if(Legion_USE_OpenMP) - list(APPEND cunumeric_SOURCES - src/cunumeric/ternary/where_omp.cc - src/cunumeric/scan/scan_global_omp.cc - src/cunumeric/scan/scan_local_omp.cc - src/cunumeric/binary/binary_op_omp.cc - src/cunumeric/binary/binary_red_omp.cc - src/cunumeric/bits/packbits_omp.cc - src/cunumeric/bits/unpackbits_omp.cc - src/cunumeric/unary/unary_op_omp.cc - src/cunumeric/unary/scalar_unary_red_omp.cc - src/cunumeric/unary/unary_red_omp.cc - src/cunumeric/unary/convert_omp.cc - src/cunumeric/nullary/arange_omp.cc - src/cunumeric/nullary/eye_omp.cc - src/cunumeric/nullary/fill_omp.cc - src/cunumeric/nullary/window_omp.cc - 
src/cunumeric/index/advanced_indexing_omp.cc - src/cunumeric/index/choose_omp.cc - src/cunumeric/index/putmask_omp.cc - src/cunumeric/index/repeat_omp.cc - src/cunumeric/index/select_omp.cc - src/cunumeric/index/wrap_omp.cc - src/cunumeric/index/zip_omp.cc - src/cunumeric/matrix/batched_cholesky_omp.cc - src/cunumeric/matrix/contract_omp.cc - src/cunumeric/matrix/diag_omp.cc - src/cunumeric/matrix/gemm_omp.cc - src/cunumeric/matrix/matmul_omp.cc - src/cunumeric/matrix/matvecmul_omp.cc - src/cunumeric/matrix/dot_omp.cc - src/cunumeric/matrix/potrf_omp.cc - src/cunumeric/matrix/solve_omp.cc - src/cunumeric/matrix/syrk_omp.cc - src/cunumeric/matrix/tile_omp.cc - src/cunumeric/matrix/transpose_omp.cc - src/cunumeric/matrix/trilu_omp.cc - src/cunumeric/matrix/trsm_omp.cc - src/cunumeric/random/rand_omp.cc - src/cunumeric/search/argwhere_omp.cc - src/cunumeric/search/nonzero_omp.cc - src/cunumeric/set/unique_omp.cc - src/cunumeric/set/unique_reduce_omp.cc - src/cunumeric/stat/bincount_omp.cc - src/cunumeric/convolution/convolve_omp.cc - src/cunumeric/transform/flip_omp.cc - src/cunumeric/stat/histogram_omp.cc - ) -endif() - -if(Legion_USE_CUDA) - list(APPEND cunumeric_SOURCES - src/cunumeric/ternary/where.cu - src/cunumeric/scan/scan_global.cu - src/cunumeric/scan/scan_local.cu - src/cunumeric/binary/binary_op.cu - src/cunumeric/binary/binary_red.cu - src/cunumeric/bits/packbits.cu - src/cunumeric/bits/unpackbits.cu - src/cunumeric/unary/scalar_unary_red.cu - src/cunumeric/unary/unary_red.cu - src/cunumeric/unary/unary_op.cu - src/cunumeric/unary/convert.cu - src/cunumeric/nullary/arange.cu - src/cunumeric/nullary/eye.cu - src/cunumeric/nullary/fill.cu - src/cunumeric/nullary/window.cu - src/cunumeric/index/advanced_indexing.cu - src/cunumeric/index/choose.cu - src/cunumeric/index/putmask.cu - src/cunumeric/index/repeat.cu - src/cunumeric/index/select.cu - src/cunumeric/index/wrap.cu - src/cunumeric/index/zip.cu - src/cunumeric/item/read.cu - src/cunumeric/item/write.cu 
- src/cunumeric/matrix/batched_cholesky.cu - src/cunumeric/matrix/contract.cu - src/cunumeric/matrix/diag.cu - src/cunumeric/matrix/gemm.cu - src/cunumeric/matrix/matmul.cu - src/cunumeric/matrix/matvecmul.cu - src/cunumeric/matrix/dot.cu - src/cunumeric/matrix/potrf.cu - src/cunumeric/matrix/solve.cu - src/cunumeric/matrix/syrk.cu - src/cunumeric/matrix/tile.cu - src/cunumeric/matrix/transpose.cu - src/cunumeric/matrix/trilu.cu - src/cunumeric/matrix/trsm.cu - src/cunumeric/random/rand.cu - src/cunumeric/search/argwhere.cu - src/cunumeric/search/nonzero.cu - src/cunumeric/set/unique.cu - src/cunumeric/stat/bincount.cu - src/cunumeric/convolution/convolve.cu - src/cunumeric/fft/fft.cu - src/cunumeric/transform/flip.cu - src/cunumeric/arg_redop_register.cu - src/cunumeric/cudalibs.cu - src/cunumeric/stat/histogram.cu - ) -endif() - -# Add `src/cunumeric/sort/sort.mk` sources -list(APPEND cunumeric_SOURCES - src/cunumeric/sort/sort.cc - src/cunumeric/sort/searchsorted.cc -) - -if(Legion_USE_OpenMP) - list(APPEND cunumeric_SOURCES - src/cunumeric/sort/sort_omp.cc - src/cunumeric/sort/searchsorted_omp.cc - ) -endif() - -if(Legion_USE_CUDA) - list(APPEND cunumeric_SOURCES - src/cunumeric/sort/sort.cu - src/cunumeric/sort/searchsorted.cu - src/cunumeric/sort/cub_sort_bool.cu - src/cunumeric/sort/cub_sort_int8.cu - src/cunumeric/sort/cub_sort_int16.cu - src/cunumeric/sort/cub_sort_int32.cu - src/cunumeric/sort/cub_sort_int64.cu - src/cunumeric/sort/cub_sort_uint8.cu - src/cunumeric/sort/cub_sort_uint16.cu - src/cunumeric/sort/cub_sort_uint32.cu - src/cunumeric/sort/cub_sort_uint64.cu - src/cunumeric/sort/cub_sort_half.cu - src/cunumeric/sort/cub_sort_float.cu - src/cunumeric/sort/cub_sort_double.cu - src/cunumeric/sort/thrust_sort_bool.cu - src/cunumeric/sort/thrust_sort_int8.cu - src/cunumeric/sort/thrust_sort_int16.cu - src/cunumeric/sort/thrust_sort_int32.cu - src/cunumeric/sort/thrust_sort_int64.cu - src/cunumeric/sort/thrust_sort_uint8.cu - 
src/cunumeric/sort/thrust_sort_uint16.cu - src/cunumeric/sort/thrust_sort_uint32.cu - src/cunumeric/sort/thrust_sort_uint64.cu - src/cunumeric/sort/thrust_sort_half.cu - src/cunumeric/sort/thrust_sort_float.cu - src/cunumeric/sort/thrust_sort_double.cu - src/cunumeric/sort/thrust_sort_complex64.cu - src/cunumeric/sort/thrust_sort_complex128.cu - ) -endif() - -# Add `src/cunumeric/random/random.mk` sources -if(Legion_USE_CUDA OR cunumeric_cuRAND_INCLUDE_DIR) - list(APPEND cunumeric_SOURCES - src/cunumeric/random/bitgenerator.cc - src/cunumeric/random/randutil/generator_host.cc - src/cunumeric/random/randutil/generator_host_straightforward.cc - src/cunumeric/random/randutil/generator_host_advanced.cc - ) - if(Legion_USE_CUDA) - list(APPEND cunumeric_SOURCES - src/cunumeric/random/bitgenerator.cu - src/cunumeric/random/randutil/generator_device.cu - src/cunumeric/random/randutil/generator_device_straightforward.cu - src/cunumeric/random/randutil/generator_device_advanced.cu - ) - endif() -endif() - -list(APPEND cunumeric_SOURCES - # This must always be the last file! 
- # It guarantees we do our registration callback - # only after all task variants are recorded - src/cunumeric/cunumeric.cc -) - -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - list(APPEND cunumeric_CXX_DEFS DEBUG_CUNUMERIC) - list(APPEND cunumeric_CUDA_DEFS DEBUG_CUNUMERIC) -endif() - -if(Legion_BOUNDS_CHECKS) - list(APPEND cunumeric_CXX_DEFS BOUNDS_CHECKS) - list(APPEND cunumeric_CUDA_DEFS BOUNDS_CHECKS) -endif() - -list(APPEND cunumeric_CUDA_OPTIONS -Xfatbin=-compress-all) -list(APPEND cunumeric_CUDA_OPTIONS --expt-extended-lambda) -list(APPEND cunumeric_CUDA_OPTIONS --expt-relaxed-constexpr) -list(APPEND cunumeric_CXX_OPTIONS -Wno-deprecated-declarations) -list(APPEND cunumeric_CUDA_OPTIONS -Wno-deprecated-declarations) - -add_library(cunumeric ${cunumeric_SOURCES}) -add_library(cunumeric::cunumeric ALIAS cunumeric) - -if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - set(platform_rpath_origin "\$ORIGIN") -elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin") - set(platform_rpath_origin "@loader_path") -endif () - -set_target_properties(cunumeric - PROPERTIES BUILD_RPATH "${platform_rpath_origin}" - INSTALL_RPATH "${platform_rpath_origin}" - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON - INTERFACE_POSITION_INDEPENDENT_CODE ON - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - LIBRARY_OUTPUT_DIRECTORY lib) - -target_link_libraries(cunumeric - PUBLIC legate::core - $ - PRIVATE BLAS::BLAS - tblis::tblis - # Add Conda library and include paths - $ - $ - $ - $ - $ - $) - -if(NOT Legion_USE_CUDA AND cunumeric_cuRAND_INCLUDE_DIR) - list(APPEND cunumeric_CXX_DEFS CUNUMERIC_CURAND_FOR_CPU_BUILD) - target_include_directories(cunumeric PRIVATE ${cunumeric_cuRAND_INCLUDE_DIR}) -endif() - -# Change THRUST_DEVICE_SYSTEM for `.cpp` files -if(Legion_USE_OpenMP) - list(APPEND cunumeric_CXX_OPTIONS -UTHRUST_DEVICE_SYSTEM) - list(APPEND cunumeric_CXX_OPTIONS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_OMP) -elseif(NOT Legion_USE_CUDA) - list(APPEND cunumeric_CXX_OPTIONS 
-UTHRUST_DEVICE_SYSTEM) - list(APPEND cunumeric_CXX_OPTIONS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_CPP) -endif() - -target_compile_options(cunumeric - PRIVATE "$<$:${cunumeric_CXX_OPTIONS}>" - "$<$:${cunumeric_CUDA_OPTIONS}>") - -target_compile_definitions(cunumeric - PUBLIC "$<$:${cunumeric_CXX_DEFS}>" - "$<$:${cunumeric_CUDA_DEFS}>") - -target_include_directories(cunumeric - PRIVATE - $ - INTERFACE - $ -) - -if(Legion_USE_CUDA) - file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" -[=[ -SECTIONS -{ -.nvFatBinSegment : { *(.nvFatBinSegment) } -.nv_fatbin : { *(.nv_fatbin) } -} -]=]) - - # ensure CUDA symbols aren't relocated to the middle of the debug build binaries - target_link_options(cunumeric PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") -endif() - -############################################################################## -# - install targets----------------------------------------------------------- - -include(CPack) -include(GNUInstallDirs) -rapids_cmake_install_lib_dir(lib_dir) - -install(TARGETS cunumeric - DESTINATION ${lib_dir} - EXPORT cunumeric-exports) - -install( - FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cunumeric/version_config.hpp - DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cunumeric) - -if(cunumeric_INSTALL_TBLIS) - install(DIRECTORY ${tblis_BINARY_DIR}/lib/ DESTINATION ${lib_dir}) - install(DIRECTORY ${tblis_BINARY_DIR}/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) -endif() - -############################################################################## -# - install export ----------------------------------------------------------- - -set(doc_string - [=[ -Provide targets for cuNumeric, an aspiring drop-in replacement for NumPy at scale. 
- -Imported Targets: - - cunumeric::cunumeric - -]=]) - -string(JOIN "\n" code_string - "set(Legion_USE_CUDA ${Legion_USE_CUDA})" - "set(Legion_USE_OpenMP ${Legion_USE_OpenMP})" - "set(Legion_BOUNDS_CHECKS ${Legion_BOUNDS_CHECKS})" -) - -if(DEFINED Legion_USE_Python) - string(APPEND code_string "\nset(Legion_USE_Python ${Legion_USE_Python})") -endif() - -if(DEFINED Legion_NETWORKS) - string(APPEND code_string "\nset(Legion_NETWORKS ${Legion_NETWORKS})") -endif() - -rapids_export( - INSTALL cunumeric - EXPORT_SET cunumeric-exports - GLOBAL_TARGETS cunumeric - NAMESPACE cunumeric:: - DOCUMENTATION doc_string - FINAL_CODE_BLOCK code_string) - -# build export targets -rapids_export( - BUILD cunumeric - EXPORT_SET cunumeric-exports - GLOBAL_TARGETS cunumeric - NAMESPACE cunumeric:: - DOCUMENTATION doc_string - FINAL_CODE_BLOCK code_string) diff --git a/cunumeric/__init__.py b/cupynumeric/__init__.py similarity index 69% rename from cunumeric/__init__.py rename to cupynumeric/__init__.py index 3ad86dd9f..01037dfa3 100644 --- a/cunumeric/__init__.py +++ b/cupynumeric/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # """ -cuNumeric +cuPyNumeric ===== Provides a distributed task-parallel implementation of the Numpy interface @@ -24,18 +24,15 @@ """ from __future__ import annotations -import os - import numpy as _np -from cunumeric import linalg, random, fft, ma -from cunumeric.array import maybe_convert_to_np_ndarray, ndarray -from cunumeric.bits import packbits, unpackbits -from cunumeric.module import * -from cunumeric._ufunc import * -from cunumeric.logic import * -from cunumeric.window import bartlett, blackman, hamming, hanning, kaiser -from cunumeric.coverage import clone_module +from . 
import linalg, random, fft, ma +from ._array.array import ndarray +from ._array.util import maybe_convert_to_np_ndarray +from ._module import * +from ._ufunc import * +from ._utils.array import is_supported_dtype +from ._utils.coverage import clone_module clone_module(_np, globals(), maybe_convert_to_np_ndarray) diff --git a/cupynumeric/_array/__init__.py b/cupynumeric/_array/__init__.py new file mode 100644 index 000000000..31d8d448c --- /dev/null +++ b/cupynumeric/_array/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations diff --git a/cunumeric/array.py b/cupynumeric/_array/array.py similarity index 72% rename from cunumeric/array.py rename to cupynumeric/_array/array.py index 8bfc5178a..1787affe6 100644 --- a/cunumeric/array.py +++ b/cupynumeric/_array/array.py @@ -1,4 +1,4 @@ -# Copyright 2021-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,61 +16,64 @@ import operator import warnings -from functools import reduce, wraps -from inspect import signature -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Optional, - Sequence, - TypeVar, - Union, - cast, -) +from functools import reduce +from typing import TYPE_CHECKING, Any, Sequence, cast +import legate.core.types as ty import numpy as np -from legate.core import Array, Field +from legate.core import Field, LogicalArray, Scalar from legate.core.utils import OrderedSet -from numpy.core.multiarray import ( # type: ignore [attr-defined] - normalize_axis_index, -) -from numpy.core.numeric import ( # type: ignore [attr-defined] - normalize_axis_tuple, -) -from typing_extensions import ParamSpec -from .config import ( - BinaryOpCode, - ConvertCode, +from .. import _ufunc +from .._utils import is_np2 +from .._utils.array import ( + calculate_volume, + max_identity, + min_identity, + to_core_type, +) +from .._utils.coverage import FALLBACK_WARNING, clone_class, is_implemented +from .._utils.linalg import dot_modes +from .._utils.structure import deep_apply +from ..config import ( FFTDirection, FFTNormalization, FFTType, ScanCode, + TransferType, UnaryOpCode, UnaryRedCode, ) -from .coverage import FALLBACK_WARNING, clone_class, is_implemented -from .runtime import runtime -from .types import NdShape -from .utils import ( - calculate_volume, - deep_apply, - dot_modes, - to_core_dtype, +from ..runtime import runtime +from ..types import NdShape +from .flags import flagsobj +from .thunk import perform_scan, perform_unary_op, perform_unary_reduction +from .util import ( + add_boilerplate, + broadcast_where, + check_writeable, + convert_to_cupynumeric_ndarray, + maybe_convert_to_np_ndarray, + sanitize_shape, tuple_pop, ) +if is_np2: + from numpy.lib.array_utils import normalize_axis_index # type: ignore + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.multiarray import normalize_axis_index # type: ignore 
+ from numpy.core.numeric import normalize_axis_tuple # type: ignore + if TYPE_CHECKING: from pathlib import Path import numpy.typing as npt - from .thunk import NumPyThunk - from .types import ( + from .._thunk.thunk import NumPyThunk + from ..types import ( BoundsMode, CastingKind, - NdShapeLike, OrderType, SelectKind, SortSide, @@ -79,187 +82,6 @@ from math import prod -R = TypeVar("R") -P = ParamSpec("P") - - -def add_boilerplate( - *array_params: str, -) -> Callable[[Callable[P, R]], Callable[P, R]]: - """ - Adds required boilerplate to the wrapped cunumeric.ndarray or module-level - function. - - Every time the wrapped function is called, this wrapper will: - * Convert all specified array-like parameters, plus the special "out" - parameter (if present), to cuNumeric ndarrays. - * Convert the special "where" parameter (if present) to a valid predicate. - """ - keys = OrderedSet(array_params) - assert len(keys) == len(array_params) - - def decorator(func: Callable[P, R]) -> Callable[P, R]: - assert not hasattr( - func, "__wrapped__" - ), "this decorator must be the innermost" - - # For each parameter specified by name, also consider the case where - # it's passed as a positional parameter. 
- indices: OrderedSet[int] = OrderedSet() - where_idx: Optional[int] = None - out_idx: Optional[int] = None - params = signature(func).parameters - extra = keys - OrderedSet(params) - assert len(extra) == 0, f"unknown parameter(s): {extra}" - for idx, param in enumerate(params): - if param == "where": - where_idx = idx - elif param == "out": - out_idx = idx - elif param in keys: - indices.add(idx) - - @wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> R: - assert (where_idx is None or len(args) <= where_idx) and ( - out_idx is None or len(args) <= out_idx - ), "'where' and 'out' should be passed as keyword arguments" - - # Convert relevant arguments to cuNumeric ndarrays - args = tuple( - convert_to_cunumeric_ndarray(arg) - if idx in indices and arg is not None - else arg - for (idx, arg) in enumerate(args) - ) - for k, v in kwargs.items(): - if v is None: - continue - elif k == "out": - kwargs[k] = convert_to_cunumeric_ndarray(v, share=True) - if not kwargs[k].flags.writeable: - raise ValueError("out is not writeable") - elif (k in keys) or (k == "where"): - kwargs[k] = convert_to_cunumeric_ndarray(v) - - return func(*args, **kwargs) - - return wrapper - - return decorator - - -def convert_to_cunumeric_ndarray(obj: Any, share: bool = False) -> ndarray: - # If this is an instance of one of our ndarrays then we're done - if isinstance(obj, ndarray): - return obj - # Ask the runtime to make a numpy thunk for this object - thunk = runtime.get_numpy_thunk(obj, share=share) - writeable = ( - obj.flags.writeable if isinstance(obj, np.ndarray) and share else True - ) - return ndarray(shape=None, thunk=thunk, writeable=writeable) - - -def maybe_convert_to_np_ndarray(obj: Any) -> Any: - """ - Converts cuNumeric arrays into NumPy arrays, otherwise has no effect. 
- """ - from .ma import MaskedArray - - if isinstance(obj, (ndarray, MaskedArray)): - return obj.__array__() - return obj - - -def check_writeable(arr: Union[ndarray, tuple[ndarray, ...], None]) -> None: - """ - Check if the current array is writeable - This check needs to be manually inserted - with consideration on the behavior of the corresponding method - """ - if arr is None: - return - check_list = (arr,) if not isinstance(arr, tuple) else arr - if any(not arr.flags.writeable for arr in check_list): - raise ValueError("array is not writeable") - - -def broadcast_where( - where: Union[ndarray, None], shape: NdShape -) -> Union[ndarray, None]: - if where is not None and where.shape != shape: - from .module import broadcast_to - - where = broadcast_to(where, shape) - return where - - -class flagsobj: - """ - Information about the memory layout of the array. - - These flags don't reflect the properties of the cuNumeric array, but - rather the NumPy array that will be produced if the cuNumeric array is - materialized on a single node. 
- """ - - def __init__(self, array: ndarray) -> None: - # prevent infinite __setattr__ recursion - object.__setattr__(self, "_array", array) - - def __repr__(self) -> str: - return f"""\ - C_CONTIGUOUS : {self["C"]} - F_CONTIGUOUS : {self["F"]} - OWNDATA : {self["O"]} - WRITEABLE : {self["W"]} - ALIGNED : {self["A"]} - WRITEBACKIFCOPY : {self["X"]} -""" - - def __eq__(self, other: Any) -> bool: - flags = ("C", "F", "O", "W", "A", "X") - if not isinstance(other, (flagsobj, np.core.multiarray.flagsobj)): - return False - - return all(self[f] == other[f] for f in flags) # type: ignore [index] - - def __getattr__(self, name: str) -> Any: - if name == "writeable": - return self._array._writeable - flags = self._array.__array__().flags - return getattr(flags, name) - - def __setattr__(self, name: str, value: Any) -> None: - if name == "writeable": - self._check_writeable(value) - self._array._writeable = bool(value) - else: - flags = self._array.__array__().flags - setattr(flags, name, value) - - def __getitem__(self, key: Any) -> bool: - if key == "W": - return self._array._writeable - flags = self._array.__array__().flags - return flags[key] - - def __setitem__(self, key: str, value: Any) -> None: - if key == "W": - self._check_writeable(value) - self._array._writeable = bool(value) - else: - flags = self._array.__array__().flags - flags[key] = value - - def _check_writeable(self, value: Any) -> None: - if value and not self._array._writeable: - raise ValueError( - "non-writeable cunumeric arrays cannot be made writeable" - ) - - NDARRAY_INTERNAL = { "__array_finalize__", "__array_function__", @@ -278,19 +100,19 @@ def __init__( self, shape: Any, dtype: npt.DTypeLike = np.float64, - buffer: Union[Any, None] = None, + buffer: Any | None = None, offset: int = 0, - strides: Union[tuple[int], None] = None, - order: Union[OrderType, None] = None, - thunk: Union[NumPyThunk, None] = None, - inputs: Union[Any, None] = None, + strides: tuple[int] | None = None, + order: 
OrderType | None = None, + thunk: NumPyThunk | None = None, + inputs: Any | None = None, writeable: bool = True, ) -> None: - # `inputs` being a cuNumeric ndarray is definitely a bug + # `inputs` being a cuPyNumeric ndarray is definitely a bug assert not isinstance(inputs, ndarray) if thunk is None: assert shape is not None - sanitized_shape = self._sanitize_shape(shape) + sanitized_shape = sanitize_shape(shape) if not isinstance(dtype, np.dtype): dtype = np.dtype(dtype) if buffer is not None: @@ -304,7 +126,7 @@ def __init__( order=order, ) self._thunk = runtime.find_or_create_array_thunk( - np_array, share=False + np_array, TransferType.SHARE ) else: # Filter the inputs if necessary @@ -314,57 +136,32 @@ def __init__( for inp in inputs if isinstance(inp, ndarray) ] - core_dtype = to_core_dtype(dtype) + core_dtype = to_core_type(dtype) self._thunk = runtime.create_empty_thunk( sanitized_shape, core_dtype, inputs ) else: self._thunk = thunk - self._legate_data: Union[dict[str, Any], None] = None + self._legate_data: dict[str, Any] | None = None self._writeable = writeable - @staticmethod - def _sanitize_shape( - shape: Union[NdShapeLike, Sequence[Any], npt.NDArray[Any], ndarray] - ) -> NdShape: - seq: tuple[Any, ...] - if isinstance(shape, (ndarray, np.ndarray)): - if shape.ndim == 0: - seq = (shape.__array__().item(),) - else: - seq = tuple(shape.__array__()) - elif np.isscalar(shape): - seq = (shape,) - else: - seq = tuple(cast(NdShape, shape)) - try: - # Unfortunately, we can't do this check using - # 'isinstance(value, int)', as the values in a NumPy ndarray - # don't satisfy the predicate (they have numpy value types, - # such as numpy.int64). 
- result = tuple(operator.index(value) for value in seq) - except TypeError: - raise TypeError( - "expected a sequence of integers or a single integer, " - f"got {shape!r}" - ) - return result - # Support for the Legate data interface @property def __legate_data_interface__(self) -> dict[str, Any]: if self._legate_data is None: # If the thunk is an eager array, we need to convert it to a # deferred array so we can extract a legate store - deferred_thunk = runtime.to_deferred_array(self._thunk) + deferred_thunk = runtime.to_deferred_array( + self._thunk, read_only=False + ) # We don't have nullable data for the moment # until we support masked arrays dtype = deferred_thunk.base.type - array = Array(dtype, [None, deferred_thunk.base]) + array = LogicalArray.from_store(deferred_thunk.base) self._legate_data = dict() self._legate_data["version"] = 1 - field = Field("cuNumeric Array", dtype) + field = Field("cuPyNumeric Array", dtype) self._legate_data["data"] = {field: array} return self._legate_data @@ -389,7 +186,7 @@ def __legate_data_interface__(self) -> dict[str, Any]: def __array_function__( self, func: Any, types: Any, args: tuple[Any], kwargs: dict[str, Any] ) -> Any: - import cunumeric as cn + import cupynumeric as cn what = func.__name__ @@ -400,19 +197,19 @@ def __array_function__( return NotImplemented # We are wrapping all NumPy modules, so we can expect to find every - # NumPy API call in cuNumeric, even if just an "unimplemented" stub. + # NumPy API call in cuPyNumeric, even if just an "unimplemented" stub. module = reduce(getattr, func.__module__.split(".")[1:], cn) cn_func = getattr(module, func.__name__) - # We can't immediately forward to the corresponding cuNumeric + # We can't immediately forward to the corresponding cuPyNumeric # entrypoint. Say that we reached this point because the user code - # invoked `np.foo(x, bar=True)` where `x` is a `cunumeric.ndarray`. 
If - # our implementation of `foo` is not complete, and cannot handle + # invoked `np.foo(x, bar=True)` where `x` is a `cupynumeric.ndarray`. + # If our implementation of `foo` is not complete, and cannot handle # `bar=True`, then forwarding this call to `cn.foo` would fail. This # goes against the semantics of `__array_function__`, which shouldn't # fail if the custom implementation cannot handle the provided # arguments. Conversely, if the user calls `cn.foo(x, bar=True)` - # directly, that means they requested the cuNumeric implementation + # directly, that means they requested the cuPyNumeric implementation # specifically, and the `NotImplementedError` should not be hidden. if is_implemented(cn_func): try: @@ -435,7 +232,7 @@ def __array_function__( def __array_ufunc__( self, ufunc: Any, method: str, *inputs: Any, **kwargs: Any ) -> Any: - from . import _ufunc + from .. import _ufunc # Check whether we should handle the arguments array_args = inputs @@ -488,30 +285,31 @@ def T(self) -> ndarray: See Also -------- - cunumeric.transpose + cupynumeric.transpose ndarray.transpose """ return self.transpose() @property - def base(self) -> Union[npt.NDArray[Any], None]: + def base(self) -> npt.NDArray[Any] | None: """ - Returns dtype for the base element of the subarrays, - regardless of their dimension or shape. - - See Also - -------- - numpy.dtype.subdtype - + Base object if memory is from some other object. """ - return self.__array__().base + raise NotImplementedError( + "cupynumeric.ndarray doesn't keep track of the array view " + "hierarchy yet" + ) @property def data(self) -> memoryview: """ Python buffer object pointing to the start of the array's data. + Notes + ----- + This causes the entire (potentially distributed) array to be collected + into one memory. """ return self.__array__().data @@ -534,9 +332,9 @@ def flags(self) -> Any: """ Information about the memory layout of the array. 
- These flags don't reflect the properties of the cuNumeric array, but - rather the NumPy array that will be produced if the cuNumeric array is - materialized on a single node. + These flags don't reflect the properties of the cuPyNumeric array, but + rather the NumPy array that will be produced if the cuPyNumeric array + is materialized on a single node. Attributes ---------- @@ -618,7 +416,7 @@ def flat(self) -> np.flatiter[npt.NDArray[Any]]: flatten : Return a copy of the array collapsed into one dimension. Availability - -------- + ------------ Single CPU """ @@ -794,9 +592,7 @@ def __abs__(self) -> ndarray: """ # Handle the nice case of it being unsigned - from ._ufunc import absolute - - return absolute(self) + return _ufunc.absolute(self) def __add__(self, rhs: Any) -> ndarray: """a.__add__(value, /) @@ -808,9 +604,7 @@ def __add__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import add - - return add(self, rhs) + return _ufunc.add(self, rhs) def __and__(self, rhs: Any) -> ndarray: """a.__and__(value, /) @@ -822,12 +616,10 @@ def __and__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_and - - return bitwise_and(self, rhs) + return _ufunc.bitwise_and(self, rhs) def __array__( - self, dtype: Union[np.dtype[Any], None] = None + self, dtype: np.dtype[Any] | None = None ) -> npt.NDArray[Any]: """a.__array__([dtype], /) @@ -877,12 +669,13 @@ def __contains__(self, item: Any) -> ndarray: args = (np.array(item, dtype=self.dtype),) if args[0].size != 1: raise ValueError("contains needs scalar item") - return self._perform_unary_reduction( + core_dtype = to_core_type(self.dtype) + return perform_unary_reduction( UnaryRedCode.CONTAINS, self, axis=None, res_dtype=bool, - args=args, + args=(Scalar(args[0].squeeze()[()], core_dtype),), ) def __copy__(self) -> ndarray: @@ -902,7 +695,7 @@ def __copy__(self) -> ndarray: result._thunk.copy(self._thunk, deep=False) return result - def __deepcopy__(self, 
memo: Union[Any, None] = None) -> ndarray: + def __deepcopy__(self, memo: Any | None = None) -> ndarray: """a.__deepcopy__(memo, /) Deep copy of array. @@ -941,7 +734,7 @@ def __divmod__(self, rhs: Any) -> ndarray: """ raise NotImplementedError( - "cunumeric.ndarray doesn't support __divmod__ yet" + "cupynumeric.ndarray doesn't support __divmod__ yet" ) def __eq__(self, rhs: object) -> ndarray: # type: ignore [override] @@ -954,9 +747,7 @@ def __eq__(self, rhs: object) -> ndarray: # type: ignore [override] Multiple GPUs, Multiple CPUs """ - from ._ufunc import equal - - return equal(self, rhs) + return _ufunc.equal(self, rhs) def __float__(self) -> float: """a.__float__(/) @@ -976,9 +767,7 @@ def __floordiv__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import floor_divide - - return floor_divide(self, rhs) + return _ufunc.floor_divide(self, rhs) def __format__(self, *args: Any, **kwargs: Any) -> str: return self.__array__().__format__(*args, **kwargs) @@ -993,14 +782,12 @@ def __ge__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import greater_equal - - return greater_equal(self, rhs) + return _ufunc.greater_equal(self, rhs) # __getattribute__ def _convert_key(self, key: Any, first: bool = True) -> Any: - # Convert any arrays stored in a key to a cuNumeric array + # Convert any arrays stored in a key to a cuPyNumeric array if isinstance(key, slice): key = slice( operator.index(key.start) if key.start is not None else None, @@ -1017,9 +804,9 @@ def _convert_key(self, key: Any, first: bool = True) -> Any: elif isinstance(key, tuple) and first: return tuple(self._convert_key(k, first=False) for k in key) else: - # Otherwise convert it to a cuNumeric array, check types + # Otherwise convert it to a cuPyNumeric array, check types # and get the thunk - key = convert_to_cunumeric_ndarray(key) + key = convert_to_cupynumeric_ndarray(key) if key.dtype != bool and not np.issubdtype(key.dtype, np.integer): raise 
TypeError("index arrays should be int or bool type") if key.dtype != bool: @@ -1047,12 +834,10 @@ def __gt__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import greater - - return greater(self, rhs) + return _ufunc.greater(self, rhs) def __hash__(self) -> int: - raise TypeError("unhashable type: cunumeric.ndarray") + raise TypeError("unhashable type: cupynumeric.ndarray") def __iadd__(self, rhs: Any) -> ndarray: """a.__iadd__(value, /) @@ -1064,9 +849,7 @@ def __iadd__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import add - - return add(self, rhs, out=self) + return _ufunc.add(self, rhs, out=self) def __iand__(self, rhs: Any) -> ndarray: """a.__iand__(value, /) @@ -1078,9 +861,7 @@ def __iand__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_and - - return bitwise_and(self, rhs, out=self) + return _ufunc.bitwise_and(self, rhs, out=self) def __idiv__(self, rhs: Any) -> ndarray: """a.__idiv__(value, /) @@ -1104,9 +885,7 @@ def __ifloordiv__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import floor_divide - - return floor_divide(self, rhs, out=self) + return _ufunc.floor_divide(self, rhs, out=self) def __ilshift__(self, rhs: Any) -> ndarray: """a.__ilshift__(value, /) @@ -1118,9 +897,21 @@ def __ilshift__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import left_shift + return _ufunc.left_shift(self, rhs, out=self) + + def __imatmul__(self, rhs: Any) -> ndarray: + """a.__imatmul__(value, /) + + Return ``self@=value``. 
+ + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + from .._module.linalg_mvp import matmul - return left_shift(self, rhs, out=self) + return matmul(self, rhs, out=self) def __imod__(self, rhs: Any) -> ndarray: """a.__imod__(value, /) @@ -1132,9 +923,7 @@ def __imod__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import remainder - - return remainder(self, rhs, out=self) + return _ufunc.remainder(self, rhs, out=self) def __imul__(self, rhs: Any) -> ndarray: """a.__imul__(value, /) @@ -1146,9 +935,7 @@ def __imul__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import multiply - - return multiply(self, rhs, out=self) + return _ufunc.multiply(self, rhs, out=self) def __index__(self) -> int: return self.__array__().__index__() @@ -1171,15 +958,11 @@ def __invert__(self) -> ndarray: Multiple GPUs, Multiple CPUs """ - if self.dtype == np.bool_: + if self.dtype == bool: # Boolean values are special, just do logical NOT - from ._ufunc import logical_not - - return logical_not(self) + return _ufunc.logical_not(self) else: - from ._ufunc import invert - - return invert(self) + return _ufunc.invert(self) def __ior__(self, rhs: Any) -> ndarray: """a.__ior__(/) @@ -1191,9 +974,7 @@ def __ior__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_or - - return bitwise_or(self, rhs, out=self) + return _ufunc.bitwise_or(self, rhs, out=self) def __ipow__(self, rhs: float) -> ndarray: """a.__ipow__(/) @@ -1205,9 +986,7 @@ def __ipow__(self, rhs: float) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import power - - return power(self, rhs, out=self) + return _ufunc.power(self, rhs, out=self) def __irshift__(self, rhs: Any) -> ndarray: """a.__irshift__(/) @@ -1219,9 +998,7 @@ def __irshift__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import right_shift - - return right_shift(self, rhs, out=self) + return 
_ufunc.right_shift(self, rhs, out=self) def __iter__(self) -> Any: """a.__iter__(/)""" @@ -1237,9 +1014,7 @@ def __isub__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import subtract - - return subtract(self, rhs, out=self) + return _ufunc.subtract(self, rhs, out=self) def __itruediv__(self, rhs: Any) -> ndarray: """a.__itruediv__(/) @@ -1251,9 +1026,7 @@ def __itruediv__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import true_divide - - return true_divide(self, rhs, out=self) + return _ufunc.true_divide(self, rhs, out=self) def __ixor__(self, rhs: Any) -> ndarray: """a.__ixor__(/) @@ -1265,9 +1038,7 @@ def __ixor__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_xor - - return bitwise_xor(self, rhs, out=self) + return _ufunc.bitwise_xor(self, rhs, out=self) def __le__(self, rhs: Any) -> ndarray: """a.__le__(value, /) @@ -1279,9 +1050,7 @@ def __le__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import less_equal - - return less_equal(self, rhs) + return _ufunc.less_equal(self, rhs) def __len__(self) -> int: """a.__len__(/) @@ -1301,9 +1070,7 @@ def __lshift__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import left_shift - - return left_shift(self, rhs) + return _ufunc.left_shift(self, rhs) def __lt__(self, rhs: Any) -> ndarray: """a.__lt__(value, /) @@ -1315,9 +1082,7 @@ def __lt__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import less - - return less(self, rhs) + return _ufunc.less(self, rhs) def __matmul__(self, value: Any) -> ndarray: """a.__matmul__(value, /) @@ -1329,7 +1094,9 @@ def __matmul__(self, value: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - return self.dot(value) + from .._module.linalg_mvp import matmul + + return matmul(self, value) def __mod__(self, rhs: Any) -> ndarray: """a.__mod__(value, /) @@ -1341,9 +1108,7 @@ def __mod__(self, rhs: 
Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import remainder - - return remainder(self, rhs) + return _ufunc.remainder(self, rhs) def __mul__(self, rhs: Any) -> ndarray: """a.__mul__(value, /) @@ -1355,9 +1120,7 @@ def __mul__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import multiply - - return multiply(self, rhs) + return _ufunc.multiply(self, rhs) def __ne__(self, rhs: object) -> ndarray: # type: ignore [override] """a.__ne__(value, /) @@ -1369,9 +1132,7 @@ def __ne__(self, rhs: object) -> ndarray: # type: ignore [override] Multiple GPUs, Multiple CPUs """ - from ._ufunc import not_equal - - return not_equal(self, rhs) + return _ufunc.not_equal(self, rhs) def __neg__(self) -> ndarray: """a.__neg__(value, /) @@ -1383,9 +1144,7 @@ def __neg__(self) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import negative - - return negative(self) + return _ufunc.negative(self) # __new__ @@ -1395,11 +1154,11 @@ def nonzero(self) -> tuple[ndarray, ...]: Return the indices of the elements that are non-zero. - Refer to :func:`cunumeric.nonzero` for full documentation. + Refer to :func:`cupynumeric.nonzero` for full documentation. 
See Also -------- - cunumeric.nonzero : equivalent function + cupynumeric.nonzero : equivalent function Availability -------- @@ -1421,9 +1180,7 @@ def __or__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_or - - return bitwise_or(self, rhs) + return _ufunc.bitwise_or(self, rhs) def __pos__(self) -> ndarray: """a.__pos__(value, /) @@ -1436,9 +1193,7 @@ def __pos__(self) -> ndarray: """ # the positive opeartor is equivalent to copy - from ._ufunc import positive - - return positive(self) + return _ufunc.positive(self) def __pow__(self, rhs: float) -> ndarray: """a.__pow__(value, /) @@ -1450,9 +1205,7 @@ def __pow__(self, rhs: float) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import power - - return power(self, rhs) + return _ufunc.power(self, rhs) def __radd__(self, lhs: Any) -> ndarray: """a.__radd__(value, /) @@ -1464,9 +1217,7 @@ def __radd__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import add - - return add(lhs, self) + return _ufunc.add(lhs, self) def __rand__(self, lhs: Any) -> ndarray: """a.__rand__(value, /) @@ -1478,9 +1229,7 @@ def __rand__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_and - - return bitwise_and(lhs, self) + return _ufunc.bitwise_and(lhs, self) def __rdiv__(self, lhs: Any) -> ndarray: """a.__rdiv__(value, /) @@ -1492,9 +1241,7 @@ def __rdiv__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import true_divide - - return true_divide(lhs, self) + return _ufunc.true_divide(lhs, self) def __rdivmod__(self, lhs: Any) -> ndarray: """a.__rdivmod__(value, /) @@ -1507,12 +1254,10 @@ def __rdivmod__(self, lhs: Any) -> ndarray: """ raise NotImplementedError( - "cunumeric.ndarray doesn't support __rdivmod__ yet" + "cupynumeric.ndarray doesn't support __rdivmod__ yet" ) - def __reduce__( - self, *args: Any, **kwargs: Any - ) -> Union[str, tuple[str, ...]]: + def __reduce__(self, *args: 
Any, **kwargs: Any) -> str | tuple[str, ...]: """a.__reduce__(/) For pickling. @@ -1522,7 +1267,7 @@ def __reduce__( def __reduce_ex__( self, *args: Any, **kwargs: Any - ) -> Union[str, tuple[str, ...]]: + ) -> str | tuple[str, ...]: return self.__array__().__reduce_ex__(*args, **kwargs) def __repr__(self) -> str: @@ -1547,9 +1292,21 @@ def __rfloordiv__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import floor_divide + return _ufunc.floor_divide(lhs, self) + + def __rmatmul__(self, lhs: Any) -> ndarray: + """a.__rmatmul__(value, /) + + Return ``value@self``. + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + from .._module.linalg_mvp import matmul - return floor_divide(lhs, self) + return matmul(lhs, self) def __rmod__(self, lhs: Any) -> ndarray: """a.__rmod__(value, /) @@ -1561,9 +1318,7 @@ def __rmod__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import remainder - - return remainder(lhs, self) + return _ufunc.remainder(lhs, self) def __rmul__(self, lhs: Any) -> ndarray: """a.__rmul__(value, /) @@ -1575,9 +1330,7 @@ def __rmul__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import multiply - - return multiply(lhs, self) + return _ufunc.multiply(lhs, self) def __ror__(self, lhs: Any) -> ndarray: """a.__ror__(value, /) @@ -1589,9 +1342,7 @@ def __ror__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_or - - return bitwise_or(lhs, self) + return _ufunc.bitwise_or(lhs, self) def __rpow__(self, lhs: Any) -> ndarray: """__rpow__(value, /) @@ -1603,9 +1354,7 @@ def __rpow__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import power - - return power(lhs, self) + return _ufunc.power(lhs, self) def __rshift__(self, rhs: Any) -> ndarray: """a.__rshift__(value, /) @@ -1617,9 +1366,7 @@ def __rshift__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import right_shift - 
- return right_shift(self, rhs) + return _ufunc.right_shift(self, rhs) def __rsub__(self, lhs: Any) -> ndarray: """a.__rsub__(value, /) @@ -1631,9 +1378,7 @@ def __rsub__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import subtract - - return subtract(lhs, self) + return _ufunc.subtract(lhs, self) def __rtruediv__(self, lhs: Any) -> ndarray: """a.__rtruediv__(value, /) @@ -1645,9 +1390,7 @@ def __rtruediv__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import true_divide - - return true_divide(lhs, self) + return _ufunc.true_divide(lhs, self) def __rxor__(self, lhs: Any) -> ndarray: """a.__rxor__(value, /) @@ -1659,9 +1402,7 @@ def __rxor__(self, lhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_xor - - return bitwise_xor(lhs, self) + return _ufunc.bitwise_xor(lhs, self) # __setattr__ @add_boilerplate("value") @@ -1713,9 +1454,7 @@ def __sub__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import subtract - - return subtract(self, rhs) + return _ufunc.subtract(self, rhs) def __str__(self) -> str: """a.__str__(/) @@ -1739,9 +1478,7 @@ def __truediv__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import true_divide - - return true_divide(self, rhs) + return _ufunc.true_divide(self, rhs) def __xor__(self, rhs: Any) -> ndarray: """a.__xor__(value, /) @@ -1753,35 +1490,33 @@ def __xor__(self, rhs: Any) -> ndarray: Multiple GPUs, Multiple CPUs """ - from ._ufunc import bitwise_xor - - return bitwise_xor(self, rhs) + return _ufunc.bitwise_xor(self, rhs) @add_boilerplate() def all( self, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Union[int, float, None] = None, - where: Union[ndarray, None] = None, + initial: int | float | None = None, + where: ndarray | None = None, ) -> ndarray: """a.all(axis=None, out=None, keepdims=False, initial=None, 
where=True) Returns True if all elements evaluate to True. - Refer to :func:`cunumeric.all` for full documentation. + Refer to :func:`cupynumeric.all` for full documentation. See Also -------- - cunumeric.all : equivalent function + cupynumeric.all : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.ALL, self, axis=axis, @@ -1796,27 +1531,27 @@ def all( def any( self, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Union[int, float, None] = None, - where: Union[ndarray, None] = None, + initial: int | float | None = None, + where: ndarray | None = None, ) -> ndarray: """a.any(axis=None, out=None, keepdims=False, initial=None, where=True) Returns True if any of the elements of `a` evaluate to True. - Refer to :func:`cunumeric.any` for full documentation. + Refer to :func:`cupynumeric.any` for full documentation. See Also -------- - cunumeric.any : equivalent function + cupynumeric.any : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.ANY, self, axis=axis, @@ -1831,18 +1566,18 @@ def any( def argmax( self, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, keepdims: bool = False, ) -> ndarray: """a.argmax(axis=None, out=None) Return indices of the maximum values along the given axis. - Refer to :func:`cunumeric.argmax` for full documentation. + Refer to :func:`cupynumeric.argmax` for full documentation. 
See Also -------- - cunumeric.argmax : equivalent function + cupynumeric.argmax : equivalent function Availability -------- @@ -1853,7 +1588,7 @@ def argmax( raise ValueError("output array must have int64 dtype") if axis is not None and not isinstance(axis, int): raise ValueError("axis must be an integer") - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.ARGMAX, self, axis=axis, @@ -1866,18 +1601,18 @@ def argmax( def argmin( self, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, keepdims: bool = False, ) -> ndarray: """a.argmin(axis=None, out=None) Return indices of the minimum values along the given axis. - Refer to :func:`cunumeric.argmin` for detailed documentation. + Refer to :func:`cupynumeric.argmin` for detailed documentation. See Also -------- - cunumeric.argmin : equivalent function + cupynumeric.argmin : equivalent function Availability -------- @@ -1888,7 +1623,7 @@ def argmin( raise ValueError("output array must have int64 dtype") if axis is not None and not isinstance(axis, int): raise ValueError("axis must be an integer") - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.ARGMIN, self, axis=axis, @@ -1903,7 +1638,7 @@ def astype( order: OrderType = "C", casting: CastingKind = "unsafe", subok: bool = True, - copy: bool = True, + copy: bool = False, ) -> ndarray: """a.astype(dtype, order='C', casting='unsafe', subok=True, copy=True) @@ -1939,10 +1674,14 @@ def astype( array. copy : bool, optional - By default, astype always returns a newly allocated array. If this - is set to false, and the `dtype`, `order`, and `subok` - requirements are satisfied, the input array is returned instead - of a copy. + By default, astype does not returns a newly allocated array. If + this is set to True, a copy is made and returned, instead of the + input array. + + Notes + ----- + The default value for the ``copy`` argument is the opposite of Numpy. 
+ Avoiding copies reduces memory pressure. Returns ------- @@ -1995,18 +1734,18 @@ def take( self, indices: Any, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, mode: BoundsMode = "raise", ) -> ndarray: """a.take(indices, axis=None, out=None, mode="raise") Take elements from an array along an axis. - Refer to :func:`cunumeric.take` for full documentation. + Refer to :func:`cupynumeric.take` for full documentation. See Also -------- - cunumeric.take : equivalent function + cupynumeric.take : equivalent function Availability -------- @@ -2016,7 +1755,7 @@ def take( if not np.isscalar(indices): # if indices is a tuple or list, bring sub-tuples to the same shape # and concatenate them - indices = convert_to_cunumeric_ndarray(indices) + indices = convert_to_cupynumeric_ndarray(indices) if axis is None: self = self.ravel() @@ -2075,18 +1814,18 @@ def take( def choose( self, choices: Any, - out: Union[ndarray, None] = None, + out: ndarray | None = None, mode: BoundsMode = "raise", ) -> ndarray: """a.choose(choices, out=None, mode='raise') Use an index array to construct a new array from a set of choices. - Refer to :func:`cunumeric.choose` for full documentation. + Refer to :func:`cupynumeric.choose` for full documentation. See Also -------- - cunumeric.choose : equivalent function + cupynumeric.choose : equivalent function Availability -------- @@ -2104,12 +1843,12 @@ def choose( dtypes = [ch.dtype for ch in choices] ch_dtype = np.result_type(*dtypes) choices = tuple( - convert_to_cunumeric_ndarray(choices[i]).astype(ch_dtype) + convert_to_cupynumeric_ndarray(choices[i]).astype(ch_dtype) for i in range(n) ) else: - choices = convert_to_cunumeric_ndarray(choices) + choices = convert_to_cupynumeric_ndarray(choices) n = choices.shape[0] ch_dtype = choices.dtype choices = tuple(choices[i, ...] 
for i in range(n)) @@ -2177,17 +1916,17 @@ def compress( self, condition: ndarray, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, ) -> ndarray: """a.compress(self, condition, axis=None, out=None) Return selected slices of an array along given axis. - Refer to :func:`cunumeric.compress` for full documentation. + Refer to :func:`cupynumeric.compress` for full documentation. See Also -------- - cunumeric.compress : equivalent function + cupynumeric.compress : equivalent function Availability -------- @@ -2236,9 +1975,9 @@ def compress( @add_boilerplate() def clip( self, - min: Union[int, float, npt.ArrayLike, None] = None, - max: Union[int, float, npt.ArrayLike, None] = None, - out: Union[npt.NDArray[Any], ndarray, None] = None, + min: int | float | npt.ArrayLike | None = None, + max: int | float | npt.ArrayLike | None = None, + out: ndarray | None = None, ) -> ndarray: """a.clip(min=None, max=None, out=None) @@ -2246,41 +1985,68 @@ def clip( One of max or min must be given. - Refer to :func:`cunumeric.clip` for full documentation. + Refer to :func:`cupynumeric.clip` for full documentation. See Also -------- - cunumeric.clip : equivalent function + cupynumeric.clip : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ + min = max_identity(self.dtype) if min is None else min + max = min_identity(self.dtype) if max is None else max + args = ( np.array(min, dtype=self.dtype), np.array(max, dtype=self.dtype), ) if args[0].size != 1 or args[1].size != 1: runtime.warn( - "cuNumeric has not implemented clip with array-like " + "cuPyNumeric has not implemented clip with array-like " "arguments and is falling back to canonical numpy. 
You " "may notice significantly decreased performance for this " "function call.", category=RuntimeWarning, ) - if isinstance(out, np.ndarray): - self.__array__().clip(args[0], args[1], out=out) - return convert_to_cunumeric_ndarray(out, share=True) - elif isinstance(out, ndarray): + if out is not None: self.__array__().clip(args[0], args[1], out=out.__array__()) return out else: - return convert_to_cunumeric_ndarray( + return convert_to_cupynumeric_ndarray( self.__array__().clip(args[0], args[1]) ) - return self._perform_unary_op( - UnaryOpCode.CLIP, self, out=out, extra_args=args + core_dtype = to_core_type(self.dtype) + extra_args = (Scalar(min, core_dtype), Scalar(max, core_dtype)) + return perform_unary_op( + UnaryOpCode.CLIP, self, out=out, extra_args=extra_args + ) + + @add_boilerplate() + def round( + self, + decimals: int = 0, + out: ndarray | None = None, + ) -> ndarray: + """a.round(decimals=0, out=None) + + Return a with each element rounded to the given number of decimals. + + Refer to :func:`cupynumeric.round` for full documentation. + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + extra_args = ( + Scalar(decimals, ty.int64), + Scalar(10 ** abs(decimals), ty.int64), + ) + return perform_unary_op( + UnaryOpCode.ROUND, self, out=out, extra_args=extra_args ) def conj(self) -> ndarray: @@ -2288,11 +2054,11 @@ def conj(self) -> ndarray: Complex-conjugate all elements. - Refer to :func:`cunumeric.conjugate` for full documentation. + Refer to :func:`cupynumeric.conjugate` for full documentation. See Also -------- - cunumeric.conjugate : equivalent function + cupynumeric.conjugate : equivalent function Availability -------- @@ -2310,11 +2076,11 @@ def conjugate(self) -> ndarray: Return the complex conjugate, element-wise. - Refer to :func:`cunumeric.conjugate` for full documentation. + Refer to :func:`cupynumeric.conjugate` for full documentation. 
See Also -------- - cunumeric.conjugate : equivalent function + cupynumeric.conjugate : equivalent function Availability -------- @@ -2333,17 +2099,17 @@ def copy(self, order: OrderType = "C") -> ndarray: Multiple GPUs, Multiple CPUs """ - # We don't care about dimension order in cuNumeric + # We don't care about dimension order in cuPyNumeric return self.__copy__() @add_boilerplate() def cumsum( self, axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, ) -> ndarray: - return self._perform_scan( + return perform_scan( ScanCode.SUM, self, axis=axis, @@ -2356,10 +2122,10 @@ def cumsum( def cumprod( self, axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, ) -> ndarray: - return self._perform_scan( + return perform_scan( ScanCode.PROD, self, axis=axis, @@ -2372,10 +2138,10 @@ def cumprod( def nancumsum( self, axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, ) -> ndarray: - return self._perform_scan( + return perform_scan( ScanCode.SUM, self, axis=axis, @@ -2388,10 +2154,10 @@ def nancumsum( def nancumprod( self, axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, ) -> ndarray: - return self._perform_scan( + return perform_scan( ScanCode.PROD, self, axis=axis, @@ -2407,11 +2173,11 @@ def nancumprod( def _diag_helper( self, offset: int = 0, - axes: Union[Any, None] = None, + axes: Any | None = None, extract: bool = True, trace: bool = False, - out: Union[ndarray, None] = None, - dtype: Union[np.dtype[Any], None] = None, + out: ndarray | None = None, + dtype: np.dtype[Any] | None = None, ) -> ndarray: # _diag_helper can 
be used only for arrays with dim>=1 if self.ndim < 1: @@ -2540,11 +2306,11 @@ def diagonal( Return specified diagonals. - Refer to :func:`cunumeric.diagonal` for full documentation. + Refer to :func:`cupynumeric.diagonal` for full documentation. See Also -------- - cunumeric.diagonal : equivalent function + cupynumeric.diagonal : equivalent function Availability -------- @@ -2566,11 +2332,11 @@ def put( """ Replaces specified elements of the array with given values. - Refer to :func:`cunumeric.put` for full documentation. + Refer to :func:`cupynumeric.put` for full documentation. See Also -------- - cunumeric.put : equivalent function + cupynumeric.put : equivalent function Availability -------- @@ -2622,18 +2388,18 @@ def trace( offset: int = 0, axis1: Any = None, axis2: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, ) -> ndarray: """a.trace(offset=0, axis1=None, axis2=None, dtype = None, out = None) Return the sum along diagonals of the array. - Refer to :func:`cunumeric.trace` for full documentation. + Refer to :func:`cupynumeric.trace` for full documentation. See Also -------- - cunumeric.trace : equivalent function + cupynumeric.trace : equivalent function Availability -------- @@ -2665,28 +2431,27 @@ def trace( return res @add_boilerplate("rhs") - def dot(self, rhs: ndarray, out: Union[ndarray, None] = None) -> ndarray: + def dot(self, rhs: ndarray, out: ndarray | None = None) -> ndarray: """a.dot(rhs, out=None) Return the dot product of this array with ``rhs``. - Refer to :func:`cunumeric.dot` for full documentation. + Refer to :func:`cupynumeric.dot` for full documentation. 
See Also -------- - cunumeric.dot : equivalent function + cupynumeric.dot : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ - from .module import _contract # work around circular import + # work around circular import + from .._module.linalg_mvp import _contract if self.ndim == 0 or rhs.ndim == 0: - from ._ufunc import multiply - - return multiply(self, rhs, out=out) + return _ufunc.multiply(self, rhs, out=out) (self_modes, rhs_modes, out_modes) = dot_modes(self.ndim, rhs.ndim) return _contract( @@ -2699,12 +2464,12 @@ def dot(self, rhs: ndarray, out: Union[ndarray, None] = None) -> ndarray: casting="unsafe", ) - def dump(self, file: Union[str, Path]) -> None: + def dump(self, file: str | Path) -> None: """a.dump(file) Dump a pickle of the array to the specified file. - The array can be read back with pickle.load or cunumeric.load. + The array can be read back with pickle.load or cupynumeric.load. Parameters ---------- @@ -2763,7 +2528,7 @@ def _normalize_axes_shape( def fft( self, s: Any, - axes: Union[Sequence[int], None], + axes: Sequence[int] | None, kind: FFTType, direction: FFTDirection, norm: Any, @@ -2773,7 +2538,7 @@ def fft( Return the ``kind`` ``direction`` FFT of this array with normalization ``norm``. - Common entrypoint for FFT functionality in cunumeric.fft module. + Common entrypoint for FFT functionality in cupynumeric.fft module. 
Notes ----- @@ -2781,7 +2546,7 @@ def fft( See Also -------- - cunumeric.fft : FFT functions for different ``kind`` and + cupynumeric.fft : FFT functions for different ``kind`` and ``direction`` arguments Availability @@ -2928,7 +2693,7 @@ def flatten(self, order: OrderType = "C") -> ndarray: def getfield(self, dtype: np.dtype[Any], offset: int = 0) -> None: raise NotImplementedError( - "cuNumeric does not currently support type reinterpretation " + "cuPyNumeric does not currently support type reinterpretation " "for ndarray.getfield" ) @@ -2993,73 +2758,75 @@ def item(self, *args: Any) -> Any: assert result.shape == () return result._thunk.__numpy_array__() - def itemset(self, *args: Any) -> None: - """a.itemset(*args) - - Insert scalar into an array (scalar is cast to array's dtype, - if possible) - - There must be at least 1 argument, and define the last argument - as *item*. Then, ``a.itemset(*args)`` is equivalent to but faster - than ``a[args] = item``. The item should be a scalar value and `args` - must select a single item in the array `a`. - - Parameters - ---------- - \\*args : - If one argument: a scalar, only used in case `a` is of size 1. - If two arguments: the last argument is the value to be set - and must be a scalar, the first argument specifies a single array - element location. It is either an int or a tuple. - - Notes - ----- - Compared to indexing syntax, `itemset` provides some speed increase - for placing a scalar into a particular location in an `ndarray`, - if you must do this. However, generally this is discouraged: - among other problems, it complicates the appearance of the code. - Also, when using `itemset` (and `item`) inside a loop, be sure - to assign the methods to a local variable to avoid the attribute - look-up at each loop iteration. 
- - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - if len(args) == 0: - raise KeyError("itemset() requires at least one argument") - value = args[-1] - args = args[:-1] - key = self._convert_singleton_key(args) - self[key] = value + if not is_np2: + + def itemset(self, *args: Any) -> None: + """a.itemset(*args) + + Insert scalar into an array (scalar is cast to array's dtype, + if possible) + + There must be at least 1 argument, and define the last argument + as *item*. Then, ``a.itemset(*args)`` is equivalent to but faster + than ``a[args] = item``. The item should be a scalar value and + `args` must select a single item in the array `a`. + + Parameters + ---------- + \\*args : + If one argument: a scalar, only used in case `a` is of size 1. + If two arguments: the last argument is the value to be set + and must be a scalar, the first argument specifies a single + array element location. It is either an int or a tuple. + + Notes + ----- + Compared to indexing syntax, `itemset` provides some speed increase + for placing a scalar into a particular location in an `ndarray`, + if you must do this. However, generally this is discouraged: + among other problems, it complicates the appearance of the code. + Also, when using `itemset` (and `item`) inside a loop, be sure + to assign the methods to a local variable to avoid the attribute + look-up at each loop iteration. 
+ + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + if len(args) == 0: + raise KeyError("itemset() requires at least one argument") + value = args[-1] + args = args[:-1] + key = self._convert_singleton_key(args) + self[key] = value @add_boilerplate() def max( self, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Union[int, float, None] = None, - where: Union[ndarray, None] = None, + initial: int | float | None = None, + where: ndarray | None = None, ) -> ndarray: """a.max(axis=None, out=None, keepdims=False, initial=, where=True) Return the maximum along a given axis. - Refer to :func:`cunumeric.amax` for full documentation. + Refer to :func:`cupynumeric.amax` for full documentation. See Also -------- - cunumeric.amax : equivalent function + cupynumeric.amax : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.MAX, self, axis=axis, @@ -3069,19 +2836,17 @@ def max( where=where, ) - def _count_nonzero(self, axis: Any = None) -> Union[int, ndarray]: + def _count_nonzero(self, axis: Any = None) -> int | ndarray: if self.size == 0: return 0 - return ndarray._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.COUNT_NONZERO, self, res_dtype=np.dtype(np.uint64), axis=axis, ) - def _summation_dtype( - self, dtype: Optional[np.dtype[Any]] - ) -> np.dtype[Any]: + def _summation_dtype(self, dtype: np.dtype[Any] | None) -> np.dtype[Any]: # Pick our dtype if it wasn't picked yet if dtype is None: if self.dtype.kind != "f" and self.dtype.kind != "c": @@ -3096,7 +2861,7 @@ def _normalize_summation( axis: Any, ddof: int = 0, keepdims: bool = False, - where: Union[ndarray, None] = None, + where: ndarray | None = None, ) -> None: dtype = sum_array.dtype if axis is None: @@ -3132,20 +2897,20 @@ def _normalize_summation( def mean( self, axis: Any = None, - dtype: 
Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, keepdims: bool = False, - where: Union[ndarray, None] = None, + where: ndarray | None = None, ) -> ndarray: """a.mean(axis=None, dtype=None, out=None, keepdims=False) Returns the average of the array elements along given axis. - Refer to :func:`cunumeric.mean` for full documentation. + Refer to :func:`cupynumeric.mean` for full documentation. See Also -------- - cunumeric.mean : equivalent function + cupynumeric.mean : equivalent function Availability -------- @@ -3154,7 +2919,7 @@ def mean( """ if axis is not None and not isinstance(axis, int): raise NotImplementedError( - "cunumeric.mean only supports int types for " + "cupynumeric.mean only supports int types for " "`axis` currently" ) @@ -3190,15 +2955,13 @@ def mean( def _nanmean( self, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + axis: int | tuple[int, ...] | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, keepdims: bool = False, - where: Union[ndarray, None] = None, + where: ndarray | None = None, ) -> ndarray: - from . import _ufunc - - if np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, np.bool_): + if np.issubdtype(dtype, np.integer) or np.issubdtype(dtype, bool): return self.mean( axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where ) @@ -3219,23 +2982,23 @@ def _nanmean( @add_boilerplate() def var( self, - axis: Optional[Union[int, tuple[int, ...]]] = None, - dtype: Optional[np.dtype[Any]] = None, - out: Optional[ndarray] = None, + axis: int | tuple[int, ...] 
| None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, ddof: int = 0, keepdims: bool = False, *, - where: Union[ndarray, None] = None, + where: ndarray | None = None, ) -> ndarray: """a.var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False) Returns the variance of the array elements along given axis. - Refer to :func:`cunumeric.var` for full documentation. + Refer to :func:`cupynumeric.var` for full documentation. See Also -------- - cunumeric.var : equivalent function + cupynumeric.var : equivalent function Availability -------- @@ -3244,7 +3007,7 @@ def var( """ if axis is not None and not isinstance(axis, int): raise NotImplementedError( - "cunumeric.var only supports int types for `axis` currently" + "cupynumeric.var only supports int types for `axis` currently" ) # this could be computed as a single pass through the array @@ -3254,7 +3017,7 @@ def var( # directly as <(x-mu)^2>, which then requires two passes through the # data to first compute the mean and then compute the variance # see https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance - # TODO(https://github.com/nv-legate/cunumeric/issues/590) + # TODO(https://github.com/nv-legate/cupynumeric/issues/590) dtype = self._summation_dtype(dtype) # calculate the mean, but keep the dimensions so that the @@ -3267,7 +3030,7 @@ def var( if axis is None or calculate_volume(tuple_pop(self.shape, axis)) == 1: # this is a scalar reduction and we can optimize this as a single # pass through a scalar reduction - result = self._perform_unary_reduction( + result = perform_unary_reduction( UnaryRedCode.VARIANCE, self, axis=axis, @@ -3275,10 +3038,13 @@ def var( out=out, keepdims=keepdims, where=where_array, - args=(mu,), + # FIXME(wonchanl): the following code blocks on mu to convert + # it to a Scalar object. 
We need to get rid of this blocking by + # allowing the extra arguments to be Legate stores + args=(Scalar(mu.__array__(), to_core_type(self.dtype)),), ) else: - # TODO(https://github.com/nv-legate/cunumeric/issues/591) + # TODO(https://github.com/nv-legate/cupynumeric/issues/591) # there isn't really support for generic binary reductions # right now all of the current binary reductions are boolean # reductions like allclose. To implement this a single pass would @@ -3288,7 +3054,7 @@ def var( # delta*delta in second pass delta = self - mu - result = self._perform_unary_reduction( + result = perform_unary_reduction( UnaryRedCode.SUM_SQUARES, delta, axis=axis, @@ -3312,28 +3078,28 @@ def var( def min( self, axis: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Union[int, float, None] = None, - where: Union[ndarray, None] = None, + initial: int | float | None = None, + where: ndarray | None = None, ) -> ndarray: """a.min(axis=None, out=None, keepdims=False, initial=, where=True) Return the minimum along a given axis. - Refer to :func:`cunumeric.amin` for full documentation. + Refer to :func:`cupynumeric.amin` for full documentation. See Also -------- - cunumeric.amin : equivalent function + cupynumeric.amin : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.MIN, self, axis=axis, @@ -3346,20 +3112,20 @@ def min( @add_boilerplate() def partition( self, - kth: Union[int, Sequence[int]], + kth: int | Sequence[int], axis: Any = -1, kind: SelectKind = "introselect", - order: Union[OrderType, None] = None, + order: OrderType | None = None, ) -> None: """a.partition(kth, axis=-1, kind='introselect', order=None) Partition of an array in-place. - Refer to :func:`cunumeric.partition` for full documentation. + Refer to :func:`cupynumeric.partition` for full documentation. 
See Also -------- - cunumeric.partition : equivalent function + cupynumeric.partition : equivalent function Availability -------- @@ -3374,20 +3140,20 @@ def partition( @add_boilerplate() def argpartition( self, - kth: Union[int, Sequence[int]], + kth: int | Sequence[int], axis: Any = -1, kind: SelectKind = "introselect", - order: Union[OrderType, None] = None, + order: OrderType | None = None, ) -> ndarray: """a.argpartition(kth, axis=-1, kind='introselect', order=None) Returns the indices that would partition this array. - Refer to :func:`cunumeric.argpartition` for full documentation. + Refer to :func:`cupynumeric.argpartition` for full documentation. See Also -------- - cunumeric.argpartition : equivalent function + cupynumeric.argpartition : equivalent function Availability -------- @@ -3409,41 +3175,31 @@ def argpartition( def prod( self, axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Union[int, float, None] = None, - where: Union[ndarray, None] = None, + initial: int | float | None = None, + where: ndarray | None = None, ) -> ndarray: """a.prod(axis=None, dtype=None, out=None, keepdims=False, initial=1, where=True) Return the product of the array elements over the given axis - Refer to :func:`cunumeric.prod` for full documentation. + Refer to :func:`cupynumeric.prod` for full documentation. 
See Also -------- - cunumeric.prod : equivalent function + cupynumeric.prod : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ - if self.dtype.type == np.bool_: - temp = ndarray( - shape=self.shape, - dtype=np.dtype(np.int32), - inputs=(self,), - ) - temp._thunk.convert(self._thunk) - self_array = temp - else: - self_array = self - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.PROD, - self_array, + self, axis=axis, dtype=dtype, out=out, @@ -3457,11 +3213,11 @@ def ravel(self, order: OrderType = "C") -> ndarray: Return a flattened array. - Refer to :func:`cunumeric.ravel` for full documentation. + Refer to :func:`cupynumeric.ravel` for full documentation. See Also -------- - cunumeric.ravel : equivalent function + cupynumeric.ravel : equivalent function ndarray.flat : a flat iterator on the array. Availability @@ -3476,11 +3232,11 @@ def reshape(self, *args: Any, order: OrderType = "C") -> ndarray: Returns an array containing the same data with a new shape. - Refer to :func:`cunumeric.reshape` for full documentation. + Refer to :func:`cupynumeric.reshape` for full documentation. 
See Also -------- - cunumeric.reshape : equivalent function + cupynumeric.reshape : equivalent function Availability @@ -3551,15 +3307,15 @@ def setfield( self, val: Any, dtype: npt.DTypeLike, offset: int = 0 ) -> None: raise NotImplementedError( - "cuNumeric does not currently support type reinterpretation " + "cuPyNumeric does not currently support type reinterpretation " "for ndarray.setfield" ) def setflags( self, - write: Union[bool, None] = None, - align: Union[bool, None] = None, - uic: Union[bool, None] = None, + write: bool | None = None, + align: bool | None = None, + uic: bool | None = None, ) -> None: """a.setflags(write=None, align=None, uic=None) @@ -3622,10 +3378,10 @@ def setflags( @add_boilerplate() def searchsorted( self: ndarray, - v: Union[int, float, ndarray], + v: int | float | ndarray, side: SortSide = "left", - sorter: Optional[ndarray] = None, - ) -> Union[int, ndarray]: + sorter: ndarray | None = None, + ) -> int | ndarray: """a.searchsorted(v, side='left', sorter=None) Find the indices into a sorted array a such that, if the corresponding @@ -3659,7 +3415,7 @@ def searchsorted( raise ValueError("Dimension mismatch: self must be a 1D array") # this is needed in case v is a scalar - v_ndarray = convert_to_cunumeric_ndarray(v) + v_ndarray = convert_to_cupynumeric_ndarray(v) a = self # in case we have different dtypes we ned to find a common type @@ -3697,17 +3453,17 @@ def sort( self, axis: Any = -1, kind: SortType = "quicksort", - order: Union[OrderType, None] = None, + order: OrderType | None = None, ) -> None: """a.sort(axis=-1, kind=None, order=None) Sort an array in-place. - Refer to :func:`cunumeric.sort` for full documentation. + Refer to :func:`cupynumeric.sort` for full documentation. 
See Also -------- - cunumeric.sort : equivalent function + cupynumeric.sort : equivalent function Availability -------- @@ -3721,17 +3477,17 @@ def argsort( self, axis: Any = -1, kind: SortType = "quicksort", - order: Union[OrderType, None] = None, + order: OrderType | None = None, ) -> ndarray: """a.argsort(axis=-1, kind=None, order=None) Returns the indices that would sort this array. - Refer to :func:`cunumeric.argsort` for full documentation. + Refer to :func:`cupynumeric.argsort` for full documentation. See Also -------- - cunumeric.argsort : equivalent function + cupynumeric.argsort : equivalent function Availability -------- @@ -3749,11 +3505,11 @@ def squeeze(self, axis: Any = None) -> ndarray: Remove axes of length one from `a`. - Refer to :func:`cunumeric.squeeze` for full documentation. + Refer to :func:`cupynumeric.squeeze` for full documentation. See Also -------- - cunumeric.squeeze : equivalent function + cupynumeric.squeeze : equivalent function Availability -------- @@ -3779,41 +3535,31 @@ def squeeze(self, axis: Any = None) -> ndarray: def sum( self, axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Union[int, float, None] = None, - where: Union[ndarray, None] = None, + initial: int | float | None = None, + where: ndarray | None = None, ) -> ndarray: """a.sum(axis=None, dtype=None, out=None, keepdims=False, initial=0, where=None) Return the sum of the array elements over the given axis. - Refer to :func:`cunumeric.sum` for full documentation. + Refer to :func:`cupynumeric.sum` for full documentation. 
See Also -------- - cunumeric.sum : equivalent function + cupynumeric.sum : equivalent function Availability -------- Multiple GPUs, Multiple CPUs """ - if self.dtype.type == np.bool_: - temp = ndarray( - shape=self.shape, - dtype=np.dtype(np.int32), - inputs=(self,), - ) - temp._thunk.convert(self._thunk) - self_array = temp - else: - self_array = self - return self._perform_unary_reduction( + return perform_unary_reduction( UnaryRedCode.SUM, - self_array, + self, axis=axis, dtype=dtype, out=out, @@ -3826,10 +3572,10 @@ def _nansum( self, axis: Any = None, dtype: Any = None, - out: Union[ndarray, None] = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Optional[Union[int, float]] = None, - where: Optional[ndarray] = None, + initial: int | float | None = None, + where: ndarray | None = None, ) -> ndarray: # Note that np.nansum and np.sum allow complex datatypes # so there are no "disallowed types" for this API @@ -3839,7 +3585,7 @@ def _nansum( else: unary_red_code = UnaryRedCode.SUM - return self._perform_unary_reduction( + return perform_unary_reduction( unary_red_code, self, axis=axis, @@ -3855,11 +3601,11 @@ def swapaxes(self, axis1: Any, axis2: Any) -> ndarray: Return a view of the array with `axis1` and `axis2` interchanged. - Refer to :func:`cunumeric.swapaxes` for full documentation. + Refer to :func:`cupynumeric.swapaxes` for full documentation. See Also -------- - cunumeric.swapaxes : equivalent function + cupynumeric.swapaxes : equivalent function Availability -------- @@ -3957,7 +3703,7 @@ def tolist(self) -> Any: Return a copy of the array data as a (nested) Python list. Data items are converted to the nearest compatible builtin Python - type, via the `~cunumeric.ndarray.item` function. + type, via the `~cupynumeric.ndarray.item` function. If ``a.ndim`` is 0, then since the depth of the nested list is 0, it will not be a list at all, but a simple Python scalar. 
@@ -3974,7 +3720,7 @@ def tolist(self) -> Any: Notes ----- - The array may be recreated via ``a = cunumeric.array(a.tolist())``, + The array may be recreated via ``a = cupynumeric.array(a.tolist())``, although this may sometimes lose precision. Availability @@ -4090,8 +3836,8 @@ def flip(self, axis: Any = None) -> ndarray: def view( self, - dtype: Union[npt.DTypeLike, None] = None, - type: Union[type, None] = None, + dtype: npt.DTypeLike | None = None, + type: type | None = None, ) -> ndarray: """ New view of array with the same data. @@ -4110,7 +3856,7 @@ def view( Notes ----- - cuNumeric does not currently support type reinterpretation, or + cuPyNumeric does not currently support type reinterpretation, or conversion to ndarray sub-classes; use :func:`ndarray.__array__()` to convert to `numpy.ndarray`. @@ -4124,11 +3870,11 @@ def view( """ if dtype is not None and dtype != self.dtype: raise NotImplementedError( - "cuNumeric does not currently support type reinterpretation" + "cuPyNumeric does not currently support type reinterpretation" ) if type is not None: raise NotImplementedError( - "cuNumeric does not currently support conversion to ndarray " + "cuPyNumeric does not currently support conversion to ndarray " "sub-classes; use __array__() to convert to numpy.ndarray" ) return ndarray( @@ -4143,11 +3889,11 @@ def unique(self) -> ndarray: Find the unique elements of an array. - Refer to :func:`cunumeric.unique` for full documentation. + Refer to :func:`cupynumeric.unique` for full documentation. 
See Also -------- - cunumeric.unique : equivalent function + cupynumeric.unique : equivalent function Availability -------- @@ -4157,44 +3903,6 @@ def unique(self) -> ndarray: thunk = self._thunk.unique() return ndarray(shape=thunk.shape, thunk=thunk) - @classmethod - def _get_where_thunk( - cls, where: Union[None, ndarray], out_shape: NdShape - ) -> Union[None, NumPyThunk]: - if where is None: - return where - if ( - not isinstance(where, ndarray) - or where.dtype != np.bool_ - or where.shape != out_shape - ): - raise RuntimeError("should have converted this earlier") - return where._thunk - - @staticmethod - def find_common_type(*args: ndarray) -> np.dtype[Any]: - """Determine common type following NumPy's coercion rules. - - Parameters - ---------- - *args : ndarray - A list of ndarrays - - Returns - ------- - datatype : data-type - The type that results from applying the NumPy type promotion rules - to the arguments. - """ - array_types = list() - scalars = list() - for array in args: - if array.ndim == 0: - scalars.append(array.dtype.type(0)) - else: - array_types.append(array.dtype) - return np.result_type(*array_types, *scalars) - def _maybe_convert(self, dtype: np.dtype[Any], hints: Any) -> ndarray: if self.dtype == dtype: return self @@ -4212,313 +3920,6 @@ def _warn_and_convert(self, dtype: np.dtype[Any]) -> ndarray: else: return self - # For performing normal/broadcast unary operations - @classmethod - def _perform_unary_op( - cls, - op: UnaryOpCode, - src: ndarray, - out: Union[Any, None] = None, - extra_args: Any = None, - dtype: Union[np.dtype[Any], None] = None, - out_dtype: Union[np.dtype[Any], None] = None, - ) -> ndarray: - if out is not None: - # If the shapes don't match see if we can broadcast - # This will raise an exception if they can't be broadcast together - if np.broadcast_shapes(src.shape, out.shape) != out.shape: - raise ValueError( - f"non-broadcastable output operand with shape {out.shape} " - f"doesn't match the broadcast shape 
{src.shape}" - ) - else: - # No output yet, so make one - out_shape = src.shape - - if dtype is not None: - out = ndarray( - shape=out_shape, - dtype=dtype, - inputs=(src,), - ) - elif out_dtype is not None: - out = ndarray( - shape=out_shape, - dtype=out_dtype, - inputs=(src,), - ) - else: - out = ndarray( - shape=out_shape, - dtype=src.dtype - if src.dtype.kind != "c" - else np.dtype(np.float32) - if src.dtype == np.dtype(np.complex64) - else np.dtype(np.float64), - inputs=(src,), - ) - - if out_dtype is None: - if out.dtype != src.dtype and not ( - op == UnaryOpCode.ABSOLUTE and src.dtype.kind == "c" - ): - temp = ndarray( - out.shape, - dtype=src.dtype, - inputs=(src,), - ) - temp._thunk.unary_op( - op, - src._thunk, - True, - extra_args, - ) - out._thunk.convert(temp._thunk) - else: - out._thunk.unary_op( - op, - src._thunk, - True, - extra_args, - ) - else: - if out.dtype != out_dtype: - temp = ndarray( - out.shape, - dtype=out_dtype, - inputs=(src,), - ) - temp._thunk.unary_op( - op, - src._thunk, - True, - extra_args, - ) - out._thunk.convert(temp._thunk) - else: - out._thunk.unary_op( - op, - src._thunk, - True, - extra_args, - ) - return out - - # For performing reduction unary operations - @classmethod - def _perform_unary_reduction( - cls, - op: UnaryRedCode, - src: ndarray, - axis: Any = None, - dtype: Union[np.dtype[Any], None] = None, - res_dtype: Union[npt.DTypeLike, None] = None, - out: Union[ndarray, None] = None, - keepdims: bool = False, - args: Union[Any, None] = None, - initial: Union[int, float, None] = None, - where: Union[ndarray, None] = None, - ) -> ndarray: - # When 'res_dtype' is not None, the input and output of the reduction - # have different types. 
Such reduction operators don't take a dtype of - # the accumulator - if res_dtype is not None: - assert dtype is None - dtype = src.dtype - else: - # If 'dtype' exists, that determines both the accumulation dtype - # and the output dtype - if dtype is not None: - res_dtype = dtype - elif out is not None: - dtype = out.dtype - res_dtype = out.dtype - else: - dtype = src.dtype - res_dtype = src.dtype - - # TODO: Need to require initial to be given when the array is empty - # or a where mask is given. - if ( - op - in ( - UnaryRedCode.ARGMAX, - UnaryRedCode.ARGMIN, - UnaryRedCode.MAX, - UnaryRedCode.MIN, - ) - and src.dtype.kind == "c" - ): - raise NotImplementedError( - "(arg)max/min not supported for complex-type arrays" - ) - - if axis is None: - axes = tuple(range(src.ndim)) - else: - axes = normalize_axis_tuple(axis, src.ndim) - - out_shape: NdShape = () - for dim in range(src.ndim): - if dim not in axes: - out_shape += (src.shape[dim],) - elif keepdims: - out_shape += (1,) - - if out is None: - out = ndarray( - shape=out_shape, dtype=res_dtype, inputs=(src, where) - ) - elif out.shape != out_shape: - errmsg = ( - f"the output shapes do not match: expected {out_shape} " - f"but got {out.shape}" - ) - raise ValueError(errmsg) - - if dtype != src.dtype: - src = src.astype(dtype) - - if out.dtype == res_dtype: - result = out - else: - result = ndarray( - shape=out_shape, dtype=res_dtype, inputs=(src, where) - ) - - where_array = broadcast_where(where, src.shape) - result._thunk.unary_reduction( - op, - src._thunk, - cls._get_where_thunk(where_array, src.shape), - axis, - axes, - keepdims, - args, - initial, - ) - - if result is not out: - out._thunk.convert(result._thunk) - - return out - - @classmethod - def _perform_binary_reduction( - cls, - op: BinaryOpCode, - one: ndarray, - two: ndarray, - dtype: np.dtype[Any], - extra_args: Union[tuple[Any, ...], None] = None, - ) -> ndarray: - args = (one, two) - - # We only handle bool types here for now - assert dtype is 
not None and dtype == np.dtype(np.bool_) - # Collapsing down to a single value in this case - # Check to see if we need to broadcast between inputs - if one.shape != two.shape: - broadcast = np.broadcast_shapes(one.shape, two.shape) - else: - broadcast = None - - common_type = cls.find_common_type(one, two) - one_thunk = one._maybe_convert(common_type, args)._thunk - two_thunk = two._maybe_convert(common_type, args)._thunk - - dst = ndarray(shape=(), dtype=dtype, inputs=args) - dst._thunk.binary_reduction( - op, - one_thunk, - two_thunk, - broadcast, - extra_args, - ) - return dst - - @classmethod - def _perform_where( - cls, mask: ndarray, one: ndarray, two: ndarray - ) -> ndarray: - args = (mask, one, two) - - mask = mask._maybe_convert(np.dtype(np.bool_), args) - - common_type = cls.find_common_type(one, two) - one = one._maybe_convert(common_type, args) - two = two._maybe_convert(common_type, args) - - # Compute the output shape - out_shape = np.broadcast_shapes(mask.shape, one.shape, two.shape) - out = ndarray(shape=out_shape, dtype=common_type, inputs=args) - out._thunk.where(mask._thunk, one._thunk, two._thunk) - return out - - @classmethod - def _perform_scan( - cls, - op: ScanCode, - src: ndarray, - axis: Any = None, - dtype: Union[npt.DTypeLike, None] = None, - out: Union[ndarray, None] = None, - nan_to_identity: bool = False, - ) -> ndarray: - if src.dtype.kind != "c" and src.dtype.kind != "f": - nan_to_identity = False - if dtype is None: - if out is None: - if src.dtype.kind == "i": - # Set dtype to default platform integer - dtype = np.int_ - else: - dtype = src.dtype - else: - dtype = out.dtype - # flatten input when axis is None - if axis is None: - axis = 0 - src_arr = src.ravel() - else: - axis = normalize_axis_index(axis, src.ndim) - src_arr = src - if out is not None: - if dtype != out.dtype: - # if out array is specified, its type overrules dtype - dtype = out.dtype - if out.shape != src_arr.shape: - raise NotImplementedError( - "Varried output 
shape not supported. Output must have " - "same shape as input (same size if no axis is provided" - ) - else: - out = ndarray(shape=src_arr.shape, dtype=dtype) - - if dtype != src_arr.dtype: - if nan_to_identity: - if op is ScanCode.SUM: - nan_op = ConvertCode.SUM - else: - nan_op = ConvertCode.PROD - # If convert is called, it will handle NAN conversion - nan_to_identity = False - else: - nan_op = ConvertCode.NOOP - # convert input to temporary for type conversion - temp = ndarray(shape=src_arr.shape, dtype=dtype) - temp._thunk.convert(src_arr._thunk, nan_op=nan_op) - src_arr = temp - - out._thunk.scan( - op, - src_arr._thunk, - axis=axis, - dtype=dtype, - nan_to_identity=nan_to_identity, - ) - return out - def _wrap(self, new_len: int) -> ndarray: if new_len == 1: idxs = tuple(0 for i in range(self.ndim)) @@ -4531,3 +3932,46 @@ def _wrap(self, new_len: int) -> ndarray: ) out._thunk._wrap(src=self._thunk, new_len=new_len) return out + + def stencil_hint( + self, + low_offsets: tuple[int, ...], + high_offsets: tuple[int, ...], + ) -> None: + """ + Inform cuPyNumeric that this array will be used in a stencil + computation in the following code. + + This allows cuPyNumeric to allocate space for the "ghost" elements + ahead of time, rather than discovering the full extent of accesses + incrementally, and thus avoid intermediate copies. + + For example, let's say we have a 1-D array A of size 10 and we want to + partition A across two GPUs. By default, A would be partitioned equally + and each GPU gets an instance of size 5 (GPU0 gets elements 0-4, and + GPU1 gets 5-9 inclusive). Suppose we use A in the stencil computation + `B = A[:9] + A[1:]`. The runtime would now need to adjust the + partitioning such that GPU0 has elements 0-5 and GPU1 has elements 4-9 + inclusive. Since the original instance on GPU0 does not cover index 5, + cuPyNumeric needs to allocate a full new instance that covers 0-5, + leading to an extra copy. 
In this case, if the code calls + `A.stencil_hint([1], [1])` to pre-allocate instances that contain the + extra elements before it uses A, the extra copies can be avoided. + + Parameters + ---------- + low_offsets: tuple[int] + Stencil offsets towards the negative direction. + high_offsets: tuple[int] + Stencil offsets towards the positive direction. + + Notes + ----- + This function currently does not behave as expected in the case where + multiple CPU/OpenMP processors use the same system memory. + + Availability + -------- + Multiple CPUs, Multiple GPUs + """ + self._thunk.stencil_hint(low_offsets, high_offsets) diff --git a/cupynumeric/_array/flags.py b/cupynumeric/_array/flags.py new file mode 100644 index 000000000..d58a5480a --- /dev/null +++ b/cupynumeric/_array/flags.py @@ -0,0 +1,82 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from .array import ndarray + + +class flagsobj: + """ + Information about the memory layout of the array. + + These flags don't reflect the properties of the cuPyNumeric array, but + rather the NumPy array that will be produced if the cuPyNumeric array is + materialized on a single node. 
+ """ + + def __init__(self, array: ndarray) -> None: + # prevent infinite __setattr__ recursion + object.__setattr__(self, "_array", array) + + def __repr__(self) -> str: + return f"""\ + C_CONTIGUOUS : {self["C"]} + F_CONTIGUOUS : {self["F"]} + OWNDATA : {self["O"]} + WRITEABLE : {self["W"]} + ALIGNED : {self["A"]} + WRITEBACKIFCOPY : {self["X"]} +""" + + def __eq__(self, other: Any) -> bool: + flags = ("C", "F", "O", "W", "A", "X") + return all(self[f] == other[f] for f in flags) # type: ignore [index] + + def __getattr__(self, name: str) -> Any: + if name == "writeable": + return self._array._writeable + flags = self._array.__array__().flags + return getattr(flags, name) + + def __setattr__(self, name: str, value: Any) -> None: + if name == "writeable": + self._check_writeable(value) + self._array._writeable = bool(value) + else: + flags = self._array.__array__().flags + setattr(flags, name, value) + + def __getitem__(self, key: Any) -> bool: + if key == "W": + return self._array._writeable + flags = self._array.__array__().flags + return flags[key] + + def __setitem__(self, key: str, value: Any) -> None: + if key == "W": + self._check_writeable(value) + self._array._writeable = bool(value) + else: + flags = self._array.__array__().flags + flags[key] = value + + def _check_writeable(self, value: Any) -> None: + if value and not self._array._writeable: + raise ValueError( + "non-writeable cupynumeric arrays cannot be made writeable" + ) diff --git a/cupynumeric/_array/thunk.py b/cupynumeric/_array/thunk.py new file mode 100644 index 000000000..d7f351f86 --- /dev/null +++ b/cupynumeric/_array/thunk.py @@ -0,0 +1,356 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np +from legate.core import Scalar + +from .._utils import is_np2 +from ..config import ( + BinaryOpCode, + ConvertCode, + ScanCode, + UnaryOpCode, + UnaryRedCode, +) +from ..types import NdShape +from .util import broadcast_where, find_common_type + +if is_np2: + from numpy.lib.array_utils import normalize_axis_index # type: ignore + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.multiarray import normalize_axis_index # type: ignore + from numpy.core.numeric import normalize_axis_tuple # type: ignore + +if TYPE_CHECKING: + import numpy.typing as npt + + from .._thunk.thunk import NumPyThunk + from .array import ndarray + + +def get_where_thunk( + where: ndarray | None, out_shape: NdShape +) -> NumPyThunk | None: + from .array import ndarray + + if where is None: + return where + if ( + not isinstance(where, ndarray) + or where.dtype != bool + or where.shape != out_shape + ): + raise RuntimeError("should have converted this earlier") + return where._thunk + + +def perform_unary_op( + op: UnaryOpCode, + src: ndarray, + out: ndarray | None = None, + extra_args: Any = None, +) -> ndarray: + from .array import ndarray + + if out is not None: + # If the shapes don't match see if we can broadcast + # This will raise an exception if they can't be broadcast together + if np.broadcast_shapes(src.shape, out.shape) != out.shape: + raise ValueError( + f"non-broadcastable output operand with shape {out.shape} " + f"doesn't 
match the broadcast shape {src.shape}" + ) + else: + # No output yet, so make one + out_shape = src.shape + + if op == UnaryOpCode.ANGLE: + dtype = np.dtype(np.float64) + elif op == UnaryOpCode.ROUND or src.dtype.kind != "c": + dtype = src.dtype + else: + if src.dtype == np.dtype(np.complex64): + dtype = np.dtype(np.float32) # type: ignore + else: + dtype = np.dtype(np.float64) + + out = ndarray( + shape=out_shape, + dtype=dtype, + inputs=(src,), + ) + + if out.dtype != src.dtype: + if ( + op == UnaryOpCode.ABSOLUTE and src.dtype.kind == "c" + ) or op == UnaryOpCode.ANGLE: + out._thunk.unary_op( + op, + src._thunk, + True, + extra_args, + ) + else: + temp = ndarray( + out.shape, + dtype=src.dtype, + inputs=(src,), + ) + temp._thunk.unary_op( + op, + src._thunk, + True, + extra_args, + ) + out._thunk.convert(temp._thunk) + else: + out._thunk.unary_op( + op, + src._thunk, + True, + extra_args, + ) + return out + + +def _need_upcast_for_reduction(op: UnaryRedCode, dtype: np.dtype[Any]) -> bool: + return op in (UnaryRedCode.SUM, UnaryRedCode.PROD) and dtype.kind in ( + "b", + "i", + "u", + ) + + +def perform_unary_reduction( + op: UnaryRedCode, + src: ndarray, + axis: Any = None, + dtype: np.dtype[Any] | None = None, + res_dtype: npt.DTypeLike | None = None, + out: ndarray | None = None, + keepdims: bool = False, + args: tuple[Scalar, ...] = (), + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + from .array import ndarray + + # When 'res_dtype' is not None, the input and output of the reduction + # have different types. 
Such reduction operators don't take a dtype of + # the accumulator + if res_dtype is not None: + assert dtype is None + dtype = src.dtype + else: + if dtype is not None: + # If 'dtype' exists, that determines both the accumulation dtype + # and the output dtype + pass + elif out is not None: + dtype = out.dtype + elif _need_upcast_for_reduction(op, src.dtype): + # upcast to conserve precision + dtype = np.dtype(np.uint64 if src.dtype.kind == "u" else np.int64) + else: + dtype = src.dtype + res_dtype = dtype + + # TODO: Need to require initial to be given when the array is empty + # or a where mask is given. + if ( + op + in ( + UnaryRedCode.ARGMAX, + UnaryRedCode.ARGMIN, + UnaryRedCode.MAX, + UnaryRedCode.MIN, + ) + and src.dtype.kind == "c" + ): + raise NotImplementedError( + "(arg)max/min not supported for complex-type arrays" + ) + + if axis is None: + axes = tuple(range(src.ndim)) + else: + axes = normalize_axis_tuple(axis, src.ndim) + + out_shape: NdShape = () + for dim in range(src.ndim): + if dim not in axes: + out_shape += (src.shape[dim],) + elif keepdims: + out_shape += (1,) + + if out is None: + out = ndarray(shape=out_shape, dtype=res_dtype, inputs=(src, where)) + elif out.shape != out_shape: + errmsg = ( + f"the output shapes do not match: expected {out_shape} " + f"but got {out.shape}" + ) + raise ValueError(errmsg) + + if dtype != src.dtype: + src = src.astype(dtype) + + if out.dtype == res_dtype: + result = out + else: + result = ndarray(shape=out_shape, dtype=res_dtype, inputs=(src, where)) + + where_array = broadcast_where(where, src.shape) + result._thunk.unary_reduction( + op, + src._thunk, + get_where_thunk(where_array, src.shape), + axis, + axes, + keepdims, + args, + initial, + ) + + if result is not out: + out._thunk.convert(result._thunk) + + return out + + +def perform_binary_reduction( + op: BinaryOpCode, + one: ndarray, + two: ndarray, + dtype: np.dtype[Any], + extra_args: tuple[Scalar, ...] 
= (), +) -> ndarray: + from .array import ndarray + + args = (one, two) + + # We only handle bool types here for now + assert dtype is not None and dtype == np.dtype(bool) + + # Collapsing down to a single value in this case + # Check to see if we need to broadcast between inputs + if one.shape != two.shape: + broadcast = np.broadcast_shapes(one.shape, two.shape) + else: + broadcast = None + + common_type = find_common_type(one, two) + one_thunk = one._maybe_convert(common_type, args)._thunk + two_thunk = two._maybe_convert(common_type, args)._thunk + + dst = ndarray(shape=(), dtype=dtype, inputs=args) + dst._thunk.binary_reduction( + op, + one_thunk, + two_thunk, + broadcast, + extra_args, + ) + return dst + + +def perform_where(mask: ndarray, one: ndarray, two: ndarray) -> ndarray: + from .array import ndarray + + args = (mask, one, two) + + mask = mask._maybe_convert(np.dtype(bool), args) + + common_type = find_common_type(one, two) + one = one._maybe_convert(common_type, args) + two = two._maybe_convert(common_type, args) + + # Compute the output shape + out_shape = np.broadcast_shapes(mask.shape, one.shape, two.shape) + out = ndarray(shape=out_shape, dtype=common_type, inputs=args) + out._thunk.where(mask._thunk, one._thunk, two._thunk) + return out + + +def perform_scan( + op: ScanCode, + src: ndarray, + axis: Any = None, + dtype: npt.DTypeLike | None = None, + out: ndarray | None = None, + nan_to_identity: bool = False, +) -> ndarray: + from .array import ndarray + + if src.dtype.kind != "c" and src.dtype.kind != "f": + nan_to_identity = False + + if dtype is None: + if out is None: + if src.dtype.kind == "i": + # Set dtype to default platform integer + dtype = np.int_ + else: + dtype = src.dtype + else: + dtype = out.dtype + + # flatten input when axis is None + if axis is None: + axis = 0 + src_arr = src.ravel() + else: + axis = normalize_axis_index(axis, src.ndim) + src_arr = src + + if out is not None: + if dtype != out.dtype: + # if out array is 
specified, its type overrules dtype + dtype = out.dtype + if out.shape != src_arr.shape: + raise NotImplementedError( + "Varried output shape not supported. Output must have " + "same shape as input (same size if no axis is provided" + ) + else: + out = ndarray(shape=src_arr.shape, dtype=dtype) + + if dtype != src_arr.dtype: + if nan_to_identity: + if op is ScanCode.SUM: + nan_op = ConvertCode.SUM + else: + nan_op = ConvertCode.PROD + # If convert is called, it will handle NAN conversion + nan_to_identity = False + else: + nan_op = ConvertCode.NOOP + # convert input to temporary for type conversion + temp = ndarray(shape=src_arr.shape, dtype=dtype) + temp._thunk.convert(src_arr._thunk, nan_op=nan_op) + src_arr = temp + + out._thunk.scan( + op, + src_arr._thunk, + axis=axis, + dtype=dtype, + nan_to_identity=nan_to_identity, + ) + return out diff --git a/cupynumeric/_array/util.py b/cupynumeric/_array/util.py new file mode 100644 index 000000000..e0096db85 --- /dev/null +++ b/cupynumeric/_array/util.py @@ -0,0 +1,221 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +import operator +from functools import wraps +from inspect import signature +from typing import ( + TYPE_CHECKING, + Any, + Callable, + ParamSpec, + Sequence, + TypeVar, + cast, +) + +import numpy as np + +from ..runtime import runtime +from ..types import NdShape + +if TYPE_CHECKING: + import numpy.typing as npt + + from ..types import NdShapeLike + from .array import ndarray + + +R = TypeVar("R") +P = ParamSpec("P") + + +def add_boilerplate( + *array_params: str, +) -> Callable[[Callable[P, R]], Callable[P, R]]: + """ + Adds required boilerplate to the wrapped cupynumeric.ndarray or + module-level function. + + Every time the wrapped function is called, this wrapper will convert all + specified array-like parameters to cuPyNumeric ndarrays. Additionally, any + "out" or "where" arguments will also always be automatically converted. + """ + to_convert = set(array_params) + assert len(to_convert) == len(array_params) + + def decorator(func: Callable[P, R]) -> Callable[P, R]: + assert not hasattr( + func, "__wrapped__" + ), "this decorator must be the innermost" + + params = signature(func).parameters + extra = to_convert - set(params) + assert len(extra) == 0, f"unknown parameter(s): {extra}" + + # we also always want to convert "out" and "where" + # even if they are not explicitly specified by the user + to_convert.update(("out", "where")) + + out_idx = -1 + indices = set() + for idx, param in enumerate(params): + if param == "out": + out_idx = idx + if param in to_convert: + indices.add(idx) + + @wraps(func) + def wrapper(*args: Any, **kwargs: Any) -> R: + # convert specified non-None positional arguments, making sure + # that any out-parameters are appropriately writeable + converted_args = [] + for idx, arg in enumerate(args): + if idx in indices and arg is not None: + if idx == out_idx: + arg = convert_to_cupynumeric_ndarray(arg, share=True) + if not arg.flags.writeable: + raise ValueError("out is not writeable") + 
else: + arg = convert_to_cupynumeric_ndarray(arg) + converted_args.append(arg) + args = tuple(converted_args) + + # convert specified non-None keyword arguments, making sure + # that any out-parameters are appropriately writeable + for k, v in kwargs.items(): + if k in to_convert and v is not None: + if k == "out": + kwargs[k] = convert_to_cupynumeric_ndarray( + v, share=True + ) + if not kwargs[k].flags.writeable: + raise ValueError("out is not writeable") + else: + kwargs[k] = convert_to_cupynumeric_ndarray(v) + + return func(*args, **kwargs) + + return wrapper + + return decorator + + +def broadcast_where(where: ndarray | None, shape: NdShape) -> ndarray | None: + if where is not None and where.shape != shape: + from .._module import broadcast_to + + where = broadcast_to(where, shape) + return where + + +def convert_to_cupynumeric_ndarray(obj: Any, share: bool = False) -> ndarray: + from .array import ndarray + + # If this is an instance of one of our ndarrays then we're done + if isinstance(obj, ndarray): + return obj + # Ask the runtime to make a numpy thunk for this object + thunk = runtime.get_numpy_thunk(obj, share=share) + writeable = ( + obj.flags.writeable if isinstance(obj, np.ndarray) and share else True + ) + return ndarray(shape=None, thunk=thunk, writeable=writeable) + + +def maybe_convert_to_np_ndarray(obj: Any) -> Any: + """ + Converts cuPyNumeric arrays into NumPy arrays, otherwise has no effect. + """ + from ..ma import MaskedArray + from .array import ndarray + + if isinstance(obj, (ndarray, MaskedArray)): + return obj.__array__() + return obj + + +def check_writeable(arr: ndarray | tuple[ndarray, ...] 
| None) -> None: + """ + Check if the current array is writeable + This check needs to be manually inserted + with consideration on the behavior of the corresponding method + """ + if arr is None: + return + check_list = (arr,) if not isinstance(arr, tuple) else arr + if any(not arr.flags.writeable for arr in check_list): + raise ValueError("array is not writeable") + + +def sanitize_shape( + shape: NdShapeLike | Sequence[Any] | npt.NDArray[Any] | ndarray, +) -> NdShape: + from .array import ndarray + + seq: tuple[Any, ...] + if isinstance(shape, (ndarray, np.ndarray)): + if shape.ndim == 0: + seq = (shape.__array__().item(),) + else: + seq = tuple(shape.__array__()) + elif np.isscalar(shape): + seq = (shape,) + else: + seq = tuple(cast(NdShape, shape)) + try: + # Unfortunately, we can't do this check using + # 'isinstance(value, int)', as the values in a NumPy ndarray + # don't satisfy the predicate (they have numpy value types, + # such as numpy.int64). + result = tuple(operator.index(value) for value in seq) + except TypeError: + raise TypeError( + "expected a sequence of integers or a single integer, " + f"got {shape!r}" + ) + return result + + +def find_common_type(*args: ndarray) -> np.dtype[Any]: + """Determine common type following NumPy's coercion rules. + + Parameters + ---------- + *args : ndarray + A list of ndarrays + + Returns + ------- + datatype : data-type + The type that results from applying the NumPy type promotion rules + to the arguments. 
+ """ + array_types = list() + scalars = list() + for array in args: + if array.ndim == 0: + scalars.append(array.dtype.type(0)) + else: + array_types.append(array.dtype) + return np.result_type(*array_types, *scalars) + + +T = TypeVar("T") + + +def tuple_pop(tup: tuple[T, ...], index: int) -> tuple[T, ...]: + return tup[:index] + tup[index + 1 :] diff --git a/cupynumeric/_module/__init__.py b/cupynumeric/_module/__init__.py new file mode 100644 index 000000000..e96566d91 --- /dev/null +++ b/cupynumeric/_module/__init__.py @@ -0,0 +1,146 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import Any +from warnings import warn + +# The CLLR functions for the numpy module are broken up more or less according +# to the docs at https://numpy.org/doc/stable/reference/routines.html +# +# There are some discrepencies since some functions are repeated in multiple +# locations and since indexing routines are listed elsehwere for some reason. +# Sections/sub-modules that are currently missing are noted in comments. 
+ +# --- Array Creation Routines +# https://numpy.org/doc/stable/reference/routines.array-creation.html + +from .creation_shape import * # From shape or value +from .creation_data import * # From existing data +from .creation_ranges import * # Numerical Ranges +from .creation_matrices import * # Building Matrices + +# --- Array manipulation routines +# https://numpy.org/doc/stable/reference/routines.array-manipulation.html +# +# from .array_kind import * # Changing kind of array +# from .array_add_remove import * # Adding and removing elements + +from .array_basic import * # Basic operations +from .array_shape import * # Changing array shape +from .array_transpose import * # Transpose-like operations +from .array_dimension import * # Changing number of dimensions +from .array_joining import * # Joining arrays +from .array_splitting import * # Splitting arrays +from .array_tiling import * # Tiling arrays +from .array_rearrange import * # Rearranging elements + +# --- Binary operations +# https://numpy.org/doc/stable/reference/routines.bitwise.html +# +# from .binary_elementwise_bit_ops import * # Elementwise bit operations +# from .binary_output import * # Output formatting + +from .binary_bit_packing import * # Bit packing + +# --- Indexing routines +# +# These routines in the numpy module are a bit odd, they are documented under +# the array ref: https://numpy.org/doc/stable/reference/arrays.indexing.html + +from .indexing import * + +# --- Input and output +# https://numpy.org/doc/stable/reference/routines.io.html +# +# from .io_text import * # Text files +# from .io_raw import * # Raw binary files +# from .io_string import * # String formatting +# from .io_memory import * # Memory mapping files +# from .io_text import * # Text formatting options +# from .io_base import * # Base-n representations +# from .io_data import * # Data sources +# from .io_binary import * # Binary format description + +from .io_numpy import * # NumPy binary files (NPY, NPZ) + +# --- 
Linear Algebra +# https://numpy.org/doc/stable/reference/routines.linalg.html + +from .linalg_mvp import * # Matrix and vector products + +# --- Logic functions +# https://numpy.org/doc/stable/reference/routines.logic.html +# +# from .logic_ops import * # Logical operations + +from .logic_truth import * # Truth value testing +from .logic_array_contents import * # Array contents +from .logic_array_type import * # Array type testing +from .logic_comparison import * # Comparison + +# --- Mathematical functions +# https://numpy.org/doc/stable/reference/routines.math.html +# +# from .math_trig import * # Trigonometric functions +# from .math_hyp import * # Hyperbolic functions +# from .math_exp_log import * # Exponents and logarithms +# from .math_other import * # Other special functions +# from .math_floating import * # Floating point routines +# from .math_arithmetic import * # Arithmetic operations + +from .math_rounding import * # Rounding +from .math_sum_prod_diff import * # Sums, products, differences +from .math_complex import * # Handling complex numbers +from .math_extrema import * # Extrema finding +from .math_misc import * # Miscellaneous + +# --- Set routines +# https://numpy.org/doc/stable/reference/routines.set.html +# +# from .sets_boolean import * # Boolean operations + +from .sets_making import * # Making proper sets + +# --- Sorting, searching, and counting +# https://numpy.org/doc/stable/reference/routines.sort.html + +from .ssc_sorting import * # Sorting +from .ssc_searching import * # Searching +from .ssc_counting import * # Counting + +# --- Statistics +# https://numpy.org/doc/stable/reference/routines.statistics.html +# + +from .stats_order import * # Order statistics +from .stats_avgs_vars import * # Averages and variances +from .stats_correlating import * # Correlating +from .stats_histograms import * # Histograms + +# --- Window functions +# https://numpy.org/doc/stable/reference/routines.window.html + +from .window import * # Various windows + 
+# --- numpy.test (disabled) + + +def test(*args: Any, **kw: Any) -> None: + warn( + "cuPyNumeric cannot execute numpy.test() due to reliance " + "on Numpy internals. For information about running the " + "cuPyNumeric test suite, see: https://docs.nvidia.com/cupynumeric/latest/developer/index.html" + ) diff --git a/cunumeric/_unary_red_utils.py b/cupynumeric/_module/_unary_red_utils.py similarity index 95% rename from cunumeric/_unary_red_utils.py rename to cupynumeric/_module/_unary_red_utils.py index c115b50d6..17c493499 100644 --- a/cunumeric/_unary_red_utils.py +++ b/cupynumeric/_module/_unary_red_utils.py @@ -1,4 +1,4 @@ -# Copyright 2022-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ from __future__ import annotations -from .config import UnaryRedCode +from ..config import UnaryRedCode # corresponding non-nan unary reduction ops for nan unary reduction ops _EQUIVALENT_NON_NAN_OPS: dict[UnaryRedCode, UnaryRedCode] = { diff --git a/cupynumeric/_module/array_basic.py b/cupynumeric/_module/array_basic.py new file mode 100644 index 000000000..5fed9e38b --- /dev/null +++ b/cupynumeric/_module/array_basic.py @@ -0,0 +1,81 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._array.util import add_boilerplate + +if TYPE_CHECKING: + from .._array.array import ndarray + from ..types import NdShape + + +@add_boilerplate("a") +def ndim(a: ndarray) -> int: + """ + + Return the number of dimensions of an array. + + Parameters + ---------- + a : array_like + Input array. If it is not already an ndarray, a conversion is + attempted. + + Returns + ------- + number_of_dimensions : int + The number of dimensions in `a`. Scalars are zero-dimensional. + + See Also + -------- + ndarray.ndim : equivalent method + shape : dimensions of array + ndarray.shape : dimensions of array + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return 0 if a is None else a.ndim + + +@add_boilerplate("a") +def shape(a: ndarray) -> NdShape: + """ + + Return the shape of an array. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + shape : tuple[int, ...] + The elements of the shape tuple give the lengths of the + corresponding array dimensions. + + See Also + -------- + numpy.shape + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.shape diff --git a/cupynumeric/_module/array_dimension.py b/cupynumeric/_module/array_dimension.py new file mode 100644 index 000000000..01629b2cb --- /dev/null +++ b/cupynumeric/_module/array_dimension.py @@ -0,0 +1,443 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Iterable, Sequence + +import numpy as np + +from .._array.array import ndarray +from .._array.util import add_boilerplate, convert_to_cupynumeric_ndarray +from .._utils import is_np2 +from .creation_data import array + +if is_np2: + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.numeric import normalize_axis_tuple # type: ignore + +if TYPE_CHECKING: + from ..types import NdShape, NdShapeLike + + +def _reshape_recur(ndim: int, arr: ndarray) -> tuple[int, ...]: + if arr.ndim < ndim: + cur_shape: tuple[int, ...] = _reshape_recur(ndim - 1, arr) + if ndim == 2: + cur_shape = (1,) + cur_shape + else: + cur_shape = cur_shape + (1,) + else: + cur_shape = arr.shape + return cur_shape + + +def _atleast_nd(ndim: int, arys: Sequence[ndarray]) -> list[ndarray] | ndarray: + inputs = list(convert_to_cupynumeric_ndarray(arr) for arr in arys) + # 'reshape' change the shape of arrays + # only when arr.shape != _reshape_recur(ndim,arr) + result = list(arr.reshape(_reshape_recur(ndim, arr)) for arr in inputs) + # if the number of arrays in `arys` is 1, + # the return value is a single array + if len(result) == 1: + return result[0] + return result + + +def atleast_1d(*arys: ndarray) -> list[ndarray] | ndarray: + """ + + Convert inputs to arrays with at least one dimension. + Scalar inputs are converted to 1-dimensional arrays, + whilst higher-dimensional inputs are preserved. + + Parameters + ---------- + *arys : array_like + One or more input arrays. + + Returns + ------- + ret : ndarray + An array, or list of arrays, each with a.ndim >= 1. + Copies are made only if necessary. 
+ + See Also + -------- + numpy.atleast_1d + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return _atleast_nd(1, arys) + + +def atleast_2d(*arys: ndarray) -> list[ndarray] | ndarray: + """ + + View inputs as arrays with at least two dimensions. + + Parameters + ---------- + *arys : array_like + One or more array-like sequences. + Non-array inputs are converted to arrays. + Arrays that already have two or more dimensions are preserved. + + Returns + ------- + res, res2, … : ndarray + An array, or list of arrays, each with a.ndim >= 2. + Copies are avoided where possible, and + views with two or more dimensions are returned. + + See Also + -------- + numpy.atleast_2d + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return _atleast_nd(2, arys) + + +def atleast_3d(*arys: ndarray) -> list[ndarray] | ndarray: + """ + + View inputs as arrays with at least three dimensions. + + Parameters + ---------- + *arys : array_like + One or more array-like sequences. + Non-array inputs are converted to arrays. + Arrays that already have three or more dimensions are preserved. + + Returns + ------- + res, res2, … : ndarray + An array, or list of arrays, each with a.ndim >= 3. + Copies are avoided where possible, and + views with three or more dimensions are returned. + For example, a 1-D array of shape (N,) becomes + a view of shape (1, N, 1), and a 2-D array of shape (M, N) + becomes a view of shape (M, N, 1). + + See Also + -------- + numpy.atleast_3d + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return _atleast_nd(3, arys) + + +@add_boilerplate("a") +def squeeze(a: ndarray, axis: NdShapeLike | None = None) -> ndarray: + """ + + Remove single-dimensional entries from the shape of an array. + + Parameters + ---------- + a : array_like + Input data. + axis : None or int or tuple[int], optional + Selects a subset of the single-dimensional entries in the + shape. 
If an axis is selected with shape entry greater than + one, an error is raised. + + Returns + ------- + squeezed : ndarray + The input array, but with all or a subset of the + dimensions of length 1 removed. This is always `a` itself + or a view into `a`. + + Raises + ------ + ValueError + If `axis` is not None, and an axis being squeezed is not of length 1 + + See Also + -------- + numpy.squeeze + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.squeeze(axis=axis) + + +def broadcast_shapes(*args: NdShapeLike | Sequence[NdShapeLike]) -> NdShape: + """ + + Broadcast the input shapes into a single shape. + + Parameters + ---------- + `*args` : tuples of ints, or ints + The shapes to be broadcast against each other. + + Returns + ------- + tuple : Broadcasted shape. + + See Also + -------- + numpy.broadcast_shapes + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + # TODO: expected "Union[SupportsIndex, Sequence[SupportsIndex]]" + return np.broadcast_shapes(*args) # type: ignore [arg-type] + + +def _broadcast_to( + arr: ndarray, + shape: NdShapeLike, + subok: bool = False, + broadcasted: bool = False, +) -> ndarray: + # create an array object w/ options passed from 'broadcast' routines + arr = array(arr, copy=False, subok=subok) + # 'broadcast_to' returns a read-only view of the original array + out_shape = broadcast_shapes(arr.shape, shape) + if out_shape != shape: + raise ValueError( + f"cannot broadcast an array of shape {arr.shape} to {shape}" + ) + result = ndarray( + shape=out_shape, + thunk=arr._thunk.broadcast_to(out_shape), + writeable=False, + ) + return result + + +@add_boilerplate("arr") +def broadcast_to( + arr: ndarray, shape: NdShapeLike, subok: bool = False +) -> ndarray: + """ + + Broadcast an array to a new shape. + + Parameters + ---------- + arr : array_like + The array to broadcast. + shape : tuple or int + The shape of the desired array. + A single integer i is interpreted as (i,). 
+ subok : bool, optional + This option is ignored by cuPyNumeric. + + Returns + ------- + broadcast : array + A readonly view on the original array with the given shape. + It is typically not contiguous. + Furthermore, more than one element of a broadcasted array + may refer to a single memory location. + + See Also + -------- + numpy.broadcast_to + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + return _broadcast_to(arr, shape, subok) + + +def _broadcast_arrays( + arrs: list[ndarray], + subok: bool = False, +) -> list[ndarray]: + # create an arry object w/ options passed from 'broadcast' routines + arrays = [array(arr, copy=False, subok=subok) for arr in arrs] + # check if the broadcast can happen in the input list of arrays + shapes = [arr.shape for arr in arrays] + out_shape = broadcast_shapes(*shapes) + # broadcast to the final shape + arrays = [_broadcast_to(arr, out_shape, subok) for arr in arrays] + return arrays + + +def broadcast_arrays(*args: Any, subok: bool = False) -> list[ndarray]: + """ + + Broadcast any number of arrays against each other. + + Parameters + ---------- + `*args` : array_likes + The arrays to broadcast. + + subok : bool, optional + This option is ignored by cuPyNumeric + + Returns + ------- + broadcasted : list of arrays + These arrays are views on the original arrays. + They are typically not contiguous. + Furthermore, more than one element of a broadcasted array + may refer to a single memory location. + If you need to write to the arrays, make copies first. + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + arrs = [convert_to_cupynumeric_ndarray(arr) for arr in args] + return _broadcast_arrays(arrs, subok=subok) + + +class broadcast: + """Produce an object that broadcasts input parameters against one another. + It has shape and nd properties and may be used as an iterator. + + Parameters + ---------- + `*arrays` : array_likes + The arrays to broadcast. 
+ + Returns + ------- + b: broadcast + Broadcast the input parameters against one another, and return an + object that encapsulates the result. Amongst others, it has shape + and nd properties, and may be used as an iterator. + + """ + + def __init__(self, *arrays: Any) -> None: + arrs = [convert_to_cupynumeric_ndarray(arr) for arr in arrays] + broadcasted = _broadcast_arrays(arrs) + self._iters = tuple(arr.flat for arr in broadcasted) + self._index = 0 + self._shape = broadcasted[0].shape + self._size = np.prod(self.shape, dtype=int) + + def __iter__(self) -> broadcast: + self._index = 0 + return self + + def __next__(self) -> Any: + if self._index < self.size: + result = tuple(each[self._index] for each in self._iters) + self._index += 1 + return result + raise StopIteration + + def reset(self) -> None: + """Reset the broadcasted result's iterator(s).""" + self._index = 0 + + @property + def index(self) -> int: + """current index in broadcasted result""" + return self._index + + @property + def iters(self) -> tuple[Iterable[Any], ...]: + """tuple of iterators along self's "components." """ + return self._iters + + @property + def numiter(self) -> int: + """Number of iterators possessed by the broadcasted result.""" + return len(self._iters) + + @property + def nd(self) -> int: + """Number of dimensions of broadcasted result.""" + return self.ndim + + @property + def ndim(self) -> int: + """Number of dimensions of broadcasted result.""" + return len(self.shape) + + @property + def shape(self) -> NdShape: + """Shape of broadcasted result.""" + return self._shape + + @property + def size(self) -> int: + """Total size of broadcasted result.""" + return self._size + + +@add_boilerplate("a") +def expand_dims( + a: ndarray, axis: int | tuple[int, ...] | list[int] +) -> ndarray: + """ + Expand the shape of an array. + + Insert a new axis that will appear at the `axis` position in the expanded + array shape. + + Parameters + ---------- + a : array_like + Input array. 
+ axis : int or tuple of ints + Position in the expanded axes where the new axis (or axes) is placed. + + Returns + ------- + result : ndarray + View of `a` with the number of dimensions increased. + + See Also + -------- + squeeze : The inverse operation, removing singleton dimensions + reshape : Insert, remove, and combine dimensions, and resize existing ones + atleast_1d, atleast_2d, atleast_3d + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + if isinstance(axis, int): + axis = (axis,) + + out_ndim = len(axis) + a.ndim + normalized_axis = normalize_axis_tuple(axis, out_ndim) + + shape_it = iter(a.shape) + shape = [ + 1 if ax in normalized_axis else next(shape_it) + for ax in range(out_ndim) + ] + + return a.reshape(shape) diff --git a/cupynumeric/_module/array_joining.py b/cupynumeric/_module/array_joining.py new file mode 100644 index 000000000..fbdf2adda --- /dev/null +++ b/cupynumeric/_module/array_joining.py @@ -0,0 +1,702 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def check_list_depth(arr: Any, prefix: NdShape = (0,)) -> int:
    """
    Return the nesting depth of `arr`, verifying that it is uniform.

    Anything that is not a ``list`` counts as a leaf of depth 0. A list is
    one level deeper than its elements, all of which must share the same
    depth.

    Parameters
    ----------
    arr : Any
        Nested list structure (or a leaf) to inspect.
    prefix : tuple of ints
        Index path of `arr` within the outermost structure; only used to
        build error messages.

    Returns
    -------
    depth : int
        Number of list levels wrapping the leaves.

    Raises
    ------
    ValueError
        If an empty list is found, or if sibling elements differ in depth.
    """
    if not isinstance(arr, list):
        return 0
    if not arr:
        raise ValueError(
            f"List at arrays{convert_to_array_form(prefix)} cannot be empty"
        )

    child_depths = [
        check_list_depth(item, prefix + (pos,))
        for pos, item in enumerate(arr)
    ]

    # Every sibling is compared against the first one; the first mismatch
    # (in list order) is the one reported.
    expected = child_depths[0]
    for pos, found in enumerate(child_depths):
        if found != expected:
            raise ValueError(
                "List depths are mismatched. First element was at depth "
                f"{expected}, but there is an element at"
                f" depth {found}, "
                f"arrays{convert_to_array_form(prefix + (pos,))}"
            )

    return expected + 1
def _collect_outshape_slices(
    inputs: Sequence[ndarray], common_shape: NdShape, axis: int
) -> tuple[list[Any], list[tuple[slice, ...]], Sequence[ndarray]]:
    """
    Work out where each input lands when joined along `axis`.

    Returns a 3-tuple of: the output shape (``common_shape`` with the
    `axis` extent replaced by the summed extents of all inputs), one
    destination slice tuple per non-empty input, and the list of those
    non-empty inputs. Each slice tuple covers `axis` and every later
    dimension; callers supply the leading dimensions themselves.
    """
    total_extent = _builtin_sum(arr.shape[axis] for arr in inputs)
    out_shape = list(common_shape)
    out_shape[axis] = total_extent

    # Full-range slices for every dimension that follows `axis`
    trailing = (slice(None),) * len(out_shape[axis + 1 :])

    # Zero-sized inputs contribute nothing, so they get no slice
    non_empty = [arr for arr in inputs if arr.size > 0]

    slices = []
    start = 0
    for arr in non_empty:
        stop = start + arr.shape[axis]
        slices.append((slice(start, stop),) + trailing)
        start = stop

    return out_shape, slices, non_empty
If axis is not given, both + `arr` and `values` are flattened before use. + + Returns + ------- + res : ndarray + A copy of arr with values appended to axis. + + See Also + -------- + numpy.append + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + # Check to see if we can build a new tuple of cuPyNumeric arrays + inputs = list(convert_to_cupynumeric_ndarray(inp) for inp in [arr, values]) + return concatenate(inputs, axis) + + +def block(arrays: Sequence[Any]) -> ndarray: + """ + Assemble an nd-array from nested lists of blocks. + + Blocks in the innermost lists are concatenated (see concatenate) + along the last dimension (-1), then these are concatenated along + the second-last dimension (-2), and so on until the outermost + list is reached. + + Blocks can be of any dimension, but will not be broadcasted using + the normal rules. Instead, leading axes of size 1 are inserted, + to make block.ndim the same for all blocks. This is primarily useful + for working with scalars, and means that code like np.block([v, 1]) + is valid, where v.ndim == 1. + + When the nested list is two levels deep, this allows block matrices + to be constructed from their components. + + Parameters + ---------- + arrays : nested list of array_like or scalars + If passed a single ndarray or scalar (a nested list of depth 0), + this is returned unmodified (and not copied). + + Elements shapes must match along the appropriate axes (without + broadcasting), but leading 1s will be prepended to the shape as + necessary to make the dimensions match. + + Returns + ------- + block_array : ndarray + The array assembled from the given blocks. 
def concatenate(
    inputs: Sequence[ndarray],
    axis: int | None = 0,
    out: ndarray | None = None,
    dtype: npt.DTypeLike | None = None,
    casting: CastingKind = "same_kind",
) -> ndarray:
    """

    concatenate((a1, a2, ...), axis=0, out=None, dtype=None,
    casting="same_kind")

    Join a sequence of arrays along an existing axis.

    Parameters
    ----------
    a1, a2, ... : Sequence[array_like]
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int, optional
        The axis along which the arrays will be joined. If axis is None,
        arrays are flattened before use. Default is 0.
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what concatenate would have returned if no
        out argument were specified.
    dtype : str or data-type
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.
    casting : ``{'no', 'equiv', 'safe', 'same_kind', 'unsafe'}``, optional
        Controls what kind of data casting may occur. Defaults to 'same_kind'.

    Returns
    -------
    res : ndarray
        The concatenated array.

    Raises
    ------
    TypeError
        If both `out` and `dtype` are provided.
    ValueError
        If `casting` is not one of the supported casting kinds.

    See Also
    --------
    numpy.concatenate

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    if dtype is not None and out is not None:
        # Adjacent literals are concatenated verbatim, so the separating
        # space has to be part of the first literal.
        raise TypeError(
            "concatenate() only takes `out` or `dtype` as an argument, "
            "but both were provided."
        )

    if casting not in casting_kinds:
        raise ValueError(
            "casting must be one of 'no', 'equiv', "
            "'safe', 'same_kind', or 'unsafe'"
        )

    # flatten arrays if axis == None and concatenate arrays on the first axis
    if axis is None:
        # Promote to at least 1-D first so that scalars can be raveled
        reshaped = _atleast_nd(1, inputs)
        if not isinstance(reshaped, list):
            reshaped = [reshaped]
        inputs = [inp.ravel() for inp in reshaped]
        axis = 0

    # Convert the inputs to cuPyNumeric arrays of a common dtype, then
    # validate their shapes against the join axis
    cupynumeric_inputs, common_info = check_shape_dtype_without_axis(
        inputs, concatenate.__name__, dtype, casting
    )
    check_shape_with_axis(cupynumeric_inputs, concatenate.__name__, axis)

    return _concatenate(
        cupynumeric_inputs,
        common_info,
        axis,
        out,
        dtype,
        casting,
    )
+ + See Also + -------- + numpy.stack + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if type(axis) is not int: + raise TypeError("The target axis should be an integer") + + arrays, common_info = check_shape_dtype_without_axis( + arrays, stack.__name__ + ) + shapes = {inp.shape for inp in arrays} + if len(shapes) != 1: + raise ValueError("all input arrays must have the same shape for stack") + + axis = normalize_axis_index(axis, common_info.ndim + 1) + shape = common_info.shape[:axis] + (1,) + common_info.shape[axis:] + arrays = [arr.reshape(shape) for arr in arrays] + common_info.shape = tuple(shape) + return _concatenate(arrays, common_info, axis, out=out) + + +def vstack(tup: Sequence[ndarray]) -> ndarray: + """ + + Stack arrays in sequence vertically (row wise). + + This is equivalent to concatenation along the first axis after 1-D arrays + of shape `(N,)` have been reshaped to `(1,N)`. Rebuilds arrays divided by + `vsplit`. + + This function makes most sense for arrays with up to 3 dimensions. For + instance, for pixel-data with a height (first axis), width (second axis), + and r/g/b channels (third axis). The functions `concatenate`, `stack` and + `block` provide more general stacking and concatenation operations. + + Parameters + ---------- + tup : Sequence[ndarray] + The arrays must have the same shape along all but the first axis. + 1-D arrays must have the same length. + + Returns + ------- + stacked : ndarray + The array formed by stacking the given arrays, will be at least 2-D. 
def hstack(tup: Sequence[ndarray]) -> ndarray:
    """

    Join arrays in sequence horizontally (column wise).

    One-dimensional inputs are joined along their only axis; inputs of
    higher dimensionality are joined along the second axis. This is the
    inverse of `hsplit`.

    The function is most natural for arrays of up to 3 dimensions, e.g.
    pixel data laid out as height (first axis), width (second axis) and
    r/g/b channels (third axis). `concatenate`, `stack` and `block` offer
    more general joining operations.

    Parameters
    ----------
    tup : Sequence[ndarray]
        Arrays whose shapes agree on every axis but the second; 1-D arrays
        may have any length.

    Returns
    -------
    stacked : ndarray
        The array formed by stacking the given arrays.

    See Also
    --------
    numpy.hstack

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # Scalars are promoted to 1-D so they can take part in the join
    promoted = _atleast_nd(1, tup)
    arrays = promoted if isinstance(promoted, list) else [promoted]

    tup, common_info = check_shape_dtype_without_axis(
        arrays, hstack.__name__
    )

    # 1-D inputs have no second axis, so they are joined along the first
    join_axis = 1 if common_info.ndim != 1 else 0
    check_shape_with_axis(tup, hstack.__name__, axis=join_axis)

    return _concatenate(
        tup,
        common_info,
        axis=join_axis,
        dtype=common_info.dtype,
    )
def column_stack(tup: Sequence[ndarray]) -> ndarray:
    """

    Stack 1-D arrays as columns into a 2-D array.

    Take a sequence of 1-D arrays and stack them as columns
    to make a single 2-D array. 2-D arrays are stacked as-is,
    just like with `hstack`. 1-D arrays are turned into 2-D columns
    first, so 1-D and 2-D inputs may be mixed.

    Parameters
    ----------
    tup : Sequence[ndarray]
        1-D or 2-D arrays to stack. All of them must have the same
        first dimension.

    Returns
    -------
    stacked : ndarray
        The 2-D array formed by stacking the given arrays.

    Raises
    ------
    ValueError
        If no arrays are given, or if the arrays (after 1-D inputs have been
        turned into columns) differ in dimensionality or in any dimension
        other than the second.

    See Also
    --------
    numpy.column_stack

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # Promote scalars to 1-D so they can be treated like any other column
    reshaped = _atleast_nd(1, tup)
    if not isinstance(reshaped, list):
        reshaped = [reshaped]

    # Turn every 1-D input into an (N, 1) column *before* the common-ndim
    # check; otherwise a mix of 1-D and 2-D inputs would be rejected even
    # though they are compatible once the columns are formed.
    columns = []
    for inp in reshaped:
        arr = convert_to_cupynumeric_ndarray(inp)
        if arr.ndim == 1:
            arr = arr.reshape((arr.shape[0], 1))
        columns.append(arr)

    tup, common_info = check_shape_dtype_without_axis(
        columns, column_stack.__name__
    )
    check_shape_with_axis(tup, column_stack.__name__, 1)
    return _concatenate(
        tup,
        common_info,
        axis=1,
        dtype=common_info.dtype,
    )
+# +from __future__ import annotations + +import itertools +from typing import TYPE_CHECKING, Sequence + +from .._array.util import add_boilerplate +from .._utils import is_np2 +from .array_dimension import broadcast +from .array_transpose import transpose +from .creation_shape import empty_like + +if is_np2: + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.numeric import normalize_axis_tuple # type: ignore + +if TYPE_CHECKING: + from .._array.array import ndarray + from ..types import NdShapeLike + + +@add_boilerplate("m") +def flip(m: ndarray, axis: NdShapeLike | None = None) -> ndarray: + """ + Reverse the order of elements in an array along the given axis. + + The shape of the array is preserved, but the elements are reordered. + + Parameters + ---------- + m : array_like + Input array. + axis : None or int or tuple[int], optional + Axis or axes along which to flip over. The default, axis=None, will + flip over all of the axes of the input array. If axis is negative it + counts from the last to the first axis. + + If axis is a tuple of ints, flipping is performed on all of the axes + specified in the tuple. + + Returns + ------- + out : array_like + A new array that is constructed from `m` with the entries of axis + reversed. + + See Also + -------- + numpy.flip + + Availability + -------- + Single GPU, Single CPU + + Notes + ----- + cuPyNumeric implementation doesn't return a view, it returns a new array + """ + return m.flip(axis=axis) + + +@add_boilerplate("m") +def flipud(m: ndarray) -> ndarray: + """ + Reverse the order of elements along axis 0 (up/down). + + For a 2-D array, this flips the entries in each column in the up/down + direction. Rows are preserved, but appear in a different order than before. + + Parameters + ---------- + m : array_like + Input array. + + Returns + ------- + out : array_like + A new array that is constructed from `m` with rows reversed. 
@add_boilerplate("m")
def rot90(m: ndarray, k: int = 1, axes: Sequence[int] = (0, 1)) -> ndarray:
    """
    Rotate an array in quarter turns within the plane spanned by two axes.

    The rotation goes from the first axis towards the second one, which for
    a 2-D array with the default `k` and `axes` is counterclockwise.

    Parameters
    ----------
    m : array_like
        Array of two or more dimensions.
    k : integer
        Number of quarter turns to apply.
    axes : (2,) array_like
        The two distinct axes that define the plane of rotation.

    Returns
    -------
    y : ndarray
        A rotated copy of `m`.

    Raises
    ------
    ValueError
        If `axes` does not hold exactly two entries, if both entries refer
        to the same axis, or if an entry is out of range for `m`.

    See Also
    --------
    flip : Reverse the order of elements in an array along the given axis.
    fliplr : Flip an array horizontally.
    flipud : Flip an array vertically.

    Availability
    --------
    Single GPU, Single CPU
    """
    axes = tuple(axes)
    if len(axes) != 2:
        raise ValueError("len(axes) must be 2.")

    first, second = axes
    ndim = m.ndim

    # Indices that differ by exactly ndim (e.g. 0 and -ndim) alias one axis
    if first == second or abs(first - second) == ndim:
        raise ValueError("Axes must be different.")

    if not (-ndim <= first < ndim and -ndim <= second < ndim):
        raise ValueError(
            f"Axes={axes} out of range for array of ndim={m.ndim}."
        )

    turns = k % 4

    if turns == 0:
        return m.copy()
    if turns == 2:
        # A half turn is a flip along both axes of the plane
        return flip(flip(m, first), second)

    # Axis permutation that exchanges the two axes of the plane
    perm = list(range(ndim))
    perm[first], perm[second] = perm[second], perm[first]

    if turns == 1:
        return transpose(flip(m, second), perm)
    # turns == 3
    return flip(transpose(m, perm), second)
= normalize_axis_tuple( + axis, a.ndim, allow_duplicate=True + ) + broadcasted = broadcast(shift, normalized_axis) + if broadcasted.ndim > 1: + raise ValueError( + "'shift' and 'axis' should be scalars or 1D sequences" + ) + shifts = {ax: 0 for ax in range(a.ndim)} + for sh, ax in broadcasted: + shifts[ax] += sh + + rolls: list[tuple[tuple[slice, ...], ...]] + rolls = [((slice(None), slice(None)),)] * a.ndim + for ax, offset in shifts.items(): + offset %= a.shape[ax] or 1 # If `a` is empty, nothing matters. + if offset: + # (original, result), (original, result) + rolls[ax] = ( + (slice(None, -offset), slice(offset, None)), + (slice(-offset, None), slice(None, offset)), + ) + + result = empty_like(a) + for indices in itertools.product(*rolls): + arr_index, res_index = zip(*indices) + result[res_index] = a[arr_index] + + return result diff --git a/cupynumeric/_module/array_shape.py b/cupynumeric/_module/array_shape.py new file mode 100644 index 000000000..078dc78a9 --- /dev/null +++ b/cupynumeric/_module/array_shape.py @@ -0,0 +1,119 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._array.util import add_boilerplate + +if TYPE_CHECKING: + from .._array.array import ndarray + from ..types import NdShapeLike, OrderType + + +@add_boilerplate("a") +def ravel(a: ndarray, order: OrderType = "C") -> ndarray: + """ + Return a contiguous flattened array. 
+ + A 1-D array, containing the elements of the input, is returned. A copy is + made only if needed. + + Parameters + ---------- + a : array_like + Input array. The elements in `a` are read in the order specified by + `order`, and packed as a 1-D array. + order : ``{'C','F', 'A', 'K'}``, optional + The elements of `a` are read using this index order. 'C' means + to index the elements in row-major, C-style order, + with the last axis index changing fastest, back to the first + axis index changing slowest. 'F' means to index the elements + in column-major, Fortran-style order, with the + first index changing fastest, and the last index changing + slowest. Note that the 'C' and 'F' options take no account of + the memory layout of the underlying array, and only refer to + the order of axis indexing. 'A' means to read the elements in + Fortran-like index order if `a` is Fortran *contiguous* in + memory, C-like order otherwise. 'K' means to read the + elements in the order they occur in memory, except for + reversing the data when strides are negative. By default, 'C' + index order is used. + + Returns + ------- + y : array_like + y is an array of the same subtype as `a`, with shape ``(a.size,)``. + Note that matrices are special cased for backward compatibility, if `a` + is a matrix, then y is a 1-D ndarray. + + See Also + -------- + numpy.ravel + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.ravel(order=order) + + +@add_boilerplate("a") +def reshape( + a: ndarray, newshape: NdShapeLike, order: OrderType = "C" +) -> ndarray: + """ + + Gives a new shape to an array without changing its data. + + Parameters + ---------- + a : array_like + Array to be reshaped. + newshape : int or tuple[int] + The new shape should be compatible with the original shape. If + an integer, then the result will be a 1-D array of that length. + One shape dimension can be -1. In this case, the value is + inferred from the length of the array and remaining dimensions. 
+ order : ``{'C', 'F', 'A'}``, optional + Read the elements of `a` using this index order, and place the + elements into the reshaped array using this index order. 'C' + means to read / write the elements using C-like index order, + with the last axis index changing fastest, back to the first + axis index changing slowest. 'F' means to read / write the + elements using Fortran-like index order, with the first index + changing fastest, and the last index changing slowest. Note that + the 'C' and 'F' options take no account of the memory layout of + the underlying array, and only refer to the order of indexing. + 'A' means to read / write the elements in Fortran-like index + order if `a` is Fortran *contiguous* in memory, C-like order + otherwise. + + Returns + ------- + reshaped_array : ndarray + This will be a new view object if possible; otherwise, it will + be a copy. Note there is no guarantee of the *memory layout* (C- or + Fortran- contiguous) of the returned array. + + See Also + -------- + numpy.reshape + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.reshape(newshape, order=order) diff --git a/cupynumeric/_module/array_splitting.py b/cupynumeric/_module/array_splitting.py new file mode 100644 index 000000000..4462ee5e6 --- /dev/null +++ b/cupynumeric/_module/array_splitting.py @@ -0,0 +1,246 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#
from __future__ import annotations

import operator
from typing import TYPE_CHECKING, Any

import numpy as np

from .._array.array import ndarray
from .._array.util import convert_to_cupynumeric_ndarray

if TYPE_CHECKING:
    import numpy.typing as npt


def split(a: ndarray, indices: int | ndarray, axis: int = 0) -> list[ndarray]:
    """

    Split an array into multiple sub-arrays as views into `a`.

    Parameters
    ----------
    a : ndarray
        Array to be divided into sub-arrays.
    indices : int or ndarray
        If `indices` is an integer, N, the array will be divided
        into N equal arrays along `axis`. If such a split is not possible,
        an error is raised.

        If `indices` is a 1-D array of sorted integers, the entries
        indicate where along `axis` the array is split. For example,
        ``[2, 3]`` would, for ``axis=0``, result in

        - a[:2]
        - a[2:3]
        - a[3:]

        If an index exceeds the dimension of the array along `axis`,
        an empty sub-array is returned correspondingly.
    axis : int, optional
        The axis along which to split, default is 0.

    Returns
    -------
    sub-arrays : list[ndarray]
        A list of sub-arrays as views into `a`.

    Raises
    ------
    ValueError
        If `indices` is given as an integer, but
        a split does not result in equal division.

    See Also
    --------
    numpy.split

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    return array_split(a, indices, axis, equal=True)


def _as_split_point(pts: Any) -> int:
    """Convert one split point to a built-in ``int`` or raise ValueError."""
    message = "Split points in the passed `indices` should be integer"
    # Booleans implement ``__index__`` but were never accepted as split
    # points; keep rejecting them explicitly.
    if isinstance(pts, (bool, np.bool_)):
        raise ValueError(message)
    try:
        # Accepts ``int`` as well as integer scalars such as ``np.int64``,
        # which is what iterating an integer NumPy array yields.
        return operator.index(pts)
    except TypeError:
        raise ValueError(message) from None


def array_split(
    a: ndarray,
    indices: int | tuple[int] | ndarray | npt.NDArray[Any],
    axis: int = 0,
    equal: bool = False,
) -> list[ndarray]:
    """

    Split an array into multiple sub-arrays.

    Please refer to the ``split`` documentation.  The only difference
    between these functions is that ``array_split`` allows
    `indices` to be an integer that does *not* equally
    divide the axis. For an array of length l that should be split
    into n sections, it returns l % n sub-arrays of size l//n + 1
    and the rest of size l//n.

    Parameters
    ----------
    a : array_like
        Array to be divided into sub-arrays.
    indices : int, list, tuple or integer ndarray
        Number of sections, or the split points along `axis`.
    axis : int, optional
        The axis along which to split, default is 0.
    equal : bool, optional
        If True, an integer `indices` must divide the axis evenly
        (this is how ``split`` calls this function). Default is False.

    Returns
    -------
    sub-arrays : list[ndarray]
        Non-empty pieces are views into the input; pieces that would be
        empty are freshly created arrays of length 0 along `axis`.

    Raises
    ------
    ValueError
        If `axis` is not smaller than the number of dimensions, if the
        number of sections is not positive, if `equal` is set and the
        axis cannot be divided evenly, if `indices` has an unsupported
        type, or if a split point is not an integer.

    See Also
    --------
    numpy.array_split

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    array = convert_to_cupynumeric_ndarray(a)
    if axis >= array.ndim:
        raise ValueError(
            f"array({array.shape}) has less dimensions than axis({axis})"
        )

    split_pts: list[int] = []
    if isinstance(indices, int):
        if indices <= 0:
            raise ValueError("number sections must be larger than 0.")
        axis_len = array.shape[axis]
        res = axis_len % indices
        if equal and res != 0:
            raise ValueError("array split does not result in an equal divison")

        len_subarr = axis_len // indices
        end_idx = axis_len
        first_idx = len_subarr

        if len_subarr == 0:
            # More sections requested than elements available: emit
            # length-1 pieces and let the loop below pad with empty ones.
            len_subarr = 1
            first_idx = len_subarr
            end_idx = indices
        elif res != 0:
            # The first 'res' groups have len_subarr + 1 elements
            split_pts = list(
                range(len_subarr + 1, (len_subarr + 1) * res, len_subarr + 1)
            )
            first_idx = (len_subarr + 1) * res
        split_pts.extend(range(first_idx, end_idx + 1, len_subarr))

    elif isinstance(indices, (list, tuple)) or (
        isinstance(indices, (ndarray, np.ndarray))
        and np.issubdtype(indices.dtype, np.integer)
    ):
        # Normalise every entry to a Python int up front so that integer
        # arrays (whose elements are NumPy scalars) are accepted.
        split_pts = [_as_split_point(pts) for pts in indices]
        axis_len = array.shape[axis]
        # Adding the size of the target dimension makes the loop below
        # emit the trailing (possibly empty) sub-array.
        split_pts.append(axis_len)

    else:
        raise ValueError("Integer or array for split should be provided")

    # Index template selecting everything; only the entry for `axis` is
    # rewritten per piece.
    in_shape: list[int | slice] = [slice(extent) for extent in array.shape]
    # Shape used for pieces that contain no elements.
    empty_shape = list(array.shape)
    empty_shape[axis] = 0

    result = []
    start_idx = 0
    for pts in split_pts:
        # A split point beyond the axis is clamped to the axis length.
        end_idx = min(pts, axis_len)
        # end_idx <= axis_len, so this also implies start_idx < axis_len.
        if start_idx < end_idx:
            in_shape[axis] = slice(start_idx, end_idx)
            new_subarray = array[tuple(in_shape)].view()
        else:
            new_subarray = ndarray(
                tuple(empty_shape),
                dtype=array.dtype,
                writeable=array._writeable,
            )
        result.append(new_subarray)
        # The next piece starts at the unclamped split point.
        start_idx = pts

    return result


def dsplit(a: ndarray, indices: int | ndarray) -> list[ndarray]:
    """

    Split array into multiple sub-arrays along the 3rd axis (depth).

    Please refer to the `split` documentation.  `dsplit` is equivalent
    to `split` with ``axis=2``, the array is always split along the third
    axis provided the array dimension is greater than or equal to 3.

    See Also
    --------
    numpy.dsplit

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    return split(a, indices, axis=2)


def hsplit(a: ndarray, indices: int | ndarray) -> list[ndarray]:
    """

    Split an array into multiple sub-arrays horizontally (column-wise).

    Please refer to the `split` documentation.  `hsplit` is equivalent
    to `split` with ``axis=1``, the array is always split along the second
    axis regardless of the array dimension.

    See Also
    --------
    numpy.hsplit

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    return split(a, indices, axis=1)


def vsplit(a: ndarray, indices: int | ndarray) -> list[ndarray]:
    """

    Split an array into multiple sub-arrays vertically (row-wise).

    Please refer to the ``split`` documentation.  ``vsplit`` is equivalent
    to ``split`` with `axis=0` (default), the array is always split along the
    first axis regardless of the array dimension.

    See Also
    --------
    numpy.vsplit

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    return split(a, indices, axis=0)
#
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Sequence, cast

import numpy as np

from .._array.array import ndarray
from .._array.util import add_boilerplate, convert_to_cupynumeric_ndarray
from .._utils import is_np2
from ..runtime import runtime
from .creation_shape import full

if is_np2:
    from numpy.lib.array_utils import normalize_axis_index  # type: ignore
else:
    from numpy.core.multiarray import normalize_axis_index  # type: ignore

if TYPE_CHECKING:
    import numpy.typing as npt

    from ..types import NdShape

if is_np2:
    from numpy.exceptions import AxisError  # type: ignore
else:
    from numpy import AxisError  # type: ignore

_builtin_max = max


@add_boilerplate("A")
def tile(
    A: ndarray, reps: int | Sequence[int] | npt.NDArray[np.int_]
) -> ndarray:
    """
    Construct an array by repeating A the number of times given by reps.

    If `reps` has length ``d``, the result will have dimension of ``max(d,
    A.ndim)``.

    If ``A.ndim < d``, `A` is promoted to be d-dimensional by prepending new
    axes. So a shape (3,) array is promoted to (1, 3) for 2-D replication,
    or shape (1, 1, 3) for 3-D replication. If this is not the desired
    behavior, promote `A` to d-dimensions manually before calling this
    function.

    If ``A.ndim > d``, `reps` is promoted to `A`.ndim by pre-pending 1's to it.
    Thus for an `A` of shape (2, 3, 4, 5), a `reps` of (2, 2) is treated as
    (1, 1, 2, 2).

    Parameters
    ----------
    A : array_like
        The input array.
    reps : 1d array_like
        The number of repetitions of `A` along each axis.

    Returns
    -------
    c : ndarray
        The tiled output array.

    Raises
    ------
    TypeError
        If `reps` has more than one dimension.

    See Also
    --------
    numpy.tile

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    computed_reps: tuple[int, ...]
    if isinstance(reps, int):
        computed_reps = (reps,)
    else:
        if np.ndim(reps) > 1:
            raise TypeError("`reps` must be a 1d sequence")
        computed_reps = tuple(reps)
    # Figure out the shape of the destination array
    out_dims = _builtin_max(A.ndim, len(computed_reps))
    # Prepend ones until the dimensions match
    while len(computed_reps) < out_dims:
        computed_reps = (1,) + computed_reps
    out_shape: NdShape = ()
    # Prepend dimensions if necessary
    for dim in range(out_dims - A.ndim):
        out_shape += (computed_reps[dim],)
    offset = len(out_shape)
    for dim in range(A.ndim):
        out_shape += (A.shape[dim] * computed_reps[offset + dim],)
    assert len(out_shape) == out_dims
    result = ndarray(out_shape, dtype=A.dtype, inputs=(A,))
    result._thunk.tile(A._thunk, computed_reps)
    return result


def repeat(a: ndarray, repeats: Any, axis: int | None = None) -> ndarray:
    """
    Repeat elements of an array.

    Parameters
    ----------
    a : array_like
        Input array.
    repeats : int or ndarray[int]
        The number of repetitions for each element. repeats is
        broadcasted to fit the shape of the given axis.
    axis : int, optional
        The axis along which to repeat values. By default, use the
        flattened input array, and return a flat output array.

    Returns
    -------
    repeated_array : ndarray
        Output array which has the same shape as a, except along the
        given axis.

    Raises
    ------
    TypeError
        If `repeats` is None or `axis` is neither None nor an ``int``.
    ValueError
        If `repeats` has more than one dimension, if its length does not
        match the extent of `axis`, or if `a` is a scalar and `repeats`
        has more than one element.
    AxisError
        If `a` is a scalar and `axis` is not one of None, 0 or -1.

    Notes
    -----
    Currently, repeat operations supports only 1D arrays

    A ``UserWarning`` is issued through the runtime whenever `repeats`
    is not already a built-in ``int`` and has to be converted.

    See Also
    --------
    numpy.repeat

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """

    if repeats is None:
        raise TypeError(
            "int() argument must be a string, a bytes-like object or a number,"
            " not 'NoneType'"
        )

    if np.ndim(repeats) > 1:
        raise ValueError("`repeats` should be scalar or 1D array")

    # axes should be integer type
    if axis is not None and not isinstance(axis, int):
        raise TypeError("Axis should be of integer type")

    # when array is a scalar
    if np.ndim(a) == 0:
        if axis is not None and axis != 0 and axis != -1:
            raise AxisError(
                f"axis {axis} is out of bounds for array of dimension 0"
            )
        if np.ndim(repeats) == 0:
            if not isinstance(repeats, int):
                runtime.warn(
                    "converting repeats to an integer type",
                    category=UserWarning,
                )
            repeats = np.int64(repeats)
            return full((repeats,), cast(int | float, a))
        elif np.ndim(repeats) == 1 and len(repeats) == 1:
            # `repeats` is a one-element sequence here, so the conversion
            # warning has to look at the element: testing the container
            # itself against `int` would warn on every call.
            if not isinstance(repeats[0], int):
                runtime.warn(
                    "converting repeats to an integer type",
                    category=UserWarning,
                )
            repeats = np.int64(repeats)
            return full((repeats[0],), cast(int | float, a))
        else:
            raise ValueError(
                "`repeat` with a scalar parameter `a` is only "
                "implemented for scalar values of the parameter `repeats`."
            )

    # array is an array
    array = convert_to_cupynumeric_ndarray(a)
    if np.ndim(repeats) == 1:
        repeats = convert_to_cupynumeric_ndarray(repeats)

    # if no axes specified, flatten array
    if axis is None:
        array = array.ravel()
        axis = 0

    axis_int: int = normalize_axis_index(axis, array.ndim)

    # If repeats is on a zero sized axis_int, then return the array.
    if array.shape[axis_int] == 0:
        return array.copy()

    if np.ndim(repeats) == 1:
        # A one-element `repeats` that does not match the axis extent is
        # treated like a scalar repeat count.
        if repeats.shape[0] == 1 and repeats.shape[0] != array.shape[axis_int]:
            repeats = repeats[0]

    # repeats is a scalar.
    if np.ndim(repeats) == 0:
        # repeats is 0
        if repeats == 0:
            empty_shape = list(array.shape)
            empty_shape[axis_int] = 0
            return ndarray(shape=tuple(empty_shape), dtype=array.dtype)
        # repeats should be integer type
        if not isinstance(repeats, int):
            runtime.warn(
                "converting repeats to an integer type",
                category=UserWarning,
            )
        result = array._thunk.repeat(
            repeats=np.int64(repeats),
            axis=axis_int,
            scalar_repeats=True,
        )
    # repeats is an array
    else:
        # repeats should be integer type
        repeats = repeats._warn_and_convert(np.int64)
        if repeats.shape[0] != array.shape[axis_int]:
            raise ValueError("incorrect shape of repeats array")
        result = array._thunk.repeat(
            repeats=repeats._thunk, axis=axis_int, scalar_repeats=False
        )
    return ndarray(shape=result.shape, thunk=result)
#
from __future__ import annotations

from typing import TYPE_CHECKING, Sequence

from .._array.util import add_boilerplate
from .._utils import is_np2

if is_np2:
    from numpy.lib.array_utils import normalize_axis_tuple  # type: ignore
else:
    from numpy.core.numeric import normalize_axis_tuple  # type: ignore


if TYPE_CHECKING:
    from .._array.array import ndarray


@add_boilerplate("a")
def swapaxes(a: ndarray, axis1: int, axis2: int) -> ndarray:
    """

    Exchange two axes of an array.

    Parameters
    ----------
    a : array_like
        Input array.
    axis1 : int
        One of the two axes to exchange.
    axis2 : int
        The other axis to exchange.

    Returns
    -------
    a_swapped : ndarray
        The result of ``a.swapaxes(axis1, axis2)``.

    See Also
    --------
    numpy.swapaxes

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    swapped = a.swapaxes(axis1, axis2)
    return swapped


@add_boilerplate("a")
def transpose(a: ndarray, axes: list[int] | None = None) -> ndarray:
    """

    Reorder the dimensions of an array.

    Parameters
    ----------
    a : array_like
        Input array.
    axes : list[int], optional
        Permutation to apply. When omitted, the dimensions are reversed.

    Returns
    -------
    p : ndarray
        The result of ``a.transpose(axes=axes)``.

    See Also
    --------
    numpy.transpose

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    permuted = a.transpose(axes=axes)
    return permuted


@add_boilerplate("a")
def moveaxis(
    a: ndarray, source: Sequence[int], destination: Sequence[int]
) -> ndarray:
    """
    Relocate selected axes of an array; all other axes keep their
    relative order.

    Parameters
    ----------
    a : ndarray
        The array whose axes should be reordered.
    source : int or Sequence[int]
        Current positions of the axes to move. These must be unique.
    destination : int or Sequence[int]
        Target position for each moved axis. These must also be unique.

    Returns
    -------
    result : ndarray
        The input transposed according to the computed axis order.

    Raises
    ------
    ValueError
        If `source` and `destination` differ in length.

    See Also
    --------
    numpy.moveaxis

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    src_axes = normalize_axis_tuple(source, a.ndim, "source")
    dst_axes = normalize_axis_tuple(destination, a.ndim, "destination")
    if len(src_axes) != len(dst_axes):
        raise ValueError(
            "`source` and `destination` arguments must have the same number "
            "of elements"
        )

    # Start from the axes that are not being moved, in their original order.
    permutation = [ax for ax in range(a.ndim) if ax not in src_axes]
    # Insert moved axes by ascending destination so that earlier insertions
    # do not shift the positions of later ones.
    placements = sorted(zip(dst_axes, src_axes))
    for target, moved in placements:
        permutation.insert(target, moved)
    return a.transpose(permutation)
@@ -14,19 +14,19 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Optional, Tuple +from typing import TYPE_CHECKING -from .array import add_boilerplate -from .module import empty +from .._array.util import add_boilerplate +from .creation_shape import empty if TYPE_CHECKING: - from .array import ndarray - from .types import BitOrder + from .._array.array import ndarray + from ..types import BitOrder def _sanitize_arguments( - a: ndarray, axis: Optional[int], bitorder: BitOrder -) -> Tuple[ndarray, int]: + a: ndarray, axis: int | None, bitorder: BitOrder +) -> tuple[ndarray, int]: if axis is None: if a.ndim > 1: a = a.ravel() @@ -49,7 +49,7 @@ def _sanitize_arguments( @add_boilerplate("a") def packbits( - a: ndarray, axis: Optional[int] = None, bitorder: BitOrder = "big" + a: ndarray, axis: int | None = None, bitorder: BitOrder = "big" ) -> ndarray: """ @@ -108,8 +108,8 @@ def packbits( @add_boilerplate("a") def unpackbits( a: ndarray, - axis: Optional[int] = None, - count: Optional[int] = None, + axis: int | None = None, + count: int | None = None, bitorder: BitOrder = "big", ) -> ndarray: """ diff --git a/cupynumeric/_module/creation_data.py b/cupynumeric/_module/creation_data.py new file mode 100644 index 000000000..03869905c --- /dev/null +++ b/cupynumeric/_module/creation_data.py @@ -0,0 +1,177 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
#
from __future__ import annotations

from typing import TYPE_CHECKING, Any, Literal

import numpy as np

from .._array.array import ndarray
from .._array.util import add_boilerplate
from ..runtime import runtime
from .creation_shape import empty_like

if TYPE_CHECKING:
    from ..types import OrderType


def array(
    obj: Any,
    dtype: np.dtype[Any] | None = None,
    copy: bool = True,
    order: OrderType | Literal["K"] = "K",
    subok: bool = False,
    ndmin: int = 0,
) -> ndarray:
    """
    array(obj, dtype=None, copy=True)

    Create an array.

    Parameters
    ----------
    obj : array_like
        An array, any object exposing the array interface, an object whose
        __array__ method returns an array, or any (nested) sequence.
    dtype : data-type, optional
        The desired data-type for the array. When given and different from
        the dtype of the intermediate result, the result is converted with
        ``astype``.
    copy : bool, optional
        If true (default), an input that already is an ndarray is copied,
        and any other input is converted without sharing (``share=False``
        is passed to the runtime). If false, sharing is requested and an
        existing ndarray is returned as-is unless a dtype conversion is
        needed.
    order : ``{'K', 'A', 'C', 'F'}``, optional
        Accepted for signature compatibility with ``numpy.array``; this
        function does not read it.
    subok : bool, optional
        Accepted for signature compatibility with ``numpy.array``; this
        function does not read it.
    ndmin : int, optional
        Specifies the minimum number of dimensions that the resulting
        array should have. Ones will be pre-pended to the shape as
        needed to meet this requirement.

    Returns
    -------
    out : ndarray
        An array object satisfying the specified requirements.

    See Also
    --------
    numpy.array

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """

    if isinstance(obj, ndarray):
        result = obj
    else:
        thunk = runtime.get_numpy_thunk(obj, share=(not copy), dtype=dtype)
        result = ndarray(shape=None, thunk=thunk)

    if dtype is not None and result.dtype != dtype:
        # The dtype conversion already yields a new array.
        result = result.astype(dtype)
    elif copy and obj is result:
        # Only an input that was already an ndarray still aliases `obj`.
        result = result.copy()

    if result.ndim < ndmin:
        leading_ones = (1,) * (ndmin - result.ndim)
        result = result.reshape(leading_ones + result.shape)
    return result


def asarray(a: Any, dtype: np.dtype[Any] | None = None) -> ndarray:
    """
    Convert the input to an array.

    Parameters
    ----------
    a : array_like
        Input data, in any form that can be converted to an array. This
        includes lists, lists of tuples, tuples, tuples of tuples, tuples
        of lists and ndarrays.
    dtype : data-type, optional
        By default, the data-type is inferred from the input data.

    Returns
    -------
    out : ndarray
        Array interpretation of `a`. An input that already is an ndarray
        is returned unchanged unless `dtype` is given and differs from
        its dtype. For a ``numpy.ndarray`` input the ``writeable`` flag is
        carried over to the result.

    See Also
    --------
    numpy.asarray

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    if isinstance(a, ndarray):
        result = a
    else:
        thunk = runtime.get_numpy_thunk(a, share=True, dtype=dtype)
        writeable = a.flags.writeable if isinstance(a, np.ndarray) else True
        result = ndarray(shape=None, thunk=thunk, writeable=writeable)

    if dtype is not None and result.dtype != dtype:
        result = result.astype(dtype)
    return result


@add_boilerplate("a")
def copy(a: ndarray) -> ndarray:
    """

    Return an array copy of the given object.

    Parameters
    ----------
    a : array_like
        Input data.

    Returns
    -------
    arr : ndarray
        A new array with the dtype of `a`, filled by a deep copy of it.

    See Also
    --------
    numpy.copy

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    duplicate = empty_like(a, dtype=a.dtype)
    duplicate._thunk.copy(a._thunk, deep=True)
    return duplicate
#
from __future__ import annotations

from typing import TYPE_CHECKING

from .._array.array import ndarray
from .._array.util import add_boilerplate
from .creation_shape import ones

if TYPE_CHECKING:
    import numpy.typing as npt


@add_boilerplate("v")
def diag(v: ndarray, k: int = 0) -> ndarray:
    """

    Extract a diagonal or construct a diagonal array.

    See the more detailed documentation for ``cupynumeric.diagonal`` if you
    use this function to extract a diagonal and wish to write to the
    resulting array; whether it returns a copy or a view depends on what
    version of numpy you are using.

    Parameters
    ----------
    v : array_like
        If `v` is a 2-D array, return its `k`-th diagonal.
        If `v` is a 1-D array, return a 2-D array with `v` on the `k`-th
        diagonal.
    k : int, optional
        Diagonal in question. The default is 0. Use `k>0` for diagonals
        above the main diagonal, and `k<0` for diagonals below the main
        diagonal.

    Returns
    -------
    out : ndarray
        The extracted diagonal or constructed diagonal array.

    Raises
    ------
    ValueError
        If `v` is neither 1-D nor 2-D.

    See Also
    --------
    numpy.diag

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    ndim = v.ndim
    if ndim == 0:
        raise ValueError("Input must be 1- or 2-d")
    if ndim > 2:
        raise ValueError("diag requires 1- or 2-D array, use diagonal instead")
    # 2-D input: extract the diagonal; 1-D input: build a matrix around it.
    return v.diagonal(offset=k, axis1=0, axis2=1, extract=(ndim == 2))


def tri(
    N: int,
    M: int | None = None,
    k: int = 0,
    dtype: npt.DTypeLike = float,
    *,
    like: ndarray | None = None,
) -> ndarray:
    """
    An array with ones at and below the given diagonal and zeros elsewhere.

    Parameters
    ----------
    N : int
        Number of rows in the array.
    M : int, optional
        Number of columns in the array.
        By default, `M` is taken equal to `N`.
    k : int, optional
        The sub-diagonal at and below which the array is filled.
        `k` = 0 is the main diagonal, while `k` < 0 is below it,
        and `k` > 0 is above.  The default is 0.
    dtype : dtype, optional
        Data type of the returned array.  The default is float.
    like : array_like
        Not supported; any value other than None raises ``ValueError``.

    Returns
    -------
    tri : ndarray of shape (N, M)
        Array with its lower triangle filled with ones and zero elsewhere;
        in other words ``T[i,j] == 1`` for ``j <= i + k``, 0 otherwise.

    Raises
    ------
    ValueError
        If `like` is not None.

    See Also
    --------
    numpy.tri

    Notes
    -----
    `like` argument is currently not supported

    Availability
    --------
    Multiple GPUs, Multiple CPUs

    """
    # TODO: add support for `like` (see issue #418)
    if like is not None:
        raise ValueError("like parameter is currently not supported")

    num_cols = N if M is None else M
    # Start from an all-ones matrix and zero everything above diagonal k.
    return tril(ones((N, num_cols), dtype=dtype), k)


@add_boilerplate("m")
def trilu(m: ndarray, k: int, lower: bool) -> ndarray:
    """
    Shared implementation of ``tril`` (``lower=True``) and ``triu``
    (``lower=False``).

    A 1-D input of length n produces an (n, n) result; inputs with two or
    more dimensions keep their shape. Raises ``TypeError`` for 0-D input.
    """
    if m.ndim < 1:
        raise TypeError("Array must be at least 1-D")
    if m.ndim >= 2:
        out_shape = m.shape
    else:
        # (n,) * 2 == (n, n)
        out_shape = m.shape * 2
    triangle = ndarray(out_shape, dtype=m.dtype, inputs=(m,))
    triangle._thunk.trilu(m._thunk, k, lower)
    return triangle


def tril(m: ndarray, k: int = 0) -> ndarray:
    """

    Lower triangle of an array.

    Return a copy of an array with elements above the `k`-th diagonal zeroed.

    Parameters
    ----------
    m : array_like
        Input array of shape (M, N).
    k : int, optional
        Diagonal above which to zero elements.  `k = 0` (the default) is the
        main diagonal, `k < 0` is below it and `k > 0` is above.

    Returns
    -------
    tril : ndarray
        Lower triangle of `m`, with the same data-type as `m`.

    See Also
    --------
    numpy.tril

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    return trilu(m, k, True)


def triu(m: ndarray, k: int = 0) -> ndarray:
    """

    Upper triangle of an array.

    Return a copy of a matrix with the elements below the `k`-th diagonal
    zeroed.

    Please refer to the documentation for `tril` for further details.

    See Also
    --------
    numpy.triu

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    return trilu(m, k, False)
+ + Values are generated within the half-open interval ``[start, stop)`` + (in other words, the interval including `start` but excluding `stop`). + For integer arguments the function is equivalent to the Python built-in + `range` function, but returns an ndarray rather than a list. + + When using a non-integer step, such as 0.1, the results will often not + be consistent. It is better to use `cupynumeric.linspace` for these cases. + + Parameters + ---------- + start : int or float, optional + Start of interval. The interval includes this value. The default + start value is 0. + stop : int or float + End of interval. The interval does not include this value, except + in some cases where `step` is not an integer and floating point + round-off affects the length of `out`. + step : int or float, optional + Spacing between values. For any output `out`, this is the distance + between two adjacent values, ``out[i+1] - out[i]``. The default + step size is 1. If `step` is specified as a position argument, + `start` must also be given. + dtype : data-type + The type of the output array. If `dtype` is not given, infer the data + type from the other input arguments. + + Returns + ------- + arange : ndarray + Array of evenly spaced values. + + For floating point arguments, the length of the result is + ``ceil((stop - start)/step)``. Because of floating point overflow, + this rule may result in the last element of `out` being greater + than `stop`. 
+ + See Also + -------- + numpy.arange + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if stop is None: + stop = start + start = 0 + + if step is None: + step = 1 + + if dtype is None: + dtype = np.result_type(start, stop, step) + else: + dtype = np.dtype(dtype) + + N = math.ceil((stop - start) / step) + result = ndarray((_builtin_max(0, N),), dtype) + result._thunk.arange(start, stop, step) + return result + + +@add_boilerplate("start", "stop") +def linspace( + start: ndarray, + stop: ndarray, + num: int = 50, + endpoint: bool = True, + retstep: bool = False, + dtype: npt.DTypeLike | None = None, + axis: int = 0, +) -> ndarray | tuple[ndarray, float | ndarray]: + """ + + Return evenly spaced numbers over a specified interval. + + Returns `num` evenly spaced samples, calculated over the + interval [`start`, `stop`]. + + The endpoint of the interval can optionally be excluded. + + Parameters + ---------- + start : array_like + The starting value of the sequence. + stop : array_like + The end value of the sequence, unless `endpoint` is set to False. + In that case, the sequence consists of all but the last of ``num + 1`` + evenly spaced samples, so that `stop` is excluded. Note that the step + size changes when `endpoint` is False. + num : int, optional + Number of samples to generate. Default is 50. Must be non-negative. + endpoint : bool, optional + If True, `stop` is the last sample. Otherwise, it is not included. + Default is True. + retstep : bool, optional + If True, return (`samples`, `step`), where `step` is the spacing + between samples. + dtype : data-type, optional + The type of the output array. If `dtype` is not given, infer the data + type from the other input arguments. + axis : int, optional + The axis in the result to store the samples. Relevant only if start + or stop are array-like. By default (0), the samples will be along a + new axis inserted at the beginning. Use -1 to get an axis at the end. 
+ + Returns + ------- + samples : ndarray + There are `num` equally spaced samples in the closed interval + ``[start, stop]`` or the half-open interval ``[start, stop)`` + (depending on whether `endpoint` is True or False). + step : float or ndarray, optional + Only returned if `retstep` is True + + Size of spacing between samples. + + See Also + -------- + numpy.linspace + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if num < 0: + raise ValueError("Number of samples, %s, must be non-negative." % num) + div = (num - 1) if endpoint else num + + common_kind = np.result_type(start.dtype, stop.dtype).kind + dt = np.complex128 if common_kind == "c" else np.float64 + if dtype is None: + dtype = dt + + delta = stop - start + y = arange(0, num, dtype=dt) + + out: tuple[Any, ...] # EllipsisType not even in typing_extensions yet + + # Reshape these arrays into dimensions that allow them to broadcast + if delta.ndim > 0: + if axis is None or axis == 0: + # First dimension + y = y.reshape((-1,) + (1,) * delta.ndim) + # Nothing else needs to be reshaped here because + # they should all broadcast correctly with y + if endpoint and num > 1: + out = (-1,) + elif axis == -1 or axis == delta.ndim: + # Last dimension + y = y.reshape((1,) * delta.ndim + (-1,)) + if endpoint and num > 1: + out = (Ellipsis, -1) + # Extend everything else with extra dimensions of 1 at the end + # so that they can broadcast with y + delta = delta.reshape(delta.shape + (1,)) + start = start.reshape(start.shape + (1,)) + elif axis < delta.ndim: + # Somewhere in the middle + y = y.reshape((1,) * axis + (-1,) + (1,) * (delta.ndim - axis)) + # Start array might be smaller than delta because of broadcast + startax = start.ndim - len(delta.shape[axis:]) + start = start.reshape( + start.shape[0:startax] + (1,) + start.shape[startax:] + ) + if endpoint and num > 1: + out = (Ellipsis, -1) + (slice(None, None, None),) * len( + delta.shape[axis:] + ) + delta = delta.reshape( + delta.shape[0:axis] 
@add_boilerplate()
def meshgrid(
    *xi: ndarray, copy: bool = True, sparse: bool = False, indexing: str = "xy"
) -> tuple[ndarray, ...]:
    """
    Return a tuple of coordinate matrices from coordinate vectors.

    Builds N-D coordinate arrays for vectorized evaluation of N-D
    scalar/vector fields over N-D grids, given one-dimensional coordinate
    arrays x1, x2,..., xn.

    Parameters
    ----------
    x1, x2,..., xn : array_like
        1-D arrays representing the coordinates of a grid.
    indexing : {'xy', 'ij'}, optional
        Cartesian ('xy', default) or matrix ('ij') indexing of output.
        See Notes for more details.
    sparse : bool, optional
        If True the shape of the returned coordinate array for dimension *i*
        is reduced from ``(N1, ..., Ni, ... Nn)`` to
        ``(1, ..., 1, Ni, 1, ..., 1)``. These sparse coordinate grids are
        intended to be used with broadcasting. When all coordinates are used
        in an expression, broadcasting still leads to a fully-dimensional
        result array.

        Default is False.
    copy : bool, optional
        If False, the broadcast/reshaped arrays are returned without an
        extra copy in order to conserve memory. Default is True. Please
        note that ``sparse=False, copy=False`` will likely return
        non-contiguous arrays. Furthermore, more than one element of a
        broadcast array may refer to a single memory location. If you need
        to write to the arrays, make copies first.

    Returns
    -------
    X1, X2,..., XN : tuple of ndarrays
        For vectors `x1`, `x2`,..., `xn` with lengths ``Ni=len(xi)``,
        returns ``(N1, N2, N3,..., Nn)`` shaped arrays if indexing='ij'
        or ``(N2, N1, N3,..., Nn)`` shaped arrays if indexing='xy'
        with the elements of `xi` repeated to fill the matrix along
        the first dimension for `x1`, the second for `x2` and so on.

    Raises
    ------
    ValueError
        If `indexing` is neither ``'xy'`` nor ``'ij'``.

    Notes
    -----
    Both indexing conventions are supported through the `indexing` keyword.
    In the 2-D case with inputs of length M and N, the outputs are of shape
    (N, M) for 'xy' indexing and (M, N) for 'ij' indexing. In the 3-D case
    with inputs of length M, N and P, outputs are of shape (N, M, P) for
    'xy' indexing and (M, N, P) for 'ij' indexing. The difference is
    illustrated by the following code snippet::

        xv, yv = np.meshgrid(x, y, indexing='ij')
        for i in range(nx):
            for j in range(ny):
                # treat xv[i,j], yv[i,j]

        xv, yv = np.meshgrid(x, y, indexing='xy')
        for i in range(nx):
            for j in range(ny):
                # treat xv[j,i], yv[j,i]

    In the 1-D and 0-D case, the indexing and sparse keywords have no effect.

    See Also
    --------
    numpy.meshgrid

    Availability
    --------
    Multiple GPUs, Multiple CPUs

    """
    if indexing not in ("xy", "ij"):
        raise ValueError("Valid values for `indexing` are 'xy' and 'ij'.")

    count = len(xi)

    # Give every input its own axis: the i-th vector becomes an array of
    # shape (1, ..., 1, -1, 1, ..., 1) with the free extent at position i.
    grids = []
    for axis, coords in enumerate(xi):
        open_shape = (1,) * axis + (-1,) + (1,) * (count - axis - 1)
        grids.append(asarray(coords).reshape(open_shape))

    if count > 1 and indexing == "xy":
        # Cartesian indexing: exchange the first and second axes of the
        # first two grids.
        for pos in (0, 1):
            grids[pos] = grids[pos].swapaxes(0, 1)

    if not sparse:
        # Expand each open grid to the full N-D shape.
        grids = broadcast_arrays(*grids, subok=True)

    if copy:
        grids = [grid.copy() for grid in grids]

    return tuple(grids)
@add_boilerplate("a")
def empty_like(
    a: ndarray,
    dtype: npt.DTypeLike | None = None,
    shape: NdShapeLike | None = None,
) -> ndarray:
    """

    empty_like(a, dtype=None, shape=None)

    Return a new array with the same shape and type as a given array.

    Parameters
    ----------
    a : array_like
        The shape and data-type of `a` define these same attributes
        of the returned array.
    dtype : data-type, optional
        Overrides the data type of the result.
    shape : int or tuple[int], optional
        Overrides the shape of the result.

    Returns
    -------
    out : ndarray
        Array of uninitialized (arbitrary) data with the same shape and type
        as `a`, unless overridden by `dtype` / `shape`.

    See Also
    --------
    numpy.empty_like

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # Explicit overrides win; otherwise inherit from the prototype array.
    result_shape = shape if shape is not None else a.shape
    result_dtype = a.dtype if dtype is None else np.dtype(dtype)
    # The prototype is handed to the constructor through `inputs`.
    return ndarray(result_shape, dtype=result_dtype, inputs=(a,))
def ones(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray:
    """

    Return a new array of given shape and type, filled with ones.

    Parameters
    ----------
    shape : int or tuple[int]
        Shape of the new array.
    dtype : data-type, optional
        The desired data-type for the array. Default is `cupynumeric.float64`.
        ``None`` is treated as the default, as in NumPy.

    Returns
    -------
    out : ndarray
        Array of ones with the given shape and dtype.

    See Also
    --------
    numpy.ones

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # `full` infers the dtype from the fill value when dtype is None, which
    # would produce an integer array here; NumPy's ones() yields float64 for
    # dtype=None, so resolve the default explicitly before delegating.
    resolved = np.dtype(np.float64) if dtype is None else np.dtype(dtype)
    return full(shape, 1, dtype=resolved)
def zeros(shape: NdShapeLike, dtype: npt.DTypeLike = np.float64) -> ndarray:
    """
    zeros(shape, dtype=float)

    Return a new array of given shape and type, filled with zeros.

    Parameters
    ----------
    shape : int or tuple[int]
        Shape of the new array.
    dtype : data-type, optional
        The desired data-type for the array. Default is `cupynumeric.float64`.
        ``None`` is treated as the default, as in NumPy.

    Returns
    -------
    out : ndarray
        Array of zeros with the given shape and dtype.

    See Also
    --------
    numpy.zeros

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # Passing dtype=None through to `full` would let it infer an integer
    # dtype from the fill value 0; NumPy's zeros() returns float64 in that
    # case, so map None to the documented default before delegating.
    resolved = np.dtype(np.float64) if dtype is None else np.dtype(dtype)
    return full(shape, 0, dtype=resolved)
def full(
    shape: NdShapeLike,
    value: Any,
    dtype: npt.DTypeLike | None = None,
) -> ndarray:
    """

    Return a new array of given shape and type, filled with `value`.

    Parameters
    ----------
    shape : int or tuple[int]
        Shape of the new array.
    value : scalar
        Fill value (called ``fill_value`` in NumPy).
    dtype : data-type, optional
        The desired data-type for the array. The default, None, means
        the dtype NumPy infers for `value` (``np.array(value).dtype``).

    Returns
    -------
    out : ndarray
        Array of `value` with the given shape and dtype.

    Raises
    ------
    OverflowError
        If `dtype` has an item size of one byte and `value` is greater
        than 255.

    See Also
    --------
    numpy.full

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    if dtype is not None:
        dtype = np.dtype(dtype)
        fill = np.array(value, dtype=dtype)
    else:
        fill = np.array(value)

    # When dtype was omitted, np.dtype(None) resolves to float64, so the
    # one-byte guard below is skipped in that case.
    # NOTE(review): the bound is 255 regardless of signedness/bool-ness of
    # the one-byte dtype -- confirm this is the intended check.
    is_one_byte = np.dtype(dtype).itemsize == 1
    if is_one_byte and value > 255:
        raise OverflowError(f"Value {value} out of bounds for {dtype}")

    out = empty(shape, dtype=fill.dtype)
    out._thunk.fill(fill)
    return out
+ + See Also + -------- + numpy.full_like + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if dtype is not None: + dtype = np.dtype(dtype) + else: + dtype = a.dtype + if np.dtype(dtype).itemsize == 1 and value > 255: + raise OverflowError(f"Value {value} out of bounds for {dtype}") + result = empty_like(a, dtype=dtype, shape=shape) + val = np.array(value).astype(dtype) + result._thunk.fill(val) + return result diff --git a/cupynumeric/_module/indexing.py b/cupynumeric/_module/indexing.py new file mode 100644 index 000000000..3af462256 --- /dev/null +++ b/cupynumeric/_module/indexing.py @@ -0,0 +1,1243 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import TYPE_CHECKING, Any, Sequence + +import numpy as np + +from .._array.array import ndarray +from .._array.util import ( + add_boilerplate, + check_writeable, + convert_to_cupynumeric_ndarray, +) +from .._utils import is_np2 +from .._utils.array import calculate_volume +from .._utils.coverage import is_implemented +from ..runtime import runtime +from ..types import NdShape +from .array_joining import hstack +from .array_shape import reshape +from .array_tiling import tile +from .creation_data import asarray +from .creation_matrices import tri +from .creation_ranges import arange +from .creation_shape import empty, ones +from .ssc_counting import count_nonzero +from .ssc_searching import nonzero + +if is_np2: + from numpy.lib.array_utils import normalize_axis_index # type: ignore +else: + from numpy.core.multiarray import normalize_axis_index # type: ignore + +if TYPE_CHECKING: + from typing import Callable + + import numpy.typing as npt + + from ..types import BoundsMode, OrderType + +_builtin_min = min + + +@add_boilerplate("arr", "mask", "vals") +def place(arr: ndarray, mask: ndarray, vals: ndarray) -> None: + """ + Change elements of an array based on conditional and input values. + + Parameters + ---------- + arr : array_like + Array to put data into. + mask : array_like + Mask array. Must have the same size as `arr`. + vals : 1-D sequence + Values to put into `arr`. Only the first N elements are used, + where N is the number of True values in mask. If vals is smaller + than N, it will be repeated, and if elements of a are to be masked, + this sequence must be non-empty. 
# Indexing-like operations
def indices(
    dimensions: Sequence[int], dtype: npt.DTypeLike = int, sparse: bool = False
) -> ndarray | tuple[ndarray, ...]:
    """
    Return an array representing the indices of a grid.

    Compute an array where the subarrays contain index values 0, 1, ...
    varying only along the corresponding axis.

    Parameters
    ----------
    dimensions : Sequence[int]
        The shape of the grid.
    dtype : data-type, optional
        Data type of the result.
    sparse : bool, optional
        Return a sparse representation of the grid instead of a dense
        representation. Default is False.

    Returns
    -------
    grid : ndarray or tuple[ndarray, ...]
        If sparse is False returns one array of grid indices,
        ``grid.shape = (len(dimensions),) + tuple(dimensions)``.
        If sparse is True returns a tuple of arrays, with
        ``grid[i].shape = (1, ..., 1, dimensions[i], 1, ..., 1)`` with
        dimensions[i] in the ith place.

    See Also
    --------
    numpy.indices

    Notes
    -----
    The output shape in the dense case is obtained by prepending the number
    of dimensions in front of the tuple of dimensions, i.e. if `dimensions`
    is a tuple ``(r0, ..., rN-1)`` of length ``N``, the output shape is
    ``(N, r0, ..., rN-1)``.
    The subarrays ``grid[k]`` contain the N-D array of indices along the
    ``k-th`` axis. Explicitly:

        grid[k, i0, i1, ..., iN-1] = ik

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # implementation of indices routine is adapted from NumPy
    dims = tuple(dimensions)
    rank = len(dims)

    def _axis_grid(axis: int, extent: int) -> ndarray:
        # 0..extent-1 laid out along `axis`, with unit extent elsewhere so
        # that it broadcasts against the full grid.
        open_shape = (1,) * axis + (extent,) + (1,) * (rank - axis - 1)
        return arange(extent, dtype=dtype).reshape(open_shape)

    if sparse:
        return tuple(_axis_grid(axis, extent) for axis, extent in enumerate(dims))

    dense: ndarray = empty((rank,) + dims, dtype=dtype)
    for axis, extent in enumerate(dims):
        # Assignment broadcasts the open grid over the whole sub-array.
        dense[axis] = _axis_grid(axis, extent)
    return dense
@add_boilerplate("indices")
def unravel_index(
    indices: ndarray,
    shape: NdShape,
    order: OrderType = "C",
) -> tuple[ndarray, ...] | ndarray:
    """
    Converts a flat index or array of flat indices into a tuple
    of coordinate arrays.

    Parameters
    ----------
    indices : array_like
        An integer array whose elements are indices into the flattened
        version of an array of dimensions ``shape``.
    shape : tuple of ints
        The shape of the array to use for unraveling ``indices``.
    order : {'C', 'F'}, optional
        Determines whether the indices should be viewed as indexing in
        row-major (C-style) or column-major (Fortran-style) order.

    Returns
    -------
    unraveled_coords : tuple of ndarray
        Each array in the tuple has the same shape as the ``indices``
        array.

    Raises
    ------
    ValueError
        If `order` is not ``'C'`` or ``'F'``, or if any index is negative
        or not smaller than the total number of elements of ``shape``.
    TypeError
        If `indices` cannot be cast to ``int64`` under the ``same_kind``
        rule.

    See Also
    --------
    numpy.unravel_index

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    if order not in ("F", "C"):
        raise ValueError("order is not understood")

    if indices is None or not np.can_cast(
        indices.dtype, np.int64, "same_kind"
    ):
        raise TypeError("only int indices permitted")

    size = calculate_volume(shape)

    # Valid flat indices are 0 .. size-1. An index equal to `size` is already
    # out of bounds and would otherwise silently wrap around to coordinate
    # (0, ..., 0) in the modulo arithmetic below.
    if (indices < 0).any() or (indices >= size).any():
        raise ValueError("indices have out-of-bounds value(s)")

    if indices.size == 0:
        return tuple(
            empty(indices.shape, dtype=indices.dtype) for _ in shape
        )

    # Peel off one coordinate per dimension, starting with the fastest
    # varying one: the last dimension for C order, the first for F order.
    c_order = order == "C"
    coords: list[ndarray] = []
    remaining = indices
    for dim in shape[::-1] if c_order else shape:
        coords.append(remaining % dim)
        remaining = remaining // dim

    if c_order:
        # Coordinates were produced last-dimension-first.
        coords.reverse()
    return tuple(coords)
def tril_indices(
    n: int, k: int = 0, m: int | None = None
) -> tuple[ndarray, ...]:
    """
    Return the indices for the lower-triangle of an (n, m) array.

    Parameters
    ----------
    n : int
        The row dimension of the arrays for which the returned
        indices will be valid.
    k : int, optional
        Diagonal offset (see :func:`cupynumeric.tril` for details).
    m : int, optional
        The column dimension of the arrays for which the returned
        indices will be valid.
        By default `m` is taken equal to `n`.

    Returns
    -------
    inds : tuple of arrays
        The indices for the lower-triangle. The returned tuple contains two
        arrays, each with the indices along one dimension of the array.

    See also
    --------
    numpy.tril_indices

    Availability
    ------------
    Multiple GPUs, Multiple CPUs
    """
    # Boolean lower-triangular mask; its nonzero positions are the indices.
    return nonzero(tri(n, m, k=k, dtype=bool))
def triu_indices(
    n: int, k: int = 0, m: int | None = None
) -> tuple[ndarray, ...]:
    """
    Return the indices for the upper-triangle of an (n, m) array.

    Parameters
    ----------
    n : int
        The row dimension of the arrays for which the returned indices
        will be valid.
    k : int, optional
        Diagonal offset (see :func:`cupynumeric.triu` for details).
    m : int, optional
        The column dimension of the arrays for which the returned
        arrays will be valid.
        By default `m` is taken equal to `n`.

    Returns
    -------
    inds : tuple of arrays
        The indices for the upper-triangle. The returned tuple contains two
        arrays, each with the indices along one dimension of the array.

    See also
    --------
    numpy.triu_indices

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # The upper triangle at offset k is the complement of the lower
    # triangle at offset k - 1.
    strictly_below = tri(n, m, k=k - 1, dtype=bool)
    return nonzero(~strictly_below)
+ + See Also + -------- + numpy.triu_indices_from + + Notes + ----- + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + # this implementation is taken from numpy + if arr.ndim != 2: + raise ValueError("input array must be 2-d") + return triu_indices(arr.shape[-2], k=k, m=arr.shape[-1]) + + +@add_boilerplate("a") +def take( + a: ndarray, + indices: ndarray, + axis: int | None = None, + out: ndarray | None = None, + mode: BoundsMode = "raise", +) -> ndarray: + """ + Take elements from an array along an axis. + When axis is not None, this function does the same thing as “fancy” + indexing (indexing arrays using arrays); however, it can be easier + to use if you need elements along a given axis. A call such as + `np.take(arr, indices, axis=3)` is equivalent to `arr[:,:,:,indices,...]`. + + Parameters + ---------- + a : array_like `(Ni…, M, Nk…)` + The source array. + indices : array_like `(Nj…)` + The indices of the values to extract. + Also allow scalars for indices. + axis : int, optional + The axis over which to select values. By default, the flattened input + array is used. + out : ndarray, optional `(Ni…, Nj…, Nk…)` + If provided, the result will be placed in this array. It should be of + the appropriate shape and dtype. + mode : ``{'raise', 'wrap', 'clip'}``, optional + Specifies how out-of-bounds indices will behave. + 'raise' - raise an error (default) + 'wrap' - wrap around + 'clip' - clip to the range + 'clip' mode means that all indices that are too large are replaced by + the index that addresses the last element along that axis. + Note that this disables indexing with negative numbers. + + Returns + ------- + out : ndarray `(Ni…, Nj…, Nk…)` + The returned array has the same type as a. 
def ix_(*args: Any) -> tuple[ndarray, ...]:
    """
    Construct an open mesh from multiple sequences.

    This function takes N 1-D sequences and returns N outputs with N
    dimensions each, such that the shape is 1 in all but one dimension
    and the dimension with the non-unit shape value cycles through all
    N dimensions.

    Using `ix_` one can quickly construct index arrays that will index
    the cross product. ``a[np.ix_([1,3],[2,5])]`` returns the array
    ``[[a[1,2] a[1,5]], [a[3,2] a[3,5]]]``.

    Parameters
    ----------
    args : 1-D sequences
        Each sequence should be of integer or boolean type.
        Boolean sequences will be interpreted as boolean masks for the
        corresponding dimension (equivalent to passing in
        ``np.nonzero(boolean_sequence)``).

    Returns
    -------
    out : tuple of ndarrays
        N arrays with N dimensions each, with N the number of input
        sequences. Together these arrays form an open mesh.

    Raises
    ------
    ValueError
        If any input is not 1-dimensional.

    See Also
    --------
    ogrid, mgrid, meshgrid

    Availability
    --------
    Multiple GPUs, Multiple CPUs

    """
    total = len(args)
    mesh = []

    for axis, seq in enumerate(args):
        index = seq
        if not isinstance(index, ndarray):
            index = asarray(index)
            if index.size == 0:
                # Explicitly type empty arrays to avoid float default
                index = index.astype(np.intp)

        if index.ndim != 1:
            raise ValueError("Cross index must be 1 dimensional")

        if np.issubdtype(index.dtype, bool):
            # A boolean mask selects the positions where it is True.
            (index,) = index.nonzero()

        # Unit extent everywhere except along this sequence's own axis.
        open_shape = [1] * total
        open_shape[axis] = index.size
        mesh.append(index.reshape(tuple(open_shape)))

    return tuple(mesh)
@add_boilerplate("a", "indices", "values")
def put_along_axis(
    a: ndarray, indices: ndarray, values: ndarray, axis: int | None
) -> None:
    """
    Write values into an array through indices laid out along one axis.

    Every 1-D slice of `indices` taken along `axis` is paired with the
    matching 1-D slice of `a`; each index in the former selects the element
    of the latter that receives the corresponding entry of `values`. The
    paired slices may have different lengths.

    Index arrays produced by :func:`cupynumeric.argsort` and
    :func:`cupynumeric.argpartition` are suitable inputs for this function.

    Parameters
    ----------
    a : ndarray (Ni..., M, Nk...)
        Destination array; it is modified in place.
    indices : ndarray (Ni..., J, Nk...)
        Integer indices to overwrite along each 1-D slice of `a`. It must
        have the same number of dimensions as `a`, but dimensions in Ni and
        Nk may be 1 to broadcast against `a`.
    values : array_like (Ni..., J, Nk...)
        Values to insert at those indices. Its shape and dimension are
        broadcast to match that of `indices`.
    axis : int or None
        The axis to take 1-D slices along. If None, the destination array
        is treated as if a flattened 1-D view had been created of it; this
        case is currently supported only for 1-D input arrays.

    Raises
    ------
    TypeError
        If `indices` does not have an integer dtype.
    ValueError
        If ``axis=None`` and `indices` is not 1-D or `a` has more than one
        dimension, or if `a` and `indices` differ in number of dimensions.

    Notes
    -----
    Having duplicate entries in `indices` results in undefined behavior,
    since the operation updates the entries of `a` asynchronously.

    An empty `a` makes the call a no-op: the function returns before any
    argument validation takes place.

    See Also
    --------
    numpy.put_along_axis

    Availability
    ------------
    Multiple GPUs, Multiple CPUs
    """
    if a.size == 0:
        return

    check_writeable(a)

    if not np.issubdtype(indices.dtype, np.integer):
        raise TypeError("`indices` must be an integer array")

    if axis is not None:
        computed_axis = normalize_axis_index(axis, a.ndim)
    else:
        if indices.ndim != 1:
            raise ValueError("indices must be 1D if axis=None")
        if a.ndim > 1:
            # TODO call a=a.flat when flat is implemented
            raise ValueError("a.ndim>1 case is not supported when axis=None")
        if indices.size == 0 or values.size == 0:
            return
        if values.shape != indices.shape:
            values = values._wrap(indices.size)
        computed_axis = 0

    if a.ndim != indices.ndim:
        raise ValueError(
            "`indices` and `a` must have the same number of dimensions"
        )

    a[
        _fill_fancy_index_for_along_axis_routines(
            a.shape, computed_axis, indices
        )
    ] = values
Then, a new array with shape ``Ba.shape`` is created as + follows: + + * if ``mode='raise'`` (the default), then, first of all, each element of + ``a`` (and thus ``Ba``) must be in the range ``[0, n-1]``; now, suppose + that ``i`` (in that range) is the value at the ``(j0, j1, ..., jm)`` + position in ``Ba`` - then the value at the same position in the new array + is the value in ``Bchoices[i]`` at that same position; + + * if ``mode='wrap'``, values in `a` (and thus `Ba`) may be any (signed) + integer; modular arithmetic is used to map integers outside the range + `[0, n-1]` back into that range; and then the new array is constructed + as above; + + * if ``mode='clip'``, values in `a` (and thus ``Ba``) may be any (signed) + integer; negative integers are mapped to 0; values greater than ``n-1`` + are mapped to ``n-1``; and then the new array is constructed as above. + + Parameters + ---------- + a : ndarray[int] + This array must contain integers in ``[0, n-1]``, where ``n`` is the + number of choices, unless ``mode=wrap`` or ``mode=clip``, in which + cases any integers are permissible. + choices : Sequence[ndarray] + Choice arrays. `a` and all of the choices must be broadcastable to the + same shape. If `choices` is itself an array (not recommended), then + its outermost dimension (i.e., the one corresponding to + ``choices.shape[0]``) is taken as defining the "sequence". + out : ndarray, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. Note that `out` is always + buffered if ``mode='raise'``; use other modes for better performance. + mode : ``{'raise', 'wrap', 'clip'}``, optional + Specifies how indices outside ``[0, n-1]`` will be treated: + + * 'raise' : an exception is raised (default) + * 'wrap' : value becomes value mod ``n`` + * 'clip' : values < 0 are mapped to 0, values > n-1 are mapped to n-1 + + Returns + ------- + merged_array : ndarray + The merged result. 
def select(
    condlist: Sequence[npt.ArrayLike | ndarray],
    choicelist: Sequence[npt.ArrayLike | ndarray],
    default: Any = 0,
) -> ndarray:
    """
    Return an array drawn from elements in choicelist, depending on conditions.

    Parameters
    ----------
    condlist : list of bool ndarrays
        Conditions deciding which array of `choicelist` supplies each output
        element. When several conditions hold at a position, the first one
        encountered in `condlist` is used.
    choicelist : list of ndarrays
        Arrays the output elements are taken from. It must have the same
        length as `condlist`.
    default : scalar, optional
        Element inserted in the output where every condition is False.

    Returns
    -------
    output : ndarray
        The output at position m is the m-th element of the array in
        `choicelist` where the m-th element of the corresponding array in
        `condlist` is True.

    Raises
    ------
    ValueError
        If `condlist` and `choicelist` differ in length, or are empty.
    TypeError
        If an entry of `condlist` does not have boolean dtype.

    See Also
    --------
    numpy.select

    Availability
    ------------
    Multiple GPUs, Multiple CPUs
    """
    n_conditions = len(condlist)
    if n_conditions != len(choicelist):
        raise ValueError(
            "list of cases must be same length as list of conditions"
        )
    if n_conditions == 0:
        raise ValueError("select with an empty condition list is not possible")

    # Convert every condition first, then validate the dtypes.
    conds = tuple(convert_to_cupynumeric_ndarray(item) for item in condlist)
    for i, cond in enumerate(conds):
        if cond.dtype != bool:
            raise TypeError(
                f"invalid entry {i} in condlist: should be boolean ndarray"
            )

    raw_choices = tuple(
        convert_to_cupynumeric_ndarray(item) for item in choicelist
    )
    # The result dtype accounts for every choice as well as the default.
    common_type = np.result_type(*raw_choices, default)
    args = conds + raw_choices
    choices = tuple(
        choice._maybe_convert(common_type, args) for choice in raw_choices
    )
    default_ = np.array(default, dtype=common_type)

    out_shape = np.broadcast_shapes(
        *(operand.shape for operand in conds + choices)
    )
    result = ndarray(shape=out_shape, dtype=common_type, inputs=args)
    cond_thunks = tuple(cond._thunk for cond in conds)
    choice_thunks = tuple(choice._thunk for choice in choices)
    result._thunk.select(cond_thunks, choice_thunks, default_)
    return result
@add_boilerplate("a")
def diagonal(
    a: ndarray,
    offset: int = 0,
    axis1: int = 0,
    axis2: int = 1,
    extract: bool = True,
) -> ndarray:
    """
    diagonal(a: ndarray, offset=0, axis1=0, axis2=1)

    Return specified diagonals.

    If `a` is 2-D, returns the diagonal of `a` with the given offset,
    i.e., the collection of elements of the form ``a[i, i+offset]``. If
    `a` has more than two dimensions, then the axes specified by `axis1`
    and `axis2` are used to determine the 2-D sub-array whose diagonal is
    returned. The shape of the resulting array can be determined by
    removing `axis1` and `axis2` and appending an index to the right equal
    to the size of the resulting diagonals.

    Parameters
    ----------
    a : array_like
        Array from which the diagonals are taken.
    offset : int, optional
        Offset of the diagonal from the main diagonal. Can be positive or
        negative. Defaults to main diagonal (0).
    axis1 : int, optional
        Axis to be used as the first axis of the 2-D sub-arrays from which
        the diagonals should be taken. Defaults to first axis (0).
    axis2 : int, optional
        Axis to be used as the second axis of the 2-D sub-arrays from
        which the diagonals should be taken. Defaults to second axis (1).

    Returns
    -------
    array_of_diagonals : ndarray
        If `a` is 2-D, then a 1-D array containing the diagonal and of the
        same type as `a` is returned unless `a` is a `matrix`, in which case
        a 1-D array rather than a (2-D) `matrix` is returned in order to
        maintain backward compatibility.

        If ``a.ndim > 2``, then the dimensions specified by `axis1` and `axis2`
        are removed, and a new axis inserted at the end corresponding to the
        diagonal.

    Raises
    ------
    ValueError
        If the dimension of `a` is less than 2.

    Notes
    -----
    Unlike NumPy's, the cuPyNumeric implementation always returns a copy

    See Also
    --------
    numpy.diagonal

    Availability
    ------------
    Multiple GPUs, Multiple CPUs

    """
    # NOTE(review): `extract` is forwarded unchanged to ndarray.diagonal and is
    # left out of the documented signature above; its semantics are defined by
    # that method and are not visible here -- confirm before documenting it.
    return a.diagonal(offset=offset, axis1=axis1, axis2=axis2, extract=extract)
@add_boilerplate("a", "val")
def fill_diagonal(a: ndarray, val: ndarray, wrap: bool = False) -> None:
    """
    Fill the main diagonal of the given array of any dimensionality.

    For an array a with a.ndim >= 2, the diagonal is the list of locations with
    indices a[i, ..., i] all identical. This function modifies the input
    array in-place, it does not return a value.

    Parameters
    ----------
    a : array, at least 2-D.
        Array whose diagonal is to be filled, it gets modified in-place.
    val : scalar or array_like
        Value(s) to write on the diagonal. If val is scalar, the value is
        written along the diagonal.
        If array-like, the flattened val is written along
        the diagonal, repeating if necessary to fill all diagonal entries.
    wrap : bool
        If true, the diagonal "wraps" after N columns, for tall 2d matrices.

    Raises
    ------
    ValueError
        If the dimension of `a` is less than 2, or if `a` has more than two
        dimensions and they are not all of equal length.

    Notes
    -----
    If either `a` or `val` is empty the call returns immediately, before any
    of the checks above are performed.

    See Also
    --------
    numpy.fill_diagonal

    Availability
    ------------
    Multiple GPUs, Multiple CPUs

    """
    # Nothing to write, or nowhere to write it.
    if val.size == 0 or a.size == 0:
        return

    check_writeable(a)

    if a.ndim < 2:
        raise ValueError("array must be at least 2-d")

    n = _builtin_min(a.shape)

    if a.ndim > 2:
        for s in a.shape:
            if s != n:
                raise ValueError(
                    "All dimensions of input must be of equal length"
                )

    # Wrapping only makes a difference for tall 2-D matrices.
    wraps = a.ndim == 2 and wrap and a.shape[0] > a.shape[1]

    if wraps:
        nrows, ncols = a.shape
        # The wrapped diagonal restarts every (ncols + 1) rows, leaving one
        # untouched row between consecutive runs.
        len_val = nrows - nrows // (ncols + 1)
    else:
        len_val = n

    if (val.size != len_val and val.ndim > 0) or val.ndim > 1:
        val = val._wrap(len_val)

    if wraps:
        # The k-th diagonal entry sits in column k % ncols; its row is k plus
        # one skipped row for every completed run of ncols entries. This
        # closed form yields the same indices as stacking shifted copies of
        # arange(ncols) one run at a time, without the per-run array copies.
        k = arange(len_val, dtype=int)
        rows = k + k // ncols
        cols = k % ncols
        a[rows, cols] = val
    else:
        idx = arange(n, dtype=int)
        a[(idx,) * a.ndim] = val
def load(
    file: str | bytes | PathLike[Any] | BinaryIO,
    *,
    max_header_size: int = 10000,
) -> ndarray:
    """
    Load an array from a ``.npy`` file.

    The file is read with :func:`numpy.load` and the resulting NumPy array is
    then converted with :func:`cupynumeric.array`.

    Parameters
    ----------
    file : file-like object, string, or pathlib.Path
        The file to read. File-like objects must support the
        ``seek()`` and ``read()`` methods and must always
        be opened in binary mode.
    max_header_size : int, optional
        Maximum allowed size of the header. Large headers may not be safe
        to load securely and thus require explicitly passing a larger value.
        See :py:func:`ast.literal_eval()` for details.

    Returns
    -------
    result : array
        Data stored in the file.

    Raises
    ------
    OSError
        If the input file does not exist or cannot be read.

    See Also
    --------
    numpy.load

    Notes
    -----
    cuPyNumeric does not currently support ``.npz`` and pickled files.

    Availability
    ------------
    Single CPU
    """
    # All parsing is delegated to NumPy; only the conversion of the result
    # into a cuPyNumeric array happens here.
    return array(
        np.load(
            file,
            max_header_size=max_header_size,  # type: ignore [call-arg]
        )
    )
@add_boilerplate("a", "b")
def dot(a: ndarray, b: ndarray, out: ndarray | None = None) -> ndarray:
    """
    Dot product of two arrays. Specifically,

    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
      (without complex conjugation).

    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
      but using ``a @ b`` is preferred.

    - If either `a` or `b` is 0-D (scalar), it is equivalent to
      :func:`multiply` and using ``cupynumeric.multiply(a, b)`` or ``a * b`` is
      preferred.

    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
      the last axis of `a` and `b`.

    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
      sum product over the last axis of `a` and the second-to-last axis of
      `b`::

        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])

    Parameters
    ----------
    a : array_like
        First argument.
    b : array_like
        Second argument.
    out : ndarray, optional
        Output argument. This must have the exact shape and dtype that would be
        returned if it was not present.

    Returns
    -------
    output : ndarray
        Returns the dot product of `a` and `b`. If `out` is given, then it is
        returned.

    Notes
    -----
    The cuPyNumeric implementation is a little more liberal than NumPy in terms
    of allowed broadcasting, e.g. ``dot(ones((3,1)), ones((4,5)))`` is allowed.

    Except for the inner-product case, only floating-point types are supported.

    See Also
    --------
    numpy.dot

    Availability
    ------------
    Multiple GPUs, Multiple CPUs
    """
    # The module-level function simply forwards to the ndarray method.
    return a.dot(b, out=out)
@add_boilerplate("a", "b")
def vdot(a: ndarray, b: ndarray, out: ndarray | None = None) -> ndarray:
    """
    Return the dot product of two vectors.

    Unlike dot(`a`, `b`), vdot(`a`, `b`) uses the complex conjugate of its
    first argument when computing the product.

    `vdot` also treats multidimensional arrays differently than `dot`: no
    matrix product is performed; both arguments are flattened to 1-D vectors
    first. Consequently, it should only be used for vectors.

    Parameters
    ----------
    a : array_like
        First argument; its complex conjugate enters the product.
    b : array_like
        Second argument to the dot product.
    out : ndarray, optional
        Output argument. This must have the exact shape that would be returned
        if it was not present. If its dtype is not what would be expected from
        this operation, then the result will be (unsafely) cast to `out`.

    Returns
    -------
    output : ndarray
        Dot product of `a` and `b`. If `out` is given, then it is returned.

    Notes
    -----
    The cuPyNumeric implementation is a little more liberal than NumPy in terms
    of allowed broadcasting, e.g. ``vdot(ones((1,)), ones((4,)))`` is allowed.

    See Also
    --------
    numpy.vdot

    Availability
    ------------
    Multiple GPUs, Multiple CPUs
    """
    # Flatten both operands; only the first one is conjugated.
    conjugated_lhs = a.ravel().conj()
    flat_rhs = b.ravel()
    return inner(conjugated_lhs, flat_rhs, out=out)
+ + Returns + ------- + output : (M, N) ndarray + ``output[i, j] = a[i] * b[j]`` + If `out` is given, then it is returned. + + See Also + -------- + numpy.outer + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return multiply( + a.ravel()[:, np.newaxis], b.ravel()[np.newaxis, :], out=out + ) + + +@add_boilerplate("a", "b") +def tensordot( + a: ndarray, + b: ndarray, + axes: AxesPairLike = 2, + out: ndarray | None = None, +) -> ndarray: + """ + Compute tensor dot product along specified axes. + + Given two tensors, `a` and `b`, and an array_like object containing + two array_like objects, ``(a_axes, b_axes)``, sum the products of + `a`'s and `b`'s elements (components) over the axes specified by + ``a_axes`` and ``b_axes``. The third argument can be a single non-negative + integer_like scalar, ``N``; if it is such, then the last ``N`` dimensions + of `a` and the first ``N`` dimensions of `b` are summed over. + + Parameters + ---------- + a, b : array_like + Tensors to "dot". + + axes : int or array_like + * integer_like + If an int N, sum over the last N axes of `a` and the first N axes + of `b` in order. + * (2,) array_like + Or, a list of axes to be summed over, first sequence applying to `a`, + second to `b`. Both elements array_like must be of the same length. + out : ndarray, optional + Output argument. This must have the exact shape that would be returned + if it was not present. If its dtype is not what would be expected from + this operation, then the result will be (unsafely) cast to `out`. + + Returns + ------- + output : ndarray + The tensor dot product of the inputs. If `out` is given, then it is + returned. + + Notes + ----- + The cuPyNumeric implementation is a little more liberal than NumPy in terms + of allowed broadcasting, e.g. ``tensordot(ones((3,1)), ones((1,4)))`` is + allowed. + + Except for the inner-product case, only floating-point types are supported. 
+ + See Also + -------- + numpy.tensordot + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + (a_modes, b_modes, out_modes) = tensordot_modes(a.ndim, b.ndim, axes) + + return _contract( + a_modes, + b_modes, + out_modes, + a, + b, + out=out, + casting="unsafe", + ) + + +# Trivial multi-tensor contraction strategy: contract in input order +class NullOptimizer(oe.paths.PathOptimizer): # type: ignore [misc,no-any-unimported] # noqa + def __call__( + self, + inputs: list[set[str]], + outputs: set[str], + size_dict: dict[str, int], + memory_limit: int | None = None, + ) -> list[tuple[int, int]]: + return [(0, 1)] + [(0, -1)] * (len(inputs) - 2) + + +def _maybe_cast_input( + arr: ndarray, to_dtype: np.dtype[Any], casting: CastingKind +) -> ndarray: + if arr.dtype == to_dtype: + return arr + if not np.can_cast(arr.dtype, to_dtype, casting=casting): + raise TypeError( + f"Cannot cast input array of type {arr.dtype} to {to_dtype} with " + f"casting rule '{casting}'" + ) + return arr.astype(to_dtype) + + +# Generalized tensor contraction +def _contract( + a_modes: list[str], + b_modes: list[str], + out_modes: list[str], + a: ndarray, + b: ndarray | None = None, + out: ndarray | None = None, + casting: CastingKind = "same_kind", + dtype: np.dtype[Any] | None = None, +) -> ndarray: + # Sanity checks + if len(a_modes) != a.ndim: + raise ValueError( + f"Expected {len(a_modes)}-d input array but got {a.ndim}-d" + ) + + if b is None: + if len(b_modes) != 0: + raise ValueError("Missing input array") + elif len(b_modes) != b.ndim: + raise ValueError( + f"Expected {len(b_modes)}-d input array but got {b.ndim}-d" + ) + + if out is not None and len(out_modes) != out.ndim: + raise ValueError( + f"Expected {len(out_modes)}-d output array but got {out.ndim}-d" + ) + + if len(set(out_modes)) != len(out_modes): + raise ValueError("Duplicate mode labels on output") + + if len(set(out_modes) - set(a_modes) - set(b_modes)) > 0: + raise ValueError("Unknown mode labels on output") 
+ + makes_view = b is None and len(a_modes) == len(out_modes) + if dtype is not None and not makes_view: + c_dtype = dtype + elif out is not None: + c_dtype = out.dtype + elif b is None: + c_dtype = a.dtype + else: + c_dtype = find_common_type(a, b) + + a = _maybe_cast_input(a, c_dtype, casting) + + if b is not None: + b = _maybe_cast_input(b, c_dtype, casting) + + out_dtype = out.dtype if out is not None else c_dtype + + # Handle duplicate modes on inputs + c_a_modes = Counter(a_modes) + for mode, count in c_a_modes.items(): + if count > 1: + axes = [i for (i, m) in enumerate(a_modes) if m == mode] + a = a._diag_helper(axes=axes) + # diagonal is stored on last axis + a_modes = [m for m in a_modes if m != mode] + [mode] + c_b_modes = Counter(b_modes) + for mode, count in c_b_modes.items(): + if count > 1: + axes = [i for (i, m) in enumerate(b_modes) if m == mode] + b = b._diag_helper(axes=axes) # type: ignore [union-attr] + # diagonal is stored on last axis + b_modes = [m for m in b_modes if m != mode] + [mode] + + # Drop modes corresponding to singleton dimensions. This handles cases of + # broadcasting. + for dim in reversed(range(a.ndim)): + if a.shape[dim] == 1: + a = a.squeeze(dim) + a_modes.pop(dim) + if b is not None: + for dim in reversed(range(b.ndim)): + if b.shape[dim] == 1: + b = b.squeeze(dim) + b_modes.pop(dim) + + # Sum-out modes appearing on one argument, and missing from the result + # TODO: If we supported sum on multiple axes we could do the full sum in a + # single operation, and avoid intermediates. + for dim, mode in reversed(list(enumerate(a_modes))): + if mode not in b_modes and mode not in out_modes: + a_modes.pop(dim) + a = a.sum(axis=dim) + + for dim, mode in reversed(list(enumerate(b_modes))): + if mode not in a_modes and mode not in out_modes: + b_modes.pop(dim) + b = b.sum(axis=dim) # type: ignore [union-attr] + + # Compute extent per mode. No need to consider broadcasting at this stage, + # since it has been handled above. 
+ mode2extent: dict[str, int] = {} + for mode, extent in chain( + zip(a_modes, a.shape), zip(b_modes, b.shape) if b is not None else [] + ): + prev_extent = mode2extent.get(mode) + if prev_extent is not None and extent != prev_extent: + raise ValueError( + f"Incompatible sizes between matched dimensions: {extent} vs " + f"{prev_extent}" + ) + mode2extent[mode] = extent + + # Any modes appearing only on the result must have originally been present + # on one of the operands, but got dropped by the broadcast-handling code. + out_shape = ( + out.shape + if out is not None + else tuple(mode2extent.get(mode, 1) for mode in out_modes) + ) + c_modes = [] + c_shape: NdShape = () + c_bloated_shape: NdShape = () + for mode, extent in zip(out_modes, out_shape): + if mode not in a_modes and mode not in b_modes: + c_bloated_shape += (1,) + else: + assert extent > 1 + c_modes.append(mode) + c_shape += (extent,) + c_bloated_shape += (extent,) + + # Verify output array has the right shape (input arrays can be broadcasted + # up to match the output, but not the other way around). There should be no + # unknown or singleton modes on the result at this point. 
+ for mode, extent in zip(c_modes, c_shape): + prev_extent = mode2extent[mode] + assert prev_extent != 1 + if extent != prev_extent: + raise ValueError("Wrong shape on output array") + + # Test for fallback to unary case + if b is not None: + if len(a_modes) == 0: + a = a * b + a_modes = b_modes + b = None + b_modes = [] + elif len(b_modes) == 0: + a = a * b + b = None + + if b is None: + # Unary contraction case + assert len(a_modes) == len(c_modes) and set(a_modes) == set(c_modes) + if len(a_modes) == 0: + # NumPy doesn't return a view in this case + c = copy(a) + elif a_modes == c_modes: + c = a + else: + # Shuffle input array according to mode labels + axes = [a_modes.index(mode) for mode in c_modes] + assert _builtin_all(ax >= 0 for ax in axes) + c = a.transpose(axes) + + else: + # Binary contraction case + # Create result array, if output array can't be directly targeted + if out is not None and out_dtype == c_dtype and out_shape == c_shape: + c = out + else: + c = ndarray( + shape=c_shape, + dtype=c_dtype, + inputs=(a, b), + ) + # Perform operation + c._thunk.contract( + c_modes, + a._thunk, + a_modes, + b._thunk, + b_modes, + mode2extent, + ) + + # Postprocess result before returning + if out is c: + # We already decided above to use the output array directly + return out + if out_dtype != c_dtype or out_shape != c_bloated_shape: + # We need to broadcast the result of the contraction or switch types + # before returning + if not np.can_cast(c_dtype, out_dtype, casting=casting): + raise TypeError( + f"Cannot cast intermediate result array of type {c_dtype} " + f"into output array of type {out_dtype} with casting rule " + f"'{casting}'" + ) + if out is None: + out = ndarray( + shape=out_shape, + dtype=out_dtype, + inputs=(c,), + ) + out[...] 
= c.reshape(c_bloated_shape) + return out + if out_shape != c_shape: + # We need to add missing dimensions, but they are all of size 1, so + # we don't need to broadcast + assert c_bloated_shape == out_shape + if out is None: + return c.reshape(out_shape) + else: + out[...] = c.reshape(out_shape) + return out + if out is not None: + # The output and result arrays are fully compatible, but we still + # need to copy + out[...] = c + return out + return c + + +def einsum( + expr: str, + *operands: ndarray, + out: ndarray | None = None, + dtype: np.dtype[Any] | None = None, + casting: CastingKind = "safe", + optimize: bool | Literal["greedy", "optimal"] = True, +) -> ndarray: + """ + Evaluates the Einstein summation convention on the operands. + + Using the Einstein summation convention, many common multi-dimensional, + linear algebraic array operations can be represented in a simple fashion. + In *implicit* mode `einsum` computes these values. + + In *explicit* mode, `einsum` provides further flexibility to compute + other array operations that might not be considered classical Einstein + summation operations, by disabling, or forcing summation over specified + subscript labels. + + Parameters + ---------- + subscripts : str + Specifies the subscripts for summation as comma separated list of + subscript labels. An implicit (classical Einstein summation) + calculation is performed unless the explicit indicator '->' is + included as well as subscript labels of the precise output form. + operands : list[array_like] + These are the arrays for the operation. + out : ndarray, optional + If provided, the calculation is done into this array. + dtype : data-type, optional + If provided, forces the calculation to use the data type specified. + Note that you may have to also give a more liberal `casting` + parameter to allow the conversions. Default is None. + casting : ``{'no', 'equiv', 'safe', 'same_kind', 'unsafe'}``, optional + Controls what kind of data casting may occur. 
+ + * 'no' means the data types should not be cast at all. + * 'equiv' means only byte-order changes are allowed. + * 'safe' means only casts which can preserve values are allowed. + * 'same_kind' means only safe casts or casts within a kind, + like float64 to float32, are allowed. + * 'unsafe' means any data conversions may be done. + + Default is 'safe'. + optimize : ``{False, True, 'greedy', 'optimal'}``, optional + Controls if intermediate optimization should occur. If False then + arrays will be contracted in input order, one at a time. True (the + default) will use the 'greedy' algorithm. See + ``cupynumeric.einsum_path`` for more information on the available + optimization algorithms. + + Returns + ------- + output : ndarray + The calculation based on the Einstein summation convention. + + Notes + ----- + For most expressions, only floating-point types are supported. + + See Also + -------- + numpy.einsum + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + operands_list = [convert_to_cupynumeric_ndarray(op) for op in operands] + + if out is not None: + out = convert_to_cupynumeric_ndarray(out, share=True) + + if optimize is True: + optimize = "greedy" + elif optimize is False: + optimize = NullOptimizer() + + # This call normalizes the expression (adds the output part if it's + # missing, expands '...') and checks for some errors (mismatch on number + # of dimensions between operand and expression, wrong number of operands, + # unknown modes on output, a mode appearing under two different + # non-singleton extents). 
+ computed_operands, contractions = oe.contract_path( + expr, *operands_list, einsum_call=True, optimize=optimize + ) + for indices, _, sub_expr, _, _ in contractions: + assert len(indices) == 1 or len(indices) == 2 + a = computed_operands.pop(indices[0]) + b = computed_operands.pop(indices[1]) if len(indices) == 2 else None + if b is None: + m = re.match(r"([a-zA-Z]*)->([a-zA-Z]*)", sub_expr) + if m is None: + raise NotImplementedError("Non-alphabetic mode labels") + a_modes = list(m.group(1)) + b_modes = [] + out_modes = list(m.group(2)) + else: + m = re.match(r"([a-zA-Z]*),([a-zA-Z]*)->([a-zA-Z]*)", sub_expr) + if m is None: + raise NotImplementedError("Non-alphabetic mode labels") + a_modes = list(m.group(1)) + b_modes = list(m.group(2)) + out_modes = list(m.group(3)) + sub_result = _contract( + a_modes, + b_modes, + out_modes, + a, + b, + out=(out if len(computed_operands) == 0 else None), + casting=casting, + dtype=dtype, + ) + computed_operands.append(sub_result) + + assert len(computed_operands) == 1 + return computed_operands[0] + + +def einsum_path( + expr: str, + *operands: ndarray, + optimize: bool | list[Any] | tuple[Any, ...] | str = "greedy", +) -> tuple[list[str | int], str]: + """ + Evaluates the lowest cost contraction order for an einsum expression by + considering the creation of intermediate arrays. + + Parameters + ---------- + expr : str + Specifies the subscripts for summation. + *operands : Sequence[array_like] + These are the arrays for the operation. + optimize : ``{bool, list, tuple, 'greedy', 'optimal'}`` + Choose the type of path. If a tuple is provided, the second argument is + assumed to be the maximum intermediate size created. If only a single + argument is provided the largest input or output array size is used + as a maximum intermediate size. 
+ + * if a list is given that starts with ``einsum_path``, uses this as the + contraction path + * if False no optimization is taken + * if True defaults to the 'greedy' algorithm + * 'optimal' An algorithm that combinatorially explores all possible + ways of contracting the listed tensors and chooses the least costly + path. Scales exponentially with the number of terms in the + contraction. + * 'greedy' An algorithm that chooses the best pair contraction + at each step. Effectively, this algorithm searches the largest inner, + Hadamard, and then outer products at each step. Scales cubically with + the number of terms in the contraction. Equivalent to the 'optimal' + path for most contractions. + + Default is 'greedy'. + + Returns + ------- + path : list[tuple[int,...]] + A list representation of the einsum path. + string_repr : str + A printable representation of the einsum path. + + Notes + ----- + The resulting path indicates which terms of the input contraction should be + contracted first, the result of this contraction is then appended to the + end of the contraction list. This list can then be iterated over until all + intermediate contractions are complete. 
+ + See Also + -------- + numpy.einsum_path + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + computed_operands = [convert_to_cupynumeric_ndarray(op) for op in operands] + memory_limit = _builtin_max(op.size for op in computed_operands) + if isinstance(optimize, tuple): + if len(optimize) != 2: + raise ValueError("einsum_path expects optimize tuples of size 2") + optimize, memory_limit = optimize + if optimize is True: + optimize = "greedy" + elif optimize is False: + optimize = [tuple(range(len(computed_operands)))] + elif optimize in ["greedy", "optimal"]: + pass + elif ( + isinstance(optimize, list) + and len(optimize) > 1 + and optimize[0] == "einsum_path" + ): + optimize = optimize[1:] + else: + raise ValueError( + f"einsum_path: unexpected value for optimize: {optimize}" + ) + path, info = oe.contract_path( + expr, *computed_operands, optimize=optimize, memory_limit=memory_limit + ) + return ["einsum_path"] + path, info + + +@add_boilerplate("a") +def trace( + a: ndarray, + offset: int = 0, + axis1: int | None = None, + axis2: int | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, +) -> ndarray: + """ + Return the sum along diagonals of the array. + + If a is 2-D, the sum along its diagonal with the given offset is + returned, i.e., the sum of elements a[i,i+offset] for all i. + If a has more than two dimensions, then the axes specified by axis1 + and axis2 are used to determine the 2-D sub-arrays whose traces + are returned. The shape of the resulting array is the same as that + of a with axis1 and axis2 removed. + + Parameters + ---------- + a : array_like + Input array, from which the diagonals are taken. + offset : int, optional + Offset of the diagonal from the main diagonal. Can be both + positive and negative. Defaults to 0. + axis1, axis2 : int, optional + Axes to be used as the first and second axis of the 2-D sub-arrays + from which the diagonals should be taken. 
Defaults are the + first two axes of a. + dtype : data-type, optional + Determines the data-type of the returned array and of the + accumulator where the elements are summed. If dtype has the value + None and a is of integer type of precision less than the default + integer precision, then the default integer precision is used. + Otherwise, the precision is the same as that of a. + + out : ndarray, optional + Array into which the output is placed. Its type is preserved and + it must be of the right shape to hold the output. + + Returns + ------- + sum_along_diagonals : ndarray + If a is 2-D, the sum along the diagonal is returned. If a has + larger dimensions, then an array of sums along diagonals is returned. + + Raises + ------ + ValueError + If the dimension of `a` is less than 2. + + See Also + -------- + numpy.diagonal + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.trace( + offset=offset, axis1=axis1, axis2=axis2, dtype=dtype, out=out + ) diff --git a/cupynumeric/_module/logic_array_contents.py b/cupynumeric/_module/logic_array_contents.py new file mode 100644 index 000000000..e5bb9bd9e --- /dev/null +++ b/cupynumeric/_module/logic_array_contents.py @@ -0,0 +1,114 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._array.util import convert_to_cupynumeric_ndarray +from .._ufunc.comparison import logical_and +from .._ufunc.floating import isinf, signbit + +if TYPE_CHECKING: + from .._array.array import ndarray + + +def isneginf(x: ndarray, out: ndarray | None = None) -> ndarray: + """ + + Test element-wise for negative infinity, return result as bool array. + + Parameters + ---------- + x : array_like + The input array. + out : array_like, optional + A location into which the result is stored. If provided, it must have a + shape that the input broadcasts to. If not provided or None, a + freshly-allocated boolean array is returned. + + Returns + ------- + out : ndarray + A boolean array with the same dimensions as the input. + If second argument is not supplied then a numpy boolean array is + returned with values True where the corresponding element of the + input is negative infinity and values False where the element of + the input is not negative infinity. + + If a second argument is supplied the result is stored there. If the + type of that array is a numeric type the result is represented as + zeros and ones, if the type is boolean then as False and True. The + return value `out` is then a reference to that array. + + See Also + -------- + numpy.isneginf + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + x = convert_to_cupynumeric_ndarray(x) + if out is not None: + out = convert_to_cupynumeric_ndarray(out, share=True) + rhs1 = isinf(x) + rhs2 = signbit(x) + return logical_and(rhs1, rhs2, out=out) + + +def isposinf(x: ndarray, out: ndarray | None = None) -> ndarray: + """ + + Test element-wise for positive infinity, return result as bool array. + + Parameters + ---------- + x : array_like + The input array. + out : array_like, optional + A location into which the result is stored. If provided, it must have a + shape that the input broadcasts to. 
If not provided or None, a + freshly-allocated boolean array is returned. + + Returns + ------- + out : ndarray + A boolean array with the same dimensions as the input. + If second argument is not supplied then a boolean array is returned + with values True where the corresponding element of the input is + positive infinity and values False where the element of the input is + not positive infinity. + + If a second argument is supplied the result is stored there. If the + type of that array is a numeric type the result is represented as zeros + and ones, if the type is boolean then as False and True. + The return value `out` is then a reference to that array. + + See Also + -------- + numpy.isposinf + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + x = convert_to_cupynumeric_ndarray(x) + if out is not None: + out = convert_to_cupynumeric_ndarray(out, share=True) + rhs1 = isinf(x) + rhs2 = ~signbit(x) + return logical_and(rhs1, rhs2, out=out) diff --git a/cunumeric/logic.py b/cupynumeric/_module/logic_array_type.py similarity index 51% rename from cunumeric/logic.py rename to cupynumeric/_module/logic_array_type.py index 667ae1d13..1e39754a7 100644 --- a/cunumeric/logic.py +++ b/cupynumeric/_module/logic_array_type.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,110 +14,19 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Any, Union +from typing import TYPE_CHECKING, Any import numpy as np -from ._ufunc.comparison import logical_and -from ._ufunc.floating import isinf, signbit -from .array import convert_to_cunumeric_ndarray, ndarray -from .module import full +from .._array.array import ndarray +from .._array.util import convert_to_cupynumeric_ndarray +from .creation_shape import full if TYPE_CHECKING: import numpy.typing as npt -def isneginf(x: ndarray, out: Union[ndarray, None] = None) -> ndarray: - """ - - Test element-wise for negative infinity, return result as bool array. - - Parameters - ---------- - x : array_like - The input array. - out : array_like, optional - A location into which the result is stored. If provided, it must have a - shape that the input broadcasts to. If not provided or None, a - freshly-allocated boolean array is returned. - - Returns - ------- - out : ndarray - A boolean array with the same dimensions as the input. - If second argument is not supplied then a numpy boolean array is - returned with values True where the corresponding element of the - input is negative infinity and values False where the element of - the input is not negative infinity. - - If a second argument is supplied the result is stored there. If the - type of that array is a numeric type the result is represented as - zeros and ones, if the type is boolean then as False and True. The - return value `out` is then a reference to that array. - - See Also - -------- - numpy.isneginf - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - x = convert_to_cunumeric_ndarray(x) - if out is not None: - out = convert_to_cunumeric_ndarray(out, share=True) - rhs1 = isinf(x) - rhs2 = signbit(x) - return logical_and(rhs1, rhs2, out=out) - - -def isposinf(x: ndarray, out: Union[ndarray, None] = None) -> ndarray: - """ - - Test element-wise for positive infinity, return result as bool array. 
- - Parameters - ---------- - x : array_like - The input array. - out : array_like, optional - A location into which the result is stored. If provided, it must have a - shape that the input broadcasts to. If not provided or None, a - freshly-allocated boolean array is returned. - - Returns - ------- - out : ndarray - A boolean array with the same dimensions as the input. - If second argument is not supplied then a boolean array is returned - with values True where the corresponding element of the input is - positive infinity and values False where the element of the input is - not positive infinity. - - If a second argument is supplied the result is stored there. If the - type of that array is a numeric type the result is represented as zeros - and ones, if the type is boolean then as False and True. - The return value `out` is then a reference to that array. - - See Also - -------- - numpy.isposinf - - Availability - -------- - Multiple GPUs, Multiple CPUs - - """ - x = convert_to_cunumeric_ndarray(x) - if out is not None: - out = convert_to_cunumeric_ndarray(out, share=True) - rhs1 = isinf(x) - rhs2 = ~signbit(x) - return logical_and(rhs1, rhs2, out=out) - - -def iscomplex(x: Union[ndarray, npt.NDArray[Any]]) -> ndarray: +def iscomplex(x: ndarray | npt.NDArray[Any]) -> ndarray: """ Returns a bool array, where True if input element is complex. @@ -144,14 +53,14 @@ def iscomplex(x: Union[ndarray, npt.NDArray[Any]]) -> ndarray: Multiple GPUs, Multiple CPUs """ - x = convert_to_cunumeric_ndarray(x) + x = convert_to_cupynumeric_ndarray(x) if x.dtype.kind != "c": return full(x.shape, False, dtype=bool) else: return x.imag != 0 -def iscomplexobj(x: Union[ndarray, npt.NDArray[Any]]) -> bool: +def iscomplexobj(x: ndarray | npt.NDArray[Any]) -> bool: """ Check for a complex type or an array of complex numbers. 
@@ -184,7 +93,7 @@ def iscomplexobj(x: Union[ndarray, npt.NDArray[Any]]) -> bool: return np.iscomplexobj(x) -def isreal(x: Union[ndarray, npt.NDArray[Any]]) -> ndarray: +def isreal(x: ndarray | npt.NDArray[Any]) -> ndarray: """ Returns a bool array, where True if input element is real. @@ -212,7 +121,7 @@ def isreal(x: Union[ndarray, npt.NDArray[Any]]) -> ndarray: Multiple GPUs, Multiple CPUs """ - x = convert_to_cunumeric_ndarray(x) + x = convert_to_cupynumeric_ndarray(x) if x.dtype.kind != "c": return full(x.shape, True, dtype=bool) else: @@ -249,7 +158,7 @@ def isrealobj(x: ndarray) -> bool: return not iscomplexobj(x) -def isscalar(x: Union[ndarray, npt.NDArray[Any]]) -> bool: +def isscalar(x: ndarray | npt.NDArray[Any]) -> bool: """ Returns True if the type of `element` is a scalar type. @@ -270,7 +179,7 @@ def isscalar(x: Union[ndarray, npt.NDArray[Any]]) -> bool: Notes ----- - This function falls back to NumPy for all object types but cuNumeric's + This function falls back to NumPy for all object types but cuPyNumeric's ndarray, which always returns `False`. Availability @@ -278,9 +187,9 @@ def isscalar(x: Union[ndarray, npt.NDArray[Any]]) -> bool: Multiple GPUs, Multiple CPUs """ - # Since the input can be any value, we can't just convert it to cunumeric - # ndarray. Instead we check if the input is cunumeric ndarray and, if not, - # fall back to Numpy + # Since the input can be any value, we can't just convert it to cupynumeric + # ndarray. 
Instead we check if the input is cupynumeric ndarray and, if + # not, fall back to Numpy if isinstance(x, ndarray): return False else: diff --git a/cupynumeric/_module/logic_comparison.py b/cupynumeric/_module/logic_comparison.py new file mode 100644 index 000000000..46c6410a4 --- /dev/null +++ b/cupynumeric/_module/logic_comparison.py @@ -0,0 +1,201 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np +from legate.core import Scalar, types as ty + +from .._array.thunk import perform_binary_reduction +from .._array.util import add_boilerplate, find_common_type +from ..config import BinaryOpCode +from .creation_shape import empty + +if TYPE_CHECKING: + from .._array.array import ndarray + + +@add_boilerplate("a", "b") +def allclose( + a: ndarray, + b: ndarray, + rtol: float = 1e-5, + atol: float = 1e-8, + equal_nan: bool = False, +) -> ndarray: + """ + + Returns True if two arrays are element-wise equal within a tolerance. + + The tolerance values are positive, typically very small numbers. The + relative difference (`rtol` * abs(`b`)) and the absolute difference + `atol` are added together to compare against the absolute difference + between `a` and `b`. + + NaNs are treated as equal if they are in the same place and if + ``equal_nan=True``. Infs are treated as equal if they are in the same + place and of the same sign in both arrays. 
+ + Parameters + ---------- + a, b : array_like + Input arrays to compare. + rtol : float + The relative tolerance parameter (see Notes). + atol : float + The absolute tolerance parameter (see Notes). + equal_nan : bool + Whether to compare NaN's as equal. If True, NaN's in `a` will be + considered equal to NaN's in `b` in the output array. + + Returns + ------- + allclose : ndarray scalar + Returns True if the two arrays are equal within the given + tolerance; False otherwise. + + Notes + ----- + If the following equation is element-wise True, then allclose returns + True. + + absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) + + See Also + -------- + numpy.allclose + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if equal_nan: + raise NotImplementedError( + "cuPyNumeric does not support `equal_nan` yet for allclose" + ) + args = (Scalar(rtol, ty.float64), Scalar(atol, ty.float64)) + return perform_binary_reduction( + BinaryOpCode.ISCLOSE, + a, + b, + dtype=np.dtype(bool), + extra_args=args, + ) + + +@add_boilerplate("a", "b") +def isclose( + a: ndarray, + b: ndarray, + rtol: float = 1e-5, + atol: float = 1e-8, + equal_nan: bool = False, +) -> ndarray: + """ + + Returns a boolean array where two arrays are element-wise equal within a + tolerance. + + Parameters + ---------- + a, b : array_like + Input arrays to compare. + rtol : float + The relative tolerance parameter (see Notes). + atol : float + The absolute tolerance parameter (see Notes). + equal_nan : bool + Whether to compare NaN's as equal. If True, NaN's in `a` will be + considered equal to NaN's in `b` in the output array. + + Returns + ------- + y : array_like + Returns a boolean array of where `a` and `b` are equal within the + given tolerance. If both `a` and `b` are scalars, returns a single + boolean value. + + Notes + ----- + For finite values, isclose uses the following equation to test whether + two floating point values are equivalent. 
+ + absolute(`a` - `b`) <= (`atol` + `rtol` * absolute(`b`)) + + See Also + -------- + numpy.isclose + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if equal_nan: + raise NotImplementedError( + "cuPyNumeric does not support `equal_nan` yet for isclose" + ) + + out_shape = np.broadcast_shapes(a.shape, b.shape) + out = empty(out_shape, dtype=bool) + + common_type = find_common_type(a, b) + a = a.astype(common_type) + b = b.astype(common_type) + + out._thunk.isclose(a._thunk, b._thunk, rtol, atol, equal_nan) + return out + + +@add_boilerplate("a1", "a2") +def array_equal( + a1: ndarray, a2: ndarray, equal_nan: bool = False +) -> bool | ndarray: + """ + + True if two arrays have the same shape and elements, False otherwise. + + Parameters + ---------- + a1, a2 : array_like + Input arrays. + equal_nan : bool + Whether to compare NaN's as equal. If the dtype of a1 and a2 is + complex, values will be considered equal if either the real or the + imaginary component of a given value is ``nan``. + + Returns + ------- + b : ndarray scalar + Returns True if the arrays are equal. + + See Also + -------- + numpy.array_equal + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if equal_nan: + raise NotImplementedError( + "cuPyNumeric does not support `equal_nan` yet for `array_equal`" + ) + + if a1.shape != a2.shape: + return False + return perform_binary_reduction( + BinaryOpCode.EQUAL, a1, a2, dtype=np.dtype(bool) + ) diff --git a/cupynumeric/_module/logic_truth.py b/cupynumeric/_module/logic_truth.py new file mode 100644 index 000000000..89ca0244d --- /dev/null +++ b/cupynumeric/_module/logic_truth.py @@ -0,0 +1,138 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._array.util import add_boilerplate + +if TYPE_CHECKING: + from .._array.array import ndarray + + +@add_boilerplate("a") +def all( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + where: ndarray | None = None, +) -> ndarray: + """ + Test whether all array elements along a given axis evaluate to True. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : None or int or tuple[int], optional + Axis or axes along which a logical AND reduction is performed. + The default (``axis=None``) is to perform a logical AND over all + the dimensions of the input array. `axis` may be negative, in + which case it counts from the last to the first axis. + + If this is a tuple of ints, a reduction is performed on multiple + axes, instead of a single axis or all the axes as before. + out : ndarray, optional + Alternate output array in which to place the result. + It must have the same shape as the expected output and its + type is preserved (e.g., if ``dtype(out)`` is float, the result + will consist of 0.0's and 1.0's). + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. 
+ + If the default value is passed, then `keepdims` will not be + passed through to the `all` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + Returns + ------- + all : ndarray, bool + A new boolean or array is returned unless `out` is specified, + in which case a reference to `out` is returned. + + See Also + -------- + numpy.all + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.all(axis=axis, out=out, keepdims=keepdims, where=where) + + +@add_boilerplate("a") +def any( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + where: ndarray | None = None, +) -> ndarray: + """ + Test whether any array element along a given axis evaluates to True. + + Returns single boolean unless `axis` is not ``None`` + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : None or int or tuple[int], optional + Axis or axes along which a logical OR reduction is performed. + The default (``axis=None``) is to perform a logical OR over all + the dimensions of the input array. `axis` may be negative, in + which case it counts from the last to the first axis. + + If this is a tuple of ints, a reduction is performed on multiple + axes, instead of a single axis or all the axes as before. + out : ndarray, optional + Alternate output array in which to place the result. It must have + the same shape as the expected output and its type is preserved + (e.g., if it is of type float, then it will remain so, returning + 1.0 for True and 0.0 for False, regardless of the type of `a`). + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. 
+ + If the default value is passed, then `keepdims` will not be + passed through to the `any` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + Returns + ------- + any : bool or ndarray + A new boolean or `ndarray` is returned unless `out` is specified, + in which case a reference to `out` is returned. + + See Also + -------- + numpy.any + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.any(axis=axis, out=out, keepdims=keepdims, where=where) diff --git a/cupynumeric/_module/math_complex.py b/cupynumeric/_module/math_complex.py new file mode 100644 index 000000000..29f3787f7 --- /dev/null +++ b/cupynumeric/_module/math_complex.py @@ -0,0 +1,122 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from legate.core import Scalar + +from .._array.thunk import perform_unary_op +from .._array.util import add_boilerplate +from ..config import UnaryOpCode + +if TYPE_CHECKING: + from .._array.array import ndarray + + +@add_boilerplate("val") +def real(val: ndarray) -> ndarray: + """ + Return the real part of the complex argument. + + Parameters + ---------- + val : array_like + Input array. + + Returns + ------- + out : ndarray or scalar + The real component of the complex argument. 
If `val` is real, the type + of `val` is used for the output. If `val` has complex elements, the + returned type is float. + + See Also + -------- + numpy.real + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return val.real + + +@add_boilerplate("val") +def imag(val: ndarray) -> ndarray: + """ + + Return the imaginary part of the complex argument. + + Parameters + ---------- + val : array_like + Input array. + + Returns + ------- + out : ndarray or scalar + The imaginary component of the complex argument. If `val` is real, + the type of `val` is used for the output. If `val` has complex + elements, the returned type is float. + + See Also + -------- + numpy.imag + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return val.imag + + +@add_boilerplate("z") +def angle(z: ndarray, deg: bool = False) -> ndarray: + """ + Return the angle of the complex argument. + + Parameters + ---------- + z : array_like + A complex number or sequence of complex numbers. + deg : bool, optional + Return angle in degrees if True, radians if False (default). + + Returns + ------- + angle : ndarray or scalar + The counterclockwise angle from the positive real axis on the complex + plane in the range ``(-pi, pi]``, with dtype as numpy.float64. + + See Also + -------- + numpy.angle + + Notes + ----- + This function passes the imaginary and real parts of the argument to + `arctan2` to compute the result; consequently, it follows the convention + of `arctan2` when the magnitude of the argument is zero. 
+ + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + if z is None: + raise TypeError("can't compute 'angle' for None") + extra_args = (Scalar(deg),) + return perform_unary_op(UnaryOpCode.ANGLE, z, extra_args=extra_args) diff --git a/cupynumeric/_module/math_extrema.py b/cupynumeric/_module/math_extrema.py new file mode 100644 index 000000000..0b576684d --- /dev/null +++ b/cupynumeric/_module/math_extrema.py @@ -0,0 +1,179 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np + +from .._array.util import add_boilerplate +from .._ufunc.comparison import maximum, minimum + +if TYPE_CHECKING: + from .._array.array import ndarray + + +@add_boilerplate("a") +def amax( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + + Return the maximum of an array or maximum along an axis. + + Parameters + ---------- + a : array_like + Input data. + axis : None or int or tuple[int], optional + Axis or axes along which to operate. By default, flattened input is + used. + + If this is a tuple of ints, the maximum is selected over multiple axes, + instead of a single axis or all the axes as before. 
+ out : ndarray, optional + Alternative output array in which to place the result. Must + be of the same shape and buffer length as the expected output. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `amax` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + initial : scalar, optional + The minimum value of an output element. Must be present to allow + computation on empty slice. See `~cupynumeric.ufunc.reduce` for + details. + + where : array_like[bool], optional + Elements to compare for the maximum. See `~cupynumeric.ufunc.reduce` + for details. + + Returns + ------- + amax : ndarray or scalar + Maximum of `a`. If `axis` is None, the result is a scalar value. + If `axis` is given, the result is an array of dimension + ``a.ndim - 1``. + + See Also + -------- + numpy.amax + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return maximum.reduce( + a, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + +max = amax + + +@add_boilerplate("a") +def amin( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + + Return the minimum of an array or minimum along an axis. + + Parameters + ---------- + a : array_like + Input data. + axis : None or int or tuple[int], optional + Axis or axes along which to operate. By default, flattened input is + used. 
+ + If this is a tuple of ints, the minimum is selected over multiple axes, + instead of a single axis or all the axes as before. + out : ndarray, optional + Alternative output array in which to place the result. Must + be of the same shape and buffer length as the expected output. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `amin` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + initial : scalar, optional + The maximum value of an output element. Must be present to allow + computation on empty slice. See `~cupynumeric.ufunc.reduce` for + details. + + where : array_like[bool], optional + Elements to compare for the minimum. See `~cupynumeric.ufunc.reduce` + for details. + + Returns + ------- + amin : ndarray or scalar + Minimum of `a`. If `axis` is None, the result is a scalar value. + If `axis` is given, the result is an array of dimension + ``a.ndim - 1``. + + See Also + -------- + numpy.amin + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return minimum.reduce( + a, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + +min = amin diff --git a/cupynumeric/_module/math_misc.py b/cupynumeric/_module/math_misc.py new file mode 100644 index 000000000..a91e3facc --- /dev/null +++ b/cupynumeric/_module/math_misc.py @@ -0,0 +1,174 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._array.array import ndarray +from .._array.util import add_boilerplate +from ..config import ConvolveMethod + +if TYPE_CHECKING: + import numpy.typing as npt + + from ..types import ConvolveMethod as ConvolveMethodType, ConvolveMode + + +@add_boilerplate("a", "v") +def convolve( + a: ndarray, + v: ndarray, + mode: ConvolveMode = "full", + method: ConvolveMethodType = "auto", +) -> ndarray: + """ + + Returns the discrete, linear convolution of two ndarrays. + + If `a` and `v` are both 1-D and `v` is longer than `a`, the two are + swapped before computation. For N-D cases, the arguments are never swapped. + + Parameters + ---------- + a : (N,) array_like + First input ndarray. + v : (M,) array_like + Second input ndarray. + mode : ``{'full', 'valid', 'same'}``, optional + 'same': + The output is the same size as `a`, centered with respect to + the 'full' output. + + 'full': + The output is the full discrete linear convolution of the inputs. (default) + + 'valid': + The output consists only of those elements that do not + rely on the zero-padding. In 'valid' mode, either `a` or `v` + must be at least as large as the other in every dimension. + method : ``{'auto', 'direct', 'fft'}``, optional + A string indicating which method to use to calculate the convolution.
+ + 'auto': + Automatically chooses direct or Fourier method based on an estimate of + which is faster (default) + + 'direct': + The convolution is determined directly from sums, the definition of + convolution + + 'fft': + The Fourier Transform is used to perform the convolution + + Returns + ------- + out : ndarray + Discrete, linear convolution of `a` and `v`. + + See Also + -------- + numpy.convolve + + Notes + ----- + The current implementation only supports the 'same' mode. + + Unlike `numpy.convolve`, `cupynumeric.convolve` supports N-dimensional + inputs, but it follows NumPy's behavior for 1-D inputs. + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if mode != "same": + raise NotImplementedError("Only support mode='same'") + + if a.ndim != v.ndim: + raise RuntimeError("Arrays should have the same dimensions") + elif a.ndim > 3: + raise NotImplementedError(f"{a.ndim}-D arrays are not yet supported") + + if a.ndim == 1 and a.size < v.size: + v, a = a, v + + if not hasattr(ConvolveMethod, method.upper()): + raise ValueError( + "Acceptable method flags are 'auto', 'direct', or 'fft'." + ) + + if a.dtype != v.dtype: + v = v.astype(a.dtype) + out = ndarray( + shape=a.shape, + dtype=a.dtype, + inputs=(a, v), + ) + out._thunk.convolve(a._thunk, v._thunk, mode, method) + return out + + +@add_boilerplate("a") +def clip( + a: ndarray, + a_min: int | float | npt.ArrayLike | None, + a_max: int | float | npt.ArrayLike | None, + out: ndarray | None = None, +) -> ndarray: + """ + + Clip (limit) the values in an array. + + Given an interval, values outside the interval are clipped to + the interval edges. For example, if an interval of ``[0, 1]`` + is specified, values smaller than 0 become 0, and values larger + than 1 become 1. + + Parameters + ---------- + a : array_like + Array containing elements to clip. + a_min : scalar or array_like or None + Minimum value. If None, clipping is not performed on lower + interval edge. 
Not more than one of `a_min` and `a_max` may be + None. + a_max : scalar or array_like or None + Maximum value. If None, clipping is not performed on upper + interval edge. Not more than one of `a_min` and `a_max` may be + None. If `a_min` or `a_max` are array_like, then the three + arrays will be broadcasted to match their shapes. + out : ndarray, optional + The results will be placed in this array. It may be the input + array for in-place clipping. `out` must be of the right shape + to hold the output. Its type is preserved. + **kwargs + For other keyword-only arguments, see the + :ref:`ufunc docs <ufuncs.kwargs>`. + + Returns + ------- + clipped_array : ndarray + An array with the elements of `a`, but where values + < `a_min` are replaced with `a_min`, and those > `a_max` + with `a_max`. + + See Also + -------- + numpy.clip + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.clip(a_min, a_max, out=out) diff --git a/cupynumeric/_module/math_rounding.py b/cupynumeric/_module/math_rounding.py new file mode 100644 index 000000000..15b754fa1 --- /dev/null +++ b/cupynumeric/_module/math_rounding.py @@ -0,0 +1,57 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+# +from __future__ import annotations + +from .._array.array import ndarray +from .._array.util import add_boilerplate + + +@add_boilerplate("a") +def round( + a: ndarray, + decimals: int = 0, + out: ndarray | None = None, +) -> ndarray: + """ + Evenly round to the given number of decimals. + + Parameters + ---------- + a : array_like + Input data. + decimals : int, optional + Number of decimal places to round to (default: 0). If + decimals is negative, it specifies the number of positions to + the left of the decimal point. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output, but the type of the output + values will be cast if necessary. + + Returns + ------- + rounded_array : ndarray + An array of the same type as `a`, containing the rounded values. + Unless `out` was specified, a new array is created. A reference to + the result is returned. + + The real and imaginary parts of complex numbers are rounded + separately. The result of rounding a float is a float. + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.round(decimals, out=out) diff --git a/cupynumeric/_module/math_sum_prod_diff.py b/cupynumeric/_module/math_sum_prod_diff.py new file mode 100644 index 000000000..6027fb1c1 --- /dev/null +++ b/cupynumeric/_module/math_sum_prod_diff.py @@ -0,0 +1,1338 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +import collections.abc +from typing import TYPE_CHECKING, Any + +import numpy as np + +from .._array.thunk import perform_scan, perform_unary_reduction +from .._array.util import add_boilerplate +from .._ufunc.comparison import not_equal +from .._ufunc.floating import isnan +from .._ufunc.math import add, multiply, subtract +from .._utils import is_np2 +from ..config import ScanCode, UnaryRedCode +from ..settings import settings as cupynumeric_settings +from ._unary_red_utils import get_non_nan_unary_red_code +from .array_dimension import broadcast_to +from .array_joining import concatenate +from .creation_data import asarray +from .creation_shape import empty, empty_like +from .indexing import putmask +from .logic_truth import all, any + +if is_np2: + from numpy.lib.array_utils import normalize_axis_index # type: ignore +else: + from numpy.core.multiarray import normalize_axis_index # type: ignore + +if TYPE_CHECKING: + from .._array.array import ndarray + + +@add_boilerplate("a") +def prod( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + + Return the product of array elements over a given axis. + + Parameters + ---------- + a : array_like + Input data. + axis : None or int or tuple[int], optional + Axis or axes along which a product is performed. The default, + axis=None, will calculate the product of all the elements in the + input array. If axis is negative it counts from the last to the + first axis. + + If axis is a tuple of ints, a product is performed on all of the + axes specified in the tuple instead of a single axis or all the + axes as before. + dtype : data-type, optional + The type of the returned array, as well as of the accumulator in + which the elements are multiplied. 
The dtype of `a` is used by + default unless `a` has an integer dtype of less precision than the + default platform integer. In that case, if `a` is signed then the + platform integer is used while if `a` is unsigned then an unsigned + integer of the same precision as the platform integer is used. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output, but the type of the output + values will be cast if necessary. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the + result as dimensions with size one. With this option, the result + will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `prod` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + initial : scalar, optional + The starting value for this product. See `~cupynumeric.ufunc.reduce` + for details. + + where : array_like[bool], optional + Elements to include in the product. See `~cupynumeric.ufunc.reduce` + for details. + + Returns + ------- + product_along_axis : ndarray, see `dtype` parameter above. + An array shaped as `a` but with the specified axis removed. + Returns a reference to `out` if specified. + + See Also + -------- + numpy.prod + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if isinstance(initial, list): + exc = TypeError if is_np2 else ValueError # type: ignore [unreachable] + raise exc("initial should not be a list") + + return multiply.reduce( + a, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + +@add_boilerplate("a") +def sum( + a: ndarray, + axis: int | tuple[int, ...] 
| None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + + Sum of array elements over a given axis. + + Parameters + ---------- + a : array_like + Elements to sum. + axis : None or int or tuple[int], optional + Axis or axes along which a sum is performed. The default, + axis=None, will sum all of the elements of the input array. If + axis is negative it counts from the last to the first axis. + + If axis is a tuple of ints, a sum is performed on all of the axes + specified in the tuple instead of a single axis or all the axes as + before. + dtype : data-type, optional + The type of the returned array and of the accumulator in which the + elements are summed. The dtype of `a` is used by default unless `a` + has an integer dtype of less precision than the default platform + integer. In that case, if `a` is signed then the platform integer + is used while if `a` is unsigned then an unsigned integer of the + same precision as the platform integer is used. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output, but the type of the output + values will be cast if necessary. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `sum` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + initial : scalar, optional + Starting value for the sum. See `~cupynumeric.ufunc.reduce` for + details. + + where : array_like[bool], optional + Elements to include in the sum. 
See `~cupynumeric.ufunc.reduce` for + details. + + Returns + ------- + sum_along_axis : ndarray + An array with the same shape as `a`, with the specified + axis removed. If `a` is a 0-d array, or if `axis` is None, a scalar + is returned. If an output array is specified, a reference to + `out` is returned. + + See Also + -------- + numpy.sum + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return add.reduce( + a, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + +@add_boilerplate("a") +def cumprod( + a: ndarray, + axis: int | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, +) -> ndarray: + """ + Return the cumulative product of the elements along a given axis. + + Parameters + ---------- + a : array_like + Input array. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) + is to compute the cumprod over the flattened array. + + dtype : dtype, optional + Type of the returned array and of the accumulator in which the elements + are multiplied. If dtype is not specified, it defaults to the dtype of + a, unless a has an integer dtype with a precision less than that of the + default platform integer. In that case, the default platform integer is + used. + out : ndarray, optional + Alternative output array in which to place the result. It must have the + same shape and buffer length as the expected output but the type will + be cast if necessary. See Output type determination for more details. + + Returns + ------- + cumprod : ndarray + A new array holding the result is returned unless out is specified, in + which case a reference to out is returned. The result has the same size + as a, and the same shape as a if axis is not None or a is a 1-d array. + + See Also + -------- + numpy.cumprod + + Notes + ----- + cuPyNumeric's parallel implementation may yield different results from + NumPy with floating point and complex types. 
For example, when boundary + values such as inf occur they may not propagate as expected. Consider the + float32 array ``[3e+37, 1, 100, 0.01]``. NumPy's cumprod will return a + result of ``[3e+37, 3e+37, inf, inf]``. However, cuPyNumeric might + internally partition the array such that partition 0 has ``[3e+37, 1]`` + and partition 1 has ``[100, 0.01]``, returning the result + ``[3e+37, 3e+37, inf, 3e+37]``. + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return perform_scan( + ScanCode.PROD, + a, + axis=axis, + dtype=dtype, + out=out, + nan_to_identity=False, + ) + + +@add_boilerplate("a") +def cumsum( + a: ndarray, + axis: int | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, +) -> ndarray: + """ + Return the cumulative sum of the elements along a given axis. + + Parameters + ---------- + a : array_like + Input array. + + axis : int, optional + Axis along which the cumulative sum is computed. The default (None) is + to compute the cumsum over the flattened array. + + dtype : dtype, optional + Type of the returned array and of the accumulator in which the elements + are summed. If dtype is not specified, it defaults to the dtype of a, + unless a has an integer dtype with a precision less than that of the + default platform integer. In that case, the default platform integer is + used. + out : ndarray, optional + Alternative output array in which to place the result. It must have the + same shape and buffer length as the expected output but the type will + be cast if necessary. See Output type determination for more details. + + Returns + ------- + cumsum : ndarray. + A new array holding the result is returned unless out is specified, in + which case a reference to out is returned. The result has the same size + as a, and the same shape as a if axis is not None or a is a 1-d array.
+ + See Also + -------- + numpy.cumsum + + Notes + ----- + CuPyNumeric's parallel implementation may yield different results from + NumPy with floating point and complex types. For example, when boundary + values such as inf occur they may not propagate as expected. For more + explanation check cupynumeric.cumprod. + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return perform_scan( + ScanCode.SUM, a, axis=axis, dtype=dtype, out=out, nan_to_identity=False + ) + + +@add_boilerplate("a") +def nancumprod( + a: ndarray, + axis: int | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, +) -> ndarray: + """ + Return the cumulative product of the elements along a given axis treating + Not a Numbers (NaNs) as one. The cumulative product does not change when + NaNs are encountered and leading NaNs are replaced by ones. + + Ones are returned for slices that are all-NaN or empty. + + Parameters + ---------- + a : array_like + Input array. + + axis : int, optional + Axis along which the cumulative product is computed. The default (None) + is to compute the nancumprod over the flattened array. + + dtype : dtype, optional + Type of the returned array and of the accumulator in which the elements + are multiplied. If dtype is not specified, it defaults to the dtype of + a, unless a has an integer dtype with a precision less than that of the + default platform integer. In that case, the default platform integer is + used. + out : ndarray, optional + Alternative output array in which to place the result. It must have the + same shape and buffer length as the expected output but the type will + be cast if necessary. See Output type determination for more details. + + Returns + ------- + nancumprod : ndarray. + A new array holding the result is returned unless out is specified, in + which case a reference to out is returned. The result has the same size + as a, and the same shape as a if axis is not None or a is a 1-d array. 
+ + See Also + -------- + numpy.nancumprod + + Notes + ----- + CuPyNumeric's parallel implementation may yield different results from + NumPy with floating point and complex types. For example, when boundary + values such as inf occur they may not propagate as expected. For more + explanation check cupynumeric.cumprod. + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return perform_scan( + ScanCode.PROD, a, axis=axis, dtype=dtype, out=out, nan_to_identity=True + ) + + +@add_boilerplate("a") +def nancumsum( + a: ndarray, + axis: int | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, +) -> ndarray: + """ + Return the cumulative sum of the elements along a given axis treating Not a + Numbers (NaNs) as zero. The cumulative sum does not change when NaNs are + encountered and leading NaNs are replaced by zeros. + + Zeros are returned for slices that are all-NaN or empty. + + Parameters + ---------- + a : array_like + Input array. + + axis : int, optional + Axis along which the cumulative sum is computed. The default (None) is + to compute the nancumsum over the flattened array. + + dtype : dtype, optional + Type of the returned array and of the accumulator in which the elements + are summed. If dtype is not specified, it defaults to the dtype of a, + unless a has an integer dtype with a precision less than that of the + default platform integer. In that case, the default platform integer is + used. + out : ndarray, optional + Alternative output array in which to place the result. It must have the + same shape and buffer length as the expected output but the type will + be cast if necessary. See Output type determination for more details. + + Returns + ------- + nancumsum : ndarray. + A new array holding the result is returned unless out is specified, in + which case a reference to out is returned. The result has the same size + as a, and the same shape as a if axis is not None or a is a 1-d array. 
+ + See Also + -------- + numpy.nancumsum + + Notes + ----- + CuPyNumeric's parallel implementation may yield different results from + NumPy with floating point and complex types. For example, when boundary + values such as inf occur they may not propagate as expected. For more + explanation check cupynumeric.cumprod. + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return perform_scan( + ScanCode.SUM, a, axis=axis, dtype=dtype, out=out, nan_to_identity=True + ) + + +@add_boilerplate("a") +def nanargmax( + a: ndarray, + axis: Any = None, + out: ndarray | None = None, + *, + keepdims: bool = False, +) -> ndarray: + """ + Return the indices of the maximum values in the specified axis ignoring + NaNs. For empty arrays, ValueError is raised. For all-NaN slices, + ValueError is raised only when CUPYNUMERIC_NUMPY_COMPATIBILITY + environment variable is set, otherwise identity is returned. + + Warning: results cannot be trusted if a slice contains only NaNs + and -Infs. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + By default, the index corresponds to the flattened array, otherwise + along the specified axis. + out : ndarray, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + Returns + ------- + index_array : ndarray[int] + Array of indices into the array. It has the same shape as `a.shape` + with the dimension along `axis` removed. 
+ + See Also + -------- + numpy.nanargmin, numpy.nanargmax + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + if a.size == 0: + raise ValueError("attempt to get nanargmax of an empty sequence") + + if cupynumeric_settings.numpy_compat() and a.dtype.kind == "f": + if any(all(isnan(a), axis=axis)): + raise ValueError("Array/Slice contains only NaNs") + + unary_red_code = get_non_nan_unary_red_code( + a.dtype.kind, UnaryRedCode.NANARGMAX + ) + + return perform_unary_reduction( + unary_red_code, + a, + axis=axis, + out=out, + keepdims=keepdims, + res_dtype=np.dtype(np.int64), + ) + + +@add_boilerplate("a") +def nanargmin( + a: ndarray, + axis: Any = None, + out: ndarray | None = None, + *, + keepdims: bool = False, +) -> ndarray: + """ + Return the indices of the minimum values in the specified axis ignoring + NaNs. For empty arrays, ValueError is raised. For all-NaN slices, + ValueError is raised only when CUPYNUMERIC_NUMPY_COMPATIBILITY + environment variable is set, otherwise identity is returned. + + Warning: results cannot be trusted if a slice contains only NaNs + and -Infs. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + By default, the index corresponds to the flattened array, otherwise + along the specified axis. + out : ndarray, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + Returns + ------- + index_array : ndarray[int] + Array of indices into the array. It has the same shape as `a.shape` + with the dimension along `axis` removed. 
+ + See Also + -------- + numpy.nanargmin, numpy.nanargmax + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + if a.size == 0: + raise ValueError("attempt to get nanargmin of an empty sequence") + + if cupynumeric_settings.numpy_compat() and a.dtype.kind == "f": + if any(all(isnan(a), axis=axis)): + raise ValueError("Array/Slice contains only NaNs") + + unary_red_code = get_non_nan_unary_red_code( + a.dtype.kind, UnaryRedCode.NANARGMIN + ) + + return perform_unary_reduction( + unary_red_code, + a, + axis=axis, + out=out, + keepdims=keepdims, + res_dtype=np.dtype(np.int64), + ) + + +@add_boilerplate("a") +def nanmin( + a: ndarray, + axis: Any = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + Return minimum of an array or minimum along an axis, ignoring any + NaNs. When all-NaN slices are encountered, a NaN is returned + for that slice only when CUPYNUMERIC_NUMPY_COMPATIBILITY environment + variable is set, otherwise identity is returned. + Empty slices will raise a ValueError + + Parameters + ---------- + a : array_like + Array containing numbers whose minimum is desired. If a is not an + array, a conversion is attempted. + + axis : {int, tuple of int, None}, optional + Axis or axes along which the minimum is computed. The default is to + compute the minimum of the flattened array. + + out : ndarray, optional + Alternative output array in which to place the result. Must + be of the same shape and buffer length as the expected output. + + keepdims : bool, Optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `amin` method of sub-classes of + `ndarray`, however any non-default value will be. 
If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + initial : scalar, optional + The maximum value of an output element. Must be present to allow + computation on empty slice. See `~cupynumeric.ufunc.reduce` for + details. + + where : array_like[bool], optional + Elements to compare for the minimum. See `~cupynumeric.ufunc.reduce` + for details. + + Returns + ------- + nanmin : ndarray or scalar + Minimum of `a`. If `axis` is None, the result is a scalar value. + If `axis` is given, the result is an array of dimension + ``a.ndim - 1``. + + Notes + ----- + CuPyNumeric's implementation will not raise a Runtime Warning for + slices with all-NaNs + + See Also + -------- + numpy.nanmin, numpy.nanmax, numpy.min, numpy.max, numpy.isnan, + numpy.maximum + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + unary_red_code = get_non_nan_unary_red_code( + a.dtype.kind, UnaryRedCode.NANMIN + ) + + out_array = perform_unary_reduction( + unary_red_code, + a, + axis=axis, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + if cupynumeric_settings.numpy_compat() and a.dtype.kind == "f": + all_nan = all(isnan(a), axis=axis, keepdims=keepdims, where=where) + putmask(out_array, all_nan, np.nan) # type: ignore + + return out_array + + +@add_boilerplate("a") +def nanmax( + a: ndarray, + axis: Any = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + Return the maximum of an array or maximum along an axis, ignoring any + NaNs. When all-NaN slices are encountered, a NaN is returned + for that slice only when CUPYNUMERIC_NUMPY_COMPATIBILITY environment + variable is set, otherwise identity is returned. + Empty slices will raise a ValueError + + Parameters + ---------- + a : array_like + Array containing numbers whose maximum is desired. If a is not + an array, a conversion is attempted. 
+ + axis : None or int or tuple[int], optional + Axis or axes along which to operate. By default, flattened input is + used. + + If this is a tuple of ints, the maximum is selected over multiple axes, + instead of a single axis or all the axes as before. + + out : ndarray, optional + Alternative output array in which to place the result. Must + be of the same shape and buffer length as the expected output. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `amax` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + initial : scalar, optional + The minimum value of an output element. Must be present to allow + computation on empty slice. See `~cupynumeric.ufunc.reduce` for + details. + + where : array_like[bool], optional + Elements to compare for the maximum. See `~cupynumeric.ufunc.reduce` + for details. + + Returns + ------- + nanmax : ndarray or scalar + An array with the same shape as `a`, with the specified axis + removed. If `a` is a 0-d array, or if axis is None, an ndarray + scalar is returned. The same dtype as `a` is returned. 
+ + Notes + ----- + CuPyNumeric's implementation will not raise a Runtime Warning for + slices with all-NaNs + + See Also + -------- + numpy.nanmin, numpy.amax, numpy.isnan, numpy.fmax, numpy.maximum, + numpy.isfinite + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + unary_red_code = get_non_nan_unary_red_code( + a.dtype.kind, UnaryRedCode.NANMAX + ) + + out_array = perform_unary_reduction( + unary_red_code, + a, + axis=axis, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + if cupynumeric_settings.numpy_compat() and a.dtype.kind == "f": + all_nan = all(isnan(a), axis=axis, keepdims=keepdims, where=where) + putmask(out_array, all_nan, np.nan) # type: ignore + + return out_array + + +@add_boilerplate("a") +def nanprod( + a: ndarray, + axis: Any = None, + dtype: Any = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + Return the product of array elements over a given axis treating + Not a Numbers (NaNs) as ones. + + One is returned for slices that are all-NaN or empty. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + Axis or axes along which the product is computed. The + default is to compute the product of the flattened array. + dtype : data-type, optional + The type of the returned array and of the accumulator in + which the elements are summed. By default, the dtype of a + is used. An exception is when a has an integer type with + less precision than the platform (u)intp. In that case, + the default will be either (u)int32 or (u)int64 depending + on whether the platform is 32 or 64 bits. For inexact + inputs, dtype must be inexact. + out : ndarray, optional + Alternate output array in which to place the result. The + default is None. If provided, it must have the same shape as + the expected output, but the type will be cast if necessary. 
+ See Output type determination for more details. The casting of + NaN to integer can yield unexpected results. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in the + result as dimensions with size one. With this option, the result + will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `prod` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + initial : scalar, optional + The starting value for this product. See `~cupynumeric.ufunc.reduce` + for details. + where : array_like[bool], optional + Elements to include in the product. See `~cupynumeric.ufunc.reduce` + for details. + + Returns + ------- + nanprod: ndarray, see `dtype` parameter above. + A new array holding the result is returned unless out is + specified, in which case it is returned. + + See Also + -------- + numpy.prod, numpy.isnan + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + + # Note: if the datatype of the input array is int and less + # than that of the platform int, then a convert task is launched + # in np.prod to take care of the type casting + + if a.dtype == np.complex128: + raise NotImplementedError( + "operation is not supported for complex128 arrays" + ) + + if a.dtype.kind in ("f", "c"): + unary_red_code = UnaryRedCode.NANPROD + else: + unary_red_code = UnaryRedCode.PROD + + return perform_unary_reduction( + unary_red_code, + a, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + +@add_boilerplate("a") +def nansum( + a: ndarray, + axis: Any = None, + dtype: Any = None, + out: ndarray | None = None, + keepdims: bool = False, + initial: int | float | None = None, + where: ndarray | None = None, +) -> ndarray: + """ + Return the sum of array elements over a given axis treating + 
Not a Numbers (NaNs) as zero. + + Zero is returned for slices that are all-NaN or empty. + + Parameters + ---------- + a : array_like + Array containing numbers whose sum is desired. If a is not + an array, a conversion is attempted. + + axis : None or int or tuple[int], optional + Axis or axes along which a sum is performed. The default, + axis=None, will sum all of the elements of the input array. + If axis is negative it counts from the last to the first axis. + + If axis is a tuple of ints, a sum is performed on all of the + axes specified in the tuple instead of a single axis or all + the axes as before. + + dtype : data-type, optional + The type of the returned array and of the accumulator in which + the elements are summed. The dtype of `a` is used by default + unless `a` has an integer dtype of less precision than the + default platform integer. In that case, if `a` is signed then + the platform integer is used while if `a` is unsigned then an + unsigned integer of the same precision as the platform integer + is used. + + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape as the expected output, but the type of + the output values will be cast if necessary. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + initial : scalar, optional + Starting value for the sum. See `~cupynumeric.ufunc.reduce` for + details. + + where : array_like[bool], optional + Elements to include in the sum. See `~cupynumeric.ufunc.reduce` for + details. + + Returns + ------- + nansum : ndarray, see `dtype` parameter above. + A new array holding the result is returned unless out is + specified, in which case it is returned. The result has the + same size as a, and the same shape as a if axis is not None or + a is a 1-d array. 
+ + See Also + -------- + numpy.nansum, numpy.isnan, numpy.isfinite + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + return a._nansum( + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) + + +@add_boilerplate("a", "prepend", "append") +def diff( + a: ndarray, + n: int = 1, + axis: int = -1, + prepend: ndarray | None = None, + append: ndarray | None = None, +) -> ndarray: + """ + Calculate the n-th discrete difference along the given axis. + The first difference is given by ``out[i] = a[i+1] - a[i]`` along + the given axis, higher differences are calculated by using `diff` + recursively. + + Parameters + ---------- + a : array_like + Input array + n : int, optional + The number of times values are differenced. If zero, the input + is returned as-is. + axis : int, optional + The axis along which the difference is taken, default is the + last axis. + prepend, append : array_like, optional + Values to prepend or append to `a` along axis prior to + performing the difference. Scalar values are expanded to + arrays with length 1 in the direction of axis and the shape + of the input array in along all other axes. Otherwise the + dimension and shape must match `a` except along axis. + + Returns + ------- + diff : ndarray + The n-th differences. The shape of the output is the same as `a` + except along `axis` where the dimension is smaller by `n`. The + type of the output is the same as the type of the difference + between any two elements of `a`. This is the same as the type of + `a` in most cases. + + See Also + -------- + numpy.diff + + Notes + ----- + Type is preserved for boolean arrays, so the result will contain + `False` when consecutive elements are the same and `True` when they + differ. + + For unsigned integer arrays, the results will also be unsigned. 
This + should not be surprising, as the result is consistent with + calculating the difference directly:: + + >>> u8_arr = np.array([1, 0], dtype=np.uint8) + >>> np.diff(u8_arr) + array([255], dtype=uint8) + >>> u8_arr[1,...] - u8_arr[0,...] + 255 + If this is not desirable, then the array should be cast to a larger + integer type first: + >>> i16_arr = u8_arr.astype(np.int16) + >>> np.diff(i16_arr) + array([-1], dtype=int16) + Examples + -------- + >>> x = np.array([1, 2, 4, 7, 0]) + >>> np.diff(x) + array([ 1, 2, 3, -7]) + >>> np.diff(x, n=2) + array([ 1, 1, -10]) + >>> x = np.array([[1, 3, 6, 10], [0, 5, 6, 8]]) + >>> np.diff(x) + array([[2, 3, 4], + [5, 1, 2]]) + >>> np.diff(x, axis=0) + array([[-1, 2, 0, -2]]) + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if n == 0: + return a + if n < 0: + raise ValueError("order must be non-negative but got " + repr(n)) + + nd = a.ndim + if nd == 0: + raise ValueError( + "diff requires input that is at least one dimensional" + ) + axis = normalize_axis_index(axis, nd) + + combined = [] + if prepend is not None: + if prepend.ndim == 0: + shape = list(a.shape) + shape[axis] = 1 + prepend = broadcast_to(prepend, tuple(shape)) + combined.append(prepend) + + combined.append(a) + + if append is not None: + if append.ndim == 0: + shape = list(a.shape) + shape[axis] = 1 + append = broadcast_to(append, tuple(shape)) + combined.append(append) + + if len(combined) > 1: + a = concatenate(combined, axis) + + # Diffing with n > shape results in an empty array. We have + # to handle this case explicitly as our slicing routines raise + # an exception with out-of-bounds slices, while NumPy's dont. 
+ if a.shape[axis] <= n: + shape = list(a.shape) + shape[axis] = 0 + return empty(shape=tuple(shape), dtype=a.dtype) + + slice1l = [slice(None)] * nd + slice2l = [slice(None)] * nd + slice1l[axis] = slice(1, None) + slice2l[axis] = slice(None, -1) + slice1 = tuple(slice1l) + slice2 = tuple(slice2l) + + op = not_equal if a.dtype == bool else subtract + for _ in range(n): + a = op(a[slice1], a[slice2]) + + return a + + +@add_boilerplate("f") +def gradient( + f: ndarray, *varargs: Any, axis: Any = None, edge_order: int = 1 +) -> Any: + """ + Return the gradient of an N-dimensional array. + + The gradient is computed using second order accurate central differences + in the interior points and either first or second order accurate one-sided + (forward or backwards) differences at the boundaries. + The returned gradient hence has the same shape as the input array. + + Parameters + ---------- + f : array_like + An N-dimensional array containing samples of a scalar function. + varargs : list of scalar or array, optional + Spacing between f values. Default unitary spacing for all dimensions. + Spacing can be specified using: + + 1. single scalar to specify a sample distance for all dimensions. + 2. N scalars to specify a constant sample distance for each dimension. + i.e. `dx`, `dy`, `dz`, ... + 3. N arrays to specify the coordinates of the values along each + dimension of F. The length of the array must match the size of + the corresponding dimension + 4. Any combination of N scalars/arrays with the meaning of 2. and 3. + + If `axis` is given, the number of varargs must equal the number of + axes. Default: 1. + + edge_order : {1, 2}, optional + Gradient is calculated using N-th order accurate differences + at the boundaries. Default: 1. + + axis : None or int or tuple of ints, optional + Gradient is calculated only along the given axis or axes + The default (axis = None) is to calculate the gradient for all the axes + of the input array. 
axis may be negative, in which case it counts from + the last to the first axis. + + Returns + ------- + gradient : ndarray or list of ndarray + A list of ndarrays (or a single ndarray if there is only one dimension) + corresponding to the derivatives of f with respect to each dimension. + Each derivative has the same shape as f. + + See Also + -------- + numpy.gradient + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + N = f.ndim # number of dimensions + + if axis is None: + axes = tuple(range(N)) + elif isinstance(axis, collections.abc.Sequence): + axes = tuple(normalize_axis_index(a, N) for a in axis) + else: + axis = normalize_axis_index(axis, N) + axes = (axis,) + + len_axes = len(axes) + if not varargs: + n = 0 + else: + n = len(varargs) + + if n == 0: + # no spacing argument - use 1 in all axes + dx = [asarray(1.0)] * len_axes + elif n == 1 and np.ndim(varargs[0]) == 0: + # single scalar for all axes + dx = list(asarray(varargs)) * len_axes + elif n == len_axes: + # scalar or 1d array for each axis + dx = list(asarray(v) for v in varargs) + for i, distances in enumerate(dx): + if distances.ndim == 0: + continue + elif distances.ndim != 1: + raise ValueError("distances must be either scalars or 1d") + if len(distances) != f.shape[axes[i]]: + raise ValueError( + "when 1d, distances must match " + "the length of the corresponding dimension" + ) + if np.issubdtype(distances.dtype, np.integer): + # Convert numpy integer types to float64 to avoid modular + # arithmetic in np.diff(distances). + distances = distances.astype(np.float64) + diffx = diff(distances) + dx[i] = diffx + else: + raise TypeError("invalid number of arguments") + + if edge_order > 2: + raise ValueError("'edge_order' greater than 2 not supported") + if edge_order < 0: + raise ValueError("invalid 'edge_order'") + + # use central differences on interior and one-sided differences on the + # endpoints. This preserves second order-accuracy over the full domain. 
+ + outvals = [] + + # create slice objects --- initially all are [:, :, ..., :] + slice1 = [slice(None)] * N + slice2 = [slice(None)] * N + slice3 = [slice(None)] * N + slice4 = [slice(None)] * N + + otype = f.dtype + if not np.issubdtype(otype, np.inexact): + # All other types convert to floating point. + # First check if f is a numpy integer type; if so, convert f to float64 + # to avoid modular arithmetic when computing the changes in f. + if np.issubdtype(otype, np.integer): + f = f.astype(np.float64) + otype = np.dtype(np.float64) + + for axis, ax_dx in zip(axes, dx): + if f.shape[axis] < edge_order + 1: + raise ValueError( + "Shape of array too small to calculate a numerical gradient, " + "at least (edge_order + 1) elements are required." + ) + # result allocation + out = empty_like(f, dtype=otype) + + # spacing for the current axis + uniform_spacing = np.ndim(ax_dx) == 0 + + # Numerical differentiation: 2nd order interior + slice1[axis] = slice(1, -1) + slice2[axis] = slice(None, -2) + slice3[axis] = slice(1, -1) + slice4[axis] = slice(2, None) + + if uniform_spacing: + out[tuple(slice1)] = (f[tuple(slice4)] - f[tuple(slice2)]) / ( + 2.0 * ax_dx + ) + else: + dx1 = ax_dx[0:-1] + dx2 = ax_dx[1:] + a = -(dx2) / (dx1 * (dx1 + dx2)) + b = (dx2 - dx1) / (dx1 * dx2) + c = dx1 / (dx2 * (dx1 + dx2)) + # fix the shape for broadcasting + shape = list(1 for i in range(N)) + shape[axis] = -1 + a = a.reshape(shape) + b = b.reshape(shape) + c = c.reshape(shape) + # 1D equivalent -- out[1:-1] = a * f[:-2] + b * f[1:-1] + c * f[2:] + out[tuple(slice1)] = ( + a * f[tuple(slice2)] + + b * f[tuple(slice3)] + + c * f[tuple(slice4)] + ) + + # Numerical differentiation: 1st order edges + if edge_order == 1: + slice1[axis] = 0 # type: ignore + slice2[axis] = 1 # type: ignore + slice3[axis] = 0 # type: ignore + dx_0 = ax_dx if uniform_spacing else ax_dx[0] + # 1D equivalent -- out[0] = (f[1] - f[0]) / (x[1] - x[0]) + out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / 
dx_0 + + slice1[axis] = -1 # type: ignore + slice2[axis] = -1 # type: ignore + slice3[axis] = -2 # type: ignore + dx_n = ax_dx if uniform_spacing else ax_dx[-1] + # 1D equivalent -- out[-1] = (f[-1] - f[-2]) / (x[-1] - x[-2]) + out[tuple(slice1)] = (f[tuple(slice2)] - f[tuple(slice3)]) / dx_n + + # Numerical differentiation: 2nd order edges + else: + slice1[axis] = 0 # type: ignore + slice2[axis] = 0 # type: ignore + slice3[axis] = 1 # type: ignore + slice4[axis] = 2 # type: ignore + if uniform_spacing: + a = -1.5 / ax_dx + b = 2.0 / ax_dx + c = -0.5 / ax_dx + else: + dx1 = ax_dx[0] + dx2 = ax_dx[1] + a = -(2.0 * dx1 + dx2) / (dx1 * (dx1 + dx2)) + b = (dx1 + dx2) / (dx1 * dx2) + c = -dx1 / (dx2 * (dx1 + dx2)) + # 1D equivalent -- out[0] = a * f[0] + b * f[1] + c * f[2] + out[tuple(slice1)] = ( + a * f[tuple(slice2)] + + b * f[tuple(slice3)] + + c * f[tuple(slice4)] + ) + + slice1[axis] = -1 # type: ignore + slice2[axis] = -3 # type: ignore + slice3[axis] = -2 # type: ignore + slice4[axis] = -1 # type: ignore + if uniform_spacing: + a = 0.5 / ax_dx + b = -2.0 / ax_dx + c = 1.5 / ax_dx + else: + dx1 = ax_dx[-2] + dx2 = ax_dx[-1] + a = (dx2) / (dx1 * (dx1 + dx2)) + b = -(dx2 + dx1) / (dx1 * dx2) + c = (2.0 * dx2 + dx1) / (dx2 * (dx1 + dx2)) + # 1D equivalent -- out[-1] = a * f[-3] + b * f[-2] + c * f[-1] + out[tuple(slice1)] = ( + a * f[tuple(slice2)] + + b * f[tuple(slice3)] + + c * f[tuple(slice4)] + ) + + outvals.append(out) + + # reset the slice object in this dimension to ":" + slice1[axis] = slice(None) + slice2[axis] = slice(None) + slice3[axis] = slice(None) + slice4[axis] = slice(None) + + if len_axes == 1: + return outvals[0] + else: + return outvals diff --git a/cupynumeric/_module/sets_making.py b/cupynumeric/_module/sets_making.py new file mode 100644 index 000000000..4e1c6ef4d --- /dev/null +++ b/cupynumeric/_module/sets_making.py @@ -0,0 +1,104 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); 
+# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._array.util import add_boilerplate + +if TYPE_CHECKING: + from .._array.array import ndarray + +_builtin_any = any + + +@add_boilerplate("ar") +def unique( + ar: ndarray, + return_index: bool = False, + return_inverse: bool = False, + return_counts: bool = False, + axis: int | None = None, +) -> ndarray: + """ + + Find the unique elements of an array. + Returns the sorted unique elements of an array. There are three optional + outputs in addition to the unique elements: + * the indices of the input array that give the unique values + * the indices of the unique array that reconstruct the input array + * the number of times each unique value comes up in the input array + + Parameters + ---------- + ar : array_like + Input array. Unless `axis` is specified, this will be flattened if it + is not already 1-D. + return_index : bool, optional + If True, also return the indices of `ar` (along the specified axis, + if provided, or in the flattened array) that result in the unique + array. + Currently not supported. + return_inverse : bool, optional + If True, also return the indices of the unique array (for the specified + axis, if provided) that can be used to reconstruct `ar`. + Currently not supported. + return_counts : bool, optional + If True, also return the number of times each unique item appears + in `ar`. + Currently not supported. + axis : int or None, optional + The axis to operate on. 
If None, `ar` will be flattened. If an integer, + the subarrays indexed by the given axis will be flattened and treated + as the elements of a 1-D array with the dimension of the given axis, + see the notes for more details. Object arrays or structured arrays + that contain objects are not supported if the `axis` kwarg is used. The + default is None. + Currently not supported. + + Returns + ------- + unique : ndarray + The sorted unique values. + unique_indices : ndarray, optional + The indices of the first occurrences of the unique values in the + original array. Only provided if `return_index` is True. + unique_inverse : ndarray, optional + The indices to reconstruct the original array from the + unique array. Only provided if `return_inverse` is True. + unique_counts : ndarray, optional + The number of times each of the unique values comes up in the + original array. Only provided if `return_counts` is True. + + See Also + -------- + numpy.unique + + Availability + -------- + Multiple GPUs, Multiple CPUs + + Notes + -------- + Keyword arguments for optional outputs are not yet supported. + `axis` is also not handled currently. + + """ + if _builtin_any((return_index, return_inverse, return_counts, axis)): + raise NotImplementedError( + "Keyword arguments for `unique` are not yet supported" + ) + + return ar.unique() diff --git a/cupynumeric/_module/ssc_counting.py b/cupynumeric/_module/ssc_counting.py new file mode 100644 index 000000000..879c22083 --- /dev/null +++ b/cupynumeric/_module/ssc_counting.py @@ -0,0 +1,57 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from .._array.util import add_boilerplate + +if TYPE_CHECKING: + from .._array.array import ndarray + + +@add_boilerplate("a") +def count_nonzero( + a: ndarray, axis: int | tuple[int, ...] | None = None +) -> int | ndarray: + """ + + Counts the number of non-zero values in the array ``a``. + + Parameters + ---------- + a : array_like + The array for which to count non-zeros. + axis : int or tuple, optional + Axis or tuple of axes along which to count non-zeros. + Default is None, meaning that non-zeros will be counted + along a flattened version of ``a``. + + Returns + ------- + count : int or ndarray[int] + Number of non-zero values in the array along a given axis. + Otherwise, the total number of non-zero values in the array + is returned. + + See Also + -------- + numpy.count_nonzero + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a._count_nonzero(axis) diff --git a/cupynumeric/_module/ssc_searching.py b/cupynumeric/_module/ssc_searching.py new file mode 100644 index 000000000..c64261eb2 --- /dev/null +++ b/cupynumeric/_module/ssc_searching.py @@ -0,0 +1,357 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING, overload + +from .._array.array import ndarray +from .._array.thunk import perform_where +from .._array.util import add_boilerplate +from .array_shape import ravel, reshape + +if TYPE_CHECKING: + import numpy.typing as npt + + from ..types import SortSide + + +@add_boilerplate("a") +def searchsorted( + a: ndarray, + v: int | float | ndarray, + side: SortSide = "left", + sorter: ndarray | None = None, +) -> int | ndarray: + """ + + Find the indices into a sorted array a such that, if the corresponding + elements in v were inserted before the indices, the order of a would be + preserved. + + Parameters + ---------- + a : 1-D array_like + Input array. If sorter is None, then it must be sorted in ascending + order, otherwise sorter must be an array of indices that sort it. + v : scalar or array_like + Values to insert into a. + side : ``{'left', 'right'}``, optional + If 'left', the index of the first suitable location found is given. + If 'right', return the last such index. If there is no suitable index, + return either 0 or N (where N is the length of a). + sorter : 1-D array_like, optional + Optional array of integer indices that sort array a into ascending + order. They are typically the result of argsort. + + Returns + ------- + indices : int or array_like[int] + Array of insertion points with the same shape as v, or an integer + if v is a scalar. 
+ + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.searchsorted(v, side, sorter) + + +@add_boilerplate("a") +def argmax( + a: ndarray, + axis: int | None = None, + out: ndarray | None = None, + *, + keepdims: bool = False, +) -> ndarray: + """ + + Returns the indices of the maximum values along an axis. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + By default, the index is into the flattened array, otherwise + along the specified axis. + out : ndarray, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the array. + + Returns + ------- + index_array : ndarray[int] + Array of indices into the array. It has the same shape as `a.shape` + with the dimension along `axis` removed. + + See Also + -------- + numpy.argmax + + Notes + ----- + cuPyNumeric's parallel implementation may yield different results from + NumPy when the array contains NaN(s). + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.argmax(axis=axis, out=out, keepdims=keepdims) + + +@add_boilerplate("a") +def argmin( + a: ndarray, + axis: int | None = None, + out: ndarray | None = None, + *, + keepdims: bool = False, +) -> ndarray: + """ + + Returns the indices of the minimum values along an axis. + + Parameters + ---------- + a : array_like + Input array. + axis : int, optional + By default, the index is into the flattened array, otherwise + along the specified axis. + out : ndarray, optional + If provided, the result will be inserted into this array. It should + be of the appropriate shape and dtype. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. 
With this option, + the result will broadcast correctly against the array. + + Returns + ------- + index_array : ndarray[int] + Array of indices into the array. It has the same shape as `a.shape` + with the dimension along `axis` removed. + + See Also + -------- + numpy.argmin + + Notes + ----- + cuPyNumeric's parallel implementation may yield different results from + NumPy when the array contains NaN(s). + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.argmin(axis=axis, out=out, keepdims=keepdims) + + +@add_boilerplate("a") +def flatnonzero(a: ndarray) -> ndarray: + """ + + Return indices that are non-zero in the flattened version of a. + + This is equivalent to `np.nonzero(np.ravel(a))[0]`. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + res : ndarray + Output array, containing the indices of the elements of + `a.ravel()` that are non-zero. + + See Also + -------- + numpy.flatnonzero + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return nonzero(ravel(a))[0] + + +@overload +def where(a: npt.ArrayLike | ndarray, x: None, y: None) -> tuple[ndarray, ...]: + ... + + +@overload +def where( + a: npt.ArrayLike | ndarray, + x: npt.ArrayLike | ndarray, + y: npt.ArrayLike | ndarray, +) -> ndarray: + ... + + +@add_boilerplate("a", "x", "y") # type: ignore [misc] +def where( + a: ndarray, x: ndarray | None = None, y: ndarray | None = None +) -> ndarray | tuple[ndarray, ...]: + """ + where(condition, [x, y]) + + Return elements chosen from `x` or `y` depending on `condition`. + + Parameters + ---------- + condition : array_like, bool + Where True, yield `x`, otherwise yield `y`. + x, y : array_like + Values from which to choose. `x`, `y` and `condition` need to be + broadcastable to some shape. + + Returns + ------- + out : ndarray + An array with elements from `x` where `condition` is True, and elements + from `y` elsewhere. 
+ + See Also + -------- + numpy.where + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if x is None or y is None: + if x is not None or y is not None: + raise ValueError( + "both 'x' and 'y' parameters must be specified together for" + " 'where'" + ) + return nonzero(a) + return perform_where(a, x, y) + + +@add_boilerplate("a") +def argwhere(a: ndarray) -> ndarray: + """ + argwhere(a) + + Find the indices of array elements that are non-zero, grouped by element. + + Parameters + ---------- + a : array_like + Input data. + + Returns + ------- + index_array : ndarray + Indices of elements that are non-zero. Indices are grouped by element. + This array will have shape (N, a.ndim) where N is the number of + non-zero items. + + See Also + -------- + numpy.argwhere + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + thunk = a._thunk.argwhere() + return ndarray(shape=thunk.shape, thunk=thunk) + + +@add_boilerplate("condition", "arr") +def extract(condition: ndarray, arr: ndarray) -> ndarray: + """ + + Return the elements of an array that satisfy some condition. + + Parameters + ---------- + condition : array_like + An array whose nonzero or True entries indicate the elements + of `arr` to extract. + arr : array_like + Input array of the same size as `condition`. + + Returns + ------- + result : ndarray + Rank 1 array of values from arr where `condition` is True. 
+ + See Also + -------- + numpy.extract + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + if condition.size != arr.size: + raise ValueError("arr array and condition array must be of same size") + + if condition.shape != arr.shape: + condition_reshape = reshape(condition, arr.shape) + else: + condition_reshape = condition + + if condition_reshape.dtype == bool: + thunk = arr._thunk.get_item(condition_reshape._thunk) + else: + bool_condition = condition_reshape.astype(bool) + thunk = arr._thunk.get_item(bool_condition._thunk) + + return ndarray(shape=thunk.shape, thunk=thunk) + + +@add_boilerplate("a") +def nonzero(a: ndarray) -> tuple[ndarray, ...]: + """ + + Return the indices of the elements that are non-zero. + + Returns a tuple of arrays, one for each dimension of `a`, + containing the indices of the non-zero elements in that + dimension. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + tuple_of_arrays : tuple + Indices of elements that are non-zero. + + See Also + -------- + numpy.nonzero + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.nonzero() diff --git a/cupynumeric/_module/ssc_sorting.py b/cupynumeric/_module/ssc_sorting.py new file mode 100644 index 000000000..4f32d0194 --- /dev/null +++ b/cupynumeric/_module/ssc_sorting.py @@ -0,0 +1,291 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import TYPE_CHECKING, Sequence + +import numpy as np + +from .._array.array import ndarray +from .._array.util import add_boilerplate +from .._utils import is_np2 + +if TYPE_CHECKING: + from ..types import SelectKind, SortType + + +@add_boilerplate("a") +def argsort( + a: ndarray, + axis: int | None = -1, + kind: SortType = "quicksort", + order: str | list[str] | None = None, +) -> ndarray: + """ + + Returns the indices that would sort an array. + + Parameters + ---------- + a : array_like + Input array. + axis : int or None, optional + Axis to sort. By default, the index -1 (the last axis) is used. If + None, the flattened array is used. + kind : ``{'quicksort', 'mergesort', 'heapsort', 'stable'}``, optional + Default is 'quicksort'. The underlying sort algorithm might vary. + The code basically supports 'stable' or *not* 'stable'. + order : str or list[str], optional + Currently not supported + + Returns + ------- + index_array : ndarray[int] + Array of indices that sort a along the specified axis. It has the + same shape as `a.shape` or is flattened in case of `axis` is None. + + See Also + -------- + numpy.argsort + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + result = ndarray(a.shape, np.int64) + result._thunk.sort( + rhs=a._thunk, argsort=True, axis=axis, kind=kind, order=order + ) + return result + + +if not is_np2: + + def msort(a: ndarray) -> ndarray: + """ + + Returns a sorted copy of an array sorted along the first axis. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + out : ndarray + Sorted array with same dtype and shape as `a`. 
+ + See Also + -------- + numpy.msort + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return sort(a, axis=0) + + +@add_boilerplate("a") +def sort( + a: ndarray, + axis: int | None = -1, + kind: SortType = "quicksort", + order: str | list[str] | None = None, +) -> ndarray: + """ + + Returns a sorted copy of an array. + + Parameters + ---------- + a : array_like + Input array. + axis : int or None, optional + Axis to sort. By default, the index -1 (the last axis) is used. If + None, the flattened array is used. + kind : ``{'quicksort', 'mergesort', 'heapsort', 'stable'}``, optional + Default is 'quicksort'. The underlying sort algorithm might vary. + The code basically supports 'stable' or *not* 'stable'. + order : str or list[str], optional + Currently not supported + + Returns + ------- + out : ndarray + Sorted array with same dtype and shape as `a`. In case `axis` is + None the result is flattened. + + + See Also + -------- + numpy.sort + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + result = ndarray(a.shape, a.dtype) + result._thunk.sort(rhs=a._thunk, axis=axis, kind=kind, order=order) + return result + + +@add_boilerplate("a") +def sort_complex(a: ndarray) -> ndarray: + """ + + Returns a sorted copy of an array sorted along the last axis. Sorts the + real part first, the imaginary part second. + + Parameters + ---------- + a : array_like + Input array. + + Returns + ------- + out : ndarray, complex + Sorted array with same shape as `a`. 
+ + See Also + -------- + numpy.sort_complex + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + + result = sort(a) + # force complex result upon return + if np.issubdtype(result.dtype, np.complexfloating): + return result + elif ( + np.issubdtype(result.dtype, np.integer) and result.dtype.itemsize <= 2 + ): + return result.astype(np.complex64, copy=True) + else: + return result.astype(np.complex128, copy=True) + + +# partition + + +@add_boilerplate("a") +def argpartition( + a: ndarray, + kth: int | Sequence[int], + axis: int | None = -1, + kind: SelectKind = "introselect", + order: str | list[str] | None = None, +) -> ndarray: + """ + + Perform an indirect partition along the given axis. + + Parameters + ---------- + a : array_like + Input array. + kth : int or Sequence[int] + axis : int or None, optional + Axis to partition. By default, the index -1 (the last axis) is used. If + None, the flattened array is used. + kind : ``{'introselect'}``, optional + Currently not supported. + order : str or list[str], optional + Currently not supported. + + Returns + ------- + out : ndarray[int] + Array of indices that partitions a along the specified axis. It has the + same shape as `a.shape` or is flattened in case of `axis` is None. + + + Notes + ----- + The current implementation falls back to `cupynumeric.argsort`. + + See Also + -------- + numpy.argpartition + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + result = ndarray(a.shape, np.int64) + result._thunk.partition( + rhs=a._thunk, + argpartition=True, + kth=kth, + axis=axis, + kind=kind, + order=order, + ) + return result + + +@add_boilerplate("a") +def partition( + a: ndarray, + kth: int | Sequence[int], + axis: int | None = -1, + kind: SelectKind = "introselect", + order: str | list[str] | None = None, +) -> ndarray: + """ + + Returns a partitioned copy of an array. + + Parameters + ---------- + a : array_like + Input array. 
+ kth : int or Sequence[int] + axis : int or None, optional + Axis to partition. By default, the index -1 (the last axis) is used. If + None, the flattened array is used. + kind : ``{'introselect'}``, optional + Currently not supported. + order : str or list[str], optional + Currently not supported. + + Returns + ------- + out : ndarray + Partitioned array with same dtype and shape as `a`. In case `axis` is + None the result is flattened. + + Notes + ----- + The current implementation falls back to `cupynumeric.sort`. + + See Also + -------- + numpy.partition + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + result = ndarray(a.shape, a.dtype) + result._thunk.partition( + rhs=a._thunk, kth=kth, axis=axis, kind=kind, order=order + ) + return result diff --git a/cupynumeric/_module/stats_avgs_vars.py b/cupynumeric/_module/stats_avgs_vars.py new file mode 100644 index 000000000..c463e174f --- /dev/null +++ b/cupynumeric/_module/stats_avgs_vars.py @@ -0,0 +1,518 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +import math +from typing import TYPE_CHECKING, Any + +import numpy as np + +from .._array.array import ndarray +from .._array.util import add_boilerplate +from .._utils import is_np2 +from .creation_shape import full +from .logic_truth import any +from .stats_order import nanquantile, quantile + +if is_np2: + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.numeric import normalize_axis_tuple # type: ignore + +if TYPE_CHECKING: + import numpy.typing as npt + + +@add_boilerplate("a", "weights") +def average( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + weights: ndarray | None = None, + returned: bool = False, + *, + keepdims: bool = False, +) -> ndarray | tuple[ndarray, ndarray]: + """ + Compute the weighted average along the specified axis. + + Parameters + ---------- + a : array_like + Array containing data to be averaged. If `a` is not an array, a + conversion is attempted. + axis : None or int or tuple of ints, optional + Axis or axes along which to average `a`. The default, + axis=None, will average over all of the elements of the input array. + If axis is negative it counts from the last to the first axis. + If axis is a tuple of ints, averaging is performed on all of the axes + specified in the tuple instead of a single axis or all the axes as + before. + weights : array_like, optional + An array of weights associated with the values in `a`. Each value in + `a` contributes to the average according to its associated weight. + The weights array can either be 1-D (in which case its length must be + the size of `a` along the given axis) or of the same shape as `a`. + If `weights=None`, then all data in `a` are assumed to have a + weight equal to one. The 1-D calculation is:: + + avg = sum(a * weights) / sum(weights) + + The only constraint on `weights` is that `sum(weights)` must not be 0. + returned : bool, optional + Default is `False`. 
If `True`, the tuple (`average`, `sum_of_weights`) + is returned, otherwise only the average is returned. + If `weights=None`, `sum_of_weights` is equivalent to the number of + elements over which the average is taken. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + Returns + ------- + retval, [sum_of_weights] : array_type or double + Return the average along the specified axis. When `returned` is `True`, + return a tuple with the average as the first element and the sum + of the weights as the second element. `sum_of_weights` is of the + same type as `retval`. The result dtype follows a general pattern. + If `weights` is None, the result dtype will be that of `a`, or + ``float64`` if `a` is integral. Otherwise, if `weights` is not None and + `a` is non-integral, the result type will be the type of lowest + precision capable of representing values of both `a` and `weights`. If + `a` happens to be integral, the previous rules still apply but the + result dtype will at least be ``float64``. + + Raises + ------ + ZeroDivisionError + When all weights along axis are zero. + ValueError + When the length of 1D `weights` is not the same as the shape of `a` + along axis. + + See Also + -------- + numpy.average + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + clean_axis: tuple[int, ...]
| None = None + if axis is not None: + clean_axis = normalize_axis_tuple(axis, a.ndim, argname="axis") + + scl: npt.ArrayLike | ndarray = 1 + if weights is None: + scl = ( + a.size + if clean_axis is None + else math.prod([a.shape[i] for i in clean_axis]) + ) + if a.dtype.kind == "i": + scl = np.float64(scl) + avg = a.sum(axis=clean_axis, keepdims=keepdims) / scl + elif weights.shape == a.shape: + scl = weights.sum( + axis=clean_axis, + keepdims=keepdims, + dtype=(np.dtype(np.float64) if a.dtype.kind == "i" else None), + ) + if any(scl == 0): + raise ZeroDivisionError("Weights along axis sum to 0") + avg = (a * weights).sum(axis=clean_axis, keepdims=keepdims) / scl + else: + if clean_axis is None: + raise ValueError( + "a and weights must share shape or axis must be specified" + ) + if weights.ndim != 1 or len(clean_axis) != 1: + raise ValueError( + "Weights must be either 1 dimension along single " + "axis or the same shape as a" + ) + if weights.size != a.shape[clean_axis[0]]: + raise ValueError("Weights length does not match axis") + + scl = weights.sum( + dtype=(np.dtype(np.float64) if a.dtype.kind == "i" else None) + ) + project_shape = [1] * a.ndim + project_shape[clean_axis[0]] = -1 + weights = weights.reshape(project_shape) + if any(scl == 0): + raise ZeroDivisionError("Weights along axis sum to 0") + avg = (a * weights).sum(axis=clean_axis[0], keepdims=keepdims) / scl + + if returned: + if not isinstance(scl, ndarray) or scl.ndim == 0: + scl = full(avg.shape, scl) + return avg, scl + else: + return avg + + +@add_boilerplate("a") +def mean( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + where: ndarray | None = None, +) -> ndarray: + """ + + Compute the arithmetic mean along the specified axis. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. 
`float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple[int], optional + Axis or axes along which the means are computed. The default is to + compute the mean of the flattened array. + + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default + is `float64`; for floating point inputs, it is the same as the + input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + If the default value is passed, then `keepdims` will not be + passed through to the `mean` method of sub-classes of + `ndarray`, however any non-default value will be. If the + sub-class' method does not implement `keepdims` any + exceptions will be raised. + + where : array_like of bool, optional + Elements to include in the mean. + + Returns + ------- + m : ndarray + If `out is None`, returns a new array of the same dtype as above + containing the mean values, otherwise a reference to the output + array is returned. + + See Also + -------- + numpy.mean + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.mean( + axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where + ) + + +@add_boilerplate("a") +def nanmean( + a: ndarray, + axis: int | tuple[int, ...]
| None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, + keepdims: bool = False, + where: ndarray | None = None, +) -> ndarray: + """ + + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + Returns the average of the array elements. The average is taken over + the flattened array by default, otherwise over the specified axis. + `float64` intermediate and return values are used for integer inputs. + + Parameters + ---------- + a : array_like + Array containing numbers whose mean is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple[int], optional + Axis or axes along which the means are computed. The default is to + compute the mean of the flattened array. + + If this is a tuple of ints, a mean is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the mean. For integer inputs, the default + is `float64`; for floating point inputs, it is the same as the + input dtype. + out : ndarray, optional + Alternate output array in which to place the result. The default + is ``None``; if provided, it must have the same shape as the + expected output, but the type will be cast if necessary. + + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. + + + where : array_like of bool, optional + Elements to include in the mean. + + Returns + ------- + m : ndarray + If `out is None`, returns a new array of the same dtype as a above + containing the mean values, otherwise a reference to the output + array is returned. 
+ + See Also + -------- + numpy.nanmean + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a._nanmean( + axis=axis, dtype=dtype, out=out, keepdims=keepdims, where=where + ) + + +@add_boilerplate("a") +def var( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, + ddof: int = 0, + keepdims: bool = False, + *, + where: ndarray | None = None, +) -> ndarray: + """ + Compute the variance along the specified axis. + + Returns the variance of the array elements, a measure of the spread of + a distribution. The variance is computed for the flattened array + by default, otherwise over the specified axis. + + Parameters + ---------- + a : array_like + Array containing numbers whose variance is desired. If `a` is not an + array, a conversion is attempted. + axis : None or int or tuple[int], optional + Axis or axes along which the variance is computed. The default is to + compute the variance of the flattened array. + + If this is a tuple of ints, a variance is performed over multiple axes, + instead of a single axis or all the axes as before. + dtype : data-type, optional + Type to use in computing the variance. For arrays of integer type + the default is float64; for arrays of float types + it is the same as the array type. + out : ndarray, optional + Alternate output array in which to place the result. It must have the + same shape as the expected output, but the type is cast if necessary. + ddof : int, optional + “Delta Degrees of Freedom”: the divisor used in the calculation is + N - ddof, where N represents the number of elements. By default + ddof is zero. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the input array. 
+ where : array_like of bool, optional + A boolean array which is broadcasted to match the dimensions of array, + and selects elements to include in the reduction. + + Returns + ------- + m : ndarray, see dtype parameter above + If `out=None`, returns a new array of the same dtype as above + containing the variance values, otherwise a reference to the output + array is returned. + + See Also + -------- + numpy.var + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + return a.var( + axis=axis, + dtype=dtype, + out=out, + ddof=ddof, + keepdims=keepdims, + where=where, + ) + + +@add_boilerplate("a") +def median( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + out: ndarray | None = None, + overwrite_input: bool = False, + keepdims: bool = False, +) -> ndarray: + """ + Compute the median along the specified axis. + + Returns the median of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : {int, sequence of int, None}, optional + Axis or axes along which the medians are computed. The default, + axis=None, will compute the median along a flattened version of + the array. + If a sequence of axes, the array is first flattened along the + given axes, then the median is computed along the resulting + flattened axis. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary. + overwrite_input : bool, optional + If True, then allow use of memory of input array `a` for + calculations. The input array will be modified by the call to + `median`. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. 
If `overwrite_input` is ``True`` and `a` is not already an + `ndarray`, an error will be raised. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + Returns + ------- + median : ndarray + A new array holding the result. If the input contains integers + or floats smaller than ``float64``, then the output data-type is + ``np.float64``. Otherwise, the data-type of the output is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + numpy.median + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + if a is None: + raise TypeError("'None' is not suported input to 'median'") + return quantile( + a, + 0.5, + axis=axis, + out=out, + overwrite_input=overwrite_input, + keepdims=keepdims, + method="midpoint", + ) + + +@add_boilerplate("a") +def nanmedian( + a: ndarray, + axis: int | tuple[int, ...] | None = None, + out: ndarray | None = None, + overwrite_input: bool = False, + keepdims: bool = False, +) -> ndarray: + """ + Compute the median along the specified axis, while ignoring NaNs. + + Returns the median of the array elements. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array. + axis : {int, sequence of int, None}, optional + Axis or axes along which the medians are computed. The default, + axis=None, will compute the median along a flattened version of + the array. + If a sequence of axes, the array is first flattened along the + given axes, then the median is computed along the resulting + flattened axis. + out : ndarray, optional + Alternative output array in which to place the result. It must + have the same shape and buffer length as the expected output, + but the type (of the output) will be cast if necessary.
+ overwrite_input : bool, optional + If True, then allow use of memory of input array `a` for + calculations. The input array will be modified by the call to + `nanmedian`. This will save memory when you do not need to preserve + the contents of the input array. Treat the input as undefined, + but it will probably be fully or partially sorted. Default is + False. If `overwrite_input` is ``True`` and `a` is not already an + `ndarray`, an error will be raised. + keepdims : bool, optional + If this is set to True, the axes which are reduced are left + in the result as dimensions with size one. With this option, + the result will broadcast correctly against the original `a`. + + Returns + ------- + median : ndarray + A new array holding the result. If the input contains integers + or floats smaller than ``float64``, then the output data-type is + ``np.float64``. Otherwise, the data-type of the output is the + same as that of the input. If `out` is specified, that array is + returned instead. + + See Also + -------- + numpy.nanmedian + + Availability + -------- + Multiple GPUs, Multiple CPUs + + """ + if a is None: + raise TypeError("'None' is not suported input to 'nanmedian'") + return nanquantile( + a, + 0.5, + axis=axis, + out=out, + overwrite_input=overwrite_input, + keepdims=keepdims, + method="midpoint", + ) diff --git a/cupynumeric/_module/stats_correlating.py b/cupynumeric/_module/stats_correlating.py new file mode 100644 index 000000000..ac01d6a2c --- /dev/null +++ b/cupynumeric/_module/stats_correlating.py @@ -0,0 +1,187 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import Any + +import numpy as np + +from .._array.array import ndarray +from .._array.util import add_boilerplate +from .array_joining import concatenate +from .creation_data import array +from .creation_shape import empty +from .linalg_mvp import dot +from .math_misc import clip +from .math_sum_prod_diff import sum +from .stats_avgs_vars import average + + +@add_boilerplate("m", "y", "fweights", "aweights") +def cov( + m: ndarray, + y: ndarray | None = None, + rowvar: bool = True, + bias: bool = False, + ddof: int | None = None, + fweights: ndarray | None = None, + aweights: ndarray | None = None, + *, + dtype: np.dtype[Any] | None = None, +) -> ndarray: + """ + Estimate a covariance matrix, given data and weights. + + Covariance indicates the level to which two variables vary together. + If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`, + then the covariance matrix element :math:`C_{ij}` is the covariance of + :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance + of :math:`x_i`. + + Parameters + ---------- + m : array_like + A 1-D or 2-D array containing multiple variables and observations. + Each row of `m` represents a variable, and each column a single + observation of all those variables. Also see `rowvar` below. + y : array_like, optional + An additional set of variables and observations. `y` has the same form + as that of `m`. 
+ rowvar : bool, optional + If `rowvar` is True (default), then each row represents a + variable, with observations in the columns. Otherwise, the relationship + is transposed: each column represents a variable, while the rows + contain observations. + bias : bool, optional + Default normalization (False) is by ``(N - 1)``, where ``N`` is the + number of observations given (unbiased estimate). If `bias` is True, + then normalization is by ``N``. These values can be overridden by using + the keyword ``ddof``. + ddof : int, optional + If not ``None`` the default value implied by `bias` is overridden. + Note that ``ddof=1`` will return the unbiased estimate, even if both + `fweights` and `aweights` are specified, and ``ddof=0`` will return + the simple average. The default value is ``None``. + fweights : array_like, int, optional + 1-D array of integer frequency weights; the number of times each + observation vector should be repeated. + aweights : array_like, optional + 1-D array of observation vector weights. These relative weights are + typically large for observations considered "important" and smaller for + observations considered less "important". If ``ddof=0`` the array of + weights can be used to assign probabilities to observation vectors. + dtype : data-type, optional + Data-type of the result. By default, the return data-type will have + at least `float64` precision. + + Returns + ------- + out : ndarray + The covariance matrix of the variables. 
+ + See Also + -------- + numpy.cov + + Availability + -------- + Multiple GPUs, Multiple CPUs + """ + # Check inputs + if ddof is not None and not isinstance(ddof, int): + raise ValueError("ddof must be integer") + + # Handles complex arrays too + if m.ndim > 2: + raise ValueError("m has more than 2 dimensions") + + if y is not None and y.ndim > 2: + raise ValueError("y has more than 2 dimensions") + + if dtype is None: + if y is None: + dtype = np.result_type(m.dtype, np.float64) + else: + dtype = np.result_type(m.dtype, y.dtype, np.float64) + + X = array(m, ndmin=2, dtype=dtype) + if not rowvar and X.shape[0] != 1: + X = X.T + if X.shape[0] == 0: + return empty((0, 0)) + if y is not None: + y = array(y, copy=False, ndmin=2, dtype=dtype) + if not rowvar and y.shape[0] != 1: + y = y.T + # TODO(mpapadakis): Could have saved on an intermediate copy of X in + # this case, if it was already of the right shape. + X = concatenate((X, y), axis=0) + + if ddof is None: + if not bias: + ddof = 1 + else: + ddof = 0 + + # Get the product of frequencies and weights + w: ndarray | None = None + if fweights is not None: + if fweights.ndim > 1: + raise RuntimeError("cannot handle multidimensional fweights") + if fweights.shape[0] != X.shape[1]: + raise RuntimeError("incompatible numbers of samples and fweights") + if any(fweights < 0): + raise ValueError("fweights cannot be negative") + w = fweights + if aweights is not None: + if aweights.ndim > 1: + raise RuntimeError("cannot handle multidimensional aweights") + if aweights.shape[0] != X.shape[1]: + raise RuntimeError("incompatible numbers of samples and aweights") + if any(aweights < 0): + raise ValueError("aweights cannot be negative") + if w is None: + w = aweights + else: + # Cannot be done in-place with *= when aweights.dtype != w.dtype + w = w * aweights + + avg, w_sum = average(X, axis=1, weights=w, returned=True) + + # Determine the normalization + fact: ndarray | float = 0.0 + if w is None: + fact = X.shape[1] - ddof + 
elif ddof == 0: + fact = w_sum + elif aweights is None: + fact = w_sum - ddof + else: + fact = w_sum - ddof * sum(w * aweights) / w_sum + + # TODO(mpapadakis): @add_boilerplate should extend the types of array + # arguments from `ndarray` to `npt.ArrayLike | ndarray`. + fact = clip(fact, 0.0, None) # type: ignore[arg-type] + + X -= avg[:, None] + if w is None: + X_T = X.T + else: + X_T = (X * w).T + c = dot(X, X_T.conj()) + # Cannot be done in-place with /= when the dtypes differ + c = c / fact + + return c.squeeze() diff --git a/cupynumeric/_module/stats_histograms.py b/cupynumeric/_module/stats_histograms.py new file mode 100644 index 000000000..05ab4e928 --- /dev/null +++ b/cupynumeric/_module/stats_histograms.py @@ -0,0 +1,388 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@add_boilerplate("x", "weights")
def bincount(
    x: ndarray, weights: ndarray | None = None, minlength: int = 0
) -> ndarray:
    """
    bincount(x, weights=None, minlength=0)

    Count number of occurrences of each value in array of non-negative ints.

    The number of bins (of size 1) is one larger than the largest value in
    `x`. If `minlength` is specified, there will be at least this number
    of bins in the output array (though it will be longer if necessary,
    depending on the contents of `x`).
    Each bin gives the number of occurrences of its index value in `x`.
    If `weights` is specified the input array is weighted by it, i.e. if a
    value ``n`` is found at position ``i``, ``out[n] += weight[i]`` instead
    of ``out[n] += 1``.

    Parameters
    ----------
    x : array_like
        1-D input array of non-negative ints.
    weights : array_like, optional
        Weights, array of the same shape as `x`. They are converted to
        ``float64`` before accumulation.
    minlength : int, optional
        A minimum number of bins for the output array.

    Returns
    -------
    out : ndarray[int]
        The result of binning the input array. Its length is
        ``max(minlength, cupynumeric.amax(x) + 1)``, or `minlength` when
        `x` is empty. The dtype is ``int64`` without weights and
        ``float64`` with weights.

    Raises
    ------
    ValueError
        If the input is not 1-dimensional, or contains elements with negative
        values, or if `minlength` is negative, or if `weights` has a
        different shape than `x` or is of complex type.
    TypeError
        If the type of the input is not an integer type.

    See Also
    --------
    numpy.bincount

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    if x.ndim != 1:
        raise ValueError("the input array must be 1-dimensional")
    if weights is not None:
        if weights.shape != x.shape:
            raise ValueError("weights array must be same shape for bincount")
        if weights.dtype.kind == "c":
            raise ValueError("weights must be convertible to float64")
        # Make sure the weights are float64
        weights = weights.astype(np.float64)
    if not np.issubdtype(x.dtype, np.integer):
        raise TypeError("input array for bincount must be integer type")
    if minlength < 0:
        raise ValueError("'minlength' must not be negative")

    out_dtype = np.dtype(np.int64) if weights is None else weights.dtype

    if x.size == 0:
        # Nothing to count: the max/min of an empty array is undefined, so
        # skip the reductions below and return `minlength` empty bins.
        return zeros((minlength,), dtype=out_dtype)

    # Note that the following are non-blocking operations,
    # though passing their results to `int` is blocking
    max_val, min_val = amax(x), amin(x)
    if int(min_val) < 0:
        raise ValueError("the input array must have no negative elements")
    minlength = _builtin_max(minlength, int(max_val) + 1)
    if x.size == 1:
        # Handle the special case of a single-element input by writing the
        # one occupied bin directly.
        out = zeros((minlength,), dtype=out_dtype)
        if weights is None:
            # TODO: Remove this "type: ignore" once @add_boilerplate can
            # propagate "ndarray -> ndarray | npt.ArrayLike" in wrapped sigs
            out[x[0]] = 1  # type: ignore [assignment]
        else:
            index = x[0]
            out[index] = weights[0]
    else:
        # Normal case of bincount
        out = ndarray(
            (minlength,),
            dtype=out_dtype,
            inputs=(x, weights),
        )
        if weights is None:
            out._thunk.bincount(x._thunk)
        else:
            out._thunk.bincount(x._thunk, weights=weights._thunk)
    return out
@add_boilerplate("x", "weights")
def histogram(
    x: ndarray,
    bins: ndarray | npt.ArrayLike | int = 10,
    range: tuple[int, int] | tuple[float, float] | None = None,
    weights: ndarray | None = None,
    density: bool = False,
) -> tuple[ndarray, ndarray]:
    """
    Compute the histogram of a dataset.

    Parameters
    ----------
    x : array_like
        Input data. The histogram is computed over the flattened array.
    bins : int or sequence of scalars, optional
        If `bins` is an int, it defines the number of equal-width bins in the
        given range (10, by default). If `bins` is a sequence, it defines a
        monotonically increasing array of bin edges, including the rightmost
        edge, allowing for non-uniform bin widths.
    range : (float, float), optional
        The lower and upper range of the bins. If not provided, range is simply
        ``(x.min(), x.max())``. Values outside the range are ignored. The first
        element of the range must be smaller than the second. This argument is
        ignored when bin edges are provided explicitly.
    weights : array_like, optional
        An array of weights, of the same shape as `x`. Each value in `x` only
        contributes its associated weight towards the bin count (instead of 1).
        If `density` is True, the weights are normalized, so that the integral
        of the density over the range remains 1.
    density : bool, optional
        If ``False``, the result will contain the number of samples in each
        bin. If ``True``, the result is the value of the probability *density*
        function at the bin, normalized such that the *integral* over the range
        is 1. Note that the sum of the histogram values will not be equal to 1
        unless bins of unity width are chosen; it is not a probability *mass*
        function.

    Returns
    -------
    hist : array
        The values of the histogram. See `density` and `weights` for a
        description of the possible semantics.
    bin_edges : array
        Return the bin edges ``(length(hist)+1)``.

    Raises
    ------
    ValueError
        If `bins` has more than one dimension, is a non-positive integer,
        or is an array that does not increase monotonically; if `range` is
        not a pair of increasing values; or if `weights` does not have the
        same shape as the (flattened) input.
    TypeError
        If `bins` is a scalar that is not an integer.

    See Also
    --------
    numpy.histogram

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    result_type: np.dtype[Any] = np.dtype(np.int64)

    if np.ndim(bins) > 1:
        raise ValueError("`bins` must be 1d, when an array")

    # check isscalar(bins):
    #
    if np.ndim(bins) == 0:
        if not isinstance(bins, int):
            raise TypeError("`bins` must be array or integer type")
        if bins < 1:
            # Without this check a zero bin count divides by zero below and
            # a negative one produces a negative output shape.
            raise ValueError("`bins` must be positive, when an integer")

        num_intervals = bins

        if range is not None:
            assert isinstance(range, tuple) and len(range) == 2
            if range[0] >= range[1]:
                raise ValueError(
                    "`range` must be a pair of increasing values."
                )

            lower_b = range[0]
            higher_b = range[1]
        elif x.size == 0:
            lower_b = 0.0
            higher_b = 1.0
        else:
            # Use the array reductions rather than the Python builtins: the
            # builtins iterate element by element and cannot handle inputs
            # with more than one dimension (x is only flattened further
            # down).
            lower_b = float(amin(x))
            higher_b = float(amax(x))

        step = (higher_b - lower_b) / num_intervals

        bins_array = asarray(
            [lower_b + k * step for k in _builtin_range(0, num_intervals)]
            + [higher_b],
            dtype=np.dtype(np.float64),
        )

        bins_orig_type = bins_array.dtype
    else:
        bins_as_arr = asarray(bins)
        bins_orig_type = bins_as_arr.dtype

        bins_array = bins_as_arr.astype(np.dtype(np.float64))
        num_intervals = bins_array.shape[0] - 1

        # Single array reduction instead of iterating with builtin all()
        if not ((bins_array[1:] - bins_array[:-1]) >= 0).all():
            raise ValueError(
                "`bins` must increase monotonically, when an array"
            )

    if x.ndim != 1:
        x = x.flatten()

    if weights is not None:
        if weights.shape != x.shape:
            raise ValueError(
                "`weights` array must be same shape for histogram"
            )

        result_type = weights.dtype
        weights_array = weights.astype(np.dtype(np.float64))
    else:
        # case weights == None cannot be handled inside _thunk.histogram,
        # bc/ of hist ndarray inputs(), below;
        # needs to be handled here:
        #
        weights_array = ones(x.shape, dtype=np.dtype(np.float64))

    if x.size == 0:
        return (
            zeros((num_intervals,), dtype=result_type),
            bins_array.astype(bins_orig_type),
        )

    hist = ndarray(
        (num_intervals,),
        dtype=weights_array.dtype,
        inputs=(x, bins_array, weights_array),
    )
    hist._thunk.histogram(
        x._thunk, bins_array._thunk, weights=weights_array._thunk
    )

    # handle (density = True):
    # normalize by the total weight, then by each bin's width
    #
    if density:
        result_type = np.dtype(np.float64)
        hist /= hist.sum()
        hist /= bins_array[1:] - bins_array[:-1]

    return hist.astype(result_type), bins_array.astype(bins_orig_type)
@add_boilerplate("x", "bins")
def digitize(
    x: ndarray,
    bins: ndarray,
    right: bool = False,
) -> ndarray | int:
    """
    Return the indices of the bins to which each value in input array belongs.

    =========  =============  ============================
    `right`    order of bins  returned index `i` satisfies
    =========  =============  ============================
    ``False``  increasing     ``bins[i-1] <= x < bins[i]``
    ``True``   increasing     ``bins[i-1] < x <= bins[i]``
    ``False``  decreasing     ``bins[i-1] > x >= bins[i]``
    ``True``   decreasing     ``bins[i-1] >= x > bins[i]``
    =========  =============  ============================

    If values in `x` are beyond the bounds of `bins`, 0 or ``len(bins)`` is
    returned as appropriate.

    Parameters
    ----------
    x : array_like
        Input array to be binned. Doesn't need to be 1-dimensional.
    bins : array_like
        Array of bins. It has to be 1-dimensional and monotonic. Bins that
        are both non-decreasing and non-increasing (all edges equal, or
        fewer than two edges) are treated as increasing.
    right : bool, optional
        Indicating whether the intervals include the right or the left bin
        edge. Default behavior is (right==False) indicating that the interval
        does not include the right edge, i.e., bins[i-1] <= x < bins[i] is
        the default behavior for monotonically increasing bins.

    Returns
    -------
    indices : ndarray of ints
        Output array of indices, of same shape as `x`.

    Raises
    ------
    ValueError
        If `bins` is not monotonic, or has more than one dimension.
    TypeError
        If the type of the input is complex.

    See Also
    --------
    numpy.digitize

    Notes
    -----
    If values in `x` are such that they fall outside the bin range,
    attempting to index `bins` with the indices that `digitize` returns
    will result in an IndexError.
    For monotonically *increasing* `bins`, the following are equivalent::

        np.digitize(x, bins, right=True)
        np.searchsorted(bins, x, side='left')

    Note that as the order of the arguments are reversed, the side must be too.
    The `searchsorted` call is marginally faster, as it does not do any
    monotonicity checks. Perhaps more importantly, it supports all dtypes.

    Examples
    --------
    >>> x = np.array([0.2, 6.4, 3.0, 1.6])
    >>> bins = np.array([0.0, 1.0, 2.5, 4.0, 10.0])
    >>> inds = np.digitize(x, bins)
    >>> inds
    array([1, 4, 3, 2])
    >>> for n in range(x.size):
    ...   print(bins[inds[n]-1], "<=", x[n], "<", bins[inds[n]])
    ...
    0.0 <= 0.2 < 1.0
    4.0 <= 6.4 < 10.0
    2.5 <= 3.0 < 4.0
    1.0 <= 1.6 < 2.5
    >>> x = np.array([1.2, 10.0, 12.4, 15.5, 20.])
    >>> bins = np.array([0, 5, 10, 15, 20])
    >>> np.digitize(x,bins,right=True)
    array([1, 2, 3, 4, 4])
    >>> np.digitize(x,bins,right=False)
    array([1, 3, 3, 4, 5])

    Availability
    --------
    Multiple GPUs, Multiple CPUs
    """
    # here for compatibility, searchsorted below is happy to take this
    if np.issubdtype(x.dtype, np.complexfloating):
        raise TypeError("x may not be complex")

    if bins.ndim > 1:
        raise ValueError("bins must be one-dimensional")

    # this is backwards because the arguments below are swapped
    side: SortSide = "left" if right else "right"

    # Test "increasing" first: constant, single-element and empty bins
    # satisfy both monotonicity checks and must take the increasing path
    # (as NumPy does); taking the reversed path for them would return
    # ``len(bins) - index`` instead of ``index``.
    if (bins[1:] >= bins[:-1]).all():
        return searchsorted(bins, x, side=side)

    if (bins[1:] <= bins[:-1]).all():
        # reverse the bins, and invert the results
        return len(bins) - searchsorted(bins.flip(), x, side=side)

    raise ValueError("bins must be monotonically increasing or decreasing")
a/cupynumeric/_module/stats_order.py b/cupynumeric/_module/stats_order.py new file mode 100644 index 000000000..7d7564a3d --- /dev/null +++ b/cupynumeric/_module/stats_order.py @@ -0,0 +1,1063 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import math +from typing import TYPE_CHECKING, Any, Iterable, Sequence + +import numpy as np + +from .._utils import is_np2 + +if is_np2: + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.numeric import ( # type: ignore + normalize_axis_tuple, + ) + +from .._array.util import add_boilerplate +from .._ufunc.comparison import logical_not +from .._ufunc.floating import isnan +from .array_transpose import moveaxis +from .creation_data import asarray +from .creation_shape import zeros +from .ssc_counting import count_nonzero +from .ssc_searching import where +from .ssc_sorting import sort + +if TYPE_CHECKING: + from typing import Callable + + import numpy.typing as npt + + from .._array.array import ndarray + + +# for the case when axis = tuple (non-singleton) +# reshuffling might have to be done (if tuple is non-consecutive) +# and the src array must be collapsed along that set of axes +# +# args: +# +# arr: [in] source nd-array on which quantiles are calculated; +# axes_set: [in] tuple or list of axes (indices less than arr dimension); +# +# return: pair: (minimal_index, reshuffled_and_collapsed source array) 
def _reshuffle_reshape(
    arr: ndarray, axes_set: Sequence[int]
) -> tuple[int, ndarray]:
    # Gather the axes listed in `axes_set` into one contiguous run that
    # starts at the smallest of them, then merge that run into a single
    # dimension.
    #
    # Returns the pair (smallest axis, reshaped array); the merged
    # dimension sits at the returned axis position.
    ordered = tuple(sorted(axes_set))
    first = ordered[0]
    count = len(ordered)
    target = tuple(range(first, first + count))

    # Axes only need to be moved when they are not already adjacent.
    moved = arr if ordered == target else moveaxis(arr, ordered, target)

    # Extent of the merged dimension: product of the gathered extents.
    merged = np.prod([moved.shape[i] for i in target])

    new_shape = (
        *moved.shape[:first],
        merged,
        *moved.shape[first + count :],
    )
    return (first, moved.reshape(new_shape))
# Each helper below maps a quantile level and a sample count to a
# ``(gamma, index)`` pair:
#
#   q     = quantile input \in [0, 1]
#   n     = sizeof(array)
#   gamma = interpolation weight given to the right neighbour, or None for
#           methods that do not interpolate
#   index = 0-based position of the left neighbour (may fall outside
#           [0, n - 1]; callers are expected to clamp it)


def _split_position(pos: float) -> tuple[int, float]:
    # Truncate `pos` toward zero; return (integer part, remainder).
    whole = int(pos)
    return whole, pos - whole


# Discontinuous methods:


def _inverted_cdf(q: float, n: int) -> tuple[float, int]:
    # Any fractional remainder selects the right neighbour outright.
    whole, frac = _split_position(q * n)
    return (1.0 if frac > 0 else 0.0, whole - 1)


def _averaged_inverted_cdf(q: float, n: int) -> tuple[float, int]:
    # Like `_inverted_cdf`, but an exact hit averages both neighbours.
    whole, frac = _split_position(q * n)
    return (1.0 if frac > 0 else 0.5, whole - 1)


_desired_mod_2: int = int(np.lib.NumpyVersion(np.__version__) < "2.0.1")


def _closest_observation(q: float, n: int) -> tuple[None, int]:
    pos = q * n - 0.5
    whole = int(pos)

    # Normally the next index is taken. Only when `pos` is hit exactly is
    # round-to-even applied; the parity that counts as "even" depends on
    # the NumPy version (see `_desired_mod_2`), because older versions
    # rounded on the 0-based rather than the 1-based index.
    stay_on_left = whole == pos and whole % 2 == _desired_mod_2
    return (None, whole - 1 if stay_on_left else whole)


# Continuous methods:


# Parzen method
def _interpolated_inverted_cdf(q: float, n: int) -> tuple[float, int]:
    whole, frac = _split_position(q * n)
    return (frac, whole - 1)


# Hazen method
def _hazen(q: float, n: int) -> tuple[float, int]:
    whole, frac = _split_position(q * n + 0.5)
    return (frac, whole - 1)


# Weibull method
def _weibull(q: float, n: int) -> tuple[float, int]:
    whole, frac = _split_position(q * (n + 1))
    return (frac, whole - 1)


# Gumbel method
def _linear(q: float, n: int) -> tuple[float, int]:
    whole, frac = _split_position(q * (n - 1) + 1)
    return (frac, whole - 1)


# Johnson & Kotz method
def _median_unbiased(q: float, n: int) -> tuple[float, int]:
    third = 1.0 / 3.0
    whole, frac = _split_position(q * (n + third) + third)
    return (frac, whole - 1)


# Blom method
def _normal_unbiased(q: float, n: int) -> tuple[float, int]:
    whole, frac = _split_position(q * (n + 0.25) + 3.0 / 8.0)
    return (frac, whole - 1)


# Variations of 'linear' that work directly on the 0-based position
# ``q * (n - 1)``:


def _lower(q: float, n: int) -> tuple[None, int]:
    return (None, int(q * (n - 1)))


def _higher(q: float, n: int) -> tuple[None, int]:
    return (None, int(math.ceil(q * (n - 1))))


def _midpoint(q: float, n: int) -> tuple[float, int]:
    pos = q * (n - 1)
    whole = int(pos)
    # Halfway between the neighbours, unless `pos` lands on a sample.
    return (0.0 if pos == whole else 0.5, whole)


def _nearest(q: float, n: int) -> tuple[None, int]:
    return (None, int(np.round(q * (n - 1))))
output array") + + for index, q in np.ndenumerate(q_arr): + gamma, left_pos = method(q, n) + # Note that gamma may be None, in which case `right_pos` has no + # meaning since use the exact index. + right_pos = left_pos + 1 + + # The virtual pos, which was used to calculate `left`, can be outside + # the range, so fix all indices to be in range here. + if left_pos >= n - 1: + left_pos = right_pos = n - 1 + elif left_pos < 0: + left_pos = right_pos = 0 + + # If gamma is None, we only have to extract the correct values + if gamma is None: + qs_all[index] = arr.take(left_pos, axis).reshape(remaining_shape) + else: + # (N-1) dimensional ndarray of left, right + # neighbor values: + # + # non-flattening approach: + # + # extract values at left and right position; + arr_1D_lvals = arr.take(left_pos, axis).reshape(remaining_shape) + arr_1D_rvals = arr.take(right_pos, axis).reshape(remaining_shape) + + # TODO: We may want to use a more precise interpolation formula + # like NumPy here (or implement an `lerp` function to use). + # + # vectorized for axis != None; + # (non-flattening approach) + left = (1.0 - gamma) * arr_1D_lvals + right = gamma * arr_1D_rvals + qs_all[index] = left + right + + return qs_all + + +_ORDER_FUNCS = { + "inverted_cdf": _inverted_cdf, + "averaged_inverted_cdf": _averaged_inverted_cdf, + "closest_observation": _closest_observation, + "interpolated_inverted_cdf": _interpolated_inverted_cdf, + "hazen": _hazen, + "weibull": _weibull, + "linear": _linear, + "median_unbiased": _median_unbiased, + "normal_unbiased": _normal_unbiased, + "lower": _lower, + "higher": _higher, + "midpoint": _midpoint, + "nearest": _nearest, +} + + +@add_boilerplate("a") +def quantile( + a: ndarray, + q: float | Iterable[float] | ndarray, + axis: int | tuple[int, ...] 
@add_boilerplate("a")
def quantile(
    a: ndarray,
    q: float | Iterable[float] | ndarray,
    axis: int | tuple[int, ...] | None = None,
    out: ndarray | None = None,
    overwrite_input: bool = False,
    method: str = "linear",
    keepdims: bool = False,
) -> ndarray:
    """
    Compute the q-th quantile of the data along the specified axis.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    q : array_like of float
        Quantile or sequence of quantiles to compute, which must be between
        0 and 1 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the quantiles are computed. The default is
        to compute the quantile(s) along a flattened version of the array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape as the expected output.
    overwrite_input : bool, optional
        If True, then allow the input array `a` to be modified by
        intermediate calculations, to save memory. In this case, the
        contents of the input `a` after this function completes is
        undefined. The flag is ignored when more than one axis is given.
    method : str, optional
        This parameter specifies the method to use for estimating the
        quantile. The options sorted by their R type
        as summarized in the H&F paper [1]_ are:
        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'
        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:
        * 'lower'
        * 'higher',
        * 'midpoint'
        * 'nearest'
    keepdims : bool, optional
        If this is set to True, the axes which are reduced are left in
        the result as dimensions with size one. With this option, the
        result will broadcast correctly against the original array `a`.

    Returns
    -------
    quantile : scalar or ndarray
        If `q` is a single quantile and `axis=None`, then the result
        is a scalar. If multiple quantiles are given, first axis of
        the result corresponds to the quantiles. The other axes are
        the axes that remain after the reduction of `a`. If the input
        contains integers or floats smaller than ``float64``, the output
        data-type is ``float64``. Otherwise, the output data-type is the
        same as that of the input. If `out` is specified, that array is
        returned instead.

    Raises
    ------
    TypeError
        If the type of the input is complex.
    KeyError
        If `method` is not one of the names listed above.
    ValueError
        If `out` is given with a shape other than the expected one.

    See Also
    --------
    numpy.quantile

    Availability
    --------
    Multiple GPUs, Multiple CPUs

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996
    """

    real_axis: int | None
    axes_set: Sequence[int] = ()
    original_shape = a.shape

    if axis is not None and isinstance(axis, Iterable):
        nrm_axis = normalize_axis_tuple(axis, a.ndim)
        if len(nrm_axis) == 1:
            real_axis = nrm_axis[0]
            a_rr = a
        else:
            # reshuffling requires non-negative axes:
            (real_axis, a_rr) = _reshuffle_reshape(a, nrm_axis)
            # The merged array is no longer the caller's array, so sorting
            # it in place would not honour `overwrite_input` anyway.
            overwrite_input = False
        axes_set = nrm_axis
    else:
        real_axis = axis
        a_rr = a
        if real_axis is not None:
            axes_set = normalize_axis_tuple(real_axis, a.ndim)
            real_axis = axes_set[0]

    # covers both array-like and scalar cases:
    #
    q_arr = np.asarray(q)

    # Validate before sorting: sorting is the expensive step and, with
    # `overwrite_input`, it modifies the caller's array, so a rejected
    # call must fail before it happens.
    if a_rr.dtype.kind == "c":
        raise TypeError("input array cannot be of complex type")
    order_func = _ORDER_FUNCS[method]

    # in the future k-sort (partition)
    # might be faster, for now it uses sort
    # arr = partition(arr, k = floor(nq), axis = real_axis)
    # but that would require a k-sort call for each `q`!
    # too expensive for many `q` values...
    # if no axis given then elements are sorted as a 1D array
    #
    if overwrite_input:
        a_rr.sort(axis=real_axis)
        arr = a_rr
    else:
        arr = sort(a_rr, axis=real_axis)

    # return type dependency on arr.dtype:
    #
    # it depends on interpolation method;
    # For discontinuous methods returning either end of the interval within
    # which the quantile falls, or the other; arr.dtype is returned;
    # else, logic below:
    #
    # if is_float(arr_dtype) && (arr.dtype >= dtype('float64')) then
    #    arr.dtype
    # else
    #    dtype('float64')
    #
    # see https://github.com/numpy/numpy/issues/22323
    #
    if method in [
        "inverted_cdf",
        "closest_observation",
        "lower",
        "higher",
        "nearest",
    ]:
        to_dtype = arr.dtype
    else:
        to_dtype = np.dtype("float64")

        # in case dtype("float128") becomes supported:
        #
        # to_dtype = (
        #     arr.dtype
        #     if (arr.dtype == np.dtype("float128"))
        #     else np.dtype("float64")
        # )

    # When `out` is supplied the implementation fills and returns that
    # same array (converting to its dtype on assignment), so the result
    # can be returned directly in both cases.
    return _quantile_impl(
        arr,
        q_arr,
        real_axis,
        axes_set,
        original_shape,
        order_func,
        keepdims,
        to_dtype,
        out,
    )
@add_boilerplate("a")
def percentile(
    a: ndarray,
    q: float | Iterable[float] | ndarray,
    axis: int | tuple[int, ...] | None = None,
    out: ndarray | None = None,
    overwrite_input: bool = False,
    method: str = "linear",
    keepdims: bool = False,
) -> ndarray:
    """
    Compute the q-th percentile of the data along the specified axis.

    This rescales `q` from the range [0, 100] to [0, 1] and forwards every
    other argument unchanged to `quantile`.

    Parameters
    ----------
    a : array_like
        Input array or object that can be converted to an array.
    q : array_like of float
        Percentile or sequence of percentiles to compute, which must be
        between 0 and 100 inclusive.
    axis : {int, tuple of int, None}, optional
        Axis or axes along which the percentiles are computed. By default
        the percentile(s) are computed over a flattened version of the
        array.
    out : ndarray, optional
        Alternative output array in which to place the result. It must have
        the same shape as the expected output.
    overwrite_input : bool, optional
        If True, intermediate calculations are allowed to modify the input
        array `a` in order to save memory; the contents of `a` are then
        undefined once this function returns.
    method : str, optional
        Name of the estimation method. The options, sorted by their R type
        as summarized in the H&F paper [1]_, are:
        1. 'inverted_cdf'
        2. 'averaged_inverted_cdf'
        3. 'closest_observation'
        4. 'interpolated_inverted_cdf'
        5. 'hazen'
        6. 'weibull'
        7. 'linear'  (default)
        8. 'median_unbiased'
        9. 'normal_unbiased'
        The first three methods are discontinuous.  NumPy further defines the
        following discontinuous variations of the default 'linear' (7.) option:
        * 'lower'
        * 'higher',
        * 'midpoint'
        * 'nearest'
    keepdims : bool, optional
        If True, the reduced axes are kept in the result as dimensions of
        size one, so that the result broadcasts correctly against the
        original array `a`.

    Returns
    -------
    percentile : scalar or ndarray
        If `q` is a single percentile and `axis=None`, the result is a
        scalar. If multiple percentiles are given, the first axis of the
        result corresponds to the percentiles and the other axes are those
        that remain after the reduction of `a`. If the input contains
        integers or floats smaller than ``float64``, the output data-type
        is ``float64``; otherwise it is the same as that of the input.
        If `out` is specified, that array is returned instead.

    Raises
    ------
    TypeError
        If the type of the input is complex.

    See Also
    --------
    numpy.percentile

    Availability
    --------
    Multiple GPUs, Multiple CPUs

    References
    ----------
    .. [1] R. J. Hyndman and Y. Fan,
       "Sample quantiles in statistical packages,"
       The American Statistician, 50(4), pp. 361-365, 1996
    """
    # Percentiles are quantiles expressed on a 0-100 scale.
    fractions = np.asarray(q) / 100.0

    forwarded = {
        "out": out,
        "overwrite_input": overwrite_input,
        "method": method,
        "keepdims": keepdims,
    }
    return quantile(a, fractions, axis, **forwarded)
non-flattening approach: + # + if qs_all is None: + qs_all = zeros(qresult_shape, dtype=to_dtype) + else: + # implicit conversion from to_dtype to qs_all.dtype assumed + # + if qs_all.shape != qresult_shape: + raise ValueError("wrong shape on output array") + + assert non_nan_counts.shape == remaining_shape + + arr_gammas = zeros(remaining_shape, dtype=arr.dtype) + arr_lvals = zeros(remaining_shape, dtype=arr.dtype) + arr_rvals = zeros(remaining_shape, dtype=arr.dtype) + + # Similar to the non-nan implementation except that it needs to make + # `n` depend on the number of non-nan-counts. + for qindex, q in np.ndenumerate(q_arr): + assert qs_all[qindex].shape == remaining_shape + + # TODO(aschaffer): Vectorize this operation, see + # github.com/nv-legate/cupynumeric/pull/1121#discussion_r1484731763 + gamma = None + for aindex, n in np.ndenumerate(non_nan_counts): + # TODO (2024-08): `n` should be an integral type, but wasn't: + n = int(n) + if n == 0: + # Cannot define a quantile over an empty range, return NaN + # TODO(mpapadakis): mypy mysteriously complains that + # expression has type "float", target has type "ndarray" + arr_lvals[aindex] = np.nan # type: ignore[assignment] + arr_rvals[aindex] = np.nan # type: ignore[assignment] + continue + + gamma, left_pos = method(q, n) + + right_pos = left_pos + 1 + if left_pos >= n - 1: + left_pos = right_pos = n - 1 + elif left_pos < 0: + left_pos = right_pos = 0 + + # assumption: since `non_nan_counts` has the same + # shape as `remaining_shape` (checked above), + # `aindex` are the same indices as those needed + # to access `a`'s remaining shape slices; + # + full_l_index = (*aindex[:axis], left_pos, *aindex[axis:]) + arr_lvals[aindex] = arr[full_l_index] + if gamma is not None: + # TODO(mpapadakis): As above, mypy complains about assignment + arr_gammas[aindex] = gamma # type: ignore[assignment] + + full_r_index = (*aindex[:axis], right_pos, *aindex[axis:]) + arr_rvals[aindex] = arr[full_r_index] + + if gamma is None: 
+ # Note that gamma can only be always None or never + qs_all[qindex] = arr_lvals + else: + left = (1 - arr_gammas) * arr_lvals + right = arr_gammas * arr_rvals + qs_all[qindex] = left + right + + return qs_all + + +@add_boilerplate("a") +def nanquantile( + a: ndarray, + q: float | Iterable[float] | ndarray, + axis: int | tuple[int, ...] | None = None, + out: ndarray | None = None, + overwrite_input: bool = False, + method: str = "linear", + keepdims: bool = False, +) -> ndarray: + """ + Compute the q-th quantile of the data along the specified axis, + while ignoring nan values. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, + containing nan values to be ignored. + q : array_like of float + Quantile or sequence of quantiles to compute, which must be between + 0 and 1 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the quantiles are computed. The default is + to compute the quantile(s) along a flattened version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by + intermediate calculations, to save memory. In this case, the + contents of the input `a` after this function completes is + undefined. + method : str, optional + This parameter specifies the method to use for estimating the + quantile. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + The first three methods are discontinuous. NumPy further defines the + following discontinuous variations of the default 'linear' (7.) 
option: + * 'lower' + * 'higher', + * 'midpoint' + * 'nearest' + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + Returns + ------- + quantile : scalar or ndarray + If `q` is a single quantile and `axis=None`, then the result + is a scalar. If multiple quantiles are given, first axis of + the result corresponds to the quantiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + Raises + ------ + TypeError + If the type of the input is complex. + + See Also + -------- + numpy.nanquantile + + Availability + -------- + Multiple GPUs, Multiple CPUs + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ + + real_axis: int | None + axes_set: Sequence[int] = () + original_shape = a.shape + + if axis is not None and isinstance(axis, Iterable): + nrm_axis = normalize_axis_tuple(axis, a.ndim) + if len(axis) == 1: + real_axis = nrm_axis[0] + a_rr = a + else: + (real_axis, a_rr) = _reshuffle_reshape(a, nrm_axis) + # What happens with multiple axes and overwrite_input = True ? 
+ # It seems overwrite_input is reset to False; + # But `overwrite_input` doesn't matter for the NaN version of this + # function + # overwrite_input = False + axes_set = nrm_axis + else: + real_axis = axis + a_rr = a + if real_axis is not None: + axes_set = normalize_axis_tuple(real_axis, a.ndim) + real_axis = axes_set[0] + + # ndarray of non-NaNs: + # + non_nan_counts = asarray( + count_nonzero( + logical_not(isnan(a_rr)), + axis=real_axis, + ) + ) + + # covers both array-like and scalar cases: + # + q_arr = np.asarray(q) + + # in the future k-sort (partition) + # might be faster, for now it uses sort + # arr = partition(arr, k = floor(nq), axis = real_axis) + # but that would require a k-sort call for each `q`! + # too expensive for many `q` values... + # if no axis given then elements are sorted as a 1D array + # + # replace NaN's by dtype.max: + # + arr = where(isnan(a_rr), np.finfo(a_rr.dtype).max, a_rr) + arr.sort(axis=real_axis) + + if arr.dtype.kind == "c": + raise TypeError("input array cannot be of complex type") + + # return type dependency on arr.dtype: + # + # it depends on interpolation method; + # For discontinuous methods returning either end of the interval within + # which the quantile falls, or the other; arr.dtype is returned; + # else, logic below: + # + # if is_float(arr_dtype) && (arr.dtype >= dtype('float64')) then + # arr.dtype + # else + # dtype('float64') + # + # see https://github.com/numpy/numpy/issues/22323 + # + if method in [ + "inverted_cdf", + "closest_observation", + "lower", + "higher", + "nearest", + ]: + to_dtype = arr.dtype + else: + to_dtype = np.dtype("float64") + + # in case dtype("float128") becomes supported: + # + # to_dtype = ( + # arr.dtype + # if (arr.dtype == np.dtype("float128")) + # else np.dtype("float64") + # ) + + res = nanquantile_impl( + arr, + q_arr, + non_nan_counts, + real_axis, + axes_set, + original_shape, + _ORDER_FUNCS[method], + keepdims, + to_dtype, + out, + ) + + if out is not None: + # out = 
res.astype(out.dtype) -- conversion done inside impl + return out + else: + return res + + +@add_boilerplate("a") +def nanpercentile( + a: ndarray, + q: float | Iterable[float] | ndarray, + axis: int | tuple[int, ...] | None = None, + out: ndarray | None = None, + overwrite_input: bool = False, + method: str = "linear", + keepdims: bool = False, +) -> ndarray: + """ + Compute the q-th percentile of the data along the specified axis, + while ignoring nan values. + + Parameters + ---------- + a : array_like + Input array or object that can be converted to an array, + containing nan values to be ignored. + q : array_like of float + Percentile or sequence of percentiles to compute, which must be between + 0 and 100 inclusive. + axis : {int, tuple of int, None}, optional + Axis or axes along which the percentiles are computed. The default is + to compute the percentile(s) along a flattened version of the array. + out : ndarray, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output. + overwrite_input : bool, optional + If True, then allow the input array `a` to be modified by + intermediate calculations, to save memory. In this case, the + contents of the input `a` after this function completes is + undefined. + method : str, optional + This parameter specifies the method to use for estimating the + percentile. The options sorted by their R type + as summarized in the H&F paper [1]_ are: + 1. 'inverted_cdf' + 2. 'averaged_inverted_cdf' + 3. 'closest_observation' + 4. 'interpolated_inverted_cdf' + 5. 'hazen' + 6. 'weibull' + 7. 'linear' (default) + 8. 'median_unbiased' + 9. 'normal_unbiased' + The first three methods are discontinuous. NumPy further defines the + following discontinuous variations of the default 'linear' (7.) 
option: + * 'lower' + * 'higher', + * 'midpoint' + * 'nearest' + keepdims : bool, optional + If this is set to True, the axes which are reduced are left in + the result as dimensions with size one. With this option, the + result will broadcast correctly against the original array `a`. + + Returns + ------- + percentile : scalar or ndarray + If `q` is a single percentile and `axis=None`, then the result + is a scalar. If multiple percentiles are given, first axis of + the result corresponds to the percentiles. The other axes are + the axes that remain after the reduction of `a`. If the input + contains integers or floats smaller than ``float64``, the output + data-type is ``float64``. Otherwise, the output data-type is the + same as that of the input. If `out` is specified, that array is + returned instead. + + Raises + ------ + TypeError + If the type of the input is complex. + + See Also + -------- + numpy.nanpercentile + + Availability + -------- + Multiple GPUs, Multiple CPUs + + References + ---------- + .. [1] R. J. Hyndman and Y. Fan, + "Sample quantiles in statistical packages," + The American Statistician, 50(4), pp. 361-365, 1996 + """ + + q_arr = np.asarray(q) + q01 = q_arr / 100.0 + + return nanquantile( + a, + q01, + axis, + out=out, + overwrite_input=overwrite_input, + method=method, + keepdims=keepdims, + ) diff --git a/cunumeric/window.py b/cupynumeric/_module/window.py similarity index 96% rename from cunumeric/window.py rename to cupynumeric/_module/window.py index 9f25f7e03..366f772e2 100644 --- a/cunumeric/window.py +++ b/cupynumeric/_module/window.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -18,9 +18,9 @@ import numpy as np -from .array import ndarray -from .config import WindowOpCode -from .module import empty, ones +from .._array.array import ndarray +from ..config import WindowOpCode +from .creation_shape import empty, ones def _create_window(M: int, op_code: WindowOpCode, *args: Any) -> ndarray: diff --git a/cunumeric/_sphinxext/__init__.py b/cupynumeric/_sphinxext/__init__.py similarity index 95% rename from cunumeric/_sphinxext/__init__.py rename to cupynumeric/_sphinxext/__init__.py index e6a7812ea..ddfbe0329 100644 --- a/cunumeric/_sphinxext/__init__.py +++ b/cupynumeric/_sphinxext/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cunumeric/_sphinxext/_comparison_config.py b/cupynumeric/_sphinxext/_comparison_config.py similarity index 98% rename from cunumeric/_sphinxext/_comparison_config.py rename to cupynumeric/_sphinxext/_comparison_config.py index dc2f71d6b..3623a61a0 100644 --- a/cunumeric/_sphinxext/_comparison_config.py +++ b/cupynumeric/_sphinxext/_comparison_config.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -136,7 +136,6 @@ class SectionConfig: "asarray_chkfinite", "asarray", "ascontiguousarray", - "asfarray", "asfortranarray", "asmatrix", "atleast_1d", @@ -218,7 +217,6 @@ class SectionConfig: "identity", "linspace", "logspace", - "mat", "meshgrid", "ones_like", "ones", @@ -252,7 +250,6 @@ class SectionConfig: "savez_compressed", "savez", "set_printoptions", - "set_string_function", ) IO_ND = ("tofile", "tolist") @@ -283,7 +280,6 @@ class SectionConfig: "prod", "real_if_close", "real", - "round_", "sinc", "sum", "trapz", @@ -300,7 +296,6 @@ class SectionConfig: "extract", "flatnonzero", "lexsort", - "msort", "nanargmax", "nanargmin", "nonzero", diff --git a/cunumeric/_sphinxext/_comparison_util.py b/cupynumeric/_sphinxext/_comparison_util.py similarity index 87% rename from cunumeric/_sphinxext/_comparison_util.py rename to cupynumeric/_sphinxext/_comparison_util.py index 8e2adc6a8..a76d1cf67 100644 --- a/cunumeric/_sphinxext/_comparison_util.py +++ b/cupynumeric/_sphinxext/_comparison_util.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,9 +16,9 @@ from dataclasses import dataclass from types import ModuleType -from typing import TYPE_CHECKING, Any, Iterable, Iterator, Type, Union +from typing import TYPE_CHECKING, Any, Iterable, Iterator, Type -from ..coverage import is_implemented, is_multi, is_single +from .._utils.coverage import is_implemented, is_multi, is_single from ._comparison_config import MISSING_NP_REFS, SKIP if TYPE_CHECKING: @@ -66,7 +66,7 @@ def _lgref(name: str, obj: Any, implemented: bool) -> str: if isinstance(obj, ModuleType): full_name = f"{obj.__name__}.{name}" else: - full_name = f"cunumeric.{obj.__name__}.{name}" + full_name = f"cupynumeric.{obj.__name__}.{name}" role = "meth" if "ndarray" in full_name else "obj" @@ -75,7 +75,7 @@ def _lgref(name: str, obj: Any, implemented: bool) -> str: def filter_names( obj: Any, - types: Union[tuple[Type[Any], ...], None] = None, + types: tuple[Type[Any], ...] | None = None, skip: Iterable[str] = (), ) -> Iterator[str]: names = (n for n in dir(obj)) # every name in the module or class @@ -106,15 +106,15 @@ def get_item(name: str, np_obj: Any, lg_obj: Any) -> ItemDetail: ) -def get_namespaces(attr: Union[str, None]) -> tuple[Any, Any]: +def get_namespaces(attr: str | None) -> tuple[Any, Any]: import numpy - import cunumeric + import cupynumeric if attr is None: - return numpy, cunumeric + return numpy, cupynumeric - return getattr(numpy, attr), getattr(cunumeric, attr) + return getattr(numpy, attr), getattr(cupynumeric, attr) def generate_section(config: SectionConfig) -> SectionDetail: diff --git a/cunumeric/_sphinxext/_cunumeric_directive.py b/cupynumeric/_sphinxext/_cupynumeric_directive.py similarity index 86% rename from cunumeric/_sphinxext/_cunumeric_directive.py rename to cupynumeric/_sphinxext/_cupynumeric_directive.py index ef6402f6c..593d25b24 100644 --- a/cunumeric/_sphinxext/_cunumeric_directive.py +++ b/cupynumeric/_sphinxext/_cupynumeric_directive.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 
2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,14 +15,14 @@ from __future__ import annotations from docutils import nodes -from docutils.statemachine import ViewList +from docutils.statemachine import StringList from sphinx.util.docutils import SphinxDirective from sphinx.util.nodes import nested_parse_with_titles -class CunumericDirective(SphinxDirective): +class CupynumericDirective(SphinxDirective): def parse(self, rst_text: str, annotation: str) -> list[nodes.Node]: - result = ViewList() + result = StringList() for line in rst_text.split("\n"): result.append(line, annotation) node = nodes.paragraph() diff --git a/cunumeric/_sphinxext/_templates.py b/cupynumeric/_sphinxext/_templates.py similarity index 95% rename from cunumeric/_sphinxext/_templates.py rename to cupynumeric/_sphinxext/_templates.py index e19316ee4..39aa3a002 100644 --- a/cunumeric/_sphinxext/_templates.py +++ b/cupynumeric/_sphinxext/_templates.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cunumeric/_sphinxext/_templates/comparison_table.rst b/cupynumeric/_sphinxext/_templates/comparison_table.rst similarity index 69% rename from cunumeric/_sphinxext/_templates/comparison_table.rst rename to cupynumeric/_sphinxext/_templates/comparison_table.rst index 3a4211100..55d1d583f 100644 --- a/cunumeric/_sphinxext/_templates/comparison_table.rst +++ b/cupynumeric/_sphinxext/_templates/comparison_table.rst @@ -3,13 +3,13 @@ {{ section.title }} {{ "~" * section.title|length }} -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric .. autosummary:: :toctree: generated/ .. 
csv-table:: - :header: NumPy, cunumeric, single-GPU/CPU, multi-GPU/CPU + :header: NumPy, cupynumeric, single-GPU/CPU, multi-GPU/CPU {% for item in section.items -%} {{ item.np_ref }}, {{ item.lg_ref }}, {{ item.single }}, {{ item.multi }} @@ -19,6 +19,6 @@ Number of NumPy functions: {{ section.np_count }} -Number of functions covered by cunumeric: {{ section.lg_count }} +Number of functions covered by cupynumeric: {{ section.lg_count }} {% endfor %} \ No newline at end of file diff --git a/cunumeric/_sphinxext/comparison_table.py b/cupynumeric/_sphinxext/comparison_table.py similarity index 92% rename from cunumeric/_sphinxext/comparison_table.py rename to cupynumeric/_sphinxext/comparison_table.py index f37a14229..baa62a53d 100644 --- a/cunumeric/_sphinxext/comparison_table.py +++ b/cupynumeric/_sphinxext/comparison_table.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -22,13 +22,13 @@ from . 
import PARALLEL_SAFE, SphinxParallelSpec from ._comparison_config import GROUPED_CONFIGS, NUMPY_CONFIGS from ._comparison_util import generate_section -from ._cunumeric_directive import CunumericDirective +from ._cupynumeric_directive import CupynumericDirective from ._templates import COMPARISON_TABLE log = getLogger(__name__) -class ComparisonTable(CunumericDirective): +class ComparisonTable(CupynumericDirective): has_content = False required_arguments = 0 optional_arguments = 1 diff --git a/cunumeric/_sphinxext/implemented_index.py b/cupynumeric/_sphinxext/implemented_index.py similarity index 86% rename from cunumeric/_sphinxext/implemented_index.py rename to cupynumeric/_sphinxext/implemented_index.py index 3d70f763b..f0e9598bc 100644 --- a/cunumeric/_sphinxext/implemented_index.py +++ b/cupynumeric/_sphinxext/implemented_index.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,11 +20,11 @@ from sphinx.application import Sphinx from sphinx.util.logging import getLogger -import cunumeric as cn +import cupynumeric as cn -from ..coverage import is_implemented +from .._utils.coverage import is_implemented from . 
import PARALLEL_SAFE, SphinxParallelSpec -from ._cunumeric_directive import CunumericDirective +from ._cupynumeric_directive import CupynumericDirective log = getLogger(__name__) @@ -45,7 +45,7 @@ def _filter(x: Any) -> bool: ) -class ImplementedIndex(CunumericDirective): +class ImplementedIndex(CupynumericDirective): has_content = False required_arguments = 0 optional_arguments = 0 @@ -59,7 +59,7 @@ def run(self) -> list[nodes.Node]: if _filter(x) ] refs += [ - f"* :obj:`cunumeric.ndarray.{x.__name__}`" + f"* :obj:`cupynumeric.ndarray.{x.__name__}`" for x in cn.ndarray.__dict__.values() if _filter(x) ] diff --git a/cunumeric/_sphinxext/missing_refs.py b/cupynumeric/_sphinxext/missing_refs.py similarity index 69% rename from cunumeric/_sphinxext/missing_refs.py rename to cupynumeric/_sphinxext/missing_refs.py index 94266deff..99938b80d 100644 --- a/cunumeric/_sphinxext/missing_refs.py +++ b/cupynumeric/_sphinxext/missing_refs.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -28,25 +28,25 @@ log = getLogger(__name__) SKIP = ( - "cunumeric.cast", - "cunumeric.ndarray.__array_function__", - "cunumeric.ndarray.__array_ufunc__", - "cunumeric.ndarray.__format__", - "cunumeric.ndarray.__hash__", - "cunumeric.ndarray.__iter__", - "cunumeric.ndarray.__radd__", - "cunumeric.ndarray.__rand__", - "cunumeric.ndarray.__rdivmod__", - "cunumeric.ndarray.__reduce_ex__", - "cunumeric.ndarray.__rfloordiv__", - "cunumeric.ndarray.__rmod__", - "cunumeric.ndarray.__rmul__", - "cunumeric.ndarray.__ror__", - "cunumeric.ndarray.__rpow__", - "cunumeric.ndarray.__rsub__", - "cunumeric.ndarray.__rtruediv__", - "cunumeric.ndarray.__rxor__", - "cunumeric.ndarray.__sizeof__", + "cupynumeric.cast", + "cupynumeric.ndarray.__array_function__", + "cupynumeric.ndarray.__array_ufunc__", + "cupynumeric.ndarray.__format__", + "cupynumeric.ndarray.__hash__", + "cupynumeric.ndarray.__iter__", + "cupynumeric.ndarray.__radd__", + "cupynumeric.ndarray.__rand__", + "cupynumeric.ndarray.__rdivmod__", + "cupynumeric.ndarray.__reduce_ex__", + "cupynumeric.ndarray.__rfloordiv__", + "cupynumeric.ndarray.__rmod__", + "cupynumeric.ndarray.__rmul__", + "cupynumeric.ndarray.__ror__", + "cupynumeric.ndarray.__rpow__", + "cupynumeric.ndarray.__rsub__", + "cupynumeric.ndarray.__rtruediv__", + "cupynumeric.ndarray.__rxor__", + "cupynumeric.ndarray.__sizeof__", ) MISSING: list[tuple[str, str]] = [] @@ -62,7 +62,7 @@ def run(self, **kwargs: Any) -> None: def _check_target(self, node: Any) -> None: target = node["reftarget"] - if not target.startswith("cunumeric.") or target in SKIP: + if not target.startswith("cupynumeric.") or target in SKIP: return domain = self.env.domains[node["refdomain"]] @@ -85,7 +85,7 @@ def _check_target(self, node: Any) -> None: if uri is None: loc = get_node_location(node) log.warning( - f"Cunumeric reference missing a target: {loc}: {target}", + f"cuPyNumeric reference missing a target: {loc}: {target}", type="ref", ) diff --git 
a/cunumeric/_sphinxext/ufunc_formatter.py b/cupynumeric/_sphinxext/ufunc_formatter.py similarity index 94% rename from cunumeric/_sphinxext/ufunc_formatter.py rename to cupynumeric/_sphinxext/ufunc_formatter.py index 60d8145ab..6f574d754 100644 --- a/cunumeric/_sphinxext/ufunc_formatter.py +++ b/cupynumeric/_sphinxext/ufunc_formatter.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ from sphinx.application import Sphinx from sphinx.ext.autodoc import FunctionDocumenter -from cunumeric import ufunc +from cupynumeric import ufunc from . import PARALLEL_SAFE, SphinxParallelSpec diff --git a/cupynumeric/_thunk/__init__.py b/cupynumeric/_thunk/__init__.py new file mode 100644 index 000000000..31d8d448c --- /dev/null +++ b/cupynumeric/_thunk/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations diff --git a/cunumeric/sort.py b/cupynumeric/_thunk/_sort.py similarity index 72% rename from cunumeric/sort.py rename to cupynumeric/_thunk/_sort.py index a0503bf92..82ab73847 100644 --- a/cunumeric/sort.py +++ b/cupynumeric/_thunk/_sort.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,17 +14,21 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Union, cast +from typing import TYPE_CHECKING, cast -from legate.core import types as ty -from numpy.core.multiarray import ( # type: ignore [attr-defined] - normalize_axis_index, -) +from legate.core import get_legate_runtime, types as ty -from .config import CuNumericOpCode +from .._utils import is_np2 +from ..config import CuPyNumericOpCode +from ..runtime import runtime + +if is_np2: + from numpy.lib.array_utils import normalize_axis_index # type: ignore +else: + from numpy.core.multiarray import normalize_axis_index # type: ignore if TYPE_CHECKING: - from .deferred import DeferredArray + from .._thunk.deferred import DeferredArray def sort_flattened( @@ -35,11 +39,11 @@ def sort_flattened( # run sort flattened -- return 1D solution sort_result = cast( "DeferredArray", - output.runtime.create_empty_thunk( + runtime.create_empty_thunk( flattened.shape, dtype=output.base.type, inputs=(flattened,) ), ) - sort(sort_result, flattened, argsort, stable=stable) + sort_deferred(sort_result, flattened, argsort, stable=stable) output.base = sort_result.base output.numpy_array = None @@ -58,7 +62,7 @@ def sort_swapped( swapped_copy = cast( "DeferredArray", - output.runtime.create_empty_thunk( + runtime.create_empty_thunk( swapped.shape, dtype=input.base.type, inputs=(input, swapped) ), ) @@ -68,17 +72,17 @@ def sort_swapped( if argsort is True: sort_result = cast( "DeferredArray", - 
output.runtime.create_empty_thunk( + runtime.create_empty_thunk( swapped_copy.shape, dtype=output.base.type, inputs=(swapped_copy,), ), ) - sort(sort_result, swapped_copy, argsort, stable=stable) + sort_deferred(sort_result, swapped_copy, argsort, stable=stable) output.base = sort_result.swapaxes(input.ndim - 1, sort_axis).base output.numpy_array = None else: - sort(swapped_copy, swapped_copy, argsort, stable=stable) + sort_deferred(swapped_copy, swapped_copy, argsort, stable=stable) output.base = swapped_copy.swapaxes(input.ndim - 1, sort_axis).base output.numpy_array = None @@ -86,23 +90,24 @@ def sort_swapped( def sort_task( output: DeferredArray, input: DeferredArray, argsort: bool, stable: bool ) -> None: - task = output.context.create_auto_task(CuNumericOpCode.SORT) + legate_runtime = get_legate_runtime() + task = legate_runtime.create_auto_task( + output.library, CuPyNumericOpCode.SORT + ) - uses_unbound_output = output.runtime.num_procs > 1 and input.ndim == 1 + uses_unbound_output = runtime.num_procs > 1 and input.ndim == 1 task.add_input(input.base) if uses_unbound_output: - unbound = output.runtime.create_unbound_thunk( - dtype=output.base.type, ndim=1 - ) + unbound = runtime.create_unbound_thunk(dtype=output.base.type, ndim=1) task.add_output(unbound.base) else: task.add_output(output.base) task.add_alignment(output.base, input.base) - if output.runtime.num_gpus > 1: + if runtime.num_gpus > 1: task.add_nccl_communicator() - elif output.runtime.num_gpus == 0 and output.runtime.num_procs > 1: + elif runtime.num_gpus == 0 and runtime.num_procs > 1: task.add_cpu_communicator() task.add_scalar_arg(argsort, ty.bool_) # return indices flag @@ -115,11 +120,11 @@ def sort_task( output.numpy_array = None -def sort( +def sort_deferred( output: DeferredArray, input: DeferredArray, argsort: bool, - axis: Union[int, None] = -1, + axis: int | None = -1, stable: bool = False, ) -> None: if axis is None and input.ndim > 1: diff --git a/cunumeric/deferred.py 
b/cupynumeric/_thunk/deferred.py similarity index 75% rename from cunumeric/deferred.py rename to cupynumeric/_thunk/deferred.py index 9d9fa963d..b94a16aed 100644 --- a/cunumeric/deferred.py +++ b/cupynumeric/_thunk/deferred.py @@ -1,4 +1,4 @@ -# Copyright 2021-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -20,55 +20,75 @@ from enum import IntEnum, unique from functools import reduce, wraps from inspect import signature -from itertools import chain, product +from itertools import chain from typing import ( TYPE_CHECKING, Any, Callable, - Dict, - Optional, + ParamSpec, Sequence, TypeVar, - Union, cast, ) import legate.core.types as ty import numpy as np -from legate.core import Annotation, Future, ReductionOp, Store -from legate.core.store import RegionField +from legate.core import ( + Annotation, + LogicalStore, + ReductionOpKind, + Scalar, + align, + bloat, + broadcast, + constant, + dimension, + get_legate_runtime, + scale, +) from legate.core.utils import OrderedSet -from numpy.core.numeric import ( # type: ignore [attr-defined] - normalize_axis_tuple, +from legate.settings import settings as legate_settings + +from .._utils import is_np2 +from .._utils.array import ( + is_advanced_indexing, + max_identity, + min_identity, + to_core_type, ) -from typing_extensions import ParamSpec - -from .config import ( +from ..config import ( BinaryOpCode, BitGeneratorDistribution, BitGeneratorOperation, Bitorder, ConvertCode, - CuNumericOpCode, + ConvolveMethod, + CuPyNumericOpCode, RandGenCode, UnaryOpCode, UnaryRedCode, ) -from .linalg.cholesky import cholesky -from .linalg.solve import solve -from .sort import sort +from ..linalg._cholesky import cholesky_deferred +from ..linalg._qr import qr_deferred +from ..linalg._solve import solve_deferred +from ..linalg._svd import svd_deferred +from ..runtime import runtime +from 
._sort import sort_deferred from .thunk import NumPyThunk -from .utils import is_advanced_indexing, to_core_dtype + +if is_np2: + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.numeric import normalize_axis_tuple # type: ignore if TYPE_CHECKING: import numpy.typing as npt - from legate.core import FieldID, Region - from legate.core.operation import AutoTask, ManualTask + from legate.core import LogicalStorePartition - from .config import BitGeneratorType, FFTDirection, FFTType, WindowOpCode - from .runtime import Runtime - from .types import ( + from ..config import BitGeneratorType, FFTDirection, FFTType, WindowOpCode + from ..types import ( BitOrder, + ConvolveMethod as ConvolveMethodType, ConvolveMode, NdShape, OrderType, @@ -91,12 +111,17 @@ def _prod(tpl: Sequence[int]) -> int: R = TypeVar("R") P = ParamSpec("P") +legate_runtime = get_legate_runtime() + def auto_convert( *thunk_params: str, ) -> Callable[[Callable[P, R]], Callable[P, R]]: """ Converts all named parameters to DeferredArrays. + + This function makes an immutable copy of any parameter that wasn't already + a DeferredArray. """ keys = OrderedSet(thunk_params) assert len(keys) == len(thunk_params) @@ -116,16 +141,15 @@ def decorator(func: Callable[P, R]) -> Callable[P, R]: @wraps(func) def wrapper(*args: Any, **kwargs: Any) -> R: # Convert relevant arguments to DeferredArrays - self = args[0] args = tuple( - self.runtime.to_deferred_array(arg) + runtime.to_deferred_array(arg, read_only=True) if idx in indices and arg is not None else arg for (idx, arg) in enumerate(args) ) for k, v in kwargs.items(): if k in keys and v is not None: - kwargs[k] = self.runtime.to_deferred_array(v) + kwargs[k] = runtime.to_deferred_array(v, read_only=True) return func(*args, **kwargs) @@ -134,83 +158,29 @@ def wrapper(*args: Any, **kwargs: Any) -> R: return decorator -# This is a dummy object that is only used as an initializer for the -# RegionField object above. 
It is thrown away as soon as the -# RegionField is constructed. -class _CuNumericNDarray(object): - __slots__ = ["__array_interface__"] - - def __init__( - self, - shape: NdShape, - field_type: Any, - base_ptr: Any, - strides: tuple[int, ...], - read_only: bool, - ) -> None: - # See: https://docs.scipy.org/doc/numpy/reference/arrays.interface.html - self.__array_interface__ = { - "version": 3, - "shape": shape, - "typestr": field_type.str, - "data": (base_ptr, read_only), - "strides": strides, - } - - -_UNARY_RED_TO_REDUCTION_OPS: Dict[int, int] = { - UnaryRedCode.SUM: ReductionOp.ADD, - UnaryRedCode.SUM_SQUARES: ReductionOp.ADD, - UnaryRedCode.VARIANCE: ReductionOp.ADD, - UnaryRedCode.PROD: ReductionOp.MUL, - UnaryRedCode.MAX: ReductionOp.MAX, - UnaryRedCode.MIN: ReductionOp.MIN, - UnaryRedCode.ARGMAX: ReductionOp.MAX, - UnaryRedCode.ARGMIN: ReductionOp.MIN, - UnaryRedCode.NANARGMAX: ReductionOp.MAX, - UnaryRedCode.NANARGMIN: ReductionOp.MIN, - UnaryRedCode.NANMAX: ReductionOp.MAX, - UnaryRedCode.NANMIN: ReductionOp.MIN, - UnaryRedCode.NANPROD: ReductionOp.MUL, - UnaryRedCode.NANSUM: ReductionOp.ADD, - UnaryRedCode.CONTAINS: ReductionOp.ADD, - UnaryRedCode.COUNT_NONZERO: ReductionOp.ADD, - UnaryRedCode.ALL: ReductionOp.MUL, - UnaryRedCode.ANY: ReductionOp.ADD, +_UNARY_RED_TO_REDUCTION_OPS: dict[int, int] = { + UnaryRedCode.SUM: ReductionOpKind.ADD, + UnaryRedCode.SUM_SQUARES: ReductionOpKind.ADD, + UnaryRedCode.VARIANCE: ReductionOpKind.ADD, + UnaryRedCode.PROD: ReductionOpKind.MUL, + UnaryRedCode.MAX: ReductionOpKind.MAX, + UnaryRedCode.MIN: ReductionOpKind.MIN, + UnaryRedCode.ARGMAX: ReductionOpKind.MAX, + UnaryRedCode.ARGMIN: ReductionOpKind.MIN, + UnaryRedCode.NANARGMAX: ReductionOpKind.MAX, + UnaryRedCode.NANARGMIN: ReductionOpKind.MIN, + UnaryRedCode.NANMAX: ReductionOpKind.MAX, + UnaryRedCode.NANMIN: ReductionOpKind.MIN, + UnaryRedCode.NANPROD: ReductionOpKind.MUL, + UnaryRedCode.NANSUM: ReductionOpKind.ADD, + UnaryRedCode.CONTAINS: ReductionOpKind.ADD, + 
UnaryRedCode.COUNT_NONZERO: ReductionOpKind.ADD, + UnaryRedCode.ALL: ReductionOpKind.MUL, + UnaryRedCode.ANY: ReductionOpKind.ADD, } -def max_identity( - ty: np.dtype[Any], -) -> Union[int, np.floating[Any], bool, np.complexfloating[Any, Any]]: - if ty.kind == "i" or ty.kind == "u": - return np.iinfo(ty).min - elif ty.kind == "f": - return np.finfo(ty).min - elif ty.kind == "c": - return np.finfo(np.float64).min + np.finfo(np.float64).min * 1j - elif ty.kind == "b": - return False - else: - raise ValueError(f"Unsupported dtype: {ty}") - - -def min_identity( - ty: np.dtype[Any], -) -> Union[int, np.floating[Any], bool, np.complexfloating[Any, Any]]: - if ty.kind == "i" or ty.kind == "u": - return np.iinfo(ty).max - elif ty.kind == "f": - return np.finfo(ty).max - elif ty.kind == "c": - return np.finfo(np.float64).max + np.finfo(np.float64).max * 1j - elif ty.kind == "b": - return True - else: - raise ValueError(f"Unsupported dtype: {ty}") - - -_UNARY_RED_IDENTITIES: Dict[UnaryRedCode, Callable[[Any], Any]] = { +_UNARY_RED_IDENTITIES: dict[UnaryRedCode, Callable[[Any], Any]] = { UnaryRedCode.SUM: lambda _: 0, UnaryRedCode.SUM_SQUARES: lambda _: 0, UnaryRedCode.VARIANCE: lambda _: 0, @@ -255,14 +225,13 @@ class DeferredArray(NumPyThunk): def __init__( self, - runtime: Runtime, - base: Store, - numpy_array: Optional[npt.NDArray[Any]] = None, + base: LogicalStore, + numpy_array: npt.NDArray[Any] | None = None, ) -> None: - super().__init__(runtime, base.type.to_numpy_dtype()) + super().__init__(base.type.to_numpy_dtype()) assert base is not None - assert isinstance(base, Store) - self.base = base # a Legate Store + assert isinstance(base, LogicalStore) + self.base: LogicalStore = base # a Legate Store self.numpy_array = ( None if numpy_array is None else weakref.ref(numpy_array) ) @@ -270,16 +239,6 @@ def __init__( def __str__(self) -> str: return f"DeferredArray(base: {self.base})" - @property - def storage(self) -> Union[Future, tuple[Region, Union[int, FieldID]]]: - 
storage = self.base.storage - if self.base.kind == Future: - assert isinstance(storage, Future) - return storage - else: - assert isinstance(storage, RegionField) - return (storage.region, storage.field.field_id) - @property def shape(self) -> NdShape: return tuple(self.base.shape) @@ -293,7 +252,7 @@ def _copy_if_overlapping(self, other: DeferredArray) -> DeferredArray: return self copy = cast( DeferredArray, - self.runtime.create_empty_thunk( + runtime.create_empty_thunk( self.shape, self.base.type, inputs=[self], @@ -302,6 +261,13 @@ def _copy_if_overlapping(self, other: DeferredArray) -> DeferredArray: copy.copy(self, deep=True) return copy + def _copy_if_partially_overlapping( + self, other: DeferredArray + ) -> DeferredArray: + if self.base.equal_storage(other.base): + return self + return self._copy_if_overlapping(other) + def __numpy_array__(self) -> npt.NDArray[Any]: if self.numpy_array is not None: result = self.numpy_array() @@ -312,34 +278,13 @@ def __numpy_array__(self) -> npt.NDArray[Any]: # and type return np.empty(shape=self.shape, dtype=self.dtype) - if self.scalar: - result = np.full( - self.shape, - self.get_scalar_array(), - dtype=self.dtype, - ) - else: - alloc = self.base.get_inline_allocation() - - def construct_ndarray( - shape: NdShape, address: Any, strides: tuple[int, ...] 
- ) -> npt.NDArray[Any]: - initializer = _CuNumericNDarray( - shape, self.dtype, address, strides, False - ) - result = np.asarray(initializer) - if self.shape == (): - result = result.reshape(()) - return result - - result = cast("npt.NDArray[Any]", alloc.consume(construct_ndarray)) - - self.numpy_array = weakref.ref(result) - return result + return np.asarray( + self.base.get_physical_store().get_inline_allocation() + ) # TODO: We should return a view of the field instead of a copy def imag(self) -> NumPyThunk: - result = self.runtime.create_empty_thunk( + result = runtime.create_empty_thunk( self.shape, dtype=_COMPLEX_FIELD_DTYPES[self.base.type], inputs=[self], @@ -349,14 +294,13 @@ def imag(self) -> NumPyThunk: UnaryOpCode.IMAG, self, True, - [], ) return result # TODO: We should return a view of the field instead of a copy def real(self) -> NumPyThunk: - result = self.runtime.create_empty_thunk( + result = runtime.create_empty_thunk( self.shape, dtype=_COMPLEX_FIELD_DTYPES[self.base.type], inputs=[self], @@ -366,13 +310,12 @@ def real(self) -> NumPyThunk: UnaryOpCode.REAL, self, True, - [], ) return result def conj(self) -> NumPyThunk: - result = self.runtime.create_empty_thunk( + result = runtime.create_empty_thunk( self.shape, dtype=self.base.type, inputs=[self], @@ -382,7 +325,6 @@ def conj(self) -> NumPyThunk: UnaryOpCode.CONJ, self, True, - [], ) return result @@ -391,25 +333,17 @@ def conj(self) -> NumPyThunk: @auto_convert("rhs") def copy(self, rhs: Any, deep: bool = False) -> None: if self.scalar and rhs.scalar: - self.base.set_storage(rhs.base.storage) + legate_runtime.issue_fill(self.base, rhs.base) return self.unary_op( UnaryOpCode.COPY, rhs, True, - [], ) @property def scalar(self) -> bool: - return self.base.scalar - - def get_scalar_array(self) -> npt.NDArray[Any]: - assert self.scalar - assert isinstance(self.base.storage, Future) - buf = self.base.storage.get_buffer(self.dtype.itemsize) - result = np.frombuffer(buf, dtype=self.dtype, count=1) 
- return result.reshape(()) + return self.base.has_scalar_storage and self.base.size == 1 def _zip_indices( self, start_index: int, arrays: tuple[Any, ...] @@ -424,7 +358,7 @@ def _zip_indices( new_arrays: tuple[Any, ...] = tuple() # check array's type and convert them to deferred arrays for a in arrays: - a = self.runtime.to_deferred_array(a) + a = runtime.to_deferred_array(a, read_only=True) data_type = a.dtype if data_type != np.int64: raise TypeError("index arrays should be int64 type") @@ -434,7 +368,7 @@ def _zip_indices( # find a broadcasted shape for all arrays passed as indices shapes = tuple(a.shape for a in arrays) if len(arrays) > 1: - from .module import broadcast_shapes + from .._module import broadcast_shapes b_shape = broadcast_shapes(*shapes) else: @@ -485,10 +419,10 @@ def _zip_indices( # dtype, to store N-dimensional index points, to be used as the # indirection field in a copy. N = self.ndim - pointN_dtype = self.runtime.get_point_type(N) + pointN_dtype = ty.point_type(N) output_arr = cast( DeferredArray, - self.runtime.create_empty_thunk( + runtime.create_empty_thunk( shape=out_shape, dtype=pointN_dtype, inputs=[self], @@ -496,26 +430,25 @@ def _zip_indices( ) # call ZIP function to combine index arrays into a singe array - task = self.context.create_auto_task(CuNumericOpCode.ZIP) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.ZIP + ) task.throws_exception(IndexError) - task.add_output(output_arr.base) + p_out = task.add_output(output_arr.base) task.add_scalar_arg(self.ndim, ty.int64) # N of points in Point task.add_scalar_arg(key_dim, ty.int64) # key_dim task.add_scalar_arg(start_index, ty.int64) # start_index task.add_scalar_arg(self.shape, (ty.int64,)) for a in arrays: - task.add_input(a) - task.add_alignment(output_arr.base, a) + p_in = task.add_input(a) + task.add_constraint(align(p_out, p_in)) task.execute() return output_arr def _copy_store(self, store: Any) -> DeferredArray: - store_to_copy = DeferredArray( 
- self.runtime, - base=store, - ) - store_copy = self.runtime.create_empty_thunk( + store_to_copy = DeferredArray(base=store) + store_copy = runtime.create_empty_thunk( store_to_copy.shape, self.base.type, inputs=[store_to_copy], @@ -524,7 +457,9 @@ def _copy_store(self, store: Any) -> DeferredArray: return cast(DeferredArray, store_copy) @staticmethod - def _slice_store(k: slice, store: Store, dim: int) -> tuple[slice, Store]: + def _slice_store( + k: slice, store: LogicalStore, dim: int + ) -> tuple[slice, LogicalStore]: start = k.start end = k.stop step = k.step @@ -612,7 +547,7 @@ def _has_single_boolean_array( shift = 0 store = lhs.base for dim, k in enumerate(new_key): - if np.isscalar(k): + if isinstance(k, int): if k < 0: # type: ignore [operator] k += store.shape[dim + key_dim + shift] store = store.project(dim + key_dim + shift, k) @@ -628,7 +563,7 @@ def _has_single_boolean_array( "Unsupported entry type passed to advanced ", "indexing operation", ) - lhs = DeferredArray(self.runtime, store) + lhs = DeferredArray(store) return True, lhs, key[transpose_index] @@ -640,11 +575,11 @@ def _advanced_indexing_with_boolean_array( self, key: Any, is_set: bool = False, - set_value: Optional[Any] = None, + set_value: Any | None = None, ) -> tuple[bool, Any, Any, Any]: rhs = self if not isinstance(key, DeferredArray): - key = self.runtime.to_deferred_array(key) + key = runtime.to_deferred_array(key, read_only=True) # in case when boolean array is passed as an index, shape for all # its dimensions should be the same as the shape of @@ -673,7 +608,7 @@ def _advanced_indexing_with_boolean_array( out = cast( DeferredArray, - self.runtime.create_empty_thunk( + runtime.create_empty_thunk( out_shape, rhs.base.type, inputs=[rhs], @@ -693,10 +628,7 @@ def _advanced_indexing_with_boolean_array( # and avoid calling Copy has_set_value = set_value is not None and set_value.size == 1 if has_set_value: - mask = DeferredArray( - self.runtime, - base=key_store, - ) + mask = 
DeferredArray(base=key_store) rhs.putmask(mask, set_value) return False, rhs, rhs, self else: @@ -706,26 +638,26 @@ def _advanced_indexing_with_boolean_array( # indirect copy operation if is_set: N = rhs.ndim - out_dtype = rhs.runtime.get_point_type(N) + out_dtype = ty.point_type(N) # TODO : current implementation of the ND output regions # requires out.ndim == rhs.ndim. This will be fixed in the # future - out = rhs.runtime.create_unbound_thunk(out_dtype, ndim=rhs.ndim) + out = runtime.create_unbound_thunk(out_dtype, ndim=rhs.ndim) key_dims = key.ndim # dimension of the original key - task = rhs.context.create_auto_task( - CuNumericOpCode.ADVANCED_INDEXING + task = legate_runtime.create_auto_task( + self.library, + CuPyNumericOpCode.ADVANCED_INDEXING, ) task.add_output(out.base) - task.add_input(rhs.base) - task.add_input(key_store) + p_rhs = task.add_input(rhs.base) + p_key = task.add_input(key_store) task.add_scalar_arg(is_set, ty.bool_) task.add_scalar_arg(key_dims, ty.int64) - task.add_alignment(rhs.base, key_store) - task.add_broadcast( - rhs.base, axes=tuple(range(1, len(rhs.base.shape))) - ) + task.add_constraint(align(p_rhs, p_key)) + if rhs.base.ndim > 1: + task.add_constraint(broadcast(p_rhs, range(1, rhs.base.ndim))) task.execute() # TODO : current implementation of the ND output regions @@ -740,7 +672,7 @@ def _advanced_indexing_with_boolean_array( out_shape = tuple(out.shape[i] for i in range(0, out_dim)) out = cast( DeferredArray, - self.runtime.create_empty_thunk( + runtime.create_empty_thunk( out_shape, out_dtype, inputs=[out], @@ -759,7 +691,7 @@ def _create_indexing_array( self, key: Any, is_set: bool = False, - set_value: Optional[Any] = None, + set_value: Any | None = None, ) -> tuple[bool, Any, Any, Any]: is_bool_array, lhs, bool_key = self._has_single_boolean_array( key, is_set @@ -843,8 +775,8 @@ def _create_indexing_array( elif isinstance(k, slice): k, store = self._slice_store(k, store, dim + shift) elif isinstance(k, NumPyThunk): - if not 
isinstance(computed_key, DeferredArray): - k = self.runtime.to_deferred_array(k) + if not isinstance(k, DeferredArray): + k = runtime.to_deferred_array(k, read_only=True) if k.dtype == bool: for i in range(k.ndim): if k.shape[i] != store.shape[dim + i + shift]: @@ -869,7 +801,7 @@ def _create_indexing_array( # to apply all the transformations done to `store` to `self` # as well before creating a copy if is_set: - self = DeferredArray(self.runtime, store) + self = DeferredArray(store) # after store is transformed we need to to return a copy of # the store since Copy operation can't be done on # the store with transformation @@ -918,10 +850,7 @@ def _get_view(self, key: Any) -> DeferredArray: else: assert False - return DeferredArray( - self.runtime, - base=store, - ) + return DeferredArray(base=store) def _broadcast(self, shape: NdShape) -> Any: result = self.base @@ -947,15 +876,12 @@ def _convert_future_to_regionfield( shape: NdShape = (1,) else: shape = self.shape - store = self.context.create_store( + store = legate_runtime.create_store( self.base.type, shape=shape, optimize_scalar=False, ) - thunk_copy = DeferredArray( - self.runtime, - base=store, - ) + thunk_copy = DeferredArray(base=store) thunk_copy.copy(self, deep=True) return thunk_copy @@ -971,34 +897,28 @@ def get_item(self, key: Any) -> NumPyThunk: ) = self._create_indexing_array(key) if copy_needed: - if rhs.base.kind == Future: + if rhs.base.has_scalar_storage: rhs = rhs._convert_future_to_regionfield() result: NumPyThunk - if index_array.base.kind == Future: + if index_array.base.has_scalar_storage: index_array = index_array._convert_future_to_regionfield() - result_store = self.context.create_store( + result_store = legate_runtime.create_store( self.base.type, shape=index_array.shape, optimize_scalar=False, ) - result = DeferredArray( - self.runtime, - base=result_store, - ) + result = DeferredArray(base=result_store) else: - result = self.runtime.create_empty_thunk( + result = 
runtime.create_empty_thunk( index_array.base.shape, self.base.type, inputs=[self], ) - copy = self.context.create_copy() - copy.set_source_indirect_out_of_range(False) - copy.add_input(rhs.base) - copy.add_source_indirect(index_array.base) - copy.add_output(result.base) # type: ignore - copy.execute() + legate_runtime.issue_gather( + result.base, rhs.base, index_array.base # type: ignore + ) else: return index_array @@ -1008,11 +928,13 @@ def get_item(self, key: Any) -> NumPyThunk: if result.shape == (): input = result - result = self.runtime.create_empty_thunk( + result = runtime.create_empty_thunk( (), self.base.type, inputs=[self] ) - task = self.context.create_auto_task(CuNumericOpCode.READ) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.READ + ) task.add_input(input.base) task.add_output(result.base) # type: ignore @@ -1023,8 +945,12 @@ def get_item(self, key: Any) -> NumPyThunk: @auto_convert("rhs") def set_item(self, key: Any, rhs: Any) -> None: assert self.dtype == rhs.dtype + # Check to see if this is advanced indexing or not if is_advanced_indexing(key): + # copy if a self-copy might overlap + rhs = rhs._copy_if_overlapping(self) + # Create the indexing array ( copy_needed, @@ -1048,29 +974,22 @@ def set_item(self, key: Any, rhs: Any) -> None: # the case when rhs is a scalar and indices array contains # a single value # TODO this logic should be removed when copy accepts Futures - if rhs_store.kind == Future: - rhs_tmp = DeferredArray( - self.runtime, - base=rhs_store, - ) + if rhs_store.has_scalar_storage: + rhs_tmp = DeferredArray(base=rhs_store) rhs_tmp2 = rhs_tmp._convert_future_to_regionfield() rhs_store = rhs_tmp2.base - if index_array.base.kind == Future: + if index_array.base.has_scalar_storage: index_array = index_array._convert_future_to_regionfield() - if lhs.base.kind == Future: + if lhs.base.has_scalar_storage: lhs = lhs._convert_future_to_regionfield() if lhs.base.transformed: lhs = lhs._copy_store(lhs.base) if 
index_array.size != 0: - copy = self.context.create_copy() - copy.set_target_indirect_out_of_range(False) - - copy.add_input(rhs_store) - copy.add_target_indirect(index_array.base) - copy.add_output(lhs.base) - copy.execute() + legate_runtime.issue_scatter( + lhs.base, index_array.base, rhs_store + ) # TODO this copy will be removed when affine copies are # supported in Legion/Realm @@ -1084,7 +1003,9 @@ def set_item(self, key: Any, rhs: Any) -> None: # We're just writing a single value assert rhs.size == 1 - task = self.context.create_auto_task(CuNumericOpCode.WRITE) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.WRITE + ) # Since we pass the view with write discard privilege, # we should make sure that the mapper either creates a fresh # instance just for this one-element view or picks one of the @@ -1096,27 +1017,22 @@ def set_item(self, key: Any, rhs: Any) -> None: # In Python, any inplace update of form arr[key] op= value # goes through three steps: 1) __getitem__ fetching the object # for the key, 2) __iop__ for the update, and 3) __setitem__ - # to set the result back. In cuNumeric, the object we + # to set the result back. In cuPyNumeric, the object we # return in step (1) is actually a subview to the array arr # through which we make updates in place, so after step (2) is # done, the effect of inplace update is already reflected # to the arr. Therefore, we skip the copy to avoid redundant # copies if we know that we hit such a scenario. - # TODO: We should make this work for the advanced indexing case # NOTE: Neither Store nor Storage have an __eq__, so we can # only check that the underlying RegionField/Future corresponds # to the same Legion handle. 
- if ( - view.base.has_storage - and rhs.base.has_storage - and view.base.storage.same_handle(rhs.base.storage) - ): + if view.base.equal_storage(rhs.base): return view.copy(rhs, deep=False) def broadcast_to(self, shape: NdShape) -> NumPyThunk: - return DeferredArray(self.runtime, base=self._broadcast(shape)) + return DeferredArray(base=self._broadcast(shape)) def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: assert isinstance(newshape, Iterable) @@ -1125,8 +1041,8 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: if order != "C": # If we don't have a transform then we need to make a copy - self.runtime.warn( - "cuNumeric has not implemented reshape using Fortran-like " + runtime.warn( + "cuPyNumeric has not implemented reshape using Fortran-like " "index order and is falling back to canonical numpy. You may " "notice significantly decreased performance for this " "function call.", @@ -1135,9 +1051,9 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: numpy_array = self.__numpy_array__() # Force a copy here because we know we can't build a view result_array = numpy_array.reshape(newshape, order=order).copy() - result = self.runtime.get_numpy_thunk(result_array) + result = runtime.get_numpy_thunk(result_array) - return self.runtime.to_deferred_array(result) + return runtime.to_deferred_array(result, read_only=True) if self.shape == newshape: return self @@ -1231,7 +1147,7 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: else: tmp_shape += tgt_g - result = self.runtime.create_empty_thunk( + result = runtime.create_empty_thunk( tmp_shape, dtype=self.base.type, inputs=[self] ) @@ -1262,8 +1178,8 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: assert src.shape == tgt.shape - src_array = DeferredArray(self.runtime, src) - tgt_array = DeferredArray(self.runtime, tgt) + src_array = DeferredArray(src) + tgt_array = DeferredArray(tgt) tgt_array.copy(src_array, 
deep=True) if needs_delinearization and needs_linearization: @@ -1275,8 +1191,8 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: src_dim += len(tgt_g) assert src.shape == newshape - src_array = DeferredArray(self.runtime, src) - result = self.runtime.create_empty_thunk( + src_array = DeferredArray(src) + result = runtime.create_empty_thunk( newshape, dtype=self.base.type, inputs=[self] ) result.copy(src_array, deep=True) @@ -1301,13 +1217,11 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: src_dim += diff - result = DeferredArray(self.runtime, src) + result = DeferredArray(src) return result - def squeeze( - self, axis: Optional[Union[int, tuple[int, ...]]] - ) -> DeferredArray: + def squeeze(self, axis: int | tuple[int, ...] | None) -> DeferredArray: result = self.base if axis is None: shift = 0 @@ -1328,7 +1242,7 @@ def squeeze( ) if result is self.base: return self - return DeferredArray(self.runtime, result) + return DeferredArray(result) def swapaxes(self, axis1: int, axis2: int) -> DeferredArray: if self.size == 1 or axis1 == axis2: @@ -1340,8 +1254,7 @@ def swapaxes(self, axis1: int, axis2: int) -> DeferredArray: dims[axis1], dims[axis2] = dims[axis2], dims[axis1] result = self.base.transpose(tuple(dims)) - - return DeferredArray(self.runtime, result) + return DeferredArray(result) # Convert the source array to the destination array @auto_convert("rhs") @@ -1357,8 +1270,8 @@ def convert( assert lhs_array.dtype != rhs_array.dtype if warn: - self.runtime.warn( - "cuNumeric performing implicit type conversion from " + runtime.warn( + "cuPyNumeric performing implicit type conversion from " + str(rhs_array.dtype) + " to " + str(lhs_array.dtype), @@ -1368,50 +1281,45 @@ def convert( lhs = lhs_array.base rhs = rhs_array.base - task = self.context.create_auto_task(CuNumericOpCode.CONVERT) - task.add_output(lhs) - task.add_input(rhs) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.CONVERT + ) 
+ p_lhs = task.add_output(lhs) + p_rhs = task.add_input(rhs) task.add_scalar_arg(nan_op, ty.int32) - task.add_alignment(lhs, rhs) + task.add_constraint(align(p_lhs, p_rhs)) task.execute() - if temporary: - lhs.set_linear() - - @auto_convert("v", "lhs") - def convolve(self, v: Any, lhs: Any, mode: ConvolveMode) -> None: - input = self.base - filter = v.base - out = lhs.base - - task = self.context.create_auto_task(CuNumericOpCode.CONVOLVE) - - offsets = (filter.shape + 1) // 2 - stencils: list[tuple[int, ...]] = [] - for offset in offsets: - stencils.append((-offset, 0, offset)) - stencils = list(product(*stencils)) - stencils.remove((0,) * self.ndim) - - p_out = task.declare_partition(out) - p_input = task.declare_partition(input) - p_stencils = [] - for _ in stencils: - p_stencils.append(task.declare_partition(input, complete=False)) - - task.add_output(out, partition=p_out) - task.add_input(filter) - task.add_input(input, partition=p_input) - for p_stencil in p_stencils: - task.add_input(input, partition=p_stencil) - task.add_scalar_arg(self.shape, (ty.int64,)) + @auto_convert("input", "filter") + def convolve( + self, + input: Any, + filter: Any, + mode: ConvolveMode, + method: ConvolveMethodType, + ) -> None: + if method != "auto" and runtime.num_gpus == 0: + runtime.warn(f"the method {method} is ignored on CPUs") + + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.CONVOLVE + ) + + offsets = tuple((ext + 1) // 2 for ext in filter.shape) + + p_out = task.add_output(self.base) + p_filter = task.add_input(filter.base) + p_in = task.add_input(input.base) + p_halo = task.declare_partition() + task.add_input(input.base, p_halo) + task.add_scalar_arg(input.shape, (ty.int64,)) + task.add_scalar_arg(getattr(ConvolveMethod, method.upper()), ty.int32) - task.add_constraint(p_out == p_input) - for stencil, p_stencil in zip(stencils, p_stencils): - task.add_constraint(p_input + stencil <= p_stencil) # type: ignore - task.add_broadcast(filter) + 
task.add_constraint(align(p_out, p_in)) + task.add_constraint(bloat(p_out, p_halo, offsets, offsets)) + task.add_constraint(broadcast(p_filter)) task.execute() @@ -1425,21 +1333,23 @@ def fft( ) -> None: lhs = self # For now, deferred only supported with GPU, use eager / numpy for CPU - if self.runtime.num_gpus == 0: - lhs_eager = lhs.runtime.to_eager_array(lhs) - rhs_eager = rhs.runtime.to_eager_array(rhs) + if runtime.num_gpus == 0: + lhs_eager = runtime.to_eager_array(lhs) + rhs_eager = runtime.to_eager_array(rhs) lhs_eager.fft(rhs_eager, axes, kind, direction) - lhs.base = lhs.runtime.to_deferred_array(lhs_eager).base + lhs.base = runtime.to_deferred_array( + lhs_eager, read_only=True + ).base else: input = rhs.base output = lhs.base - task = self.context.create_auto_task(CuNumericOpCode.FFT) - p_output = task.declare_partition(output) - p_input = task.declare_partition(input) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.FFT + ) - task.add_output(output, partition=p_output) - task.add_input(input, partition=p_input) + p_output = task.add_output(output) + p_input = task.add_input(input) task.add_scalar_arg(kind.type_id, ty.int32) task.add_scalar_arg(direction.value, ty.int32) task.add_scalar_arg( @@ -1451,34 +1361,40 @@ def fft( for ax in axes: task.add_scalar_arg(ax, ty.int64) - if input.ndim > len(OrderedSet(axes)): - task.add_broadcast(input, axes=OrderedSet(axes)) + if input.shape == output.shape: + task.add_constraint(align(p_output, p_input)) + if input.ndim > len(OrderedSet(axes)): + task.add_constraint(broadcast(p_input, OrderedSet(axes))) + else: + task.add_constraint(broadcast(p_input)) else: - task.add_broadcast(input) - task.add_constraint(p_output == p_input) + # TODO: We need the relaxed alignment to avoid serializing the + # task here. Batched FFT was relying on the relaxed alignment. 
+ task.add_constraint(broadcast(p_output)) + task.add_constraint(broadcast(p_input)) task.execute() - # Fill the cuNumeric array with the value in the numpy array - def _fill(self, value: Any) -> None: - assert value.scalar + # Fill the cuPyNumeric array with the value in the numpy array + def _fill(self, value: LogicalStore | Scalar) -> None: assert self.base is not None - if self.scalar: - # Handle the 0D case special - self.base.set_storage(value.storage) - elif self.dtype.kind != "V" and self.base.kind is not Future: - # Emit a Legion fill - self.context.issue_fill(self.base, value) + if not self.base.transformed: + # Emit a Legate fill + legate_runtime.issue_fill(self.base, value) else: + if isinstance(value, Scalar): + value = legate_runtime.create_store_from_scalar(value) + # Arg reductions would never fill transformed stores + assert self.dtype.kind != "V" # Perform the fill using a task # If this is a fill for an arg value, make sure to pass # the value dtype so that we get it packed correctly - argval = self.dtype.kind == "V" - task = self.context.create_auto_task(CuNumericOpCode.FILL) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.FILL + ) task.add_output(self.base) task.add_input(value) - task.add_scalar_arg(argval, ty.bool_) task.execute() def fill(self, numpy_array: Any) -> None: @@ -1489,11 +1405,7 @@ def fill(self, numpy_array: Any) -> None: # Have to copy the numpy array because this launch is asynchronous # and we need to make sure the application doesn't mutate the value # so make a future result, this is immediate so no dependence - value = self.runtime.create_scalar(numpy_array.data) - store = self.context.create_store( - self.base.type, shape=(1,), storage=value, optimize_scalar=True - ) - self._fill(store) + self._fill(Scalar(numpy_array.tobytes(), self.base.type)) @auto_convert("rhs1_thunk", "rhs2_thunk") def contract( @@ -1583,7 +1495,7 @@ def contract( # of tensor cores. 
In the general-purpose tensor contraction case # below the tasks do this adjustment internally. if blas_op is not None and lhs_thunk.dtype == np.float16: - lhs_thunk = self.runtime.create_empty_thunk( + lhs_thunk = runtime.create_empty_thunk( lhs_thunk.shape, ty.float32, inputs=[lhs_thunk] ) @@ -1598,19 +1510,22 @@ def contract( # The underlying libraries are not guaranteed to work with stride # values of 0. The frontend should therefore handle broadcasting # directly, instead of promoting stores. - assert not lhs.has_fake_dims() - assert not rhs1.has_fake_dims() - assert not rhs2.has_fake_dims() + # TODO: We need a better API for this + # assert not lhs.has_fake_dims() + # assert not rhs1.has_fake_dims() + # assert not rhs2.has_fake_dims() # Special cases where we can use BLAS if blas_op is not None: if blas_op == BlasOperation.VV: # Vector dot product - task = self.context.create_auto_task(CuNumericOpCode.DOT) - task.add_reduction(lhs, ReductionOp.ADD) - task.add_input(rhs1) - task.add_input(rhs2) - task.add_alignment(rhs1, rhs2) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.DOT + ) + task.add_reduction(lhs, ReductionOpKind.ADD) + p_rhs1 = task.add_input(rhs1) + p_rhs2 = task.add_input(rhs2) + task.add_constraint(align(p_rhs1, p_rhs2)) task.execute() elif blas_op == BlasOperation.MV: @@ -1629,12 +1544,14 @@ def contract( rhs2 = rhs2.promote(0, m) lhs = lhs.promote(1, n) - task = self.context.create_auto_task(CuNumericOpCode.MATVECMUL) - task.add_reduction(lhs, ReductionOp.ADD) - task.add_input(rhs1) - task.add_input(rhs2) - task.add_alignment(lhs, rhs1) - task.add_alignment(lhs, rhs2) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.MATVECMUL + ) + p_lhs = task.add_reduction(lhs, ReductionOpKind.ADD) + p_rhs1 = task.add_input(rhs1) + p_rhs2 = task.add_input(rhs2) + task.add_constraint(align(p_lhs, p_rhs1)) + task.add_constraint(align(p_lhs, p_rhs2)) task.execute() elif blas_op == BlasOperation.MM: @@ 
-1662,17 +1579,97 @@ def contract( assert m == rhs1.shape[0] assert n == rhs2.shape[1] assert k == rhs2.shape[0] - lhs = lhs.promote(1, k) - rhs1 = rhs1.promote(2, n) - rhs2 = rhs2.promote(0, m) - task = self.context.create_auto_task(CuNumericOpCode.MATMUL) - task.add_reduction(lhs, ReductionOp.ADD) - task.add_input(rhs1) - task.add_input(rhs2) - task.add_alignment(lhs, rhs1) - task.add_alignment(lhs, rhs2) - task.execute() + def rounding_divide( + lhs: tuple[int, ...], rhs: tuple[int, ...] + ) -> tuple[int, ...]: + return tuple( + (lh + rh - 1) // rh for (lh, rh) in zip(lhs, rhs) + ) + + # TODO: better heuristics + def choose_2d_color_shape( + shape: tuple[int, int] + ) -> tuple[int, int]: + # 1M elements, we should probably even go larger + MIN_MATRIX_SIZE = 1 << 20 + # If the matrix is too small don't partition it at all + if (not legate_settings.test()) and shape[0] * shape[ + 1 + ] <= MIN_MATRIX_SIZE: + return (1, 1) + + # start with 1D and re-balance by powers of 2 + # (don't worry about other primes) + color_shape = (runtime.num_procs, 1) + while ( + shape[0] / color_shape[0] + < 2 * shape[1] / color_shape[1] + and color_shape[0] % 2 == 0 + ): + color_shape = (color_shape[0] // 2, color_shape[1] * 2) + + return color_shape + + # TODO: better heuristics? 
+ def choose_batchsize( + tilesize: tuple[int, int], k: int, itemsize: int + ) -> int: + # default corresponds to 128MB (to store A and B tile) + from ..settings import settings + + max_elements_per_tile = ( + settings.matmul_cache_size() // itemsize + ) + total_elements_rhs = (tilesize[0] + tilesize[1]) * k + num_batches = rounding_divide( + (total_elements_rhs,), (max_elements_per_tile,) + )[0] + batch_size = rounding_divide((k,), (num_batches,))[0] + + return batch_size + + # choose color-shape/k_batch_size + initial_color_shape = choose_2d_color_shape((m, n)) + tile_shape = rounding_divide((m, n), initial_color_shape) + color_shape = rounding_divide((m, n), tile_shape) + k_batch_size = choose_batchsize( + tile_shape, k, rhs1_thunk.dtype.itemsize # type: ignore + ) + k_color = rounding_divide((k,), (k_batch_size,)) + + # initial partition of lhs defined py tile-shape + tiled_lhs = lhs.partition_by_tiling(tile_shape) + tiled_rhs1 = rhs1.partition_by_tiling( + (tile_shape[0], k_batch_size) + ) + tiled_rhs2 = rhs2.partition_by_tiling( + (k_batch_size, tile_shape[1]) + ) + + def run_matmul_for_batch( + tiled_lhs: LogicalStorePartition, + tiled_rhs1: LogicalStorePartition, + tiled_rhs2: LogicalStorePartition, + i: int, + ) -> None: + manual_task = legate_runtime.create_manual_task( + self.library, CuPyNumericOpCode.MATMUL, color_shape + ) + + manual_task.add_output(tiled_lhs) + manual_task.add_input(tiled_lhs) + manual_task.add_input( + tiled_rhs1, (dimension(0), constant(i)) + ) + manual_task.add_input( + tiled_rhs2, (constant(i), dimension(1)) + ) + + manual_task.execute() + + for i in range(0, k_color[0]): + run_matmul_for_batch(tiled_lhs, tiled_rhs1, tiled_rhs2, i) else: assert False @@ -1693,8 +1690,8 @@ def contract( # Transpose arrays according to alphabetical order of mode labels def alphabetical_transpose( - store: Store, modes: Sequence[str] - ) -> Store: + store: LogicalStore, modes: Sequence[str] + ) -> LogicalStore: perm = tuple( dim for (_, dim) in 
sorted(zip(modes, range(len(modes)))) ) @@ -1712,7 +1709,7 @@ def alphabetical_transpose( extent = mode2extent[mode] def add_mode( - store: Store, modes: Sequence[str], dim_mask: list[bool] + store: LogicalStore, modes: Sequence[str], dim_mask: list[bool] ) -> Any: if mode not in modes: dim_mask.append(False) @@ -1728,37 +1725,41 @@ def add_mode( assert lhs.shape == rhs2.shape # Prepare the launch - task = self.context.create_auto_task(CuNumericOpCode.CONTRACT) - task.add_reduction(lhs, ReductionOp.ADD) - task.add_input(rhs1) - task.add_input(rhs2) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.CONTRACT + ) + p_lhs = task.add_reduction(lhs, ReductionOpKind.ADD) + p_rhs1 = task.add_input(rhs1) + p_rhs2 = task.add_input(rhs2) task.add_scalar_arg(tuple(lhs_dim_mask), (ty.bool_,)) task.add_scalar_arg(tuple(rhs1_dim_mask), (ty.bool_,)) task.add_scalar_arg(tuple(rhs2_dim_mask), (ty.bool_,)) - task.add_alignment(lhs, rhs1) - task.add_alignment(lhs, rhs2) + task.add_constraint(align(p_lhs, p_rhs1)) + task.add_constraint(align(p_lhs, p_rhs2)) task.execute() # Create array from input array and indices def choose(self, rhs: Any, *args: Any) -> None: # convert all arrays to deferred - index_arr = self.runtime.to_deferred_array(rhs) - ch_def = tuple(self.runtime.to_deferred_array(c) for c in args) + index_arr = runtime.to_deferred_array(rhs, read_only=True) + ch_def = tuple( + runtime.to_deferred_array(c, read_only=True) for c in args + ) out_arr = self.base # broadcast input array and all choices arrays to the same shape - index = index_arr._broadcast(out_arr.shape.extents) - ch_tuple = tuple(c._broadcast(out_arr.shape.extents) for c in ch_def) - - task = self.context.create_auto_task(CuNumericOpCode.CHOOSE) - task.add_output(out_arr) - task.add_input(index) - for c in ch_tuple: - task.add_input(c) + index = index_arr._broadcast(tuple(out_arr.shape)) + ch_tuple = tuple(c._broadcast(tuple(out_arr.shape)) for c in ch_def) - task.add_alignment(index, 
out_arr) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.CHOOSE + ) + p_out = task.add_output(out_arr) + p_ind = task.add_input(index) + task.add_constraint(align(p_ind, p_out)) for c in ch_tuple: - task.add_alignment(index, c) + p_c = task.add_input(c) + task.add_constraint(align(p_ind, p_c)) task.execute() def select( @@ -1767,19 +1768,23 @@ def select( choicelist: Iterable[Any], default: npt.NDArray[Any], ) -> None: - condlist_ = tuple(self.runtime.to_deferred_array(c) for c in condlist) + condlist_ = tuple( + runtime.to_deferred_array(c, read_only=True) for c in condlist + ) choicelist_ = tuple( - self.runtime.to_deferred_array(c) for c in choicelist + runtime.to_deferred_array(c, read_only=True) for c in choicelist ) - task = self.context.create_auto_task(CuNumericOpCode.SELECT) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.SELECT + ) out_arr = self.base task.add_output(out_arr) for c in chain(condlist_, choicelist_): c_arr = c._broadcast(self.shape) task.add_input(c_arr) task.add_alignment(c_arr, out_arr) - task.add_scalar_arg(default, to_core_dtype(default.dtype)) + task.add_scalar_arg(default, to_core_type(default.dtype)) task.execute() # Create or extract a diagonal from a matrix @@ -1835,17 +1840,19 @@ def _diag_helper( else: diag = diag.promote(0, matrix.shape[0]) - task = self.context.create_auto_task(CuNumericOpCode.DIAG) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.DIAG + ) if extract: - task.add_reduction(diag, ReductionOp.ADD) - task.add_input(matrix) - task.add_alignment(matrix, diag) + p_diag = task.add_reduction(diag, ReductionOpKind.ADD) + p_mat = task.add_input(matrix) + task.add_constraint(align(p_mat, p_diag)) else: - task.add_output(matrix) - task.add_input(diag) - task.add_input(matrix) - task.add_alignment(diag, matrix) + p_mat = task.add_output(matrix) + p_diag = task.add_input(diag) + task.add_input(matrix, p_mat) + task.add_constraint(align(p_diag, 
p_mat)) task.add_scalar_arg(naxes, ty.int32) task.add_scalar_arg(extract, ty.bool_) @@ -1854,15 +1861,15 @@ def _diag_helper( @auto_convert("indices", "values") def put(self, indices: Any, values: Any, check_bounds: bool) -> None: - if indices.base.kind == Future or indices.base.transformed: - change_shape = indices.base.kind == Future + if indices.base.has_scalar_storage or indices.base.transformed: + change_shape = indices.base.has_scalar_storage indices = indices._convert_future_to_regionfield(change_shape) - if values.base.kind == Future or values.base.transformed: - change_shape = values.base.kind == Future + if values.base.has_scalar_storage or values.base.transformed: + change_shape = values.base.has_scalar_storage values = values._convert_future_to_regionfield(change_shape) - if self.base.kind == Future or self.base.transformed: - change_shape = self.base.kind == Future + if self.base.has_scalar_storage or self.base.transformed: + change_shape = self.base.has_scalar_storage self_tmp = self._convert_future_to_regionfield(change_shape) else: self_tmp = self @@ -1876,10 +1883,10 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: # (indices.size,) shape and is used to copy data from values # to the target ND array (self) N = self_tmp.ndim - pointN_dtype = self.runtime.get_point_type(N) + pointN_dtype = ty.point_type(N) indirect = cast( DeferredArray, - self.runtime.create_empty_thunk( + runtime.create_empty_thunk( shape=indices.shape, dtype=pointN_dtype, inputs=[indices], @@ -1887,24 +1894,21 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: ) shape = self_tmp.shape - task = self.context.create_auto_task(CuNumericOpCode.WRAP) - task.add_output(indirect.base) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.WRAP + ) + p_indirect = task.add_output(indirect.base) task.add_scalar_arg(shape, (ty.int64,)) task.add_scalar_arg(True, ty.bool_) # has_input task.add_scalar_arg(check_bounds, ty.bool_) 
- task.add_input(indices.base) - task.add_alignment(indices.base, indirect.base) + p_indices = task.add_input(indices.base) + task.add_constraint(align(p_indices, p_indirect)) task.throws_exception(IndexError) task.execute() - if indirect.base.kind == Future: + if indirect.base.has_scalar_storage: indirect = indirect._convert_future_to_regionfield() - copy = self.context.create_copy() - copy.set_target_indirect_out_of_range(False) - copy.add_input(values.base) - copy.add_target_indirect(indirect.base) - copy.add_output(self_tmp.base) - copy.execute() + legate_runtime.issue_scatter(self_tmp.base, indirect.base, values.base) if self_tmp is not self: self.copy(self_tmp, deep=True) @@ -1912,19 +1916,20 @@ def put(self, indices: Any, values: Any, check_bounds: bool) -> None: @auto_convert("mask", "values") def putmask(self, mask: Any, values: Any) -> None: assert self.shape == mask.shape - values = values._copy_if_overlapping(self) + values = values._copy_if_partially_overlapping(self) if values.shape != self.shape: values_new = values._broadcast(self.shape) else: values_new = values.base - - task = self.context.create_auto_task(CuNumericOpCode.PUTMASK) - task.add_input(self.base) - task.add_input(mask.base) - task.add_input(values_new) - task.add_output(self.base) - task.add_alignment(self.base, mask.base) - task.add_alignment(self.base, values_new) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.PUTMASK + ) + p_self = task.add_input(self.base) + p_mask = task.add_input(mask.base) + p_values = task.add_input(values_new) + task.add_output(self.base, p_self) + task.add_constraint(align(p_self, p_mask)) + task.add_constraint(align(p_self, p_values)) task.execute() # Create an identity array with the ones offset from the diagonal by k @@ -1941,7 +1946,9 @@ def eye(self, k: int) -> None: # privilege, then, is not appropriate for this call, as it essentially # tells the runtime that it can throw away the previous contents of the # entire region. 
- task = self.context.create_auto_task(CuNumericOpCode.EYE) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.EYE + ) task.add_input(self.base) task.add_output(self.base) task.add_scalar_arg(k, ty.int32) @@ -1953,30 +1960,21 @@ def arange(self, start: float, stop: float, step: float) -> None: if self.scalar: # Handle the special case of a single value here assert self.shape[0] == 1 - array = np.array(start, dtype=self.dtype) - future = self.runtime.create_scalar(array.data) - self.base.set_storage(future) + legate_runtime.issue_fill(self.base, Scalar(start, self.base.type)) return - def create_scalar(value: Any, dtype: np.dtype[Any]) -> Any: - array = np.array(value, dtype) - return self.runtime.create_wrapped_scalar( - array.data, - array.dtype, - shape=(1,), - ).base - - task = self.context.create_auto_task(CuNumericOpCode.ARANGE) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.ARANGE + ) task.add_output(self.base) - task.add_input(create_scalar(start, self.dtype)) - task.add_input(create_scalar(stop, self.dtype)) - task.add_input(create_scalar(step, self.dtype)) + task.add_scalar_arg(start, self.base.type) + task.add_scalar_arg(step, self.base.type) task.execute() # Tile the src array onto the destination array @auto_convert("rhs") - def tile(self, rhs: Any, reps: Union[Any, Sequence[int]]) -> None: + def tile(self, rhs: Any, reps: Any | Sequence[int]) -> None: src_array = rhs dst_array = self assert src_array.ndim <= dst_array.ndim @@ -1985,36 +1983,40 @@ def tile(self, rhs: Any, reps: Union[Any, Sequence[int]]) -> None: self._fill(src_array.base) return - task = self.context.create_auto_task(CuNumericOpCode.TILE) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.TILE + ) task.add_output(self.base) - task.add_input(rhs.base) + p_rhs = task.add_input(rhs.base) - task.add_broadcast(rhs.base) + task.add_constraint(broadcast(p_rhs)) task.execute() # Transpose the matrix dimensions def 
transpose( - self, axes: Union[None, tuple[int, ...], list[int]] + self, axes: tuple[int, ...] | list[int] | None ) -> DeferredArray: computed_axes = tuple(axes) if axes is not None else () result = self.base.transpose(computed_axes) - return DeferredArray(self.runtime, result) + return DeferredArray(result) @auto_convert("rhs") def trilu(self, rhs: Any, k: int, lower: bool) -> None: lhs = self.base rhs = rhs._broadcast(lhs.shape) - task = self.context.create_auto_task(CuNumericOpCode.TRILU) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.TRILU + ) - task.add_output(lhs) - task.add_input(rhs) + p_lhs = task.add_output(lhs) + p_rhs = task.add_input(rhs) task.add_scalar_arg(lower, ty.bool_) task.add_scalar_arg(k, ty.int32) - task.add_alignment(lhs, rhs) + task.add_constraint(align(p_lhs, p_rhs)) task.execute() @@ -2022,10 +2024,34 @@ def trilu(self, rhs: Any, k: int, lower: bool) -> None: def repeat( self, repeats: Any, axis: int, scalar_repeats: bool ) -> DeferredArray: - out = self.runtime.create_unbound_thunk(self.base.type, ndim=self.ndim) - task = self.context.create_auto_task(CuNumericOpCode.REPEAT) - task.add_input(self.base) - task.add_output(out.base) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.REPEAT + ) + if scalar_repeats: + out_shape = tuple( + self.shape[dim] * repeats if dim == axis else self.shape[dim] + for dim in range(self.ndim) + ) + out = cast( + DeferredArray, + runtime.create_empty_thunk( + out_shape, + dtype=self.base.type, + inputs=[self], + ), + ) + p_self = task.declare_partition() + p_out = task.declare_partition() + task.add_input(self.base, p_self) + task.add_output(out.base, p_out) + factors = tuple( + repeats if dim == axis else 1 for dim in range(self.ndim) + ) + task.add_constraint(scale(factors, p_self, p_out)) + else: + out = runtime.create_unbound_thunk(self.base.type, ndim=self.ndim) + p_self = task.add_input(self.base) + task.add_output(out.base) # We pass axis now but 
don't use for 1D case (will use for ND case task.add_scalar_arg(axis, ty.int32) task.add_scalar_arg(scalar_repeats, ty.bool_) @@ -2033,18 +2059,18 @@ def repeat( task.add_scalar_arg(repeats, ty.int64) else: shape = self.shape - repeats = self.runtime.to_deferred_array(repeats).base + repeats = runtime.to_deferred_array(repeats, read_only=True).base for dim, extent in enumerate(shape): if dim == axis: continue repeats = repeats.promote(dim, extent) - task.add_input(repeats) - task.add_alignment(self.base, repeats) + p_repeats = task.add_input(repeats) + task.add_constraint(align(p_self, p_repeats)) task.execute() return out @auto_convert("rhs") - def flip(self, rhs: Any, axes: Union[None, int, tuple[int, ...]]) -> None: + def flip(self, rhs: Any, axes: int | tuple[int, ...] | None) -> None: input = rhs.base output = self.base @@ -2053,19 +2079,21 @@ def flip(self, rhs: Any, axes: Union[None, int, tuple[int, ...]]) -> None: else: axes = normalize_axis_tuple(axes, self.ndim) - task = self.context.create_auto_task(CuNumericOpCode.FLIP) - task.add_output(output) - task.add_input(input) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.FLIP + ) + p_out = task.add_output(output) + p_in = task.add_input(input) task.add_scalar_arg(axes, (ty.int32,)) - task.add_broadcast(input) - task.add_alignment(input, output) + task.add_constraint(broadcast(p_in)) + task.add_constraint(align(p_in, p_out)) task.execute() # Perform a bin count operation on the array @auto_convert("rhs", "weights") - def bincount(self, rhs: Any, weights: Optional[NumPyThunk] = None) -> None: + def bincount(self, rhs: Any, weights: NumPyThunk | None = None) -> None: weight_array = weights src_array = rhs dst_array = self @@ -2075,39 +2103,37 @@ def bincount(self, rhs: Any, weights: Optional[NumPyThunk] = None) -> None: assert src_array.shape == weight_array.shape or ( src_array.size == 1 and weight_array.size == 1 ) - else: - weight_array = self.runtime.create_wrapped_scalar( - 
np.array(1, dtype=np.int64), - np.dtype(np.int64), - shape=(), - ) dst_array.fill(np.array(0, dst_array.dtype)) - task = self.context.create_auto_task(CuNumericOpCode.BINCOUNT) - task.add_reduction(dst_array.base, ReductionOp.ADD) - task.add_input(src_array.base) - task.add_input(weight_array.base) # type: ignore - - task.add_broadcast(dst_array.base) - if not weight_array.scalar: - task.add_alignment(src_array.base, weight_array.base) # type: ignore # noqa + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.BINCOUNT + ) + p_dst = task.add_reduction(dst_array.base, ReductionOpKind.ADD) + p_src = task.add_input(src_array.base) + task.add_constraint(broadcast(p_dst)) + if weight_array is not None: + p_weight = task.add_input(cast(DeferredArray, weight_array).base) + if not weight_array.scalar: + task.add_constraint(align(p_src, p_weight)) task.execute() def nonzero(self) -> tuple[NumPyThunk, ...]: results = tuple( - self.runtime.create_unbound_thunk(ty.int64) - for _ in range(self.ndim) + runtime.create_unbound_thunk(ty.int64) for _ in range(self.ndim) ) - task = self.context.create_auto_task(CuNumericOpCode.NONZERO) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.NONZERO + ) - task.add_input(self.base) + p_self = task.add_input(self.base) for result in results: task.add_output(result.base) - task.add_broadcast(self.base, axes=range(1, self.ndim)) + if self.ndim > 1: + task.add_constraint(broadcast(p_self, range(1, self.ndim))) task.execute() return results @@ -2116,10 +2142,12 @@ def bitgenerator_random_raw( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ) -> None: - task = self.context.create_auto_task(CuNumericOpCode.BITGENERATOR) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.BITGENERATOR + ) task.add_output(self.base) @@ -2138,14 +2166,16 @@ def bitgenerator_distribution( self, handle: int, generatorType: 
BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, distribution: BitGeneratorDistribution, intparams: tuple[int, ...], floatparams: tuple[float, ...], doubleparams: tuple[float, ...], ) -> None: - task = self.context.create_auto_task(CuNumericOpCode.BITGENERATOR) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.BITGENERATOR + ) task.add_output(self.base) @@ -2168,7 +2198,7 @@ def bitgenerator_integers( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, low: int, high: int, @@ -2192,7 +2222,7 @@ def bitgenerator_uniform( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, low: float, high: float, @@ -2226,7 +2256,7 @@ def bitgenerator_lognormal( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, sigma: float, @@ -2260,7 +2290,7 @@ def bitgenerator_normal( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, sigma: float, @@ -2294,7 +2324,7 @@ def bitgenerator_poisson( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, lam: float, ) -> None: @@ -2320,7 +2350,7 @@ def bitgenerator_exponential( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, scale: float, ) -> None: @@ -2353,7 +2383,7 @@ def bitgenerator_gumbel( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -2387,7 +2417,7 @@ def bitgenerator_laplace( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -2421,7 +2451,7 @@ def bitgenerator_logistic( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, 
None], + seed: int | None, flags: int, mu: float, beta: float, @@ -2455,7 +2485,7 @@ def bitgenerator_pareto( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -2488,7 +2518,7 @@ def bitgenerator_power( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -2521,7 +2551,7 @@ def bitgenerator_rayleigh( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, sigma: float, ) -> None: @@ -2554,7 +2584,7 @@ def bitgenerator_cauchy( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, x0: float, gamma: float, @@ -2588,7 +2618,7 @@ def bitgenerator_triangular( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, a: float, b: float, @@ -2623,7 +2653,7 @@ def bitgenerator_weibull( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, lam: float, k: float, @@ -2657,7 +2687,7 @@ def bitgenerator_bytes( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ) -> None: if self.dtype == np.uint8: @@ -2679,7 +2709,7 @@ def bitgenerator_beta( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, a: float, b: float, @@ -2713,7 +2743,7 @@ def bitgenerator_f( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, dfnum: float, dfden: float, @@ -2747,7 +2777,7 @@ def bitgenerator_logseries( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, p: float, ) -> None: @@ -2770,7 +2800,7 @@ def bitgenerator_noncentral_f( self, handle: int, generatorType: BitGeneratorType, - seed: 
Union[int, None], + seed: int | None, flags: int, dfnum: float, dfden: float, @@ -2805,7 +2835,7 @@ def bitgenerator_chisquare( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, df: float, nonc: float, @@ -2839,7 +2869,7 @@ def bitgenerator_gamma( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, k: float, theta: float, @@ -2873,7 +2903,7 @@ def bitgenerator_standard_t( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, df: float, ) -> None: @@ -2906,7 +2936,7 @@ def bitgenerator_hypergeometric( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ngood: int, nbad: int, @@ -2934,7 +2964,7 @@ def bitgenerator_vonmises( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, kappa: float, @@ -2968,7 +2998,7 @@ def bitgenerator_zipf( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -2992,7 +3022,7 @@ def bitgenerator_geometric( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, p: float, ) -> None: @@ -3018,7 +3048,7 @@ def bitgenerator_wald( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, scale: float, @@ -3052,7 +3082,7 @@ def bitgenerator_binomial( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ntrials: int, p: float, @@ -3080,7 +3110,7 @@ def bitgenerator_negative_binomial( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ntrials: int, p: float, @@ -3104,15 +3134,18 @@ def bitgenerator_negative_binomial( doubleparams, ) - def 
random(self, gen_code: Any, args: Any = ()) -> None: - task = self.context.create_auto_task(CuNumericOpCode.RAND) + def random(self, gen_code: Any, args: tuple[Scalar, ...] = ()) -> None: + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.RAND + ) task.add_output(self.base) task.add_scalar_arg(gen_code.value, ty.int32) - epoch = self.runtime.get_next_random_epoch() + epoch = runtime.get_next_random_epoch() task.add_scalar_arg(epoch, ty.uint32) task.add_scalar_arg(self.compute_strides(self.shape), (ty.int64,)) - self.add_arguments(task, args) + for arg in args: + task.add_scalar_arg(arg) task.execute() @@ -3126,13 +3159,12 @@ def random_normal(self) -> None: def random_integer( self, - low: Union[int, npt.NDArray[Any]], - high: Union[int, npt.NDArray[Any]], + low: int | npt.NDArray[Any], + high: int | npt.NDArray[Any], ) -> None: assert self.dtype.kind == "i" - low = np.array(low, self.dtype) - high = np.array(high, self.dtype) - self.random(RandGenCode.INTEGER, [low, high]) + args = (Scalar(low, self.base.type), Scalar(high, self.base.type)) + self.random(RandGenCode.INTEGER, args) # Perform the unary operation and put the result in the array @auto_convert("src") @@ -3141,26 +3173,30 @@ def unary_op( op: UnaryOpCode, src: Any, where: Any, - args: Any, - multiout: Optional[Any] = None, + args: tuple[Scalar, ...] 
= (), + multiout: Any | None = None, ) -> None: lhs = self.base - src = src._copy_if_overlapping(self) + src = src._copy_if_partially_overlapping(self) rhs = src._broadcast(lhs.shape) with Annotation({"OpCode": op.name}): - task = self.context.create_auto_task(CuNumericOpCode.UNARY_OP) - task.add_output(lhs) - task.add_input(rhs) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.UNARY_OP + ) + p_lhs = task.add_output(lhs) + p_rhs = task.add_input(rhs) task.add_scalar_arg(op.value, ty.int32) - self.add_arguments(task, args) + for arg in args: + task.add_scalar_arg(arg) - task.add_alignment(lhs, rhs) + task.add_constraint(align(p_lhs, p_rhs)) if multiout is not None: for out in multiout: - task.add_output(out.base) - task.add_alignment(out.base, rhs) + out_def = runtime.to_deferred_array(out, read_only=False) + p_out = task.add_output(out_def.base) + task.add_constraint(align(p_out, p_rhs)) task.execute() @@ -3172,13 +3208,13 @@ def unary_reduction( op: UnaryRedCode, src: Any, where: Any, - orig_axis: Union[int, None], + orig_axis: int | None, axes: tuple[int, ...], keepdims: bool, - args: Any, + args: tuple[Scalar, ...], initial: Any, ) -> None: - lhs_array: Union[NumPyThunk, DeferredArray] = self + lhs_array: NumPyThunk | DeferredArray = self rhs_array = src assert lhs_array.ndim <= rhs_array.ndim @@ -3190,8 +3226,8 @@ def unary_reduction( ) if argred: - argred_dtype = self.runtime.get_argred_type(rhs_array.base.type) - lhs_array = self.runtime.create_empty_thunk( + argred_dtype = runtime.get_argred_type(rhs_array.base.type) + lhs_array = runtime.create_empty_thunk( lhs_array.shape, dtype=argred_dtype, inputs=[self], @@ -3217,8 +3253,8 @@ def unary_reduction( lhs = lhs.project(0, 0) with Annotation({"OpCode": op.name, "ArgRed?": str(argred)}): - task = self.context.create_auto_task( - CuNumericOpCode.SCALAR_UNARY_RED + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.SCALAR_UNARY_RED ) task.add_reduction(lhs, 
_UNARY_RED_TO_REDUCTION_OPS[op]) @@ -3230,7 +3266,8 @@ def unary_reduction( task.add_input(where.base) task.add_alignment(rhs_array.base, where.base) - self.add_arguments(task, args) + for arg in args: + task.add_scalar_arg(arg) task.execute() @@ -3262,10 +3299,14 @@ def unary_reduction( ) with Annotation({"OpCode": op.name, "ArgRed?": str(argred)}): - task = self.context.create_auto_task(CuNumericOpCode.UNARY_RED) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.UNARY_RED + ) - task.add_input(rhs_array.base) - task.add_reduction(result, _UNARY_RED_TO_REDUCTION_OPS[op]) + p_rhs = task.add_input(rhs_array.base) + p_result = task.add_reduction( + result, _UNARY_RED_TO_REDUCTION_OPS[op] + ) task.add_scalar_arg(axis, ty.int32) task.add_scalar_arg(op, ty.int32) task.add_scalar_arg(is_where, ty.bool_) @@ -3273,9 +3314,10 @@ def unary_reduction( task.add_input(where.base) task.add_alignment(rhs_array.base, where.base) - self.add_arguments(task, args) + for arg in args: + task.add_scalar_arg(arg) - task.add_alignment(result, rhs_array.base) + task.add_constraint(align(p_result, p_rhs)) task.execute() @@ -3284,7 +3326,6 @@ def unary_reduction( UnaryOpCode.GETARG, lhs_array, True, - [], ) def isclose( @@ -3292,8 +3333,8 @@ def isclose( ) -> None: assert not equal_nan args = ( - np.array(rtol, dtype=np.float64), - np.array(atol, dtype=np.float64), + Scalar(rtol, ty.float64), + Scalar(atol, ty.float64), ) self.binary_op(BinaryOpCode.ISCLOSE, rhs1, rhs2, True, args) @@ -3305,25 +3346,28 @@ def binary_op( src1: Any, src2: Any, where: Any, - args: Any, + args: tuple[Scalar, ...], ) -> None: lhs = self.base - src1 = src1._copy_if_overlapping(self) + src1 = src1._copy_if_partially_overlapping(self) rhs1 = src1._broadcast(lhs.shape) - src2 = src2._copy_if_overlapping(self) + src2 = src2._copy_if_partially_overlapping(self) rhs2 = src2._broadcast(lhs.shape) with Annotation({"OpCode": op_code.name}): # Populate the Legate launcher - task = 
self.context.create_auto_task(CuNumericOpCode.BINARY_OP) - task.add_output(lhs) - task.add_input(rhs1) - task.add_input(rhs2) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.BINARY_OP + ) + p_lhs = task.add_output(lhs) + p_rhs1 = task.add_input(rhs1) + p_rhs2 = task.add_input(rhs2) task.add_scalar_arg(op_code.value, ty.int32) - self.add_arguments(task, args) + for arg in args: + task.add_scalar_arg(arg) - task.add_alignment(lhs, rhs1) - task.add_alignment(lhs, rhs2) + task.add_constraint(align(p_lhs, p_rhs1)) + task.add_constraint(align(p_lhs, p_rhs2)) task.execute() @@ -3333,13 +3377,13 @@ def binary_reduction( op: BinaryOpCode, src1: Any, src2: Any, - broadcast: Union[NdShape, None], - args: Any, + broadcast: NdShape | None, + args: tuple[Scalar, ...], ) -> None: lhs = self.base rhs1 = src1.base rhs2 = src2.base - assert lhs.scalar + assert lhs.has_scalar_storage if broadcast is not None: rhs1 = rhs1._broadcast(broadcast) @@ -3347,19 +3391,22 @@ def binary_reduction( # Populate the Legate launcher if op == BinaryOpCode.NOT_EQUAL: - redop = ReductionOp.ADD + redop = ReductionOpKind.ADD self.fill(np.array(False)) else: - redop = ReductionOp.MUL + redop = ReductionOpKind.MUL self.fill(np.array(True)) - task = self.context.create_auto_task(CuNumericOpCode.BINARY_RED) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.BINARY_RED + ) task.add_reduction(lhs, redop) - task.add_input(rhs1) - task.add_input(rhs2) + p_rhs1 = task.add_input(rhs1) + p_rhs2 = task.add_input(rhs2) task.add_scalar_arg(op.value, ty.int32) - self.add_arguments(task, args) + for arg in args: + task.add_scalar_arg(arg) - task.add_alignment(rhs1, rhs2) + task.add_constraint(align(p_rhs1, p_rhs2)) task.execute() @@ -3371,48 +3418,36 @@ def where(self, src1: Any, src2: Any, src3: Any) -> None: rhs3 = src3._broadcast(lhs.shape) # Populate the Legate launcher - task = self.context.create_auto_task(CuNumericOpCode.WHERE) - task.add_output(lhs) - 
task.add_input(rhs1) - task.add_input(rhs2) - task.add_input(rhs3) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.WHERE + ) + p_lhs = task.add_output(lhs) + p_rhs1 = task.add_input(rhs1) + p_rhs2 = task.add_input(rhs2) + p_rhs3 = task.add_input(rhs3) - task.add_alignment(lhs, rhs1) - task.add_alignment(lhs, rhs2) - task.add_alignment(lhs, rhs3) + task.add_constraint(align(p_lhs, p_rhs1)) + task.add_constraint(align(p_lhs, p_rhs2)) + task.add_constraint(align(p_lhs, p_rhs3)) task.execute() def argwhere(self) -> NumPyThunk: - result = self.runtime.create_unbound_thunk(ty.int64, ndim=2) + result = runtime.create_unbound_thunk(ty.int64, ndim=2) - task = self.context.create_auto_task(CuNumericOpCode.ARGWHERE) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.ARGWHERE + ) task.add_output(result.base) - task.add_input(self.base) - task.add_broadcast(self.base, axes=range(1, self.ndim)) + p_self = task.add_input(self.base) + if self.ndim > 1: + task.add_constraint(broadcast(p_self, range(1, self.ndim))) task.execute() return result - # A helper method for attaching arguments - def add_arguments( - self, - task: Union[AutoTask, ManualTask], - args: Optional[Sequence[npt.NDArray[Any]]], - ) -> None: - if args is None: - return - for numpy_array in args: - assert numpy_array.size == 1 - scalar = self.runtime.create_wrapped_scalar( - numpy_array.data, - numpy_array.dtype, - shape=(1,), - ) - task.add_input(scalar.base) - @staticmethod def compute_strides(shape: NdShape) -> tuple[int, ...]: stride = 1 @@ -3423,12 +3458,20 @@ def compute_strides(shape: NdShape) -> tuple[int, ...]: return result @auto_convert("src") - def cholesky(self, src: Any, no_tril: bool = False) -> None: - cholesky(self, src, no_tril) + def cholesky(self, src: Any) -> None: + cholesky_deferred(self, src) + + @auto_convert("q", "r") + def qr(self, q: Any, r: Any) -> None: + qr_deferred(self, q, r) @auto_convert("a", "b") def solve(self, a: Any, b: Any) 
-> None: - solve(self, a, b) + solve_deferred(self, a, b) + + @auto_convert("u", "s", "vh") + def svd(self, u: Any, s: Any, vh: Any) -> None: + svd_deferred(self, u, s, vh) @auto_convert("rhs") def scan( @@ -3436,12 +3479,12 @@ def scan( op: int, rhs: Any, axis: int, - dtype: Optional[npt.DTypeLike], + dtype: npt.DTypeLike | None, nan_to_identity: bool, ) -> None: # local sum # storage for local sums accessible - temp = self.runtime.create_unbound_thunk( + temp = runtime.create_unbound_thunk( dtype=self.base.type, ndim=self.ndim ) @@ -3451,33 +3494,37 @@ def scan( else: # swap axes, always performing scan along last axis swapped = rhs.swapaxes(axis, rhs.ndim - 1) - input = self.runtime.create_empty_thunk( + input = runtime.create_empty_thunk( swapped.shape, dtype=rhs.base.type, inputs=(rhs, swapped) ) input.copy(swapped, deep=True) output = input - task = output.context.create_auto_task(CuNumericOpCode.SCAN_LOCAL) - task.add_output(output.base) - task.add_input(input.base) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.SCAN_LOCAL + ) + p_out = task.add_output(output.base) + p_in = task.add_input(input.base) task.add_output(temp.base) task.add_scalar_arg(op, ty.int32) task.add_scalar_arg(nan_to_identity, ty.bool_) - task.add_alignment(input.base, output.base) + task.add_constraint(align(p_in, p_out)) task.execute() # Global sum # NOTE: Assumes the partitioning stays the same from previous task. 
# NOTE: Each node will do a sum up to its index, alternatively could # do one centralized scan and broadcast (slightly less redundant work) - task = output.context.create_auto_task(CuNumericOpCode.SCAN_GLOBAL) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.SCAN_GLOBAL + ) task.add_input(output.base) - task.add_input(temp.base) + p_temp = task.add_input(temp.base) task.add_output(output.base) task.add_scalar_arg(op, ty.int32) - task.add_broadcast(temp.base) + task.add_constraint(broadcast(p_temp)) task.execute() @@ -3488,45 +3535,49 @@ def scan( self.copy(swapped, deep=True) def unique(self) -> NumPyThunk: - result = self.runtime.create_unbound_thunk(self.base.type) + result = runtime.create_unbound_thunk(self.base.type) - task = self.context.create_auto_task(CuNumericOpCode.UNIQUE) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.UNIQUE + ) task.add_output(result.base) task.add_input(self.base) - if self.runtime.num_gpus > 0: + if runtime.num_gpus > 0: task.add_nccl_communicator() task.execute() - if self.runtime.num_gpus == 0 and self.runtime.num_procs > 1: - result.base = self.context.tree_reduce( - CuNumericOpCode.UNIQUE_REDUCE, result.base + if runtime.num_gpus == 0 and runtime.num_procs > 1: + result.base = legate_runtime.tree_reduce( + self.library, CuPyNumericOpCode.UNIQUE_REDUCE, result.base ) return result @auto_convert("rhs", "v") def searchsorted(self, rhs: Any, v: Any, side: SortSide = "left") -> None: - task = self.context.create_auto_task(CuNumericOpCode.SEARCHSORTED) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.SEARCHSORTED + ) is_left = side == "left" if is_left: self.fill(np.array(rhs.size, self.dtype)) - task.add_reduction(self.base, ReductionOp.MIN) + p_self = task.add_reduction(self.base, ReductionOpKind.MIN) else: self.fill(np.array(0, self.dtype)) - task.add_reduction(self.base, ReductionOp.MAX) + p_self = task.add_reduction(self.base, ReductionOpKind.MAX) 
task.add_input(rhs.base) - task.add_input(v.base) + p_v = task.add_input(v.base) # every partition needs the value information - task.add_broadcast(v.base) - task.add_broadcast(self.base) - task.add_alignment(self.base, v.base) + task.add_constraint(broadcast(p_v)) + task.add_constraint(broadcast(p_self)) + task.add_constraint(align(p_self, p_v)) task.add_scalar_arg(is_left, ty.bool_) task.add_scalar_arg(rhs.size, ty.int64) @@ -3537,9 +3588,9 @@ def sort( self, rhs: Any, argsort: bool = False, - axis: Union[int, None] = -1, + axis: int | None = -1, kind: SortType = "quicksort", - order: Union[None, str, list[str]] = None, + order: str | list[str] | None = None, ) -> None: if kind == "stable": stable = True @@ -3548,37 +3599,39 @@ def sort( if order is not None: raise NotImplementedError( - "cuNumeric does not support sorting with 'order' as " + "cuPyNumeric does not support sorting with 'order' as " "ndarray only supports numeric values" ) if axis is not None and (axis >= rhs.ndim or axis < -rhs.ndim): raise ValueError("invalid axis") - sort(self, rhs, argsort, axis, stable) + sort_deferred(self, rhs, argsort, axis, stable) @auto_convert("rhs") def partition( self, rhs: Any, - kth: Union[int, Sequence[int]], + kth: int | Sequence[int], argpartition: bool = False, - axis: Union[int, None] = -1, + axis: int | None = -1, kind: SelectKind = "introselect", - order: Union[None, str, list[str]] = None, + order: str | list[str] | None = None, ) -> None: if order is not None: raise NotImplementedError( - "cuNumeric does not support partitioning with 'order' as " + "cuPyNumeric does not support partitioning with 'order' as " "ndarray only supports numeric values" ) if axis is not None and (axis >= rhs.ndim or axis < -rhs.ndim): raise ValueError("invalid axis") # fallback to sort for now - sort(self, rhs, argpartition, axis, False) + sort_deferred(self, rhs, argpartition, axis, False) def create_window(self, op_code: WindowOpCode, M: int, *args: Any) -> None: - task = 
self.context.create_auto_task(CuNumericOpCode.WINDOW) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.WINDOW + ) task.add_output(self.base) task.add_scalar_arg(op_code, ty.int32) task.add_scalar_arg(M, ty.int64) @@ -3587,70 +3640,69 @@ def create_window(self, op_code: WindowOpCode, M: int, *args: Any) -> None: task.execute() @auto_convert("src") - def packbits( - self, src: Any, axis: Union[int, None], bitorder: BitOrder - ) -> None: + def packbits(self, src: Any, axis: int | None, bitorder: BitOrder) -> None: bitorder_code = getattr(Bitorder, bitorder.upper()) - task = self.context.create_auto_task(CuNumericOpCode.PACKBITS) - p_out = task.declare_partition(self.base) - p_in = task.declare_partition(src.base) - task.add_output(self.base, partition=p_out) - task.add_input(src.base, partition=p_in) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.PACKBITS + ) + p_out = task.declare_partition() + p_in = task.declare_partition() + task.add_output(self.base, p_out) + task.add_input(src.base, p_in) task.add_scalar_arg(axis, ty.uint32) task.add_scalar_arg(bitorder_code, ty.uint32) - scale = tuple(8 if dim == axis else 1 for dim in range(src.ndim)) - task.add_constraint(p_in <= p_out * scale) # type: ignore + factors = tuple(8 if dim == axis else 1 for dim in range(src.ndim)) + task.add_constraint(scale(factors, p_out, p_in)) # type: ignore task.execute() @auto_convert("src") def unpackbits( - self, src: Any, axis: Union[int, None], bitorder: BitOrder + self, src: Any, axis: int | None, bitorder: BitOrder ) -> None: bitorder_code = getattr(Bitorder, bitorder.upper()) - task = self.context.create_auto_task(CuNumericOpCode.UNPACKBITS) - p_out = task.declare_partition(self.base) - p_in = task.declare_partition(src.base) - task.add_output(self.base, partition=p_out) - task.add_input(src.base, partition=p_in) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.UNPACKBITS + ) + p_out = 
task.declare_partition() + p_in = task.declare_partition() + task.add_output(self.base, p_out) + task.add_input(src.base, p_in) task.add_scalar_arg(axis, ty.uint32) task.add_scalar_arg(bitorder_code, ty.uint32) - scale = tuple(8 if dim == axis else 1 for dim in range(src.ndim)) - task.add_constraint(p_out <= p_in * scale) # type: ignore + factors = tuple(8 if dim == axis else 1 for dim in range(src.ndim)) + task.add_constraint(scale(factors, p_in, p_out)) # type: ignore task.execute() @auto_convert("src") def _wrap(self, src: Any, new_len: int) -> None: - if src.base.kind == Future or src.base.transformed: - change_shape = src.base.kind == Future + if src.base.has_scalar_storage or src.base.transformed: + change_shape = src.base.has_scalar_storage src = src._convert_future_to_regionfield(change_shape) # first, we create indirect array with PointN type that # (len,) shape and is used to copy data from original array # to the target 1D wrapped array N = src.ndim - pointN_dtype = self.runtime.get_point_type(N) + pointN_dtype = ty.point_type(N) indirect = cast( DeferredArray, - self.runtime.create_empty_thunk( + runtime.create_empty_thunk( shape=(new_len,), dtype=pointN_dtype, inputs=[src], ), ) - task = self.context.create_auto_task(CuNumericOpCode.WRAP) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.WRAP + ) task.add_output(indirect.base) task.add_scalar_arg(src.shape, (ty.int64,)) task.add_scalar_arg(False, ty.bool_) # has_input task.add_scalar_arg(False, ty.bool_) # check bounds task.execute() - copy = self.context.create_copy() - copy.set_target_indirect_out_of_range(False) - copy.add_input(src.base) - copy.add_source_indirect(indirect.base) - copy.add_output(self.base) - copy.execute() + legate_runtime.issue_gather(self.base, src.base, indirect.base) # Perform a histogram operation on the array @auto_convert("src", "bins", "weights") @@ -3669,14 +3721,25 @@ def histogram(self, src: Any, bins: Any, weights: Any) -> None: 
dst_array.fill(np.array(0, dst_array.dtype)) - task = self.context.create_auto_task(CuNumericOpCode.HISTOGRAM) - task.add_reduction(dst_array.base, ReductionOp.ADD) - task.add_input(src_array.base) - task.add_input(bins_array.base) - task.add_input(weight_array.base) + task = legate_runtime.create_auto_task( + self.library, CuPyNumericOpCode.HISTOGRAM + ) + p_dst = task.add_reduction(dst_array.base, ReductionOpKind.ADD) + p_src = task.add_input(src_array.base) + p_bins = task.add_input(bins_array.base) + p_weight = task.add_input(weight_array.base) - task.add_broadcast(bins_array.base) - task.add_broadcast(dst_array.base) - task.add_alignment(src_array.base, weight_array.base) + task.add_constraint(broadcast(p_bins)) + task.add_constraint(broadcast(p_dst)) + task.add_constraint(align(p_src, p_weight)) task.execute() + + def stencil_hint( + self, + low_offsets: tuple[int, ...], + high_offsets: tuple[int, ...], + ) -> None: + legate_runtime.prefetch_bloated_instances( + self.base, low_offsets, high_offsets, False + ) diff --git a/cunumeric/eager.py b/cupynumeric/_thunk/eager.py similarity index 84% rename from cunumeric/eager.py rename to cupynumeric/_thunk/eager.py index 4e6e504c2..74a02214f 100644 --- a/cunumeric/eager.py +++ b/cupynumeric/_thunk/eager.py @@ -1,4 +1,4 @@ -# Copyright 2021-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,21 +14,14 @@ # from __future__ import annotations -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Iterable, - Optional, - Sequence, - Union, - cast, -) +from typing import TYPE_CHECKING, Any, Callable, Iterable, Sequence, cast import numpy as np +from legate.core import Scalar -from .config import ( +from .._utils import is_np2 +from .._utils.array import is_advanced_indexing +from ..config import ( FFT_C2R, FFT_D2Z, FFT_R2C, @@ -37,22 +30,22 @@ ConvertCode, FFTDirection, ScanCode, + TransferType, UnaryOpCode, UnaryRedCode, WindowOpCode, ) +from ..runtime import runtime from .deferred import DeferredArray from .thunk import NumPyThunk -from .utils import is_advanced_indexing, is_supported_type if TYPE_CHECKING: import numpy.typing as npt - from legate.core import FieldID, Future, Region - from .config import BitGeneratorType, FFTType - from .runtime import Runtime - from .types import ( + from ..config import BitGeneratorType, FFTType + from ..types import ( BitOrder, + ConvolveMethod, ConvolveMode, NdShape, OrderType, @@ -62,7 +55,7 @@ ) -_UNARY_OPS: Dict[UnaryOpCode, Any] = { +_UNARY_OPS: dict[UnaryOpCode, Any] = { UnaryOpCode.ABSOLUTE: np.absolute, UnaryOpCode.ARCCOS: np.arccos, UnaryOpCode.ARCCOSH: np.arccosh, @@ -109,7 +102,7 @@ # Unary reduction operations that don't return the argument of the # reduction operation -_UNARY_RED_OPS_WITHOUT_ARG: Dict[UnaryRedCode, Any] = { +_UNARY_RED_OPS_WITHOUT_ARG: dict[UnaryRedCode, Any] = { UnaryRedCode.ALL: np.all, UnaryRedCode.ANY: np.any, UnaryRedCode.MAX: np.max, @@ -124,14 +117,14 @@ # Unary reduction operations that return the argument of the # reduction operation -_UNARY_RED_OPS_WITH_ARG: Dict[UnaryRedCode, Any] = { +_UNARY_RED_OPS_WITH_ARG: dict[UnaryRedCode, Any] = { UnaryRedCode.ARGMIN: np.argmin, UnaryRedCode.ARGMAX: np.argmax, UnaryRedCode.NANARGMAX: np.nanargmax, UnaryRedCode.NANARGMIN: np.nanargmin, } -_BINARY_OPS: Dict[BinaryOpCode, Any] = { +_BINARY_OPS: dict[BinaryOpCode, Any] = 
{ BinaryOpCode.ADD: np.add, BinaryOpCode.ARCTAN2: np.arctan2, BinaryOpCode.BITWISE_AND: np.bitwise_and, @@ -168,12 +161,10 @@ BinaryOpCode.SUBTRACT: np.subtract, } -_WINDOW_OPS: Dict[ +_WINDOW_OPS: dict[ WindowOpCode, - Union[ - Callable[[float], npt.NDArray[Any]], - Callable[[float, float], npt.NDArray[Any]], - ], + Callable[[float], npt.NDArray[Any]] + | Callable[[float, float], npt.NDArray[Any]], ] = { WindowOpCode.BARLETT: np.bartlett, WindowOpCode.BLACKMAN: np.blackman, @@ -219,30 +210,21 @@ class EagerArray(NumPyThunk): def __init__( self, - runtime: Runtime, - array: npt.NDArray[Any], - parent: Optional[EagerArray] = None, - key: Optional[tuple[Any, ...]] = None, + val: npt.ArrayLike, + parent: EagerArray | None = None, + key: tuple[Any, ...] | None = None, ) -> None: - super().__init__(runtime, array.dtype) + array = np.asarray(val) + super().__init__(array.dtype) self.array: npt.NDArray[Any] = array - self.parent: Optional[EagerArray] = parent + self.parent: EagerArray | None = parent self.children: list[EagerArray] = [] - self.key: Optional[tuple[Any, ...]] = key + self.key: tuple[Any, ...] 
| None = key #: if this ever becomes set (to a DeferredArray), we forward all #: operations to it - self.deferred: Optional[Union[DeferredArray, NumPyThunk]] = None + self.deferred: DeferredArray | None = None self.escaped = False - @property - def storage(self) -> Union[Future, tuple[Region, Union[int, FieldID]]]: - if self.deferred is None: - self.to_deferred_array() - - assert self.deferred is not None - - return self.deferred.storage - @property def shape(self) -> NdShape: return self.array.shape @@ -265,96 +247,120 @@ def check_eager_args(self, *args: Any) -> None: if self.deferred is not None: return for arg in args: - if self.runtime.is_eager_array(arg): + if runtime.is_eager_array(arg): if arg.deferred is not None: - self.to_deferred_array() + self.to_deferred_array(read_only=False) break - elif self.runtime.is_deferred_array(arg): - self.to_deferred_array() + elif runtime.is_deferred_array(arg): + self.to_deferred_array(read_only=False) break elif arg is None or not isinstance(arg, NumPyThunk): pass else: raise RuntimeError("bad argument type") - def _convert_children(self) -> None: + def _convert_subtree(self) -> None: + assert self.deferred is None + if self.parent is None: + transfer = ( + TransferType.SHARE + if self.escaped + # We can donate the base array, since it hasn't escaped to the + # user, and we won't be using it anymore. + else TransferType.DONATE + ) + deferred = runtime.find_or_create_array_thunk( + self.array, transfer=transfer, defer=True + ) + else: + parent = self.parent.deferred + assert self.key is not None + func = getattr(parent, self.key[0]) + args = self.key[1:] + deferred = func(*args) + self.deferred = cast(DeferredArray, deferred) + for child in self.children: + child._convert_subtree() + + def _convert_tree(self) -> None: """ - Traverse down our children and convert them to deferred arrays. + Convert the entire array tree to deferred arrays. 
+ + We have to convert the whole tree when we convert even one node, to + make sure any future use of any array in the tree will go through the + deferred path, rather than use the original eager NumPy array, that we + donated. """ - assert self.runtime.is_deferred_array(self.deferred) - for child in self.children: - if child.deferred is None: - assert child.key is not None - func = getattr(self.deferred, child.key[0]) - args = child.key[1:] - child.deferred = func(*args) - # After we've made all the deferred views for each child then - # we can traverse down. Do it this way so we can get partition - # coalescing where possible - for child in self.children: - child._convert_children() - - def to_deferred_array(self) -> DeferredArray: - """This is a really important method. It will convert a tree of - eager NumPy arrays into an equivalent tree of deferred arrays that - are mirrored by an equivalent logical region tree. To be consistent - we always do this from the root, so once any array in the tree needs - to be converted then we do it for all of them. 
- :meta private: + if self.parent is None: + self._convert_subtree() + else: + self.parent._convert_tree() + + def to_deferred_array(self, read_only: bool) -> DeferredArray: """ - # Check to see if we already have our deferred array - # or whether we need to go up the tree to have it made - if self.deferred is None: - if self.parent is None: - assert is_supported_type(self.array.dtype) - # We are at the root of the tree so we need to - # actually make a DeferredArray to use - if self.array.size == 1: - self.deferred = self.runtime.create_wrapped_scalar( - self.array.data, - dtype=self.array.dtype, - shape=self.shape, - ) - else: - self.deferred = self.runtime.find_or_create_array_thunk( - self.array, - share=self.escaped, - defer=True, - ) - self._convert_children() - else: - # Traverse up the tree to make the deferred array - self.parent.to_deferred_array() - assert self.deferred is not None - return cast(DeferredArray, self.deferred) + Convert this EagerArray into a DeferredArray. + + If `read_only` is `False`, the EagerArray's buffer is donated to + initialize the DeferredArray, and the returned DeferredArray is used + in place of the EagerArray going forward. 
+ """ + if self.deferred is not None: + return self.deferred + if read_only: + deferred = cast( + DeferredArray, + runtime.find_or_create_array_thunk( + self.array, + transfer=TransferType.MAKE_COPY, + read_only=True, + defer=True, + ), + ) + else: + self._convert_tree() + deferred = cast(DeferredArray, self.deferred) + return deferred def imag(self) -> NumPyThunk: if self.deferred is not None: return self.deferred.imag() - return EagerArray(self.runtime, self.array.imag) + return EagerArray(self.array.imag) def real(self) -> NumPyThunk: if self.deferred is not None: return self.deferred.real() - return EagerArray(self.runtime, self.array.real) + return EagerArray(self.array.real) def conj(self) -> NumPyThunk: if self.deferred is not None: return self.deferred.conj() - return EagerArray(self.runtime, self.array.conj()) + return EagerArray(self.array.conj()) - def convolve(self, v: Any, out: Any, mode: ConvolveMode) -> None: - self.check_eager_args(v, out) + def convolve( + self, + input: Any, + filter: Any, + mode: ConvolveMode, + method: ConvolveMethod, + ) -> None: + self.check_eager_args(input, filter) if self.deferred is not None: - self.deferred.convolve(v, out, mode) + self.deferred.convolve(input, filter, mode, method) else: if self.ndim == 1: - out.array = np.convolve(self.array, v.array, mode) + if method != "auto": + runtime.warn( + f"the method {method} is ignored " + "for the 1D convolution" + ) + self.array[:] = np.convolve(input.array, filter.array, mode) else: from scipy.signal import convolve # type: ignore [import] - out.array = convolve(self.array, v.array, mode) + self.array[...] 
= convolve( + input.array, filter.array, mode, method + ) def fft( self, @@ -384,7 +390,13 @@ def fft( elif res.dtype == np.float64: self.array[:] = res.astype(np.float32) else: - raise RuntimeError("Unsupported data type in eager FFT") + if not is_np2: + raise RuntimeError( + f"Unsupported data type {res.dtype!r} in eager FFT" + ) + else: + self.array[:] = res + else: self.array[:] = res @@ -406,11 +418,6 @@ def scalar(self) -> bool: return self.deferred.scalar return self.array.size == 1 - def get_scalar_array(self) -> npt.NDArray[Any]: - if self.deferred is not None: - return self.deferred.get_scalar_array() - return self.array.reshape(()) - def _create_indexing_key(self, key: Any) -> Any: if key is None or key is Ellipsis: return key @@ -424,7 +431,7 @@ def _create_indexing_key(self, key: Any) -> Any: result += (self._create_indexing_key(k),) return result assert isinstance(key, NumPyThunk) - return self.runtime.to_eager_array(key).array + return runtime.to_eager_array(key).array def get_item(self, key: Any) -> NumPyThunk: if self.deferred is not None: @@ -432,12 +439,10 @@ def get_item(self, key: Any) -> NumPyThunk: if is_advanced_indexing(key): index_key = self._create_indexing_key(key) out = self.array[index_key] - result = EagerArray(self.runtime, out) + result = EagerArray(out) else: child = self.array[key] - result = EagerArray( - self.runtime, child, parent=self, key=("get_item", key) - ) + result = EagerArray(child, parent=self, key=("get_item", key)) self.children.append(result) return result @@ -464,10 +469,9 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: child = self.array.reshape(newshape, order=order) # See if we are aliased or not if child.base is None: - result = EagerArray(self.runtime, child) + result = EagerArray(child) else: result = EagerArray( - self.runtime, child, parent=self, key=("reshape", newshape, order), @@ -475,7 +479,7 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: 
self.children.append(result) return result - def squeeze(self, axis: Optional[int]) -> NumPyThunk: + def squeeze(self, axis: int | tuple[int, ...] | None) -> NumPyThunk: if self.deferred is not None: return self.deferred.squeeze(axis) # See https://github.com/numpy/numpy/issues/22019 @@ -485,9 +489,7 @@ def squeeze(self, axis: Optional[int]) -> NumPyThunk: return self # Should be aliased with parent region assert child.base is not None - result = EagerArray( - self.runtime, child, parent=self, key=("squeeze", axis) - ) + result = EagerArray(child, parent=self, key=("squeeze", axis)) self.children.append(result) return result @@ -497,9 +499,7 @@ def swapaxes(self, axis1: int, axis2: int) -> NumPyThunk: child = self.array.swapaxes(axis1, axis2) # Should be aliased with parent region assert child.base is not None - result = EagerArray( - self.runtime, child, parent=self, key=("swapaxes", axis1, axis2) - ) + result = EagerArray(child, parent=self, key=("swapaxes", axis1, axis2)) self.children.append(result) return result @@ -535,18 +535,14 @@ def fill(self, value: Any) -> None: else: self.array.fill(value) - def transpose( - self, axes: Union[None, tuple[int, ...], list[int]] - ) -> NumPyThunk: + def transpose(self, axes: tuple[int, ...] 
| list[int]) -> NumPyThunk: if self.deferred is not None: return self.deferred.transpose(axes) # See https://github.com/numpy/numpy/issues/22019 child = self.array.transpose(cast(Any, axes)) # Should be aliased with parent region assert child.base is not None - result = EagerArray( - self.runtime, child, parent=self, key=("transpose", axes) - ) + result = EagerArray(child, parent=self, key=("transpose", axes)) self.children.append(result) return result @@ -566,9 +562,9 @@ def repeat( array = np.repeat(self.array, repeats.array, axis) else: array = np.repeat(self.array, repeats, axis) - return EagerArray(self.runtime, array) + return EagerArray(array) - def flip(self, rhs: Any, axes: Union[None, int, tuple[int, ...]]) -> None: + def flip(self, rhs: Any, axes: int | tuple[int, ...] | None) -> None: self.check_eager_args(rhs) if self.deferred is not None: self.deferred.flip(rhs, axes) @@ -585,9 +581,7 @@ def broadcast_to(self, shape: NdShape) -> NumPyThunk: child = np.broadcast_to(self.array, shape) # Should be aliased with parent region assert child.base is not None - result = EagerArray( - self.runtime, child, parent=self, key=("broadcast_to", shape) - ) + result = EagerArray(child, parent=self, key=("broadcast_to", shape)) self.children.append(result) return result @@ -705,14 +699,14 @@ def arange(self, start: float, stop: float, step: float) -> None: else: self.array = np.arange(start, stop, step, self.dtype) - def tile(self, rhs: Any, reps: Union[int, Sequence[int]]) -> None: + def tile(self, rhs: Any, reps: int | Sequence[int]) -> None: self.check_eager_args(rhs) if self.deferred is not None: self.deferred.tile(rhs, reps) else: self.array[:] = np.tile(rhs.array, reps) - def bincount(self, rhs: Any, weights: Optional[NumPyThunk] = None) -> None: + def bincount(self, rhs: Any, weights: NumPyThunk | None = None) -> None: self.check_eager_args(rhs, weights) if self.deferred is not None: self.deferred.bincount(rhs, weights=weights) @@ -730,7 +724,7 @@ def 
nonzero(self) -> tuple[NumPyThunk, ...]: arrays = self.array.nonzero() result: tuple[NumPyThunk, ...] = () for array in arrays: - result += (EagerArray(self.runtime, array),) + result += (EagerArray(array),) return result def searchsorted(self, rhs: Any, v: Any, side: SortSide = "left") -> None: @@ -744,9 +738,9 @@ def sort( self, rhs: Any, argsort: bool = False, - axis: Union[int, None] = -1, + axis: int | None = -1, kind: SortType = "quicksort", - order: Union[None, str, list[str]] = None, + order: str | list[str] | None = None, ) -> None: self.check_eager_args(rhs) if self.deferred is not None: @@ -761,7 +755,7 @@ def bitgenerator_random_raw( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ) -> None: if self.deferred is not None: @@ -784,7 +778,7 @@ def bitgenerator_integers( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, low: int, high: int, @@ -804,7 +798,7 @@ def bitgenerator_lognormal( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, sigma: float, @@ -824,7 +818,7 @@ def bitgenerator_normal( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, sigma: float, @@ -844,7 +838,7 @@ def bitgenerator_uniform( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, low: float, high: float, @@ -864,7 +858,7 @@ def bitgenerator_poisson( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, lam: float, ) -> None: @@ -883,7 +877,7 @@ def bitgenerator_exponential( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, scale: float, ) -> None: @@ -902,7 +896,7 @@ def bitgenerator_gumbel( self, handle: int, generatorType: BitGeneratorType, - seed: 
Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -922,7 +916,7 @@ def bitgenerator_laplace( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -942,7 +936,7 @@ def bitgenerator_logistic( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -962,7 +956,7 @@ def bitgenerator_pareto( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -981,7 +975,7 @@ def bitgenerator_power( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -1000,7 +994,7 @@ def bitgenerator_rayleigh( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, sigma: float, ) -> None: @@ -1019,7 +1013,7 @@ def bitgenerator_cauchy( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, x0: float, gamma: float, @@ -1039,7 +1033,7 @@ def bitgenerator_triangular( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, a: float, b: float, @@ -1060,7 +1054,7 @@ def bitgenerator_weibull( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, lam: float, k: float, @@ -1080,7 +1074,7 @@ def bitgenerator_bytes( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ) -> None: if self.deferred is not None: @@ -1100,7 +1094,7 @@ def bitgenerator_beta( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, a: float, b: float, @@ -1120,7 +1114,7 @@ def bitgenerator_f( self, handle: int, generatorType: BitGeneratorType, - seed: 
Union[int, None], + seed: int | None, flags: int, dfnum: float, dfden: float, @@ -1145,7 +1139,7 @@ def bitgenerator_logseries( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, p: float, ) -> None: @@ -1164,7 +1158,7 @@ def bitgenerator_noncentral_f( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, dfnum: float, dfden: float, @@ -1187,7 +1181,7 @@ def bitgenerator_chisquare( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, df: float, nonc: float, @@ -1215,7 +1209,7 @@ def bitgenerator_gamma( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, k: float, theta: float, @@ -1241,7 +1235,7 @@ def bitgenerator_standard_t( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, df: float, ) -> None: @@ -1260,7 +1254,7 @@ def bitgenerator_hypergeometric( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ngood: int, nbad: int, @@ -1283,7 +1277,7 @@ def bitgenerator_vonmises( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, kappa: float, @@ -1303,7 +1297,7 @@ def bitgenerator_zipf( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -1322,7 +1316,7 @@ def bitgenerator_geometric( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, p: float, ) -> None: @@ -1341,7 +1335,7 @@ def bitgenerator_wald( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, scale: float, @@ -1361,7 +1355,7 @@ def bitgenerator_binomial( self, handle: int, generatorType: 
BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ntrials: int, p: float, @@ -1381,7 +1375,7 @@ def bitgenerator_negative_binomial( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ntrials: int, p: float, @@ -1402,11 +1396,11 @@ def bitgenerator_negative_binomial( def partition( self, rhs: Any, - kth: Union[int, Sequence[int]], + kth: int | Sequence[int], argpartition: bool = False, - axis: Union[int, None] = -1, + axis: int | None = -1, kind: SelectKind = "introselect", - order: Union[None, str, list[str]] = None, + order: str | list[str] | None = None, ) -> None: self.check_eager_args(rhs) if self.deferred is not None: @@ -1437,8 +1431,8 @@ def random_normal(self) -> None: def random_integer( self, - low: Union[int, npt.NDArray[Any]], - high: Union[int, npt.NDArray[Any]], + low: int | npt.NDArray[Any], + high: int | npt.NDArray[Any], ) -> None: if self.deferred is not None: self.deferred.random_integer(low, high) @@ -1455,8 +1449,8 @@ def unary_op( op: UnaryOpCode, rhs: Any, where: Any, - args: Any, - multiout: Optional[Any] = None, + args: tuple[Scalar, ...] 
= (), + multiout: Any | None = None, ) -> None: if multiout is None: self.check_eager_args(rhs, where) @@ -1486,13 +1480,26 @@ def unary_op( else where.array, ) elif op == UnaryOpCode.CLIP: - np.clip(rhs.array, out=self.array, a_min=args[0], a_max=args[1]) + np.clip( + rhs.array, + out=self.array, + a_min=args[0].value(), + a_max=args[1].value(), + ) elif op == UnaryOpCode.COPY: self.array[:] = rhs.array[:] elif op == UnaryOpCode.IMAG: self.array = np.imag(rhs.array) elif op == UnaryOpCode.REAL: self.array = np.real(rhs.array) + elif op == UnaryOpCode.ROUND: + np.round( + rhs.array, + out=self.array, + decimals=args[0].value(), + ) + elif op == UnaryOpCode.ANGLE: + self.array = np.angle(rhs.array, args[0].value()) else: raise RuntimeError("unsupported unary op " + str(op)) @@ -1501,10 +1508,10 @@ def unary_reduction( op: UnaryRedCode, rhs: Any, where: Any, - orig_axis: Union[int, None], + orig_axis: int | None, axes: tuple[int, ...], keepdims: bool, - args: Any, + args: tuple[Scalar, ...], initial: Any, ) -> None: self.check_eager_args(rhs, where) @@ -1560,7 +1567,7 @@ def unary_reduction( ) elif op == UnaryRedCode.VARIANCE: (mu,) = args - centered = np.subtract(rhs.array, mu) + centered = np.subtract(rhs.array, np.asarray(mu)) squares = np.square(centered) np.sum( squares, @@ -1572,7 +1579,7 @@ def unary_reduction( out=self.array, ) elif op == UnaryRedCode.CONTAINS: - self.array.fill(args[0] in rhs.array) + self.array.fill(args[0].value() in rhs.array) elif op == UnaryRedCode.COUNT_NONZERO: self.array[()] = np.count_nonzero(rhs.array, axis=orig_axis) else: @@ -1585,7 +1592,7 @@ def isclose( if self.deferred is not None: self.deferred.isclose(rhs1, rhs2, rtol, atol, equal_nan) else: - self.array[:] = np.isclose( + self.array[...] 
= np.isclose( rhs1.array, rhs2.array, rtol=rtol, @@ -1594,7 +1601,12 @@ def isclose( ) def binary_op( - self, op: BinaryOpCode, rhs1: Any, rhs2: Any, where: Any, args: Any + self, + op: BinaryOpCode, + rhs1: Any, + rhs2: Any, + where: Any, + args: tuple[Scalar, ...], ) -> None: self.check_eager_args(rhs1, rhs2, where) if self.deferred is not None: @@ -1617,8 +1629,8 @@ def binary_reduction( op: BinaryOpCode, rhs1: Any, rhs2: Any, - broadcast: Union[NdShape, None], - args: Any, + broadcast: NdShape | None, + args: tuple[Scalar, ...], ) -> None: self.check_eager_args(rhs1, rhs2) if self.deferred is not None: @@ -1627,7 +1639,10 @@ def binary_reduction( if op == BinaryOpCode.ISCLOSE: self.array = np.array( np.allclose( - rhs1.array, rhs2.array, rtol=args[0], atol=args[1] + rhs1.array, + rhs2.array, + rtol=args[0].value(), + atol=args[1].value(), ) ) elif op == BinaryOpCode.EQUAL: @@ -1648,7 +1663,7 @@ def argwhere(self) -> NumPyThunk: if self.deferred is not None: return self.deferred.argwhere() else: - return EagerArray(self.runtime, np.argwhere(self.array)) + return EagerArray(np.argwhere(self.array)) def trilu(self, rhs: Any, k: int, lower: bool) -> None: self.check_eager_args(rhs) @@ -1660,21 +1675,34 @@ def trilu(self, rhs: Any, k: int, lower: bool) -> None: else: self.array[:] = np.triu(rhs.array, k) - def cholesky(self, src: Any, no_tril: bool) -> None: + def cholesky(self, src: Any) -> None: self.check_eager_args(src) if self.deferred is not None: - self.deferred.cholesky(src, no_tril) + self.deferred.cholesky(src) else: try: result = np.linalg.cholesky(src.array) except np.linalg.LinAlgError as e: - from .linalg import LinAlgError + from ..linalg import LinAlgError raise LinAlgError(e) from e - if no_tril: - result = np.triu(result.T.conj(), k=1) + result + self.array[:] = result + def qr(self, q: Any, r: Any) -> None: + self.check_eager_args(q, r) + if self.deferred is not None: + self.deferred.qr(q, r) + else: + try: + result_q, result_r = 
np.linalg.qr(self.array) + except np.linalg.LinAlgError as e: + from ..linalg import LinAlgError + + raise LinAlgError(e) from e + q.array[:] = result_q + r.array[:] = result_r + def solve(self, a: Any, b: Any) -> None: self.check_eager_args(a, b) if self.deferred is not None: @@ -1683,17 +1711,37 @@ def solve(self, a: Any, b: Any) -> None: try: result = np.linalg.solve(a.array, b.array) except np.linalg.LinAlgError as e: - from .linalg import LinAlgError + from ..linalg import LinAlgError raise LinAlgError(e) from e self.array[:] = result + def svd(self, u: Any, s: Any, vh: Any) -> None: + self.check_eager_args(u, s, vh) + if self.deferred is not None: + self.deferred.svd(u, s, vh) + else: + try: + full_matrices = ( + u.shape[0] == u.shape[1] and vh.shape[0] == vh.shape[1] + ) + result_u, result_s, result_vh = np.linalg.svd( + self.array, full_matrices + ) + except np.linalg.LinAlgError as e: + from ..linalg import LinAlgError + + raise LinAlgError(e) from e + u.array[:] = result_u + s.array[:] = result_s + vh.array[:] = result_vh + def scan( self, op: int, rhs: Any, axis: int, - dtype: Optional[npt.DTypeLike], + dtype: npt.DTypeLike | None, nan_to_identity: bool, ) -> None: self.check_eager_args(rhs) @@ -1717,7 +1765,7 @@ def unique(self) -> NumPyThunk: if self.deferred is not None: return self.deferred.unique() else: - return EagerArray(self.runtime, np.unique(self.array)) + return EagerArray(np.unique(self.array)) def create_window(self, op_code: WindowOpCode, M: int, *args: Any) -> None: if self.deferred is not None: @@ -1726,9 +1774,7 @@ def create_window(self, op_code: WindowOpCode, M: int, *args: Any) -> None: fn = _WINDOW_OPS[op_code] self.array[:] = fn(M, *args) - def packbits( - self, src: Any, axis: Union[int, None], bitorder: BitOrder - ) -> None: + def packbits(self, src: Any, axis: int | None, bitorder: BitOrder) -> None: self.check_eager_args(src) if self.deferred is not None: self.deferred.packbits(src, axis, bitorder) @@ -1738,7 +1784,7 @@ def 
packbits( ) def unpackbits( - self, src: Any, axis: Union[int, None], bitorder: BitOrder + self, src: Any, axis: int | None, bitorder: BitOrder ) -> None: self.check_eager_args(src) if self.deferred is not None: @@ -1774,3 +1820,11 @@ def histogram(self, rhs: Any, bins: Any, weights: Any) -> None: cast(EagerArray, bins).array, weights=cast(EagerArray, weights).array, ) + + def stencil_hint( + self, + low_offsets: tuple[int, ...], + high_offsets: tuple[int, ...], + ) -> None: + if self.deferred is not None: + self.deferred.stencil_hint(low_offsets, high_offsets) diff --git a/cunumeric/thunk.py b/cupynumeric/_thunk/thunk.py similarity index 81% rename from cunumeric/thunk.py rename to cupynumeric/_thunk/thunk.py index 68aafb6c9..37f157881 100644 --- a/cunumeric/thunk.py +++ b/cupynumeric/_thunk/thunk.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -15,16 +15,17 @@ from __future__ import annotations from abc import ABC, abstractmethod, abstractproperty -from typing import TYPE_CHECKING, Any, Iterable, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Iterable, Sequence -from .config import ConvertCode +from ..config import ConvertCode +from ..runtime import runtime if TYPE_CHECKING: import numpy as np import numpy.typing as npt - from legate.core import FieldID, Future, Region + from legate.core import Scalar - from .config import ( + from ..config import ( BinaryOpCode, BitGeneratorType, FFTDirection, @@ -33,9 +34,9 @@ UnaryRedCode, WindowOpCode, ) - from .runtime import Runtime - from .types import ( + from ..types import ( BitOrder, + ConvolveMethod, ConvolveMode, NdShape, OrderType, @@ -48,14 +49,13 @@ class NumPyThunk(ABC): """This is the base class for NumPy computations. 
It has methods for all the kinds of computations and operations that can be done - on cuNumeric ndarrays. + on cuPyNumeric ndarrays. :meta private: """ - def __init__(self, runtime: Runtime, dtype: np.dtype[Any]) -> None: - self.runtime = runtime - self.context = runtime.legate_context + def __init__(self, dtype: np.dtype[Any]) -> None: + self.library = runtime.library self.dtype = dtype @property @@ -73,11 +73,6 @@ def size(self) -> int: # Abstract methods - @abstractproperty - def storage(self) -> Union[Future, tuple[Region, Union[int, FieldID]]]: - """Return the Legion storage primitive for this NumPy thunk""" - ... - @abstractproperty def shape(self) -> NdShape: ... @@ -99,13 +94,19 @@ def conj(self) -> NumPyThunk: ... @abstractmethod - def convolve(self, v: Any, out: Any, mode: ConvolveMode) -> None: + def convolve( + self, + input: Any, + filter: Any, + mode: ConvolveMode, + method: ConvolveMethod, + ) -> None: ... @abstractmethod def fft( self, - out: Any, + rhs: Any, axes: Sequence[int], kind: FFTType, direction: FFTDirection, @@ -127,10 +128,6 @@ def repeat( def scalar(self) -> bool: ... - @abstractmethod - def get_scalar_array(self) -> npt.NDArray[Any]: - ... - @abstractmethod def get_item(self, key: Any) -> NumPyThunk: ... @@ -144,7 +141,7 @@ def reshape(self, newshape: NdShape, order: OrderType) -> NumPyThunk: ... @abstractmethod - def squeeze(self, axis: Optional[int]) -> NumPyThunk: + def squeeze(self, axis: int | tuple[int, ...] | None) -> NumPyThunk: ... @abstractmethod @@ -166,13 +163,11 @@ def fill(self, value: Any) -> None: ... @abstractmethod - def transpose( - self, axes: Union[None, tuple[int, ...], list[int]] - ) -> NumPyThunk: + def transpose(self, axes: tuple[int, ...] | list[int]) -> NumPyThunk: ... @abstractmethod - def flip(self, rhs: Any, axes: Union[None, int, tuple[int, ...]]) -> None: + def flip(self, rhs: Any, axes: int | tuple[int, ...] | None) -> None: ... 
@abstractmethod @@ -223,7 +218,7 @@ def arange(self, start: float, stop: float, step: float) -> None: ... @abstractmethod - def tile(self, rhs: Any, reps: Union[Any, Sequence[int]]) -> None: + def tile(self, rhs: Any, reps: Any | Sequence[int]) -> None: ... @abstractmethod @@ -231,7 +226,7 @@ def trilu(self, rhs: Any, k: int, lower: bool) -> None: ... @abstractmethod - def bincount(self, rhs: Any, weights: Optional[NumPyThunk] = None) -> None: + def bincount(self, rhs: Any, weights: NumPyThunk | None = None) -> None: ... @abstractmethod @@ -243,7 +238,7 @@ def bitgenerator_random_raw( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ) -> None: ... @@ -253,7 +248,7 @@ def bitgenerator_integers( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, low: int, high: int, @@ -265,7 +260,7 @@ def bitgenerator_uniform( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, low: float, high: float, @@ -277,7 +272,7 @@ def bitgenerator_lognormal( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, sigma: float, @@ -289,7 +284,7 @@ def bitgenerator_normal( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, sigma: float, @@ -301,7 +296,7 @@ def bitgenerator_poisson( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, lam: float, ) -> None: @@ -312,7 +307,7 @@ def bitgenerator_exponential( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, scale: float, ) -> None: @@ -323,7 +318,7 @@ def bitgenerator_gumbel( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -335,7 
+330,7 @@ def bitgenerator_laplace( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -347,7 +342,7 @@ def bitgenerator_logistic( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, beta: float, @@ -359,7 +354,7 @@ def bitgenerator_pareto( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -370,7 +365,7 @@ def bitgenerator_power( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -381,7 +376,7 @@ def bitgenerator_rayleigh( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, sigma: float, ) -> None: @@ -392,7 +387,7 @@ def bitgenerator_cauchy( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, x0: float, gamma: float, @@ -404,7 +399,7 @@ def bitgenerator_triangular( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, a: float, b: float, @@ -417,7 +412,7 @@ def bitgenerator_weibull( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, lam: float, k: float, @@ -429,7 +424,7 @@ def bitgenerator_bytes( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ) -> None: ... 
@@ -439,7 +434,7 @@ def bitgenerator_beta( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, a: float, b: float, @@ -451,7 +446,7 @@ def bitgenerator_f( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, dfnum: float, dfden: float, @@ -463,7 +458,7 @@ def bitgenerator_logseries( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, p: float, ) -> None: @@ -474,7 +469,7 @@ def bitgenerator_noncentral_f( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, dfnum: float, dfden: float, @@ -487,7 +482,7 @@ def bitgenerator_chisquare( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, df: float, nonc: float, @@ -499,7 +494,7 @@ def bitgenerator_gamma( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, k: float, theta: float, @@ -511,7 +506,7 @@ def bitgenerator_standard_t( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, df: float, ) -> None: @@ -522,7 +517,7 @@ def bitgenerator_hypergeometric( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ngood: int, nbad: int, @@ -535,7 +530,7 @@ def bitgenerator_vonmises( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mu: float, kappa: float, @@ -547,7 +542,7 @@ def bitgenerator_zipf( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, alpha: float, ) -> None: @@ -558,7 +553,7 @@ def bitgenerator_geometric( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, p: float, ) -> None: @@ -569,7 
+564,7 @@ def bitgenerator_wald( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, mean: float, scale: float, @@ -581,7 +576,7 @@ def bitgenerator_binomial( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ntrials: int, p: float, @@ -593,7 +588,7 @@ def bitgenerator_negative_binomial( self, handle: int, generatorType: BitGeneratorType, - seed: Union[int, None], + seed: int | None, flags: int, ntrials: int, p: float, @@ -608,11 +603,11 @@ def random_uniform(self) -> None: def partition( self, rhs: Any, - kth: Union[int, Sequence[int]], + kth: int | Sequence[int], argpartition: bool = False, - axis: Union[int, None] = -1, + axis: int | None = -1, kind: SelectKind = "introselect", - order: Union[None, str, list[str]] = None, + order: str | list[str] | None = None, ) -> None: ... @@ -623,8 +618,8 @@ def random_normal(self) -> None: @abstractmethod def random_integer( self, - low: Union[int, npt.NDArray[Any]], - high: Union[int, npt.NDArray[Any]], + low: int | npt.NDArray[Any], + high: int | npt.NDArray[Any], ) -> None: ... @@ -637,9 +632,9 @@ def sort( self, rhs: Any, argsort: bool = False, - axis: Union[int, None] = -1, + axis: int | None = -1, kind: SortType = "quicksort", - order: Union[None, str, list[str]] = None, + order: str | list[str] | None = None, ) -> None: ... @@ -649,8 +644,8 @@ def unary_op( op: UnaryOpCode, rhs: Any, where: Any, - args: Any, - multiout: Optional[Any] = None, + args: tuple[Scalar, ...] = (), + multiout: Any | None = None, ) -> None: ... @@ -660,10 +655,10 @@ def unary_reduction( op: UnaryRedCode, rhs: Any, where: Any, - orig_axis: Union[int, None], + orig_axis: int | None, axes: tuple[int, ...], keepdims: bool, - args: Any, + args: tuple[Scalar, ...], initial: Any, ) -> None: ... 
@@ -676,7 +671,12 @@ def isclose( @abstractmethod def binary_op( - self, op: BinaryOpCode, rhs1: Any, rhs2: Any, where: Any, args: Any + self, + op: BinaryOpCode, + rhs1: Any, + rhs2: Any, + where: Any, + args: tuple[Scalar, ...], ) -> None: ... @@ -686,8 +686,8 @@ def binary_reduction( op: BinaryOpCode, rhs1: Any, rhs2: Any, - broadcast: Union[NdShape, None], - args: Any, + broadcast: NdShape | None, + args: tuple[Scalar, ...], ) -> None: ... @@ -704,20 +704,28 @@ def where(self, rhs1: Any, rhs2: Any, rhs3: Any) -> None: ... @abstractmethod - def cholesky(self, src: Any, no_tril: bool) -> None: + def cholesky(self, src: Any) -> None: + ... + + @abstractmethod + def qr(self, q: Any, r: Any) -> None: ... @abstractmethod def solve(self, a: Any, b: Any) -> None: ... + @abstractmethod + def svd(self, u: Any, s: Any, vh: Any) -> None: + ... + @abstractmethod def scan( self, op: int, rhs: Any, axis: int, - dtype: Optional[npt.DTypeLike], + dtype: npt.DTypeLike | None, nan_to_identity: bool, ) -> None: ... @@ -731,14 +739,12 @@ def create_window(self, op_code: WindowOpCode, M: Any, *args: Any) -> None: ... @abstractmethod - def packbits( - self, src: Any, axis: Union[int, None], bitorder: BitOrder - ) -> None: + def packbits(self, src: Any, axis: int | None, bitorder: BitOrder) -> None: ... @abstractmethod def unpackbits( - self, src: Any, axis: Union[int, None], bitorder: BitOrder + self, src: Any, axis: int | None, bitorder: BitOrder ) -> None: ... @@ -749,3 +755,11 @@ def _wrap(self, src: Any, new_len: int) -> None: @abstractmethod def histogram(self, src: Any, bins: Any, weights: Any) -> None: ... + + @abstractmethod + def stencil_hint( + self, + low_offsets: tuple[int, ...], + high_offsets: tuple[int, ...], + ) -> None: + ... 
diff --git a/cunumeric/_ufunc/__init__.py b/cupynumeric/_ufunc/__init__.py similarity index 95% rename from cunumeric/_ufunc/__init__.py rename to cupynumeric/_ufunc/__init__.py index 3887e13f4..edd62f1b9 100644 --- a/cunumeric/_ufunc/__init__.py +++ b/cupynumeric/_ufunc/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cunumeric/_ufunc/bit_twiddling.py b/cupynumeric/_ufunc/bit_twiddling.py similarity index 94% rename from cunumeric/_ufunc/bit_twiddling.py rename to cupynumeric/_ufunc/bit_twiddling.py index dc70fc2ad..2943257df 100644 --- a/cunumeric/_ufunc/bit_twiddling.py +++ b/cupynumeric/_ufunc/bit_twiddling.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ # from __future__ import annotations -from cunumeric.config import BinaryOpCode, UnaryOpCode - +from ..config import BinaryOpCode, UnaryOpCode from .ufunc import create_binary_ufunc, create_unary_ufunc, integer_dtypes bitwise_and = create_binary_ufunc( diff --git a/cupynumeric/_ufunc/comparison.py b/cupynumeric/_ufunc/comparison.py new file mode 100644 index 000000000..148854fad --- /dev/null +++ b/cupynumeric/_ufunc/comparison.py @@ -0,0 +1,212 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +import numpy as np + +from .._array.util import convert_to_cupynumeric_ndarray +from ..config import BinaryOpCode, UnaryOpCode, UnaryRedCode +from .ufunc import ( + all_dtypes, + create_binary_ufunc, + create_unary_ufunc, + float_dtypes, + integer_dtypes, + predicate_types_of, + relation_types_of, +) + +if TYPE_CHECKING: + from .._array.array import ndarray + + +def _post_resolution_check( + arr_x: ndarray, + arr_y: ndarray, + obj_x: Any, + obj_y: Any, + op_code: BinaryOpCode, +) -> tuple[ndarray, ndarray, BinaryOpCode]: + """When we were passed Python integers, they may not fit into the operation + dtype. In that case, however, we can just define the result. + Note that as of now, we don't try to do this if both operands were Python + ints. + """ + truthiness = None # cannot guess the truthiness based on the scalar value + + if type(obj_x) is int and type(obj_y) is int: + # No special behavior currently, check if values fit operation + if arr_x.dtype.kind in "iu": + # Check if original Python integer fits first operand. + arr_x.dtype.type(obj_x) + if arr_y.dtype.kind in "iu": + # Check if original Python integer fits second operand. 
+ arr_y.dtype.type(obj_y) + + elif type(obj_x) is int and arr_x.dtype.kind in "iu": + iinfo = np.iinfo(arr_x.dtype) + if obj_x < iinfo.min: + truthiness = op_code in { + BinaryOpCode.NOT_EQUAL, + BinaryOpCode.LESS, + BinaryOpCode.LESS_EQUAL, + } + elif obj_x > iinfo.max: + truthiness = op_code in { + BinaryOpCode.NOT_EQUAL, + BinaryOpCode.GREATER, + BinaryOpCode.GREATER_EQUAL, + } + + if truthiness is not None: + # Replace with an always-true/always-false operation + arr_x = convert_to_cupynumeric_ndarray( + np.array(iinfo.min, dtype=arr_x.dtype) + ) + op_code = ( + BinaryOpCode.GREATER_EQUAL if truthiness else BinaryOpCode.LESS + ) + + elif type(obj_y) is int and arr_y.dtype.kind in "iu": + iinfo = np.iinfo(arr_y.dtype) + if iinfo.min > obj_y: + truthiness = op_code in { + BinaryOpCode.NOT_EQUAL, + BinaryOpCode.GREATER, + BinaryOpCode.GREATER_EQUAL, + } + elif iinfo.max < obj_y: + truthiness = op_code in { + BinaryOpCode.NOT_EQUAL, + BinaryOpCode.LESS, + BinaryOpCode.LESS_EQUAL, + } + + if truthiness is not None: + # Replace with an always-true/always-false operation + arr_y = convert_to_cupynumeric_ndarray( + np.array(iinfo.min, dtype=arr_y.dtype) + ) + op_code = ( + BinaryOpCode.GREATER_EQUAL if truthiness else BinaryOpCode.LESS + ) + + return arr_x, arr_y, op_code + + +greater = create_binary_ufunc( + "Return the truth value of (x1 > x2) element-wise.", + "greater", + BinaryOpCode.GREATER, + relation_types_of(all_dtypes), + post_resolution_check=_post_resolution_check, +) + +greater_equal = create_binary_ufunc( + "Return the truth value of (x1 >= x2) element-wise.", + "greater_equal", + BinaryOpCode.GREATER_EQUAL, + relation_types_of(all_dtypes), + post_resolution_check=_post_resolution_check, +) + +less = create_binary_ufunc( + "Return the truth value of (x1 < x2) element-wise.", + "less", + BinaryOpCode.LESS, + relation_types_of(all_dtypes), + post_resolution_check=_post_resolution_check, +) + +less_equal = create_binary_ufunc( + "Return the truth value of 
(x1 =< x2) element-wise.", + "less", + BinaryOpCode.LESS_EQUAL, + relation_types_of(all_dtypes), + post_resolution_check=_post_resolution_check, +) + +not_equal = create_binary_ufunc( + "Return (x1 != x2) element-wise.", + "not_equal", + BinaryOpCode.NOT_EQUAL, + relation_types_of(all_dtypes), + post_resolution_check=_post_resolution_check, +) + +equal = create_binary_ufunc( + "Return (x1 == x2) element-wise.", + "equal", + BinaryOpCode.EQUAL, + relation_types_of(all_dtypes), + post_resolution_check=_post_resolution_check, +) + +logical_and = create_binary_ufunc( + "Compute the truth value of x1 AND x2 element-wise.", + "logical_and", + BinaryOpCode.LOGICAL_AND, + relation_types_of(all_dtypes), + red_code=UnaryRedCode.ALL, +) + +logical_or = create_binary_ufunc( + "Compute the truth value of x1 OR x2 element-wise.", + "logical_or", + BinaryOpCode.LOGICAL_OR, + relation_types_of(all_dtypes), + red_code=UnaryRedCode.ANY, +) + +logical_xor = create_binary_ufunc( + "Compute the truth value of x1 XOR x2, element-wise.", + "logical_xor", + BinaryOpCode.LOGICAL_XOR, + relation_types_of(all_dtypes), +) + +logical_not = create_unary_ufunc( + "Compute bit-wise inversion, or bit-wise NOT, element-wise.", + "invert", + UnaryOpCode.LOGICAL_NOT, + ( + ["??"] + + predicate_types_of(integer_dtypes) + + predicate_types_of(float_dtypes) + ), + overrides={"?": UnaryOpCode.LOGICAL_NOT}, +) + +maximum = create_binary_ufunc( + "Element-wise maximum of array elements.", + "maximum", + BinaryOpCode.MAXIMUM, + all_dtypes, + red_code=UnaryRedCode.MAX, +) + +fmax = maximum + +minimum = create_binary_ufunc( + "Element-wise minimum of array elements.", + "minimum", + BinaryOpCode.MINIMUM, + all_dtypes, + red_code=UnaryRedCode.MIN, +) + +fmin = minimum diff --git a/cunumeric/_ufunc/floating.py b/cupynumeric/_ufunc/floating.py similarity index 97% rename from cunumeric/_ufunc/floating.py rename to cupynumeric/_ufunc/floating.py index 0dceb691f..6c3644f8f 100644 --- a/cunumeric/_ufunc/floating.py 
+++ b/cupynumeric/_ufunc/floating.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ # from __future__ import annotations -from cunumeric.config import BinaryOpCode, UnaryOpCode - +from ..config import BinaryOpCode, UnaryOpCode from .ufunc import ( create_binary_ufunc, create_multiout_unary_ufunc, diff --git a/cunumeric/_ufunc/math.py b/cupynumeric/_ufunc/math.py similarity index 98% rename from cunumeric/_ufunc/math.py rename to cupynumeric/_ufunc/math.py index 3f79c84ed..dc161b949 100644 --- a/cunumeric/_ufunc/math.py +++ b/cupynumeric/_ufunc/math.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,8 +14,7 @@ # from __future__ import annotations -from cunumeric.config import BinaryOpCode, UnaryOpCode, UnaryRedCode - +from ..config import BinaryOpCode, UnaryOpCode, UnaryRedCode from .ufunc import ( all_but_boolean, all_dtypes, diff --git a/cunumeric/_ufunc/trigonometric.py b/cupynumeric/_ufunc/trigonometric.py similarity index 96% rename from cunumeric/_ufunc/trigonometric.py rename to cupynumeric/_ufunc/trigonometric.py index 0687cc13e..87cc394e7 100644 --- a/cunumeric/_ufunc/trigonometric.py +++ b/cupynumeric/_ufunc/trigonometric.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,8 +14,7 @@ # from __future__ import annotations -from cunumeric.config import BinaryOpCode, UnaryOpCode - +from ..config import BinaryOpCode, UnaryOpCode from .ufunc import ( create_binary_ufunc, create_unary_ufunc, diff --git a/cunumeric/_ufunc/ufunc.py b/cupynumeric/_ufunc/ufunc.py similarity index 77% rename from cunumeric/_ufunc/ufunc.py rename to cupynumeric/_ufunc/ufunc.py index 3bba6b8ad..5b2eb11e9 100644 --- a/cunumeric/_ufunc/ufunc.py +++ b/cupynumeric/_ufunc/ufunc.py @@ -1,4 +1,4 @@ -# Copyright 2021-2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,16 +14,16 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Any, Dict, Optional, Sequence, Union +from typing import TYPE_CHECKING, Any, Callable, Sequence, TypeAlias import numpy as np from legate.core.utils import OrderedSet -from ..array import ( +from .._array.thunk import perform_unary_reduction +from .._array.util import ( add_boilerplate, check_writeable, - convert_to_cunumeric_ndarray, - ndarray, + convert_to_cupynumeric_ndarray, ) from ..config import BinaryOpCode, UnaryOpCode, UnaryRedCode from ..types import NdShape @@ -31,8 +31,20 @@ if TYPE_CHECKING: import numpy.typing as npt + from .._array.array import ndarray from ..types import CastingKind + PostResolutionCheckFunc: TypeAlias = Callable[ + [ + ndarray, + ndarray, + Any, + Any, + BinaryOpCode, + ], + tuple[ndarray, ndarray, BinaryOpCode], + ] + _UNARY_DOCSTRING_TEMPLATE = """{} @@ -67,7 +79,7 @@ numpy.{} Availability --------- +------------ Multiple GPUs, Multiple CPUs """ @@ -105,7 +117,7 @@ numpy.{} Availability --------- +------------ Multiple GPUs, Multiple CPUs """ @@ -143,7 +155,7 @@ numpy.{} Availability --------- +------------ Multiple GPUs, Multiple CPUs """ @@ -183,8 +195,58 @@ def to_dtypes(chars: str) -> tuple[np.dtype[Any], ...]: return 
tuple(np.dtype(char) for char in chars) +def _get_kind_score(kind: type) -> int: + if issubclass(kind, np.bool_): + return 0 + if issubclass(kind, (np.integer, int)): + return 1 + if issubclass(kind, (np.inexact, float, complex)): + return 2 + # unknown type, assume higher score + return 3 + + +def _check_should_use_weak_scalar(key: tuple[str | type, ...]) -> bool: + """Helper function for promotion, where we need to check whether we + should use weak promotion for python floats/integers (NEP 50/NumPy 2). + """ + max_scalar_kind = -1 + max_array_kind = -1 + + for in_t in key: + if isinstance(in_t, str): + kind = _get_kind_score(np.dtype(in_t).type) + max_array_kind = max(max_array_kind, kind) + else: + kind = _get_kind_score(in_t) + max_scalar_kind = max(max_scalar_kind, kind) + + all_scalars_or_arrays = max_scalar_kind == -1 or max_array_kind == -1 + return not all_scalars_or_arrays and max_array_kind >= max_scalar_kind + + +def _default_post_resolution_check( + arr_x: ndarray, + arr_y: ndarray, + obj_x: Any, + obj_y: Any, + op_code: BinaryOpCode, +) -> tuple[ndarray, ndarray, BinaryOpCode]: + """Check whether Python integers fit into integer operand dtypes. + This check is overloaded by comparisons to always succeed. + """ + if type(obj_x) is int and arr_x.dtype.kind in "iu": + # Check if original Python integer fits first operand. + arr_x.dtype.type(obj_x) + if type(obj_y) is int and arr_y.dtype.kind in "iu": + # Check if original Python integer fits second operand. 
+ arr_y.dtype.type(obj_y) + + return arr_x, arr_y, op_code + + class ufunc: - _types: Dict[Any, str] + _types: dict[Any, str] _nin: int _nout: int @@ -231,12 +293,14 @@ def _maybe_cast_input( def _maybe_create_result( self, - out: Union[ndarray, None], + out: ndarray | None, out_shape: NdShape, res_dtype: np.dtype[Any], casting: CastingKind, inputs: tuple[ndarray, ...], ) -> ndarray: + from .._array.array import ndarray + if out is None: return ndarray(shape=out_shape, dtype=res_dtype, inputs=inputs) elif out.dtype != res_dtype: @@ -251,34 +315,34 @@ def _maybe_create_result( return out @staticmethod - def _maybe_cast_output( - out: Union[ndarray, None], result: ndarray - ) -> ndarray: + def _maybe_cast_output(out: ndarray | None, result: ndarray) -> ndarray: if out is None or out is result: return result out._thunk.convert(result._thunk, warn=False) return out @staticmethod - def _maybe_convert_output_to_cunumeric_ndarray( - out: Union[ndarray, npt.NDArray[Any], None] - ) -> Union[ndarray, None]: + def _maybe_convert_output_to_cupynumeric_ndarray( + out: ndarray | npt.NDArray[Any] | None, + ) -> ndarray | None: + from .._array.array import ndarray + if out is None: return None if isinstance(out, ndarray): return out if isinstance(out, np.ndarray): - return convert_to_cunumeric_ndarray(out, share=True) + return convert_to_cupynumeric_ndarray(out, share=True) raise TypeError("return arrays must be of ArrayType") def _prepare_operands( self, *args: Any, - out: Union[ndarray, tuple[ndarray, ...], None], + out: ndarray | tuple[ndarray, ...] 
| None, where: bool = True, ) -> tuple[ Sequence[ndarray], - Sequence[Union[ndarray, None]], + Sequence[ndarray | None], tuple[int, ...], bool, ]: @@ -290,7 +354,7 @@ def _prepare_operands( ) inputs = tuple( - convert_to_cunumeric_ndarray(arr) for arr in args[: self.nin] + convert_to_cupynumeric_ndarray(arr) for arr in args[: self.nin] ) if len(args) > self.nin: @@ -310,7 +374,7 @@ def _prepare_operands( computed_out = out outputs = tuple( - self._maybe_convert_output_to_cunumeric_ndarray(arr) + self._maybe_convert_output_to_cupynumeric_ndarray(arr) for arr in computed_out ) @@ -402,11 +466,11 @@ def _resolve_dtype( def __call__( self, *args: Any, - out: Union[ndarray, None] = None, + out: ndarray | None = None, where: bool = True, casting: CastingKind = "same_kind", order: str = "K", - dtype: Union[np.dtype[Any], None] = None, + dtype: np.dtype[Any] | None = None, **kwargs: Any, ) -> ndarray: (x,), (out,), out_shape, where = self._prepare_operands( @@ -432,7 +496,7 @@ def __call__( ) op_code = self._overrides.get(x.dtype.char, self._op_code) - result._thunk.unary_op(op_code, x._thunk, where, ()) + result._thunk.unary_op(op_code, x._thunk, where) return self._maybe_cast_output(out, result) @@ -487,11 +551,11 @@ def _resolve_dtype( def __call__( self, *args: Any, - out: Union[ndarray, tuple[ndarray, ...], None] = None, + out: ndarray | tuple[ndarray, ...] 
| None = None, where: bool = True, casting: CastingKind = "same_kind", order: str = "K", - dtype: Union[np.dtype[Any], None] = None, + dtype: np.dtype[Any] | None = None, **kwargs: Any, ) -> tuple[ndarray, ...]: (x,), outs, out_shape, where = self._prepare_operands( @@ -529,14 +593,17 @@ def __call__( class binary_ufunc(ufunc): + _post_resolution_check: PostResolutionCheckFunc + def __init__( self, name: str, doc: str, op_code: BinaryOpCode, types: dict[tuple[str, str], str], - red_code: Union[UnaryRedCode, None] = None, + red_code: UnaryRedCode | None = None, use_common_type: bool = True, + post_resolution_check: PostResolutionCheckFunc | None = None, ) -> None: super().__init__(name, doc) @@ -548,15 +615,21 @@ def __init__( self._op_code = op_code self._resolution_cache: dict[ - tuple[str, ...], tuple[np.dtype[Any], ...] + tuple[str | type, ...], tuple[np.dtype[Any], ...] ] = {} self._red_code = red_code self._use_common_type = use_common_type + if post_resolution_check is None: + self._post_resolution_check = _default_post_resolution_check + else: + self._post_resolution_check = post_resolution_check @staticmethod def _find_common_type( arrs: Sequence[ndarray], orig_args: Sequence[Any] ) -> np.dtype[Any]: + from .._array.array import ndarray + all_ndarray = all(isinstance(arg, ndarray) for arg in orig_args) unique_dtypes = OrderedSet(arr.dtype for arr in arrs) # If all operands are ndarrays and they all have the same dtype, @@ -567,8 +640,10 @@ def _find_common_type( scalar_types = [] array_types = [] for arr, orig_arg in zip(arrs, orig_args): - if arr.ndim == 0: - # Make sure all scalar arguments are NumPy arrays + if type(orig_arg) in (int, float, complex): + scalar_types.append(orig_arg) + elif arr.ndim == 0: + # NumPy 1.x needs a 0-D NumPy array for value-based promotion scalar_types.append(np.asarray(orig_arg)) else: array_types.append(arr.dtype) @@ -578,19 +653,29 @@ def _find_common_type( def _resolve_dtype( self, arrs: Sequence[ndarray], - orig_args: 
Sequence[np.dtype[Any]], + orig_args: Sequence[Any], casting: CastingKind, precision_fixed: bool, ) -> tuple[Sequence[ndarray], np.dtype[Any]]: to_dtypes: tuple[np.dtype[Any], ...] - key: tuple[str, ...] + key: tuple[str | type, ...] if self._use_common_type: common_dtype = self._find_common_type(arrs, orig_args) to_dtypes = (common_dtype, common_dtype) key = (common_dtype.char, common_dtype.char) else: to_dtypes = tuple(arr.dtype for arr in arrs) - key = tuple(arr.dtype.char for arr in arrs) + key = tuple( + arr.dtype.char + if type(orig) not in (int, float, complex) + else type(orig) + for orig, arr in zip(orig_args, arrs) + ) + # When all inputs are scalars, cannot use weak logic below. + # (Using arr.dtype.char may be off for huge integers that map to + # an unsigned int. But NumPy should mostly do the same currently.) + if not _check_should_use_weak_scalar(key): + key = tuple(arr.dtype.char for arr in arrs) if key in self._types: arrs = [ @@ -611,10 +696,18 @@ def _resolve_dtype( chosen = None if not precision_fixed: for in_dtypes in self._types.keys(): - if all( - np.can_cast(arr.dtype, to_dtype) - for arr, to_dtype in zip(arrs, in_dtypes) - ): + for in_t, to_dtype in zip(key, in_dtypes): + # Break if `to_dtype` doesn't work. + if isinstance(in_t, str): + if not np.can_cast(in_t, to_dtype): + break + else: + # In NumPy 2, the value doesn't matter. In NumPy 1.x + # it could matter (but caching wouldn't work anyway). 
+ if np.result_type(in_t(0), to_dtype) != to_dtype: + break + else: + # dtypes OK (no break), choose them and break outer chosen = in_dtypes break @@ -622,10 +715,23 @@ def _resolve_dtype( # try to find a match based on the leading operand if chosen is None and not self._use_common_type: for in_dtypes in self._types.keys(): - if np.can_cast(arrs[0].dtype, in_dtypes[0]) and all( - np.can_cast(arr, to_dtype, casting=casting) - for arr, to_dtype in zip(arrs[1:], in_dtypes[1:]) - ): + if not np.can_cast(arrs[0].dtype, in_dtypes[0]): + # Check next in_dtypes + continue + + for in_t, to_dtype in zip(key[1:], in_dtypes[1:]): + # Break if `to_dtype` doesn't work. + if isinstance(in_t, str): + if not np.can_cast( + in_t, to_dtype, casting=casting + ): + break + elif casting != "unsafe": + # Same-kind/safe can use result_type (see above) + if np.result_type(in_t(0), to_dtype) != to_dtype: + break + else: + # dtypes OK (no break), choose them and break outer chosen = in_dtypes break @@ -646,11 +752,11 @@ def _resolve_dtype( def __call__( self, *args: Any, - out: Union[ndarray, None] = None, + out: ndarray | None = None, where: bool = True, casting: CastingKind = "same_kind", order: str = "K", - dtype: Union[np.dtype[Any], None] = None, + dtype: np.dtype[Any] | None = None, **kwargs: Any, ) -> ndarray: arrs, (out,), out_shape, where = self._prepare_operands( @@ -677,11 +783,17 @@ def __call__( arrs, orig_args, casting, precision_fixed ) + # Check python integers operands. For comparisons, this may return + # new values and op_code when the integer is out-of-bounds. 
x1, x2 = arrs + x1, x2, op_code = self._post_resolution_check( + x1, x2, orig_args[0], orig_args[1], self._op_code + ) + result = self._maybe_create_result( out, out_shape, res_dtype, casting, (x1, x2) ) - result._thunk.binary_op(self._op_code, x1._thunk, x2._thunk, where, ()) + result._thunk.binary_op(op_code, x1._thunk, x2._thunk, where, ()) return self._maybe_cast_output(out, result) @@ -689,12 +801,12 @@ def __call__( def reduce( self, array: ndarray, - axis: Union[int, tuple[int, ...], None] = 0, - dtype: Union[np.dtype[Any], None] = None, - out: Union[ndarray, None] = None, + axis: int | tuple[int, ...] | None = 0, + dtype: np.dtype[Any] | None = None, + out: ndarray | None = None, keepdims: bool = False, - initial: Union[Any, None] = None, - where: Optional[ndarray] = None, + initial: Any | None = None, + where: ndarray | None = None, ) -> ndarray: """ reduce(array, axis=0, dtype=None, out=None, keepdims=False, initial= binary_ufunc: doc = _BINARY_DOCSTRING_TEMPLATE.format(summary, name) types_dict = dict(_parse_binary_ufunc_type(ty) for ty in types) @@ -823,4 +947,5 @@ def create_binary_ufunc( types_dict, red_code=red_code, use_common_type=use_common_type, + post_resolution_check=post_resolution_check, ) diff --git a/cupynumeric/_utils/__init__.py b/cupynumeric/_utils/__init__.py new file mode 100644 index 000000000..626ef7aae --- /dev/null +++ b/cupynumeric/_utils/__init__.py @@ -0,0 +1,19 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import numpy as np + +is_np2 = np.lib.NumpyVersion(np.__version__) >= "2.0.0b1" diff --git a/cupynumeric/_utils/array.py b/cupynumeric/_utils/array.py new file mode 100644 index 000000000..eda134045 --- /dev/null +++ b/cupynumeric/_utils/array.py @@ -0,0 +1,113 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from functools import reduce +from typing import Any + +import legate.core.types as ty +import numpy as np + +from ..types import NdShape + +SUPPORTED_DTYPES = { + np.dtype(bool): ty.bool_, + np.dtype(np.int8): ty.int8, + np.dtype(np.int16): ty.int16, + np.dtype(np.int32): ty.int32, + np.dtype(np.int64): ty.int64, + np.dtype(np.uint8): ty.uint8, + np.dtype(np.uint16): ty.uint16, + np.dtype(np.uint32): ty.uint32, + np.dtype(np.uint64): ty.uint64, + np.dtype(np.float16): ty.float16, + np.dtype(np.float32): ty.float32, + np.dtype(np.float64): ty.float64, + np.dtype(np.complex64): ty.complex64, + np.dtype(np.complex128): ty.complex128, +} + + +def is_supported_dtype(dtype: str | np.dtype[Any]) -> bool: + """ + Whether a NumPy dtype is supported by cuPyNumeric + + Parameters + ---------- + dtype : data-type + The dtype to query + + Returns + ------- + res : bool + True if `dtype` is a supported dtype + """ + return np.dtype(dtype) in SUPPORTED_DTYPES + + +def to_core_type(dtype: str | np.dtype[Any]) -> ty.Type: + core_dtype = SUPPORTED_DTYPES.get(np.dtype(dtype)) + if core_dtype is None: + raise TypeError(f"cuPyNumeric does not support dtype={dtype}") + return core_dtype + + +def is_advanced_indexing(key: Any) -> bool: + if key is Ellipsis or key is None: # np.newdim case + return False + if np.isscalar(key): + return False + if isinstance(key, slice): + return False + if isinstance(key, tuple): + return any(is_advanced_indexing(k) for k in key) + # Any other kind of thing leads to advanced indexing + return True + + +def calculate_volume(shape: NdShape) -> int: + if len(shape) == 0: + return 0 + return reduce(lambda x, y: x * y, shape) + + +def max_identity( + ty: np.dtype[Any], +) -> int | np.floating[Any] | bool | np.complexfloating[Any, Any]: + if ty.kind == "i" or ty.kind == "u": + return np.iinfo(ty).min + elif ty.kind == "f": + return np.finfo(ty).min + elif ty.kind == "c": + return np.finfo(np.float64).min + 
np.finfo(np.float64).min * 1j + elif ty.kind == "b": + return False + else: + raise ValueError(f"Unsupported dtype: {ty}") + + +def min_identity( + ty: np.dtype[Any], +) -> int | np.floating[Any] | bool | np.complexfloating[Any, Any]: + if ty.kind == "i" or ty.kind == "u": + return np.iinfo(ty).max + elif ty.kind == "f": + return np.finfo(ty).max + elif ty.kind == "c": + return np.finfo(np.float64).max + np.finfo(np.float64).max * 1j + elif ty.kind == "b": + return True + else: + raise ValueError(f"Unsupported dtype: {ty}") diff --git a/cunumeric/coverage.py b/cupynumeric/_utils/coverage.py similarity index 87% rename from cunumeric/coverage.py rename to cupynumeric/_utils/coverage.py index a8e57285f..d0d0f0eba 100644 --- a/cunumeric/coverage.py +++ b/cupynumeric/_utils/coverage.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -17,36 +17,21 @@ import warnings from dataclasses import dataclass from functools import WRAPPER_ASSIGNMENTS, wraps -from types import ( - BuiltinFunctionType, - FunctionType, - MethodDescriptorType, - MethodType, - ModuleType, -) -from typing import ( - Any, - Callable, - Container, - Iterable, - Mapping, - Optional, - Union, - cast, -) +from types import BuiltinFunctionType, FunctionType, ModuleType +from typing import Any, Callable, Container, Iterable, Mapping, Protocol, cast from legate.core import track_provenance from legate.core.utils import OrderedSet -from typing_extensions import Protocol -from .runtime import runtime -from .settings import settings -from .utils import deep_apply, find_last_user_frames, find_last_user_stacklevel +from ..runtime import runtime +from ..settings import settings +from .stack import find_last_user_frames, find_last_user_stacklevel +from .structure import deep_apply __all__ = ("clone_module", "clone_class") FALLBACK_WARNING = ( - "cuNumeric has not implemented {what} " + "cuPyNumeric has not implemented {what} " + "and is falling back to canonical NumPy. " + "You may notice significantly decreased performance " + "for this function call." @@ -60,7 +45,7 @@ def filter_namespace( ns: Mapping[str, Any], *, - omit_names: Optional[Container[str]] = None, + omit_names: Container[str] | None = None, omit_types: tuple[type, ...] 
= (), ) -> dict[str, Any]: omit_names = omit_names or OrderedSet() @@ -84,7 +69,7 @@ class CuWrapperMetadata: class CuWrapped(AnyCallable, Protocol): - _cunumeric: CuWrapperMetadata + _cupynumeric: CuWrapperMetadata __wrapped__: AnyCallable __name__: str __qualname__: str @@ -131,7 +116,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: multi = "Multiple GPUs" in (getattr(func, "__doc__", None) or "") single = "Single GPU" in (getattr(func, "__doc__", None) or "") or multi - wrapper._cunumeric = CuWrapperMetadata( + wrapper._cupynumeric = CuWrapperMetadata( implemented=True, single=single, multi=multi ) @@ -148,7 +133,7 @@ def unimplemented( prefix: str, name: str, reporting: bool = True, - fallback: Union[Callable[[Any], Any], None] = None, + fallback: Callable[[Any], Any] | None = None, ) -> CuWrapped: name = f"{prefix}.{name}" @@ -156,7 +141,7 @@ def unimplemented( # all array-like arguments to `numpy.ndarray` through `__array__()` (taking # some care to skip the `__array_function__` dispatch logic, to avoid # infinite loops). However, it appears that this behavior is inconsistent - # in NumPy, so we will instead convert any `cunumeric.ndarray`s manually + # in NumPy, so we will instead convert any `cupynumeric.ndarray`s manually # before calling into NumPy. wrapper: CuWrapped @@ -194,13 +179,13 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return func(*args, **kwargs) wrapper.__doc__ = f""" - cuNumeric has not implemented this function, and will fall back to NumPy. + cuPyNumeric has not implemented this function, and will fall back to NumPy. 
See Also -------- {name} """ - wrapper._cunumeric = CuWrapperMetadata(implemented=False) + wrapper._cupynumeric = CuWrapperMetadata(implemented=False) return wrapper @@ -208,7 +193,7 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: def clone_module( origin_module: ModuleType, new_globals: dict[str, Any], - fallback: Union[Callable[[Any], Any], None] = None, + fallback: Callable[[Any], Any] | None = None, include_builtin_function_type: bool = False, ) -> None: """Copy attributes from one module to another, excluding submodules @@ -224,7 +209,7 @@ def clone_module( new_globals : dict A globals() dict for the new module to clone into - fallback : Union[Callable[[Any], Any], None] + fallback :Callable[[Any], Any] | None A function that will be applied to each argument before calling into the original module, to handle unimplemented functions. The function will be called recursively on list/tuple/dict containers, and should @@ -251,7 +236,7 @@ def clone_module( reporting = settings.report_coverage() - from ._ufunc.ufunc import ufunc as lgufunc + from .._ufunc.ufunc import ufunc as lgufunc for attr, value in new_globals.items(): # Only need to wrap things that are in the origin module to begin with @@ -315,13 +300,19 @@ def clone_module( def should_wrap(obj: object) -> bool: - return isinstance(obj, (FunctionType, MethodType, MethodDescriptorType)) + # custom callables, e.g. cython used in np2, do not inherit anything. 
See + # https://github.com/nv-legate/cupynumeric.internal/issues/179#issuecomment-2423813051 + return ( + callable(obj) + and hasattr(obj, "__get__") + and not hasattr(obj, "__set__") + ) def clone_class( origin_class: type, - omit_names: Union[Iterable[str], None] = None, - fallback: Union[Callable[[Any], Any], None] = None, + omit_names: Iterable[str] | None = None, + fallback: Callable[[Any], Any] | None = None, ) -> Callable[[type], type]: """Copy attributes from one class to another @@ -373,12 +364,12 @@ def _clone_class(cls: type) -> type: def is_implemented(obj: Any) -> bool: - return hasattr(obj, "_cunumeric") and obj._cunumeric.implemented + return hasattr(obj, "_cupynumeric") and obj._cupynumeric.implemented def is_single(obj: Any) -> bool: - return hasattr(obj, "_cunumeric") and obj._cunumeric.single + return hasattr(obj, "_cupynumeric") and obj._cupynumeric.single def is_multi(obj: Any) -> bool: - return hasattr(obj, "_cunumeric") and obj._cunumeric.multi + return hasattr(obj, "_cupynumeric") and obj._cupynumeric.multi diff --git a/cunumeric/utils.py b/cupynumeric/_utils/linalg.py similarity index 51% rename from cunumeric/utils.py rename to cupynumeric/_utils/linalg.py index 8c2d70140..5aa0b292c 100644 --- a/cunumeric/utils.py +++ b/cupynumeric/_utils/linalg.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,108 +14,12 @@ # from __future__ import annotations -import traceback -from functools import reduce from string import ascii_lowercase, ascii_uppercase -from types import FrameType -from typing import Any, Callable, List, Sequence, Tuple, TypeVar, Union +from typing import Sequence -import legate.core.types as ty -import numpy as np from legate.core.utils import OrderedSet -from .types import NdShape - -SUPPORTED_DTYPES = { - np.dtype(np.bool_): ty.bool_, - np.dtype(np.int8): ty.int8, - np.dtype(np.int16): ty.int16, - np.dtype(np.int32): ty.int32, - np.dtype(np.int64): ty.int64, - np.dtype(np.uint8): ty.uint8, - np.dtype(np.uint16): ty.uint16, - np.dtype(np.uint32): ty.uint32, - np.dtype(np.uint64): ty.uint64, - np.dtype(np.float16): ty.float16, - np.dtype(np.float32): ty.float32, - np.dtype(np.float64): ty.float64, - np.dtype(np.complex64): ty.complex64, - np.dtype(np.complex128): ty.complex128, -} - - -def is_supported_type(dtype: Union[str, np.dtype[Any]]) -> bool: - return np.dtype(dtype) in SUPPORTED_DTYPES - - -def to_core_dtype(dtype: Union[str, np.dtype[Any]]) -> ty.Dtype: - core_dtype = SUPPORTED_DTYPES.get(np.dtype(dtype)) - if core_dtype is None: - raise TypeError(f"cuNumeric does not support dtype={dtype}") - return core_dtype - - -def is_advanced_indexing(key: Any) -> bool: - if key is Ellipsis or key is None: # np.newdim case - return False - if np.isscalar(key): - return False - if isinstance(key, slice): - return False - if isinstance(key, tuple): - return any(is_advanced_indexing(k) for k in key) - # Any other kind of thing leads to advanced indexing - return True - - -def find_last_user_stacklevel() -> int: - stacklevel = 1 - for frame, _ in traceback.walk_stack(None): - if not frame.f_globals["__name__"].startswith("cunumeric"): - break - stacklevel += 1 - return stacklevel - - -def get_line_number_from_frame(frame: FrameType) -> str: - return f"{frame.f_code.co_filename}:{frame.f_lineno}" - - -def find_last_user_frames(top_only: bool = 
True) -> str: - for last, _ in traceback.walk_stack(None): - if "__name__" not in last.f_globals: - continue - name = last.f_globals["__name__"] - if not any(name.startswith(pkg) for pkg in ("cunumeric", "legate")): - break - - if top_only: - return get_line_number_from_frame(last) - - frames: list[FrameType] = [] - curr: Union[FrameType, None] = last - while curr is not None: - if "legion_top.py" in curr.f_code.co_filename: - break - frames.append(curr) - curr = curr.f_back - return "|".join(get_line_number_from_frame(f) for f in frames) - - -def calculate_volume(shape: NdShape) -> int: - if len(shape) == 0: - return 0 - return reduce(lambda x, y: x * y, shape) - - -T = TypeVar("T") - - -def tuple_pop(tup: Tuple[T, ...], index: int) -> Tuple[T, ...]: - return tup[:index] + tup[index + 1 :] - - -Modes = Tuple[List[str], List[str], List[str]] +Modes = tuple[list[str], list[str], list[str]] def dot_modes(a_ndim: int, b_ndim: int) -> Modes: @@ -165,14 +69,9 @@ def matmul_modes(a_ndim: int, b_ndim: int) -> Modes: Axes = Sequence[int] -AxesPair = Tuple[Axes, Axes] -AxesPairLikeTuple = Union[ - Tuple[int, int], - Tuple[int, Axes], - Tuple[Axes, int], - Tuple[Axes, Axes], -] -AxesPairLike = Union[int, AxesPairLikeTuple] +AxesPair = tuple[Axes, Axes] +AxesPairLikeTuple = tuple[int | Axes, int | Axes] +AxesPairLike = int | AxesPairLikeTuple def tensordot_modes(a_ndim: int, b_ndim: int, axes: AxesPairLike) -> Modes: @@ -229,23 +128,3 @@ def check_axes(a_axes: Axes, b_axes: Axes) -> None: ] return (a_modes, b_modes, a_out + b_out) - - -def deep_apply(obj: Any, func: Callable[[Any], Any]) -> Any: - """ - Apply the provided function to objects contained at any depth within a data - structure. - - This function will recurse over arbitrary nestings of lists, tuples and - dicts. This recursion logic is rather limited, but this function is - primarily meant to be used for arguments of NumPy API calls, which - shouldn't nest their arrays very deep. 
- """ - if isinstance(obj, list): - return [deep_apply(x, func) for x in obj] - elif isinstance(obj, tuple): - return tuple(deep_apply(x, func) for x in obj) - elif isinstance(obj, dict): - return {k: deep_apply(v, func) for k, v in obj.items()} - else: - return func(obj) diff --git a/cupynumeric/_utils/stack.py b/cupynumeric/_utils/stack.py new file mode 100644 index 000000000..f5e714a3c --- /dev/null +++ b/cupynumeric/_utils/stack.py @@ -0,0 +1,52 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +import traceback +from types import FrameType + + +def find_last_user_stacklevel() -> int: + stacklevel = 1 + for frame, _ in traceback.walk_stack(None): + if not frame.f_globals["__name__"].startswith("cupynumeric"): + break + stacklevel += 1 + return stacklevel + + +def get_line_number_from_frame(frame: FrameType) -> str: + return f"{frame.f_code.co_filename}:{frame.f_lineno}" + + +def find_last_user_frames(top_only: bool = True) -> str: + for last, _ in traceback.walk_stack(None): + if "__name__" not in last.f_globals: + continue + name = last.f_globals["__name__"] + if not any(name.startswith(pkg) for pkg in ("cupynumeric", "legate")): + break + + if top_only: + return get_line_number_from_frame(last) + + frames: list[FrameType] = [] + curr: FrameType | None = last + while curr is not None: + if "legion_top.py" in curr.f_code.co_filename: + break + frames.append(curr) + curr = curr.f_back + return "|".join(get_line_number_from_frame(f) for f in frames) diff --git a/cupynumeric/_utils/structure.py b/cupynumeric/_utils/structure.py new file mode 100644 index 000000000..14bf6a3a4 --- /dev/null +++ b/cupynumeric/_utils/structure.py @@ -0,0 +1,37 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import Any, Callable + + +def deep_apply(obj: Any, func: Callable[[Any], Any]) -> Any: + """ + Apply the provided function to objects contained at any depth within a data + structure. + + This function will recurse over arbitrary nestings of lists, tuples and + dicts. This recursion logic is rather limited, but this function is + primarily meant to be used for arguments of NumPy API calls, which + shouldn't nest their arrays very deep. + """ + if isinstance(obj, list): + return [deep_apply(x, func) for x in obj] + elif isinstance(obj, tuple): + return tuple(deep_apply(x, func) for x in obj) + elif isinstance(obj, dict): + return {k: deep_apply(v, func) for k, v in obj.items()} + else: + return func(obj) diff --git a/cunumeric/_version.py b/cupynumeric/_version.py similarity index 99% rename from cunumeric/_version.py rename to cupynumeric/_version.py index 7c006fdc1..9d0505089 100644 --- a/cunumeric/_version.py +++ b/cupynumeric/_version.py @@ -43,8 +43,8 @@ def get_config(): cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "v" - cfg.parentdir_prefix = "cunumeric-" - cfg.versionfile_source = "cunumeric/_version.py" + cfg.parentdir_prefix = "cupynumeric-" + cfg.versionfile_source = "cupynumeric/_version.py" cfg.verbose = False return cfg diff --git a/cupynumeric/config.py b/cupynumeric/config.py new file mode 100644 index 000000000..b3cac0573 --- /dev/null +++ b/cupynumeric/config.py @@ -0,0 +1,835 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +import os +import platform +from abc import abstractmethod +from ctypes import CDLL, RTLD_GLOBAL +from enum import IntEnum, unique +from typing import TYPE_CHECKING, Any, cast + +import cffi # type: ignore +import numpy as np + +if TYPE_CHECKING: + import numpy.typing as npt + + +class _ReductionOpIds: + argmax_redop_id: int + argmin_redop_id: int + + +class _CupynumericSharedLib: + CUPYNUMERIC_ADVANCED_INDEXING: int + CUPYNUMERIC_ARANGE: int + CUPYNUMERIC_ARGWHERE: int + CUPYNUMERIC_BATCHED_CHOLESKY: int + CUPYNUMERIC_BINARY_OP: int + CUPYNUMERIC_BINARY_RED: int + CUPYNUMERIC_BINCOUNT: int + CUPYNUMERIC_BINOP_ADD: int + CUPYNUMERIC_BINOP_ARCTAN2: int + CUPYNUMERIC_BINOP_BITWISE_AND: int + CUPYNUMERIC_BINOP_BITWISE_OR: int + CUPYNUMERIC_BINOP_BITWISE_XOR: int + CUPYNUMERIC_BINOP_COPYSIGN: int + CUPYNUMERIC_BINOP_DIVIDE: int + CUPYNUMERIC_BINOP_EQUAL: int + CUPYNUMERIC_BINOP_FLOAT_POWER: int + CUPYNUMERIC_BINOP_FLOOR_DIVIDE: int + CUPYNUMERIC_BINOP_FMOD: int + CUPYNUMERIC_BINOP_GCD: int + CUPYNUMERIC_BINOP_GREATER: int + CUPYNUMERIC_BINOP_GREATER_EQUAL: int + CUPYNUMERIC_BINOP_HYPOT: int + CUPYNUMERIC_BINOP_ISCLOSE: int + CUPYNUMERIC_BINOP_LCM: int + CUPYNUMERIC_BINOP_LDEXP: int + CUPYNUMERIC_BINOP_LEFT_SHIFT: int + CUPYNUMERIC_BINOP_LESS: int + CUPYNUMERIC_BINOP_LESS_EQUAL: int + CUPYNUMERIC_BINOP_LOGADDEXP2: int + CUPYNUMERIC_BINOP_LOGADDEXP: int + CUPYNUMERIC_BINOP_LOGICAL_AND: int + CUPYNUMERIC_BINOP_LOGICAL_OR: int + CUPYNUMERIC_BINOP_LOGICAL_XOR: int + CUPYNUMERIC_BINOP_MAXIMUM: int + CUPYNUMERIC_BINOP_MINIMUM: int + CUPYNUMERIC_BINOP_MOD: int + CUPYNUMERIC_BINOP_MULTIPLY: int + CUPYNUMERIC_BINOP_NEXTAFTER: int + CUPYNUMERIC_BINOP_NOT_EQUAL: int + CUPYNUMERIC_BINOP_POWER: int + CUPYNUMERIC_BINOP_RIGHT_SHIFT: int + CUPYNUMERIC_BINOP_SUBTRACT: int + CUPYNUMERIC_BITGENERATOR: int + CUPYNUMERIC_BITGENOP_DISTRIBUTION: 
int + CUPYNUMERIC_BITGENTYPE_DEFAULT: int + CUPYNUMERIC_BITGENTYPE_XORWOW: int + CUPYNUMERIC_BITGENTYPE_MRG32K3A: int + CUPYNUMERIC_BITGENTYPE_MTGP32: int + CUPYNUMERIC_BITGENTYPE_MT19937: int + CUPYNUMERIC_BITGENTYPE_PHILOX4_32_10: int + CUPYNUMERIC_BITGENDIST_INTEGERS_16: int + CUPYNUMERIC_BITGENDIST_INTEGERS_32: int + CUPYNUMERIC_BITGENDIST_INTEGERS_64: int + CUPYNUMERIC_BITGENDIST_UNIFORM_32: int + CUPYNUMERIC_BITGENDIST_UNIFORM_64: int + CUPYNUMERIC_BITGENDIST_LOGNORMAL_32: int + CUPYNUMERIC_BITGENDIST_LOGNORMAL_64: int + CUPYNUMERIC_BITGENDIST_NORMAL_32: int + CUPYNUMERIC_BITGENDIST_NORMAL_64: int + CUPYNUMERIC_BITGENDIST_POISSON: int + CUPYNUMERIC_BITGENDIST_EXPONENTIAL_32: int + CUPYNUMERIC_BITGENDIST_EXPONENTIAL_64: int + CUPYNUMERIC_BITGENDIST_GUMBEL_32: int + CUPYNUMERIC_BITGENDIST_GUMBEL_64: int + CUPYNUMERIC_BITGENDIST_LAPLACE_32: int + CUPYNUMERIC_BITGENDIST_LAPLACE_64: int + CUPYNUMERIC_BITGENDIST_LOGISTIC_32: int + CUPYNUMERIC_BITGENDIST_LOGISTIC_64: int + CUPYNUMERIC_BITGENDIST_PARETO_32: int + CUPYNUMERIC_BITGENDIST_PARETO_64: int + CUPYNUMERIC_BITGENDIST_POWER_32: int + CUPYNUMERIC_BITGENDIST_POWER_64: int + CUPYNUMERIC_BITGENDIST_RAYLEIGH_32: int + CUPYNUMERIC_BITGENDIST_RAYLEIGH_64: int + CUPYNUMERIC_BITGENDIST_CAUCHY_32: int + CUPYNUMERIC_BITGENDIST_CAUCHY_64: int + CUPYNUMERIC_BITGENDIST_TRIANGULAR_32: int + CUPYNUMERIC_BITGENDIST_TRIANGULAR_64: int + CUPYNUMERIC_BITGENDIST_WEIBULL_32: int + CUPYNUMERIC_BITGENDIST_WEIBULL_64: int + CUPYNUMERIC_BITGENDIST_BYTES: int + CUPYNUMERIC_BITGENDIST_BETA_32: int + CUPYNUMERIC_BITGENDIST_BETA_64: int + CUPYNUMERIC_BITGENDIST_F_32: int + CUPYNUMERIC_BITGENDIST_F_64: int + CUPYNUMERIC_BITGENDIST_LOGSERIES: int + CUPYNUMERIC_BITGENDIST_NONCENTRAL_F_32: int + CUPYNUMERIC_BITGENDIST_NONCENTRAL_F_64: int + CUPYNUMERIC_BITGENDIST_CHISQUARE_32: int + CUPYNUMERIC_BITGENDIST_CHISQUARE_64: int + CUPYNUMERIC_BITGENDIST_GAMMA_32: int + CUPYNUMERIC_BITGENDIST_GAMMA_64: int + CUPYNUMERIC_BITGENDIST_STANDARD_T_32: int 
+ CUPYNUMERIC_BITGENDIST_STANDARD_T_64: int + CUPYNUMERIC_BITGENDIST_HYPERGEOMETRIC: int + CUPYNUMERIC_BITGENDIST_VONMISES_32: int + CUPYNUMERIC_BITGENDIST_VONMISES_64: int + CUPYNUMERIC_BITGENDIST_ZIPF: int + CUPYNUMERIC_BITGENDIST_GEOMETRIC: int + CUPYNUMERIC_BITGENDIST_WALD_32: int + CUPYNUMERIC_BITGENDIST_WALD_64: int + CUPYNUMERIC_BITGENDIST_BINOMIAL: int + CUPYNUMERIC_BITGENDIST_NEGATIVE_BINOMIAL: int + CUPYNUMERIC_BITGENOP_CREATE: int + CUPYNUMERIC_BITGENOP_DESTROY: int + CUPYNUMERIC_BITGENOP_RAND_RAW: int + CUPYNUMERIC_BITORDER_BIG: int + CUPYNUMERIC_BITORDER_LITTLE: int + CUPYNUMERIC_CHOOSE: int + CUPYNUMERIC_CONTRACT: int + CUPYNUMERIC_CONVERT: int + CUPYNUMERIC_CONVERT_NAN_NOOP: int + CUPYNUMERIC_CONVERT_NAN_PROD: int + CUPYNUMERIC_CONVERT_NAN_SUM: int + CUPYNUMERIC_CONVOLVE: int + CUPYNUMERIC_CONVOLVE_AUTO: int + CUPYNUMERIC_CONVOLVE_DIRECT: int + CUPYNUMERIC_CONVOLVE_FFT: int + CUPYNUMERIC_DIAG: int + CUPYNUMERIC_DOT: int + CUPYNUMERIC_EYE: int + CUPYNUMERIC_FFT: int + CUPYNUMERIC_FFT_C2C: int + CUPYNUMERIC_FFT_C2R: int + CUPYNUMERIC_FFT_D2Z: int + CUPYNUMERIC_FFT_FORWARD: int + CUPYNUMERIC_FFT_INVERSE: int + CUPYNUMERIC_FFT_R2C: int + CUPYNUMERIC_FFT_Z2D: int + CUPYNUMERIC_FFT_Z2Z: int + CUPYNUMERIC_FILL: int + CUPYNUMERIC_FLIP: int + CUPYNUMERIC_GEMM: int + CUPYNUMERIC_HISTOGRAM: int + CUPYNUMERIC_LOAD_CUDALIBS: int + CUPYNUMERIC_MATMUL: int + CUPYNUMERIC_MATVECMUL: int + CUPYNUMERIC_MAX_MAPPERS: int + CUPYNUMERIC_MAX_REDOPS: int + CUPYNUMERIC_MAX_TASKS: int + CUPYNUMERIC_MP_POTRF: int + CUPYNUMERIC_MP_SOLVE: int + CUPYNUMERIC_NONZERO: int + CUPYNUMERIC_PACKBITS: int + CUPYNUMERIC_POTRF: int + CUPYNUMERIC_PUTMASK: int + CUPYNUMERIC_QR: int + CUPYNUMERIC_RAND: int + CUPYNUMERIC_READ: int + CUPYNUMERIC_RED_ALL: int + CUPYNUMERIC_RED_ANY: int + CUPYNUMERIC_RED_ARGMAX: int + CUPYNUMERIC_RED_ARGMIN: int + CUPYNUMERIC_RED_CONTAINS: int + CUPYNUMERIC_RED_COUNT_NONZERO: int + CUPYNUMERIC_RED_MAX: int + CUPYNUMERIC_RED_MIN: int + CUPYNUMERIC_RED_NANARGMAX: 
int + CUPYNUMERIC_RED_NANARGMIN: int + CUPYNUMERIC_RED_NANMAX: int + CUPYNUMERIC_RED_NANMIN: int + CUPYNUMERIC_RED_NANPROD: int + CUPYNUMERIC_RED_NANSUM: int + CUPYNUMERIC_RED_PROD: int + CUPYNUMERIC_RED_SUM: int + CUPYNUMERIC_RED_SUM_SQUARES: int + CUPYNUMERIC_RED_VARIANCE: int + CUPYNUMERIC_REPEAT: int + CUPYNUMERIC_SCALAR_UNARY_RED: int + CUPYNUMERIC_SCAN_GLOBAL: int + CUPYNUMERIC_SCAN_LOCAL: int + CUPYNUMERIC_SCAN_PROD: int + CUPYNUMERIC_SCAN_SUM: int + CUPYNUMERIC_SEARCHSORTED: int + CUPYNUMERIC_SELECT: int + CUPYNUMERIC_SOLVE: int + CUPYNUMERIC_SORT: int + CUPYNUMERIC_SVD: int + CUPYNUMERIC_SYRK: int + CUPYNUMERIC_TILE: int + CUPYNUMERIC_TRANSPOSE_COPY_2D: int + CUPYNUMERIC_TRILU: int + CUPYNUMERIC_TRSM: int + CUPYNUMERIC_UNARY_OP: int + CUPYNUMERIC_UNARY_RED: int + CUPYNUMERIC_UNIQUE: int + CUPYNUMERIC_UNIQUE_REDUCE: int + CUPYNUMERIC_UNLOAD_CUDALIBS: int + CUPYNUMERIC_UNPACKBITS: int + CUPYNUMERIC_UOP_ABSOLUTE: int + CUPYNUMERIC_UOP_ANGLE: int + CUPYNUMERIC_UOP_ARCCOS: int + CUPYNUMERIC_UOP_ARCCOSH: int + CUPYNUMERIC_UOP_ARCSIN: int + CUPYNUMERIC_UOP_ARCSINH: int + CUPYNUMERIC_UOP_ARCTAN: int + CUPYNUMERIC_UOP_ARCTANH: int + CUPYNUMERIC_UOP_CBRT: int + CUPYNUMERIC_UOP_CEIL: int + CUPYNUMERIC_UOP_CLIP: int + CUPYNUMERIC_UOP_CONJ: int + CUPYNUMERIC_UOP_COPY: int + CUPYNUMERIC_UOP_COS: int + CUPYNUMERIC_UOP_COSH: int + CUPYNUMERIC_UOP_DEG2RAD: int + CUPYNUMERIC_UOP_EXP2: int + CUPYNUMERIC_UOP_EXP: int + CUPYNUMERIC_UOP_EXPM1: int + CUPYNUMERIC_UOP_FLOOR: int + CUPYNUMERIC_UOP_FREXP: int + CUPYNUMERIC_UOP_GETARG: int + CUPYNUMERIC_UOP_IMAG: int + CUPYNUMERIC_UOP_INVERT: int + CUPYNUMERIC_UOP_ISFINITE: int + CUPYNUMERIC_UOP_ISINF: int + CUPYNUMERIC_UOP_ISNAN: int + CUPYNUMERIC_UOP_LOG10: int + CUPYNUMERIC_UOP_LOG1P: int + CUPYNUMERIC_UOP_LOG2: int + CUPYNUMERIC_UOP_LOG: int + CUPYNUMERIC_UOP_LOGICAL_NOT: int + CUPYNUMERIC_UOP_MODF: int + CUPYNUMERIC_UOP_NEGATIVE: int + CUPYNUMERIC_UOP_POSITIVE: int + CUPYNUMERIC_UOP_RAD2DEG: int + CUPYNUMERIC_UOP_REAL: int + 
CUPYNUMERIC_UOP_RECIPROCAL: int + CUPYNUMERIC_UOP_RINT: int + CUPYNUMERIC_UOP_ROUND: int + CUPYNUMERIC_UOP_SIGN: int + CUPYNUMERIC_UOP_SIGNBIT: int + CUPYNUMERIC_UOP_SIN: int + CUPYNUMERIC_UOP_SINH: int + CUPYNUMERIC_UOP_SQRT: int + CUPYNUMERIC_UOP_SQUARE: int + CUPYNUMERIC_UOP_TAN: int + CUPYNUMERIC_UOP_TANH: int + CUPYNUMERIC_UOP_TRUNC: int + CUPYNUMERIC_WHERE: int + CUPYNUMERIC_WINDOW: int + CUPYNUMERIC_WINDOW_BARLETT: int + CUPYNUMERIC_WINDOW_BLACKMAN: int + CUPYNUMERIC_WINDOW_HAMMING: int + CUPYNUMERIC_WINDOW_HANNING: int + CUPYNUMERIC_WINDOW_KAISER: int + CUPYNUMERIC_WRAP: int + CUPYNUMERIC_WRITE: int + CUPYNUMERIC_ZIP: int + + @abstractmethod + def cupynumeric_has_cusolvermp(self) -> bool: + ... + + @abstractmethod + def cupynumeric_max_eager_volume(self) -> int: + ... + + @abstractmethod + def cupynumeric_register_reduction_ops(self, code: int) -> _ReductionOpIds: + ... + + +def dlopen_no_autoclose(ffi: Any, lib_path: str) -> Any: + # Use an already-opened library handle, which cffi will convert to a + # regular FFI object (using the definitions previously added using + # ffi.cdef), but will not automatically dlclose() on collection. + lib = CDLL(lib_path, mode=RTLD_GLOBAL) + return ffi.dlopen(ffi.cast("void *", lib._handle)) + + +# Load the cuPyNumeric library first so we have a shared object that +# we can use to initialize all these configuration enumerations +class CuPyNumericLib: + def __init__(self, name: str) -> None: + self.name = name + + shared_lib_path = self.get_shared_library() + assert shared_lib_path is not None + header = self.get_c_header() + ffi = cffi.FFI() + if header is not None: + ffi.cdef(header) + # Don't use ffi.dlopen(), because that will call dlclose() + # automatically when the object gets collected, thus removing + # symbols that may be needed when destroying C++ objects later + # (e.g. vtable entries, which will be queried for virtual + # destructors), causing errors at shutdown.
+ shared_lib = dlopen_no_autoclose(ffi, shared_lib_path) + self.shared_object = cast(_CupynumericSharedLib, shared_lib) + + def register(self) -> None: + from legate.core import get_legate_runtime + + # We need to make sure that the runtime is started + get_legate_runtime() + + callback = getattr( + self.shared_object, "cupynumeric_perform_registration" + ) + callback() + + def get_shared_library(self) -> str: + from .install_info import libpath + + return os.path.join( + libpath, "libcupynumeric" + self.get_library_extension() + ) + + def get_c_header(self) -> str: + from .install_info import header + + return header + + @staticmethod + def get_library_extension() -> str: + os_name = platform.system() + if os_name == "Linux": + return ".so" + elif os_name == "Darwin": + return ".dylib" + raise RuntimeError(f"unknown platform {os_name!r}") + + +CUPYNUMERIC_LIB_NAME = "cupynumeric" +cupynumeric_lib = CuPyNumericLib(CUPYNUMERIC_LIB_NAME) +cupynumeric_lib.register() +_cupynumeric = cupynumeric_lib.shared_object + + +# Match these to CuPyNumericOpCode in cupynumeric_c.h +@unique +class CuPyNumericOpCode(IntEnum): + ADVANCED_INDEXING = _cupynumeric.CUPYNUMERIC_ADVANCED_INDEXING + ARANGE = _cupynumeric.CUPYNUMERIC_ARANGE + ARGWHERE = _cupynumeric.CUPYNUMERIC_ARGWHERE + BATCHED_CHOLESKY = _cupynumeric.CUPYNUMERIC_BATCHED_CHOLESKY + BINARY_OP = _cupynumeric.CUPYNUMERIC_BINARY_OP + BINARY_RED = _cupynumeric.CUPYNUMERIC_BINARY_RED + BINCOUNT = _cupynumeric.CUPYNUMERIC_BINCOUNT + BITGENERATOR = _cupynumeric.CUPYNUMERIC_BITGENERATOR + CHOOSE = _cupynumeric.CUPYNUMERIC_CHOOSE + CONTRACT = _cupynumeric.CUPYNUMERIC_CONTRACT + CONVERT = _cupynumeric.CUPYNUMERIC_CONVERT + CONVOLVE = _cupynumeric.CUPYNUMERIC_CONVOLVE + DIAG = _cupynumeric.CUPYNUMERIC_DIAG + DOT = _cupynumeric.CUPYNUMERIC_DOT + EYE = _cupynumeric.CUPYNUMERIC_EYE + FFT = _cupynumeric.CUPYNUMERIC_FFT + FILL = _cupynumeric.CUPYNUMERIC_FILL + FLIP = _cupynumeric.CUPYNUMERIC_FLIP + GEMM = _cupynumeric.CUPYNUMERIC_GEMM + 
HISTOGRAM = _cupynumeric.CUPYNUMERIC_HISTOGRAM + LOAD_CUDALIBS = _cupynumeric.CUPYNUMERIC_LOAD_CUDALIBS + MATMUL = _cupynumeric.CUPYNUMERIC_MATMUL + MATVECMUL = _cupynumeric.CUPYNUMERIC_MATVECMUL + MP_POTRF = _cupynumeric.CUPYNUMERIC_MP_POTRF + MP_SOLVE = _cupynumeric.CUPYNUMERIC_MP_SOLVE + NONZERO = _cupynumeric.CUPYNUMERIC_NONZERO + PACKBITS = _cupynumeric.CUPYNUMERIC_PACKBITS + POTRF = _cupynumeric.CUPYNUMERIC_POTRF + PUTMASK = _cupynumeric.CUPYNUMERIC_PUTMASK + QR = _cupynumeric.CUPYNUMERIC_QR + RAND = _cupynumeric.CUPYNUMERIC_RAND + READ = _cupynumeric.CUPYNUMERIC_READ + REPEAT = _cupynumeric.CUPYNUMERIC_REPEAT + SCALAR_UNARY_RED = _cupynumeric.CUPYNUMERIC_SCALAR_UNARY_RED + SCAN_GLOBAL = _cupynumeric.CUPYNUMERIC_SCAN_GLOBAL + SCAN_LOCAL = _cupynumeric.CUPYNUMERIC_SCAN_LOCAL + SEARCHSORTED = _cupynumeric.CUPYNUMERIC_SEARCHSORTED + SELECT = _cupynumeric.CUPYNUMERIC_SELECT + SOLVE = _cupynumeric.CUPYNUMERIC_SOLVE + SORT = _cupynumeric.CUPYNUMERIC_SORT + SVD = _cupynumeric.CUPYNUMERIC_SVD + SYRK = _cupynumeric.CUPYNUMERIC_SYRK + TILE = _cupynumeric.CUPYNUMERIC_TILE + TRANSPOSE_COPY_2D = _cupynumeric.CUPYNUMERIC_TRANSPOSE_COPY_2D + TRILU = _cupynumeric.CUPYNUMERIC_TRILU + TRSM = _cupynumeric.CUPYNUMERIC_TRSM + UNARY_OP = _cupynumeric.CUPYNUMERIC_UNARY_OP + UNARY_RED = _cupynumeric.CUPYNUMERIC_UNARY_RED + UNIQUE = _cupynumeric.CUPYNUMERIC_UNIQUE + UNIQUE_REDUCE = _cupynumeric.CUPYNUMERIC_UNIQUE_REDUCE + UNLOAD_CUDALIBS = _cupynumeric.CUPYNUMERIC_UNLOAD_CUDALIBS + UNPACKBITS = _cupynumeric.CUPYNUMERIC_UNPACKBITS + WHERE = _cupynumeric.CUPYNUMERIC_WHERE + WINDOW = _cupynumeric.CUPYNUMERIC_WINDOW + WRAP = _cupynumeric.CUPYNUMERIC_WRAP + WRITE = _cupynumeric.CUPYNUMERIC_WRITE + ZIP = _cupynumeric.CUPYNUMERIC_ZIP + + +# Match these to CuPyNumericUnaryOpCode in cupynumeric_c.h +@unique +class UnaryOpCode(IntEnum): + ABSOLUTE = _cupynumeric.CUPYNUMERIC_UOP_ABSOLUTE + ANGLE = _cupynumeric.CUPYNUMERIC_UOP_ANGLE + ARCCOS = _cupynumeric.CUPYNUMERIC_UOP_ARCCOS + ARCCOSH = 
_cupynumeric.CUPYNUMERIC_UOP_ARCCOSH + ARCSIN = _cupynumeric.CUPYNUMERIC_UOP_ARCSIN + ARCSINH = _cupynumeric.CUPYNUMERIC_UOP_ARCSINH + ARCTAN = _cupynumeric.CUPYNUMERIC_UOP_ARCTAN + ARCTANH = _cupynumeric.CUPYNUMERIC_UOP_ARCTANH + CBRT = _cupynumeric.CUPYNUMERIC_UOP_CBRT + CEIL = _cupynumeric.CUPYNUMERIC_UOP_CEIL + CLIP = _cupynumeric.CUPYNUMERIC_UOP_CLIP + CONJ = _cupynumeric.CUPYNUMERIC_UOP_CONJ + COPY = _cupynumeric.CUPYNUMERIC_UOP_COPY + COS = _cupynumeric.CUPYNUMERIC_UOP_COS + COSH = _cupynumeric.CUPYNUMERIC_UOP_COSH + DEG2RAD = _cupynumeric.CUPYNUMERIC_UOP_DEG2RAD + EXP = _cupynumeric.CUPYNUMERIC_UOP_EXP + EXP2 = _cupynumeric.CUPYNUMERIC_UOP_EXP2 + EXPM1 = _cupynumeric.CUPYNUMERIC_UOP_EXPM1 + FLOOR = _cupynumeric.CUPYNUMERIC_UOP_FLOOR + FREXP = _cupynumeric.CUPYNUMERIC_UOP_FREXP + GETARG = _cupynumeric.CUPYNUMERIC_UOP_GETARG + IMAG = _cupynumeric.CUPYNUMERIC_UOP_IMAG + INVERT = _cupynumeric.CUPYNUMERIC_UOP_INVERT + ISFINITE = _cupynumeric.CUPYNUMERIC_UOP_ISFINITE + ISINF = _cupynumeric.CUPYNUMERIC_UOP_ISINF + ISNAN = _cupynumeric.CUPYNUMERIC_UOP_ISNAN + LOG = _cupynumeric.CUPYNUMERIC_UOP_LOG + LOG10 = _cupynumeric.CUPYNUMERIC_UOP_LOG10 + LOG1P = _cupynumeric.CUPYNUMERIC_UOP_LOG1P + LOG2 = _cupynumeric.CUPYNUMERIC_UOP_LOG2 + LOGICAL_NOT = _cupynumeric.CUPYNUMERIC_UOP_LOGICAL_NOT + MODF = _cupynumeric.CUPYNUMERIC_UOP_MODF + NEGATIVE = _cupynumeric.CUPYNUMERIC_UOP_NEGATIVE + POSITIVE = _cupynumeric.CUPYNUMERIC_UOP_POSITIVE + RAD2DEG = _cupynumeric.CUPYNUMERIC_UOP_RAD2DEG + REAL = _cupynumeric.CUPYNUMERIC_UOP_REAL + RECIPROCAL = _cupynumeric.CUPYNUMERIC_UOP_RECIPROCAL + RINT = _cupynumeric.CUPYNUMERIC_UOP_RINT + ROUND = _cupynumeric.CUPYNUMERIC_UOP_ROUND + SIGN = _cupynumeric.CUPYNUMERIC_UOP_SIGN + SIGNBIT = _cupynumeric.CUPYNUMERIC_UOP_SIGNBIT + SIN = _cupynumeric.CUPYNUMERIC_UOP_SIN + SINH = _cupynumeric.CUPYNUMERIC_UOP_SINH + SQRT = _cupynumeric.CUPYNUMERIC_UOP_SQRT + SQUARE = _cupynumeric.CUPYNUMERIC_UOP_SQUARE + TAN = _cupynumeric.CUPYNUMERIC_UOP_TAN + TANH 
= _cupynumeric.CUPYNUMERIC_UOP_TANH + TRUNC = _cupynumeric.CUPYNUMERIC_UOP_TRUNC + + +# Match these to CuPyNumericUnaryRedCode in cupynumeric_c.h +@unique +class UnaryRedCode(IntEnum): + ALL = _cupynumeric.CUPYNUMERIC_RED_ALL + ANY = _cupynumeric.CUPYNUMERIC_RED_ANY + ARGMAX = _cupynumeric.CUPYNUMERIC_RED_ARGMAX + ARGMIN = _cupynumeric.CUPYNUMERIC_RED_ARGMIN + CONTAINS = _cupynumeric.CUPYNUMERIC_RED_CONTAINS + COUNT_NONZERO = _cupynumeric.CUPYNUMERIC_RED_COUNT_NONZERO + MAX = _cupynumeric.CUPYNUMERIC_RED_MAX + MIN = _cupynumeric.CUPYNUMERIC_RED_MIN + NANARGMAX = _cupynumeric.CUPYNUMERIC_RED_NANARGMAX + NANARGMIN = _cupynumeric.CUPYNUMERIC_RED_NANARGMIN + NANMAX = _cupynumeric.CUPYNUMERIC_RED_NANMAX + NANMIN = _cupynumeric.CUPYNUMERIC_RED_NANMIN + NANPROD = _cupynumeric.CUPYNUMERIC_RED_NANPROD + NANSUM = _cupynumeric.CUPYNUMERIC_RED_NANSUM + PROD = _cupynumeric.CUPYNUMERIC_RED_PROD + SUM = _cupynumeric.CUPYNUMERIC_RED_SUM + SUM_SQUARES = _cupynumeric.CUPYNUMERIC_RED_SUM_SQUARES + VARIANCE = _cupynumeric.CUPYNUMERIC_RED_VARIANCE + + +# Match these to CuPyNumericBinaryOpCode in cupynumeric_c.h +@unique +class BinaryOpCode(IntEnum): + ADD = _cupynumeric.CUPYNUMERIC_BINOP_ADD + ARCTAN2 = _cupynumeric.CUPYNUMERIC_BINOP_ARCTAN2 + BITWISE_AND = _cupynumeric.CUPYNUMERIC_BINOP_BITWISE_AND + BITWISE_OR = _cupynumeric.CUPYNUMERIC_BINOP_BITWISE_OR + BITWISE_XOR = _cupynumeric.CUPYNUMERIC_BINOP_BITWISE_XOR + COPYSIGN = _cupynumeric.CUPYNUMERIC_BINOP_COPYSIGN + DIVIDE = _cupynumeric.CUPYNUMERIC_BINOP_DIVIDE + EQUAL = _cupynumeric.CUPYNUMERIC_BINOP_EQUAL + FLOAT_POWER = _cupynumeric.CUPYNUMERIC_BINOP_FLOAT_POWER + FLOOR_DIVIDE = _cupynumeric.CUPYNUMERIC_BINOP_FLOOR_DIVIDE + FMOD = _cupynumeric.CUPYNUMERIC_BINOP_FMOD + GCD = _cupynumeric.CUPYNUMERIC_BINOP_GCD + GREATER = _cupynumeric.CUPYNUMERIC_BINOP_GREATER + GREATER_EQUAL = _cupynumeric.CUPYNUMERIC_BINOP_GREATER_EQUAL + HYPOT = _cupynumeric.CUPYNUMERIC_BINOP_HYPOT + ISCLOSE = _cupynumeric.CUPYNUMERIC_BINOP_ISCLOSE + LCM = 
_cupynumeric.CUPYNUMERIC_BINOP_LCM + LDEXP = _cupynumeric.CUPYNUMERIC_BINOP_LDEXP + LEFT_SHIFT = _cupynumeric.CUPYNUMERIC_BINOP_LEFT_SHIFT + LESS = _cupynumeric.CUPYNUMERIC_BINOP_LESS + LESS_EQUAL = _cupynumeric.CUPYNUMERIC_BINOP_LESS_EQUAL + LOGADDEXP = _cupynumeric.CUPYNUMERIC_BINOP_LOGADDEXP + LOGADDEXP2 = _cupynumeric.CUPYNUMERIC_BINOP_LOGADDEXP2 + LOGICAL_AND = _cupynumeric.CUPYNUMERIC_BINOP_LOGICAL_AND + LOGICAL_OR = _cupynumeric.CUPYNUMERIC_BINOP_LOGICAL_OR + LOGICAL_XOR = _cupynumeric.CUPYNUMERIC_BINOP_LOGICAL_XOR + MAXIMUM = _cupynumeric.CUPYNUMERIC_BINOP_MAXIMUM + MINIMUM = _cupynumeric.CUPYNUMERIC_BINOP_MINIMUM + MOD = _cupynumeric.CUPYNUMERIC_BINOP_MOD + MULTIPLY = _cupynumeric.CUPYNUMERIC_BINOP_MULTIPLY + NEXTAFTER = _cupynumeric.CUPYNUMERIC_BINOP_NEXTAFTER + NOT_EQUAL = _cupynumeric.CUPYNUMERIC_BINOP_NOT_EQUAL + POWER = _cupynumeric.CUPYNUMERIC_BINOP_POWER + RIGHT_SHIFT = _cupynumeric.CUPYNUMERIC_BINOP_RIGHT_SHIFT + SUBTRACT = _cupynumeric.CUPYNUMERIC_BINOP_SUBTRACT + + +@unique +class WindowOpCode(IntEnum): + BARLETT = _cupynumeric.CUPYNUMERIC_WINDOW_BARLETT + BLACKMAN = _cupynumeric.CUPYNUMERIC_WINDOW_BLACKMAN + HAMMING = _cupynumeric.CUPYNUMERIC_WINDOW_HAMMING + HANNING = _cupynumeric.CUPYNUMERIC_WINDOW_HANNING + KAISER = _cupynumeric.CUPYNUMERIC_WINDOW_KAISER + + +# Match these to RandGenCode in rand_util.h +@unique +class RandGenCode(IntEnum): + UNIFORM = 1 + NORMAL = 2 + INTEGER = 3 + + +# Match these to CuPyNumericScanCode in cupynumeric_c.h +@unique +class ScanCode(IntEnum): + PROD = _cupynumeric.CUPYNUMERIC_SCAN_PROD + SUM = _cupynumeric.CUPYNUMERIC_SCAN_SUM + + +# Match these to CuPyNumericConvertCode in cupynumeric_c.h +@unique +class ConvertCode(IntEnum): + NOOP = _cupynumeric.CUPYNUMERIC_CONVERT_NAN_NOOP + PROD = _cupynumeric.CUPYNUMERIC_CONVERT_NAN_PROD + SUM = _cupynumeric.CUPYNUMERIC_CONVERT_NAN_SUM + + +# Match these to BitGeneratorOperation in cupynumeric_c.h +@unique +class BitGeneratorOperation(IntEnum): + CREATE = 
_cupynumeric.CUPYNUMERIC_BITGENOP_CREATE + DESTROY = _cupynumeric.CUPYNUMERIC_BITGENOP_DESTROY + RAND_RAW = _cupynumeric.CUPYNUMERIC_BITGENOP_RAND_RAW + DISTRIBUTION = _cupynumeric.CUPYNUMERIC_BITGENOP_DISTRIBUTION + + +# Match these to BitGeneratorType in cupynumeric_c.h +@unique +class BitGeneratorType(IntEnum): + DEFAULT = _cupynumeric.CUPYNUMERIC_BITGENTYPE_DEFAULT + XORWOW = _cupynumeric.CUPYNUMERIC_BITGENTYPE_XORWOW + MRG32K3A = _cupynumeric.CUPYNUMERIC_BITGENTYPE_MRG32K3A + MTGP32 = _cupynumeric.CUPYNUMERIC_BITGENTYPE_MTGP32 + MT19937 = _cupynumeric.CUPYNUMERIC_BITGENTYPE_MT19937 + PHILOX4_32_10 = _cupynumeric.CUPYNUMERIC_BITGENTYPE_PHILOX4_32_10 + + +# Match these to BitGeneratorDistribution in cupynumeric_c.h +@unique +class BitGeneratorDistribution(IntEnum): + INTEGERS_16 = _cupynumeric.CUPYNUMERIC_BITGENDIST_INTEGERS_16 + INTEGERS_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_INTEGERS_32 + INTEGERS_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_INTEGERS_64 + UNIFORM_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_UNIFORM_32 + UNIFORM_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_UNIFORM_64 + LOGNORMAL_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_LOGNORMAL_32 + LOGNORMAL_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_LOGNORMAL_64 + NORMAL_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_NORMAL_32 + NORMAL_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_NORMAL_64 + POISSON = _cupynumeric.CUPYNUMERIC_BITGENDIST_POISSON + EXPONENTIAL_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_EXPONENTIAL_32 + EXPONENTIAL_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_EXPONENTIAL_64 + GUMBEL_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_GUMBEL_32 + GUMBEL_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_GUMBEL_64 + LAPLACE_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_LAPLACE_32 + LAPLACE_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_LAPLACE_64 + LOGISTIC_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_LOGISTIC_32 + LOGISTIC_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_LOGISTIC_64 + PARETO_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_PARETO_32 + PARETO_64 = 
_cupynumeric.CUPYNUMERIC_BITGENDIST_PARETO_64 + POWER_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_POWER_32 + POWER_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_POWER_64 + RAYLEIGH_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_RAYLEIGH_32 + RAYLEIGH_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_RAYLEIGH_64 + CAUCHY_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_CAUCHY_32 + CAUCHY_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_CAUCHY_64 + TRIANGULAR_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_TRIANGULAR_32 + TRIANGULAR_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_TRIANGULAR_64 + WEIBULL_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_WEIBULL_32 + WEIBULL_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_WEIBULL_64 + BYTES = _cupynumeric.CUPYNUMERIC_BITGENDIST_BYTES + BETA_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_BETA_32 + BETA_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_BETA_64 + F_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_F_32 + F_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_F_64 + LOGSERIES = _cupynumeric.CUPYNUMERIC_BITGENDIST_LOGSERIES + NONCENTRAL_F_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_NONCENTRAL_F_32 + NONCENTRAL_F_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_NONCENTRAL_F_64 + CHISQUARE_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_CHISQUARE_32 + CHISQUARE_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_CHISQUARE_64 + GAMMA_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_GAMMA_32 + GAMMA_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_GAMMA_64 + STANDARD_T_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_STANDARD_T_32 + STANDARD_T_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_STANDARD_T_64 + HYPERGEOMETRIC = _cupynumeric.CUPYNUMERIC_BITGENDIST_HYPERGEOMETRIC + VONMISES_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_VONMISES_32 + VONMISES_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_VONMISES_64 + ZIPF = _cupynumeric.CUPYNUMERIC_BITGENDIST_ZIPF + GEOMETRIC = _cupynumeric.CUPYNUMERIC_BITGENDIST_GEOMETRIC + WALD_32 = _cupynumeric.CUPYNUMERIC_BITGENDIST_WALD_32 + WALD_64 = _cupynumeric.CUPYNUMERIC_BITGENDIST_WALD_64 + BINOMIAL = _cupynumeric.CUPYNUMERIC_BITGENDIST_BINOMIAL 
+ NEGATIVE_BINOMIAL = _cupynumeric.CUPYNUMERIC_BITGENDIST_NEGATIVE_BINOMIAL + + +# Match these to CuPyNumericConvolveMethod in cupynumeric_c.h +@unique +class ConvolveMethod(IntEnum): + AUTO = _cupynumeric.CUPYNUMERIC_CONVOLVE_AUTO + DIRECT = _cupynumeric.CUPYNUMERIC_CONVOLVE_DIRECT + FFT = _cupynumeric.CUPYNUMERIC_CONVOLVE_FFT + + +@unique +class TransferType(IntEnum): + DONATE = 0 + MAKE_COPY = 1 + SHARE = 2 + + +# Match these to fftType in fft_util.h +class FFTType: + def __init__( + self, + name: str, + type_id: int, + input_dtype: npt.DTypeLike, + output_dtype: npt.DTypeLike, + single_precision: bool, + complex_type: FFTType | None = None, + ) -> None: + self._name = name + self._type_id = type_id + self._complex_type = self if complex_type is None else complex_type + self._input_dtype = input_dtype + self._output_dtype = output_dtype + self._single_precision = single_precision + + def __str__(self) -> str: + return self._name + + def __repr__(self) -> str: + return str(self) + + @property + def type_id(self) -> int: + return self._type_id + + @property + def complex(self) -> FFTType: + return self._complex_type + + @property + def input_dtype(self) -> npt.DTypeLike: + return self._input_dtype + + @property + def output_dtype(self) -> npt.DTypeLike: + return self._output_dtype + + @property + def is_single_precision(self) -> bool: + return self._single_precision + + +FFT_C2C = FFTType( + "C2C", + _cupynumeric.CUPYNUMERIC_FFT_C2C, + np.complex64, + np.complex64, + True, +) + +FFT_Z2Z = FFTType( + "Z2Z", + _cupynumeric.CUPYNUMERIC_FFT_Z2Z, + np.complex128, + np.complex128, + False, +) + +FFT_R2C = FFTType( + "R2C", + _cupynumeric.CUPYNUMERIC_FFT_R2C, + np.float32, + np.complex64, + True, + FFT_C2C, +) + +FFT_C2R = FFTType( + "C2R", + _cupynumeric.CUPYNUMERIC_FFT_C2R, + np.complex64, + np.float32, + True, + FFT_C2C, +) + +FFT_D2Z = FFTType( + "D2Z", + _cupynumeric.CUPYNUMERIC_FFT_D2Z, + np.float64, + np.complex128, + False, + FFT_Z2Z, +) + +FFT_Z2D = FFTType( + 
"Z2D", + _cupynumeric.CUPYNUMERIC_FFT_Z2D, + np.complex128, + np.float64, + False, + FFT_Z2Z, +) + + +class FFTCode: + @staticmethod + def real_to_complex_code(dtype: npt.DTypeLike) -> FFTType: + if dtype == np.float64: + return FFT_D2Z + elif dtype == np.float32: + return FFT_R2C + else: + raise TypeError( + ( + "Data type for FFT not supported " + "(supported types are float32 and float64)" + ) + ) + + @staticmethod + def complex_to_real_code(dtype: npt.DTypeLike) -> FFTType: + if dtype == np.complex128: + return FFT_Z2D + elif dtype == np.complex64: + return FFT_C2R + else: + raise TypeError( + ( + "Data type for FFT not supported " + "(supported types are complex64 and complex128)" + ) + ) + + +@unique +class FFTDirection(IntEnum): + FORWARD = _cupynumeric.CUPYNUMERIC_FFT_FORWARD + INVERSE = _cupynumeric.CUPYNUMERIC_FFT_INVERSE + + +# Match these to CuPyNumericBitorder in cupynumeric_c.h +@unique +class Bitorder(IntEnum): + BIG = _cupynumeric.CUPYNUMERIC_BITORDER_BIG + LITTLE = _cupynumeric.CUPYNUMERIC_BITORDER_LITTLE + + +@unique +class FFTNormalization(IntEnum): + FORWARD = 1 + INVERSE = 2 + ORTHOGONAL = 3 + + @staticmethod + def from_string(in_string: str) -> FFTNormalization | None: + if in_string == "forward": + return FFTNormalization.FORWARD + elif in_string == "ortho": + return FFTNormalization.ORTHOGONAL + elif in_string == "backward" or in_string is None: + return FFTNormalization.INVERSE + else: + return None + + @staticmethod + def reverse(in_string: str | None) -> str: + if in_string == "forward": + return "backward" + elif in_string == "backward" or in_string is None: + return "forward" + else: + return in_string diff --git a/cunumeric/fft/__init__.py b/cupynumeric/fft/__init__.py similarity index 80% rename from cunumeric/fft/__init__.py rename to cupynumeric/fft/__init__.py index d0d3eb28d..0bc511d57 100644 --- a/cunumeric/fft/__init__.py +++ b/cupynumeric/fft/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 
2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +16,9 @@ import numpy.fft as _npfft -from cunumeric.array import maybe_convert_to_np_ndarray -from cunumeric.fft.fft import * -from cunumeric.coverage import clone_module +from .._array.util import maybe_convert_to_np_ndarray +from .._utils.coverage import clone_module +from .fft import * clone_module(_npfft, globals(), maybe_convert_to_np_ndarray) diff --git a/cunumeric/fft/fft.py b/cupynumeric/fft/fft.py similarity index 91% rename from cunumeric/fft/fft.py rename to cupynumeric/fft/fft.py index b10ce8c98..7576f3dd4 100644 --- a/cunumeric/fft/fft.py +++ b/cupynumeric/fft/fft.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,21 +14,22 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Sequence, Union +from typing import TYPE_CHECKING, Sequence import numpy as np +from .._array.util import add_boilerplate +from .._module.array_rearrange import roll from ..config import FFT_C2C, FFT_Z2Z, FFTCode, FFTDirection, FFTNormalization -from ..module import add_boilerplate if TYPE_CHECKING: - from ..array import ndarray + from .._array.array import ndarray def _sanitize_user_axes( a: ndarray, - s: Union[Sequence[int], None], - axes: Union[Sequence[int], None], + s: Sequence[int] | None, + axes: Sequence[int] | None, is_c2r: bool = False, ) -> tuple[list[int], Sequence[int]]: if s is None: @@ -58,9 +59,9 @@ def _operate_by_axes(a: ndarray, axes: Sequence[int]) -> bool: @add_boilerplate("a") def fft( a: ndarray, - n: Union[int, None] = None, + n: int | None = None, axis: int = -1, - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the one-dimensional discrete Fourier 
Transform. @@ -103,7 +104,7 @@ def fft( numpy.fft.fft Availability - -------- + ------------ Multiple GPUs """ s = (n,) if n is not None else None @@ -114,9 +115,9 @@ def fft( @add_boilerplate("a") def fft2( a: ndarray, - s: Union[Sequence[int], None] = None, + s: Sequence[int] | None = None, axes: Sequence[int] = (-2, -1), - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the 2-dimensional discrete Fourier Transform. @@ -172,9 +173,9 @@ def fft2( @add_boilerplate("a") def fftn( a: ndarray, - s: Union[Sequence[int], None] = None, - axes: Union[Sequence[int], None] = None, - norm: Union[str, None] = None, + s: Sequence[int] | None = None, + axes: Sequence[int] | None = None, + norm: str | None = None, ) -> ndarray: """ Compute the N-dimensional discrete Fourier Transform. @@ -248,9 +249,9 @@ def fftn( @add_boilerplate("a") def ifft( a: ndarray, - n: Union[int, None] = None, + n: int | None = None, axis: int = -1, - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the one-dimensional inverse discrete Fourier Transform. @@ -319,9 +320,9 @@ def ifft( @add_boilerplate("a") def ifft2( a: ndarray, - s: Union[Sequence[int], None] = None, + s: Sequence[int] | None = None, axes: Sequence[int] = (-2, -1), - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the 2-dimensional inverse discrete Fourier Transform. @@ -384,9 +385,9 @@ def ifft2( @add_boilerplate("a") def ifftn( a: ndarray, - s: Union[Sequence[int], None] = None, - axes: Union[Sequence[int], None] = None, - norm: Union[str, None] = None, + s: Sequence[int] | None = None, + axes: Sequence[int] | None = None, + norm: str | None = None, ) -> ndarray: """ Compute the N-dimensional inverse discrete Fourier Transform. 
@@ -470,9 +471,9 @@ def ifftn( @add_boilerplate("a") def rfft( a: ndarray, - n: Union[int, None] = None, + n: int | None = None, axis: int = -1, - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the one-dimensional discrete Fourier Transform for real input. @@ -528,9 +529,9 @@ def rfft( @add_boilerplate("a") def rfft2( a: ndarray, - s: Union[Sequence[int], None] = None, + s: Sequence[int] | None = None, axes: Sequence[int] = (-2, -1), - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the 2-dimensional FFT of a real array. @@ -573,9 +574,9 @@ def rfft2( @add_boilerplate("a") def rfftn( a: ndarray, - s: Union[Sequence[int], None] = None, - axes: Union[Sequence[int], None] = None, - norm: Union[str, None] = None, + s: Sequence[int] | None = None, + axes: Sequence[int] | None = None, + norm: str | None = None, ) -> ndarray: """ Compute the N-dimensional discrete Fourier Transform for real input. @@ -670,9 +671,9 @@ def rfftn( @add_boilerplate("a") def irfft( a: ndarray, - n: Union[int, None] = None, + n: int | None = None, axis: int = -1, - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Computes the inverse of `rfft`. @@ -738,9 +739,9 @@ def irfft( @add_boilerplate("a") def irfft2( a: ndarray, - s: Union[Sequence[int], None] = None, + s: Sequence[int] | None = None, axes: Sequence[int] = (-2, -1), - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Computes the inverse of `rfft2`. @@ -785,9 +786,9 @@ def irfft2( @add_boilerplate("a") def irfftn( a: ndarray, - s: Union[Sequence[int], None] = None, - axes: Union[Sequence[int], None] = None, - norm: Union[str, None] = None, + s: Sequence[int] | None = None, + axes: Sequence[int] | None = None, + norm: str | None = None, ) -> ndarray: """ Computes the inverse of `rfftn`. 
@@ -895,9 +896,9 @@ def irfftn( @add_boilerplate("a") def hfft( a: ndarray, - n: Union[int, None] = None, + n: int | None = None, axis: int = -1, - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the FFT of a signal that has Hermitian symmetry, i.e., a real @@ -960,9 +961,9 @@ def hfft( @add_boilerplate("a") def ihfft( a: ndarray, - n: Union[int, None] = None, + n: int | None = None, axis: int = -1, - norm: Union[str, None] = None, + norm: str | None = None, ) -> ndarray: """ Compute the inverse FFT of a signal that has Hermitian symmetry. @@ -1012,3 +1013,80 @@ def ihfft( return rfftn( a=a, s=s, axes=computed_axis, norm=FFTNormalization.reverse(norm) ).conjugate() + + +integer_types = (int, np.integer) + + +@add_boilerplate("x") +def fftshift(x: ndarray, axes: int | tuple[int, ...] | None = None) -> ndarray: + """ + Shift the zero-frequency component to the center of the spectrum. + + This function swaps half-spaces for all axes listed (defaults to all). + Note that ``y[0]`` is the Nyquist component only if ``len(x)`` is even. + + Parameters + ---------- + x : array_like + Input array. + + axes : int or shape tuple, optional + Axes over which to shift. Default is None, which shifts all axes. + + Returns + ------- + y : ndarray + The shifted array. + + Availability + ------------ + Multiple GPUs, Multiple CPUs + """ + shift: int | tuple[int, ...] + if axes is None: + axes = tuple(range(x.ndim)) + shift = tuple(dim // 2 for dim in x.shape) + elif isinstance(axes, integer_types): + shift = x.shape[axes] // 2 + else: + shift = tuple(x.shape[ax] // 2 for ax in axes) + + return roll(x, shift, axes) + + +@add_boilerplate("x") +def ifftshift( + x: ndarray, axes: int | tuple[int, ...] | None = None +) -> ndarray: + """ + The inverse of `fftshift`. Although identical for even-length `x`, the + functions differ by one sample for odd-length `x`. + + Parameters + ---------- + x : array_like + Input array. 
+ + axes : int or shape tuple, optional + Axes over which to calculate. Defaults to None, which shifts all axes. + + Returns + ------- + y : ndarray + The shifted array. + + Availability + ------------ + Multiple GPUs, Multiple CPUs + """ + shift: int | tuple[int, ...] + if axes is None: + axes = tuple(range(x.ndim)) + shift = tuple(-(dim // 2) for dim in x.shape) + elif isinstance(axes, integer_types): + shift = -(x.shape[axes] // 2) + else: + shift = tuple(-(x.shape[ax] // 2) for ax in axes) + + return roll(x, shift, axes) diff --git a/cupynumeric/install_info.py.in b/cupynumeric/install_info.py.in new file mode 100644 index 000000000..c582492df --- /dev/null +++ b/cupynumeric/install_info.py.in @@ -0,0 +1,47 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# IMPORTANT: +# * install_info.py is a generated file and should not be modified by hand + +def get_libpath(): + import os, sys, platform + join = os.path.join + exists = os.path.exists + dirname = os.path.dirname + cn_path = dirname(dirname(__file__)) + so_ext = { + "": "", + "Java": ".jar", + "Linux": ".so", + "Darwin": ".dylib", + "Windows": ".dll" + }[platform.system()] + + def find_libcupynumeric(libdir): + if exists(join(libdir, f"libcupynumeric{so_ext}")): + return libdir + return None + + return ( + find_libcupynumeric(join(cn_path, "build", "lib")) or + find_libcupynumeric(join(dirname(dirname(dirname(cn_path))), "lib")) or + find_libcupynumeric(join(dirname(dirname(sys.executable)), "lib")) or + "" + ) + + +libpath: str = get_libpath() +header: str = """@header@""" diff --git a/cunumeric/linalg/__init__.py b/cupynumeric/linalg/__init__.py similarity index 77% rename from cunumeric/linalg/__init__.py rename to cupynumeric/linalg/__init__.py index 6904bf72c..9ba64b37d 100644 --- a/cunumeric/linalg/__init__.py +++ b/cupynumeric/linalg/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,10 +16,9 @@ import numpy.linalg as _nplinalg -from cunumeric.array import maybe_convert_to_np_ndarray -from cunumeric.linalg.linalg import * -from cunumeric.linalg.exception import * -from cunumeric.coverage import clone_module +from .._array.util import maybe_convert_to_np_ndarray +from .._utils.coverage import clone_module +from .linalg import * clone_module(_nplinalg, globals(), maybe_convert_to_np_ndarray) diff --git a/cupynumeric/linalg/_cholesky.py b/cupynumeric/linalg/_cholesky.py new file mode 100644 index 000000000..a99ae6811 --- /dev/null +++ b/cupynumeric/linalg/_cholesky.py @@ -0,0 +1,305 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from legate.core import ( + broadcast, + constant, + dimension, + get_legate_runtime, + types as ty, +) +from legate.settings import settings + +from ..config import CuPyNumericOpCode +from ..runtime import runtime +from ._exception import LinAlgError + +legate_runtime = get_legate_runtime() + +if TYPE_CHECKING: + from legate.core import Library, LogicalStore, LogicalStorePartition + + from .._thunk.deferred import DeferredArray + from ..runtime import Runtime + + +def transpose_copy_single( + library: Library, input: LogicalStore, output: LogicalStore +) -> None: + task = legate_runtime.create_auto_task( + library, CuPyNumericOpCode.TRANSPOSE_COPY_2D + ) + p_out = task.add_output(output) + p_in = task.add_input(input) + # Output has the same shape as input, but is mapped + # to a column major instance + + task.add_constraint(broadcast(p_out)) + task.add_constraint(broadcast(p_in)) + + task.execute() + + +def transpose_copy( + library: Library, + launch_domain: tuple[int, ...], + p_input: LogicalStorePartition, + p_output: LogicalStorePartition, +) -> None: + task = legate_runtime.create_manual_task( + library, + CuPyNumericOpCode.TRANSPOSE_COPY_2D, + launch_domain, + ) + task.add_output(p_output) + task.add_input(p_input) + # Output has the same shape as input, but is mapped + # to a column major instance + + task.execute() + + +def potrf_single(library: Library, output: LogicalStore) -> None: + task = legate_runtime.create_auto_task(library, CuPyNumericOpCode.POTRF) + task.throws_exception(LinAlgError) + task.add_output(output) + task.add_input(output) + task.execute() + + +def mp_potrf( + library: Library, + n: int, + nb: int, + input: LogicalStore, + output: LogicalStore, +) -> None: + task = legate_runtime.create_auto_task(library, CuPyNumericOpCode.MP_POTRF) + task.throws_exception(LinAlgError) + task.add_input(input) + task.add_output(output) + task.add_alignment(output, input) + 
task.add_scalar_arg(n, ty.int64) + task.add_scalar_arg(nb, ty.int64) + task.add_nccl_communicator() # for repartitioning + task.add_cal_communicator() + task.execute() + + +def potrf(library: Library, p_output: LogicalStorePartition, i: int) -> None: + task = legate_runtime.create_manual_task( + library, CuPyNumericOpCode.POTRF, (i + 1, i + 1), lower_bounds=(i, i) + ) + task.throws_exception(LinAlgError) + task.add_output(p_output) + task.add_input(p_output) + task.execute() + + +def trsm( + library: Library, p_output: LogicalStorePartition, i: int, lo: int, hi: int +) -> None: + if lo >= hi: + return + + rhs = p_output.get_child_store(i, i) + lhs = p_output + + task = legate_runtime.create_manual_task( + library, CuPyNumericOpCode.TRSM, (hi, i + 1), lower_bounds=(lo, i) + ) + task.add_output(lhs) + task.add_input(rhs) + task.add_input(lhs) + task.execute() + + +def syrk( + library: Library, p_output: LogicalStorePartition, k: int, i: int +) -> None: + rhs = p_output.get_child_store(k, i) + lhs = p_output + + task = legate_runtime.create_manual_task( + library, CuPyNumericOpCode.SYRK, (k + 1, k + 1), lower_bounds=(k, k) + ) + task.add_output(lhs) + task.add_input(rhs) + task.add_input(lhs) + task.execute() + + +def gemm( + library: Library, + p_output: LogicalStorePartition, + k: int, + i: int, + lo: int, + hi: int, +) -> None: + if lo >= hi: + return + + rhs2 = p_output.get_child_store(k, i) + lhs = p_output + rhs1 = p_output + + task = legate_runtime.create_manual_task( + library, CuPyNumericOpCode.GEMM, (hi, k + 1), lower_bounds=(lo, k) + ) + task.add_output(lhs) + task.add_input(rhs1, (dimension(0), constant(i))) + task.add_input(rhs2) + task.add_input(lhs) + task.execute() + + +MIN_CHOLESKY_TILE_SIZE = 2 if settings.test() else 2048 +MIN_CHOLESKY_MATRIX_SIZE = 4 if settings.test() else 8192 + + +# TODO: We need a better cost model +def choose_color_shape( + runtime: Runtime, shape: tuple[int, ...] 
+) -> tuple[int, ...]: + extent = shape[0] + + # If there's only one processor or the matrix is too small, + # don't even bother to partition it at all + if runtime.num_procs == 1 or extent <= MIN_CHOLESKY_MATRIX_SIZE: + return (1, 1) + + # If the matrix is big enough to warrant partitioning, + # pick the granularity that the tile size is greater than a threshold + num_tiles = runtime.num_procs + max_num_tiles = runtime.num_procs * 4 + while ( + (extent + num_tiles - 1) // num_tiles > MIN_CHOLESKY_TILE_SIZE + and num_tiles * 2 <= max_num_tiles + ): + num_tiles *= 2 + + return (num_tiles, num_tiles) + + +def tril_single(library: Library, output: LogicalStore) -> None: + task = legate_runtime.create_auto_task(library, CuPyNumericOpCode.TRILU) + task.add_output(output) + task.add_input(output) + task.add_scalar_arg(True, ty.bool_) + task.add_scalar_arg(0, ty.int32) + # Add a fake task argument to indicate that this is for Cholesky + task.add_scalar_arg(True, ty.bool_) + + task.execute() + + +def tril(library: Library, p_output: LogicalStorePartition, n: int) -> None: + task = legate_runtime.create_manual_task( + library, CuPyNumericOpCode.TRILU, (n, n) + ) + + task.add_output(p_output) + task.add_input(p_output) + task.add_scalar_arg(True, ty.bool_) + task.add_scalar_arg(0, ty.int32) + # Add a fake task argument to indicate that this is for Cholesky + task.add_scalar_arg(True, ty.bool_) + + task.execute() + + +def _rounding_divide( + lhs: tuple[int, ...], rhs: tuple[int, ...] +) -> tuple[int, ...]: + return tuple((lh + rh - 1) // rh for (lh, rh) in zip(lhs, rhs)) + + +def _batched_cholesky( + library: Library, output: DeferredArray, input: DeferredArray +) -> None: + # the only feasible implementation for right now is that + # each cholesky submatrix fits on a single proc. We will have + # wildly varying memory available depending on the system. + # Just use a fixed cutoff to provide some sensible warning. 
+ # TODO: find a better way to inform the user dims are too big + task = legate_runtime.create_auto_task( + library, CuPyNumericOpCode.BATCHED_CHOLESKY + ) + task.add_input(input.base) + task.add_output(output.base) + ndim = input.base.ndim + task.add_broadcast(input.base, (ndim - 2, ndim - 1)) + task.add_broadcast(output.base, (ndim - 2, ndim - 1)) + task.add_alignment(input.base, output.base) + task.throws_exception(LinAlgError) + task.execute() + + +def cholesky_deferred(output: DeferredArray, input: DeferredArray) -> None: + library = runtime.library + if len(input.base.shape) > 2: + size = input.base.shape[-1] + # Choose 32768 as dimension cutoff for warning + # so that for float64 anything larger than + # 8 GiB produces a warning + if size > 32768: + runtime.warn( + "batched cholesky is only valid" + " when the square submatrices fit" + f" on a single proc, n > {size} may be too large", + category=UserWarning, + ) + return _batched_cholesky(library, output, input) + + if runtime.num_procs == 1: + transpose_copy_single(library, input.base, output.base) + potrf_single(library, output.base) + tril_single(library, output.base) + return + + shape = tuple(output.base.shape) + tile_shape: tuple[int, ...] 
+ if ( + runtime.has_cusolvermp + and runtime.num_gpus > 1 + and shape[0] >= MIN_CHOLESKY_MATRIX_SIZE + ): + mp_potrf( + library, shape[0], MIN_CHOLESKY_TILE_SIZE, input.base, output.base + ) + + tril_single(library, output.base) + else: + initial_color_shape = choose_color_shape(runtime, shape) + tile_shape = _rounding_divide(shape, initial_color_shape) + color_shape = _rounding_divide(shape, tile_shape) + n = color_shape[0] + + p_input = input.base.partition_by_tiling(tile_shape) + p_output = output.base.partition_by_tiling(tile_shape) + transpose_copy(library, color_shape, p_input, p_output) + + for i in range(n): + potrf(library, p_output, i) + trsm(library, p_output, i, i + 1, n) + for k in range(i + 1, n): + syrk(library, p_output, k, i) + gemm(library, p_output, k, i, k + 1, n) + + tril(library, p_output, n) diff --git a/cunumeric/linalg/exception.py b/cupynumeric/linalg/_exception.py similarity index 94% rename from cunumeric/linalg/exception.py rename to cupynumeric/linalg/_exception.py index 9a086edb2..38e38552e 100644 --- a/cunumeric/linalg/exception.py +++ b/cupynumeric/linalg/_exception.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/cupynumeric/linalg/_qr.py b/cupynumeric/linalg/_qr.py new file mode 100644 index 000000000..4b20d5fe6 --- /dev/null +++ b/cupynumeric/linalg/_qr.py @@ -0,0 +1,50 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from legate.core import get_legate_runtime + +from cupynumeric.config import CuPyNumericOpCode + +from ._exception import LinAlgError + +if TYPE_CHECKING: + from legate.core import Library, LogicalStore + + from .._thunk.deferred import DeferredArray + + +def qr_single( + library: Library, a: LogicalStore, q: LogicalStore, r: LogicalStore +) -> None: + task = get_legate_runtime().create_auto_task(library, CuPyNumericOpCode.QR) + task.throws_exception(LinAlgError) + task.add_input(a) + task.add_output(q) + task.add_output(r) + + task.add_broadcast(a) + task.add_broadcast(q) + task.add_broadcast(r) + + task.execute() + + +def qr_deferred(a: DeferredArray, q: DeferredArray, r: DeferredArray) -> None: + library = a.library + + qr_single(library, a.base, q.base, r.base) diff --git a/cupynumeric/linalg/_solve.py b/cupynumeric/linalg/_solve.py new file mode 100644 index 000000000..325fe301d --- /dev/null +++ b/cupynumeric/linalg/_solve.py @@ -0,0 +1,108 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +from __future__ import annotations + +from typing import TYPE_CHECKING, cast + +import legate.core.types as ty +from legate.core import broadcast, get_legate_runtime + +from ..config import CuPyNumericOpCode +from ..runtime import runtime +from ._cholesky import transpose_copy_single +from ._exception import LinAlgError + +if TYPE_CHECKING: + from legate.core import Library, LogicalStore + + from .._thunk.deferred import DeferredArray + + +def solve_single(library: Library, a: LogicalStore, b: LogicalStore) -> None: + task = get_legate_runtime().create_auto_task( + library, CuPyNumericOpCode.SOLVE + ) + task.throws_exception(LinAlgError) + p_a = task.add_input(a) + p_b = task.add_input(b) + task.add_output(a, p_a) + task.add_output(b, p_b) + + task.add_constraint(broadcast(p_a)) + task.add_constraint(broadcast(p_b)) + + task.execute() + + +MIN_SOLVE_TILE_SIZE = 512 +MIN_SOLVE_MATRIX_SIZE = 2048 + + +def mp_solve( + library: Library, + n: int, + nrhs: int, + nb: int, + a: LogicalStore, + b: LogicalStore, + output: LogicalStore, +) -> None: + task = get_legate_runtime().create_auto_task( + library, CuPyNumericOpCode.MP_SOLVE + ) + task.throws_exception(LinAlgError) + task.add_input(a) + task.add_input(b) + task.add_output(output) + task.add_alignment(output, b) + task.add_scalar_arg(n, ty.int64) + task.add_scalar_arg(nrhs, ty.int64) + task.add_scalar_arg(nb, ty.int64) + task.add_nccl_communicator() # for repartitioning + task.add_cal_communicator() + task.execute() + + +def solve_deferred( + output: DeferredArray, a: DeferredArray, b: DeferredArray +) -> None: + from .._thunk.deferred import DeferredArray + + library = output.library + + if ( + runtime.has_cusolvermp + and runtime.num_gpus > 1 + and a.base.shape[0] >= MIN_SOLVE_MATRIX_SIZE + ): + n = a.base.shape[0] + nrhs = b.base.shape[1] + mp_solve( + library, n, nrhs, MIN_SOLVE_TILE_SIZE, a.base, b.base, 
output.base + ) + return + + a_copy = cast( + DeferredArray, + runtime.create_empty_thunk(a.shape, dtype=a.base.type, inputs=(a,)), + ) + transpose_copy_single(library, a.base, a_copy.base) + + if b.ndim > 1: + transpose_copy_single(library, b.base, output.base) + else: + output.copy(b) + + solve_single(library, a_copy.base, output.base) diff --git a/cupynumeric/linalg/_svd.py b/cupynumeric/linalg/_svd.py new file mode 100644 index 000000000..a9be94924 --- /dev/null +++ b/cupynumeric/linalg/_svd.py @@ -0,0 +1,60 @@ +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from __future__ import annotations + +from typing import TYPE_CHECKING + +from legate.core import get_legate_runtime + +from cupynumeric.config import CuPyNumericOpCode + +from ._exception import LinAlgError + +if TYPE_CHECKING: + from legate.core import Library, LogicalStore + + from .._thunk.deferred import DeferredArray + + +def svd_single( + library: Library, + a: LogicalStore, + u: LogicalStore, + s: LogicalStore, + vh: LogicalStore, +) -> None: + task = get_legate_runtime().create_auto_task( + library, CuPyNumericOpCode.SVD + ) + task.throws_exception(LinAlgError) + task.add_input(a) + task.add_output(u) + task.add_output(s) + task.add_output(vh) + + task.add_broadcast(a) + task.add_broadcast(u) + task.add_broadcast(s) + task.add_broadcast(vh) + + task.execute() + + +def svd_deferred( + a: DeferredArray, u: DeferredArray, s: DeferredArray, vh: DeferredArray +) -> None: + library = a.library + + svd_single(library, a.base, u.base, s.base, vh.base) diff --git a/cunumeric/linalg/linalg.py b/cupynumeric/linalg/linalg.py similarity index 78% rename from cunumeric/linalg/linalg.py rename to cupynumeric/linalg/linalg.py index d1c0498b2..25f9ed964 100644 --- a/cunumeric/linalg/linalg.py +++ b/cupynumeric/linalg/linalg.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,25 +14,29 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Sequence, Union +from typing import TYPE_CHECKING, Sequence import numpy as np -from numpy.core.multiarray import ( # type: ignore [attr-defined] - normalize_axis_index, -) -from numpy.core.numeric import ( # type: ignore [attr-defined] - normalize_axis_tuple, -) -from cunumeric._ufunc.math import add, sqrt as _sqrt -from cunumeric.array import add_boilerplate, convert_to_cunumeric_ndarray -from cunumeric.module import dot, empty_like, eye, matmul, ndarray +from .._utils import is_np2 -from .exception import LinAlgError +if is_np2: + from numpy.lib.array_utils import normalize_axis_index # type: ignore + from numpy.lib.array_utils import normalize_axis_tuple # type: ignore +else: + from numpy.core.multiarray import ( # type: ignore + normalize_axis_index, + ) + from numpy.core.numeric import ( # type: ignore + normalize_axis_tuple, + ) -if TYPE_CHECKING: - from typing import Optional +from .._array.util import add_boilerplate, convert_to_cupynumeric_ndarray +from .._module import dot, empty_like, eye, matmul, ndarray +from .._ufunc.math import add, sqrt as _sqrt +from ._exception import LinAlgError +if TYPE_CHECKING: import numpy.typing as npt @@ -82,11 +86,63 @@ def cholesky(a: ndarray) -> ndarray: elif shape[-1] != shape[-2]: raise ValueError("Last 2 dimensions of the array must be square") - return _cholesky(a) + return _thunk_cholesky(a) + + +@add_boilerplate("a") +def qr(a: ndarray) -> tuple[ndarray, ...]: + """ + Compute the qr factorization of a matrix. + + Factor the matrix a as qr, where q is orthonormal + and r is upper-triangular. + + Parameters + ---------- + a : (M, N) array_like + Array like, at least dimension 2. + + Returns + ------- + q : (M, K) array_like + A matrix with orthonormal columns. K = min(M, N). + r : (K, N) array_like + The upper triangular matrix. + + Raises + ------ + LinAlgError + If factoring fails. 
+ + Notes + ----- + Currently does not support the parameter 'mode' from numpy 1.8. + + See Also + -------- + numpy.linalg.qr + + Availability + -------- + Single GPU, Single CPU + """ + shape = a.shape + if len(shape) < 2: + raise LinAlgError( + f"{len(shape)}-dimensional array given. " + "Array must be at least two-dimensional" + ) + if len(shape) > 2: + raise NotImplementedError( + "cuPyNumeric does not yet support stacked 2d arrays" + ) + if np.dtype("e") == a.dtype: + raise TypeError("array type float16 is unsupported in linalg") + return _thunk_qr(a) @add_boilerplate("a", "b") -def solve(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: +def solve(a: ndarray, b: ndarray, out: ndarray | None = None) -> ndarray: """ Solve a linear matrix equation, or system of linear scalar equations. @@ -112,13 +168,17 @@ def solve(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: LinAlgError If `a` is singular or not square. + Notes + ------ + Multi-GPU usage is only available when compiled with cusolverMP. + See Also -------- numpy.linalg.solve Availability -------- - Single GPU, Single CPU + Multiple GPUs, Single CPU """ if a.ndim < 2: raise LinAlgError( @@ -134,7 +194,7 @@ def solve(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: raise TypeError("array type float16 is unsupported in linalg") if a.ndim > 2 or b.ndim > 2: raise NotImplementedError( - "cuNumeric does not yet support stacked 2d arrays" + "cuPyNumeric does not yet support stacked 2d arrays" ) if a.shape[-2] != a.shape[-1]: raise LinAlgError("Last 2 dimensions of the array must be square") @@ -154,7 +214,64 @@ def solve(a: ndarray, b: ndarray, out: Optional[ndarray] = None) -> ndarray: if a.size == 0 or b.size == 0: return empty_like(b) - return _solve(a, b, out) + return _thunk_solve(a, b, out) + + +@add_boilerplate("a") +def svd(a: ndarray, full_matrices: bool = True) -> tuple[ndarray, ...]: + """ + Singular Value Decomposition. 
+ + Parameters + ---------- + a : (M, N) array_like + Array like, at least dimension 2. + full_matrices : bool, optional + If True (default), u and vh are of shape (M, M), (N, N). + If False, the shapes are (M, K) and (K, N), where K = min(M, N). + + Returns + ------- + u : (M, M) array_like + Unitary array(s). + s : (K) array_like + The singular values, sorted in descending order + vh : (N, N) array_like + Unitary array(s). + + Raises + ------ + LinAlgError + If SVD computation does not converge. + + Notes + ----- + Currently does not support the parameters 'compute_uv' + and 'hermitian'. + + See Also + -------- + numpy.linalg.svd + + Availability + -------- + Single GPU, Single CPU + """ + shape = a.shape + if len(shape) < 2: + raise LinAlgError( + f"{len(shape)}-dimensional array given. " + "Array must be at least two-dimensional" + ) + if len(shape) > 2: + raise NotImplementedError( + "cuPyNumeric does not yet support stacked 2d arrays" + ) + if shape[0] < shape[1]: + raise NotImplementedError("cuPyNumeric only supports M >= N") + if np.dtype("e") == a.dtype: + raise TypeError("array type float16 is unsupported in linalg") + return _thunk_svd(a, full_matrices) # This implementation is adapted closely from NumPy @@ -206,7 +323,7 @@ def matrix_power(a: ndarray, n: int) -> ndarray: # Invert if necessary if n < 0: - # TODO: Add this once cunumeric.inv is implemented + # TODO: Add this once cupynumeric.inv is implemented # a = inv(a) # n = abs(n) raise NotImplementedError("Negative exponent in matrix_power") @@ -222,8 +339,8 @@ def matrix_power(a: ndarray, n: int) -> ndarray: # Use binary decomposition to reduce the number of matrix multiplications. # Here, we iterate over the bits of n, from LSB to MSB, raise `a` to # increasing powers of 2, and multiply into the result as needed. 
- z: Union[ndarray, None] = None - result: Union[ndarray, None] = None + z: ndarray | None = None + result: ndarray | None = None while n > 0: z = a if z is None else matmul(z, z) n, bit = divmod(n, 2) @@ -237,7 +354,7 @@ def matrix_power(a: ndarray, n: int) -> ndarray: # This implementation is adapted closely from NumPy def multi_dot( - arrays: Sequence[ndarray], *, out: Union[ndarray, None] = None + arrays: Sequence[ndarray], *, out: ndarray | None = None ) -> ndarray: """ Compute the dot product of two or more arrays in a single function call, @@ -268,9 +385,9 @@ def multi_dot( -------- Multiple GPUs, Multiple CPUs """ - arrays = [convert_to_cunumeric_ndarray(x) for x in arrays] + arrays = [convert_to_cupynumeric_ndarray(x) for x in arrays] if out is not None: - out = convert_to_cunumeric_ndarray(out, share=True) + out = convert_to_cupynumeric_ndarray(out, share=True) n = len(arrays) # optimization only makes sense for len(arrays) > 2 @@ -314,7 +431,7 @@ def multi_dot( def _multi_dot_three( - A: ndarray, B: ndarray, C: ndarray, out: Union[ndarray, None] = None + A: ndarray, B: ndarray, C: ndarray, out: ndarray | None = None ) -> ndarray: """ Find the best order for three arrays and do the multiplication. 
@@ -359,7 +476,7 @@ def _multi_dot_matrix_chain_order( for l_ in range(1, n): for i in range(n - l_): j = i + l_ - m[i, j] = np.Inf + m[i, j] = np.inf for k in range(i, j): q = m[i, k] + m[k + 1, j] + p[i] * p[k + 1] * p[j + 1] if q < m[i, j]: @@ -374,7 +491,7 @@ def _multi_dot( order: npt.NDArray[np.int64], i: int, j: int, - out: Union[ndarray, None] = None, + out: ndarray | None = None, ) -> ndarray: """Actually do the multiplication with the given order.""" if i == j: @@ -394,10 +511,10 @@ def _multi_dot( @add_boilerplate("x") def norm( x: ndarray, - ord: Union[str, int, float, None] = None, - axis: Union[int, tuple[int, int], None] = None, + ord: str | int | float | None = None, + axis: int | tuple[int, int] | None = None, keepdims: bool = False, -) -> Union[float, ndarray]: +) -> float | ndarray: """ Matrix or vector norm. @@ -583,7 +700,7 @@ def norm( raise ValueError("Improper number of dimensions to norm") -def _cholesky(a: ndarray, no_tril: bool = False) -> ndarray: +def _thunk_cholesky(a: ndarray) -> ndarray: """Cholesky decomposition. 
Return the Cholesky decomposition, `L * L.H`, of the square matrix `a`, @@ -627,12 +744,33 @@ def _cholesky(a: ndarray, no_tril: bool = False) -> ndarray: dtype=input.dtype, inputs=(input,), ) - output._thunk.cholesky(input._thunk, no_tril=no_tril) + output._thunk.cholesky(input._thunk) return output -def _solve( - a: ndarray, b: ndarray, output: Optional[ndarray] = None +def _thunk_qr(a: ndarray) -> tuple[ndarray, ...]: + if a.dtype.kind not in ("f", "c"): + a = a.astype("float64") + + k = min(a.shape[0], a.shape[1]) + + out_q = ndarray( + shape=(a.shape[0], k), + dtype=a.dtype, + inputs=(a,), + ) + out_r = ndarray( + shape=(k, a.shape[1]), + dtype=a.dtype, + inputs=(a,), + ) + + a._thunk.qr(out_q._thunk, out_r._thunk) + return out_q, out_r + + +def _thunk_solve( + a: ndarray, b: ndarray, output: ndarray | None = None ) -> ndarray: if a.dtype.kind not in ("f", "c"): a = a.astype("float64") @@ -666,3 +804,32 @@ def _solve( ) out._thunk.solve(a._thunk, b._thunk) return out + + +def _thunk_svd(a: ndarray, full_matrices: bool) -> tuple[ndarray, ...]: + if a.dtype.kind not in ("f", "c"): + a = a.astype("float64") + + k = min(a.shape[0], a.shape[1]) + + out_u = ndarray( + shape=(a.shape[0], a.shape[0] if full_matrices else k), + dtype=a.dtype, + inputs=(a,), + ) + + real_dtype = a.dtype.type(0).real.dtype + + out_s = ndarray( + shape=(k,), + dtype=real_dtype, + inputs=(a,), + ) + out_vh = ndarray( + shape=(a.shape[1] if full_matrices else k, a.shape[1]), + dtype=a.dtype, + inputs=(a,), + ) + + a._thunk.svd(out_u._thunk, out_s._thunk, out_vh._thunk) + return out_u, out_s, out_vh diff --git a/cunumeric/ma/__init__.py b/cupynumeric/ma/__init__.py similarity index 80% rename from cunumeric/ma/__init__.py rename to cupynumeric/ma/__init__.py index 14a9e0d46..830910f51 100644 --- a/cunumeric/ma/__init__.py +++ b/cupynumeric/ma/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 
2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,9 +16,9 @@ import numpy.ma as _ma -from cunumeric.array import maybe_convert_to_np_ndarray -from cunumeric.coverage import clone_module -from cunumeric.ma._masked_array import MaskedArray +from .._array.util import maybe_convert_to_np_ndarray +from .._utils.coverage import clone_module +from ._masked_array import MaskedArray masked_array = MaskedArray diff --git a/cunumeric/ma/_masked_array.py b/cupynumeric/ma/_masked_array.py similarity index 87% rename from cunumeric/ma/_masked_array.py rename to cupynumeric/ma/_masked_array.py index 4420bdf1c..1884d2cea 100644 --- a/cunumeric/ma/_masked_array.py +++ b/cupynumeric/ma/_masked_array.py @@ -1,4 +1,4 @@ -# Copyright 2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Any, Type, Union +from typing import TYPE_CHECKING, Any, Type if TYPE_CHECKING: import numpy.typing as npt @@ -23,8 +23,8 @@ import numpy as _np -from ..array import maybe_convert_to_np_ndarray -from ..coverage import clone_class +from .._array.util import maybe_convert_to_np_ndarray +from .._utils.coverage import clone_class NDARRAY_INTERNAL = { "__array_finalize__", @@ -37,7 +37,7 @@ "__array_wrap__", } -MaskType = _np.bool_ +MaskType = bool nomask = MaskType(0) @@ -51,8 +51,8 @@ def __new__(cls: Type[Any], *args: Any, **kw: Any) -> MaskedArray: def __init__( self, data: Any = None, - mask: _np.bool_ = nomask, - dtype: Union[npt.DTypeLike, None] = None, + mask: bool = nomask, + dtype: npt.DTypeLike | None = None, copy: bool = False, subok: bool = True, ndmin: int = 0, @@ -60,7 +60,7 @@ def __init__( keep_mask: Any = True, hard_mask: Any = None, shrink: bool = True, - order: Union[str, None] = None, + order: str | None = None, ) 
-> None: self._internal_ma = _np.ma.MaskedArray( # type: ignore data=maybe_convert_to_np_ndarray(data), diff --git a/cunumeric/patch.py b/cupynumeric/patch.py similarity index 80% rename from cunumeric/patch.py rename to cupynumeric/patch.py index d47ab24e3..b92a7f7e3 100644 --- a/cunumeric/patch.py +++ b/cupynumeric/patch.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,12 +13,12 @@ # limitations under the License. # """ This module may be imported in order to globably replace NumPy with -CuNumeric. +cuPyNumeric. In order to function properly, this module must be imported early (ideally at the very start of a script). The ``numpy`` module in ``sys.modules`` -will be replaced with ``cunumeric`` so that any subsequent use of the -``numpy`` module will use ``cunumeric`` instead. +will be replaced with ``cupynumeric`` so that any subsequent use of the +``numpy`` module will use ``cupynumeric`` instead. This module is primarily intended for quick demonstrations or proofs of concept. @@ -28,6 +28,6 @@ import sys -import cunumeric +import cupynumeric -sys.modules["numpy"] = cunumeric +sys.modules["numpy"] = cupynumeric diff --git a/cunumeric/py.typed b/cupynumeric/py.typed similarity index 100% rename from cunumeric/py.typed rename to cupynumeric/py.typed diff --git a/cunumeric/random/__init__.py b/cupynumeric/random/__init__.py similarity index 67% rename from cunumeric/random/__init__.py rename to cupynumeric/random/__init__.py index 0f397a0c5..09e5054ff 100644 --- a/cunumeric/random/__init__.py +++ b/cupynumeric/random/__init__.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -16,16 +16,13 @@ import numpy.random as _nprandom -from cunumeric.array import maybe_convert_to_np_ndarray -from cunumeric.coverage import clone_module -from cunumeric.runtime import runtime +from .._array.util import maybe_convert_to_np_ndarray +from .._utils.coverage import clone_module +from ..runtime import runtime -if runtime.has_curand: - from cunumeric.random.random import * - from cunumeric.random.bitgenerator import * - from cunumeric.random.generator import * -else: - from cunumeric.random.legacy import * +from ._random import * +from ._bitgenerator import * +from ._generator import * clone_module( _nprandom, @@ -36,4 +33,5 @@ del maybe_convert_to_np_ndarray del clone_module +del runtime del _nprandom diff --git a/cunumeric/random/bitgenerator.py b/cupynumeric/random/_bitgenerator.py similarity index 89% rename from cunumeric/random/bitgenerator.py rename to cupynumeric/random/_bitgenerator.py index 8d99e61aa..55ecbea8e 100644 --- a/cunumeric/random/bitgenerator.py +++ b/cupynumeric/random/_bitgenerator.py @@ -1,4 +1,4 @@ -# Copyright 2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -16,11 +16,11 @@ import time from abc import abstractproperty -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING import numpy as np -from ..array import ndarray +from .._array.array import ndarray from ..config import BitGeneratorType from ..runtime import runtime @@ -33,7 +33,7 @@ class BitGenerator: def __init__( self, - seed: Union[int, None] = None, + seed: int | None = None, forceBuild: bool = False, ) -> None: """ @@ -44,7 +44,7 @@ def __init__( Parameters ---------- - seed : {None, int}, optional + seed : {int, None}, optional A seed to initialize the `BitGenerator`. If None, then fresh, unpredictable entropy will be pulled from the OS. 
@@ -53,7 +53,7 @@ def __init__( numpy.random.BitGenerator Availability - -------- + ------------ Multiple GPUs, Multiple CPUs """ if type(self) is BitGenerator: @@ -76,7 +76,7 @@ def __del__(self) -> None: runtime.bitgenerator_destroy(self.handle, disposing=True) # when output is false => skip ahead - def random_raw(self, shape: Union[NdShapeLike, None] = None) -> ndarray: + def random_raw(self, shape: NdShapeLike | None = None) -> ndarray: if shape is None: shape = (1,) if not isinstance(shape, tuple): @@ -90,8 +90,8 @@ def random_raw(self, shape: Union[NdShapeLike, None] = None) -> ndarray: def integers( self, low: int, - high: Union[int, None] = None, - shape: Union[NdShapeLike, None] = None, + high: int | None = None, + shape: NdShapeLike | None = None, type: npt.DTypeLike = np.int64, endpoint: bool = False, ) -> ndarray: @@ -112,9 +112,9 @@ def integers( def random( self, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, - res: Union[ndarray, None] = None, + res: ndarray | None = None, ) -> ndarray: if shape is None: shape = (1,) @@ -131,7 +131,7 @@ def lognormal( self, mean: float = 0.0, sigma: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -148,7 +148,7 @@ def normal( self, mean: float = 0.0, sigma: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -165,7 +165,7 @@ def uniform( self, low: float = 0.0, high: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -178,9 +178,7 @@ def uniform( ) return res - def poisson( - self, lam: float, shape: Union[NdShapeLike, None] = None - ) -> ndarray: + def poisson(self, lam: float, shape: NdShapeLike | None = None) -> ndarray: if 
shape is None: shape = (1,) if not isinstance(shape, tuple): @@ -194,7 +192,7 @@ def poisson( def exponential( self, scale: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -211,7 +209,7 @@ def gumbel( self, mu: float = 0.0, beta: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -228,7 +226,7 @@ def laplace( self, mu: float = 0.0, beta: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -245,7 +243,7 @@ def logistic( self, mu: float = 0.0, beta: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -261,7 +259,7 @@ def logistic( def pareto( self, alpha: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -277,7 +275,7 @@ def pareto( def power( self, alpha: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -293,7 +291,7 @@ def power( def rayleigh( self, sigma: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -310,7 +308,7 @@ def cauchy( self, x0: float, gamma: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -328,7 +326,7 @@ def triangular( a: float, b: float, c: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -345,7 
+343,7 @@ def weibull( self, lam: float, k: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -358,7 +356,7 @@ def weibull( ) return res - def bytes(self, length: Union[int, tuple[int, ...]]) -> ndarray: + def bytes(self, length: int | tuple[int, ...]) -> ndarray: if not isinstance(length, tuple): length = (length,) res = ndarray(length, dtype=np.dtype(np.uint8)) @@ -374,7 +372,7 @@ def beta( self, a: float, b: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -391,7 +389,7 @@ def f( self, dfnum: float, dfden: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -412,7 +410,7 @@ def f( def logseries( self, p: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: if shape is None: @@ -430,7 +428,7 @@ def noncentral_f( dfnum: float, dfden: float, nonc: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -453,7 +451,7 @@ def chisquare( self, df: float, nonc: float = 0.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -470,7 +468,7 @@ def gamma( self, k: float, theta: float = 1.0, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -486,7 +484,7 @@ def gamma( def standard_t( self, df: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -504,7 +502,7 @@ def hypergeometric( ngood: int, nbad: 
int, nsample: int, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: if shape is None: @@ -527,7 +525,7 @@ def vonmises( self, mu: float, kappa: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -543,7 +541,7 @@ def vonmises( def zipf( self, alpha: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: if shape is None: @@ -559,7 +557,7 @@ def zipf( def geometric( self, p: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: if shape is None: @@ -576,7 +574,7 @@ def wald( self, mean: float, scale: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: if shape is None: @@ -593,7 +591,7 @@ def binomial( self, ntrials: int, p: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: if shape is None: @@ -610,7 +608,7 @@ def negative_binomial( self, ntrials: int, p: float, - shape: Union[NdShapeLike, None] = None, + shape: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: if shape is None: diff --git a/cunumeric/random/generator.py b/cupynumeric/random/_generator.py similarity index 83% rename from cunumeric/random/generator.py rename to cupynumeric/random/_generator.py index 7145de662..4736bd898 100644 --- a/cunumeric/random/generator.py +++ b/cupynumeric/random/_generator.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,16 +14,16 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Union +from typing import TYPE_CHECKING import numpy as np -from cunumeric.random.bitgenerator import XORWOW, BitGenerator +from ._bitgenerator import XORWOW, BitGenerator if TYPE_CHECKING: import numpy.typing as npt - from ..array import ndarray + from .._array.array import ndarray from ..types import NdShapeLike @@ -43,8 +43,8 @@ def __init__(self, bit_generator: BitGenerator) -> None: then an array with that shape is filled and returned. - The function :func:`cunumeric.random.default_rng` will instantiate - a `Generator` with cuNumeric's default `BitGenerator`. + The function :func:`cupynumeric.random.default_rng` will instantiate + a `Generator` with cuPyNumeric's default `BitGenerator`. Parameters ---------- @@ -57,7 +57,7 @@ def __init__(self, bit_generator: BitGenerator) -> None: default_rng : Recommended constructor for `Generator`. Availability - -------- + ------------ Multiple GPUs, Multiple CPUs """ @@ -67,7 +67,7 @@ def beta( self, a: float, b: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.beta(a=a, b=b, shape=size, dtype=dtype) @@ -76,21 +76,21 @@ def binomial( self, ntrials: int, p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: return self.bit_generator.binomial( ntrials=ntrials, p=p, shape=size, dtype=dtype ) - def bytes(self, length: Union[int, tuple[int, ...]]) -> ndarray: + def bytes(self, length: int | tuple[int, ...]) -> ndarray: return self.bit_generator.bytes(length=length) def cauchy( self, x0: float, gamma: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.cauchy( @@ -100,7 +100,7 @@ def cauchy( def chisquare( self, df: float, - size: 
Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.chisquare( @@ -110,7 +110,7 @@ def chisquare( def exponential( self, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.exponential( @@ -121,7 +121,7 @@ def f( self, dfnum: float, dfden: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.f( @@ -132,7 +132,7 @@ def gamma( self, shape: float, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.gamma( @@ -142,7 +142,7 @@ def gamma( def geometric( self, p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: return self.bit_generator.geometric(p=p, shape=size, dtype=dtype) @@ -151,7 +151,7 @@ def gumbel( self, loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.gumbel( @@ -163,7 +163,7 @@ def hypergeometric( ngood: int, nbad: int, nsample: int, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: return self.bit_generator.hypergeometric( @@ -173,8 +173,8 @@ def hypergeometric( def integers( self, low: int, - high: Union[int, None] = None, - size: Union[NdShapeLike, None] = None, + high: int | None = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.int64, endpoint: bool = False, ) -> ndarray: @@ -184,7 +184,7 @@ def laplace( self, loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, 
dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.laplace( @@ -195,7 +195,7 @@ def logistic( self, loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.logistic( @@ -206,7 +206,7 @@ def lognormal( self, mean: float = 0.0, sigma: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.lognormal(mean, sigma, size, dtype) @@ -214,7 +214,7 @@ def lognormal( def logseries( self, p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: return self.bit_generator.logseries(p=p, shape=size, dtype=dtype) @@ -223,7 +223,7 @@ def negative_binomial( self, ntrials: int, p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: return self.bit_generator.negative_binomial( @@ -234,7 +234,7 @@ def noncentral_chisquare( self, df: float, nonc: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.chisquare( @@ -246,7 +246,7 @@ def noncentral_f( dfnum: float, dfden: float, nonc: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.noncentral_f( @@ -257,7 +257,7 @@ def normal( self, loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.normal( @@ -267,29 +267,29 @@ def normal( def pareto( self, a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return 
self.bit_generator.pareto(alpha=a, shape=size, dtype=dtype) def poisson( - self, lam: float = 1.0, size: Union[NdShapeLike, None] = None + self, lam: float = 1.0, size: NdShapeLike | None = None ) -> ndarray: return self.bit_generator.poisson(lam, size) def power( self, a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.power(alpha=a, shape=size, dtype=dtype) def random( self, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, - out: Union[ndarray, None] = None, + out: ndarray | None = None, ) -> ndarray: if out is not None: if size is not None and out.shape != size: @@ -306,7 +306,7 @@ def random( def rayleigh( self, scale: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.rayleigh( @@ -315,14 +315,14 @@ def rayleigh( def standard_cauchy( self, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.cauchy(0.0, 1.0, size, dtype) def standard_exponential( self, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.exponential(1.0, size, dtype) @@ -330,7 +330,7 @@ def standard_exponential( def standard_gamma( self, shape: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.gamma(shape=shape, scale=1.0, size=size, dtype=dtype) @@ -338,7 +338,7 @@ def standard_gamma( def standard_t( self, df: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.standard_t(df=df, shape=size, dtype=dtype) @@ -348,7 +348,7 @@ def triangular( left: float, mode: 
float, right: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.triangular( @@ -359,7 +359,7 @@ def uniform( self, low: float = 0.0, high: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.uniform(low, high, size, dtype) @@ -368,7 +368,7 @@ def vonmises( self, mu: float, kappa: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.vonmises( @@ -379,7 +379,7 @@ def wald( self, mean: float, scale: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.wald(mean, scale, shape=size, dtype=dtype) @@ -387,7 +387,7 @@ def wald( def weibull( self, a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: return self.bit_generator.weibull(lam=1, k=a, shape=size, dtype=dtype) @@ -395,14 +395,14 @@ def weibull( def zipf( self, a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: return self.bit_generator.zipf(alpha=a, shape=size, dtype=dtype) def default_rng( - seed: Union[None, int, BitGenerator, Generator] = None + seed: int | BitGenerator | Generator | None = None, ) -> Generator: """ Construct a new Generator with the default BitGenerator (XORWOW). 
diff --git a/cunumeric/random/random.py b/cupynumeric/random/_random.py similarity index 90% rename from cunumeric/random/random.py rename to cupynumeric/random/_random.py index d877d391d..6879e9053 100644 --- a/cunumeric/random/random.py +++ b/cupynumeric/random/_random.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,14 +14,14 @@ # from __future__ import annotations -from typing import TYPE_CHECKING, Any, Union +from typing import TYPE_CHECKING, Any import numpy as np -from cunumeric.array import ndarray -from cunumeric.coverage import clone_class -from cunumeric.random import generator -from cunumeric.runtime import runtime +from .._array.array import ndarray +from .._utils.coverage import clone_class +from ..runtime import runtime +from ._generator import default_rng, get_static_generator # NOQA if TYPE_CHECKING: import numpy.typing as npt @@ -29,10 +29,7 @@ from ..types import NdShapeLike -default_rng = generator.default_rng - - -def seed(init: Union[int, None] = None) -> None: +def seed(init: int | None = None) -> None: """ Reseed the legacy random number generator. 
@@ -54,7 +51,7 @@ def seed(init: Union[int, None] = None) -> None: def beta( a: float, b: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -100,13 +97,13 @@ def beta( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().beta(a, b, size, dtype) + return get_static_generator().beta(a, b, size, dtype) def binomial( ntrials: int, p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: """ @@ -145,7 +142,7 @@ def binomial( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().binomial(ntrials, p, size, dtype) + return get_static_generator().binomial(ntrials, p, size, dtype) def bytes(length: int) -> ndarray: @@ -172,12 +169,12 @@ def bytes(length: int) -> ndarray: -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().bytes(length) + return get_static_generator().bytes(length) def chisquare( df: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -218,12 +215,12 @@ def chisquare( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().chisquare(df, size, dtype) + return get_static_generator().chisquare(df, size, dtype) def exponential( scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -278,13 +275,13 @@ def exponential( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().exponential(scale, size, dtype) + return get_static_generator().exponential(scale, size, dtype) def f( dfnum: float, dfden: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -326,13 +323,13 @@ def f( -------- Multiple 
GPUs, Multiple CPUs """ - return generator.get_static_generator().f(dfnum, dfden, size, dtype) + return get_static_generator().f(dfnum, dfden, size, dtype) def gamma( shape: float, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -369,12 +366,12 @@ def gamma( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().gamma(shape, scale, size, dtype) + return get_static_generator().gamma(shape, scale, size, dtype) def geometric( p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: """ @@ -416,13 +413,13 @@ def geometric( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().geometric(p, size, dtype) + return get_static_generator().geometric(p, size, dtype) def gumbel( loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -458,14 +455,14 @@ def gumbel( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().gumbel(loc, scale, size, dtype) + return get_static_generator().gumbel(loc, scale, size, dtype) def hypergeometric( ngood: int, nbad: int, nsample: int, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: """ @@ -508,7 +505,7 @@ def hypergeometric( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().hypergeometric( + return get_static_generator().hypergeometric( ngood, nbad, nsample, size, dtype ) @@ -516,7 +513,7 @@ def hypergeometric( def laplace( loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -555,13 +552,13 @@ def laplace( -------- Multiple GPUs, Multiple CPUs 
""" - return generator.get_static_generator().laplace(loc, scale, size, dtype) + return get_static_generator().laplace(loc, scale, size, dtype) def logistic( loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -597,13 +594,13 @@ def logistic( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().logistic(loc, scale, size, dtype) + return get_static_generator().logistic(loc, scale, size, dtype) def lognormal( mean: float = 0.0, sigma: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -641,12 +638,12 @@ def lognormal( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().lognormal(mean, sigma, size, dtype) + return get_static_generator().lognormal(mean, sigma, size, dtype) def logseries( p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: """ @@ -679,13 +676,13 @@ def logseries( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().logseries(p, size, dtype) + return get_static_generator().logseries(p, size, dtype) def negative_binomial( n: int, p: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: """ @@ -723,15 +720,13 @@ def negative_binomial( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().negative_binomial( - n, p, size, dtype - ) + return get_static_generator().negative_binomial(n, p, size, dtype) def noncentral_chisquare( df: float, nonc: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -767,16 +762,14 @@ def noncentral_chisquare( -------- Multiple GPUs, Multiple CPUs """ - return 
generator.get_static_generator().noncentral_chisquare( - df, nonc, size, dtype - ) + return get_static_generator().noncentral_chisquare(df, nonc, size, dtype) def noncentral_f( dfnum: float, dfden: float, nonc: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -816,15 +809,13 @@ def noncentral_f( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().noncentral_f( - dfnum, dfden, nonc, size, dtype - ) + return get_static_generator().noncentral_f(dfnum, dfden, nonc, size, dtype) def normal( loc: float = 0.0, scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -873,12 +864,12 @@ def normal( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().normal(loc, scale, size, dtype) + return get_static_generator().normal(loc, scale, size, dtype) def pareto( a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -926,12 +917,10 @@ def pareto( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().pareto(a, size, dtype) + return get_static_generator().pareto(a, size, dtype) -def poisson( - lam: float = 1.0, size: Union[NdShapeLike, None] = None -) -> ndarray: +def poisson(lam: float = 1.0, size: NdShapeLike | None = None) -> ndarray: """ poisson(lam=1.0, size=None) @@ -964,12 +953,12 @@ def poisson( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().poisson(lam, size) + return get_static_generator().poisson(lam, size) def power( a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1007,10 +996,10 @@ def power( -------- Multiple GPUs, Multiple CPUs """ - return 
generator.get_static_generator().power(a, size, dtype) + return get_static_generator().power(a, size, dtype) -def rand(*shapeargs: int) -> Union[float, ndarray]: +def rand(*shapeargs: int) -> float | ndarray: """ rand(d0, d1, ..., dn) @@ -1044,10 +1033,10 @@ def rand(*shapeargs: int) -> Union[float, ndarray]: def randint( low: int, - high: Union[int, None] = None, - size: Union[NdShapeLike, None] = None, - dtype: Union[np.dtype[Any], type] = int, -) -> Union[int, ndarray, npt.NDArray[Any]]: + high: int | None = None, + size: NdShapeLike | None = None, + dtype: np.dtype[Any] | type = int, +) -> int | ndarray | npt.NDArray[Any]: """ randint(low, high=None, size=None, dtype=int) @@ -1100,10 +1089,10 @@ def randint( elif low >= high: raise ValueError("low >= high") - return generator.get_static_generator().integers(low, high, size, dtype) + return get_static_generator().integers(low, high, size, dtype) -def randn(*shapeargs: int) -> Union[float, ndarray]: +def randn(*shapeargs: int) -> float | ndarray: """ randn(d0, d1, ..., dn) @@ -1140,8 +1129,8 @@ def randn(*shapeargs: int) -> Union[float, ndarray]: def random( - size: Union[NdShapeLike, None] = None, -) -> Union[float, ndarray]: + size: NdShapeLike | None = None, +) -> float | ndarray: """ random(size=None) @@ -1156,16 +1145,16 @@ def random( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().random(size) + return get_static_generator().random(size) # deprecated in numpy from version 1.11.0 def random_integers( low: int, - high: Union[int, None] = None, - size: Union[NdShapeLike, None] = None, - dtype: Union[np.dtype[Any], type] = int, -) -> Union[int, ndarray, npt.NDArray[Any]]: + high: int | None = None, + size: NdShapeLike | None = None, + dtype: np.dtype[Any] | type = int, +) -> int | ndarray | npt.NDArray[Any]: """ random_integers(low, high=None, size=None) @@ -1214,8 +1203,8 @@ def random_integers( def random_sample( - size: Union[NdShapeLike, None] = None, dtype: 
npt.DTypeLike = np.float64 -) -> Union[float, ndarray]: + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64 +) -> float | ndarray: """ random_sample(size=None) @@ -1256,7 +1245,7 @@ def random_sample( def rayleigh( scale: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1289,14 +1278,14 @@ def rayleigh( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().rayleigh(scale, size, dtype) + return get_static_generator().rayleigh(scale, size, dtype) sample = random_sample def standard_cauchy( - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1326,11 +1315,11 @@ def standard_cauchy( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().standard_cauchy(size, dtype) + return get_static_generator().standard_cauchy(size, dtype) def standard_exponential( - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1361,12 +1350,12 @@ def standard_exponential( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().standard_exponential(size, dtype) + return get_static_generator().standard_exponential(size, dtype) def standard_gamma( shape: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1399,12 +1388,12 @@ def standard_gamma( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().standard_gamma(shape, size, dtype) + return get_static_generator().standard_gamma(shape, size, dtype) def standard_t( df: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1439,14 +1428,14 @@ def standard_t( -------- Multiple GPUs, Multiple 
CPUs """ - return generator.get_static_generator().standard_t(df, size, dtype) + return get_static_generator().standard_t(df, size, dtype) def triangular( left: float, mode: float, right: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1487,15 +1476,13 @@ def triangular( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().triangular( - left, mode, right, size, dtype - ) + return get_static_generator().triangular(left, mode, right, size, dtype) def uniform( low: float = 0.0, high: float = 1.0, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1537,13 +1524,13 @@ def uniform( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().uniform(low, high, size, dtype) + return get_static_generator().uniform(low, high, size, dtype) def vonmises( mu: float, kappa: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1583,13 +1570,13 @@ def vonmises( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().vonmises(mu, kappa, size, dtype) + return get_static_generator().vonmises(mu, kappa, size, dtype) def wald( mean: float, scale: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1630,12 +1617,12 @@ def wald( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().wald(mean, scale, size, dtype) + return get_static_generator().wald(mean, scale, size, dtype) def weibull( a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.float64, ) -> ndarray: """ @@ -1675,12 +1662,12 @@ def weibull( -------- Multiple GPUs, Multiple CPUs """ - return 
generator.get_static_generator().weibull(a, size, dtype) + return get_static_generator().weibull(a, size, dtype) def zipf( a: float, - size: Union[NdShapeLike, None] = None, + size: NdShapeLike | None = None, dtype: npt.DTypeLike = np.uint32, ) -> ndarray: """ @@ -1718,7 +1705,7 @@ def zipf( -------- Multiple GPUs, Multiple CPUs """ - return generator.get_static_generator().zipf(a, size, dtype) + return get_static_generator().zipf(a, size, dtype) def _random_state_fallback(obj: Any) -> Any: @@ -1726,7 +1713,7 @@ def _random_state_fallback(obj: Any) -> Any: # wrapped vanilla NumPy RandomState if isinstance(obj, RandomState): return obj._np_random_state - # eagerly convert any cuNumeric ndarrays to NumPy + # eagerly convert any cuPyNumeric ndarrays to NumPy if isinstance(obj, ndarray): return obj.__array__() return obj @@ -1746,5 +1733,5 @@ class RandomState: Random seed used to initialize the pseudo-random number generator. """ - def __init__(self, seed: Union[int, None] = None): + def __init__(self, seed: int | None = None): self._np_random_state = np.random.RandomState(seed or 0) diff --git a/cunumeric/runtime.py b/cupynumeric/runtime.py similarity index 60% rename from cunumeric/runtime.py rename to cupynumeric/runtime.py index a07c0847b..7af064d46 100644 --- a/cunumeric/runtime.py +++ b/cupynumeric/runtime.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
@@ -14,122 +14,134 @@ # from __future__ import annotations -import struct +import math import warnings -from functools import reduce -from typing import TYPE_CHECKING, Any, Optional, Sequence, Union +from functools import lru_cache, reduce +from typing import TYPE_CHECKING, Any, Sequence, TypeGuard import legate.core.types as ty import numpy as np -from legate.core import LEGATE_MAX_DIM, ProcessorKind, Rect, get_legate_runtime -from legate.core.context import Context as LegateContext +from legate.core import LEGATE_MAX_DIM, Scalar, TaskTarget, get_legate_runtime from legate.settings import settings as legate_settings -from typing_extensions import TypeGuard +from ._utils.array import calculate_volume, is_supported_dtype, to_core_type +from ._utils.stack import find_last_user_stacklevel from .config import ( BitGeneratorOperation, - CuNumericOpCode, - CuNumericTunable, - cunumeric_context, - cunumeric_lib, + CuPyNumericOpCode, + TransferType, + cupynumeric_lib, ) -from .deferred import DeferredArray -from .eager import EagerArray -from .settings import settings -from .thunk import NumPyThunk -from .types import NdShape -from .utils import calculate_volume, find_last_user_stacklevel, to_core_dtype + +# We need to be careful about importing from other cupynumeric modules. The +# runtime is global and used in many places, but also depends on many of the +# other modules. Things like config and utils are OK, but imports for thunks, +# array types, etc. need to be deferred in order to avoid circular imports. 
+ if TYPE_CHECKING: import numpy.typing as npt - from legate.core._legion.future import Future - from legate.core.operation import AutoTask, ManualTask - - from .array import ndarray + from legate.core import AutoTask, ManualTask + from ._array.array import ndarray + from ._thunk.deferred import DeferredArray + from ._thunk.eager import EagerArray + from ._thunk.thunk import NumPyThunk + from .types import NdShape DIMENSION = int +legate_runtime = get_legate_runtime() + + +def thunk_from_scalar( + bytes: bytes, shape: NdShape, dtype: np.dtype[Any] +) -> DeferredArray: + from ._thunk.deferred import DeferredArray + + store = legate_runtime.create_store_from_scalar( + Scalar(bytes, to_core_type(dtype)), + shape=shape, + ) + return DeferredArray(store) + + +@lru_cache +def cached_thunk_from_scalar( + bytes: bytes, shape: NdShape, dtype: np.dtype[Any] +) -> DeferredArray: + return thunk_from_scalar(bytes, shape, dtype) + class Runtime(object): - def __init__(self, legate_context: LegateContext) -> None: - self.legate_context = legate_context - self.legate_runtime = get_legate_runtime() + def __init__(self) -> None: + self.library = legate_runtime.find_library(cupynumeric_lib.name) self.current_random_epoch = 0 self.current_random_bitgenid = 0 self.current_random_bitgen_zombies: tuple[Any, ...] 
= () self.destroyed = False self.api_calls: list[tuple[str, str, bool]] = [] - self.max_eager_volume = int( - self.legate_context.get_tunable( - CuNumericTunable.MAX_EAGER_VOLUME, - ty.int32, - ) + max_eager_volume = ( + cupynumeric_lib.shared_object.cupynumeric_max_eager_volume() + ) + self.max_eager_volume = int(np.asarray(max_eager_volume)) + + assert cupynumeric_lib.shared_object is not None + self.cupynumeric_lib = cupynumeric_lib.shared_object + self.has_cusolvermp = ( + cupynumeric_lib.shared_object.cupynumeric_has_cusolvermp() ) - # Make sure that our CuNumericLib object knows about us so it can - # destroy us - cunumeric_lib.set_runtime(self) - assert cunumeric_lib.shared_object is not None - self.cunumeric_lib = cunumeric_lib.shared_object - self.has_curand = cunumeric_lib.shared_object.cunumeric_has_curand() + from .settings import settings settings.warn = settings.warn() or legate_settings.test() if self.num_gpus > 0 and settings.preload_cudalibs(): self._load_cudalibs() - # Maps dimensions to point types - self._cached_point_types: dict[DIMENSION, ty.Dtype] = dict() # Maps value types to struct types used in argmin/argmax - self._cached_argred_types: dict[ty.Dtype, ty.Dtype] = dict() + self._cached_argred_types: dict[ty.Type, ty.Type] = dict() @property def num_procs(self) -> int: - return len(self.legate_runtime.machine) + return len(legate_runtime.machine) @property def num_gpus(self) -> int: - return self.legate_runtime.machine.count(ProcessorKind.GPU) - - def get_point_type(self, dim: DIMENSION) -> ty.Dtype: - cached = self._cached_point_types.get(dim) - if cached is not None: - return cached - point_dtype = ty.array_type(ty.int64, dim) if dim > 1 else ty.int64 - self._cached_point_types[dim] = point_dtype - return point_dtype + return legate_runtime.machine.count(TaskTarget.GPU) def record_api_call( self, name: str, location: str, implemented: bool ) -> None: + from .settings import settings + assert settings.report_coverage() 
self.api_calls.append((name, location, implemented)) def _load_cudalibs(self) -> None: - task = self.legate_context.create_manual_task( - CuNumericOpCode.LOAD_CUDALIBS, - launch_domain=Rect(lo=(0,), hi=(self.num_gpus,)), + task = legate_runtime.create_manual_task( + self.library, + CuPyNumericOpCode.LOAD_CUDALIBS, + [self.num_gpus], ) task.execute() - self.legate_runtime.issue_execution_fence(block=True) + legate_runtime.issue_execution_fence(block=True) - def _unload_cudalibs(self) -> None: - task = self.legate_context.create_manual_task( - CuNumericOpCode.UNLOAD_CUDALIBS, - launch_domain=Rect(lo=(0,), hi=(self.num_gpus,)), - ) - task.execute() - - def get_argred_type(self, value_dtype: ty.Dtype) -> ty.Dtype: + def get_argred_type(self, value_dtype: ty.Type) -> ty.Type: cached = self._cached_argred_types.get(value_dtype) if cached is not None: return cached argred_dtype = ty.struct_type([ty.int64, value_dtype], True) self._cached_argred_types[value_dtype] = argred_dtype - self.cunumeric_lib.cunumeric_register_reduction_op( - argred_dtype.uid, value_dtype.code + ids = self.cupynumeric_lib.cupynumeric_register_reduction_ops( + value_dtype.code + ) + argred_dtype.record_reduction_op( + ty.ReductionOpKind.MAX, ids.argmax_redop_id + ) + argred_dtype.record_reduction_op( + ty.ReductionOpKind.MIN, ids.argmin_redop_id ) return argred_dtype @@ -138,13 +150,15 @@ def _report_coverage(self) -> None: implemented = sum(int(impl) for (_, _, impl) in self.api_calls) if total == 0: - print("cuNumeric API coverage: 0/0") + print("cuPyNumeric API coverage: 0/0") else: print( - f"cuNumeric API coverage: {implemented}/{total} " + f"cuPyNumeric API coverage: {implemented}/{total} " f"({implemented / total * 100}%)" ) + from .settings import settings + if (dump_csv := settings.report_dump_csv()) is not None: with open(dump_csv, "w") as f: print("function_name,location,implemented", file=f) @@ -152,46 +166,20 @@ def _report_coverage(self) -> None: print(f"{func_name},{loc},{impl}", 
file=f) def destroy(self) -> None: + from .settings import settings + assert not self.destroyed - if self.num_gpus > 0: - self._unload_cudalibs() if settings.report_coverage(): self._report_coverage() self.destroyed = True - def create_scalar( - self, - array: Union[memoryview, npt.NDArray[Any]], - shape: Optional[NdShape] = None, - ) -> Future: - data = array.tobytes() - buf = struct.pack(f"{len(data)}s", data) - return self.legate_runtime.create_future(buf, len(buf)) - - def create_wrapped_scalar( - self, - array: Union[memoryview, npt.NDArray[Any]], - dtype: np.dtype[Any], - shape: NdShape, - ) -> DeferredArray: - future = self.create_scalar(array, shape) - assert all(extent == 1 for extent in shape) - core_dtype = to_core_dtype(dtype) - store = self.legate_context.create_store( - core_dtype, - shape=shape, - storage=future, - optimize_scalar=True, - ) - return DeferredArray(self, store) - def bitgenerator_populate_task( self, - task: Union[AutoTask, ManualTask], + task: AutoTask | ManualTask, taskop: int, generatorID: int, generatorType: int = 0, - seed: Union[int, None] = 0, + seed: int | None = 0, flags: int = 0, ) -> None: task.add_scalar_arg(taskop, ty.int32) @@ -203,15 +191,16 @@ def bitgenerator_populate_task( def bitgenerator_create( self, generatorType: int, - seed: Union[int, None], + seed: int | None, flags: int, forceCreate: bool = False, ) -> int: self.current_random_bitgenid = self.current_random_bitgenid + 1 if forceCreate: - task = self.legate_context.create_manual_task( - CuNumericOpCode.BITGENERATOR, - launch_domain=Rect(lo=(0,), hi=(self.num_procs,)), + task = legate_runtime.create_manual_task( + self.library, + CuPyNumericOpCode.BITGENERATOR, + (self.num_procs,), ) self.bitgenerator_populate_task( task, @@ -226,7 +215,7 @@ def bitgenerator_create( ) self.current_random_bitgen_zombies = () task.execute() - self.legate_runtime.issue_execution_fence() + legate_runtime.issue_execution_fence() return self.current_random_bitgenid def 
bitgenerator_destroy( @@ -237,10 +226,11 @@ def bitgenerator_destroy( self.current_random_bitgen_zombies += (handle,) else: # with explicit destruction, do schedule a task - self.legate_runtime.issue_execution_fence() - task = self.legate_context.create_manual_task( - CuNumericOpCode.BITGENERATOR, - launch_domain=Rect(lo=(0,), hi=(self.num_procs,)), + legate_runtime.issue_execution_fence() + task = legate_runtime.create_manual_task( + self.library, + CuPyNumericOpCode.BITGENERATOR, + (self.num_procs,), ) self.bitgenerator_populate_task( task, BitGeneratorOperation.DESTROY, handle @@ -261,9 +251,9 @@ def get_next_random_epoch(self) -> int: def get_numpy_thunk( self, - obj: Union[ndarray, npt.NDArray[Any]], + obj: ndarray | npt.NDArray[Any], share: bool = False, - dtype: Optional[np.dtype[Any]] = None, + dtype: np.dtype[Any] | None = None, ) -> NumPyThunk: # Check to see if this object implements the Legate data interface if hasattr(obj, "__legate_data_interface__"): @@ -277,13 +267,14 @@ def get_numpy_thunk( raise ValueError("Legate data must be array-like") field = next(iter(data)) array = data[field] - stores = array.stores() - if len(stores) != 2: - raise ValueError("Legate data must be array-like") - if stores[0] is not None: - raise NotImplementedError("Need support for masked arrays") - store = stores[1] - return DeferredArray(self, store) + if array.nested or array.nullable: + raise NotImplementedError( + "Array must be non-nullable and not nested" + ) + + from ._thunk.deferred import DeferredArray + + return DeferredArray(array.data) # See if this is a normal numpy array # Make sure to convert numpy matrices to numpy arrays here # as the former doesn't behave quite like the latter @@ -292,21 +283,40 @@ def get_numpy_thunk( if share: obj = np.asarray(obj, dtype=dtype) else: + from ._array.array import ndarray + from ._module.array_joining import stack + + if ( + any( + ( + isinstance(obj, tuple), + isinstance(obj, list), + ) + ) + and len(obj) > 1 + and all( 
+ (isinstance(o, ndarray) or isinstance(o, np.ndarray)) + for o in obj + ) + and math.prod(obj[0].shape) != 0 + ): + obj = stack(obj) # type: ignore + return obj._thunk obj = np.array(obj, dtype=dtype) elif dtype is not None and dtype != obj.dtype: obj = obj.astype(dtype) elif not share: obj = obj.copy() - return self.find_or_create_array_thunk(obj, share=share) - - def has_external_attachment(self, array: Any) -> bool: - assert array.base is None or not isinstance(array.base, np.ndarray) - return self.legate_runtime.has_attachment(array.data) + # We can't attach NumPy ndarrays in shared mode unless they are + # writeable + share = share and obj.flags["W"] + transfer = TransferType.SHARE if share else TransferType.MAKE_COPY + return self.find_or_create_array_thunk(obj, transfer) @staticmethod def compute_parent_child_mapping( array: npt.NDArray[Any], - ) -> Union[tuple[Union[slice, None], ...], None]: + ) -> tuple[slice | None, ...] | None: # We need an algorithm for figuring out how to compute the # slice object that was used to generate a child array from # a parent array so we can build the same mapping from a @@ -331,7 +341,7 @@ def compute_parent_child_mapping( offsets.append((ptr_diff % mod) // div) assert div == array.dtype.itemsize # Now build the view and dimmap for the parent to create the view - key: tuple[Union[slice, None], ...] = () + key: tuple[slice | None, ...] 
= () child_idx = 0 child_strides = tuple(array.strides) parent_strides = tuple(array.base.strides) @@ -375,102 +385,121 @@ def compute_parent_child_mapping( return key def find_or_create_array_thunk( - self, array: npt.NDArray[Any], share: bool = False, defer: bool = False + self, + array: npt.NDArray[Any], + transfer: TransferType, + read_only: bool = False, + defer: bool = False, ) -> NumPyThunk: + from ._thunk.deferred import DeferredArray + assert isinstance(array, np.ndarray) + if not is_supported_dtype(array.dtype): + raise TypeError( + f"cuPyNumeric does not support dtype={array.dtype}" + ) + # We have to be really careful here to handle the case of # aliased numpy arrays that are passed in from the application # In case of aliasing we need to make sure that they are # mapped to the same logical region. The way we handle this # is to always create the thunk for the root array and # then create sub-thunks that mirror the array views - if array.base is not None and isinstance(array.base, np.ndarray): + if ( + transfer == TransferType.SHARE + and array.base is not None + and isinstance(array.base, np.ndarray) + ): key = self.compute_parent_child_mapping(array) if key is None: # This base array wasn't made with a view - if not share: - return self.find_or_create_array_thunk( - array.copy(), - share=False, - defer=defer, - ) raise NotImplementedError( - "cuNumeric does not currently know " + "cuPyNumeric does not currently know " + "how to attach to array views that are not affine " + "transforms of their parent array." 
) parent_thunk = self.find_or_create_array_thunk( array.base, - share=share, - defer=defer, + transfer, + read_only, + defer, ) - # Don't store this one in the ptr_to_thunk as we only want to - # store the root ones return parent_thunk.get_item(key) # Once it's a normal numpy array we can make it into one of our arrays # Check to see if it is a type that we support for doing deferred # execution and big enough to be worth off-loading onto Legion - dtype = to_core_dtype(array.dtype) - if ( - defer - or not self.is_eager_shape(array.shape) - or self.has_external_attachment(array) - ): - if array.size == 1 and not share: - # This is a single value array - # We didn't attach to this so we don't need to save it - return self.create_wrapped_scalar( - array.data, - array.dtype, - array.shape, - ) + if defer or not self.is_eager_shape(array.shape): + if array.size == 1 and transfer != TransferType.SHARE: + # This is a single value array that we're not attaching to. + # We cache these, but only if the user has promised not to + # write-through them. + # TODO(mpapadakis): Also mark the Store as read-only, whenever + # Legate supports that. + if read_only: + return cached_thunk_from_scalar( + array.tobytes(), array.shape, array.dtype + ) + else: + return thunk_from_scalar( + array.tobytes(), array.shape, array.dtype + ) - # This is not a scalar so make a field - store = self.legate_context.create_store( - dtype, - shape=array.shape, - optimize_scalar=False, - ) - store.attach_external_allocation( - array.data, - share, + # This is not a scalar so make a field. + # We won't try to cache these bigger arrays. 
+ store = legate_runtime.create_store_from_buffer( + to_core_type(array.dtype), + array.shape, + array.copy() if transfer == TransferType.MAKE_COPY else array, + # This argument should really be called "donate" + read_only=(transfer != TransferType.SHARE), ) return DeferredArray( - self, store, - numpy_array=array if share else None, + numpy_array=( + array if transfer == TransferType.SHARE else None + ), ) - # Make this into an eager evaluated thunk - return EagerArray(self, array) + from ._thunk.eager import EagerArray + + # Make this into an eagerly evaluated thunk + return EagerArray( + array.copy() if transfer == TransferType.MAKE_COPY else array + ) def create_empty_thunk( self, shape: NdShape, - dtype: ty.Dtype, - inputs: Optional[Sequence[NumPyThunk]] = None, + dtype: ty.Type, + inputs: Sequence[NumPyThunk] | None = None, ) -> NumPyThunk: + from ._thunk.deferred import DeferredArray + if self.is_eager_shape(shape) and self.are_all_eager_inputs(inputs): return self.create_eager_thunk(shape, dtype.to_numpy_dtype()) - store = self.legate_context.create_store( + store = legate_runtime.create_store( dtype, shape=shape, optimize_scalar=True ) - return DeferredArray(self, store) + return DeferredArray(store) def create_eager_thunk( self, shape: NdShape, dtype: np.dtype[Any], ) -> NumPyThunk: - return EagerArray(self, np.empty(shape, dtype=dtype)) + from ._thunk.eager import EagerArray + + return EagerArray(np.empty(shape, dtype=dtype)) def create_unbound_thunk( - self, dtype: ty.Dtype, ndim: int = 1 + self, dtype: ty.Type, ndim: int = 1 ) -> DeferredArray: - store = self.legate_context.create_store(dtype, ndim=ndim) - return DeferredArray(self, store) + from ._thunk.deferred import DeferredArray + + store = legate_runtime.create_store(dtype, ndim=ndim) + return DeferredArray(store) def is_eager_shape(self, shape: NdShape) -> bool: volume = calculate_volume(shape) @@ -485,7 +514,9 @@ def is_eager_shape(self, shape: NdShape) -> bool: if len(shape) > LEGATE_MAX_DIM: 
return True - # CUNUMERIC_FORCE_THUNK == "eager" + from .settings import settings + + # CUPYNUMERIC_FORCE_THUNK == "eager" if settings.force_thunk() == "eager": return True @@ -500,7 +531,10 @@ def is_eager_shape(self, shape: NdShape) -> bool: return volume <= self.max_eager_volume @staticmethod - def are_all_eager_inputs(inputs: Optional[Sequence[NumPyThunk]]) -> bool: + def are_all_eager_inputs(inputs: Sequence[NumPyThunk] | None) -> bool: + from ._thunk.eager import EagerArray + from ._thunk.thunk import NumPyThunk + if inputs is None: return True for inp in inputs: @@ -511,35 +545,54 @@ def are_all_eager_inputs(inputs: Optional[Sequence[NumPyThunk]]) -> bool: @staticmethod def is_eager_array(array: NumPyThunk) -> TypeGuard[EagerArray]: + from ._thunk.eager import EagerArray + return isinstance(array, EagerArray) @staticmethod def is_deferred_array( - array: Optional[NumPyThunk], + array: NumPyThunk | None, ) -> TypeGuard[DeferredArray]: + from ._thunk.deferred import DeferredArray + return isinstance(array, DeferredArray) def to_eager_array(self, array: NumPyThunk) -> EagerArray: + from ._thunk.eager import EagerArray + if self.is_eager_array(array): return array elif self.is_deferred_array(array): - return EagerArray(self, array.__numpy_array__()) + return EagerArray(array.__numpy_array__()) else: raise RuntimeError("invalid array type") - def to_deferred_array(self, array: NumPyThunk) -> DeferredArray: + def to_deferred_array( + self, + array: NumPyThunk, + read_only: bool, + ) -> DeferredArray: if self.is_deferred_array(array): return array elif self.is_eager_array(array): - return array.to_deferred_array() + return array.to_deferred_array(read_only) else: raise RuntimeError("invalid array type") def warn(self, msg: str, category: type = UserWarning) -> None: + from .settings import settings + if not settings.warn(): return stacklevel = find_last_user_stacklevel() warnings.warn(msg, stacklevel=stacklevel, category=category) -runtime = 
Runtime(cunumeric_context) +runtime = Runtime() + + +def _shutdown_callback() -> None: + runtime.destroy() + + +legate_runtime.add_shutdown_callback(_shutdown_callback) diff --git a/cunumeric/settings.py b/cupynumeric/settings.py similarity index 75% rename from cunumeric/settings.py rename to cupynumeric/settings.py index 9faa52302..d73eee261 100644 --- a/cunumeric/settings.py +++ b/cupynumeric/settings.py @@ -1,4 +1,4 @@ -# Copyright 2023 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -25,21 +25,21 @@ __all__ = ("settings",) -class CunumericRuntimeSettings(Settings): +class CupynumericRuntimeSettings(Settings): preload_cudalibs: PrioritizedSetting[bool] = PrioritizedSetting( "preload_cudalibs", - "CUNUMERIC_PRELOAD_CUDALIBS", + "CUPYNUMERIC_PRELOAD_CUDALIBS", default=False, convert=convert_bool, help=""" Preload and initialize handles of all CUDA libraries (cuBLAS, cuSOLVER, - etc.) used in cuNumeric. + etc.) used in cuPyNumeric. """, ) warn: PrioritizedSetting[bool] = PrioritizedSetting( "warn", - "CUNUMERIC_WARN", + "CUPYNUMERIC_WARN", default=False, convert=convert_bool, help=""" @@ -49,27 +49,27 @@ class CunumericRuntimeSettings(Settings): report_coverage: PrioritizedSetting[bool] = PrioritizedSetting( "report_coverage", - "CUNUMERIC_REPORT_COVERAGE", + "CUPYNUMERIC_REPORT_COVERAGE", default=False, convert=convert_bool, help=""" - Print an overall percentage of cunumeric coverage. + Print an overall percentage of cupynumeric coverage. """, ) report_dump_callstack: PrioritizedSetting[bool] = PrioritizedSetting( "report_dump_callstack", - "CUNUMERIC_REPORT_DUMP_CALLSTACK", + "CUPYNUMERIC_REPORT_DUMP_CALLSTACK", default=False, convert=convert_bool, help=""" - Print an overall percentage of cunumeric coverage with call stack info. + Print an overall percentage of cupynumeric coverage with a call stack. 
""", ) report_dump_csv: PrioritizedSetting[str | None] = PrioritizedSetting( "report_dump_csv", - "CUNUMERIC_REPORT_DUMP_CSV", + "CUPYNUMERIC_REPORT_DUMP_CSV", default=None, help=""" Save a coverage report to a specified CSV file. @@ -78,11 +78,11 @@ class CunumericRuntimeSettings(Settings): numpy_compat: PrioritizedSetting[bool] = PrioritizedSetting( "numpy_compat", - "CUNUMERIC_NUMPY_COMPATIBILITY", + "CUPYNUMERIC_NUMPY_COMPATIBILITY", default=False, convert=convert_bool, help=""" - cuNumeric will issue additional tasks to match numpy's results + cuPyNumeric will issue additional tasks to match numpy's results and behavior. This is currently used in the following APIs: nanmin, nanmax, nanargmin, nanargmax """, @@ -90,7 +90,7 @@ class CunumericRuntimeSettings(Settings): fast_math: EnvOnlySetting[int] = EnvOnlySetting( "fast_math", - "CUNUMERIC_FAST_MATH", + "CUPYNUMERIC_FAST_MATH", default=False, convert=convert_bool, help=""" @@ -105,7 +105,7 @@ class CunumericRuntimeSettings(Settings): min_gpu_chunk: EnvOnlySetting[int] = EnvOnlySetting( "min_gpu_chunk", - "CUNUMERIC_MIN_GPU_CHUNK", + "CUPYNUMERIC_MIN_GPU_CHUNK", default=65536, # 1 << 16 test_default=2, convert=convert_int, @@ -121,7 +121,7 @@ class CunumericRuntimeSettings(Settings): min_cpu_chunk: EnvOnlySetting[int] = EnvOnlySetting( "min_cpu_chunk", - "CUNUMERIC_MIN_CPU_CHUNK", + "CUPYNUMERIC_MIN_CPU_CHUNK", default=1024, # 1 << 10 test_default=2, convert=convert_int, @@ -137,7 +137,7 @@ class CunumericRuntimeSettings(Settings): min_omp_chunk: EnvOnlySetting[int] = EnvOnlySetting( "min_omp_chunk", - "CUNUMERIC_MIN_OMP_CHUNK", + "CUPYNUMERIC_MIN_OMP_CHUNK", default=8192, # 1 << 13 test_default=2, convert=convert_int, @@ -153,15 +153,15 @@ class CunumericRuntimeSettings(Settings): force_thunk: EnvOnlySetting[str | None] = EnvOnlySetting( "force_thunk", - "CUNUMERIC_FORCE_THUNK", + "CUPYNUMERIC_FORCE_THUNK", default=None, test_default="deferred", help=""" - Force cuNumeric to always use a specific strategy for 
backing + Force cuPyNumeric to always use a specific strategy for backing ndarrays: "deferred", i.e. managed by the Legate runtime, which enables distribution and accelerated operations, but has some up-front offloading overhead, or "eager", i.e. falling back to - using a vanilla NumPy array. By default cuNumeric will decide + using a vanilla NumPy array. By default cuPyNumeric will decide this on a per-array basis, based on the size of the array and the accelerator in use. @@ -169,5 +169,21 @@ class CunumericRuntimeSettings(Settings): """, ) + matmul_cache_size: EnvOnlySetting[int] = EnvOnlySetting( + "matmul_cache_size", + "CUPYNUMERIC_MATMUL_CACHE_SIZE", + default=134217728, # 128MB + test_default=4096, # 4KB + convert=convert_int, + help=""" + Force cuPyNumeric to keep temporary task slices during matmul + computations smaller than this threshold. Whenever the temporary + space needed during computation would exceed this value the task + will be batched over 'k' to fulfill the requirement. + + This is a read-only environment variable setting used by the runtime. + """, + ) + -settings = CunumericRuntimeSettings() +settings = CupynumericRuntimeSettings() diff --git a/cunumeric/types.py b/cupynumeric/types.py similarity index 83% rename from cunumeric/types.py rename to cupynumeric/types.py index 1e3d032b0..f2fbf8311 100644 --- a/cunumeric/types.py +++ b/cupynumeric/types.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,17 +14,15 @@ # from __future__ import annotations -from typing import Literal, Tuple, Union - -from typing_extensions import TypeAlias +from typing import Literal, TypeAlias BoundsMode: TypeAlias = Literal["raise", "wrap", "clip"] CastingKind: TypeAlias = Literal["no", "equiv", "safe", "same_kind", "unsafe"] -NdShape: TypeAlias = Tuple[int, ...] 
+NdShape: TypeAlias = tuple[int, ...] -NdShapeLike: TypeAlias = Union[int, NdShape] +NdShapeLike: TypeAlias = int | NdShape SortSide: TypeAlias = Literal["left", "right"] @@ -36,4 +34,6 @@ ConvolveMode: TypeAlias = Literal["full", "valid", "same"] +ConvolveMethod: TypeAlias = Literal["auto", "direct", "fft"] + SelectKind: TypeAlias = Literal["introselect"] diff --git a/cupynumeric_cpp.cmake b/cupynumeric_cpp.cmake new file mode 100644 index 000000000..f9d7cbb01 --- /dev/null +++ b/cupynumeric_cpp.cmake @@ -0,0 +1,565 @@ +#============================================================================= +# Copyright 2024 NVIDIA Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +############################################################################## +# - User Options ------------------------------------------------------------ + +option(BUILD_SHARED_LIBS "Build cuPyNumeric shared libraries" ON) +option(cupynumeric_EXCLUDE_TBLIS_FROM_ALL "Exclude tblis targets from cuPyNumeric's 'all' target" OFF) +option(cupynumeric_EXCLUDE_OPENBLAS_FROM_ALL "Exclude OpenBLAS targets from cuPyNumeric's 'all' target" OFF) +option(cupynumeric_EXCLUDE_LEGATE_FROM_ALL "Exclude legate targets from cuPyNumeric's 'all' target" OFF) + +############################################################################## +# - Project definition ------------------------------------------------------- + +# Write the version header +rapids_cmake_write_version_file(include/cupynumeric/version_config.hpp) + +# Needed to integrate with LLVM/clang tooling +set(CMAKE_EXPORT_COMPILE_COMMANDS ON) + +############################################################################## +# - Build Type --------------------------------------------------------------- + +# Set a default build type if none was specified +rapids_cmake_build_type(Release) + +############################################################################## +# - conda environment -------------------------------------------------------- + +rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH) + +# We're building python extension libraries, which must always be installed +# under lib/, even if the system normally uses lib64/. 
Rapids-cmake currently +# doesn't realize this when we're going through scikit-build, see +# https://github.com/rapidsai/rapids-cmake/issues/426 +if(TARGET conda_env) + set(CMAKE_INSTALL_LIBDIR "lib") +endif() + +############################################################################## +# - Dependencies ------------------------------------------------------------- + +# add third party dependencies using CPM +rapids_cpm_init(OVERRIDE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/versions.json) + +rapids_find_package(OpenMP GLOBAL_TARGETS OpenMP::OpenMP_CXX) + +option(Legion_USE_CUDA "Use CUDA" ON) +option(Legion_USE_OpenMP "Use OpenMP" ${OpenMP_FOUND}) +option(Legion_BOUNDS_CHECKS "Build cuPyNumeric with bounds checks (expensive)" OFF) + +# If legate has CUDA support, then including it in a project will automatically call +# enable_language(CUDA). However, this does not play nice with the rapids-cmake CUDA utils +# which support a wider range of values for CMAKE_CUDA_ARCHITECTURES than cmake does. You +# end up with the following error: +# +# CMAKE_CUDA_ARCHITECTURES: +# +# RAPIDS +# +# is not one of the following: +# +# * a semicolon-separated list of integers, each optionally +# followed by '-real' or '-virtual' +# * a special value: all, all-major, native +# +set(cmake_cuda_arch_backup "${CMAKE_CUDA_ARCHITECTURES}") +set(cmake_cuda_arch_cache_backup "$CACHE{CMAKE_CUDA_ARCHITECTURES}") +if(("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "RAPIDS") OR ("${CMAKE_CUDA_ARCHITECTURES}" STREQUAL "NATIVE")) + unset(CMAKE_CUDA_ARCHITECTURES) + unset(CMAKE_CUDA_ARCHITECTURES CACHE) +endif() + +### +# If we find legate already configured on the system, it will report +# whether it was compiled with bounds checking (Legion_BOUNDS_CHECKS), +# CUDA (Legion_USE_CUDA), and OpenMP (Legion_USE_OpenMP). 
+# +# We use the same variables as legate because we want to enable/disable +# each of these features based on how legate was configured (it doesn't +# make sense to build cuPyNumeric's CUDA bindings if legate wasn't built +# with CUDA support). +### +include(cmake/thirdparty/get_legate.cmake) + +set(CMAKE_CUDA_ARCHITECTURES "${cmake_cuda_arch_cache_backup}" CACHE STRING "" FORCE) +set(CMAKE_CUDA_ARCHITECTURES "${cmake_cuda_arch_backup}") +unset(cmake_cuda_arch_backup) +unset(cmake_cuda_arch_cache_backup) + +if(Legion_USE_CUDA) + include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/cuda_arch_helpers.cmake) + # Needs to run before `rapids_cuda_init_architectures` + set_cuda_arch_from_names() + # Needs to run before `enable_language(CUDA)` + rapids_cuda_init_architectures(cupynumeric) + enable_language(CUDA) + # Since cupynumeric only enables CUDA optionally we need to manually include + # the file that rapids_cuda_init_architectures relies on `project` calling + if(CMAKE_PROJECT_cupynumeric_INCLUDE) + include("${CMAKE_PROJECT_cupynumeric_INCLUDE}") + endif() + + # Must come after enable_language(CUDA) + # Use `-isystem <path>` instead of `-isystem=<path>` + # because the former works with clangd intellisense + set(CMAKE_INCLUDE_SYSTEM_FLAG_CUDA "-isystem ") + + rapids_find_package( + CUDAToolkit REQUIRED + BUILD_EXPORT_SET cupynumeric-exports + INSTALL_EXPORT_SET cupynumeric-exports + ) + + include(cmake/thirdparty/get_nccl.cmake) + include(cmake/thirdparty/get_cutensor.cmake) +endif() + +include(cmake/thirdparty/get_openblas.cmake) + +include(cmake/thirdparty/get_tblis.cmake) + +############################################################################## +# - cuPyNumeric ---------------------------------------------------------------- + +set(cupynumeric_SOURCES "") +set(cupynumeric_CXX_DEFS "") +set(cupynumeric_CUDA_DEFS "") +set(cupynumeric_CXX_OPTIONS "") +set(cupynumeric_CUDA_OPTIONS "") + +include(cmake/Modules/set_cpu_arch_flags.cmake) 
+set_cpu_arch_flags(cupynumeric_CXX_OPTIONS) + +# Add `src/cupynumeric.mk` sources +list(APPEND cupynumeric_SOURCES + src/cupynumeric/ternary/where.cc + src/cupynumeric/scan/scan_global.cc + src/cupynumeric/scan/scan_local.cc + src/cupynumeric/binary/binary_op.cc + src/cupynumeric/binary/binary_op_util.cc + src/cupynumeric/binary/binary_red.cc + src/cupynumeric/bits/packbits.cc + src/cupynumeric/bits/unpackbits.cc + src/cupynumeric/unary/scalar_unary_red.cc + src/cupynumeric/unary/unary_op.cc + src/cupynumeric/unary/unary_red.cc + src/cupynumeric/unary/convert.cc + src/cupynumeric/nullary/arange.cc + src/cupynumeric/nullary/eye.cc + src/cupynumeric/nullary/fill.cc + src/cupynumeric/nullary/window.cc + src/cupynumeric/index/advanced_indexing.cc + src/cupynumeric/index/choose.cc + src/cupynumeric/index/putmask.cc + src/cupynumeric/index/repeat.cc + src/cupynumeric/index/select.cc + src/cupynumeric/index/wrap.cc + src/cupynumeric/index/zip.cc + src/cupynumeric/item/read.cc + src/cupynumeric/item/write.cc + src/cupynumeric/matrix/batched_cholesky.cc + src/cupynumeric/matrix/contract.cc + src/cupynumeric/matrix/diag.cc + src/cupynumeric/matrix/gemm.cc + src/cupynumeric/matrix/matmul.cc + src/cupynumeric/matrix/matvecmul.cc + src/cupynumeric/matrix/dot.cc + src/cupynumeric/matrix/potrf.cc + src/cupynumeric/matrix/qr.cc + src/cupynumeric/matrix/solve.cc + src/cupynumeric/matrix/svd.cc + src/cupynumeric/matrix/syrk.cc + src/cupynumeric/matrix/tile.cc + src/cupynumeric/matrix/transpose.cc + src/cupynumeric/matrix/trilu.cc + src/cupynumeric/matrix/trsm.cc + src/cupynumeric/matrix/util.cc + src/cupynumeric/random/bitgenerator.cc + src/cupynumeric/random/randutil/generator_host.cc + src/cupynumeric/random/randutil/generator_host_straightforward.cc + src/cupynumeric/random/randutil/generator_host_advanced.cc + src/cupynumeric/random/rand.cc + src/cupynumeric/search/argwhere.cc + src/cupynumeric/search/nonzero.cc + src/cupynumeric/set/unique.cc + 
src/cupynumeric/set/unique_reduce.cc + src/cupynumeric/stat/bincount.cc + src/cupynumeric/convolution/convolve.cc + src/cupynumeric/transform/flip.cc + src/cupynumeric/utilities/repartition.cc + src/cupynumeric/arg_redop_register.cc + src/cupynumeric/mapper.cc + src/cupynumeric/ndarray.cc + src/cupynumeric/operators.cc + src/cupynumeric/runtime.cc + src/cupynumeric/cephes/chbevl.cc + src/cupynumeric/cephes/i0.cc + src/cupynumeric/stat/histogram.cc +) + +if(Legion_USE_OpenMP) + list(APPEND cupynumeric_SOURCES + src/cupynumeric/ternary/where_omp.cc + src/cupynumeric/scan/scan_global_omp.cc + src/cupynumeric/scan/scan_local_omp.cc + src/cupynumeric/binary/binary_op_omp.cc + src/cupynumeric/binary/binary_red_omp.cc + src/cupynumeric/bits/packbits_omp.cc + src/cupynumeric/bits/unpackbits_omp.cc + src/cupynumeric/unary/unary_op_omp.cc + src/cupynumeric/unary/scalar_unary_red_omp.cc + src/cupynumeric/unary/unary_red_omp.cc + src/cupynumeric/unary/convert_omp.cc + src/cupynumeric/nullary/arange_omp.cc + src/cupynumeric/nullary/eye_omp.cc + src/cupynumeric/nullary/fill_omp.cc + src/cupynumeric/nullary/window_omp.cc + src/cupynumeric/index/advanced_indexing_omp.cc + src/cupynumeric/index/choose_omp.cc + src/cupynumeric/index/putmask_omp.cc + src/cupynumeric/index/repeat_omp.cc + src/cupynumeric/index/select_omp.cc + src/cupynumeric/index/wrap_omp.cc + src/cupynumeric/index/zip_omp.cc + src/cupynumeric/matrix/batched_cholesky_omp.cc + src/cupynumeric/matrix/contract_omp.cc + src/cupynumeric/matrix/diag_omp.cc + src/cupynumeric/matrix/gemm_omp.cc + src/cupynumeric/matrix/matmul_omp.cc + src/cupynumeric/matrix/matvecmul_omp.cc + src/cupynumeric/matrix/dot_omp.cc + src/cupynumeric/matrix/potrf_omp.cc + src/cupynumeric/matrix/qr_omp.cc + src/cupynumeric/matrix/solve_omp.cc + src/cupynumeric/matrix/svd_omp.cc + src/cupynumeric/matrix/syrk_omp.cc + src/cupynumeric/matrix/tile_omp.cc + src/cupynumeric/matrix/transpose_omp.cc + src/cupynumeric/matrix/trilu_omp.cc + 
src/cupynumeric/matrix/trsm_omp.cc + src/cupynumeric/random/rand_omp.cc + src/cupynumeric/search/argwhere_omp.cc + src/cupynumeric/search/nonzero_omp.cc + src/cupynumeric/set/unique_omp.cc + src/cupynumeric/set/unique_reduce_omp.cc + src/cupynumeric/stat/bincount_omp.cc + src/cupynumeric/convolution/convolve_omp.cc + src/cupynumeric/transform/flip_omp.cc + src/cupynumeric/stat/histogram_omp.cc + ) +endif() + +if(Legion_USE_CUDA) + list(APPEND cupynumeric_SOURCES + src/cupynumeric/ternary/where.cu + src/cupynumeric/scan/scan_global.cu + src/cupynumeric/scan/scan_local.cu + src/cupynumeric/binary/binary_op.cu + src/cupynumeric/binary/binary_red.cu + src/cupynumeric/bits/packbits.cu + src/cupynumeric/bits/unpackbits.cu + src/cupynumeric/unary/scalar_unary_red.cu + src/cupynumeric/unary/unary_red.cu + src/cupynumeric/unary/unary_op.cu + src/cupynumeric/unary/convert.cu + src/cupynumeric/nullary/arange.cu + src/cupynumeric/nullary/eye.cu + src/cupynumeric/nullary/fill.cu + src/cupynumeric/nullary/window.cu + src/cupynumeric/index/advanced_indexing.cu + src/cupynumeric/index/choose.cu + src/cupynumeric/index/putmask.cu + src/cupynumeric/index/repeat.cu + src/cupynumeric/index/select.cu + src/cupynumeric/index/wrap.cu + src/cupynumeric/index/zip.cu + src/cupynumeric/item/read.cu + src/cupynumeric/item/write.cu + src/cupynumeric/matrix/batched_cholesky.cu + src/cupynumeric/matrix/contract.cu + src/cupynumeric/matrix/diag.cu + src/cupynumeric/matrix/gemm.cu + src/cupynumeric/matrix/matmul.cu + src/cupynumeric/matrix/matvecmul.cu + src/cupynumeric/matrix/dot.cu + src/cupynumeric/matrix/potrf.cu + src/cupynumeric/matrix/qr.cu + src/cupynumeric/matrix/solve.cu + src/cupynumeric/matrix/svd.cu + src/cupynumeric/matrix/syrk.cu + src/cupynumeric/matrix/tile.cu + src/cupynumeric/matrix/transpose.cu + src/cupynumeric/matrix/trilu.cu + src/cupynumeric/matrix/trsm.cu + src/cupynumeric/random/rand.cu + src/cupynumeric/search/argwhere.cu + src/cupynumeric/search/nonzero.cu + 
src/cupynumeric/set/unique.cu + src/cupynumeric/stat/bincount.cu + src/cupynumeric/convolution/convolve.cu + src/cupynumeric/fft/fft.cu + src/cupynumeric/transform/flip.cu + src/cupynumeric/utilities/repartition.cu + src/cupynumeric/arg_redop_register.cu + src/cupynumeric/cudalibs.cu + src/cupynumeric/stat/histogram.cu + ) +endif() + +# Add `src/cupynumeric/sort/sort.mk` sources +list(APPEND cupynumeric_SOURCES + src/cupynumeric/sort/sort.cc + src/cupynumeric/sort/searchsorted.cc +) + +if(Legion_USE_OpenMP) + list(APPEND cupynumeric_SOURCES + src/cupynumeric/sort/sort_omp.cc + src/cupynumeric/sort/searchsorted_omp.cc + ) +endif() + +if(Legion_USE_CUDA) + list(APPEND cupynumeric_SOURCES + src/cupynumeric/sort/sort.cu + src/cupynumeric/sort/searchsorted.cu + src/cupynumeric/sort/cub_sort_bool.cu + src/cupynumeric/sort/cub_sort_int8.cu + src/cupynumeric/sort/cub_sort_int16.cu + src/cupynumeric/sort/cub_sort_int32.cu + src/cupynumeric/sort/cub_sort_int64.cu + src/cupynumeric/sort/cub_sort_uint8.cu + src/cupynumeric/sort/cub_sort_uint16.cu + src/cupynumeric/sort/cub_sort_uint32.cu + src/cupynumeric/sort/cub_sort_uint64.cu + src/cupynumeric/sort/cub_sort_half.cu + src/cupynumeric/sort/cub_sort_float.cu + src/cupynumeric/sort/cub_sort_double.cu + src/cupynumeric/sort/thrust_sort_bool.cu + src/cupynumeric/sort/thrust_sort_int8.cu + src/cupynumeric/sort/thrust_sort_int16.cu + src/cupynumeric/sort/thrust_sort_int32.cu + src/cupynumeric/sort/thrust_sort_int64.cu + src/cupynumeric/sort/thrust_sort_uint8.cu + src/cupynumeric/sort/thrust_sort_uint16.cu + src/cupynumeric/sort/thrust_sort_uint32.cu + src/cupynumeric/sort/thrust_sort_uint64.cu + src/cupynumeric/sort/thrust_sort_half.cu + src/cupynumeric/sort/thrust_sort_float.cu + src/cupynumeric/sort/thrust_sort_double.cu + src/cupynumeric/sort/thrust_sort_complex64.cu + src/cupynumeric/sort/thrust_sort_complex128.cu + ) +endif() + +# Add `src/cupynumeric/random/random.mk` sources +if(Legion_USE_CUDA) + list(APPEND 
cupynumeric_SOURCES + src/cupynumeric/random/bitgenerator.cu + src/cupynumeric/random/randutil/generator_device.cu + src/cupynumeric/random/randutil/generator_device_straightforward.cu + src/cupynumeric/random/randutil/generator_device_advanced.cu +) +endif() + +# add sources for cusolverMp +if(Legion_USE_CUDA AND CUSOLVERMP_DIR) + list(APPEND cupynumeric_SOURCES + src/cupynumeric/matrix/mp_potrf.cu + src/cupynumeric/matrix/mp_solve.cu + ) +endif() + +list(APPEND cupynumeric_SOURCES + # This must always be the last file! + # It guarantees we do our registration callback + # only after all task variants are recorded + src/cupynumeric/cupynumeric.cc +) + +if(CMAKE_BUILD_TYPE STREQUAL "Debug") + list(APPEND cupynumeric_CXX_DEFS DEBUG_CUPYNUMERIC) + list(APPEND cupynumeric_CUDA_DEFS DEBUG_CUPYNUMERIC) +endif() + +if(Legion_BOUNDS_CHECKS) + list(APPEND cupynumeric_CXX_DEFS BOUNDS_CHECKS) + list(APPEND cupynumeric_CUDA_DEFS BOUNDS_CHECKS) +endif() + +list(APPEND cupynumeric_CUDA_OPTIONS -Xfatbin=-compress-all) +list(APPEND cupynumeric_CUDA_OPTIONS --expt-extended-lambda) +list(APPEND cupynumeric_CUDA_OPTIONS --expt-relaxed-constexpr) +list(APPEND cupynumeric_CXX_OPTIONS -Wno-deprecated-declarations) +list(APPEND cupynumeric_CUDA_OPTIONS -Wno-deprecated-declarations) + +add_library(cupynumeric ${cupynumeric_SOURCES}) +add_library(cupynumeric::cupynumeric ALIAS cupynumeric) + +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(platform_rpath_origin "\$ORIGIN") +elseif (CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(platform_rpath_origin "@loader_path") +endif () + +set_target_properties(cupynumeric + PROPERTIES BUILD_RPATH "${platform_rpath_origin}" + INSTALL_RPATH "${platform_rpath_origin}" + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON + INTERFACE_POSITION_INDEPENDENT_CODE ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + LIBRARY_OUTPUT_DIRECTORY lib) + +target_link_libraries(cupynumeric + PUBLIC legate::legate + $<TARGET_NAME_IF_EXISTS:NCCL::NCCL> + PRIVATE BLAS::BLAS + 
tblis::tblis + # Add Conda library and include paths + $<TARGET_NAME_IF_EXISTS:conda_env> + $<TARGET_NAME_IF_EXISTS:CUDA::cufft> + $<TARGET_NAME_IF_EXISTS:CUDA::cublas> + $<TARGET_NAME_IF_EXISTS:CUDA::cusolver> + $<TARGET_NAME_IF_EXISTS:cutensor::cutensor> + $<TARGET_NAME_IF_EXISTS:OpenMP::OpenMP_CXX>) + +if(NOT Legion_USE_CUDA AND cupynumeric_cuRAND_INCLUDE_DIR) + list(APPEND cupynumeric_CXX_DEFS CUPYNUMERIC_CURAND_FOR_CPU_BUILD) + target_include_directories(cupynumeric PRIVATE ${cupynumeric_cuRAND_INCLUDE_DIR}) +endif() + +if(Legion_USE_CUDA AND CUSOLVERMP_DIR) + message(VERBOSE "cupynumeric: CUSOLVERMP_DIR ${CUSOLVERMP_DIR}") + list(APPEND cupynumeric_CXX_DEFS CUPYNUMERIC_USE_CUSOLVERMP) + list(APPEND cupynumeric_CUDA_DEFS CUPYNUMERIC_USE_CUSOLVERMP) + target_include_directories(cupynumeric PRIVATE ${CUSOLVERMP_DIR}/include) + target_link_libraries(cupynumeric PRIVATE ${CUSOLVERMP_DIR}/lib/libcusolverMp.so) +endif() + +target_compile_options(cupynumeric + PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${cupynumeric_CXX_OPTIONS}>" + "$<$<COMPILE_LANGUAGE:CUDA>:${cupynumeric_CUDA_OPTIONS}>") + +target_compile_definitions(cupynumeric + PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${cupynumeric_CXX_DEFS}>" + "$<$<COMPILE_LANGUAGE:CUDA>:${cupynumeric_CUDA_DEFS}>") + +target_include_directories(cupynumeric + PUBLIC + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/src> + INTERFACE + $<INSTALL_INTERFACE:include/cupynumeric> +) + +if(Legion_USE_CUDA) + file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld" +[=[ +SECTIONS +{ +.nvFatBinSegment : { *(.nvFatBinSegment) } +.nv_fatbin : { *(.nv_fatbin) } +} +]=]) + + # ensure CUDA symbols aren't relocated to the middle of the debug build binaries + target_link_options(cupynumeric PRIVATE "${CMAKE_CURRENT_BINARY_DIR}/fatbin.ld") +endif() + +############################################################################## +# - install targets----------------------------------------------------------- + +include(CPack) +include(GNUInstallDirs) +rapids_cmake_install_lib_dir(lib_dir) + +install(TARGETS cupynumeric + DESTINATION ${lib_dir} + EXPORT cupynumeric-exports) + +install( + FILES src/cupynumeric.h + ${CMAKE_CURRENT_BINARY_DIR}/include/cupynumeric/version_config.hpp + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cupynumeric) + +install( + FILES src/cupynumeric/cupynumeric_c.h + src/cupynumeric/ndarray.h + src/cupynumeric/ndarray.inl + 
src/cupynumeric/operators.h + src/cupynumeric/operators.inl + src/cupynumeric/runtime.h + src/cupynumeric/slice.h + src/cupynumeric/typedefs.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cupynumeric/cupynumeric) + +if(cupynumeric_INSTALL_TBLIS) + install(DIRECTORY ${tblis_BINARY_DIR}/lib/ DESTINATION ${lib_dir}) + install(DIRECTORY ${tblis_BINARY_DIR}/include/ DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +endif() + +############################################################################## +# - install export ----------------------------------------------------------- + +set(doc_string + [=[ +Provide targets for cuPyNumeric, an aspiring drop-in replacement for NumPy at scale. + +Imported Targets: + - cupynumeric::cupynumeric + +]=]) + +string(JOIN "\n" code_string + "set(Legion_USE_CUDA ${Legion_USE_CUDA})" + "set(Legion_USE_OpenMP ${Legion_USE_OpenMP})" + "set(Legion_BOUNDS_CHECKS ${Legion_BOUNDS_CHECKS})" +) + +if(DEFINED Legion_USE_Python) + string(APPEND code_string "\nset(Legion_USE_Python ${Legion_USE_Python})") +endif() + +if(DEFINED Legion_NETWORKS) + string(APPEND code_string "\nset(Legion_NETWORKS ${Legion_NETWORKS})") +endif() + +rapids_export( + INSTALL cupynumeric + EXPORT_SET cupynumeric-exports + GLOBAL_TARGETS cupynumeric + NAMESPACE cupynumeric:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string) + +# build export targets +rapids_export( + BUILD cupynumeric + EXPORT_SET cupynumeric-exports + GLOBAL_TARGETS cupynumeric + NAMESPACE cupynumeric:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string) + +if(cupynumeric_BUILD_TESTS) + include(CTest) + + add_subdirectory(tests/cpp) +endif() diff --git a/cunumeric_python.cmake b/cupynumeric_python.cmake similarity index 68% rename from cunumeric_python.cmake rename to cupynumeric_python.cmake index 8139d6d3a..1be5b35c6 100644 --- a/cunumeric_python.cmake +++ b/cupynumeric_python.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright 2022 
NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,25 +17,25 @@ ############################################################################## # - User Options ------------------------------------------------------------ -option(FIND_CUNUMERIC_CPP "Search for existing cuNumeric C++ installations before defaulting to local files" +option(FIND_CUPYNUMERIC_CPP "Search for existing cuPyNumeric C++ installations before defaulting to local files" OFF) ############################################################################## # - Dependencies ------------------------------------------------------------- -# If the user requested it we attempt to find cunumeric. -if(FIND_CUNUMERIC_CPP) +# If the user requested it we attempt to find cupynumeric. +if(FIND_CUPYNUMERIC_CPP) include("${rapids-cmake-dir}/export/detail/parse_version.cmake") - rapids_export_parse_version(${cunumeric_version} cunumeric parsed_ver) - rapids_find_package(cunumeric ${parsed_ver} EXACT CONFIG - GLOBAL_TARGETS cunumeric::cunumeric - BUILD_EXPORT_SET cunumeric-python-exports - INSTALL_EXPORT_SET cunumeric-python-exports) + rapids_export_parse_version(${cupynumeric_version} cupynumeric parsed_ver) + rapids_find_package(cupynumeric ${parsed_ver} EXACT CONFIG + GLOBAL_TARGETS cupynumeric::cupynumeric + BUILD_EXPORT_SET cupynumeric-python-exports + INSTALL_EXPORT_SET cupynumeric-python-exports) else() - set(cunumeric_FOUND OFF) + set(cupynumeric_FOUND OFF) endif() -if(NOT cunumeric_FOUND) +if(NOT cupynumeric_FOUND) set(SKBUILD OFF) set(Legion_USE_Python ON) set(Legion_BUILD_BINDINGS ON) @@ -51,9 +51,9 @@ add_custom_target("generate_install_info_py" ALL VERBATIM ) -add_library(cunumeric_python INTERFACE) -add_library(cunumeric::cunumeric_python ALIAS cunumeric_python) -target_link_libraries(cunumeric_python INTERFACE legate::core) +add_library(cupynumeric_python 
INTERFACE) +add_library(cupynumeric::cupynumeric_python ALIAS cupynumeric_python) +target_link_libraries(cupynumeric_python INTERFACE legate::legate) # ############################################################################ # - conda environment -------------------------------------------------------- @@ -75,37 +75,37 @@ include(CPack) include(GNUInstallDirs) rapids_cmake_install_lib_dir(lib_dir) -install(TARGETS cunumeric_python +install(TARGETS cupynumeric_python DESTINATION ${lib_dir} - EXPORT cunumeric-python-exports) + EXPORT cupynumeric-python-exports) ############################################################################## # - install export ----------------------------------------------------------- set(doc_string [=[ -Provide Python targets for cuNumeric, an aspiring drop-in replacement for NumPy at scale. +Provide Python targets for cuPyNumeric, an aspiring drop-in replacement for NumPy at scale. Imported Targets: - - cunumeric::cunumeric_python + - cupynumeric::cupynumeric_python ]=]) set(code_string "") rapids_export( - INSTALL cunumeric_python - EXPORT_SET cunumeric-python-exports - GLOBAL_TARGETS cunumeric_python - NAMESPACE cunumeric:: + INSTALL cupynumeric_python + EXPORT_SET cupynumeric-python-exports + GLOBAL_TARGETS cupynumeric_python + NAMESPACE cupynumeric:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string) # build export targets rapids_export( - BUILD cunumeric_python - EXPORT_SET cunumeric-python-exports - GLOBAL_TARGETS cunumeric_python - NAMESPACE cunumeric:: + BUILD cupynumeric_python + EXPORT_SET cupynumeric-python-exports + GLOBAL_TARGETS cupynumeric_python + NAMESPACE cupynumeric:: DOCUMENTATION doc_string FINAL_CODE_BLOCK code_string) diff --git a/docs/cunumeric/source/api/settings.rst b/docs/cunumeric/source/api/settings.rst deleted file mode 100644 index abc807f0b..000000000 --- a/docs/cunumeric/source/api/settings.rst +++ /dev/null @@ -1,8 +0,0 @@ -Settings -======== - -cuNumeric has a number of runtime settings that 
can be configured through -environment variables. - -.. settings:: settings - :module: cunumeric.settings \ No newline at end of file diff --git a/docs/cunumeric/source/developer/CONTRIBUTING.md b/docs/cunumeric/source/developer/CONTRIBUTING.md deleted file mode 120000 index 069558fad..000000000 --- a/docs/cunumeric/source/developer/CONTRIBUTING.md +++ /dev/null @@ -1 +0,0 @@ -../../../../CONTRIBUTING.md \ No newline at end of file diff --git a/docs/cunumeric/source/developer/building.rst b/docs/cunumeric/source/developer/building.rst deleted file mode 100644 index 674c9f34f..000000000 --- a/docs/cunumeric/source/developer/building.rst +++ /dev/null @@ -1,71 +0,0 @@ -.. _building cunumeric from source: - -Building from source -==================== - -Basic build ------------ - -Users must have a working installation of the `Legate Core`_ library prior to -installing cuNumeric. -**Installing cuNumeric by itself will not automatically install Legate Core.** - -As for other dependencies, the Dependencies section on the -`Legate Core build instructions`_ also covers cuNumeric, so no additional -packages are required. - -Once Legate Core is installed, you can simply invoke ``./install.py`` from the -cuNumeric top-level directory. The build will automatically pick up the -configuration used when building Legate Core (e.g. the CUDA Toolkit directory). - -Advanced topics ---------------- - -Building through pip & cmake -~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -cuNumeric uses the same cmake/scikit-build-based build workflow as Legate Core. -See the `Legate Core build instructions`_ for an overview. - -There are several examples in the ``scripts`` folder. We walk through the steps in -``build-with-legate-separately-no-install.sh`` here. - -We assume a pre-existing Legate Core build. For details on building Legate Core, -consult the `Legate Core repository`_. - -First, the CMake build needs to be configured: - -.. code:: sh - - $ cmake -S . 
-B build -GNinja -D legate_core_ROOT:STRING=path/to/legate/build - -We point cuNumeric to the Legate *build* tree, not an installation. -This generates all build-dependent headers and Python files. - -Once configured, we can build the C++ libraries: - -.. code:: sh - - $ cmake --build build - -This will invoke Ninja (or make) to execute the build. -Once the C++ libraries are available, we can do an editable (development) pip installation. - -.. code:: sh - - $ SKBUILD_BUILD_OPTIONS="-D FIND_CUNUMERIC_CPP=ON -D cunumeric_ROOT=$(pwd)/build" \ - python3 -m pip install \ - --root / --no-deps --no-build-isolation - --editable . - -The Python source tree and CMake build tree are now available with the environment Python -for running cuNumeric programs. The diagram below illustrates the -complete workflow for building both Legate core and cuNumeric. - -.. image:: /_images/developer-build.png - :width: 600 - :alt: "notional diagram of cunumeric build process" - -.. _Legate Core: https://github.com/nv-legate/legate.core -.. _Legate Core build instructions: https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md -.. _Legate Core repository: https://github.com/nv-legate/legate.core \ No newline at end of file diff --git a/docs/cunumeric/source/index.rst b/docs/cunumeric/source/index.rst deleted file mode 100644 index ace34f0e9..000000000 --- a/docs/cunumeric/source/index.rst +++ /dev/null @@ -1,39 +0,0 @@ -:html_theme.sidebar_secondary.remove: - -Welcome to cuNumeric's documentation! -===================================== - -cuNumeric is a `Legate`_ library that aims to provide a distributed and -accelerated drop-in replacement for the `NumPy API`_ on top of the `Legion`_ -runtime. - -Using cuNumeric you do things like run the final example of the -`Python CFD course`_ completely unmodified on 2048 A100 GPUs in a -`DGX SuperPOD`_ and achieve good weak scaling. - -.. 
toctree:: - :maxdepth: 2 - :caption: Contents: - - user/index - comparison/index - api/index - developer/index - -.. toctree:: - :maxdepth: 1 - - versions - - -Indices and tables ------------------- - -* :ref:`genindex` -* :ref:`search` - -.. _DGX SuperPOD: https://www.nvidia.com/en-us/data-center/dgx-superpod/ -.. _Legate: https://github.com/nv-legate/legate.core -.. _Legion: https://legion.stanford.edu/ -.. _Numpy API: https://numpy.org/doc/stable/reference/ -.. _Python CFD course: https://github.com/barbagroup/CFDPython/blob/master/lessons/15_Step_12.ipynb \ No newline at end of file diff --git a/docs/cunumeric/source/user/configuration.rst b/docs/cunumeric/source/user/configuration.rst deleted file mode 100644 index a63a6ae89..000000000 --- a/docs/cunumeric/source/user/configuration.rst +++ /dev/null @@ -1,108 +0,0 @@ -.. _config: - -Configuration -============= - -The underlying Legate runtime has many options for configuring the details of -execution. - -How to specify configuration ----------------------------- - -.. _config_legate: - -Legate driver -~~~~~~~~~~~~~ - -When using the ``legate`` driver, it is possible to pass most configuration -options via the command line, as long as they appear *before* the script to -run: - -.. code-block:: sh - - legate script.py + +{% endblock %} \ No newline at end of file diff --git a/docs/cunumeric/source/api/_bitgenerator.rst b/docs/cupynumeric/source/api/_bitgenerator.rst similarity index 65% rename from docs/cunumeric/source/api/_bitgenerator.rst rename to docs/cupynumeric/source/api/_bitgenerator.rst index 32854eff9..e269e9872 100644 --- a/docs/cunumeric/source/api/_bitgenerator.rst +++ b/docs/cupynumeric/source/api/_bitgenerator.rst @@ -1,7 +1,7 @@ -cunumeric.random.BitGenerator +cupynumeric.random.BitGenerator ============================= -.. currentmodule:: cunumeric.random +.. currentmodule:: cupynumeric.random .. 
autoclass:: BitGenerator diff --git a/docs/cunumeric/source/api/_generator.rst b/docs/cupynumeric/source/api/_generator.rst similarity index 65% rename from docs/cunumeric/source/api/_generator.rst rename to docs/cupynumeric/source/api/_generator.rst index 539a3c001..c73481232 100644 --- a/docs/cunumeric/source/api/_generator.rst +++ b/docs/cupynumeric/source/api/_generator.rst @@ -1,7 +1,7 @@ -cunumeric.random.Generator +cupynumeric.random.Generator ========================== -.. currentmodule:: cunumeric.random +.. currentmodule:: cupynumeric.random .. autoclass:: Generator diff --git a/docs/cunumeric/source/comparison/_grouped.rst b/docs/cupynumeric/source/api/_grouped.rst similarity index 100% rename from docs/cunumeric/source/comparison/_grouped.rst rename to docs/cupynumeric/source/api/_grouped.rst diff --git a/docs/cunumeric/source/api/_ndarray.rst b/docs/cupynumeric/source/api/_ndarray.rst similarity index 95% rename from docs/cunumeric/source/api/_ndarray.rst rename to docs/cupynumeric/source/api/_ndarray.rst index 3320f0857..ea6b57a32 100644 --- a/docs/cunumeric/source/api/_ndarray.rst +++ b/docs/cupynumeric/source/api/_ndarray.rst @@ -1,7 +1,7 @@ -cunumeric.ndarray +cupynumeric.ndarray ================= -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric .. autoclass:: ndarray @@ -31,7 +31,6 @@ cunumeric.ndarray ~ndarray.dump ~ndarray.dumps ~ndarray.fill - ~ndarray.find_common_type ~ndarray.flatten ~ndarray.flip ~ndarray.getfield diff --git a/docs/cunumeric/source/api/binary.rst b/docs/cupynumeric/source/api/binary.rst similarity index 90% rename from docs/cunumeric/source/api/binary.rst rename to docs/cupynumeric/source/api/binary.rst index 237fdc071..38b0260ab 100644 --- a/docs/cunumeric/source/api/binary.rst +++ b/docs/cupynumeric/source/api/binary.rst @@ -1,7 +1,7 @@ Binary operations ================= -.. currentmodule:: cunumeric +.. 
currentmodule:: cupynumeric Elementwise bit operations -------------------------- diff --git a/docs/cunumeric/source/api/broadcast.rst b/docs/cupynumeric/source/api/broadcast.rst similarity index 52% rename from docs/cunumeric/source/api/broadcast.rst rename to docs/cupynumeric/source/api/broadcast.rst index 50d329a2e..9e093e79e 100644 --- a/docs/cunumeric/source/api/broadcast.rst +++ b/docs/cupynumeric/source/api/broadcast.rst @@ -1,6 +1,6 @@ -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric -cunumeric.broadcast +cupynumeric.broadcast =================== .. autoclass:: broadcast diff --git a/docs/cunumeric/source/api/classes.rst b/docs/cupynumeric/source/api/classes.rst similarity index 100% rename from docs/cunumeric/source/api/classes.rst rename to docs/cupynumeric/source/api/classes.rst diff --git a/docs/cupynumeric/source/api/comparison.rst b/docs/cupynumeric/source/api/comparison.rst new file mode 100644 index 000000000..eda6dddec --- /dev/null +++ b/docs/cupynumeric/source/api/comparison.rst @@ -0,0 +1,12 @@ +Project comparisons +=================== + +Here is a list of NumPy APIs and corresponding cuPyNumeric implementations. + +A dot in the cupynumeric column denotes that a cuPyNumeric implementation +is not provided yet. We welcome contributions for these functions. + +NumPy vs cuPyNumeric APIs +------------------------- + +.. comparison-table:: diff --git a/docs/cunumeric/source/api/creation.rst b/docs/cupynumeric/source/api/creation.rst similarity index 93% rename from docs/cunumeric/source/api/creation.rst rename to docs/cupynumeric/source/api/creation.rst index ba43a2138..e35f6ab4c 100644 --- a/docs/cunumeric/source/api/creation.rst +++ b/docs/cupynumeric/source/api/creation.rst @@ -1,7 +1,7 @@ Array creation routines ======================= -.. currentmodule:: cunumeric +.. 
currentmodule:: cupynumeric From shape or value ------------------- @@ -41,6 +41,7 @@ Numerical ranges arange linspace + meshgrid Building matrices diff --git a/docs/cupynumeric/source/api/datatype.rst b/docs/cupynumeric/source/api/datatype.rst new file mode 100644 index 000000000..bb5667fd0 --- /dev/null +++ b/docs/cupynumeric/source/api/datatype.rst @@ -0,0 +1,12 @@ +Data type routines +================== + +.. currentmodule:: cupynumeric + +Data type testing +----------------- + +.. autosummary:: + :toctree: generated/ + + find_common_type \ No newline at end of file diff --git a/docs/cunumeric/source/api/fft.rst b/docs/cupynumeric/source/api/fft.rst similarity index 67% rename from docs/cunumeric/source/api/fft.rst rename to docs/cupynumeric/source/api/fft.rst index 8ca9dcc17..8a656a253 100644 --- a/docs/cunumeric/source/api/fft.rst +++ b/docs/cupynumeric/source/api/fft.rst @@ -1,6 +1,6 @@ -.. module:: cunumeric.fft +.. module:: cupynumeric.fft -Discrete Fourier Transform (:mod:`cunumeric.fft`) +Discrete Fourier Transform (:mod:`cupynumeric.fft`) ================================================== Standard FFTs @@ -36,6 +36,16 @@ Hermitian FFTs .. autosummary:: :toctree: generated/ - + hfft ihfft + + +Helper routines +--------------- + +.. autosummary:: + :toctree: generated/ + + fftshift + ifftshift \ No newline at end of file diff --git a/docs/cunumeric/source/api/index.rst b/docs/cupynumeric/source/api/index.rst similarity index 67% rename from docs/cunumeric/source/api/index.rst rename to docs/cupynumeric/source/api/index.rst index c6a5243c4..d57ccba21 100644 --- a/docs/cunumeric/source/api/index.rst +++ b/docs/cupynumeric/source/api/index.rst @@ -1,7 +1,7 @@ API Reference ============= -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric .. 
toctree:: :maxdepth: 2 @@ -9,3 +9,4 @@ API Reference classes routines settings + comparison diff --git a/docs/cunumeric/source/api/indexing.rst b/docs/cupynumeric/source/api/indexing.rst similarity index 92% rename from docs/cunumeric/source/api/indexing.rst rename to docs/cupynumeric/source/api/indexing.rst index e1a3358cb..3468e893e 100644 --- a/docs/cunumeric/source/api/indexing.rst +++ b/docs/cupynumeric/source/api/indexing.rst @@ -1,7 +1,7 @@ Indexing routines ================= -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric Generating index arrays ----------------------- @@ -9,6 +9,7 @@ Generating index arrays .. autosummary:: :toctree: generated/ + unravel_index diag_indices diag_indices_from mask_indices @@ -17,6 +18,7 @@ Generating index arrays triu_indices triu_indices_from indices + ix_ nonzero flatnonzero where diff --git a/docs/cupynumeric/source/api/io.rst b/docs/cupynumeric/source/api/io.rst new file mode 100644 index 000000000..a5ba6f670 --- /dev/null +++ b/docs/cupynumeric/source/api/io.rst @@ -0,0 +1,11 @@ +Input and output +================ + +.. currentmodule:: cupynumeric + +NumPy binary files (npy, npz) +----------------------------- +.. autosummary:: + :toctree: generated/ + + load diff --git a/docs/cunumeric/source/api/linalg.rst b/docs/cupynumeric/source/api/linalg.rst similarity index 82% rename from docs/cunumeric/source/api/linalg.rst rename to docs/cupynumeric/source/api/linalg.rst index 78394ead0..0ccd9d4e6 100644 --- a/docs/cunumeric/source/api/linalg.rst +++ b/docs/cupynumeric/source/api/linalg.rst @@ -1,9 +1,9 @@ -.. module:: cunumeric.linalg +.. module:: cupynumeric.linalg -Linear algebra (:mod:`cunumeric.linalg`) +Linear algebra (:mod:`cupynumeric.linalg`) ======================================== -.. currentmodule:: cunumeric +.. 
currentmodule:: cupynumeric Matrix and vector products -------------------------- @@ -29,6 +29,8 @@ Decompositions :toctree: generated/ linalg.cholesky + linalg.qr + linalg.svd Norms and other numbers ----------------------- diff --git a/docs/cunumeric/source/api/logic.rst b/docs/cupynumeric/source/api/logic.rst similarity index 95% rename from docs/cunumeric/source/api/logic.rst rename to docs/cupynumeric/source/api/logic.rst index abc016c65..1ab6c7873 100644 --- a/docs/cunumeric/source/api/logic.rst +++ b/docs/cupynumeric/source/api/logic.rst @@ -1,7 +1,7 @@ Logic functions =============== -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric Truth value testing ------------------- diff --git a/docs/cunumeric/source/api/manipulation.rst b/docs/cupynumeric/source/api/manipulation.rst similarity index 91% rename from docs/cunumeric/source/api/manipulation.rst rename to docs/cupynumeric/source/api/manipulation.rst index 86010e721..b1d3f54c3 100644 --- a/docs/cunumeric/source/api/manipulation.rst +++ b/docs/cupynumeric/source/api/manipulation.rst @@ -1,7 +1,7 @@ Array manipulation routines =========================== -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric Basic operations ---------------- @@ -32,7 +32,7 @@ Transpose-like operations swapaxes transpose -See also :attr:`cunumeric.ndarray.T` property. +See also :attr:`cupynumeric.ndarray.T` property. 
Changing number of dimensions ----------------------------- @@ -46,6 +46,7 @@ Changing number of dimensions broadcast_arrays broadcast_shapes broadcast_to + expand_dims squeeze Changing kind of array @@ -105,3 +106,5 @@ Rearranging elements flip fliplr flipud + roll + rot90 diff --git a/docs/cunumeric/source/api/math.rst b/docs/cupynumeric/source/api/math.rst similarity index 96% rename from docs/cunumeric/source/api/math.rst rename to docs/cupynumeric/source/api/math.rst index a4f71d1cf..5764a9372 100644 --- a/docs/cunumeric/source/api/math.rst +++ b/docs/cupynumeric/source/api/math.rst @@ -1,7 +1,7 @@ Mathematical functions ====================== -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric Trigonometric functions ----------------------- @@ -43,6 +43,7 @@ Rounding .. autosummary:: :toctree: generated/ + round rint floor ceil @@ -59,10 +60,12 @@ Sums, products, differences sum cumprod cumsum + diff nancumprod nancumsum nanprod nansum + gradient Exponents and logarithms @@ -136,6 +139,7 @@ Handling complex numbers real imag + angle conj conjugate diff --git a/docs/cunumeric/source/api/ndarray.rst b/docs/cupynumeric/source/api/ndarray.rst similarity index 97% rename from docs/cunumeric/source/api/ndarray.rst rename to docs/cupynumeric/source/api/ndarray.rst index aca3b9ce0..a9d17a648 100644 --- a/docs/cunumeric/source/api/ndarray.rst +++ b/docs/cupynumeric/source/api/ndarray.rst @@ -1,6 +1,6 @@ -.. currentmodule:: cunumeric +.. 
currentmodule:: cupynumeric -The N-Dimensional array (:class:`cunumeric.ndarray`) +The N-Dimensional array (:class:`cupynumeric.ndarray`) ==================================================== Constructing arrays @@ -59,7 +59,6 @@ Data Type :toctree: generated/ ndarray.dtype - ndarray.find_common_type Other Attributes ~~~~~~~~~~~~~~~~ @@ -239,6 +238,8 @@ Matrix Multiplication: :toctree: generated/ ndarray.__matmul__ + ndarray.__imatmul__ + ndarray.__rmatmul__ Special methods --------------- diff --git a/docs/cunumeric/source/api/random.rst b/docs/cupynumeric/source/api/random.rst similarity index 93% rename from docs/cunumeric/source/api/random.rst rename to docs/cupynumeric/source/api/random.rst index 0cf5a61a9..22036b534 100644 --- a/docs/cunumeric/source/api/random.rst +++ b/docs/cupynumeric/source/api/random.rst @@ -1,6 +1,6 @@ -.. module:: cunumeric.random +.. module:: cupynumeric.random -Random sampling (:mod:`cunumeric.random`) +Random sampling (:mod:`cupynumeric.random`) ========================================= Random Generator diff --git a/docs/cunumeric/source/api/routines.rst b/docs/cupynumeric/source/api/routines.rst similarity index 91% rename from docs/cunumeric/source/api/routines.rst rename to docs/cupynumeric/source/api/routines.rst index e85a5c65b..166cb4450 100644 --- a/docs/cunumeric/source/api/routines.rst +++ b/docs/cupynumeric/source/api/routines.rst @@ -8,7 +8,9 @@ Routines creation manipulation binary + datatype indexing + io linalg logic math diff --git a/docs/cunumeric/source/api/set.rst b/docs/cupynumeric/source/api/set.rst similarity index 79% rename from docs/cunumeric/source/api/set.rst rename to docs/cupynumeric/source/api/set.rst index c4299e870..e797379d1 100644 --- a/docs/cunumeric/source/api/set.rst +++ b/docs/cupynumeric/source/api/set.rst @@ -1,7 +1,7 @@ Set routines ============ -.. currentmodule:: cunumeric +.. 
currentmodule:: cupynumeric Making proper sets ------------------ diff --git a/docs/cupynumeric/source/api/settings.rst b/docs/cupynumeric/source/api/settings.rst new file mode 100644 index 000000000..6a424f0fb --- /dev/null +++ b/docs/cupynumeric/source/api/settings.rst @@ -0,0 +1,8 @@ +Settings +======== + +cuPyNumeric has a number of runtime settings that can be configured through +environment variables. + +.. settings:: settings + :module: cupynumeric.settings \ No newline at end of file diff --git a/docs/cunumeric/source/api/sorting.rst b/docs/cupynumeric/source/api/sorting.rst similarity index 93% rename from docs/cunumeric/source/api/sorting.rst rename to docs/cupynumeric/source/api/sorting.rst index 86d8e65dc..ab5570cfd 100644 --- a/docs/cunumeric/source/api/sorting.rst +++ b/docs/cupynumeric/source/api/sorting.rst @@ -1,7 +1,7 @@ Sorting, searching, and counting ================================ -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric Sorting ------- diff --git a/docs/cunumeric/source/api/statistics.rst b/docs/cupynumeric/source/api/statistics.rst similarity index 64% rename from docs/cunumeric/source/api/statistics.rst rename to docs/cupynumeric/source/api/statistics.rst index 48f10f19c..9430ea324 100644 --- a/docs/cunumeric/source/api/statistics.rst +++ b/docs/cupynumeric/source/api/statistics.rst @@ -1,7 +1,18 @@ Statistics ========== -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric + +Order statistics +---------------- + +.. autosummary:: + :toctree: generated/ + + quantile + percentile + nanquantile + nanpercentile Averages and variances ---------------------- @@ -9,26 +20,27 @@ Averages and variances .. autosummary:: :toctree: generated/ + average mean nanmean var + median + nanmedian - -Histograms ----------- +Correlating +----------- .. autosummary:: :toctree: generated/ - bincount - histogram - + cov -Order statistics ----------------- +Histograms +---------- .. 
autosummary:: :toctree: generated/ - quantile - percentile + bincount + histogram + digitize diff --git a/docs/cunumeric/source/api/window.rst b/docs/cupynumeric/source/api/window.rst similarity index 85% rename from docs/cunumeric/source/api/window.rst rename to docs/cupynumeric/source/api/window.rst index 28058d21f..e50dc5898 100644 --- a/docs/cunumeric/source/api/window.rst +++ b/docs/cupynumeric/source/api/window.rst @@ -1,7 +1,7 @@ Window functions ====================== -.. currentmodule:: cunumeric +.. currentmodule:: cupynumeric Various windows ----------------------- diff --git a/docs/cunumeric/source/conf.py b/docs/cupynumeric/source/conf.py similarity index 59% rename from docs/cunumeric/source/conf.py rename to docs/cupynumeric/source/conf.py index 93a2bfcd0..a0dcd2af8 100644 --- a/docs/cunumeric/source/conf.py +++ b/docs/cupynumeric/source/conf.py @@ -1,4 +1,4 @@ -# Copyright 2021-2022 NVIDIA Corporation +# Copyright 2024 NVIDIA Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -13,11 +13,21 @@ # limitations under the License. 
# +from os import getenv + +from cupynumeric import __version__ + +SWITCHER_PROD = "https://docs.nvidia.com/cupynumeric/switcher.json" +SWITCHER_DEV = "http://localhost:8000/switcher.json" +JSON_URL = SWITCHER_DEV if getenv("SWITCHER_DEV") == "1" else SWITCHER_PROD + # -- Project information ----------------------------------------------------- -project = "cuNumeric" -copyright = "2021-2023, NVIDIA" -author = "NVIDIA" +project = "NVIDIA cuPyNumeric" +if "dev" in __version__: + project += f" ({__version__})" +copyright = "2024, NVIDIA" +author = "NVIDIA Corporation" # -- General configuration --------------------------------------------------- @@ -32,10 +42,10 @@ "myst_parser", "nbsphinx", "legate._sphinxext.settings", - "cunumeric._sphinxext.comparison_table", - "cunumeric._sphinxext.implemented_index", - "cunumeric._sphinxext.missing_refs", - "cunumeric._sphinxext.ufunc_formatter", + "cupynumeric._sphinxext.comparison_table", + "cupynumeric._sphinxext.implemented_index", + "cupynumeric._sphinxext.missing_refs", + "cupynumeric._sphinxext.ufunc_formatter", ] source_suffix = {".rst": "restructuredtext", ".md": "markdown"} @@ -43,27 +53,25 @@ # -- Options for HTML output ------------------------------------------------- html_context = { - "default_mode": "light", + # "default_mode": "light", "AUTHOR": author, - "DESCRIPTION": "cuNumeric documentation site.", + "DESCRIPTION": "cuPyNumeric documentation site.", } html_static_path = ["_static"] -html_theme = "pydata_sphinx_theme" +html_theme = "nvidia_sphinx_theme" html_theme_options = { - "footer_start": ["copyright"], - "github_url": "https://github.com/nv-legate/cunumeric", - # https://github.com/pydata/pydata-sphinx-theme/issues/1220 - "icon_links": [], - "logo": {"text": project, "link": "https://nv-legate.github.io/cunumeric"}, - "navbar_align": "left", - "navbar_end": ["navbar-icon-links", "theme-switcher"], - "primary_sidebar_end": ["indices.html"], - "secondary_sidebar_items": ["page-toc"], - "show_nav_level": 
2, - "show_toc_level": 2, + "switcher": { + "json_url": JSON_URL, + "navbar_start": ["navbar-logo", "version-switcher"], + "version_match": ".".join(__version__.split(".", 2)[:2]), + }, + "extra_footer": [ + "This project, i.e., cuPyNumeric, is separate and independent of the CuPy project. CuPy is a registered trademark of Preferred Networks.", # NOQA + '', # NOQA + ], } templates_path = ["_templates"] diff --git a/docs/cupynumeric/source/developer/CONTRIBUTING.md b/docs/cupynumeric/source/developer/CONTRIBUTING.md new file mode 100644 index 000000000..8dacfa72c --- /dev/null +++ b/docs/cupynumeric/source/developer/CONTRIBUTING.md @@ -0,0 +1,72 @@ +# Contributing to cuPyNumeric + +cuPyNumeric is an open-source project released under the [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0). We welcome any and all contributions, and we hope that you can help us develop a strong community. + +## How to begin + +Most of the time, the best thing is to begin by [opening an issue](https://github.com/nv-legate/cupynumeric/issues). This gives us a chance to discuss the contribution and to define the problem or feature that it addresses. Often, opening of the issue first may help prevent you from doing unnecessary work or to enhance and further develop your idea. + +Once you are ready to start development, we ask you to work on a [fork](https://docs.github.com/en/get-started/quickstart/fork-a-repo) of our repository. The next step is to create a [pull request](https://help.github.com/en/articles/about-pull-requests). Feel free to open the pull request as soon as you begin your development (just mark it [as a draft](https://github.blog/2019-02-14-introducing-draft-pull-requests/)) or when you are ready to have your contribution merged. + +## The Legalese: Developer Certificate of Origin + +cuPyNumeric is released under the open-source [Apache license, version 2.0](https://www.apache.org/licenses/LICENSE-2.0), and is free to use, modify, and redistribute. 
To ensure that the license can be exercised without encumbrance, we ask that you only contribute your own work or work to which you have the intellectual rights. To that end, we employ the Developer's Certificate of Origin (DCO), which is the lightweight mechanism for you to certify that you are legally able to make your contribution. Here is the full text of the certificate (also available at [DeveloperCertificate.org](https://developercertificate.org/)): + +```` +Developer Certificate of Origin +Version 1.1 + +Copyright (C) 2004, 2006 The Linux Foundation and its contributors. + +Everyone is permitted to copy and distribute verbatim copies of this +license document, but changing it is not allowed. + + +Developer's Certificate of Origin 1.1 + +By making a contribution to this project, I certify that: + +(a) The contribution was created in whole or in part by me and I + have the right to submit it under the open source license + indicated in the file; or + +(b) The contribution is based upon previous work that, to the best + of my knowledge, is covered under an appropriate open source + license and I have the right under that license to submit that + work with modifications, whether created in whole or in part + by me, under the same open source license (unless I am + permitted to submit under a different license), as indicated + in the file; or + +(c) The contribution was provided directly to me by some other + person who certified (a), (b) or (c) and I have not modified + it. + +(d) I understand and agree that this project and the contribution + are public and that a record of the contribution (including all + personal information I submit with it, including my sign-off) is + maintained indefinitely and may be redistributed consistent with + this project or the open source license(s) involved. +```` + +### How Do I Sign the DCO? + +Fortunately, it does not take much work to sign the DCO. 
The only thing that you have to do is to mark all your commits with a `Signed-off-by` line that looks like this: + +```` +Signed-off-by: Your Name <your@email.com> +```` + +Please use your real name and a valid email address at which you can be reached. For legal reasons, we will not be able to accept contributions that use pseudonyms in the signature. You can simply add this line at the end of all your commits manually, or you can use the `-s` or the `--signoff` options provided by Git to automatically tack on the signature. + +## Review Process + +We are really grateful that you are thinking of contributing to cuPyNumeric. We will make every effort to review your contributions as soon as possible. + +As we suggested at the beginning of this document, it will be really helpful to start with an issue unless your proposed change is really trivial. An issue will help to save work in the review process (e.g., maybe somebody is already working on exactly the same thing you want to work on). After you open your pull request (PR), there will usually be community feedback that often will require further changes to your contribution (the usual open-source process). Usually, this will conclude in the PR being merged by a maintainer, but on rare occasions a PR may be rejected. This may happen, for example, if the PR appears abandoned (no response to the community feedback) or if the PR does not seem to be approaching community acceptance in a reasonable time frame. In any case, an explanation will always be given why a PR is closed. Even if a PR is closed for some reason, it may always be reopened if the situation evolves (feel free to comment on closed PRs to discuss reopening them). + +## Code Formatting Requirements + +cuPyNumeric has a set of coding standards that are expected from all the code merged into the project. The coding standards are defined by the set of tools we use to format our code. We use the [pre-commit](https://pre-commit.com/) framework to run our formatting tools. 
The easiest way to meet the coding standards is to simply use the pre-commit framework to run all the checks for you. Please visit the [pre-commit project page](https://pre-commit.com/) for pre-commit installation and usage instructions. Once pre-commit is installed in the cuPyNumeric repo, all the checks and formatting will be run on every commit, but one can also run the checks explicitly as detailed in pre-commit documentation. + +We hope that the automation of our formatting checks will make it easy to comply with our coding standards. If you encounter problems with code formatting, however, please let us know in a comment on your PR, and we will do our best to help. diff --git a/docs/cupynumeric/source/developer/building.rst b/docs/cupynumeric/source/developer/building.rst new file mode 100644 index 000000000..9c4f74765 --- /dev/null +++ b/docs/cupynumeric/source/developer/building.rst @@ -0,0 +1,71 @@ +.. _building cupynumeric from source: + +Building from source +==================== + +Basic build +----------- + +Users must have a working installation of the `Legate`_ library prior to +installing cuPyNumeric. +**Installing cuPyNumeric by itself will not automatically install Legate.** + +As for other dependencies, the Dependencies section on the +`Legate build instructions`_ also covers cuPyNumeric, so no additional +packages are required. + +Once Legate is installed, you can simply invoke ``./install.py`` from the +cuPyNumeric top-level directory. The build will automatically pick up the +configuration used when building Legate (e.g. the CUDA Toolkit directory). + +Advanced topics +--------------- + +Building through pip & cmake +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +cuPyNumeric uses the same cmake/scikit-build-based build workflow as Legate. +See the `Legate build instructions`_ for an overview. + +There are several examples in the ``scripts`` folder. We walk through the steps in +``build-with-legate-separately-no-install.sh`` here. 
+ +We assume a pre-existing Legate build. For details on building Legate, +consult the `Legate repository`_. + +First, the CMake build needs to be configured: + +.. code:: sh + + $ cmake -S . -B build -GNinja -D legate_ROOT:STRING=path/to/legate/build + +We point cuPyNumeric to the Legate *build* tree, not an installation. +This generates all build-dependent headers and Python files. + +Once configured, we can build the C++ libraries: + +.. code:: sh + + $ cmake --build build + +This will invoke Ninja (or make) to execute the build. +Once the C++ libraries are available, we can do an editable (development) pip installation. + +.. code:: sh + + $ SKBUILD_BUILD_OPTIONS="-D FIND_CUPYNUMERIC_CPP=ON -D cupynumeric_ROOT=$(pwd)/build" \ + python3 -m pip install \ + --root / --no-deps --no-build-isolation \ + --editable . + +The Python source tree and CMake build tree are now available with the environment Python +for running cuPyNumeric programs. The diagram below illustrates the +complete workflow for building both Legate and cuPyNumeric. + +.. image:: /_images/developer-build.png + :width: 600 + :alt: "notional diagram of cupynumeric build process" + +.. _Legate: https://github.com/nv-legate/legate.core +.. _Legate build instructions: https://github.com/nv-legate/legate.core/blob/HEAD/BUILD.md +.. 
_Legate repository: https://github.com/nv-legate/legate.core diff --git a/docs/cunumeric/source/developer/index.rst b/docs/cupynumeric/source/developer/index.rst similarity index 100% rename from docs/cunumeric/source/developer/index.rst rename to docs/cupynumeric/source/developer/index.rst diff --git a/docs/cunumeric/source/developer/testing.rst b/docs/cupynumeric/source/developer/testing.rst similarity index 97% rename from docs/cunumeric/source/developer/testing.rst rename to docs/cupynumeric/source/developer/testing.rst index f5485b787..55aa39e36 100644 --- a/docs/cunumeric/source/developer/testing.rst +++ b/docs/cupynumeric/source/developer/testing.rst @@ -4,7 +4,7 @@ Running tests Basic usage ----------- -The simplest way to run the cuNumeric test suite is to use the ``test.py`` +The simplest way to run the cuPyNumeric test suite is to use the ``test.py`` test driver script. .. code-block:: sh diff --git a/docs/cunumeric/source/user/notebooks/black_scholes.ipynb b/docs/cupynumeric/source/examples/black_scholes.ipynb similarity index 99% rename from docs/cunumeric/source/user/notebooks/black_scholes.ipynb rename to docs/cupynumeric/source/examples/black_scholes.ipynb index b77e60386..e5868463a 100644 --- a/docs/cunumeric/source/user/notebooks/black_scholes.ipynb +++ b/docs/cupynumeric/source/examples/black_scholes.ipynb @@ -19,7 +19,7 @@ "License\n", "
\n",
     "\n",
-    "Copyright 2023 NVIDIA Corporation\n",
+    "Copyright 2024 NVIDIA Corporation\n",
     "\n",
     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
     "you may not use this file except in compliance with the License.\n",
@@ -41,7 +41,7 @@
    "id": "5b787e94-e440-4e1c-bd66-29faf9b59041",
    "metadata": {},
    "source": [
-    "To get started, `import cunumeric as np` (just the same way we would import `numpy`)"
+    "To get started, `import cupynumeric as np` (just the same way we would import `numpy`)"
    ]
   },
   {
@@ -51,7 +51,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "import cunumeric as np  # instead of numpy"
+    "import cupynumeric as np  # instead of numpy"
    ]
   },
   {
@@ -162,7 +162,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "/home/bryan/work/legate.core/legate/core/context.py:280: RuntimeWarning: cuNumeric has not implemented numpy.result_type and is falling back to canonical numpy. You may notice significantly decreased performance for this function call.\n",
+      "/home/bryan/work/legate.core/legate/core/context.py:280: RuntimeWarning: cuPyNumeric has not implemented numpy.result_type and is falling back to canonical numpy. You may notice significantly decreased performance for this function call.\n",
       "  result = func(*args, **kwargs)\n",
       "Elapsed Time: 45.659 ms\n"
      ]
diff --git a/docs/cunumeric/source/user/notebooks/cholesky.ipynb b/docs/cupynumeric/source/examples/cholesky.ipynb
similarity index 86%
rename from docs/cunumeric/source/user/notebooks/cholesky.ipynb
rename to docs/cupynumeric/source/examples/cholesky.ipynb
index e0bcfa21d..ee39c6ec0 100644
--- a/docs/cunumeric/source/user/notebooks/cholesky.ipynb
+++ b/docs/cupynumeric/source/examples/cholesky.ipynb
@@ -9,13 +9,13 @@
     "\n",
     "A [Cholesky decomposition](https://en.wikipedia.org/wiki/Cholesky_decomposition) is a useful factorization of Hermitian, positive-definite matrices into the product of a lower triangular matrix $L$ with its conjugate transpose $L^{*}$.\n",
     "\n",
-    "Numpy has a function [numpy.linalg.cholesky](https://numpy.org/doc/stable/reference/generated/numpy.linalg.cholesky.html) built-in for computing Cholesky decompositions. Cunumeric also implements this function, and it can be used as an immediate drop-in replacement.\n",
+    "Numpy has a function [numpy.linalg.cholesky](https://numpy.org/doc/stable/reference/generated/numpy.linalg.cholesky.html) built-in for computing Cholesky decompositions. cuPyNumeric also implements this function, and it can be used as an immediate drop-in replacement.\n",
     "\n",
     "
\n", "License\n", "
\n",
     "\n",
-    "Copyright 2023 NVIDIA Corporation\n",
+    "Copyright 2024 NVIDIA Corporation\n",
     "\n",
     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
     "you may not use this file except in compliance with the License.\n",
@@ -37,7 +37,7 @@
    "id": "389cd191-ccda-4597-8e08-8d01ac226bee",
    "metadata": {},
    "source": [
-    "To get started, `import cunumeric as np` (just the same way we would import `numpy`)\n"
+    "To get started, `import cupynumeric as np` (just the same way we would import `numpy`)\n"
    ]
   },
   {
@@ -49,7 +49,7 @@
    },
    "outputs": [],
    "source": [
-    "import cunumeric as np  # instead of numpy"
+    "import cupynumeric as np  # instead of numpy"
    ]
   },
   {
@@ -57,7 +57,7 @@
    "id": "9ef2bc57-e703-40ce-8aaa-d45408259c7a",
    "metadata": {},
    "source": [
-    "At this point we can call `np.linalg.cholesky`, exactly how we would with Numpy, but will get the result computed by Cunumeric's `cholesky` function. Let's quickly try it out with a simple identitity matrix:"
+    "At this point we can call `np.linalg.cholesky`, exactly how we would with Numpy, but will get the result computed by cuPyNumeric's `cholesky` function. Let's quickly try it out with a simple identity matrix:"
    ]
   },
   {
@@ -96,7 +96,7 @@
     "tags": []
    },
    "source": [
-    "We'd like to get some information about how well Cunumeric's `cholesky` function performs. In order to obtain accurate timings, we need to use the `time` function from `legate.timing`. Let's define a helper function `cholesky_timed` that calls the `time` function for us, and prints out the results as well:"
+    "We'd like to get some information about how well cuPyNumeric's `cholesky` function performs. In order to obtain accurate timings, we need to use the `time` function from `legate.timing`. Let's define a helper function `cholesky_timed` that calls the `time` function for us, and prints out the results as well:"
    ]
   },
   {
diff --git a/docs/cupynumeric/source/examples/compact_finite_difference.ipynb b/docs/cupynumeric/source/examples/compact_finite_difference.ipynb
new file mode 100644
index 000000000..6c77763a4
--- /dev/null
+++ b/docs/cupynumeric/source/examples/compact_finite_difference.ipynb
@@ -0,0 +1,336 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "a040718f-95ed-4e39-8e96-76529df1811a",
+   "metadata": {},
+   "source": [
+    "# Compact Finite Difference Scheme"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b8ced087-d285-4a55-97c6-fc78ad69d186",
+   "metadata": {},
+   "source": [
+    "## Learning Outcomes\n",
+    "\n",
+    "This example teaches how to compute the derivative of a function using the compact finite difference scheme described in the paper by [Lele](https://www.sciencedirect.com/science/article/abs/pii/002199919290324R).\n",
+    "\n",
+    "In this example, you will learn:\n",
+    "* how to convert stencil expressions from discretization to NumPy slicing operations\n",
+    "* how to create a tridiagonal matrix (dense)\n",
+    "* how to solve the resulting tridiagonal matrix using `linalg.solve`\n",
+    "* how to compute the L2 norm error between exact solution and computed solution using `linalg.norm`\n",
+    "\n",
+    "Note that a more optimal way of solving tridiagonal matrices is by using the TDMA algorithm. Here, we show how this can be solved using NumPy's `solve` API.\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "adb9700f-d3d5-4003-a532-123c1c55e340",
+   "metadata": {},
+   "source": [
+    "## Background\n",
+    "\n",
+    "Compact finite difference schemes approximate the first derivative by using the derivative of the function at neighboring points in addition to the values of the function itself, as shown below:\n",
+    "\n",
+    "$\\alpha f^{'}_{j-1} + f^{'}_{j} + \\alpha f^{'}_{j+1} = a_{1} f_{j+1} + a_{2} f_{j+2} - a_{2} f_{j-2} - a_{1} f_{j-1}$\n",
+    "\n",
+    "This can be represented more compactly in the following form,\\\n",
+    "$\\mathbf{A} f' = \\mathbf{B} f$\n",
+    "\n",
+    "where the matrix $\\mathbf{A}$ is tridiagonal and $\\mathbf{B}$ is pentadiagonal. In this example, we store the matrix, $\\mathbf{A}$, as a dense matrix and explicitly compute $\\mathbf{B} f$ instead of storing $\\mathbf{B}$ to save memory. To compute the derivative, $f'$, of a function, $f$, using a sixth order compact finite difference, we solve this linear system of equations.\n",
+    "\n",
+    "The main and off-diagonal elements of matrix $\\mathbf{A}$ are 1.0 and 1.0/3 respectively. For this example, we consider a sine function, $f=\\sin({k x})$\n",
+    "\n",
+    "\n",
+    "---"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "df0cdc69-4bf6-4ecd-bb46-4df1b95437c4",
+   "metadata": {},
+   "source": [
+    "The domain extends from 0 to $2 \\pi$ and is discretized using N points. Since the stencil for the RHS extends 2 points on either side, we may have to create arrays of size (N+4) to accommodate storing the values of points outside the domain."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9475f334-b2ab-4cad-8a1f-8803ecf97371",
+   "metadata": {},
+   "source": [
+    "## Implementation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "39bc4d10-25e2-4ef8-a70a-2ce270c8eb5f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "from matplotlib import pyplot as plt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7dc1892b-842f-4200-abf6-769095caa861",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# number of points used in discretization\n",
+    "npoints: int = 100\n",
+    "\n",
+    "# number of stencil points to compute the right-hand side\n",
+    "n_stencil: int = 2\n",
+    "\n",
+    "# length of the domain\n",
+    "length = 2.0*np.pi\n",
+    "\n",
+    "# wavenumber of the initial profile\n",
+    "wavenumber = 10"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "ee7c5c26-ffcb-4e6a-acb6-73540e0c735c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# compute the spacing\n",
+    "h = length/npoints\n",
+    "\n",
+    "# generate the discretized points\n",
+    "x = np.linspace(0, length, npoints, endpoint=False)\n",
+    "\n",
+    "# compute the function and exact derivative\n",
+    "f_interior = np.sin(wavenumber*x)\n",
+    "derivative_exact = wavenumber* np.cos(wavenumber*x)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "17b3c8ce-22dd-4a26-8ec7-ceb51632e59f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# For sixth order, the stencil should be of size 2\n",
+    "assert n_stencil == 2"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "816d739e-c4d3-4559-a097-af896eb75228",
+   "metadata": {},
+   "source": [
+    "Compute the function values including the left and right-hand side boundaries"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "59649243-9f42-4344-bbd7-17051d303d1d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "function_values = np.zeros(npoints + n_stencil*2)\n",
+    "function_values[n_stencil:-n_stencil] = f_interior\n",
+    "\n",
+    "# set the RHS boundary values using periodic boundary condition\n",
+    "function_values[npoints + n_stencil] = f_interior[0]\n",
+    "function_values[npoints + n_stencil + 1] = f_interior[1]\n",
+    "\n",
+    "# set the LHS boundary values using periodic boundary condition\n",
+    "function_values[0] = f_interior[npoints - n_stencil]\n",
+    "function_values[1] = f_interior[npoints - n_stencil + 1]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3dbc59ce-7099-4f49-ba55-d2f918eb3adc",
+   "metadata": {},
+   "source": [
+    "Form the matrix $\\mathbf{A}$"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "9a11d32c-1568-4a30-aeb6-52480aca34c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "A = np.zeros((npoints, npoints))\n",
+    "\n",
+    "# Eqn (2.1.7) from Compact Finite Difference with Spectral-like Accuracy, Lele, 1992, JCP.\n",
+    "alpha = 1.0/3.0\n",
+    "\n",
+    "# generate the tridiagonal matrix using np.diag\n",
+    "main = np.ones((1, npoints))[0]\n",
+    "diagonal = alpha*np.ones((1, npoints - 1))[0]\n",
+    "A = np.diag(main, 0) + np.diag(diagonal, -1) + np.diag(diagonal, 1)\n",
+    "\n",
+    "# Apply periodic boundary condition\n",
+    "A[0, -1] = alpha\n",
+    "A[-1, 0] = alpha"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "204c132c-b4b2-494e-96cc-1b7ee1c55e83",
+   "metadata": {},
+   "source": [
+    "Form the right-hand side"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "d939df3a-6e6c-4e88-b671-18ab0b1e58b9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Generate the right-hand side\n",
+    "a1 = 7.0/(9.0*h)\n",
+    "a2 = 1.0/(36.*h)\n",
+    "\n",
+    "# note how $a_{1} f_{j+1} + a_{2} f_{j+2} - a_{2} f_{j-2} - a_{1} f_{j-1}$\n",
+    "# gets converted to slicing operations on the array function_values.\n",
+    "\n",
+    "# It is important to derive the right start and end indices for the slices corresponding to each term.\n",
+    "# Since the stencil size on the RHS is 2 (n_stencil), the index j into function_values starts at the first\n",
+    "# interior point (j=2) and ends at the last interior point (j=npoints+1). This translates to the following slices:\n",
+    "\n",
+    "# f_{j+2} - f_{j-2} -> (function_values[4:npoints+4] - function_values[0:npoints])\n",
+    "# f_{j+1} - f_{j-1} -> (function_values[3:npoints+3] - function_values[1:npoints+1])\n",
+    "rhs = np.zeros(npoints)\n",
+    "rhs[0:npoints] = a1*(function_values[3:npoints+3] - function_values[1:npoints+1]) \\\n",
+    "               + a2*(function_values[4:npoints+4] - function_values[0:npoints])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "77ee7783-933e-4be0-88c7-984396cdaeef",
+   "metadata": {},
+   "source": [
+    "Compute the derivative and the L2 norm error"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "87df4961-25af-4876-8e46-2762429a3418",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "L2 norm error: 0.0021705301478625403\n"
+     ]
+    }
+   ],
+   "source": [
+    "derivative = np.linalg.solve(A, rhs)\n",
+    "error = np.linalg.norm(derivative - derivative_exact)\n",
+    "print(f\"L2 norm error: {error}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1015951c-29a9-4be1-b65e-5f3d19efdb9c",
+   "metadata": {},
+   "source": [
+    "---\n",
+    "Compare the exact and computed derivative of the function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c1d82ac2-d584-451e-8bc7-b618e3a0f9af",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       ""
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(x, f_interior, color='red', label=f'$f = sin({wavenumber}x)$')\n", + "plt.plot(x, derivative, color='blue', label=f'Computed $f\\'(x)$')\n", + "plt.plot(x, derivative_exact, color='green', label=f'Exact f\\'(x)')\n", + "\n", + "plt.xlabel(f'x')\n", + "plt.ylabel(f'$f(x), f\\'(x)$')\n", + "plt.title(f'Compute derivative of a function using sixth-order accurate compact finite difference scheme')\n", + "plt.legend(loc='center left', bbox_to_anchor=(1, 0.5))" + ] + }, + { + "cell_type": "markdown", + "id": "4f05eea3-5e1e-4567-9d7a-6ef8143f2723", + "metadata": {}, + "source": [ + "We see that the curves for the computed and the exact derivative overlap (green and blue) and the L2 error is of the order of 1e-3. If you are curious,\n", + "\n", + "* Try increasing the wavenumber, $k$, to see how this affects the accuracy of the solution. Plot error vs wavenumber to see how it varies\n", + "* Try reducing the resolution of the grid (n_points) and plot the error vs number of points.\n", + "* Try plotting the modified wavenumber and see if you can match the curve in the paper by [Lele](https://www.sciencedirect.com/science/article/abs/pii/002199919290324R)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9573de0a-be2a-4781-ae79-3ca9dcb17f05", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3 (legate) *", + "language": "python", + "name": "conda-env-legate-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/cupynumeric/source/examples/edge_detection.ipynb b/docs/cupynumeric/source/examples/edge_detection.ipynb new file mode 100644 index 000000000..c83093f02 --- /dev/null +++ b/docs/cupynumeric/source/examples/edge_detection.ipynb @@ -0,0 +1,210 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0c2fc84f-fb0b-4fb2-b356-14a75a1f9dba", + "metadata": {}, + "source": [ + "# Edge Detection" + ] + }, + { + "cell_type": "markdown", + "id": "ceb11e7b-85d7-4598-826b-2a91ad751092", + "metadata": {}, + "source": [ + "## Learning Outcomes\n", + "This example identifies edges in an image using the Sobel edge detection algorithm and is implemented using NumPy and SciPy. An edge is defined as an abrupt change in intensity of the image. The Sobel edge detection algorithm uses a kernel in each direction to compute the derivative of the intensity of the image. 
The gradient of the intensity will help us determine the locations where changes in intensity are abrupt, which can then be used to detect edges in an image.\n", + "\n", + "This example uses the following packages in addition to NumPy/cuPyNumeric: Scipy, Matplotlib, PIL" + ] + }, + { + "cell_type": "markdown", + "id": "15f468c7-5e09-4456-a770-03ee0e713546", + "metadata": {}, + "source": [ + "## Background\n", + "For more information on edge detection, check this [material](https://www.cs.auckland.ac.nz/compsci373s1c/PatricesLectures/Edge%20detection-Sobel_2up.pdf)." + ] + }, + { + "cell_type": "markdown", + "id": "ed8b8b45-a02d-42a0-8092-d232ef3da30f", + "metadata": {}, + "source": [ + "## Implementation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98e3e73e-e500-433c-94a7-9e0a5593e3c1", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from numpy import ndarray\n", + "from scipy import ndimage\n", + "from scipy.signal import convolve\n", + "from matplotlib import pyplot as plt\n", + "from PIL import Image" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "94310f80-baf0-4cd3-b0bc-b49dd62ccfbb", + "metadata": {}, + "outputs": [], + "source": [ + "# Intensity varies between 0 and 255 in the image.\n", + "intensity_min = 0.0\n", + "intensity_max = 255.0" + ] + }, + { + "cell_type": "markdown", + "id": "78273013-cea0-4c28-a376-c3c40e681276", + "metadata": {}, + "source": [ + "Since NumPy's `convolve` API does not allow two-dimensional arrays and our image is represented in an two-dimensional array, we will use the `convolve` API from SciPy for this example. cuPyNumeric's implementation of `convolve` permits two-dimensional array and will be used if `cuPyNumeric` is imported instead of `NumPy`. Try changing the import statement from \"import numpy as np\" to \"import cupynumeric as np\"!" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "64c779b4-6167-4854-97af-ac74469d772c", + "metadata": {}, + "outputs": [], + "source": [ + "def convolve_nd(array: ndarray, kernel: ndarray, mode: str = \"same\"):\n", + " \"\"\"\n", + " array: ndarray\n", + " Input array corresponding to a grayscale image\n", + " kernel: ndarray\n", + " Kernel to compute the gradient in x or y as per Sobel Edge Detector\n", + " mode: str\n", + " The default convolution mode. Note that cuPyNumeric only\n", + " supports the convolution mode \"same\".\n", + "\n", + " Notes:\n", + " Check https://homepages.inf.ed.ac.uk/rbf/HIPR2/sobel.htm\n", + " for more information on Sobel Edge Detector\n", + "\n", + " The image was taken from:\n", + " https://docs.nvidia.com/vpi/algo_canny_edge_detector.html\n", + " \"\"\"\n", + " if np.__name__ == \"cupynumeric\":\n", + " return np.convolve(array, kernel, mode)\n", + " return convolve(array, kernel, mode)" + ] + }, + { + "cell_type": "markdown", + "id": "a93b9fb7-f792-48ac-9a80-f5d33d800c9a", + "metadata": {}, + "source": [ + "Read the image and compute the gradient by performing a convolution operation" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4edeed3a-9729-4a2c-a52e-95be99e2e5a3", + "metadata": {}, + "outputs": [], + "source": [ + "# Read the image\n", + "image = np.array(Image.open(\"image.png\"))\n", + "\n", + "# Sobol kernels in x and y to compute the derivatives\n", + "kernel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])\n", + "kernel_y = np.array([[1, 2, 1], [0, 0, 0], [-1, -2, -1]])\n", + "\n", + "# Apply the Sobel kernels and compute the gradient\n", + "grad_x = convolve_nd(image, kernel_x, mode=\"same\")\n", + "grad_y = convolve_nd(image, kernel_y, mode=\"same\")\n", + "\n", + "# Normalize the gradients and scale to the max intensity, which defines the edge\n", + "edges = np.sqrt(grad_x**2 + grad_y**2)\n", + "edges *= intensity_max / np.max(edges)\n", + "edges = edges.astype(int)" + ] + 
}, + { + "cell_type": "markdown", + "id": "2d10238c-50f0-457a-be84-f50791d8989f", + "metadata": {}, + "source": [ + "Now that we have computed the gradient and the edges, we can plot the edges and see if they actually pick up the edges in the original image." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ebe45839-78bf-42bb-ac1b-24eff249d39e", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "fig, axes = plt.subplots(1,2,figsize=(16,9))\n", + "\n", + "axes[0].imshow(image)\n", + "axes[0].set_title(\"Row of houses\")\n", + "axes[0].set_xticks([]), axes[1].set_yticks([]);\n", + "\n", + "axes[1].imshow(edges, cmap=\"gray\")\n", + "axes[1].set_title(\"Edges on a row of houses\")\n", + "axes[1].set_xticks([]), axes[1].set_yticks([]);" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b961d99-f6d3-4aaa-a9b4-465bb2a02c1f", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f150d75d-824e-458c-a410-63a6c66d1f27", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3 (legate) *", + "language": "python", + "name": "conda-env-legate-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/cupynumeric/source/examples/image.png b/docs/cupynumeric/source/examples/image.png new file mode 100644 index 0000000000000000000000000000000000000000..68fab2d4d9365d778ee98f74344a0061970060cb GIT binary patch literal 305442 zcmV)pK%2jbP))!001BWNkl89wAU56g*+Tp`enysM;nuX(JLT9n@BR(Yz?B z+DNVFl$0imf`fk1+Bjj_Sl!S?l?=X>tC=RD7O#{cm9{r28#tsY2Qp(?8S zvQqhc=qCQyAN&6=x`{vb|ArI(nH(V8=L9(6pTY5m2>%x+{C|dn@c+c|hY0_74v>?B z0LOjg_&*TtbHaU24v>=*04Kl+|0f4I2!Dj*pCZ5kkduQP{2>Pjf0Tn9 z{}eg?2*Mw5{2>Ay_mLAoHvw;3B>tHrJ5WLR*kM~0)nxF39i?LC&Lt&9L6Rf2Wka`yg0EzB#0I@K!n`Q zo8&sOF~yllAL?dhY}G9i97KRAi7Nzb448R=h?}ZINLYi6~6mZ-a1YITWfAtP+#3m4>_~ zpt=Vp0}&jA`J!d3F<*hY~pCvRkn(DOCeO6bl+Fa2%eUxlpBv(D~H6rbQ5&`qaVKD=+P0Gl^U1I)~QS(GGd(i+FD|`R-8>jg6mtR;cLK-&*Pd@g>?Kd~?=CRf_ 
zX)eCqUZmJumPLmZFG9beR0Ewg`qJ1pn{@#;BIpwmj|n`dd1F=QkL_J)+EL5tQ`>(d zx={@&mrYt4wha|xu2NxKLvSPJYN?b&Xk~Cxcw5(P+H#0RsRu|SS#WKXExubtIqzkLIjpM3T^6W{(v!|w9M*-RjKWIt`6T|Dvfd-gu{B>d-IUClAyPDpfX z^`~C0-}d<5Ph6{{{5>DSA9(gxee$Sj)y_w@ErM`O?!oJHqGhTy7=YM?CWe@&?J%~> zo~v{sv*9Aou0DD6#cf)pAN;njzIc0mo|xxdnTkwdF2)@6_iBdTbS!6PdJ6SFv#kl~G1uTW3ITVjpywrJp- ztj?+h7Y}B^DO8Pwl7d)H&IXsU*I;JR2~=9iLB+?GowWq0XmlqN1yhX#^#G_i3^kHh z(Fm)hA0;Rhkq9-YyA)G4Kr=DBE5rb!iMf)1s*h;c0XI!B3)zv1@X#82n5<<}2%#dm zDwG6JG=)e}*_-;;<+=EyFMcIrGuvzQZQu2!;rZK79z<`up~O7ZQieFK&N9+`uOQ^` zsLV)j?aDJqd6z~mIS-@O0GWXimWJ4t!@fbY3{JHD{wH$_E&24`=RWq&uZmD`Ygn@s zkg?0qAn2wwNkyhv&5fDK7^OmjnFjTyRi>N(V{$R&hS->L0J}RoX!x~W;$<5;+147Qred_T1f;;cgl3hv}*TLIk!Z9e{i z+y!=GW9E$D(L%6kv1W)7IBQ!<57up9a#w?D;2g}t)YcH7F3>KsR54+&OPi;L;zX2W zjD^}sg``H9^%Jp}kV%f!ld&p8Fj{L>dp6W)1dUFh5-_pk(wjMZsz&77MBcb^Z3@Ua zL!;-I*}S4Q>R}s;lW-<*7M@z2k_lxaq5zeOX4_`-W^ zW%25Nap`yD&s=<^-G-fZRp0U0=O5g@)deln-YKrdwxy|Y&1d69g@tq7MH@GLcRoHO zw}IV7R>YE7S2c$;S~(AZEV!cO5Mo4Vwkf4Wt3!?_5B|A%F^*e3`sU>ij^f)CwN;gg zVilg+EjPJ#WIP3T)=(2w;$hC@P-e{?hmSTf^VDGmqmi z_cD(2MW6cckv~a5X-AK&9?8ASnv*zZCIbmP4mA;m85K%z-g@{Ozw-6fIgs*qmgT$E ze)+BVwpTX$K_L5=XYJFg55M}r!%zNz6Mpf9Hy+?z>hA2H-1_0$_40?Gx_eLyMO^xq zaPrn0ch7u&m@>LGi0=yo_p&+LR2XFAZ66zM#m05jrWc0OaypBBT{Vw?=v@l{cJa%< z6{Lx8NYll6XNsX(-ZZI4En3ELlea~fx!Pt6^s|=JPT(%KqM}6Z#K#)}-2`pE?>jc! 
zRAFWah}$t$NJ-V&qRE^K5R9d85rwkVn$$d~I4L{?Dy@QYg4doamk=aJ7<&vQcWlk1 ztRWUQN|Q$lL{=oXT$w5x;En*TtruaG5EV={IKhjzwJMQDOpO%v5S2_N(#R&RpsASx zWjI$Lw_1V*$|}y4QJsxkta6+TfTAx1Q733S#+r(GNSWNpO+qCl5AMtp8*9+8__bfy z{c`wp_vPJgY!^5VuPh%ETVL}$93vr5DB}aEZ}WKhV!KK+s>8*_>9n(G?TLH0+cb~C zj9uy=0LZbO)>9S*Afi#)#G`QtAtfi#%Qxm0#5e2L`hO1<#xf4SfA@ziAYe$o?IN;= z>e|%k#!MckeD<&s8Wd@`fCpb}nj*oP97I_euBV3&{_X$quYLCyekP@O8fHVSfkp&wTc9Q_$hNUij`gzxS;rbY#XgH%5v}7O`N&!)mzJv z0;^ke;ZioI8&d{3Qrq^y%Xz6fqb3lIJInN@V`}5OKlE4taQxB>w~y=U5^A+&D{!CI z6TrqZ&QsA<-i{@P%6zd^^lg(;jEj`!RdQ063JUJwa3!Fd!1klxT5`h;g`5VSW$LoE zsNG<}1;kkqhRBo(6&EeyHJMq}mONDBkOg3>)Q$mQ)f5F94MG!(d1uvwYU@QbSnCNz zXy{OaL0P8Rz@a26IU*J(5*dlDDZy$|jYcphrMP8QLBzl$9(muz4qUI!@eFr z@xX8H?Pe^N_jHqHi|Gsdm*?%_!LHeK0Zdb=VZA8ZHeA0H$xl%Y6#kK1Im*(Q6w^ezHBid+#s5bF5rRqu&h&)6khX z8^ej=F30EZzKHAzbza><;caW>^m^IJ5@5!qSv!6CKY8fikALg=8Oz0^&2Koq!oJHg03x9sF&lx;7o8IMyVsamL0V0!cRL(lxm)5BE5L)Q*Jmg$Sn;7WPv zO0SUik-j^8{aecBBi&zN2mQ`(9543lt$2F;EsuTEFUy_1nIormbrcWgvpSBWAyyKL zY$cCny;2SnxrEtH!&BYlI)!B?6=}KG;Uzhdo$=%^d=NN){pIIg>N>eN?XGhP^E!E* zic#%T_9}H4hxJCZ;arAELO1JyW(R$f03v8(Fm@NWaAzZ+o1oPv-#>;BNc&2mq)?rO zDkbfw$rGrs6qvU>q);+ca?()UJ%@0y!>9sSzDpvu;3J zwI|OK3Y$0uM~zN3dLdX1Q0{V>Web+)wK|8YAxunOQ)VgDs#Pi>aaImvW2UT5P6jKn zFrexI)lj1wD}}<8h1|7O%c&hTnYwvPT%nvPkO4I|bxP_8l9{FE6!Y(6A9eP?Po5vW zitm2%^T*fv_VW7KM-~Gw%Ha`jf%!7@;qIl&spc(tvDaR2+G~B&$$Htw?)gj?<#Q)h>-QWkK46<)J;Xa+*<6Q0 zu3qWz+WLL3JoMmuei#mV^%w7r^4{C)p}Dj$CB4_4ztzRUcHMREnr5 zHjUeD<^T+3tJs~T7E#io>6A&uJh=u2RiFyeTPp$G1g$>#;Vp-$W9uzS39^ksWI~}< zM9U(v1Rg>qW(B-tQU|~WnNlp8lq`fQ-sal4Yp2BG>I^i>6@k)_3IVY&N0q5DCvb@l z_YfR9QIP5u3O5rc@I+kMg(~8tA_^6#TkB&RHZsdvBUhAIGsRdclNpy#k&`+!SQR)l zn8r-bMo>52560(D!jd_(&)B^~B4smI(UCi%|2i@X7Z=XGj@A}SfzJMmR z+q{b!o}m6t3tFtU>w*$w7P0LY-EMiKz&E8-+l>}?-x#iL?}mBgO}@4pP&|&tMzo2r zW@2>et`2Y=gjd)Cc?+ltB*p{_t=TLHgpb$lEOea*jN#8dQ45!-*mz%=xDJ1J_R%NS z78+x6iWj3bvwLsNuO1Fh>^cZQomMaWWc`n?{M0vQyczf6)<(Y6@UnUA6B>C@N+66) zFe=Eg)6%j{LMlE;Lkfu_KwveBci+1Fjn90es(jBge`4M}|G8m)<15W|1=54d`RS{d 
zA9(3Q?Z5o6gYfJxt&Xn6J&l`khYp(Q!PA^Q4Yfz{Buz;qTcVm*7n63Tt|cwIKCaI; zledd*$g>@ejZcMS=WPdXzx)q={z{v#qbJHC7+1X*F3gNWo$?mV zvQHt!HnlNI$rPd)TOb2l$l}Hv?w$qEP0;F7A6!{zMPPxFiHfHjW33GpkE+Cwf%`2` z8_H$|hPmYiIt32wj-;v)5}c`uk&*WQ_)P+)>IGUY%{ zQ)_SmSE=0;93+#vQ!2q~Z9VW1vbYmOt<4c-jJdODqcgG?yoh8sA$CPoNjYYd?E+fK z18=D<7AwOI$m3^ACdZT5%T4&CcgRR@$el+Zl>;1kXJ8NB?de5L} zq_7;|${NF1n|0B#)*_=$L0W544Pdl&ol>f?NfIhwJvi83#Bq3b`15ICeDlUHj2~NE zF)&APt*gWR=7L^r9=|B>AIC*B4>)@Iw|@T5|Cwig8iLih+mtcSFQ>9xeEWNZwNVQ2 zDa8Udh-4nSP)vh{%0XF-x1NYKj5?p5-aY@_&)l2dcH{Qc6J8^zmHt#&Ivyc4N=o~ABP#_W^7&gOZEyk49shusFyt!C$ zT!uDKdwjb4Xm|HceCU1ey~gB(zxTO$SNmZIWL=U9D?`z-?j*`I1uk`aF(V4 z?>nRzn|TzoYEhZpn5mLS1vn@gEB7t}=q70OsrLduLp2X98i6{_%|bA8kz6AiA_*aTWD-makVxct#;-+I$Dd9#eSo_ z?NhZl0wI{&NM5uf7al7Lbk4)tMyn;KED{+LI0A{=CLQG--8}W2%a1)d#O`aqH9w)Z z{i%@)d$QXXJ7;Y?zm+Z?nO|COb{`=Gw>w||yGK9q@c;0|J(TfGcT(j`J56mK`cw>2 z91Kwp6E`TL0w)I9L_||!5_NWKMj_Ox4KJ;hFaLwDPk-XxrFYf!Z@z`c-Z*(kplSbF zTVCV$JoD&{5BwmUaQ2_J;Vr#1XteS1Z+YqVqUMAEaLUHuTJvf~?w@w-@7# znq9Cpm+c#g7YDa)y#I-Z3Gngo)z{uS;#RWNUAIod#ppgv0?c`0JHuw1OAHvW*@R}c zhgtA$mU^yIKn`*#77^eyi3D@RF~R8GSpeMx)*t)8dOk@A%)vubqCr#f8Kz`YssXt* zhXzzwB|uYFDa6PT2B`ri9?7VB=e0Bt!6X5xvXjLrxBV1oYDu9H@R~yAU=Wl*m_pjJ z*PT2@4NxZa){3NpG-)9B3Nk{dBvl*UumJEJLs4-tb&Hk}tGkw_s)Wip4ME&kgAKFH zRvIxi63(Qth{D>MlVUbinnv$DowlI>Wi!M1J$v9zSyFg&=c0M!S8rcDy;GYXrO$EL z+_*}myEwWbv;OWY@0#0~A!RyQgZKNU!RlH&)srnS6qfV$a-7d@z4l}Njpu`(tmui5cbY0FfJ%{CN!E&_#3`HS5SEvMpG><*i{l42P zFL&V_KmJfLQ=RTU_42n|zr7RBPOcn`M~^gLd%@$Sw#TMve zx0l+kK}2$KSD~6v+`%SX1EU0nd8|&EPBsF%3EF)01KU`ErVa4W)ZjHnBS-DEGNj}t zM{>Z_$y_zUz@~~mQ)NP|&M7;b8&X!(F4R;jgopumO;8|M?`nd|0VoimDpf!#u>w$5 zjid}%qtzI~qalh#H8NC3S4&gL$)^AW7$h5$xto~50!9z05VKSZO%i>Qm~&H-7(tcO z$N{oS?N%y?m8$WXSSamg=BNAHZTHMxb1Pn*tfnnpIWGM9_}pPv@u}#S%*o?4opy}|TU8SmxVA`zvW|J#;7@Q+E(yL9OO=cICQ6agR4a{_ry9zP*C{bzDoGZeq$N*7E-1XzQ3{BTkEM@oY z!Gtztn_qkTcWzdXzj@Tic6jn=uhA#=?p^%!|Mn9f`PpCFaT}98@V!sHU^`utuKrN8 zK#6k{v)oz@<#sN~I)gWY3|JtT~{(oLyyYsy+pMNS}moL3%k!!QK 
z+V^jqz5lSh^Tv<6Gw{qWbmaj&qm2zjp=}tD>Xdmij2lrgRpq|#QW(cPq3Py4=`3Ac z-n+BD)HJ_c_by%k3*q#Yx1L`Y-=dqOEJ>%XoKDPh1oawZ8qQbMjN5c{4xsN-O3Q_G zfmApnkO2_|1hs^ap+TgA2(?#O7!L4;9+F5p!X8Z>do5v}F+uUZfp@Mp#8#xEWy@ zjdLPHZrCZunv)cAXb_F1x)>1`YkrSzqa>JPG%^7@_k=8xjM7W z(b#eed*HHeOLsgH7Yp;DTuyyFmBqz0o!8J8PcxO>Ifi_Gbh@)lI9c{UTnAC!thVQC0`lH+FIEP}j{*di0UVbzWZGKC=`G#BJHWMl(i(+Hg;q&6@kr5KLt zl2a>DfwDhqZqOUE+`e{jaQWc$!F2iAfAc&3gQNfKqQjJX#H(~zciTRG_}jeAMPH`=rrx&@ZOxM_0~%&;AyDipm6izl)72RWn~2+&6K+-&id|f+wqO} zJjuWK_jZRH5AW0$F`e&WLgX06Fx3f_Vnc2c^0+!%S5C|}POB*_mh-liekRNW2h0RS zA)BZ&iJ->fT$8AiB4?50bhHuBP0;G2AI`-SF@=suIan&4sYqrPB~n2pj}ru&V2JMO zVOwLY8Q`|!jLV!f_HdazRUu<%gk;97k`pR>R8fXAlA;DSl5yrk6YF*cFTuge29ji`d+kcKc5<#^+qpS0P2$MOOFO141&yjx zlZ``#1+61Q=YBzYS|eXN%fq#!TMsVoh2^s@b=luIPVr>u4?ny6l?{Mz|K2aYcCAG> z#F$27?OJ*9QpDVS+zyCk9C_@owQU;D7abj~+&EN7L3jH?xC!W6!Ck%Pa-VJOu$-NMi5| zB}BOwT1m?g;N|ffEuB5^(Es$aKk%fb z!Dqkv{1k7*w~7V{!l6Nh+PJNCP(cA)`bE!PF9uIdDzR--%E!Wkd0MGp(X_M0Jc%T>8b~y#1u~SN%se4D z83pI+#R%+XHN~kZ_bP#If>xjU;L0m=iy%>{ZAOrkj!`w%#u-|=Nu!f$r8I#IC9-KD zM4t-3`N}2iug5DJe`JT1t1WHP)`+R*odWkZM{9=2S=*f25f;Sb6ukmmTZ5HoaYO(+ z#7gTm3}b4e)tyco^ATY$*n8reRFL{RrB5#Ue({n2i8x&_v6of z{nD<6#is34XBX48@O$GN%NQC=mwPFP$4kn-sWmTW{hU|JrVhs-*ElYnj20Ig^jkkB=l)sscL2+Q!9%YW7W#;uu;xlL9t+WB7FbpFxz6b(tFs6dD) zPacI$BZqN{2l{wV0TpUUrk{FP%B z$o%p$y|R7Zo#`Xa=ZFvP)}wndXcMYTiKkM_dMsdbD5ZWGC9lWa@lKaYVOf>ERTsiz z?|ml&s81cgaq`@yv;O+=h}3~`LFbmdO^GWIPPcipt(6U>8D(oasG}!?gLssSlWSP2?v&W~^M4+3X)h9o^R;koVW1N86h({%I_E3Y0*OWC> z3q^^zus74hDGVSV&+A*^IAZ&)x6#D2lPw zu6pt!vW;#?xh62dSzTa24RPc`tg(UwpUUma>D(?(Q<@1nIXT#kQts4t*qj-#AN!e{ z&1T*C>m?G)Y_pw3_LFtkwQUYz9Tu}Dyk!rxdEAtpuv%5FTlD)h-q~Dg17eq6$HQ&n zKe z5)0Ds`c=F5p^xLfdoR9pcU9!ZSFdxKny#zs$wnn^V4c=r^>}kJL`KT%Z9o@8YMZ9n z;Ye;I+*A_4YCwsD7>6*CBfwE$BPj(=nO5yOmax7w`KSQh1g$>#;d3?#EJCr@QfO}- zX;?5hiZGGM6s8z#;~oSPTVa%w-6$uA+r6{%i`n(3_dDM|UM`OBUJdzO=b_!3kCj7d z$C+EqPHf`2k&+ZFq_xc&`Mwgj9n6&!Ofb$3aSoJ|sk=3jC5*Lo*TK&3E|xou#wwM# zf}F*;ViGQqifTI=MK2YtjjdHMV+v9MLth4IoGPiecd*v(a(;S$>FvqpglDV$s8dh( 
z4)Un01NFYK*ZOoAi|_sV;<>}_BlEAH|LO5dQ_d7yRfK|MvGc>DA8-aqkneS6_ImPuG{-$*sp{ue|dLbSyT5L{@=eJSkgc z{-MH;QH zp{HX8a~L;UG_$VdKBc~CDzgKPafEts@(Co>h+<_f@H{LzcJbno%z|^O@Jgaanzl{g+zr+icCBsE zMzf;i#9#Uh-nc4XLW$`oB7>-nW7g<;bpo^HomT$VeGB47MEcIEiJ{CW*t_i*#t z>V0JU#al{|s)%aNo3WToqnO8jK6lxyRGTHvm$6@TCla(jc<}1uum1GKJn*~wr|-^Z z%gtfYuIu&eWQ=V)=XJ5e!Tk1e(kX8%3+x=OYeS^l}1h}G<3e0MkUZq(B@+w9HFgx*`wcl{mCos{^nl)ELVEhwPfK=yn?Re6iiG70;U{^oPw4hssb+2 z1U0&fhAD!ngv!LFor1%R$l=xVHeRZHyut1B#}6{?1RW#>D=yVzMsltdVpb!U;$5*o z)uL@03VyuXT&%h-hu{Wl%=PHt9{=1qj+%@fbHIud+Yw% z=D7z?zPXjJW40-X-}mTeSBtt!VRt~^Id7$FkLuoVmg9&T%Xn!qTWoA;rL0y4o=R8p zrKV}KjIWKkU}rI{LtDpow{LJK>;~%wOJ03Kt7W{0em<@_7-n6x^=dZ>>fJ>gnM&Q> z>#yC2;nv;j%Q~!UJ4@#~%ChkxmWYC|ZP{8`jV-!{@AUH|C#&5kG?j$?!@Ga=vA_9y zyQpJIqZy~GZQ{3o!a@?C8W)uf_h}67Oq3C8W8*3XZZ~tz(m#2(2~^n-!25ArC8{?^D~QFmEMvrBSXb zFiuuYNNOzPtQ=rgype2Yh!Upqrr8r`|hh=m0@Y8~_4Q z>yFx@N-ViSB~h0#NEu?0^W!^byOHL1`VZ!>UD4M+^y-CNw2$te-FdU8^`j@>a%~8* zI@>NE6K_Bp29LckI8Lz!6vUEpJB4nv=#|JVRthyvLLQ5^_d-{bWab*PPIS_CeLT5) za@;+Zrk%M8Pk{}j&FCQ;ni2|0RTnBjWrD{ld7GEx&LO(^*$3>6_(?XW^xI@ztH?EEas{2Y>td9SQTr z!3LQQq^_53%-J|7mvd(*|`<<@gj`8bA$w2EIDu1X~)kpvjxm% z@h!gI1Z=c{aUGa+D4ddx=K(2X3zqwZwdHnN7LVs>GeQ$_(q8hG5u%|z8WuTC=bbRd z^Cq$mW96N&8B;sgVa?cW^VKhX$IaJ&swRwii4kMJyCbRn?k7v2SfZAGM9PV*gct(X zig5St`|!-g>EY2y9^#_kSwynUOc2^)kw&@o3m=^|&;M4%BTr)=XwuvCF2C(Tau?$u zKCHG@OfWdm@6AL@K3CzcpH=bMz4Hf-=U3nJ*aL+7Y$IRz!j^~S-Q9)Pz$#wGT$|cP zg_*Bc+c9%!*Hgh3&7w~&cd6+}gh3#W)s^EU8brZnaU%jms%9KDTL5Eyae!e$hdOo5 zszcjM18Ln&ch3UoCTR7E_o;?xB7jh;Arxv|#kyjl3K^4z;?cLlHG}cyy@yxp$(Ikl zoS!(l;N4drd-;W(E3<>6cf@AfJVjpzVT2nj4EX;yWSc0}4I+wmkBCr&&bJBcTbV~>;AK@k0LxQ4^k-P``{*Wdos(4NfX_tOk2!jZw2I)oe$gMc8%oCSiA zCR&!jEQk%6FhevqgpOsfH!V9%m_fe^=W*P{6#;4;0QOj0i|s8F2^a%8q}5bJ}T z_O0l~W(R73{P8C*Uj>2%P1r2Mi;VP_SH{=yih)AQNJlLSrtsX(irj%=RO zayWKUxn5u?c;>Vk@rD@B9F1|o6F9&NEzgR93=9H7J!V6XfT#?FRCNWIat8zsF>i-v zJWOH)8d%O-Jb+!jPDnT@8bH`oB(-L_Vz#oj5jj9vNvH|a>;K@vfA{SUzzoL)DA(vo zftNql3>>wNIU~S2PD{iYwiac1HTbvce`&pN=Lg9t9!YG{e4N!q>$!p&qpN8-i-rdu 
ztHM_~ZpsRC7P1n_Zek8tHtZZWzWg8n z_u_vIPWjD3RR= zql1Gvl++3i@;K=(Fy)DM9Kblj6v7&Lj1)l{c^~;*DhlA-VOEBNJp+9a;l3t~b(Sto z12bEc=D^ji?^wZBbwBd`b*8T@*oN3SQEc-D45`>^)$@n(+|Ctw&^njJ)&S)9eEi0Z zE_Q4*30+77n5Mn^JlgHenQS&Y02X`rvGx1DY1Dqn?8!tip)l>QQoxLn~sM0QrZKW2S z)5?NptwiNMgXvUdlo|{sD{L4^%mWZYG#J6gS;D!(;1mT50*ufIJjUK5&s=Q@^O#y? zsX}Iimt2N3nN#K)w4BvgafB!(E}^hLqkzr=NU>AfPB<;)HRC zD^H9=T476^Lt$Ydd+FZ!>go!)FyEXLIh%Fz{Yn)f z8!WfOD~+L9E5{^Q+Z@JWZKGgl8|n3Iu1Bx(_X_QDQ_OL*%8Y=?M9_EHF^ND60T->GU{$f&cUpBp^ z)vSf4`84VAY^*4khD9(MCrK3Kbw?U%eN-E&Dh{4zIp?6JhKEF@7#kc2p#8&WDX6kN znMB$dX=bt=%kgNf5t8~wD@N&SfB$3O`bJ1}hGS=pikp%}-OmV)5ka0f;sTaU>p{Pt zjwhM6ruzKd+rM7FSA6Gktzmg_uDdkfirWXT)h{+%9CJe8#iwF<=S84PSL+UFfQRp~ z47qAzymi>t;7Oz1aao}q#jPm~gG9?hE8IJ^dPHpeBj1hqLhCxlhM3$?PE}^L<-jDh z(p##FJj*M<9E>TaAV?SsX)#S?z!-oc9soOV;0XXwW{|fCh+?3qAq;!Nv9aKlM;6$4 z`!ua-l{k$3;gp2U#8cuyyINs>m=Zu|0i~aND94B)1OU=&0ywfBd4oAH%?vPIaRRu; z1}WqUPqW=}xs%Rz*NZ#x{I!P@v-j`G7g~$ij~^Qh!)t5DqJRO&mu6@BCq}`fIv6h9 zH{F?AJ5n8*<3rVrt2&UzYX%e)1_TvBVMUw?N<^K_+p6zDIZGUu9<)(po zbF_|JLud+sku@G+0~id^%y?$i_Wl{PHR>&t9aAms+&c6{RI7GfxAM^$z2f`$?u-o? 
zo|bhCDe0)yoVznj6@|)^ZIdlbBY;5MntgFks4qAT<4651<~$3<#QGVt8!=Z3zffdy6I@2;K+ON|A>EUGbDPHw?k7=+2u^!^)U~UP-JLf@aubn>1?DZ z#*|7C;HSj&^KedoYl9a2qt76;(uc?)O8P68Qk*n9W_kB|`|7i}xFQXAZ(+8^P%x9)4zyd_a0XiJFtYoE9Cm=Z+q1f|mmV+8lQ7 zcYE!l@%wx^$g6ekG^R*PW58ib3XVZh>jR`5!L}IFK^%85SEg3N?p}{Z_VNAMv^uT( zJ+)pnWL%4~ojb0Hb3ButD^wRDfg;ajIo>)Qwnp`7Wp3W4!xA36??Q#@jU-x_w2CUd zx4TPA5}-X<9UYk?`Ib;-ezb;m)5Wk5iZ>2MTk`HptrIu-WP|NoZIT*5tLSr$pAJE~ zU@A{ABrnxZ+x;zwmPf1w|s3pP9Pifao zGl!*9JRtk7<$}C8)v9@ElgeNNJav9rGy0;RYZ%66cu?t6dz*T4U@7~!-y>@JF+^uw#lN!h6HP-VPlTpoKQe#0i|DjY$P3G&T#Az6$XsJfQhyk zQ?I4NwF`^~m1{n^IU%HRZ*vWXXqgsc< zE+0=N_hNiJtiUY2Pv?(-?LmOSvkD^sOF7L5=t!|rC(Z#lx2fEmUn(F#+tw#rPP?ITOKiS@g__Jo2GbfLP}Czs({ac&-s1}!FmCo1-f2B}^PR(ZQJ*{U>HY^s znF*)5(X7U-$25gdvr+pnTHU8{b(?e+x9Y~7^4@r$wIz%7MgP(2jWh9u*`4l%y-p3F z0pPinO9w&&lg?opl+%O{@9qTSA1|)Q2l|+Ig?6}bDc~XuQIFr@rw9~dWGhuVJh0Gg z0l2KeEjTP)Zi!D&H7;lqhEofqn=}%#OS{1M%sESN6+3I)L_l3EW8>T+~qdQZsA`symrA8SZ^b!trg|rPqBn6L>azpf)m8*Xn0Q_jPU+kw91e`e6?3I|F$qKjAZ)GL{T5Z!a`)yoY(4Q{@?e!=PPlM;2jz`RFOQa{Y8q-1aSg zVLzt)?N6QfN$LnG`P_|_aV3JFGO*m9nxaM0xuU(UZr^Sld1nz#<_e!0OvWddMsIyO zxq6sv7W?hDr=5&J&Otr>zT(l1>ST4+N##OSnltg5~fj216IW-O{um_I#8q{;SWFmx8D2hkXTjG zXS&y~R;>jpn+;hLgE<92@Ml|3KK0W(E_>)9-|K}js!@F@_=tRa`oe(M$Nww%(Qor- zAJ%u5s#%w2C~Iw)DEb8a9Qe&|bn z_sTRMX+0@8mcmr5F{@DJBM2GsdZx=+sjzd{l&XfX(G&!fp>Yxm2Y?)P%py%SVida) z5eFCySWLibB*HK+OxY{j>vQF6Vh|Ea3?NLEt`Gv2)l1e^H|CFDd!zN_x-^Vbh}R5j zI@>)^1khPP(=R+)Vo!;8h)^vpP>+4A3`Y)=(u7c3>H!6n7P(~vmDU_itl%3wc>Nx8 z?D+2WORd3)51Ah)WG0_1Nv9!9{Z97O4QKKuOY=c>BJOuOl`qwtGxw`(Ino{)cubJT z;HWdaCgn~*m|x6q11a5{wiP5nu@U%e*P!u{G>xN)Ja&-B9k)X12w}WhiO_g($eYww z+1wDjKW#or2NypYv72Fjq?W}#*=yqDI-j52US^@1unHx8yn^Q2ZaBzRB7)n^o0r!r zdVD)x>Q5s^(Cly_e&hI=59DOLQ?he;vt$}T)ersL^{Wl)=BR2?OE^?wlpNg;&~_IY zn!=$08hDi0VW&YPOn5xaXDP8o2EsselqH zYe?HWH~z_8|N7O0>VgW~tPg0biIaKcBtw`{qKx^|-De+rV-Kdk`ni3waS&HJKT+~e z`nU2I`a+%j`|dCP@7AMe9#72d?N%a@?>s+$zgOQLvNdxDR4q88V0!f_;0q4QMb63?WS>^DwBka6=_ki$Pvo0G%l~ptjsVq&e+^Q62%NK01=6l 
zq4nM%j|m{4m7!Eg?5yGrfEtXzR));CM0DT}eG$s19-(*saGFqKXyCoq%+{FLXb(ER zzOg#@^7e-xrwA0%rQ`sF0PuJAIiRzEreAn=NHD{S5={|dB{!Zk=Cz^1I_Ef4z_e$^ zaYGAD{TO#W9^9Jjudja3{hBCVy?NKI`;TqDz6>9%tQL#IbP&; zZW^n`WVSq6>-HDt8Mnx=nmA022S#dQQ6Hqr8Zi05HS{rLc6{6`GgXvzrKc?dR9nAN zY;@ajnherL-l3fiDyT;lFETW)S}cc)qLp8ptSY(lNI0O&knRuQ8Tw|nY;SfuIU}3h zxWIBXWH5Ax5Jn5lE2rSiR=Ff5d2mbwt$bFc2XxptbAGY7b^!UM6cPp+AU*TRy?219 zUX(Oo3{7TeT72d`+qkyVGOK}9;Joqq$WUg1v5{QhG&l$n<);dW%MN->gJjgC3fizy zk(hIbF=Szndz3-!3{s5KenavC*JI3akrxosYNiN8an#`Xo#hAuGHy^qgfbMu$*fbC z`S7?j^{fUEWrZnr)L?2{H*AnTd+)LTdh@rOgraD>+*Ps}u^{Sk#)zY;o(<;b{%G>l zLqE;z*4KU{o@_7L001BWNklFX&FwWe>a4|S*0UZ&ch z%flmLx^8^$*1kQ_ePd-^<&kEzRMVREk1Cbz?X)&-L?7PTU>ZQpe>y*(C(;?rp^&XW z5Ow&=v&-GtHOWd0OjoUl3u9vhWkHRtbXgg`5Xv~!Ormk-r!*iytjs~?w~lkk^hlXD zsT7A9O$0|nx70d}1p-A338&UN%e*ahPGglhg*gBhb`&mgF1CUeVRW+@?!#bVsq#>H znQG&71dwG$2B4ji$(4Wb;s56^+J=p)1#K!7w-QL)8Z~8Jqr?zG{~<~vUt~H%LIN#ZEZ|0rz9*i2EhZXovtcFcoR9GsWqd_IN;6|MPWs! z!MH&h^G3)+0tOsr3V>QqoTJ7~wg7Y%(DWCdo;kvt z6@aV>ETM)uPK>BM0?m;p8d--twMOPB$Or7wk)_SjP6B@g-+bn|S9ezOUweY__8Cs2;#am2~Uw2gylhhxOSd4lw$G)Y*D9mW{iPP%k~ zGggip6nMomik$<6wXG6jbw1;ftqM%3#7wYF;FUD_+7dD(?U))f4Fzzhq{1{G9ec0$ z-xdGoRIs9=7>rV#MuK9xdi5H|OoTz4oecjld}#gkjAVc3nQQ<0QuHKiq2)WvPmi~^ zUP?vv%)qHjCUQ>U+4@-E5X;mi-=*r0uM_o~lCB6}%)&CNH zW_o#Nn>W~d(?D{>h6Lv=C9qC`JxrhrwW>4YigT zf~ct&#Wg_^$jk}{tZ;Oeh~DCIH)!IoZxVx;{KP46c=&rmeB_WtIkTSBh6zNgi>u-H zRzJ(kox|sztlxY4>`yP(Ics;@S7ShD0i~b&$V6Bj8Ul!b7sNO#08mgHYGS8+ zEszICJU|JAXUe1_*Wpx~h`)QOT0C<#iuNCVc?(Xg9h{uqeC8T%Z|ru2U0J*3**#Zw z8~2IZX{$XOBiDw8k;M1Yx%&UanIAD-{v>Gtw7>5cw(GH#yx4fUu0e&r15DFqiVN(`)Z z>B9|l<28VlNe+?~nT9z+fsXL z{p1U;G(9>CDE-0{Gse8-4%tXkg#%=W15cO+ON|dmhOxt7!Gj`>C>5DJ30&oC4e_6U z`{9pYBnNV`xv-cWc?iFFQURO;ldR2pNA`fCE!({BMg?o}{+(4^iIY&J<#FFZggasZ zA?E98SoBps2=*2{*#P5deD|c%ooZ}~-kjPvXeJ|HFU6&HqKcxzP}Z~4JZKniDL7@V zp{mo$f+)S1vZwhg)qEi|rTdlc>H68%!p)l0_>mjql}4bkJT(U&lp?-xx7>@T+F*5N zBUu=yAXt=+tfkZPZcz0lXWxuy$us~cnE&wHgDL@ zc?D9}OI+ZJdC)T*Ri3C?8M-9HP;V(>OL91Fs&{*jN6s>&pUWR}A%{nZO 
z#S_Dp)#0#&fZ;SnH6cJ6{WLzj?=dFN0s4_Xxgqz9lvzVJiU!6{PLlEp_}` zNB@%4Z$9+J_s;1u3jW3mo2DPRUm1NTo*hy^X8}#V^zejnU&i1(2IM8JBy!G6ptUoI zRZ)!;W(Fa{wf4wTkC@Jh(7+E4mg?UR@7X$Yz1n<$PoZ7V$FJy1?Iefh@}L*n&JCyi zuf32?&Mdqunbz!7WG*+G(S0>0mRdmAT5PJr&b!?k(|8;fx>MybiK{kpZdP-3N>6rz z`FD?Tb_kT}IL5O?`PjN4EL5IC+mxl_x?<1LxU^SEU6t~&eW`+GAN6ZoqYY5fAft$NE|LzODUSEUQ1&7;QG~zyPY7S z9tv}x{pKU@tpr1n|C1A&|NEtdC;Xf~d2Q|K>b?H=3zmNISL`2uc~|lwN0vZscucnTKJ=~V*Le2kXP)};4V~9(KBC4VZh&BIsm8=r(t=@V%S0ubBbHWqH~*6- z-_Z2@k%tFVnO6Cp{F>X;+ zo1iLNNpEa|-Rv%Vd(@&wZ}&TOxFH4$j}+VC@L=(ny0SI5x@S^imzvFCc;EKTM+!hk z>`&YGkA8f5)~N!WlS%_5We%%#{r1v@VVnWe0HFBNku#gu833nL6*dBAaGm_>diev^ z+`H;{L<*Kvyy1zifLNn+RRI`|fv&`06b{Ng#U*5_$22Z{gji{X#Z_JKHjFEv6hk2C z(BY6U8)M>uV!+NJX923(a?6WJ#UoZ!jv5(>th*`KYtbC!Jzsj0;2LQL8AjF$=WyO0 zZ2i-dzjL9bT^@3!OhxD7K=O~%^Mf3jLY3ake{BB`?RFbj9? zKW?vXz2s2!tDh;p@#m+G2hBQ@@4vesTJ@{X2F(wC1h6uhu)I#XV3$v8NBEgv*u8dG z>>KJ(!9s2VoSCv@2tpJy`#9`TqBNV z9EepoDhP7kQK&El&8Guf%(wO-6g%7aps{uBNG3jb`e9P-&h2kbOsAz$RI5#qVpmI$h<7G9fuv~lv(H| z`R!Rqo#T{>dKk4iSjz#BwH!NBv%u!Ll94V9NSAOD$pgJ8m{+;r7F*&9N03gigTXZ5 zWR^Lc*l-3xYMN4_ak(Iv%yNz}Eg<#=B`<#T?_B&{#*}i2233LVM3k-m(#`kZ9Y}}* z%6NG<`rYL<4>Ml><0E%|`~2Dy(6)2453Q9~_P(t``|BSa|Iv#lkKh(HYE;-Nxqk4G zs{P|jaJ+FI803*yJoV5;9r|d|C`3k+ka!~zSG83JXw033(iF4WU{0JL6$OZ>5mE1e zg<+c-psp6)5QGS>fEW&-JVhQXmnlIx0bI7V@zU3kw_%B~t%*;g+0TDmAaD*@@b0_A znboxYS3}Lf!`IPn9n2Uuid!nq&y_#=^)vaaKX;N9FP3FT#?^2Bc*|`|{!5$hke!SI zItys}#b+{VuoH$NZ>htcQcsZQ1xS{Ip#V@Y44PV2b7vh>(<-cpAcqHeH`wm4Z(mR+ zw$^@*#apl34#M6vJQgBmuLl9T@P)_DT|#Xlu9NPRbPi}TzPnDuK0C5T9Lac45(E~Z zdh3b`4|b39d^Bz~@@No{HkSQJXpy$*tI2BI*i7$Y*LsSSKqE3G!cJW~8mzX*DimmA zud!UTdibU8!{^hk2WG6Nu|M+GxS$K@3Xz;)$02VA0V@aX2Da8CEX@SP9%VD$ z_Id88p%4Tjoph2=q?3j;4i_x)Tum84aHyNu$!d=Wq{`a>k~N4FkX z81LNqVI|a8em?)bbI;tru*QCJ(i%iUDt{qKPpc*FE~fT<#7sj z8c11E(RbKWCOP0LE3#T)0bZ#|VZ$g41B1Xel29^Bkv7z^S{RQ#v0#BP2L^~!juHqP zFs&U@qGFyp2#jVPdkkEge4v<(SWMw^C7a({A%bhc=Y>&CoZyn49YVncBac; zfA%?e`L1XF;<51g%8D>SUwvVqwn+5lTW^w`i~>3fDE-_MH7juB06Y*+kw?Br5F%-a 
zX{{w_=eae&vXDdZ4J}GsA8g^}zM9behF`C5-tUnB!b*9+rxs~+` zvjueY>Xju?lk_-aWzp$hbmhc?niCvZ5jiB1K}OdEg@y0fG0%?+!;v&o{ig$h@tI7 zcw9N^04HPWJh2J@ySx^ZyR@KKstn)v)?NQ-|6gJTldL6ld*!bCP-4~x5955g`LadzZ~Pn}1mM&Ccgy}Dk&_>M67w5B zq2-6Z^2Yf~5FMk{$||3wOj#-=_Z1Wd0AzqDQ+1h+F$e}#k)|M;QZi#P3ga%P9(hNN zk(xPc0&BSS4m;u*S2fi*3Hb1!WuQ@cKZm!tC@6$bFoK+O!;h?fEqLs6fC0Sm7-rv@ zWh{4dUx}QhV4q()`X_9RSxr1LyDQ)4Uy-kgPn|pXeE3bRA#D1uyfi3wg7AyC&ckj_ z0i6Xj{iP>Ua2R2xL4pQR1L3$3ESJs_%>Zl102DIr9&66TxmQ+}uH(BuSZ?ht4Gv^U7CfoZ;*oJXq)KqXD^Cd? zY`QzQL>VdO!eKIAn5~`KzWnj*gTvn1{{8my)E+&}KZtiiUvOoK2^w|bY8REzF12qq z_Y*Vh=s*PNwWPA5H#eNNgK=pJws-I8-8Z;wSX1vk;AGE@vc0&Y=^FiDIv;2I>t$5z z-zyHpLNJ(WT;`-rr|rXK^wP@J&H1`y8WaFO0`#ulxhxjm%nE`Tr=0lh)xXX`UOg9G zyvBeU0MNjI65A3Q7F&$!GGTU7Fb&jcQujM17C?~UV1ZN;oG(&z`slx&v;(3mI z&Rfni?;S>vw~WE*B=Ih*C>6G%QJpnK0CvCCWMykdEg1wA5*9ol49FYooG`U}@4x@~ zfBRB{_~Hg##Zd8PgHrt18J$(#+N9=&wo-4*X9aW4~V)K)nYtV>d7U z=IWI@f8|_Semwr69`hhVf9-{lz8!{Ne*Yc3lTkot0i~aQHUkD=rE|Ij!6 zGB$DwG!Trf2Ic^Pk*M@lHnK}0K3GIK#yV{L#s77taU~vgTF1{UP4-4EjaEEk7pfUra^8Onn$WTba8T+ zA8ACv#UX?B=1wDZ%3+PDatTAJnT4JO3)MhlB;v!Y7i7K|H4~BsTFFSJ{h+E;7~vFp znJwi!Y@fu9{i$m5s$OBYTZauY?B&5g&S_^j6}YNuIWf{S^U$1Mdo}CMK!XCPJ8NK$ zp13&dARpKaf{6Mp__#l4UW2mdhLkhh-eWPzy7h4E|A= zFaR=A2i|)IK)mrTAf@m%q23Y=+(V6!R|1$WY8*4zanYnYiAFFQ$S$DYf%z=wj}JGN zA3HWa2&(kp_#!$sEfK!5y)PvqzFGINY3L+$vL?~ml9QwUYRB^2idmed?+y8mh``aI z!LXocM@+}k=8bll zB%U1R3-ZvQ`Qw*&A8qU?3Y8^uHsb|Xz27eHgt- z#e`{40QLL=Rd=o2y4vUxpd-8_taAmp;iv91Fe0_(#Hmb#WR^MXXvnCYzzX+4F`6e10kRdMzFli% zDoQ+*6Wr7NOp6$2E*4W0Ml2Y*Bg|9t{lESB?|rlFz!vb}nU^7EZu+^cY(znOlz4@FM zw_i7~bXWo?XaZ@jEvG}p8&-10IEFG!vjTH2tJzE$k;F06RmH=&(WC|eq8h*vM47h5{g|Js^sWCqxnNh%o^#nI^vC%$Ci8i-fz3+mteWFpI?8j%`&g z8ce>8PLC$ZvuQSNE+w15I(c8TK9VVN(WS3{DBNE=v_5AQgh-eG!sTkNQ|DVx3mue` z*%(+`HAC`)OlKM z`v=EWquyAwP+>i5+G;HgeLkSL<#J29Q-|}TnZgt*N6QG?qPe)WoR5(dn;i)g+;q&A zrVKy@HMeq;jxGgIGSCX3UYd9GL*e1O#hi3FWyb`g3T7$8|FZ~|qeWP;O*a^IJYc zsDK?&Y``Fjab0`Ha~WSQlglJ$XMC4jJkB^=63>{}$=NX^rkqVN#(*e7NT7nmu2!p+ 
zv|4T7ee3^y|LxbF{+x{G?(=DPS1<@S%OOha0gxEkg!py}GHDg(4RDdudNh|3s_Tvn zSS1uiF|myAM8yd8mBP6!r;cNcLpZcH`=tD%tzHbfDO*{bopcL5EYgSfHS*Nuji9Y7 zRn$lBKlZis*YtA{q5qJa`%kB@-Jd1=O|Rc}-T1<*-=~57lbg2x_}pDdpV>Np{I#1m z%v+3j>KtU*wgo6f7s#T{a@iHYfSO=tWfdeBp`02`J2K6nc95mAmoWt4_rr2?riW{;32uvZfB`tP_S&ys*m~CsvKgD-ntdOpG?(b(-)O3hr1RmE zuiDLq06Ge2`oXtM2p}A23!vZ;i<+b0;K^wwVtNUrVeWd1%^waZ`p*l#kg3! z7!uw~+s~Z7HV)r{=l#}o=bQP|ugju6`;G&bCVhP6H18u+R|Tu7me|9d#k1G1>||Mf zc9G?LGE{O0@6<_2nlYs&6e?c7xIWZ5Yg@K=cQS}=zC{?v3r!4xkf{)eO}Ppz_*=Q! zXz854>;_@)1uH}35vJPZ?d@BeHBO+$OO@6%t`;v`x^WqsWXNS@8E%mb*H3oM4I7P6 z-0N$`*dY z0B8UZpaF!2HwgH98~_+tW&u|Bcmz1Gn4k!C3rW&W6~%xtPiIxH3mn%evkZ>~L}Cb7 zZ>@mDG_#OXfIyu}b3xkiN+u+JJzE?Fa3~_-9dk66IZ~mnSC9S7|MJXda)NZ7$7vDD zv@cS8(}Mx$Ph5SjWT{9=PU<_(e69Nlcg#@y@3UwBuhG5x)6`#k`PRKV=TALH5&oZV z+WgdoTNup7>;M2D07*naRODQ)FLyUh^6gt!VD_knMYYQ;lp>?znm9Sqpr2|s_xd5fq(m(5eA46-hkbI z-vQbV4h`#C>}D)FVu3|FK2pwqG!lq_JVY9=d_%WV#cPgSs&`v2IBO<#AMrp!3~QHu z^TaJnPo!5d|M2u_Q>7_SKloK&ZSed9XI{3OH3f7O(DZ}%Gyx+YC{ThiRtPBafB+D0 zyg*=dLIZ(uxp;Y};XRI^Sz6rXa~5B|Fxc;}%%%*zc7EP;?kc%V@u62Q%;oJe8J^4z z_YVv&oS(8mV+FBHhXS`^wO>}Z%xy+GNnyCsLuUzF8>xxNt=;LTu`NXcSf@5~!IV9U zF*P1)$p~=HTa2Ag9B4A7QIQ*CYg!d#+U zeUnrcP(MtZZw`Y$>|1a`WzNp9`&S zT>uH8DH|fX9SO!co(@uxqNoIN5m85pgAg4HfCFdTnrJZ)uK`&B3$gNvpZ?Od5kW*D zB0$J~b_ZCMZ7m;OyVA9s5e*QKrfK!l_W=$NafCNq{kl=S$=cM5sSd~!g0r9#2-x_I zwcj{-$DVJ@pTXjzvnOoH(=7e?mu26yXXBy$;#EVlXM2nV8GOp=t)wR6T>+k)0$79z9#D>u-4_^*CQx*fak6T~b8t z&aa;HFuZYhvw$~fVAvv6EbMn@ZXUY9{FHfIIOU12U#p=JjtG?2-gBWdgZsWjRnwyg zNvC5iqhKx*RboZ!XpFYOu`TvEn>G_F{LUWdF|7$GLsM5f>M2d23G;3Vz;Iy&Xf{gwxMq10cJW*eWJmNZ zqSk=n$Z$t_S#zGWO@lFafRnJx7ZL)~9=FtuWu_QwC|0;l=NrvNo3e*mvgHeCXbKhd zBqtkbm(t?fcm1oE|7*gLne{>wf-4p%%{YC}o*@73(z;Rxb17JM+llWCK0bWaK=+gM zg-;Fd?2BHw_T+sBHg=Ca4~YM>gIE9L%H4dCS4y6SSHgXbJ9dc(xeb<$U_fDIxvH=W zm{L&MXfHalV8%F}casjrEFj~k2tFd*P>xXy7DzyX>gd2ZoO;6`@LO+Vk6olb#*Z9^ zH^c}y0`i~PA}AIbJ3kXBWU`6TV%5Ky9@!HS0HA2p?)La(r$TCeIw_iHB=I>W)*&W> zbzyz|=u3C>pYFd7;WsAVg&D~^@{uq5$(rbV{P`E!sY8H{0xIA8))|h}N>8ytkrD26 
zAdwP?5ees9Gj$p5k###ULh@5=!5_rBd+xP#4n(oOerTzC`RtjN9PVwFiz#*cI%Iv; z>A?9=#T#E5cS!ewWVTMQCuBwvn;u?#`N;MjyojES{LpT{*cf89tw<1HJ$11QGb#hO z4TToDj3dl#s0r^xy4|HyA?!?|P6^mnqDO=E!UtO66gh}0(lSAfw;&yHOoHpG+A_wd zZO&cO2W^2`OjQU;R3hJ!f|ZD+Mi*pe8<$F&nuNBm0~}-TxpItIFrW|i4ZnV)yXWPz zg^~NDM*<`x9^Bf4HdCng>3@H zhRT%D-DaKzrkJQQtON-FBPj#8Es!}(6o(*1!qHZ>N~ln#>mtCS)mW~f)IiiKgLWKb z!e(PeIdx_ad-|U}^o6Iom`+{pb(rz_f@kbd`M|yaUw|K_IY*YYv;KWQe0=p6FP{eA z`$Ks7KaOsn>vDYUv3DG}GJWbLZuq}AaP{Bsycw;aD)^V#vv(}O^ofRa)r^80iLRq0 zWrBkVL6tzdSHv@8i#-hwtzecC0KxHWy_rom{V*q^@=rno8P9>za46w zl8>*z8v;TRMP&bcGeJIo>!Gi{exw?4$yy-F#pU;|2EY(!Ds1geug)-H*x+^&Nta{l zJX0hBB_YP(cP{+;>D%&S*+q|kefmUQraaF+`c*T!$U7f;t-SBskM$xt3aEVd-L-I- z0QOyHi6_xv7OTV}7L?EHYOGwgecgfGp^vrblN00I3bI^Gx0c#gcFE){4i0VbL5W;( zIE-(rTYQDD)M6%PWF7`jv=L)gb*%>!Rf7Gz?2$`zN4P%;;sB&qH>yz9K4BtwNR-9G z0-?w>D0fnl;Rc zkHYphM>V44A14taLI4l})t??Eo`eq_{-Y+HTc=iNM16>>AIP;;v*DGxEgRnTwyYMx zM}%-Rga#i0WDL}@SfSm^zjgd>aUwf|#mB1esfK1r{_|gTli8u0?xqO+(UXISjshy* z_vX?@i35oYqkt?B$3w|sEOgXXpjnU2*@NDqD7D^%}Xv4S_XwQq?qMU_tlyCmv-~L-Lo4U z4DkKf+H&ER#o@_$NqjT%jB@HQuXf*ce~pXs&(BRcEV6(t{DcS4;2qSU0fqpu00JPR z#Uh=x3ls_E1|~3o0Nj);V@8c}7g`q}1fC|)PL?~KhiM1Jh#?@vdFc zlx+(Q)^q`!g~avqG~?dpTdpWN9nMr>;8Bc}E5fX|8TLTp8<-B*I{_Goh!%pux!29U z`OCkJ(da8PPn`JGP6UVu08m8gFR!G5@b@45?05HGQ)`WBL{4{Cj<}85jZp(6v#lvH zCux&Fj3F=&!7&stFpsgfus!^>AHB`Lj?Q!Ri}iO@BWbeyzGL&teGOoWBEhiM2U7X2=@ElRLyEb^QQ<b3$n3|@*06t_8MC}U=VWjd@H4R8nHoOv1`YJmVlzz7i+sS7+fB;C|PKk9@B8bbu2Hc*?w*zC*S zJBVoQ(P5;1{$p7H1OcN6fU3`qB6sP}A2@O1(CLXUT#OLflsb>GhlI_v^H${85*xYm zgnC{B5CG1NA;dvs7k7W_nfvS+b}sT?U4LAU`uhq0Wv{lboGst}i&F@%sejv`zNCm8p(3(o9aO;fWr9Hnyxc4tu&>LqB#os!l_ zG>NH6g%&f~X+TnF0=Q;THw8@tf$f3kqICfb2Low?g=+*LA42M_ujkPWGtfQ{jkGLs zLQftE9$dgdM4hyhMGL;MS-{>?5hhwNsp6tkL|!rE*_=)QzyRod^RqkNI@>dQnf%x- zu!z7iDJi5uSIzrYG++P1+-=u} z7rp|lB+$;_TStHWn_udNsN24`+Jwapqx8-P6i`pXk1qTu5+Vyu1i$gi1%2u5Bjfk{ z>v;S>?!09==W*`B2fLSsk6pxe{$F)3f4Y0m-bD$Y{f;>E&PYzZ2%@(G8f;nQ0JNq8 zGR9>(mq22HdSHkd9)kCo_k=r&ouZtSG4APRFg@RoiclZ1g%~h~x>L{Ydp|^U^;;V} 
z;=w04ydfe40r2>|gUH@x}U)uk1{QVec<}a}%v;0_Z59@_{>1?Yq*- zXdWQ|8>rCT)k>)}>HgZ;a&QkeKcLU_C;!DN0ipJ-~>8f)oR8Nr7p* zPjuyIh9?#9pqT{}O^5Grq%S zp%h$VU2d$E#t|?a6@@h-Ce+8N@U_5T0CehA%(DsP4Nh9xSe8WNHA;+6A~;~_$?H|{ zAW$u&vIVLzxC#V`Ew5Qe8Q?}2p(njBUS~U}7F`7ZhC_L^+2wEcD(|&D4fLsGqAz4fCIb%Teu1u;7GB%TZ%*d2B!fF4GDlNA(y%~ znBoSfiQnqZ5fQd<3IHL)0XdI2LAzKm>m~tbpaIyJFQ!afHG3r^L)$@?0XLA29c!@j zjsml@V!HVU`~PS?iMlL`(z~kD6-kynJn%i!_L^tLFLq=Mjc&`kZ~2_>Z5(NhI`TW= z*q>g0#{$jS!if(qUYUG-jn0;TPg3{?U-^03s9SBGrLMi;X*KEI(krhn&*@Y!= z@)-rnCz*G_`(XjvNeD7ybe$C?5tt%I&Uatio}1Mi-eBO6(EA?RK6Y&qPtBxjAf?7| zj%WEi*}C^$m%sAm%?gqQ9BNH9p`_3ZJn;}b3$S6dfW_ z?+tnjy(_$r*u`vG{mKvi5wh189N@y~_01_PUh}TM#N#pF|1;m+vYQPBbQGYy^Tq^` zmiWZB`(~{5guOHl87o79^(-Psyw%bNz}6UgI9;F>8@qB$`(~zCcdP^EjWL4)&q~Ra zXIK|0LyAa6r+_$$+Tc8DjU08{QEn146`nxIMGeJrXQJy_iz29`rVu3nb((mrzWv($`}vPv`WRii{OFXF%YP6lyrJDa_i49zzsIk>NJOzkJh)MjgC|;u zrnq)M5s8dDf(XT6u@B53?n>w%Ie;eBdbNC3dUpNKq3~BPt|mj(Vb)Vc#Ml>gb(rk? zSsKylzYtwdmp_UlK#Vv70<`uSicKi`wzTb`&2mm*;+nvPkA!*>Fd$8FO_^(pdIrOd z%FAqNAxk*I-RF~4TR*X|Qhnn4kK7D?=lJ^j9(#_ObP4Tw= mmhlYJG*LFQ$R-n z%z;A(h*DM3fi#jWHNcm{cAZO|HIcQ}duHVT5aPYjs zQ{VNJiy)2078&FX4mqMStAJ3KvXLhqvhKu0)5k+YNg9+FEp;IHrNueFODzpi@>&pA zvh0;30U!d9G8irmLeBJDDWFvfCq22{QXn+-sa3T+Mn!Fz1u%MXCD@NIzgXQc-5sA= zVwNP1Am2-=rQzhtha!LVtJ@VME1(r%Ttn>4ZeHJC@7Zg{2Nd=+^roAYjlNK7Dm7no@&B^r5*gzkKVh z+huHS{$DNg1peNMQ@?;uKl==d!yA5gr?%%Wk0!g->RR`91CMXe9-e}44P?{_(3$f~ z*GYy}%$u@?C?^1>KGyysm3Phb0SnH_p5@j7uPvjJ(_zwLem z5uNSfEpt^~~{;{oY`C zQ99{8#XfyLIcWV=aPh-W|D5~rXxAmbef*_Zb~1P9q0i=hqQNoJ~OVMzzDi8x|bQ>r~#^`jR#B_)77IUa_WDUn6fMpaS_#j?B1SGnM zoUn!%pN1+OsEMn=(86YO!y%>2XDJp}N&QMrYs;(w&;?YHKw&+%yZ$3)tnz$4Cc5 zYh5FWuXyLj|Jwt9dScGO6vCVIxh9`au@pbM5|GajCYN8i@I;qoNhj^nH2m2pN|Ic4?d3`nf^al)(hbzZf2!*fN8PvgSX6u;p1A&mkofcc_M0|A#?QLJ|K)z zz#gN>UD{|c{N{9$`b*{BrIkI){N%AsZv9&x@)-U2Z%23B{`C$CzRnYELCppqxE^A} z&wMc*#QOS2Vgv*TB0!v+{IL>bQox8}R77kDb&Zi(@R$V~iKPw^WSys2;O9=fZ!ZY% zBVcA@kSE1K8$4X`AN$@%PM*!z=+cLuJL@NTD)+zni~WsZdf@(V*5y`9038J|2ltdo 
z2GzAX^s-l~JZ3ZP8^gquA`6I9<$zUDDrwt@6Bbyvvx%HTRyqwT5K#e(F@ms)hgx8h zqG%|wh;c#vPErFUkwg;|50oHhix>%Zbp|?QEh)S(Gww@f6N01#;t8~lGZfGkWYmEW zzDAsyMrBE`zz8=oD}#up4Oy31YcLSh2G+XYJO0VNnJ3a>QWKT+Ldyb^&MV>MnN^2V zXMKzaV#^@`YecA8#!R)xXR~#lN-Kz%Kb2p(u;{b}Fu={VT&;e*dNn)M&+!kHl+rX@ zD%dL#PS5T9+Dh@n*QXWq1^_J>-L)jCfYsWqrsD*vtMG=a3&kq2{W(G~o{V|o5pDs~ zP+^{ct^#mb1xiiG6V7+lJOyk}sxkN!xriswjaaXTT-Ykm05nC>Rm-UaxSV$g7hCfX ztPLIoyzs=_A8h>z;}%Mq{M`EKbBl9`$wT+MG^kGG>G@~frNu%JLZd?Tug^X3=HqSH zx#ypH0swLQ$`8NxwtG)sp)Q!ZF__9;ue&t3;m@9Y81v)LLob_p2euq!;AowZrns(8 z5yLDJ={ydV^OT^~g;!fScjU-iK>*j<*3?x{B2oX32>|DU$GS9-4rmZ6pxezR`Fb#=` z*ma1S3CA;0V#^#vq%j&7D0<2j_#yx}hdSB?j$F}j=6zd90E%K_fe{-6b1hh6B(NsK zMQCJY(X@kIB+Q2tX+VU9R*{Z0vT3{AD9^_Ge&ZlYyGnvG0j;M@bML8Y1RzjWFdMv*nJWW;qZups>vBJn zalKllL_PMKAOEu-E_*U_g)ipTh6@?Z**mXkX`n7Aw7fY zx$BWv_?6-AWCGKA&jSbla+^}o>mgOJ zG$#$*^nM88>eY>HWnAP7?TCRBac97s1@Ku!;0#3(Lqv|@P=ew#xy_%H{^ipjp0llj zk`(#EV0UsXJD_RBUl$*I@Ec9k!R1@X*yrt{Pxdh>mzafz27d)rp9~IhyVZ} z07*naR7QY~0=VTtO&2tDT(i)(cDnbfnjkvC^G>x96CFb&(NLhS=81@qvXXsY7TrVYGJMqEx}sgmhl<~@AV#BGCJPi zh8q}kNv*p~8?9IOU7B&?jT=ZA8o3m$Lmqt|@RCVZV&7 zgHh1~M;!FvBRR>Sg4!+wKQ|7!d#hc=ALONc-@cEw;~`_3UziP#})e zTE|}Jo||Jt5kK~5A(6Y|p7R%XH=RhB(5?4DI<72h1&Yq)h(lmnag3P;s3Dr7mlsRB z^}%ODb9((l@+#_di!K2*nsk4dUt>@sUzfk|-H)6e!bLX!(WlP(Nw;fny8TP}`Y5~c z?!VP`y&-^(0=UKg7-hC#fce~3SF>TLM@4RVG@w!+EDit;N9v>$)LGXi!OhrwG@P~66Qeq;6*x)JuIgdS8s^$S zF;Sj)noyVqDr_BgvJMt)o6;z#0dNgz5(rVqh#VrP}sTV?^ETv;~w zeQ1XhJ9v5Z+zX3yJff*c5ofI9*rntH<0PS9wQM|LwO;+=K zz@g280{{v*w9|Cnw8|5ob!7(i##|24))|1L6N>~`Plyi|a$?m8XbKILP=%DsYx&SzvhTuK(Uge(Q^m&Ii^`=7Ou6be_>Z6>q!FX%=GYdS{NMArab- z)$Xi0-`mT4FxIFbNt)(!onF?N%R&I|eT*vgL0HDp|NWWwdUW!X6Suwp*%vRtXpn)U zS`c0Aodb#x3rslfom5$|n>%vvJ0M0B0RaMd{CEYj`i0wVMC7$&>s69c<2Zq`!OKTE z#E1bN{Z7hK&2h8P$91b`E;7j)p@Mb{Bd0j&$Y_jkfHbZlWZjMr1Glz8GP-w*URayG zPp^NtjAlHnqw6j{lkZdcG&#*a{NykGsCF0do)0~F!8B<%zUlTa_Ac&TPbm7Lj&A6U+-}STtbDptY9HWTyM+) z2JRqWVR0K`Z*A=l=RY}32;bzH*R`APV<894!3U~@q$Nk$iDO};Yg5?}ijLspwGJn9 
zS+|q+dj;+UY@uUZ5~_@r0p%tLL&W4)_q-|iC$^xs@>Az#&y7TuISNjAH~*#sOZ+)Z z5g`z)ll=DbY&z@ScW;C@L>wcAM_+9S)DQoZ4?s7*Da4k_8w8#hq!0fhydhk?aQT%? z_!Uh+)MbSdMlkX%#%h$LKJkqwM5GZ~8pb}Ad*M=&SG7Psn_pOfGiTBZXWNhMbU*yL zFtsAbF6};3?6X0O*NXQ%^YQ0vbxACK=;=#sS@h~#?)X~g+>GCJ$77?}#3MjQ0ep83 z)RYNkOS(FjP3x(m77OqmKpJE*RhBD`95s=59Z+OV^c*>&B(pk0F<4?FM;@1&@+M{h zxK<1W8nOr`M#o$XL1#iWikk6Fh%@V&#+Z{VDWiW-Np(1A0V?v&Dvu}Cut46Nb3 zg~@2nG-*m{S*ny;scWn0tPh$7O!#>2m0f0mfi(bdgt~oi>z>_R+_SYUCnr;?Fz>)f z0S>YYb@j+i|M%&}*~JbxjR^*cos-MR2SS}!iKvhs)d6@ap!j$+6? zqirjguNzx%Wq<=1ETtRv=cq%I0d+nf1GXF(@s>jyQ_@Z=Jcnx+Fa#cCz*RrR((Pm- z_8xukcfRrEd5_HI67`LudlJaPPa|@Z=WRCvW#y^At#gsDF7d1nX>#$In~J266o&{5 z2TwH;)-i`bRD?l^GG>zR{{F*Ra`AC5I-})$eLNCf0@Oj;z`@_BuUz|5O9%!}9CHU? z)XJv+(|5**P>cuw(6>%EB-r=f;ZejFKC@4{p)>>;s&VtypAEyU-PgB@y-yX1^|`8J zA%d5IITRIU?Mx&nL` zCDfzJrOSwpqtG!?)U})d3f_UGI7VV|2r(;3(K14Vi#qj?x<+6yih&UUuxmrlPicg- z0V0CM#91oJ!W%A12Wf420S%RPfFv?Yur91gWZhC<)QSQ%#0QQ8v78#$l5}ly4Pfnj z&T60nh_{rYnq%w&VB|FN31~7AG!@uHK9{7=O$DaP(hvd-&N=Ag+L@BDBmjlLxk?Aj z2@4_6j;(9rEh|vzE7SF?rjYWT?Bw*J1AjBhz#2U`6b>{m z{QeF9Vc4N)*c3t;)9+D|cHccv38ID~(>0ik8jRAM_~;wSYSr_Rg$Up%XI?4BY$c@k zC^bCP(#FJf{^hBM+vK^|be~N%fHhg>fI3PV^3$u6#m+|b6kCLmA@58d4H)RV?tq9O z!W$5L>$MqT?gKZv0QBOcx5leinMD+E8_^9b7q%Rp>$fj&A1=9-k$K?CGT|aZj>I^j z+zBm0P_i2$3byTRcjgwB7FIY&6j1~?b?i!F9-I8;=JJDInY#qJhzMuT^_PNI#mV`9 z^u*DpO}&OzANlsWF7rWi`wd^p&f0zV++lP4U%owv=qNxGDRDYVZLl*14-!)nk+#CS z%ps&gh>?Ne(kKQ5dulkCZzx(?N*1OEKrAy=|H$5 zag}3(6X)SX`A={D%9FsX(JRXV-*L;{Qx`Mp`?J{#w2NE80>MNAffOCT_pZ;a?^syP z@D#d|)P_@osd9lj3UI}O;ij7w4NF3Rh9%sUEh*%*B|rc=Y3{GG1s1RZ;(=fUn3Bp% ztDBG~u5Ks~IM9Sut|x)%os8q)V-GadCJDeS1MB?+3Z`ZUUb*hyKK`Xt1~(cY(>8KR zFe>i6!(z$0W3*Nf@%i5u_}~;9=So0dZVQrv#dz?&Oyb9s%?H zkz1)709Qko0Du?Z#LupN;_?qK1q#xT;4FWj_%=5&d*>|?0L6$PqWCwLT0)zTU*l0k zmv`>`-uez$1{i6_QA~OUW}~TGpfgNjO*tYK2j*gMk@L2tovMtPl)HRh10XxiJMZgd z48a>>#4+OI$Jg`lo$)88bMN@Z{Q1z4F3Rl3>3*widS>w8|H~A727Pv(_hJ4$_x-eU z&cD57-!0e$799kONKhidCN{A5CQ>XaSxY0uqwyq8oS8(E56MiDNo>jX*qKZuD;e97 zWr-9;0wP#I3Zw`&EDBKADYp|D0b=$HSYB 
z{Oo!&JFIq}KE3_yNIT8%bL3+wUMSRBI#w7RvY5QaR{ne*Dj{JHXJhM-IO8Pq+7bwbf)V*QE5V%pT*^EvEw^ztn1| z-Sx@Xp^1n5xw?2;uy9#Aiv z(vJJI=G#$@FcFecLK#Y3<Cwhtv{lzJ z#0k92vJ4)Kpea>FX=IUO&bmr*siF{l11Ajxiv_YO$p<^AOboDq44_OUncPVkw@V!2QGL)#qwGEL^5coY+cshFm#`G$B){vuRe24 z77$qgg~FXL3_{}L*98^;!Vg~9<;0ZHcNO_kkuOzP)yBy_6~QVSK_v*eJ)dl*diVH& zy#4HOU0klMN+bOK+WfPx%nkN(|+-VtKa;cwWq_(ilXd~#@C{(%oEqV z>ExI0<-LpU`1|j18`rd=I-tN)$q30d>LW8Wr!01xCoZs42d*lD-Ci+_6b!i zvR8rRZiQ^LTq-WUST!K9b^;6;nUIEDSPg{`95%3|oTQWRBpT(!z{zTtV0=ln&qRP6 zbZ$a#&`FY>l9*=4xi}*Q#CELx~zG^59JW2~S;~Ya)LXaXSfNeJ! zdm@=v>!U6 zVqG^D38hB@4ZabSSD>2qyI@8`l+s68Z^`fe?kE5CQ)A_?j7HIgVKfnTCf|Ns=GrTc z%E1gxDb#Tlmr4;8;Rh2cc#-`$#bIiFh^mNw0`g4bJ=@yqRJmZg@&tc?C z34)S@UOFKxNEYgPq%kEWBa1$sQ(NmEP5s1d>)Hd4ocFWjl^TX>|I@F_0)>!80U+*s zU|BEjQNQ})b#J+I%^7LSkpd3Q;w}i z_8~zjsvBe`ol=S}lh4T1hm@ExTI+|LVaExNhgpk&m-*>cG2edRDn7mHuU0oca=CkO zycUE+4wD)fOf45{&p*Ex)ZyCC4^DP%lvzfNAr1$3i2NCNt=jck!Jywa>v~uwH=gu! z$zbqUqw5b;g7)_g5@)PAaO#ZlCC&XzwL{dk8m6tl^jm^dPXLS)V3*pwocxj7SiXHLIxxm{Q9Ui%iEFY;a2-We9VEkm5Z%g71R3 zCx%dqL5amAsJX0yZn_9k)!wCjpgYDzy&D z6mR4ZL>)+&7Q&=RaB-Mc%ZgIdm8d8fI3RNB`fMPI9$<$|2)@&{9^yLp-H201QzH?2 zjSiJh$eJ)TqPu4K?DVbkpBT!^Wnym`nq(yn7!-OJDf*beNT#kRCyfFihXL7ZAEwz; z>zm2w$`eh~2K=4n@oC0ut~u(S_wAwC&G#AroDwR@QE>C!5B%<*{%{SL@8rWGtm?y~psHWK zZRux-ekxKYwG@F85y}85u~f>D0tMxxT}s1ZVT38XYDyBxDOp0Le*4n9qW;-47$*m$ z-Zzb784cX@m*ekVjYl4OUY4wq7x}>TAN=#jwcz?QM}PuRL=k0m=LN@7Z@v_!hbPIq z7hcj^GKmxuIg?jxY$v&7k#(E1T9p#qAlTG)EkX=4^_Euy-#B}Tr#|WkK)m7Y_)iK@ zsQl=GD+l-QQeQlK>l?qZ@l2Y@0Hq(AUX`OakKXjEpZ`~;%uk=im( zA_;3UbX~AYl*5qQoWMo{*4XShC6#-xfg!HP`nea^Q;(N$Uhm(RpaI!&MeWXk8bw~NbS6Op-ij4v%Dx-VXtgtt#2}?z(4=O$(``0T zo!>z9^!EQ^|1gYmT*U5Z-p}i6MymDqPXeJXtBJ&1YY@96!8vnjM=Pa+@=Toikr-q~ z^kW*BWpA= zry`r?a|eqk_&WmKU%nJsPFB+c%hlpCY*mXQkVKAfC_*Wj$n9U++!9^vL+3-Ir(>z= zRy$8|Z0|UZ{Fl4uDrG_>PfA_?;Zg)%CXfa2H;*h2T=|RXZ=Qa|>%O;kIyANnx_bEF zMoOW1;1#cX^jGg&p1+)4@!s$Bt{ok?^0E3m@&!llIrfd!u-h@j2>@PMN!bWbnH1B? 
z1eu}@VyR-Br84Md+gcvX?zDE1%*av8Nv?yc^EfGGk6aNGnALtB9$Dar^gyUrvY(EvNNUbC$0wr57@;xMroHj8kh z`Jy8|u2h@kAZ0-k`oJ@YAG!S1MuDhSDzQd%LAe-nL{db0mf~1w@7RYJV;hoIQfir% zK@L{?!A%y^2j}alR|p=^$7TQOoA^+C+{~|8-F_G8`{OL;j_k}Whs2{vHSZ2RaT(QN ziRIw~^N_f38ll%M?Mhl+fY(@Vja5*ADR3GQ_S{w-V>=!p(g2&IUD+y9zc;p~-D8`r z&t&tS8Py6GwoIzTvH(Fr8&?f;p^!qPy7U7e<&dpu2Rl3Tt@r$QfA*~z^U~>~Z~xI# z@xa71;TPT>9H(i_Qejy$Fz6f=6NC$P#mbFj&SGdNkRNKIM7{J?$uMCkl;h?f|H&tm zKK(;B=vcbAyWT`VM(OI?kG}KunSOe5+XTgFm^G_^jDPn+ROQgeH%k!^KpB=7AKr7w z2mNv$tW`+^B~#(t1ruS+s_J$wuY7*h7A~xcsO#~lK_7j{oAan>qTYK9%eyX}*=lmm z)TOcA{1p^X0R)OD`8!VzP5b4^7oUCoFWfUZ6{Qgenw`fEZpGNy$8LSaGoSx|s9mVn zz2~suq0s!fZ+#vOsMW&FlW}oYjoN%%-i(I&Gn%tV8 zG=*ekKiJyoysu;xyF#H5X_{9Uct`_P15{_B6QS1{$UtFH6hsz`97(`ls4A6R69#4V z(27)ot{sF4iJ@W&NqaBQWM}4jF|GnQ%BGZ+POI$Yct~c`7pY%zQuPL)q}4!t^s%aa z_w-w?>nX6JoQ!U2&!+a>pnUX0Mt~x#6%?xgE+J66LIMK>tt-~4Rrh1&dWGQi^6;TZ zw{KKi?&no^UH6LbQ2prkhN9eXf)Wr$Xn%Tgs)amEMD=v1^Al9Ts)2pp)wF*ZfOlK7 ziqNJJ#pPmRtG)Tpwp#};VwW+EAV z02FifKqAR$(P=}y#*_cUZU6J`h|->s2d|qy61J@3@kb92l#NKfhSSP7LoCGLMEz%rcNV_y1*yBl3B8}12pJ{f`a2GOdQ{9$AA6x< zr+heY?C2Zvzde^&T=TIBURKUuSYEm)j=dC88nV@bN(7gZB?q0CG;xA<_Ahs`nyuyG zkbL0amTkx-7kst0VJ~b$r}5~Ht8nov?WNf`P*!PGW9!#M5e0uw0N;GXO?SV#`T1vF zciWxj`Sn-be2snRJEynS5QqBbH{W>n*S|_T`}Eq|?wLDZkCz`G-8FgM%MV`lm1W#+ z8R7&`WCcVcV;e`DP3Zfvw!xL!x6pCycNpur4_i4YFJ(vEQDuT}b2VCp9Hl9(92ex# zj&QwA86akYg8^Kuyr?|1&H^?rsc8dK*B;eNIqEusRQ5^=npA|2#GGUlkzzJ+PGh?$ z98fQd4%KM4uxM|&>Ib70b*n9Tg$|Ei%ynYMZicTF4%)}2?p3#bvqwJ>%bb078N{Hv z$yAZSE_A;jP?rKLOH?JR5p(6aU&b6E`!t!Ty$8m#eZXsYckIyoz?+OZ7$0hu*QJ}k ztPef;#MX$H@=z)bMJ^)M&zS8q%4|}Ul46dQWakb5QD16k`+^2M;0_;+l5&Kc)0*UV z-p|(K{(@NrjHK+A2WCa2y~B*WuoS>xl^6@=Dz=NLYc(gCJ!uzZq^$q|AOJ~3K~!Va zvYlCleXM&Ypi2NJhQrZ&Z}}g8{1we=SgoPebY*R%Dt^@+5hhARp>k2=L5M?LnDZ&LxA#?|EB9~m0hlVFk$xFi~@*K1O&=A9|`rP zZ*G3+)SGU9=(>&dvON3vwtjSDR!TcL`HRP&{k1Qv-TiXgYroYyH(I~+tMZ%mnV>$r z_I20ovl~EhX01X~uIi$c1Afdne>F0NkpPdI+E;k-npQC)4FY3cWai52|RM& zj56||b);^vBQaliUbBm!WZrM4ja}}~>P*Nsr^H+(LEh=aq0MBGmV?&tsCkJ|rLb&g 
znnY=Bs=|+d|LuQpse;&}QEIzkvT5Wbz3~nGh~RX|EEF$c5j7G=C?}{C7bk*JDJ1e> zA%?23=a`g<(Zs;O>|gy~AFb`<_a_FB&@a~wl8KT#^RA=szbS)$_LE)RS?yL=eexLc zzkSiC@u6d@3vQUIBCqt)GfSv|ob_M|F(idZRL(t8`|0|$w#rIBdD0!-KmE4domU^3 z&f?DQu0xTRq86L3g?9>&x_5G|4w8NpxuIXQK{h#evC?ap6Cx8w~yB401BUa!_5>Qv* z$a2mcT<4eokZzg?QD0blXm`uaHQ?dK$0@${>U3zf`00i_R!#3zYwg2TqysVd3?H&b zYKD#VpJ22_F#|Hn3=n`UGl9qIHcv=PiNh3 z(n@Ao8@H$@1Ti<8Im?~|$b%E-(h38(wo!Iic^LsBIr!1&@B{T9Jo88EwTpYx1m9u2 zNi@^?jx7}Bng~(Iv<68EM9-{>r>urF&QxMjoIM9UH0*QAHgTwet8&)-#qOO^{OB|$ z4oK*IBf&@pas8cFf5r-t?|o>`AXdG<^@AJ3g}b-WZB)yHTBpUZYLyttl2K%6xvZ$E>R!f8q_zL`jt(BCX?OI#_!ipX7iEtJ6(G@3WyT`v_O)S5m^+8LdE4ua8OYqWQmyjVk#gqsj722 z6o878T>LO9rS>!#Xb-bhTO&fm0Dw}~C0Nrdn^KXHm4o!eQE+P0Xl?1F=};sh%L*wt zNs;V8*_Es~iBzFC1B^3W8pquC6Gl!d-0}Rg^}N|W?6iKCAh=8@fXIoE)9Sie5T zHy<1hP4|AbM%Rqx*Tj*0(pGN5nO6&0+irK)y!q@)I~a{gQ6^BN92k@#qa-@N>x@Qa z8Wp&vVGeMeTUbV*M#j9D)nF9G0R#r;c^>O9tfH>tkh6|ik88e&4aIW4Iw&1HBPd2P zQP^D#V3=dn1b^_}-}>age0n;{yJ4Dg7cpB?mS1({&BwZ8h-F8WE2MKYK`bnwOXgm2 zGNrhz6Bpq~aZF6b1I(PFqQ&&7-+ANl)ZMd(Nyk8%>ydPfR6E@K)?avAq|D#@`N9s0 z=XBe(&%PA5wmUbzW|>yqpp9UiLdb+$jpQq7#O%yLqjwYbUh<+umKIM;g zPrvQSPqkRKNbxT}M9q z#Ji6D$LS?Aqmrup_@QwUL-XJ-9Xj{xe`PN0m$$y^+xyJ!aZSHm(2 zh!a3*DAAu`VNfKBi_VA+ zDK$RvP^m$eROhTj5VCNLa;{OhRg=q8JIE}N9dpSEacq)k{aE#ahk^2}B<}0tk^srM zV&9X}rHS098+I0zOQy7=gw`2s`aRWm8ClxN@keH&*Pp)c=X4|$Qxc~(4#+V8$f~vg zTitt`b#!hOGE{_q~3ja9XHGtcheDjGOw>_ zJw&Mn3+dyE;!`jwHi(j$2~5ZYAQ-|+b3ps8O|NAhc@w&R+^E%XMX%f4HZeHyjPm|y zgQ2#^HUL!h;N%nm&NWqeHiKi&V|l`-Y= zm_nK7@yr@N`rdZa4z+Rw!Nr@RL-NIDDxxGwu^n*~gP)*hld{eUf^rtF)M^l5ET(_* zrytVhsr%EskpTMTdJ`c5MESNG?zquOh3`JKmovDX9k_H*PWw}J4l$6f6opu-yf1@Q zvSWof-@UvOMx%OTvX%?&9T+3=uxz24O+FRZ{j+a<=5v>JR%NTIU);Lt{A2SN{sI#C1-}h@W3I4SA>@c&zCH9Tr7`z-LXsm250j1vOvQ0V|bSi-d089;w7*@O{hB3_{Q(4(C z$-7po;#4GWk%LaFM&+PI)ZPhNu94sbPY~4F7CfkIi%vq9;44PQI#3|H&sa&O;NLxe z7GXJGUVY?BjaB{ezc|fS$qYSK9F)e80*a3@h745`KvM$1Ly1_nVC@4GX{?zg`7k@z&Ob?~k_lvGX$wbwg^r?bk_={fR>Y5GBA!;KY)_ zP@JGH^&ZXc`Y|odT-r$4OIdN6lLX~euC3o|(Ws7kjxFL)%% zvL|cXxhG8#u(_Kx;B;YY5=&azn$)*`)U 
z&1koaa<(;FtIuC}Djai9yz|tjcKTM2$D>p0SMUCGjv9LNS83-fah^%1p$anEJbuM5 zqu}p|0xa)2yT7*mwHtrsXYb$oetlLMORmn3Uu_5JtCMe=?)}=o-#Wc3?zrL3zJ;Bv zKC}IEarWSiuixN=|Kr|S5hw66Q?e0JuF^UJ#Vk`G**3ZNy2|Jht(5Yb^}(=!aTRm# z?GVAE0L^+aq0(C9ELJ51un90F%lzzfdnuB33fn2uNpm} z7LBI77&aR&Zg-?}jwC_s&~TWyP_v;+StYi$(dTeLb7}JokS>p7(mBXfxFtsLtE`(a zL`^`3l)G|z)6b>L67wNYIjuU?)h3K2+Jrp`sR1 zxpD8CpfYh1suim#V$m5%tUd5MZ@UV+KlGRJiMQYR+)K;xEJ8*Yhu6IMmbW~6{^ARZ zTf*78*K(1jsnbo&EHY&oO(6`bBn2!t^DH*9yl zK1G}Uxwk(3r7QDcan+vqzYiG#5CuiJyPt?_ z=kL1pqYr#y@`L&;*EU1u$B&JD@y$=~u&a-JJJb17#oH)*{&9WQ%+!F zXbY_y(Gv@gAq_)L$cdHIS~ikGVP3@)P2W5mHm(~zashxFUkAMQ8WA@ymKUdM)1yCs zyj&MAojlqIJ6v>)wyU;9gc)Cv`2R#80h~Ai3`o4p)!H{&`V|thhpi~i^%}_UZ|W>F z*_;Rg85H-e#mWUMDZ-MElSLlEuxtx-QGB?|_V$KB?}>lUY9TCO({5u@7urp3j|e6Aq^)6kd4a*)Q!Z7t0$*c>eI!mwvod z#;5B)EgrnP3WBKwDO+I*-TVQFEC7K5EdT1PSUdZTn?L&Shqvydv%JP3jq^iSk79B4 z$@j|nNAB7@vnzi2_&4_#`r1Q}|JI?!^7%7Qy(fG#qYVP$1dy6CSTXb)s?W^JB0)k9 zW-W9BG{hohJM<$j8qg>WL?gMYi!L78%L-ApKFy+&O|e6wkh4!F38z(Sa!9NN^Ch{LA(GKj(;;o}hF;5o5X(AH>KT(fc9Axks2AZR)Z{}*?s(FO>so+O1xCy!%i{P6k z0+L-ict?mg-DJ*RzHrGlgl9QY7wKWRwxV_|4x~YYC9>-n(hFBSqZ5 z_LEEgR^oB%+rssX2(75D7e3)`=8G_sAoF)lenys&9tr45xN z6+t?o#Zne#FQSYsPdsFuoM>8|ctzQJ&7wHNxwsUx8ueLsoGfZYUC_4A!8NHOz-X_wkoGa=3PEqkzC9tj1))Glkl1Z3 zWG{s)l|X@)MWN#?bc7V>;WEkD{r}}#59{SUzCGM_5!f~B(lI12nj@dSbGPxX6Wdg=jn5$4x- z?-AW@%Md5Iz>sUec%H^vw^ke(EIT`UOrgEZ3XX021!N;e(S!FqKrPl|t0QwNvWZGc>7AXO^iXH@G;c z>TqM9_}ecwmwOr?9ABkfdH%|>aGoE1;WHQF{^{Gt@%k5@dPvXcg+&X}QLIrvM?(=T)$bD~k@BN>y?$^)iF^Oo! 
z!^ft+1pCvExbpE!*ICN_`M*8e_2OFh`P0wfjL=``?}_bh%Md50!B<@K&o(Fh0XIvA%c zKQls;gPB$n6qrYfa2l81e9M36VQSg14=}mQIqP`Evo=ynXwJ%VSg}fd=acDsuvO9; zr72YM#nI3Ejc%@0fQP=CaOif4su7ROXFnUh@aWOk*n9HRiz((&kEve~vQsHn*FCqZ z8GxaP6u=4pKOWIF6FamWk5y7y$9W|!x_)EJ?Mbqkgm0yU?^gAQ_7}_}8Kumz-Iyxb zUzm(@5v^rFrP`-ToF_(D#nA+-p=Vp~U;X0d@4xWxAdwbx930>$S6PLd-WMf{QnHR7 z#frYDQ5$V>O&c2s0IujHi`S~?E(>rLfgMXqrK?qNN}RB7#PH+GZp>FCV({zIu>`wx z?BG<%q9|*zGRQTBkg3Qz`cjLYa`eLzvNCEi(%IRE@BiHkXFj%WQ@b}0k=s_Qc=@_B zUpP(kr(d;6*PeU&r;{~z{!M59n@5-X;yUW~r^hb-psVCk-t_7C$9=CPOk3&zf;Z)} zhfqW*5GYuF^|EW8_~Gk6_|q@pLHWFHh}@)q>;?&R=F#`Q{ z|A}*F!Wk_;>%N&+dmTfZ0BGO@>jkmTC`eLeQ}I$ng7m)XBzL(=cJN#gF!7m-;(g(4 z83#ulCt*gtT}89jH1j1jamWjS1W>OHfkUUOUWZjuxd7*5)iwb6c6i1@2^W%p{L5QK4gB_r2P-hzI@Yb z|4pzdq{JB4veufTatnkk^L|K51-2jwGiEF_N|;B9VE|C)dg}17TH6Z%Jlxi?9A9^R zimv-=j$qa<+=0Ir}qxCZ~`RV?f{D+3^$vHv|@njNborzQwESYFT0ZzO=b{K|(Dyw#A0=SnZm$;fuvhNmQEd3xsMA=f_;e%GtQcaP>aBf|c zD4CPOAWV|uAO08b_}abWB8KJK+b%w~m^5p$!3S)kWE(_*kVQm4 zO1XH}sR+Sd=j>HWELBk1L)stzw{JVh+duT}v0cXwtsVgqU}u_Z4ncv6%xN`ldL?t8 zYF$!Zsu8ZtMO0?iXd~g5m0eCAeDEJWa>ILn*6pTxJT3EXu5`R|?1g`Dig%xV#gTIO z{4*z;gS(e*Klgj5aF(yN^Yfdpzw*}$S+(M}5AXg7cQR`!Ta!g`q~jg0EdYuD5?#D& zJ2t0&_)B*@^f~dcJx#WRv?iWBu8~&N!ylfH-t?9F#ZW$c)z>kkdba%1lc(buqdq&_ zUG`RkfH;Ad^}0ZE3CZ?@hLRLCqk~|SoKoVH$9@u>U|I3t6~iNHXOibSIl8LwTAZK`je9n-gjxz=QhR_R3;O(JdlP7 zXY@r&H{?Z6isHaZgRHS3*{phY&U>kwom0))WxxY)#}0L~HyixFB*Ay^=XqTx_~-lm zdZ*9l^G(0pxqEVsM1d918E_l~T@X+~SXvPl@vJ(_a<}fhDsQdYt@1+MZQVbfce{Fu zZplheKtUab2{3u4dwM!d=kV+P#c%qAcRsy)Jk-+UZ|3t`-*JHI7vJ2s(%rapP_a1| z(G@XfsL8kg-LIm*?DH`9}HN_EB1x|i~QpWLQX`$XtPD#$W67!wK z5y4Iu$@3tpD5RgG=zLR_lBUz3jB_x2icul3d3j|dq#ZA5hL}~e@3Q;-^oJWJ$w}Jt zmiF|qJ&Q)FwU5e~5CVx9gy(`sZaM@heI^$`2&$1pDMSGlVlQ_}cPu&?LunZ~nBl+s zyT_P${pCpeE)%SGRTUx$AoTg2YaJy>#-)=*2hBQ(d0QU5-Ym!S{($KT?c&UrG%_2u z_rjT@#~-@w8MP}0w+`c*8o_g?wJnY3~4Z7XhN^NkmVgZb9On_qq{oe6JQSk!&~ z`Zro#PV&1xc<#^2z&c?(6D(5-GQI6R2@EU)Ij{fS=6LD&58v}Ip8pm10y`=9t=)X{ zvnMVbT(Gv2$3D6>y8mm7m$H0xlfo0UjD-7KVmv<3G65U;XOskxR1yn 
zmqzFqLR4kSxr@=Di>8yx8<1pkrb3rX%OGW~z&IQ{B$#xLNC#6z6>EffB||VVC1SpV z(YEUaR&sB%cebQxc7|1>X3A%VI7plat5_DqYNk^w9Je}>=h|^U3iF;_h~BqkyxTu^ zRwJUg^BLY30=3eULM^Uu6)88a-TVG;-`HD7>qo7X8S@b0(zcgLi=@yu9f2e9)Xdw0 zyBbhj8x@8F-}2t&;^LJ>>?k+|hr|8Z;kWctT8J;4d-#Fg)=RGr?>o4;X_Qt!G^ZpwzymlnkJNIuNJ-*_HcjmZ0UEkQ) zEzNFt=ZBvAuIX4ikXEt47!Txc|Jj_uSWXmi<-5DH;fpW5=eklTMeP!LBq2lK@p2}gjA+V!BHes&HXes8{NtU4i zL=r#{*~hsqC*^R7DYHR3sppY^m#{OudFg)YJrDkrTNab*Uv4D8>>waS`fa zfS7mKkF9~8c-M2Txa)-lQM7>~lywKLbmScmli>%Rz9wUealvpYNX40PbELpSO(32i zg;G)r1g~T}3p}}5M5R-iR<{PPwjc3n2lh+oa z8uQ+~acNZblv;dzIXaSJZcIm%5WA8yW{Iq@Zp3p2Bgt3?nL^?@bKzKqn=U8cG4yks zA9~`y{oqJU|K7%f+YQCAqZfrG0od}sfe=X$MJ-s3GBYG!ydzv&mxJq>&K%nR=DyXX zWup1!xXoQGe|YxuFTeZ9E5r+asIVu-GnxEs|J5&^;2WnOUQ>H7y>NU#+~$MpzuiOJ z+PZROzH(r4(xDjV_y5FqUL_CL=UyOi2HutnpGM}$0BgmiZ%nCt?v_{Gn>1Ib-G zb7^Daka}Zp<>Bz!C)TU`o>@NK@t+gli*+7Wi&Q&Iraz}l#x#rrIdK+wMqNW|c%EZ+&iUR`43&6w|G5aPB*fDL%S+Mz`A{*c2(!P^5y}Uh1SwZ59PHx3Bp%v(Y&qV=m45R@d-uVz_EkL3UJS}q%+H)s zMVu!NC=}Gq(US$a`XD63SoZ1w4+liM(9WInlm1$ow=D`;8>#BL81}2_dVfefHC%@n z3i27QDYfrvXoV({NUF~ZzSXQ6M()~>N0wnyrszpVw2<2GdFBJZa_aw1CHd{e)P;@@ zdsJ5D@cX#e8ID92oP5g#7de6wqs~OHa+{3|-iYi|;W(j|1&_=k%1%j}+U4K-!G{t* z`7-qCWr}ELi&|I>1T5d#vm~U<+(jpQIHFy=BY@ z$3A&!`Q9g&&e-(W@cVYkhkEww&#wD3Qhj>oi6)F^1a=fa1g{lY&jeX`0pdw|DcWRQ zB$GI1(l|>3u%i@7d!L(1?dsGM8H}QLWz#K%h9anpftYAQKMIp$7rFG%t?GA_`8bg) zT({89dNF}WBGy$Ew1oqYs6w0%T-3H(fDZ%`wi`sCf+Jr8vZD%fh!smVh}oWl&x~qS zHZ>=%MDOGfKEciF&$iDSnpR?-V1ds07~=p638qrn7u3esgf{Um5@J>urr;>!}oS^Ld42p*Urw${nKY3SnA$(@!2rI$kGr|f+{`Jd}e z(%`~FdhRF+r9R!unISU-y7Kip>Ss^=#KSLr?#8obQ!Wb;b$jxbl)&5@A3wAI-e-Gf zJU%SHZ)=QtlaC$Wc2~Li>EF-PP92BEBzS9aRgtCNAUhoNWib_fqJ0^mN*}6X3j4llT;=b}t z0tM}QHc8ZqVp&&WS5gXEq?RI>s%i?Is!QPvw)sGIg10FyAAD{l71){ceoV{Vl|`SB zyZPo9*1v0#s}@Nx>ZK54m%Gq)4irI#Fi=c_M?%+ify1^zqf%Um;Y44#h+zxB0rtpJ zeeiAL>kkQe>iF6ny_8bcMuZFg;>DG@s0_9BYJp5ikQL}sij?b=6_0I5T)0k&28)!W z-`=B$hLdw6=7p?95>y8xQrh;*rfF&zBuImrmJ9G3X)h%?m7?aR5julrc^E@CMsKmc zRtK@nyGZD$-&44T_$e4<7Ad_GDuZw-dnbENX29ftvokD?T}@_VpL+ 
z=E`v%g@h-@KYj>bJH@j*d#}HB&q?J6LoIA?{aon%8v^!UP?4cnuRU~+u7R-tZh zy77e{>Qm!K?$D#xzjN$nGrxM@{7c_6=i}~m4mCV%`bf85MTaOqvaa;H;5 zk$&z_#{b8Q3xDmbc;?h&554%=*|B(~SAvr6&K+PpdUo<-=MNlyZs|m1kMpO(g!fDP z@#Ed}4D0{=jlZ9H>lxTlfVYt%^Qdf#p`+CFyo?O7@{V%P=Lps^(Q=+-&GjHS1}d7g z(j^X&&~%Crd6zgy`b;T$sbaHVw~gZ9wkwESP^K;_>s8BbYz)O<`(_pgJn@dXLM2-` zE~0e`+WIADIr$Jv?udn^Qo*YhkXWj3@U#?NrFNUOTYs=7$<&b)mk~fc-TWo> zy%gYUMTjF+BrYqP5tHnE)74NB5io@^GV!)31AK>hNOfEl%dcJ53V^I(agPdj+`$;@ zzj5xtC1fO>z?rH;16}J6B8OiUglp(XGNSV55grM=((4gCH zdc8DnKr$A${OI4j<6GbAacb(7&N?TShXU&Hd-9x1kzqOtqGMd9n#qB;r3Fu3L`@Bt z%C2w%$cPfY>6>P5gcSm^&V&E{#Shuy_^T0zuZM`%RT>Zg!k%00Xt4K?+PFTM^SupE zMS`fcdjCE6&Z*?sxx3qkjvON{S0x+oz4AgBJvsT+AAG!bWlSsV<}^OBVP)I8n>W9_ zvsiFyHg+yve|b;OU%RLNjejtw7uWjv`ps*vJd^urtpDZmS5jb6kj!8)sN~7CAGizu zzs#=vwVfB=KlAuKum0-vn7_gLg(cHo+?S>E{?wDWa@+6piguV?@{X5Ghb9`7FkU5J`}>h(ysFWBhp748=N?ECiDr z%QPD?%amwrL8;1aj5XVoT?cAL4jmVJWr@QdDxw!b6K5%d5bJ_Dm89oFB%}_d6DbCnA_-(5 zq#|c0c$)P>Qrt>c?*8h5I}?BEnq3%NrHBcJ6iE;)ZhiQW+HJymUb_9Roma<&&y1*( zOK*8Nf8#=%@|m}6{@9*lh3G9r-u8E1_QNOYqd)k?v^%4vbZ6bXveR-FCO6;sU8GBA zu5vkOw_Z40&91#|=gUusjRU4g>-+Z|f6flX27R1;gDYoAb1wid5t(Qn_^AvAAcI}` zi;fjfT==Pbe)#J z2*7{-%RQML1^C>8GSEXyoilI{yg+8)H58dnqNdE$eBK9q=^Pg_@JfSGNL?g!vGTD> zZE7p&;rplqUs{-?nvBPR>0MG+_0%lDYSGKCTaGvvSu zJBw(X_mPN8h@NI9b_pW22uO8JFcnl#+H^6zV1M(rKRy9CV0AGJ-cbnw!2?g6^OPV$ zV(pvq>36>C`inP)l?S;760z%q;7N(W?E2b*&^WD@9HCw04$Nt569=&zgWZnHRXyoq zQM4^7w03bpgvKt(oSfi^^H3mg%&LC$W^q@>nLso#6w%$KpuNa$Gx$}F-*suUcJTJq9((HhZeV#vAhCGw zJMon_%jnKNxbv>1H%0EPcBmIFpULHS!)KoVg>Gxc7tP%_n&b5xGS;i@C;zT@{`%fS zqovJDFCSsuwTGskd$xD(Ho<%AYpXAxaN1leJ~)3yNgK&>XR=&v~Z$2<;5fRm9)*xNPKK#{}Kbb3ssa#T!xsI+oUNp86*T%&jjQ{rRA zrAo6t%EnA#1=UI-Wg^dQD5^W-vxT5>QoVV%(%UKWx>@$Smp-SaFU&)W2%~s1+Q`lz zsY!zETJqpY0;VrH00z_gU|F@DFk@pN#w{=;z-KB zE?zkNUVrYG893J)M6`*^?0AZ9hJO|y0#^!Mz(K~)ZLWx%GSjt>$~8uU;zVgtZ)~iN zFl&`eKvjzQ_DV_cO`nsTrG93zg^CKb)jd}?iT5$96Gw7GfmvG(4aKd&kqha6RDbwp zAGR~brADNYs?5R1m3I}GKuRh`Em(|vQWS7qpGU?KlL*wNU|_nWV&a*2$ONJU8#p2C 
z{ms`u*bQHJ71%qm5v?17APCW}J&e&kx2-J@GD}~*I5#1dDFKT&J<9&m`9}bbX4wyI==OGJ-s;I zv#25JxDdO7dBh~RZ@l!e=@%J(r&B_3a2J$fyr}I^g0)*cLxk)CblMPx%h~m!-*;1sus*knt%St zEi8ZkY8WhB1<<0PNE)y>a>wnT%>s!rmcyT3WMzXgp_NNFKZ-xxs=Pb#w(TG9t+%N< z($Hkj)z_W+M||pszt*kqE}hnD33B^q|6z6mkvLhm zm;<<+LsN{TBJreJCk43Af?6&LWZ~L^E3%m-&yzDLGs%VX#Z0S|Ws1Fe<~e2b8!zEG zTbR543`CQW-rg&_(316OK$|Fhe=w`#QkqJr7IDFBkU^KjYCTR#NzP(aoILSn6_<5R zCeNzHO@L)ddenPDG`F7p!TUfEMJVB*j$$ ziiJkwl6Ogj8VS+}6)>n$y0P$FePA|$k~M7q)|B3UAWCLJ&qv|WanmU=FYzWj|C-f; zOd-J46f32Y_VUsK^Kup`CaR4J)T`$zd&+`BisYJ#+LlxicxiA$544ci`7&~XBpF_?+*XVi$h?uS?v3% z;!8agiyxI zFdR1h-F@fIKzwWPiKl+!W`KulH`>Wn>%F>O_3^1EigV+e@7+3l<;6Ge$lKGK+0pM; z+XtII)raYYQ;GO?|IRB@#gUR?R>Nk4_Udg>9W4Pw#=uT|dpwliJO8sczw*V)+x2-G z5Oclxg+nSk)|~(ND|eUUh1qR44S{s+?A6VQ7?r4FUuKont8W%yM*%)?fJ#lch2jYR44~$mmJbT6B%KuEU_*m`9QxqoKk{EcH2!n;w*G z10X672I(hhP&Q3X#)QU86;NtHTWQ%a4LSkwbI6zjvOL z+Rr3^=R0=`&UGS+!b%&;41{1{`@y3?4B9q4iGp*>7;{RpNk0Vb+Fpp-$(f{0h9%8W$5v{9q3>rGt`+t1+u{|tuN}1TkpHCPCAz8Gl*!N-e=a+a)C*C@KG)-sS z!dh4DET6rS#COy$JoUT7%jY-7Vo%SX^R=LjMJay%nf}Sm!*7`{t-pBoFedZc_~*W_ zt{(1&dVVmye8CH~J-p+rS1w6gh*t5=8D=Ldf_-L~S&ksEm%lw;R!?92>4UF*`OU5F zY}L;xoA&%cj?5<)Kl0+cA0q(%*u}}z%3{D!YbO*1u37oinG)a^HV^nx|e$byAggzANP;L;jJ zp^g$FcXGj!nQs*|@0m0XsAAzfNlg1~$(QThppkOwPdY3x&(KgPQH#=lFY;03kJVRht}1q$qUc7q;gwKX&(XmzbtROAesg z>)^T;fg7a2lk3Ue2(O0&IEZ)J*$F4mi(RB6XM8l*W&-nLaWL_IH6;u69G z%!?pYOl4GYw;L3EG7l;VCpb@a;uQ~5v!cP)?Vat#e|zg+eQ$yMt}%~Xdd-=IN~y)4 zuJW*Bspv}Nm_?VgQz08l8QDwGph`_d!MtRd=ejDb71k9F1)oXj+P+rjzj*h}4*z%o zOAFT#!%n{-M1r2`+aF&3oMngS&sy}q^hUbl4RM;8yZuD(To_PcWH=8aXe zdvE!~F-X0&e9IdNEXf2OB*U!YY`4_m;|CK1GGq9`55~*-iR&L;e*IscYq!r9J@{@Z zp1O&G<~JoCu=uU_3aABcfnr25p&7g1wBWv(Y9;)a;!|#D5n&h7`2hiBvd3*nZ|Jtca`BNVof|9 zES@SmB?Q3su<3v`g3UI!zq0-}gYpT^gUqUmUDtE~c=B8hBy^$?ZyahQ*Cn4KaWWib zPkU77mrq_<4RZhvuy2{vk7iKeR}k5RKEb}pso7w*4$qhC?vgN(p=Qjs&) z2zbQk39_l79R7)E(vYJU+X58z4LQiMvwfN-Ia7mZZ?v@3+nJFBc`Z0*!y%(?rV3C_ zL=mzx1TlAvC*O6ZUxkh7OfS2R0PY_+{?;$A|Hrz|nxseX+Ij8rU`-V2p|^p^D&e@0 
zR>6bmj;!EJ1mYcdBtnjvWgcvwC6!r>L3$B`kZi6!&BflY{O>=b<+-P07^qo9sH+MA z8nIq@%gVbRX(TZqGybC=Obr-i1|CcL$o@axNaRkxu-een+Jd9 zxzC^5ousw>s!nGoT_?MN?|t-{{>7dCzhLFb*RCE6cIT$vr@yDq-`yR&i5Yz36_jl2 z@NGZRUPkVv@R@}$$hnb4{m5I9S$rXpS|#r)nmW;hTlG!3#0eV#}|)e?@fL2 z(@(wdUM7{ORLDGL7K9ee=J{;0+i?*a0y_%8-XvW&OEK+W7__y0ZQ2w=BdeBC^li1& zw2R5J*r!_Xnnxyi00$3IH-?9-TSTq^wp(c)+Cgqx)sMAkt+qYu5zueZpHx<+EH&x@ zUtiO87kj~uSd)BQVsn+dQ8&>NJ}TEJmrMw}5*Abo&Z(Lgh=gsSy46`GPm{X`&y`J2 zrox8(Z7REDy7uCMKl_^>8v7kn1SQf)1xCc9>r$5x+UOKjrG)ScJUE3m)6_Zdi38YY zN6$}IYcEklIovM7eTTVFV8v{hB9GZiOR|V*e#{)(RgrixFw@0>8=+woTvJMM~{lf#q(ZO;LBYEeh*Po-Brj^mR2N zndV^xjwutZV7^~CPVKnh@MB*rEw-=x?pwe5&4Er9~iPv`RcTPU))r+TqzH5Ni4K;!wa$(Oq znor+nC1)96?0e57!L(1>h7!HU5B)!zUfPrIx%AepN}28audeQ{^8HPc8gi%VZOOtU%BB+T)AcGB4d&W*e5%`+-g^7(5`T7jb3bE_+Y7(?4?lW* z$rC77X`aEANcF|-Hs-27kPD};Xt1L|)bCI0vFrD3-E4ZGEn1#3h|Nv(NFh})f<$&o z)KTkQ(qI(%L@Fd@nMg0v}v@2y&x!MSY zO)FhQ^b2k_Ol{>m1JA2CYXXc2sP=F8%2|=TQ7sjYighVck-h1LS8pp{-P0;W3wzhq ziY(`8{Q5_J@0(BiwP!h|7Kwv6=W2vl5QBBtPKvwQ#i9_3LrVeTau%4Ir%+aS9;+LG z^l*0ZU@|{85J@v)xu9$#td2obZ|+X1=eLSsIp2oq5Y>oxGgDz$jJFRA1m&qwA!U}B z>7rs55P`Q08RKTq?4pXP9j!z)-(6eGaZ6M&`%Xx3IS(*wx5cosbL)&2P>MFwqLF&{ z`W_im4kP6_n@VJnlM4HuzV&zenz@bu03ZNKL_t)>f4Egp7ly8Nohlc#l(&2^1)&UD zR*J5$QL-4}1#*#dWSO(b1xG_2^&)3pwj@3GI%HzeLHi(Lwf^}#?+Eb+*R6i{;pg78 zwd#o=h+O@CnNDC}(O;!JwT$#tl7 zbzi@;L1c8Jkg^C+n1lLZ|HpvL5(9hk#oD;1H-CE1sV@%K@Xy|Od856)pP|zAtxuA4cPjFD`D*csba zxMophQqyjkAaWEr5(&hdV44=)mY0jz^ilByZPc_`taKNqy~R0#D)%8)thG6^dw+hlTbgPIvz*lc`82vrb{Y?SDK{SQ7u`ua0A>R()Y>ih{*S^xwz zD|hLKum8#rIe{^bzj#R&iMWuB@3Z19Z~gQ0g8S3&zVx#Xj4u9rmBrmpae6nLL zN`zqB?PlY^eSY<+*B(I(h{#NxyRN{F0%+J13y7A7JBE!S}-R77nkBA_U`v5~%iL)~{or_TN4U!~IV6$h0J@47amE+&V6GAu>Kou*t{-718x>m>Oo6F)Lp zJac+)Y^8^T#Vv#O&O3yHD7(^hLGo6m#3`6|rmonH7Y-c`d#9&IJ%0SO9%=-k z(8SvgmIa$wZy|pFTKrK#xkQU&aW%5qX3%y3vm$1ZA%fyFx#x>S{tg?H$4UpUj-!) 
zOR_B$q5;LsL=LR&A@&A4Q`d{3EK=8|WuF?J2GzV4qC*FYF(oN^<0{VPV!sVjchlto zzrK`H)~zq+Wh^YQI=TL8nybyiE1Ge#OizlPzEjl5Fv3h<)g~@jZ_XI?^;oUwE=?KX{?kO&rtPtQE*J-LeF3$ z?NLC>bq1x7AZdm^5F0;43cE8sqUEPQ|FLg=wGTNJ{Wf&T3@T;H55|SXSmczJ>cn$S zp=_g&$s6BloiHzWgy4B@ydax+BYKtzVmu-{8DtIV#m*P+ye)N4?m(|x(X?ADS%s)o zoL2X(@w~WSo1cy##%_G&M#WgvM9k`L@tz<1zjidnXWnz=S1xRBvg!~+hs_I_f9u}2 zo%z=%^eK7UzIJfwm~TUhOWSwd|HA0n>K#WaeDj6z0dtLAewa?;?|VCa{$27&Let_VOE0 zw_MWo?g|aY*>+_RT^}|HjC;e>R$VM&F71>DWf~E591zpi)xKzjNf8l&bK(KMZ#Lhv z^HQI~kWBlw5Vv}L*PML&fBZr9KVAOJ`Rft6cCH3I@f6ENm7xTW9eC69*zUw?7;g+$ zMIxn%Qn#(2cU2AL0NBODLA|R_LNZms!a<~{L+tw0GP85uzf`!+VNuMmFj`n0w;P=r zTTaTT9kZ8XC7$Z5Tgt6GpJYNbJoueX3CvqAXLfm|!nj#xk<>PzW!ES{a@v@K>FT5| zhL{1Mm@0$LEmxZC^|BWu-&*L~M75NjOdYXysrmiE|8{C1Q(MtIbz(TALfrhAZn@!b zSe-Z_Q{ZOGO7E1kiEOE2Of6B=*4wJI%sD-nNsgQYq-Q+31itj=fA}F1=U<9pC~qL3 zt~4M(vEF}ICE|#!ANg-G%Z#1=tLd<#V7Lsnh}BI$`KQ~w*gW&Ti@$Pac2!sV8E^8= z1@d2e_kCx7?e*$}zIDy^&z=mm^6kRT-9I~C;c0SpJKVT*V=rBnOGn=G?82Gb8h@}@ z8ozkM_1oP6Sd}wl;}NW;NgDW=3RmWjuVo~jVZU>3YpwkTJ+kj&tVAK!dPeI_w|gOn ztmgI4{oRd2yx(IKGmG>7?cMOns}ER)q^yhj>b3wo3b=g-CmU^(4!O?MgU$)J%M79l zDJHL5u7?evBX3axqKO#_9Z%%28f;zkte6-5ZDkybVya^orA!?-)k09KB`@Z)l+3V! 
z%wmSyugx2=zn(&bq@;|!HrK9fL=hz^Cuh<)KNdsIcEp-$Joib=3du>d;7QJ841I)l zsqybScX{A4Ga@c-R_(>br7(T%w?6x~M}BMf=&3ifkx@k!W6NeW#RNDddy2SBkjc0@ z$<)q$B%Z)4n33z;E9$WuFG&vvyLVr={0_mYSenczHc(Xa3N^CV)V_h|kKA-_j7quP zu(nVHv9Zn7lAG}{hd|@^7H4|pTkpE`Y`UiuIKv89)u|@Ow7I>4%~)WzZ{HT@X|pN| zJ*Qj4IT3Ogjy9X5b4aOCJv0+nnO>8U3eG?wZM&7e%GW1{dy=)Aiz`3)g-<;FCqkI9 z7>b~ zWWWZNoaO8ua@ax+n`D>W1BYPB9s(hOBw)wJ$RPJ3+mfv=jWnZ~xl`}lUcUR?uf6T* z>+J6*04MMMR3X_13zVkn4NbX&EaPL#s+)XmoPHbN8 zu4?g5Z#}sF>#y*&bh;MDR~9QvjcRu9{Q33GrS&e_Kij|B--oY7V@L0Osda50XNMEs zf2|v+?#=9`A`sC1X@!Xm6oMwKNb%1e3IPB^`Hky)GsEw=N2YmGrVZk%e4w9s^6*13 zb;*u>&sUS<6?RmC^e(R*8`YohoHQtCZ-Db#I~>qi09TV$ z#||~IaULX;UXqMjXq&>tIA>J@DXb~>ni1!y^^Oo?op5Y~m5(6zNsz7ts8W*CN~Zl* zb2U;!P7~h)$}Q9b$*Kvwo$_G|=a4wBId(`FjkKqyK##Cx3ZUVff*IdoJhzd@gai}W zH($Rw9+kwwoY{)#!kW>&D}Q(9D|h@#^_iE>bId4UTqBqeC+rAJVX>`16T*BHd!K{| z0~iRVDhxnEYio;>|9btaSGfjI#~P!lH{r zhqP|T!jg8?f@dWJwGJ_$h$$sU43kA5Q zZ(}Qn5+U1L734q|a|MQ<(hUNjdbExq>oT0$K~5xTW}bRAI4+iczwz%E8yNBk8(YL8 zi5Z-FkHI0KIcF6EkA3MILCGu@#%D}aH4VgsN-|Og0z~r=m=7u-t|>CKj8JK1y!Fe+ z58>j+Lo3F16j3>k1XvJcB#s|csnvHB!(VQA1n{j_Moo{1$KVCgZr{&-V;gv9`GL-7 zU!ohTIg>)M*WK0Vn~yGR{mKP?E14b_=*Hy&k;+eOpE~qZeLciUlcQ_hIeLq=j^6h= zzjBO?78;`a>Mp`Izx`qpc|`hlUylKn6HC;TjezuHcLN0Q>-pNnjk&=OX5Kduk)rJH zZ1?wu!_nZxL$P!5vVZ(5i znCF>KoTQpLXOQzL2*=PswcR#Pnobk!gfdkPj3Zc-5)Z8a!gUer$Wc-NIPUX`SI{ag zNaQMvePO1>P>+*s>_PK3n1BKkTeXBZHQ=>D-8&++Lk?UtbzR4M%~qy>>c*&O)$>40 zA}f~|N1UQjY~|jA*KamtQqpQ)-l?&->|E~kwx1(koBMVBIK05jC_jGS6N;t{tvPN!g1FkUly>k5a>BHOKI(}&4$lA4<9KhmI!DC&3$q7#yb+s2G zqrJqMLIpD7+asN%)c~MX-!lzKIz@dlW&=k+fnhaoh1MK{P=qc@+yY;)%5w1w|0# z5CtGKkF8;$@ehCePRXuaaJDTs0MuSv001S+G~gD17A-MmQkA-jN8x z*h4RES-QFX(CV+9HygGw9*Lqe=xYC+NB8ai(kszraq>u-EPvHziE>jn-g@*Ix$0us zYV@xaqC11{rsZA=mrq9y)F6who`_bNs3}Hg7!)$4X13C); zA;rTIxCxYCFq)ByI9Y+LNMwD*G!mSc;0%#D30#hfh+2XibJT%LAa6nu1~@i~C8}hO zG>0I)c0>U%V-x2K;28Q`dN~~5qux(A4i0PJj!5O4PE05%Cjg*%Fzr2}nOCvngqM-F zecKME4%##_$H5tispYh|cd;~avuCSASs)5G_WG-}ulw)sdz`<&{X+>J3#v+^hK^tl z09BfTmNN0dH~^hNZh(0Qv6q;~$!7B+|TD*Mvftl?3>sLzR1}3W)ddFE< 
zi0kv~t0~3IcsT8^t4i9$Wg{^yS4vG}TvP;Vje$*BzBfLOiC=Hm48mE2G(;od077{z z?Hm8*(BC}QB&t6J%37S%Ov+Ec%MyYRassF&I2d47VTefOm@lQTIB*mS(}X|_2?`=! zRD@)Nvn*njmo^Yc|EJ~0O8Vj&!1SO`R1QgEfdWx*fa_nrm=TKdPo4|`yZ^LQ2T~A* zy#d0Z@#r&~M6B&TwDudXnPod2x7c8BH`nD;k4=t#>HGmV(+H#D^($2oYu8$T+mRPw z)k{2o^88{kO)u5>19!ccUO3R`&yUmX^P8OKWbWFe4Z+OxOUj&MjV);`Fe^{AdsSy^ zo7DH)D`Jwato4Q(qNGu45-F%=mBc8%f`9Rg>9oQjc&-Q=w)ZRYj5?MJ>@5z6EOj`b zvjDgR@LW-h97C>@+_?ff#2mK~<2JJYC#&W+x<>#v8e#-BR#ftz2Gf&*=E z2gDMlO0P6nSWycA3UlS8R;hmmBdWa1DF}lXE(KAPF=z!8aU=b1kyA z$guLX@*uZ1?DJ=mZ9tM;*+!)OW5|uUBy5xoLtO1+0olBtX7<2Zz~_i#hxRiW-Z6Mko5+^ zD}Em{(P(+P1}Ln^+n&Gkw}1QvV7T0o!3ge z&6sxtEOi*=QbA&c%K({Hs4%Arc_j$m_}o4F_2}856%(DpLe5iaff6X$_mG;x{`?iH$)p??43GG;qF_wWA7i}ZThsHaBvcY4Nt_xUJ28Ru^~d_H;oaBaBIqMdU)k`*-W)*U#1e5E4P1KoE% zHrPrMmYs`T9XFf1tAgU?Taj#wq|N}e6G$NxV*s+me*W*%{Z4t}1Hq7`^96emA5gTS zL=&6b*ye!F0+0dZkeIlV4Jf^hyt9TdX{yL6o4A;dNXC4qV@FEO3K@WyjFK{86~_jr z)EhSvQ91HFvN`6^kDFP6DaOoTLb+D`nCe>D>P)0@waP*w8`~gMz2^&teF-gq;7n8l zryLMn9WKraB|zFHlR**zIQJDXu!N|IVjh zAjM~Y=3`I4-oZJ50HEO0nCK_~avF?>N}GWoF{ji+gM%eR+D!Yr*16h-J+1*j!GV?@ ze+$A9Gn|6}oreemR~W3aR?@wd@WP>^C2lAI5|G~2GE%X>DejWxh=gG4{uV&xGpG( zNXI6O4?7UGT)%WA!MTrcK7u%=rD@Kc`?sh6?By|VRo0n_3mG*TgTwE2#K#mYCfEyC za;zvq7)%r@UImZ16Gj`BxQY=Y6?jXDMZ~0b5NR%*3j!y9|M{QO=-MSvjoKQ3%98|s zia0fO`@|#XY+r<_{O9%oLN}k-Xz}1^AVEXIM$z3@HX5+ixvTrR7v(M1tS2GcCR|+A{=_SVrj^(Z~O5X+!4%1c4$3!;l4D0s$0dm{h94by7su zmkbK*g#w2mvM4|c5?6ISH4GUeV8Fo_DHJr;mIAdjc4bzF;dFMSGinY=vWru~3o9}T z785+6wMhF&xpZ2ou*+JS_S#q4Mbxf3xLt+GDmPKya*F39fs(WenA_NfcK=-$mS^g; zH@ZFFnCR2UHp(XANkhC7S{x{vKdhC!C+sjVUUh9>nwPad#wO40|T{0 z2+KRQ#aj!i1P#EoIw5@hEh&n<1BoOW1}la0$YJLc0iv#<7lDnE*uw~>#*D2Ky{YlL z-uc7fDKq+}{1`nSTDu!=_~4zJZ@e+yYU*csvsA*DU^z|bK;GFuUu4aD=bpX-q*Y%l z==kkB*B@-8qwV4JnTf@hvV>4QjG8*uMd}@7S*-?TcL*%erlEEF!10BD!ResS=`?1K`0J2;2lj0AuSIF?r2b1~UR6v_isp9AwZb z$G_2eWQ3pF0y|yq5y*5Sb^yS&J+g4@bAP{9Y9UVk3?M)kp4^^s6+@V!fC2%su-%Kv z>hMiFpL>~{&lhG6wwfDH_Cxv92c-GvQ?+$8xlh5)rU?)RGi&djzFOQOHR2A8&iPq# ze%3v@umYEkwlYlU5MSTqfX)Jda{+~FqA+7<4Oc}B 
zwm}RCYCR{!I_EWaoc`icJDGnN^FQuV{s5TdMTIP>VMtF!an%r6v0O?&D`x0+ADfB5uY{NXQu;9ECudfTiaOEGcA zRaSfN!GjTLq;MPus8tH=|3C$dg9rf|bM(yxNkB-V4oAaF;Zy6g5pP7HNW9EA~uhj9$Z(mP& zt!JAtIacf@nQPhl$}Y@3a_OZqs#}YD=HSuJ5|;Ja^X&vQ0coIP1WqH9nVI z11^jQ3UynM*=K*_Xa44?l$(6XQ#87qa1vAL;7^ z_sDECip$0N|gCyrb`r%%0WaCuN=cbGk~ZSOp@ z_-fVIKe{p^lXL3Uh>)msG3Qw^UdK=k#~aSsuBWu%>2PwF{y*6t=8TxUrl27&LonPSiVUV$V+ROWs7UZ}&m~Ud$WUyeikYB=^w!yk5))X!8si+*oJ+s($3Kt5 z)eB%7(K>-lCka@f1d4?>MISlTd1A|Pa{K!|AoS!b7+7$DaBFo!QJ6_D<76qjbN7D( z1a@^V_47Fjc z{=P3x&E`P`iG#nKz&*h04ph zcsEo5$r2)x@yaoaxz54Uh#6IlChjhLb$T{Dw{^w_)!wUuT1l!s36}dxTA!43NO9hv zV@;A9HXKaoQL`4QXzKnypIjRwJ=Cj@ksX(o#(17U$Ir!;nWR3qoOoP%(=Ik95$%Nt z2-<5~T`M>$Cr-RnLnF{`X1ifrY|P-bKY1to^=E$d>%V>f_wj2y2W=~l0enSRoPeM% z3S}h`_GMNXz=Q(zHfqi0C3=9-<641m8>{J@n{5TVvIA`JBwKfiE7KfGXR`GW51GUiE?Tk zCeC^xsz{G2c<;Smzj?>}+;`R=(c8U7B_%<7o4txk_$gX7V8m-cYU?dt<`|RR{S#Mi zf4lzXe6snXWCYc029RckiXjPvL^Uwc9zhJ}bd>twNsNYT`)FKtGOov?1h0|()oxlt z*QW}I)tFpQ3K;OZN-U+yKC9&tWh6s)MpSWcbhb7 zd$_R|Pv1A8<|hmc{NxTa-iXq5IFsyPxvm@BIjqmI6P?@HyVU+4}KFf1=1Zs1~2ve~L?#=?zX-~B!K<6c5k zH;qiCV^PPon!W$N5IN?FwSocY+(|@p2^d)n+Ib0vSC}8z#xLy@K8V#r%b%c2>IRb`PwQ_K z`^d#(dGnoX*E=R8JVrL!!g94Z38V^QIGS->NuGNrfqHKlDTwgcdk>z1BLFsdLbHls zR!8NaUEpLWH=;+rJzYn{IS2%iUV5zJ--!>F7+XNdvzuES&{+WBV6`jiCD&y#N|F*t z!9W71MNxwaWUeYbK{5fc!JKYHGb<3AGASd&l(zWf!{2Qlx_QsWj~{DpWrwW0Qctq; z+rT9NjYe5qFCd8uG@?l%1hG5ygV&FwoviKGhLFHiwdX4@fkuPLfr=V+E;bHEV6;!G)~h=UdCoxtC~<`P2c|IP77@n;Mmlh$ zsMDUX5yF^)y5e3aKqZOST*xikAPem(-Q2Se|ErBrX9wanIL&IE8;55rxUs7$NaiQg z6@7eP21~!Y`QbA*w{Kk3Q}>9gt)3pEy{7J9w<+(O?Jfe> z*aHWgQ0y(UgPN6X|E&-F+E>5SV61~F25sXei{w<8e^;;q11Qf7qfQ|b5MYHF1ez;? 
zI8cN%2H_nTj*beS63VwCN4O|kqKLKoklkkpn5WI9Q}QXtOG-`)CWQyzc$ zR>7i^KLda||FOapJ0BPe#@2M;NEAUVo8!eVeIKn=<8r(X+p8tpeEyT2nRmRxZtg!} zeRX-D6wu8>w;uPqolAJUb-e!kRXibIIhlRw!C4U@%RObr_fmBuIe;As*buI;a4r}Z zTSk;su?Ol(6bI}T510gEzy~`cvo0Fe^H$+o1F_nA=qpE>HW()y_E^;y5c)=XXoRuH zjytrn$pM`OaKc)sOj0#<0VXyg^Ytjl6|_TXh#QAqVWnp*1F%2=C$ht+4HjdLtE!kg z{hGcl$x60VcIxjE=fVc)a3Td?&zp6Pf zqXAUs_Nf^E#oPrtgHrXK)noHBZI?g0@&QuxihT$3{?!dR{f-~M zYG?1SePG-W#4qmp2`Yq{Je_p zHs%L=;O5F5;Yytd0H8#{q2qUa48Z^W<&gR_?+Jh|f7wwiwZoL!%B6*fhIL%X)$(Nc zXFnh-FoEX~+F2R2SD$-)=g_0C+MTJ1@#Ol|QhO!#_Fw;m?a=Yl5+MKUP3ao19f{T! zXuyWVNg`?Y*#=vQ4hA*060@5NT|4kvh-2Yj6#xINC_fV zBMngw;2Xk`QJzBITktrgU@6ah9q-|p1>RPV?3IdPGfMpZ@K|m**PGv$iX$@k4aEf8Fo9*?RL05v`tXZe}<6UHZ4q z%-)=Y^o_NtoLj7I>;7ni%Y&}7-nr-Y#izIU!RcF_%p_B-;cGw|xHLp4S0CeIXE>MQ zY?LfK_3;1v&96k1=2gRJ?c-W3xcuND&^87Q7z~Sp22Y5hfUx%nio}OtYQzK=f!j6n z9;~ssE~~-n`i)iZ3fGS6yq30}x~m>-|3HD5+_9hx6Vw7Fpu}C%AG*_H^TjKSqBHLg zfS&mtkp>0q5vRG0bxg{+EMvFK5&W@d_@Wr6^C)bu<;nGnkFOkl_;tLpZ+tF&{Zh!m z!RX-Pznp=e+P-?ZGqJ>}Ja=f00A+Q`6I1CYFC1!4HO^= z%}{FO1EZKA@Yb7alTBocz zEG*g`2#@P*(4w1ZEYqQG(5j|pTZ5 zBm*Pjln10!RBX;%T%Gy&@P{`9fD$Lb-!mTsGt4xs_fZI7I8q)#C3J{F(Bh_EUlxif zK_Q0;!Bm%ykAW|Dci_E`FTcERVp!KRQ?6W{jFwHbVdj_R8N<6v`)gZ^?iRN**yl#94XNPW%|K5ea z02V5o8t1dP71Nl$^RSm#LxMF1OR=H>AOc4mre0AnuEeFyI$5tohy`OfHB^w!KbFnk zy7o-&tA)c+B3ku7tse*Uy*+R>H>5D&tnRD;P~yn`Qy)j3pf6n^QF+_X2V}pvgoyVT z12ar9Yyrk*UXygqF0c>323PD<+-TzUWgA}2K78fAdtM7yY9}YFVr6H90XFY=@!X?` zClI(+vpbfNl@q5_ue-Cg-c^p&8WYV4JDf+xLNov#m5g*_4h|D!iQ;5X5jrvBFxnc? 
zaOlvt%I+bf;5L^1qV9faw>I)|NY34H&lirT))F5A;%G28Vfa_4rhGvuSnO)oHaMWO z0J!Q=u+n?LJ_Zd$F<~$QaKL;9yoxXxw&E~CAl*RI3OCY$r63E7b%njeSW`=>P!{-J zGg6gSY;0gvS;rJp&)MuCXj{^uVcgrMw`0VMT3+%|lnz9|BVrNGSZsFYn>)c1fSPd9 zB4KRM#@iMe_)zl-Y_uIUyOv?>6E}9LF=dv{sDf7lRC#m$%K(4$n=k&4YyWZT&+l2> zN_>g5^4e=K0GM&`xY0l!Tn0FcInD%!7@REbP;4XV9h`CFSsN<0xPI0^t2 zK{hCYfK+uMaAO(rBq7cU zaJKR0+Pm%xp2Amd71A8}1q_2HE;Xq4*db1Q0n$5GJN+ZZZpeN7eXoc&LVdg?*~%?X z7uP;|{r(d-i_1q(AEEHW=QZ%K^yb(1&M-i<8Ow5_{_XBT7~a?#=8h!oF%&08V5msK z4fbG!r#3c2Ar#n@7fc7o=7Y*6c5^;9b?o(9@B028k4G-O`+N9QW9H>8UXa)%=T6=I zrQ1?#m@^2m(BD69qJNxjTksx3km%|T2XqzyXWn3IDb=Ek4CRF&Pyl9-Q-M`c!=SKk z;ewV{CWXtu#uRLAl=zm*x@y)dti88&mSc#hDZB=Nsh-bW44%i9>IGQODBVc}FE}69 zRXwC-B(?~FKr|713GK!yvH8|Sb33IGMT{AoIZ$<53hx0lgxG3B07z6;qQDH|W6vZ} z(EE0=$T2m;%m3nY;Q#V--}v3le}aGimLFUji#QPAU_H^!7(Fu1MFMlraY~HCoFJVS zIe5@~*1XJ{JHRyut!8HP**75+GmC;GnloDkjIq{4D5Q~K8Ij#(^W4#Ww}ctkW)!O3 zAt&HYws3y6JpO*|Bh(zWk7~S(nj6wAHyim}ebDH4C&Q}VYt?RjwRoGZ#!ykLtqD5Q zgM<60FL$AKVEn@Jf!XHb@|v%oQs=vF{=oG;N65a~RY~~}rjs#eFYjOCf42N5M&cbi zWwfo*76+NU={_4dMuQTBXdI~b7)JRB^0JqWie7){3Jn-!Bxd_;w$=1D2G<#1eIgtG zO!;ch@4fvkjjwbkch(;)<*V1JBJY0ShnE(iAuLb=&hEQ=?D1AWfWLY%2z~4$IPCo6 zibT{Qrd*6OCY;0R;a51lC1#TMyvVO1*=#nW#T6$nt$gg(o9-s#@KfgM3&n0===vQf zl|s}g!*1}mtMO5Jp#zO(GKL8=1%N=v5h)RN94U&h5=wEL6N#}0aHff_k(sIe>4cnI z{j(---z=v#c)T%jq`!1qycCZJf#mhmcYf&<4;DNKhoO6DGGqUExLE}PmK&U0TNi-N z0+2lfct8PzrxqeP0>QkI(DU3TDwC3vj;{klp#pB!JTl42gX|o-T&?Fkk9L6g<~>daqF*0>pBZz$jK()Yxl6w=u{PA^L+hcTrZ8 zT685XBR@)NjG(EmDg{EchN zfAHW-H)x5yBMmMXfDkGm4bG@Sfd}Ro_LSEsVAD_|HyrDonq901gTvHavILnH+& zC@4-LkFa9UCuvE+dZ{s#E9hzV)@r%Lxall_l7Llym=ScT^=l7oT`o?ouAJJ;$M>SU z5Nu~@K-Epx-fSGBWsc(0&;HqmTG#SQmFf8vwg0W6cd^+d{hmEEKR|6!v{$#REe#>W zL6JdAX=`(ta9v3Ze*a7V{PDm1LCcf9?u679H&V*U5R590W%;A^LDkLk z(rOE?#eEz%l5|Y+vKuCGsH$w({C+h7diymol7Kk>eVur;U zVc=9TYACU)URhFdGQ+AY<5;N(bss!9iBKB>EdO;XN87qx&}L!piErM0ZE;#>#Kmvi zd+Lj)IRa-X_Ez-vw;gLv$t767dCrUVS41ez*CnIFj(bgA|bjcCOi z-gl>U2H<(5M6Ddvcr~-z#~BoD>>EH2ZtYnCj#tDLV7;ehM76}!R%K+3z>{OwET!XO 
zJ*tZ#O?C-ql<(nsMU#OK)NR{#0hkfzc$zuKQZNik5f8P9u&f4js9REdsb!_Xs#XEQ zN9pAl5AP7`v9ToRo#E3AE^fd5Pyc-4gFiuU(pQMC`V=4s1BAsuITH2C(LhUQR1W~W z14l@Wb2Mlz-)nhV8m@ zNvR~*!Q{9}pBvR4I+7gK(YPu1xE#_OP9%1^uH`M>^i{qzn=a>fcE5gS8}PN&@qMc| z{Oygw%dZ@HC_mRzjk}|joAGRtLt!V}hMw8aw>!c!W2!M(1z%w@y*m6O_xo8y+@O<8 zlv=hU8sWR%Hb^l82!sY(4vVcDA3J~U+_KZQF~+A7nY1JUAEL;{-F2fz{o%k`^WM`p zzBp?Big`KnqxV0kzt)FWA7N0mhw(lS8DMpDPb+L+wqW419 z-Pzddj+BG;?D3jKK@*iEeW^T9WPv)9ii`z5$|Vutg|V?NqNCTivnW{lmWSd0xAg5^ zZsXlrEjKkyx_I)p{$!`+ocHOu`;Py&djldGQ}38B4y4pRaj>OP2-wpQU0WA`&H~`R ziB!ftrQZ8`D6vqsVG-AXYSrg}sjc-=TFHK0f(i9xN%AJy%`=$5n`UBm6ysIUT>64a zHmbbG*gEgx*lVz0l7@@<9%GKRh7~Ymr@AgQ=O+B!QAts8)FC;iWp$cXkcGdF!F9Ljh|Lvd6Ctm0E6YUow zfU@nC#tOhxRqi-Rs0ZAj1f!A(JHow^88D8wn&$?SxdG7dQztyVeL)7yaVeP%F4kI* zU@!n!I*I}?Yuzn;G0T+z;&{@E;_li+B>H+jiu_tueCDzC<}&Ssxf``R9O(=X@~VH0 zwT5VaAcdMCJ2&~0TaWqmiHXtNp4zJpj#WQgJ$jlvze;bL>%5e!`HAHr0SAywwbAOt zoflx?LQJS7NO1Q35B~Z;JyFMKWG5efbvdgyVxExq94MG=O4}dwihN|K{pMTer@wch zZ_Qo%*|gLcC81p`pCS%+Zxr9aDXpb7?C#q8!qMTQ?A0L}e&lZaWPU|Gn6cNcgPLUP zU^FC&RS6*lj6X8<;W0}Ap|8K{C~f@0I6V73BSMha2WCa#6RdGay25YE#@u~Z-D^`v z&P)Pa`ZBM-y!GjG-+KF)k7`MpM)B@5s2TM-E7`DcwRR$E2~WLbI)tDSUQlf)a+ILT z)&j*w$465CyeMN+h4J@YvezgYG3WGc9|w;;LjSIuy%-0D?p@TVz3|fHnaBQZ)vgSJ z#*1$`@DF!|;0STR0Qv`;j{N<>38N9V1cAJ{!2z8G!2Lm84lN&2t|6@lqywS0N{SLk zklQ?xRt_<@`>{{<)W~z|YrNDx={1u6Ry;9jH!ECm3KlTM-g^tw>S4XDwU=7fy>U${ zawlYISaoo&yS-6QYp7_e9v;-HykO0&1I_+eV=a~_+S5}xFbAAyX?2B36%$vXeA?&K z_r}`WP*IWlBXd9C)F0bnI7h6iu6*Ku0r;=^!LP5~ie~eBiyy~^xUm=jqnK;OdqB-l zSB`^Y!aC8E$U$h@5|tU{uR(K%1AqqA-d|4N83V0_N)iu%LNF0nL{N!%!NFtjmM!U< z*?m_xJez>-h%nE%$HRDG!Z!LZM|Ye_PSic$oH13=FA`rJj*;1zwuQMdI?;F2z7{tB zP^Q)GxIfm3{ZCr=EG%8we)Ir&DeoV6=((jro~W+d9A#w&bdOELh3fm{j z$f06O|VpELeyzX5h=DMZ%sbSdy+C z_~eh)yAC`UYxjKy2MBogFB{vf?vS-V`}J2y8>?3Gj=vyHmwVBC;i36|xKm`D02rY~ z|G-q~pE%a65DJ0kOy1byfX)Koa%jOrCGCt0)IpIpa$+m&RpB|+zQO)Ks^B|X&$~Vl z{P)+N_V)L7>8e*tvSeGXa>WLl*cfaaz_ba>SRSuzw_wItd9O zfGMV%W{i!iELPgvjwD- zhQ;#1@E8ZMz*%2Y3RH5f0kUL#eNO=Z2@PATL?X+h%`3P&U`?YYhP2v`S?t@S*;mn( 
z3SGgxGlG+-l0w^KzAfb_^%?7EQiM9^VO&Je!ht7!V+z4iZ@6yV^G(~V&ICmfpe&jB z(oN>fH+1d!`E?UryN{pl41`q-i~=v9z_lIEat7RFsZFc_sf8ghRU(Do!NcRDWC$Pu zpjh9OoA(gsGv_R~)HsAP5#?HYIVPGaLrGzyxpd{kz5a`wa%@(pCj=sKYWLI^n%QiY z@A_1j9WRf>l`YCxH9nB}T%9gQV?$r%wX$4EaP|3b-m}vi&Uc8e@;A(GI30VPgOf=- zo+{mOTZ;4LR$Sgb*B{yj>Ok96J?I!9!H$!+{^4_9Dl^i7QX>3$9F-&A-hLf1Hu}bQ z_hnaA3(pE|9$(%2{+BOhN%?{4Xf6vm=)v{lYUZVh9Si4rmvTAO)Qhg#`Sz3Verx5{ zLB|XpzF9td>E$~JyLcS5s4W2Kghx~ZB?kQcTMq3{ovWC~U%&5ljjCJT?H;>m#n5n3 zwgrfJv2LgYv?Cu~E{QE42Df_d!dfRsqfYzBN8hJMoyk|0SC@L1E^K)})+oP;X1{!>92!S0#PbX>_g5ma8gapU9H6Undv4b&#vee1rzEpiq)0@_s0Epb4H z_@lQ;001BWNklePu2nfww$&nS)zoU3Qz1zB6`7JO`9ai z=l`+t^WIFUzc@Ov8vpj{@_bu1ii9dnrB0KA0x3}mFd?vF78{kzp-}`YVgg>fG+)sP z2LM^X_E7HI?1U#B!2qGkDbK0Mk;Se6p}54zGCiY*CnqN_N$N2>fO_C5oY79@&>M)A z-^{ms^y+jrS??LNd|@mrHgaohJ(T8bTIma}XsuV7{^8keG~z2K^Bcky+qVAT_$(Zp zo?V@9P6c*VPwZ>u5H7X(kXBLG4m}$}p}nSi<4?AIVWxytr!KXPIj=>&7u-LIz(g}& z8ClOnW{SmV;ya7`A9(#@n$$nK`?)90p)GVI-;0Zj&o{1GzNmj#UdO3Niu~I4*LN3d z*Y*cGedLDX$dAr!=gS*0otaGKc+D{>~?b>Td~ zRvC<}Lky{Er3|gOxuhaMnP$6hI(Z^TmgE2vzqR-RTn?qd))gP!A~R*Zm)@f5j;Ux% z?ad)6+l)-Ypj*u}O6zalQTxfw(l7!kp#@H+BMjf)QIdutr75?PhQZ;Hd41<*JRx>-B;?=^Gp~ceS@O>!jt=8Jf&XG6ym#%!_Ob15G zP%s>-Oh5t(@JID(lydwEaWTOP68iNOpRtwv{`uCOz8nCUoH! zGIU-6Zu*U#fBgDig}&?SS|+vhLf_}1f6WaRVZQhz^ZK2n7iW)DzIyS{`(BvU!}T9t z_tMe)gOll4e>a#@N9%i*&*nWkgnlRQ-*)5Nm*RtFQ+B|1bgsdX*nYJyB7z(bu78L`pUeZIz(PdfXV5!<(t$iSbSziI?TagYY{Ctvx5v+#qXis)zvo(OPkqBDF7{1 zbmO6mKg~75nOE%mp!_ae3baE`>uc^3@x{E~Uyh{!KHy3e%HzclshST7@6}GctMSAQ z4v^6xjHZKa6q_gZ3gxhKPN;BZK>#`ofQQQCEXq|HAe%r%W}TcyeyvxEv#M+XrWT@M z#V??cWdeC-xeW^oxs1lzad34p&)PO11MoD4yp&nGLv?vvwMZ2GsR7O6rnY_=#Lx^9 zMZ&~IWnVkevm6T2)q5Yy62tff1N;`A!ma-+3ScX%?gtC%GDAQmVmR2>&3#ZStisr5Og+2sT z+?NCl@#>y0S89XzT=mP?bVaBcvqOwPaH7FW^OU}B`ahsRfYJCG?zxu34 z*+?-6H;U}?YI$nc+H6x)f(zDdV(pi^Bb)HzX&}tQedFcC*8#Svx9PS2bi?1iUMA2N zJdru&m4N#VRQeDY2+kf|dKF_Xs^Z$z=I_~iMrV#gP_Bj5x-AGm zpt$oAEihoL5px9QpBM*$f`HWE@GGVqnY=77t zuoSE{{hQv8A*C0-#&Jk++LvC8u@MSV+?5+7Z(4M$B^;siqOLxCk! 
zmv0DXPu{Wb$(a(@nC1Y|)4fp{HA<+LYwCi`Y;U|l!=XPQ9_kC!NDFL*D0&i?-LOoC zB^V@iSP)5=bu0B415n-zO+F$}VmPI3>p8`}TqNyLF@F{SVPd44L zz47Mu>qTJR(A5Kbp(%vl52tx{xmBeSn=17AL^PO1kZX+0)OX*wMU0fry&@ACi21?v zuJ*-bYHG!X^~rGVa(&Cj<7aX&_nPw>>qrdp0?gjIk^egLMa)5Fl|*Vwd6O}AU_*W| z16R6utaoHOAVhX|j(mAHqGUzKTRt)V+>6=a>6MC|R&epvi9_d}6TfPB0HnJ%ans_r zm-cNaw{zS5>f^bdD4gYK>|o!%!E3l7ep8H~xr^98d($U5Nb_C)u=;=OcvN zxQi{Ox-14bPXdwmN|!d=gM{k0+A9m))4b(T z=XD1~yX$Rj$I;m29o*0p-=-A_8u96e@UiDyW+Fwh9gmGI?1E5hNy1;fH+u45fgMn5n6lZHhz)*tHMbTaLlutBE^|PK0f3lH zGHZBfQ%#YhTC3E0kYh?UhaskxPjy!7OyyQ`55s7_IJPIh(v3fH;mXp1iPzX^zaXCG zIj(X7kuF0X1M$ku!7wGgFz-e)mWYY~_E7%YLsV26R0RSN-t zOsfu+@StSXprqPWlayIoLp1bC?P~5-Mk;ee?K)m;B*mrBW(G{Utglivx&8OQJo`C# z`0n4)f3a|~h5(pnb5a0NMXCz`EI@PW311>#O9eQC9Av%s=EUcYeQ${az+g*I?A!~X zvrb6Nr~#KV0O~SK9f$6zxBCXf^u~#nm|65A%MmVcU=7bb$xMIZz2fq#BcNv6;fFU( zllEGqDtUpZo*c=BYTzN>p7wc+5->fuZ|Pf0TMqJ7lXpLH=b6vk6Vm=O z1(=aeL3P4?QcxmHQQbFr?LCrOOaY%fQj?b$(fZ~iMWCD;k;+&GV|2~~iCvKiK2cv> zU0zx17{SY9jesD~l-YrsLJD=9HjOITB!WQY`K|xWW9{+U>cW+j#J7?b{Ymfb6T@Mh;5x_ZY%xWggCyWNrx zCL2KD6>|9Ty?_0}-Qc}-TqI%G0#Gs( zG@$}OuiH2F-px6u$U55oN|#nLO0piOfGyh99=P1In@h(9(L}G7t7{im)(Qw}<$4uG zR3l>wf|MoHGZ?tC3}bBt*t}rXu}$?twOkp=aN>+WGdaQEd z0sgatSX*iwwb;g6YqtNhtAh-w+!Yjyl{N)*7yu^_A~bLa03<3j#?T%#H%i0#s^G*e z#Z?Q9u3i^16_`w0keAopvA6z%C-)c`H}B}TpK6e}21J)w>{Fa`*6e{@ZQ#V}l9AZx z$-XE!Ds|FevcF23{WVujJ+DVHY7798q|o_>5Ys`AKyQ6~rB?MVPMmM$P-4&OrjbQ2 zs-9hsm&8bQfDv)qS60#F8pg&snGKi5u6p2}FCIppEB?#H-}|H6;qT|qLu|pOxiQ>u zpK8pTV2Gm1DD?9Ky{5o`zLORw5RkL1I?zBBwrzMhY+$x;FA=~ z;SuK{4uFJKW$y?6uUYKxlc#VSu*pu)|>g=wIZDR$x_K7tlWV^5xj6Jfx;4& z?!Gt3p3JTYtid|H=fTV0AM_YillOkJ@frQ?)(4J0%bPLDM?8tn%w4y9pM3UaWeCw0 zBWv@`9&SIgU0Dr+ML~(4I2Qmq41g1bp>7blB>Uo0<2cco%LNJoqWKdTYXPnSloJPh5ssI>r6Y4=- zta4=KsFl(iX5bgJ;6Y!pv>Jl-i^kxkZ@gsFFs*?2Cm>U2{a1)WuC&AM*l*9;>z zK9HV;eOEY3h|P<*_sGYl;me;upPzi>XHOiy2L2{HnSo$dIs=)svXDR+;{cfY+<2ZV zlv4#*DJc%#fNBdkfaJ1(`mURh=UD)r2Uu%X@Em)zbLQ-drbJBa9A{ejTC67e)zFh2 z1C`n!A=E7S5Qje%qOB{KA 
zz{ej1kK+Qz6K}oRyO3xa2yv5mZ28(M#ISte(Zk$`$JQ)OecxDavCJ4Uh5&FcC6)qUrwta4DX_x&2bNn>M;mt@ zk9(R9KeScTmqhgC$X3Qb00h19d^gGhl!tq6r|!k$33CQ=bmM!#Jvp0Ti)BIF)F;pW zAk`4`9$5F+8}Cw2t!{g()UQ&!HiD^IczgfyZ)7iCk5FN-XB8XBt1(;r`TkH~5YA(+ z^3v%5&|v^P)C^Px6hyboNjJ5z4uB=$hQ@-_d4Tms)G9U>8Z&3Du_UI-PEFgDjxLx| z_1sk@m!`hVWVSOWI>*C$)~Qvpg=z_8m54rSC1X9xCuPs_ST}`D%Bx8&>KPjuFOEGW zLd^%W1P55))oWYFX>*&dNVmG~Wa?>d?U;$g4gRm5H&dcLVMuPgx!cvN(~YG_Nd{SR z>4v`tfWQ1*`24lkfA7R6x5MA8K9fMn3y`xRm%tRbd}uWQC`Alob;f}bVRDqk@^nSa zl_Up{P>9jJ2aLyQA(Z9>azrVCmD6W?Kp@yWJqn8FSmaUM;>T>{4>0&4bh>t=1X2y{ zzIP{pf3+}n@*D%802Blg#Ex!Z^l@K)vI*veofdz{{l91W9U-tF@XGvSU6 zmtP~^?#fzEO30MAzE*p)J z@cLB5i0bSZ`^xIUiXOn~{tq?3_iX7uXUR6{X$rr#{55u@TJecfOVOd;*ITtm-AS`@ z;Q#vLU*4zLspBFk^EC>Y3w*?Q=SzHr!T zKrASd5Df=qSp?K-289x!1O|e@V(D3_GcN+gG$smDUa;+($7%?kp3jD4aR1~pkN$%S zFFGDvb+DN`DnbuP~Zvgk~o6`!u zy_v!|SZWu?eVo%F%se!F=>`YND9{RJ)qMr@kKbGZZJg02M`un3q@CM0bViJC;vx;$7cL0!cRWTmK%wec|82KU}BJod2z9_{+{~ zLoO908Y&gff~AqnS%hgJRUx$nYYmWqQ!js&zzR3^98g44&|Q!TJXOf9lg!-!i8vJ{v+uhpfH`SQp^8Y1-KFE15>126yzKn!U3 zHO0yt!VM)OOVbmjyju6QoB81`Amuudf@M}+>5rD_o3EHH>*>YQhSzD@4@Z`hkdjVP&cP@yOOQ*+TTMFVbEYxbT(*(cr@#N{7vaCSB1I9|B zHQ=n%xp7Vj6;@xL6@U%{;5dt<9VQjvqU+JrlNt{OrWDlc&QPP%!Ib1iSYU-VhL}<` z1TQWu^DB#lrzL?0mfmBr#@q4elzCSle zQ=DhRbHD#@0S4dvZS&oO-`cwN2@n2XHn&6yOi};|Ixzqq=N4$h4CS77nah>6S{B0d z&vr%=1(E~E0_y9pUSEVnXbV_E=nYSuT+$T8=%xveDEGqr$2^T3L-sPeFduOkFprPW zC0!l2%BZ${v~15^$)4{BC`17QI7rUm9l~h%H3dGQWp9kfUbD0=SU9m--#t>+-QlKg zydiPJwTZ=*2CM5Uv$T|P)9-DCxNU%y()Dxu|KoFCsxp-hTkE_Ocv$9y>_5Z+lVKim z_V&5U&(O4WCdSmy|O^QCZ&Cw@50lfW8V^Y2^l(5YYa zg%w!ld)|+A_T-|n1;NhUaF;{rW4g@7#Ya`ECZ!L11F61K>8KQCHO}BMIoV#NH!SDQlBP0%)L^clE6>t8e#)G z0zF)YVX~=3NvSBVY7ap9NYa+?XDlYEh1BC%wqO!^9+n#0lz@byP?9qdbSViUz!d6c ziSm3;cj204Uv)-UUbJWb!@mN!a_YX}(|iB#kF-8U;J-$%u2#7L6Y+vVS7@VUZmcQW z%&=160~i+Sj2FD+pGYR!Ab|tx08CtcKnV}Hby*bH{@D`?K!J?!S|7-wvplzU-~iG> zq?XRX%4>qa2&6^?ZB`x$oqt~H`>sm9d+&ooD$aWSeTQ0)b?P4~ zUQuphN$x|nmrs*nBx0%)2q`fHC=?S{um5;8r$qMF7MJ=%MAiY`{8FE3LwUv{ZHeRv 
zv?(I$AWs1U5*iSfh)`|%tQyP;(TwGwQWaC;m0skKLm)`=?g{HKpoJe+=Y+PoV1rt$ zZFR%_6xk!kha~5LN^ZQj5B^XR zFi_=k1bi+X6}gudggWU111dA)OUg5jRb{~PWD;^w5aUY?k}wQxn3YxTiM|{ue3Hck zqk6hj4HFc>TxqQ3s=*rPj#)Q_E_OWmfX69KfTW0XRnYb&l#}Eo zYpN4J9!joL0Au`$hY2HHI2ML;C3wdG#Kdh|q+ldNAuGCXob6H|Fu8dXm1b?em6vyq zieUESwGE7xbmvqUP(oy9z)OMcnGg?rd+VX$SI$%iLk0>Y2MHV`{11hIcW__;R8s&= zHT>F!tCPU*N4tl;)Yb+-$Jf^4(lp`Sb65=-GD*lK){ak}9RJ*Fe=9giGI~?<>$52vDBVbvv)RH|us=$y|;{$0kZ`Jipo{#99!DP@GdR z#1a{oBdJ-BRzOiwSPT;7wCp#=oPEKS4Bt30T8rQ(ufw=~QzPUm(reK8_$c%_yhF}!o<7tBt^&dHORp~$K%lg4 zx)K$#wGi{h#$DxegUaG#`|nq*9jhbN5&(#vg|I~Rm9iki;l8`|7w5<43jk0|0Z0z- zknleh03d(^0}7yl0_ddv=_x?YEq5qnzILXy&d#jTt;=)D3!p!k^dj1cCmS#Q(Vc(w zdJRnMW78P4ylUXF;LaUGzYJzzeNsAo^76|&J%(&|U-X^DL$|$Xdu;u$001BWNkl*d#%Mm}O+h)3!9+f1qgOqB`$N!qa#qs>d4nyF28!iB zzF>#}YWa1Cz>-rxRDs3X8Mn9eMAdC*>Dc+~-U~-XbNjLQjt77AN?ICfs@(azsv*!? zfh==I8)PXUk|P98ImW4ZVa^A17yu93m2$U(!3I{-JWmA}ZR%qyW0NS8iHZV~0*InC zWI-AjonS(uF1DB;Orc-4)Pm$W$S{{<5bBjZO>@;5}5L(Z|?wd zK#jluz`*OYf*11Im{ZKQY%mt(bE6@|t7GryUw;+~01yBiBxi~#U<&>x2~Y_5|8W-T zBDwE3R#rDN>P(_NI=oVI!KE{4vAZ(6s&dk_tHpTZ%3x0lUzqy~Adu=Px5L7ZYSf3R z`zchF3s?O>hi|K}G5#G7XMwf)lTXUUL_e(2nbbD|O$4FZf_cWAg)oeTnXK>l)>X~BVmYG^L$pjeKUZy zY4nZ6Tjn~scK27U6`0Qbn{DpQ(OJY4lH|6#z>?1MR}u%nt-0-xb9nB_c0maOrpo&- z*6@jZ3cpgLSDz_}8Dw_q(@&hv8zwIY>02KTu@zPlZLu?$0BB(>aRy75FQ*juGa#PGCWgJ{1ebZx4r^i&BT4)<5;`M{%sr1vKIF{;t$AX|X~JO; zNED?*Txujmp;xe4%P3jk!>Aq5Sd5>jA#_+wi^j zyzz_w@|XVx7XHW|o-tX21Qb)tsU^%9ooicO#l(O$Ohn9C7V$an;#>(NhhLM$>W+8s z&}G-YaJmBsS2m7L_{n0?pUWq9G+Dig&#jhF^~ASsd0&B&1@DbzUJpD3m#x2UET4@k zPS4X1SBhV}b!mPC{+TEQ0C4yhBoy$^N>RWTZk)Q*+qR>`O7pa?!sTN_+ozM|b>935 zi}luP&RzFgrz;L(&U2mGpc>Es%6Dxteu2E0M$Vl+KXat%l{vG!%HNo~ z?e-V)w0+Z^tmZce09agzw-@4K$d-RByi(2MSne&p(W*U~znX4%^SVkOKR+NvSbq4# z?D0}0QXLX7WW&2VNo~+a7>67og{-5F8Eu#aLBVRo4FZRy95k_^(vk{Ou1{Hkhj$-> zUwh-HeLX(<8`!4=R=z*anFZdVCDOcQt0{mJ^wZbJ`hXD0b5DcmZ2ujApTD(I;U<;% zsnRTF5})Ha81;veE%H*72pZ4H=}wq-7oSUAe*5)*#w3h!^Vh z(R$oh2r!nIN3o$Aav&6W(h_GeF@OOpO`LWGXs}=jB0P20Rf{zx!{*wGEGkp8YY=X> 
zOG|pg#_OMJq`s7xW_ot>BcX^RTNxWexFn1zE2!1E z@qDbYwV*6W@Sq0tRA)W83?&I%7OOYjw@%KzJy(Dc>o$%Agi1a4YPx;b#()dH@)m9q zv2yt*cikIXdfq>`S^`-Bq23UxemfbdkhpXIRS>yvK4)400ssntRajTRuL%I)k^@K% z?~uhR>=2{xpIwdDUGv6L-7lx>4GDs!?}%%f;u5bfHM2{0{kHD8*y=r(dVe|mGfA*q zsd}l@5Y!nDYQ;4-4!NHrUf58bxwLk%0V?jI_mb~kzV((Pxm^0--CogFT;=h?r@(w? z(S;xemppEZbkNy&ck8R2%A@%aL6+_ssX}MdOu!tTKO1>+xCIM<_D{@ zO-r3sX!N7%JEx7aZncKHrHTjqj+@-~PNidcfs{V~aHt5g*ceBw_me@g`fQJdg zXrV^|9R{%edeWa=qcl%gUH5VmmT?ZY=2;BogebtdvzGay3lqa4uuAm3aG(j|WSCcb zS-GVAengf$7f>$wn#f=(89hh5TDnwStJNB6zO-?qyOcUhj58R(a&4)_*h?ry4!Fbs z7#uRrcSayp=ju0#=?U5d+lZ+S(;6{Wbg@!Nl}Q{4*b87etKf zuH3QjWm9w>exUp3ZzQ*dV>JC;uut~{pjW zQ5tB)5(q=kAPner+dL{XB2v&yU;++ND@jt62__biBEy7IuE^{p83oaoWrQPrN9 z@8a$QSGTtOr-x4-BVOg61I7|V&OJt~px6#epzBv10&svnkG8$YJmLU!QGfUI$;WYd z^{JM$RI=3@9{~Wo_T(Z@WK{#dW1m47emc7x80nOB?EUil8>$zFVM^eCXi0CN2_e*1h)4e=^or?Vk=G>;fm zA@PtNkh;nx_*q@ZSl7@Xran?0i$}0jvbUi$549*BpxZB=3{}MlTUxNAO>foA)v^&# z{-AD(%w8jUoxY{mf^ifpq=`cgV+j^fWidinV{D8glma2(C^+nZA}|g(0&os-gtZeG zlho;6PnP!24N^~9Zf)=MQ}g5+xOnbf`19ZU{HI@&?}GTVcg#IGS1Th$k>?ON0+6a) zTEid(Vmyyqz%n3xU1WUJZ)XMo3;+;g2=VB|s1TG0OaP79D3ra`D{oxdzt0@W$0nAZ zdGzXnxQnp%c7qe+^TK*D%BrFD7UpU@@6QeY<@qxm0b$JmA;y4#LIWrXP^`iTtOCHu zD(u_twa)wN-$yXhj#Z46np(M{V6o^fk)6{E$BWV?y$rsWcgGrY|E>CGtq{q$stPG_ zFiI(&x<8<~ML5FB`Nn^J?$j&Wy?`jWpM8D#_G^zQ89)5)?qe_IA7WEYay7sU%Xin# z{mlJnq84Gz;`~tY)wRlR+N0sj?F{vwFM#qcg=|prg*Jjf8tj~8#9;0UAuKk86^I*y zZOXxfS`SPR6%1%1c3h~UMGsjWH6P?C{pq5no&EUoJ&%=4U)O67PFVm$kN(^e3>0Z_ zFuC()NL9tX^7EUHS9O~zgiLAAY`*t4boPb0G8D$Z{nt7GJpb|qe+U$_@b>M_Y4oFG zIUq|B7Vv?|KaKZf(`VZ>!ct@_X*~qn2WgEoOzI zVEn}{1#}o7JK!Jdl<+{5x@{WJHC|D}8UQ0;xFm)pGODD+@ZqY)QDmSJ4YlkwSppKN z^z!1>L zA}tAZ2rzO$F<8K0F-8;sD*yoogvHcg;cKTE9uzt?Osci+i$9t=0M7^4!9RTJKmVW4 zPhA6xfBE5Oj$b570wKb~7Q!;s5;)EZP&t6s#scHCk=QG969s^R|3MZdrW!j-Cb5oK z1B75c$;Z^(a!fYuEd{T=(pcJc>A6qskj|{c`N>j=YBga~o+^aLj3(#s`iBf1{;YMW zU%+U|5CR5(7%=dT6rf+gD!>SoHhMXY=fkP}1jn#@#6q#M#j0r7_Xb>_XQg$mlZ&)0 zCB!Bi8$WXIAAaxq6~KL4R+$7_s}jbqy+sow3E`5-h~SeadMl6((tP}Vy&o+dI&dU+ 
z!-wA0ef-k>@3tNQNPhInjni-b%(ihc%mx-8IC$mTt0NzY&-DhkD}3sNC<0Re^dStP zAOavA*;PiAB4UwdoMeOpA~wf(=xfZJ-n4b)g^KYFYF)Me(qIK)OK|o6vT@%_1`blp z+ne$$>zI$p;oQaZ4egF$Jfq&6x_Gp&d2&o zlQenb!^reyra)ofMlq~CX8^e2R%08>R3ku#0kWG1FExD~vv7QVp@eNwPIHVYBLKvu zw$vYvcuMuNN)9~m0%D=Mn#&3lK228x$wxr@n9Oiqp>0;lFfU3KlyI8o8gSQCymmG( ziZ#WkhF_zgDQHT-2p}97U<;IUOaKER4l!b>BZz=O7z_ad1`syH0Co;AI0wQ3wU{H& zg(>=T8@}?jJy*lG4o$j$_|%8~?3cGr!^}Va(Lc7|ip&55imr#feI(yHyluIm~%Zm(U=kTWX#jr%DHri_K)Ty*NdGgiY-=x>( zh81)a1K~0Xfx{Be@et@tI`OdY+}B%6EdiAh!SHL=00AQjF$5r8f_F^3yKFn1R~6e7 z4dt}%G-|L)r`P$er8Y9APsYtwzOge){XC`?D)|4S2|mE>JnzG>|KI(sv(MeQ7aJe| zE^w0~DUzZnYHv}4vTVU}9XGKXCwkJRv6{q<(#BJMTFYuJx3WZu+LNNhAyOP7L4p7Q z5PRQ!&pr37Z@%*@ojm>h&j06`FD(iLy(SdOI6A|?AA4718Bj}|$1+a%tCvn3s{5kJ z>{Z^6R_?m)WvzxEf6vnQMjzj;MTxMc&yDuY{^oJ-SAx?j_XW8&|4yg*iR{X;otpst zs!L5}0FVRt0wfVUC*kHv$0@gjSZb+ZB}KNh)?k92&~xfQ# z#_=niwXN~ViF~8&l6)3BjS&D)Fa`0PBeVoe^4ug^+maFhbO1ozcyf7$Ae}H>sHqXj znrVzN2ZaU1CUS{qin@x&Dk2$Mjk&Nfq_;4vN@&2Jsb&a2%>~Kqt3o`uXlWN!=g%YWz@XP zVea(hk8OgLS0|>?G3<&KQ=`1#VK9n40STM@aLu~^cd(QKG;3NJ0USHP02;s&3t0z1 zNVw`gz2bAD&vWIcgBBCTtzC_bUb#y|zOcM@r4D7*6Ze-tT)X$0-zeMv_&XPWvizRgi?BpY zv**%Xb59rh-pvDRUQiR)^jA4R+mKGr( zn%V^8YMw`!0EB5%FLO#wncViC=Pz^(BgXNY?lA0E#}jA9{RO`#&>)z2``!{tAm!IC zA<7k^&bbhcuj`plI;_96?Fd2^sH_Q6@U;E${K<8Rfx6`8`%Oxd@{QAdp-jcM<8R$k zI)u&l)~`&+Qh*wFcU-&ig&PihZGF94pUy;2w;T1TpO}`@0-PuqI0_{}rW}cyEotG* z(3l*6TSGDySwIH>Fg@edr~30jc)k*Zea{bz0)r`VD4^>V3S$>UmDKrp7QmBLpFWK$GFoFW>bSzukiW^AAw=O7P`B zI`q2$C;#qmzV`ePLLlp!IUR-_4gd^N06Enx+IMM> zTZEXU-6F3He=>e&e|CElExz8F#xJ+4a4x%ti4w98ZC80BfxuuYdpvQ~M|SuPs3H9dn8k&tqjKD8{N6hueEA9ci}* zmrx!JRynCQR@P=ud|}sD&ehb=jJw1cj2oWMYwv4km}^flmIt23{P~6Dvsyb5HEv9Q z(s^LtZf(_aA z&fFY2O$MN>m|Y#^wz=u;lNgmY%^$zu0|G3JRy7cOe*cFYetg+G98!bBXedl6=tJ!{ zo*qK3hz@VP7F8pJj~p4H9F>R%Z{1UtC5ERD<*uSZ$iTaLx^r}DMVE`&Sbf{2b!h03 zgj%T?#-s!U7Rr(ns|r%)(nw2`24GSr06?q-07Jn5fCT^p|36CLP1IMF z>go&R%b$+m?|&CAUrJuNdGs*AuYdI){{GZyDO6p;U~Lp=jvoxb|?vC)qw>wW;p zq&K*)uZ_Jw`syuH<>0xDrj5*kNkc)6g#fJCw+Rzome^WE0jNVXy&9bb@C$4ajL*FL 
z_%rJb>yRlPxbyO}c?!Tv-FiEVdOzX=f|9)t{#jnKZL8ZqSr+W_qwl!%x{pXHG&u0f z6ydAiUzUG+`R><=CH9)_KlhwA@kgezxV#jXf269v_;$SEM4w8BKhFz=16>BdBx`f>Ow&g_f?guGGXT1K2}hMF`T(2%rN1 zbP1qcZFpuO^2q#}*VK6y01^W_lPaZZmf0*UHF&lFaH0~UsIHdk)MaC-sffxMzcR7~ z)}(Ca3N>;!ccrMfQ7v!a95b1potJ4K*4Vg$mj9DE!&)SIYnAvvB!!%@!v<3h>j{KmS+Z z5ml3g76}7e6qyCWfB?n~Mu1Uj3s;IYe!ka~y$aI+iYqC0@Vs?1rOW~K^5cS6*_o%W zzOgrZRF3amDfS#cRj*f`5})&A6rMV{b?Ic)KvfVDJ6y*$RasON?P=d0)@cf&>7~MR zSz$dG0@xh3nZjG|z(2X&9b6yVa(lXht9!ePFopAebx#A?Cux$W+047vf8KfTuIH^8 zJbd^0pC>ooJsL|YHCO&(G;!DADZF-+xE)y&^`Xv8?%ySqtAy zv!Sn=_1}0tkzV8XB=NuV@)my{(MS@*3K3e!EB(3m2lCq)T4UDUj(#3F1o7Rs|1`0_ z%-T!WeZnbB2|RzXQ)b*T+j?-bvv{P6^H*&huVT00FQ-Fbg)Eig7fvsu+nka&-Y)KO`f^t)b$Hw>!n5IcKxvZxe=<+ zg_G0S;joI9tw(`E)FOjU!i1T))y^NcPGtasNWtTTiCQX26V{t#L}U*@3fO!_imi8y zp~UivpKUDVd&f?#Yf1>6m}+$gl}QNDxjd-y;ql8`mMj1H%2!Hmi)0XuZ2+%UFlT!o z$YkUJl*~FLaEe+#KC^mkf`_3fuYqrO-#7OX6xr|Gaqdxh?Y(QWX{+gQK`AUf*87?7 zfmf|ExK4h3ed=8=h-)loV#q8BkRbhEY6-B z$HXezs&-bO7831<1>*n_YdOW*&xsAVx5bJElmGP>Yn$!XH8PWdD;}@r%9;n${ zzMoC^QiiLhFaUu8Z~)*ib`H3E6JQ4pI}3W+JMq821ODTmpetAIhQBYa+5NBn@K$hN zJZ~qjdb$rTRTe_Qse`bll=Tpp0tyG9q*lD`Wyiv>!{MJO0NI>1^mdPU6p&^4rDAD# zZT9n9ccqh^y~nQZ3IFKgcnv>3^C@d0zH*`t#{zbl>3ALxvV;i(2o6~J!cQkBHPW8?w z<+FeAzJGkSPR)Shcdx%ZifY31#CtsyDnBYMMP@lkgqO#`7VE+N!|%oK*!+r9 z>eF|u{;GA|_7VYr;-*OH;pdwtn;amE`{VbF9$A~+_u6eY%~gsm3i<^_uWp9BTUNjwp-DKALp)$ z!R8s1jehvYp~d+(1{wH@2rGu6E2_iJEsM+Ey|;9~mze;U==yj*j>3Z0uR1k=G7Ylk zgS(7nCD13&XHX!=$ksh8Yb&N4@uYw$ZXMu}7C}iI!UnKm7E=eggF>oQ19rRwonq_< z+@}JWw15nh3qUaH1JOf`0386p8;ZKjc*bE&A%Ip^U>}FLy)c{ziWV$~X%eIa8qU^C z*jt5h*YnwE))q@G5>ujJxf$XDm~WlJe$Lx*5=BXKV00zcETTv#4~r!PdF&A%a_g5P zTR1QVFnB)bAgK!KDP9TH=3i>H<>P;Q=B~?U2wcrC3=_w86#xJr07*naRH3#>_C`Ev z=`{eqT4dBYPsxxgFyNv}dik6C;6MFabg;4)o_XYdZhqveb1xrzr5{%AIy~=@QQ{~~ zVoQKCO2MOki8v6j3cvuEjpI$41Jg_^;7U$Sr?s1W@OVy8kUzOGMyJJzy4G*uYk`1q$H9K0E1S9 z+xkXkG}G{=ScFvUta;vAy3h9on4%?|p;QIDvmvI{dBY|4FL^ayZd`PKnS7}TP~NZM zYxL!XXw36r`~8wtkfs2hWW;I+;n_FjVy>JBTDPwKy!-y?L#~jYytxffLMk`-7Uv!* z#Iu|$w8xUS(4;oy=zdy4t9#3LAM 
ze9vt!KZa-?WMi8@gnoTE_6v%fzV|Nk)0K?5(v@!e2j~;0%XM1OMXBRmAN`v6Y{ik4 zAG;oQ7I957ozYrh?!2%z_J`iZr?Me2vuo*Cc6xlS5OSs)du7b?o$qayxd#{?yPV;K zIns<1*9^*_7#h7c!!6>u280O$%fMM_`~u}8Edd;0XC2^-2T_9)zZ_**o&ytph=NG> zOH(Z%9IkUf2LR}L&vOd5LI?|0A~K$2p)`D4s)SvL|F$R!Syn4n;xyVPi;(k>lI>9Oq)s<^-PT-S`-`$lolgQGieEC zRlkiaC?Bb;?IWv&K^>93+;WRrRi{_@mJV13Kzi4B@N>VldL`{};of@qWZ;|$s(Y`0?NFekDiVSnc7g(~P@s&~ zF2Lo53QI%HKr5~!Y-!{ZH(*9eOfAn&471{zD@zk|&=_3%#tvIqf92w)+dGfm{4tf) zy^F`ktFOh=jFP_Rm#7>x+IccFxPU*j`{l>YidMB5hoRr1LtRxKE-;OKS&%FN0Hzf* zuZB@3S>z8*1PV8&kf@~9wpP`^m<&lpRTTz>t#y9(nFBxiK^16+M~UGh+UCCJKQLx^ zM6n{ca1qIENgF3$J2}6FdPW!5s~>NCWb%+J>upee1(HrF zK_JUCcs{2j*W#LY!gEJrOqWG{6ot1uR>5>Vsur}c+TC{#lzKx;dFBiXQZROU=ThXf zOnX@~=zCaMWE>ON0Vxub=9cIJ1Ln>cTeyhxY9NHOCeNfq9xE%p;Y4eLnJP;Ex*VNZ z5`Yc>&?P)+OM!jkDa1yGsVYw6YKk3WhFDrsyAeV-g1O5rM~8jvxt<4D)vaVTyHsp3 zo$1_RkB@v`m9EB!wn($-+^p4h48$-r%c9nkpp|0aASW0BD`a7?O0DltRV2p7!&^@+ zMCA7Ppuyqip8c^&Aq>Oq^If2Oo2R=FwrUrLG~8^@OA)r+!ft{{K@rGH*Zm#97ym6h z`Y{Y&dg`aI{Pp+}ClHCjy}vvSh8hr!S-U#sD*%I_9A(@oa2CK-+CA??$`ynua0OC; z+~v#7*C8N;VigU4J(@5xr#sVgJWO|7+N$fTFHY@gJpa>MKa~!vwR1}qbm2@za5JoU zHBoMOQ+%UMFAX2O{?HTVGN^FYc;0F-)*m%v*UD%ur|ZDhI)WN27SEtMCFRvUz|@eev2Q0+S)fH8%{ z(8iIIYrP_m^J43R>pvQNsCm>S_V;c){Pp05>+E>CgKDD$?{Bf z^ZK1CIB^J!s12o75Q-qPL=Xl0$1uXk(CH$ZY@DmS?T3m}Mc=#YD{EYqysY1IH9Y^D zqMm0K^N;O@wO{nKuo>gEw@>DeR&PK4R7e>C(qWtEyP~Il9oDXXhI-g9Q6Z$@)DC>} z=`H0C<_BR;Hoy3HKYQVS`vSCFI=#V)gprAxcMMa#KIo57pCK@c;nrxw37;?`C9(_? 
z3zV87S6Y>pfK5EBZGpl0)zGg}qg9gTgmA){Z-upr_?{>l;gK?i-Ql^$aE${x0AL!^ zHubF`DJG^Y3R#3ztwjo!Hu{F=x)m!DdL6uJ-5(HeT1G|YQNyZsps)!hR&XmN3Va`O zhCzbwceJCkThk71pjKmEB_c3u{RERrtfRcZ2!k$g;du}#%pJ*sI{1zGqS3qK;_D+G z{L#zb7&em_00fWh%*4V7{E4xZMc~un!U+5^NT!T{lrxi^|7X7s@TY$QkA58B-_8Hw z4`1g+G}ag_yZ5dh$$%@DuBcGddQnHu925ryAYng_Z`e#r z#FZ$acP<{#{jZ&VOWpJJwzF4_YU{o)@heZhDjzNyg%nNhu5RX;k$hns;RQ*~|DR&Nk zHhL&LM2tSL|IBxTYj4|V_ID9ye`PlBdhycQbJ#%;1U0oZ{=yE`-c_QLX?xz+o&f9KHn+*!{nEqti?wIOHP;;HwJ!>^xle$H@-KC}n2?+r3S z6YRC`n;HJ}?B?~2iUviH#;tR|f9*H=CqJ_PtKb}kMm{V_4XGR#pV{TUHCW@r2i)ps zMz7reXX`UXlV7UGmV{H=7jqlKWS-3)#FScsA@dm+<1i^T_N=w8NK9ck)<{YV&V+eV zuY}c*lrkG77$~a*g(DRubr_7-9gPRY%GPa9D@P_af1viwZ;zMg0D#$4-yqy5Fck8+ zwF_Qiw)QP>>~S8WvMhL6UE%(+*NQXMO0$L=AH^yJF9Rquk1)Wb08%g}g|AC(J>EAC zL%o@!aj!u-I2~kx8dzwOp|!+W!x?d)HrX_IxK0g$Bq|E7b70oOwe% zSpGNg$Ot1q61qbGAl^ zY~FMVI%A}7@E zRf7g(0a;Onsg{y6$!uBL5^;=lsnn>@x+I8aBWw&8pHB0b@{65DG|` z2qHJ*_X8yZofhUv3huA}xcth;p163zOR)|Mg7tVU+vL7&NzK^(d!OsoED*~{o~axx z0kFU-xRT=4#ai9(4S+!k0CH;5dHVxOJ!B|>wKH+%?R;_ZL%?w*THKxn8zSn!MMitcVx@+c{=T zEo1%!K$h7+!UB?ph%$d=05hpqy6=Jgm$XIU}%&L`H1#FmWLyIyJd@PV7*H^1V`QG{Gl zqofN{`(C6PLoE+)xo{z;y??&l0Rr^x7q`ioRY@=|Dx9jMhp6EJWzv<%VPK^NM)FK1 zC6ZvVF068nhfP0p*6JdTRIY$W5w!?PtAS`-gY>FWt@C=bHM6Nk03JV5q$qCeo#?*4 zDgYe-u=R%JBj@Q19Ny_KY%H0rsaP}4tI-8hP0TjdtJ`_UwPzdQMgU*p-Jpkwo-5sD96oe zFW7O%cW@D|{4V?AXiv5ZblO{qshnxAE&&~@Ug!Wc!;S=i{cT{GW@XXe`gH*JzkcCY z-@E3@LgoWd6SsX@wW~Aj$0(#@Yga-_gvUTnL|_R+N)@gM$WVFIJDapdJrQbP8WhMa zt#tZYN&_s+($U-GOV}?rKa;*QZc(51=N!uNv#U3qJou5j*IB@h=9}l)>kiAX#wgC$ z>XQ`FuB|T8_rGw`E^;;AhE%kMY3_%qwZJU{!p&s`TyX`>@moh7n5&O23`})zXOZH{ z(9Lb}YAb_%^&$&GRq!m$(EPLi<+{JT5CUFnLgorui&%gs-&1GSh?L4Q?@^tZb5#?xh{H%Qy1QS_*C855}OBaC*^amW_7Kh za39>6!3#(75|xYx+duq`r7dTy7(8Tu)TYR$cigAbpT7<*nxbaH-8!GV={W=m@M=G7 zEu%vJDR$-3p!15US~RRVWL+E+?o&psD=~70FenX_Qe~wA?7(EYaKIz(x3Ga?lpEnY z<1vD`#y0mok6VNq_;6dsyTzXIZ5;k9XTCggN||wg-6aVDbO6BCBkHd^Bnl40N!w4; zbhcYhy{c3e1`)C?l~R9!R$4QWey#9|m>UD!KpkfQ;Dob`!N3KE`d$iP?ErIZVxlqg zyqQMPRiFVFtwp9g1O%l6FwrJ&H6J$ 
zFPeXTu2DPLlY$T+7@$^n1foq#8L%aO3|b=%z(9lIO705tJzGhW^_Mr+FFdT?=tE=b zX>$LtZQC|$BX;5S2~j_G{I~ZHP>9~po379kgD9ezaaaMZR={5IH>Le|y>^-`0!+0W zm^x03<4UK-3f~88F2{l^uAn)2LrF1JAy>m%TdZ!Z@qOrYPg0M<=F+Qof@TE9UwCxi zpCA0HKse3ns!%3qaKZS^HxgS>+?G_ZG(-r;*5mZ_t1F}7K!TgQdj17@gIU43zPi5J z3C6~tMrTPP&zoK0>5Zhjh01}7hqv8$<~4J-t-kguku&n( zl(RceUww12>D9cu4p!K)?XO5w=pxwqTVAPu@p>BcdI2RL+=rq|Pi_bwoD=mAul)cG zX)wBWeZGm22KKI<@&{)SkYa4h3E{8d)^%rkUIt-C3DQQ}hvpZClDGO=*c?-!1_NMg zkp+q@L&(}(%fdNqDM(v51Kjf~ObDyfoW*`)?2mnV z7T)AKt1HV3>2}p6*+z8psJk8$KnDQqScq_LOkhNw;;vMc7>kl~<4VN(J4_;mSWA`{ z5JgD=g_le zJ$5zBX~K=U@~!>nkDMAhMN-H5e$IV?6p~*3wq@~)UZ8He<!M!1gj$QWvx710L}66Oie3Q`1W6X{9-dbN#iv4`y{MJKumV&$m(+Q}(BHi6Ncn%i?ZqektHuUcUc0f89LH0*e>pxQ zEb%H83$^~j#1ycz31m>4+_Ju!3JeA)rYhgL756Ew3=m^`?ytke@8i~LNlgHQ{D_JA z@W4adb`M_U+!12w?~j*u<;kv%tOQCwkw2OetemZAuau6F-Bq>^^`d~kYx{^DAJG_CX9(NhUlX)ahL9uYtX z0Bkd&AhoEaJr}GZsgIzY5%A;U<%lb-GvA{Xx3i1M+lfb;n>gn-dg)7c9C;-{qnu=Q+ z-c#p9b!u}ZT)DiKUeo&i`2CBU;!SIzIs4X==i9(fj6OR(s0#B^=q*moFH8V%i$y|+ zO3eglwgzMUZk7N{g%FIOm1o+g;DDP@CSH6=3IMKnorDyOH#QUCerH=yEnUCm6Y8jO zA=U+I6e}0HJKQtVf4=zFz-;X5304Jfhm_H~Z_gEYqTtk6wxf9}l^q+0CXz7w=|E8ZV--luGD1oGI?2lV;V z>kXSyr$0J+Iv0k-)oYG7HW(|-gW)sXJ`0WAm9B*_G|kVPdaTV-;I1-xB<#_E7^WR& zrL)cwMloP8w$zz6%5iIGsWR;VD5+Eg^VHB3%5q9Xg<)Oji12nZxuliZmc-uV)b!>t z1aEr&#ll&Lx|WKGiD`eWHf_WE99?2ScGz{58C@GrZBl!IG7`Y+$$ejW?XKu16~sSc3Z zTz3(8y|&Sz{>}l@wf#%SmE+f?1hOG)q9`?4Yo^mRwA|D_P=ST7$#bx#ue*KiHz(2> z1cWe58w5B2(*`u_Rjw>Hp~vA(3IOEPbiuEmV&>5F=6d@izWB_RU7Jolw&T{dt^F+v zjp52IOXso%JxqS5F8!*0W(=(~dq-;Hgy7g#NWnmpv4K!alY{j*0hkJ1)r^8*;Jkuj z09qYoJiR0Z2T;5ohNiV6T}%obO(t1o{7UbRW=+VomTy@pO2qLpnSJ?Npa0N5J{J}` zt0Juw4kF(Z?RN)=AWpU82Gbl%JCD&JKAC2o4EGMUfA$UH$G}qbfoner3Nf>RG`upsVNF8NTz*`|q#x|>djd*OpeJa9i+{?C~^X+836?xRY#<1OC6b_ z<%ZxVhLz6KtX7mGa#h+(0k{H{C>)tg;t*=Oq7?&5NM>*@Qfj!NhB6R3-5qsXWO?CE zDvvDJJ@9Ef`?l{8%T4#H&;QY>x-T!u^-awr2B?7IENl+5fY<7yWx}U(-zvZfXxLQs z2SJQm5n62XL2NN;?3>O$2ypZ{CpWyjEcx7TtEoV40fLI9Ghe#}zxGkxD#cc%?K!Qb2k|RuqIOkLIq`Awx93D0WQ@ 
z7B8+(-+XbUMv{fv>{R5=&>L$b)?EAn{jW}pJ#^uf^{teY3EVjUwEkprkq_#hx>`6208aX#)93AM`^=^Mr&SSoBoLPT^zHR> z^lVIIm9MZKRG1JFAYjAhjbkZyT%q0N@-_eIBR|^vuUmshp6gF6yt?nxR}S601AFhB z)2R668;qxx%Eo8L974yR-3V<-OV)sHOnP3e)@$GXxL`WxVy#{lP>g-@u}hHzJ^AkB z$yIKNPnD&(gwiada zoIiJc=U8fRcGbTCfHL3MSXh`ZU4zTAZ?@Y}50GD4p{a?D0sI^rEg$doXD`4r|=!+)U`A zLW7J-M2p5-+5%`<#NyYm7--1MxXN`EZy_trIY#%P8D#EXGso>(*kqoHOCgZ zJq~k0;YQpGOgiR_!IhE?6&)-B2NkB=pWPSNYP1wgYSA__4;t>usg3F_@})unRLz7J zdMgKi=Mz8qMTqr6XV!xPf(cZHQxAkFa86K-0ZW9KHca7?*4{X}n(9GZX4l+y`bqck z>^$kWKf5(QeWf-o%j9idx%~Y2J^IKIIPD{)<9y?;lN)c-;jxz~z*sMVScEjN1}KMk z=T6Zo5^m|{9oO>rUis41-@WnCnU`L_YW!T~9pTHz_U~_h0vjS9f1ThQ*Ye84ZB20J zu?2xk4A`q&9wPFouYREyZ~nhM#d{nWEf5TEz2gTP!GESp_TMDX~iG34zE3}bG-pd&R+w2j|00T;uF~$pKH#Vgg zxR101po}ES!>Yi54W%9saL|a1e-X;*N+)+7!!Xs`JvumO`gY|1yT5%wY*LY0oUE;P zsF`c5ty-9>Us;js+oo17?!G|+6yS`@y!My~DOPIN<+CM(K{E6og^gc)3g8>3t1m`_ z`hCZKsex5P4`bsohAAg1aawsm@`KGV)$k^kf`rO8hi@8}e2aQ&VMnzVEMOlGFF*g* zZ5vxQb|2h#I@tQEBV~N9{YfInLA^Lt`u*}XeEpKE8v-UygPIt0h7zV{*3M~QGd#W` zMJQyo1w^$TWdGBqXmuP%d=Qma(I+ZTPjc{Ey-* z9f8z(?2U~uuLsB{civv82qO&A5ll2U1h6bQt{gl1>y?&Yt6{im{^#(JJO@er!1xaj zZhp`o3rri##=%WDn9EOhUI9ZPjQR~1D*yl>07*naR4NlcxM90BPp<%k)p>}a;u9eO z1Xr?sRZBSJY<2Vf-T04h{x9iYy~`ax_3X`+5SUjSJMqAWZbKG#esPvZ(uG;;;m84U zKRd=fZK#YVGO3!M?>7LgKGrK;kZSJ0TKmrCFE@|@p`{;L>Yy+VD5IX!I#YQLV2J{n zf^@WA^#cJiR(U>h#`A+lK(S8BAgTv`@4Q!#=?y(Oy=#|%H#zs%k-}oitU{zcZu4>@ z(U^0fGDA*{tQCxTj1oo_v!!)qk|_d!4glCmysW$(v#4af6E|I+%F>1xG#C8QoV?U< zcoc+{?Ce^pj8N6#WDwW1ENhVUtF+IoXL-2gEA&fzqx*DDOA(b@)h4jU(O z8j&pL$~i;9K?D_t?O+CqBK8{Ps45gc*V_$yy7SakY|lLM>nW{{H-yBW2 zEidms(waEh4WjPZxlfR+O5AIcd9ANc_Vx6zH2P$Ww3 zV)H%;f}q|tbYlg8t1xW}GQI^}$}sJ16(XvR&g`w%2}wMRISoL$sy^qXk9^<{fAJ#^ z&<2F}UOAD~+a$m<_i+Y3mjIqXVZS9N+A<56V+;z6@4a&2a@?>*zA=5vnWy37*?H8T zd3fem^V{E6`i5|4lb3gGFR%PeJTYEjj!JLw#JfRU{O)M!Z8ulu zksIAozf1g2{Vy@{$=>@3)-& z++J*O-uR+q`(aFv}-er7(pJR9>#`Zlx8N1 zjLlR!F`@t+2LQ?yU^X=>2P=Bd^@H(nW3L#$N_Ll)w<|M)&CpB?p0|XV4jmhe;a-(= zl3gZoq~DnoNg>HX4%#arQz)?736fPtn9g9+v27P(R2b?Et;R7{7-vvsajgVefo*is 
z$ux$r>N-6+2=2ao^mOs7m%b1E?C<~S3;q$cROzr8I|rsY+q(%QnCP~EPqMA;SxRn5)C)mL1!>aHS2Hf!ymy^UWhw-}d7jo^#dhZ>{`E?Q@GIthasdmiWf{L?}7C z|7dEsj|gZ~SPmj8A;%MNJ~}mjZ8HYUgYi2qe;dC)nRn5|&rUwGdFvg78bk`yKfU7s zYlf(zpF6&? z;zh|+B2)p@^OyhPPD_U0xkRPbq$s^}r*jBD_pBuum03BQ_tqbIO@DoH>BbJ+mBr9Y zi~s}{3uFKk25=yNSd$Ku7;~ar3Yh~c%3~o@gFKFt1b8_p`3wiPt9UpPHlKj?# zy<*M>9vplx*oCFV)j<}Z+FpCv0461MZ;p>VC5l36xI?84sLuFmb}lUGmotaPb2yHyKIaXuKapbc*e zIgCn^$qj!o=A^x8~$kT zaOMWtn!{O58UsH@0jL|sB%I1Frm8Z94R#ni$Fs;nkJWaxfwo$?$VJep2lWGWFK}46 zLa1&e#?9N(^07>4O~dCZWN|+XS|hQ*Lt$9%4o6 z86yA{NW668)b@;rAVzo3KaPKi>z17Zz?=>qPN0~t!iX!bCIy#)o4Q7+K+(rN%ySgX?7L2U5qA&~{ ztF6TOTsRGw`IeZWh9MLbv49AcQKrD=L`QBInW4=9ctC-IG0o5dIu2kflOoFU+$5P! z!aayjv;xcX!7hzfrYGBr<$JQrFXGIWRb}6DI5~E`KZe(6xW)58Ye2Ow9X;8i6=4gl zNnW#gQ}|wcQ%{rasc2Oa7Fh-^H-6T4HCh+Zpox^#lXN>Ho`g;=ZHigy_`zEqgFHxe zXXGEh^6^eZ#2o;ULx~0TbiK;uh}}$ixO=0!ze`dKE`vV2R#BFrnn`avyZXqx0QBl( zAA)D?17OaPCvUb&(H6bs$TO#M0hDmeunRGjRDjF6{A~38d!Kj$Km}Ss4hMz1tH}+S z(Jz(rS}A>DEA&P$Tzh+EP!{zwhtJ#+zg*kevGn3AAC9TxgJ%z*sk35mr7^@IMt~V5 z-C-9>zG?05EP|>}Is(t{rvt-~0Lt1amKX=D;UC2l(0_zDf^el(SlD6{#J2@N<9r_3_pdSA46Hhr{<) zUAl$TkLJs+FocWExLTX5lNdzq@d65LX@O8-5HaAn+}cE?8M8ohonps5>D8DmdOGc> zl4Fw|!-a8vm63*7j(P@yGb4mR?#PSllk6o#o1KI0j^S zCz5pK=yH4!2auP>d+F|UceFaWo;HF75DAf)bquTZ7_cNs&H?$(h*F&|BF|grUfN?a zHC7-?24&eA+DZ*)X;a%7r|otm!#w6da>Ijl(|l#{G1FiAm^yqu&fA|vsy(ZZgej6TdyaGC@_-|`3dpNrL zUX#0L77cg3445X?5-68t4)LjaQ{?LaOarJ~95(#bo)c~|JKyY9%jm_vQisnjzJm{I z9>4y|SZ`i6rOB61y#6i9G7(k&GK}v~N$w1PjK{-If!A1uls1ZysV21%5 z2I|1*l3DEoKmc*R$6Djd>Hr$MX1UsoCnQ0(gee+cE00(4+Kr)qar|R1KFWZ%x?`X+ z&MQb%im7*42b2-Tks+c;Wx@^f6Xh4;)WWlyP0z2e;n6EMCobI{yRGRDj{jioj$EIk?j3Gq0Sm#S<-RYR$_KLCGV&wT1?4^XR_*(f2>PY^fZ6@6|`{ znYmVC&%d;}yB2qQ<#O(rWN)|y{^HT^%CDXbN<$QF-S(3@DvUChZZr^R<_et^L6J)j zKwAu$D#wW{=n!M4Q=5VYXPL`WLq$;MHpHgW(;CaLib$b+13|svyUi?(qQL7KJoBy@ z04Nr&T-wz2#sgrM{rd5n3i2{w8rzmhvF!r2wP8-P5tt`$gu^uMCu}cY9 z4Zs*+Jmy(I#{par&eJe;MVYUcDdJ)fRk>%}DB+vq1yjA6OXl!|?p5nogEw~5$%V>X zWm3#{ONoaEH0N2Cw~doQrYS?xBJe61E+sGk7iU5{5S-M_N<((m=~y%@bHGk!nV^t@ 
zVY$QDIA6iY>o!gzQo!)$2Y%zp-;1i1sIm>Ccy$2ruF18997Mg7i(KUQG!iS?*g06A zcnevryk_bj|2^10_!yjTOkMH5`S0M-Lr37+nE&Z>mw;F(m!~E+Zk9?%F119E8oZ`~ zFv_dTS0`W{>Y8Z)Q2FSNto#6SlCo%Nq%$hr4+bNsbguh0Q+WQ49&J@NZnD9?OBY}J zCS!1U^Vwr_%ZDsKJ7AszpZ8|RofX5Yc$&D?IRK!}L{Fb@@BL{_4-o2Sjg4mS@1r5=pOl;Ak*3A*F_> zq{<3MGY1q<=4Zv_M0JdJ)@V#}4Q@h#&4yyW~RyuVdX3@A0wp(TTJZt2cUt4kS+Fsgg`H9KN(F?26 zmgxWxK6c$q+hd%;k*RC&)#rQ*&+)t)p-fi#K)P) z3a|`Oq%Y0QZx4+K=$?bmUfF%~pw^w;A80;(`M|@LSOSXNxVGz1@$6G%78^v(r;qK^ z;+e~yq5H0Am5YiXkOSrQesGjk{%!cpiv>TK{6wwstoMpP5PSuK1NUsePHUX?W*%k* zhClw|4mCZ=Yh!~|;a8iDJZW8|6$KbOum#tkNWmBauJfVN+B)lJ&a-(!FczLim{V3o z8i?oP5;sQr^~zG413z%dv$H#Pj8gcq&o3c`CbnX^gU*cfH@gcx1>p`oKos&k2L}@2 zGDE`jjh6;;7~xo_eWf)J%z$$PF4Go(5iBVdURhLXWTa6JzkZ@ppyL3hS?|_Ro46vI zkFaO9>+h>}Vptm@sW@e7I59prSK^L%aKnG|T97%kD!qe0LY^Z8asqio25s!|7!!*E zQ50nfy$qa0biL;HNTLHL?ai8ul~aJR1ME1))H&qf_rj@EWw&13p+z^Tpxn zO8kRghA%!i1&_Z0eagJx=dXNU`^MLzTf@!sOM)jL2mmVq_0r%3GzJ<{8w z_q#%d7tbFZ>5qx{ho+88&(SxuJ+IoqlwY=KI9L)v6VGgAq%kJwUf=6=~lO! zSN)-`LQKBC^Z`L~07gwJd}J9U0(DE=WVYn!xus{~T|R{b z-TwR&x46k%clQ56_|1zye@H@_ z;A;nW>w`aM9=qzU-+aK)SLJJO`}QB}J9>v}VqJ_|B{TBSJzKb0JpPGQcoUr}6LNXy zo~YI98tU_$pd3)4g@+A8Lt@fAEnI;V87hFxNescnsIqaYQVqaLWgl6L!ctfr^jdqe zwL*p71GmEeuzq>r$}4VmO!-yuc;4!7x1$2AD0^HDqMQSGm?7&g%Fet?xV1==fG;Ir>*z<1sRpTGWn zuL2`uYygKQcPxKntHg)>NO6@@sF$17?&j8#s75DONF zHPiqAHSph2Bfy&Vou%AHtjXY&%h_8=o|v%?Qc*%}ga+ zjsj)g+h>c;I$1n9y?m0MYY`SCj zE`IFpJDzuA!ze^X5EH(0_|3I8X6%Z=oms8mbf4iU*5c5yoF`+3Hk zadwzdNKp;EZ+_02nYXMKDsE^g`ibh6FyQ=~LL*jCUyw$|fD0Vr5JeBZqf zKKgcHzV?&w^(X%F5})!n;>&M`zxa35{qQ09&+F&&VPXfG1xD;EVpSi#ZT_m%+IQQy zvUOq9U+2I;gMuGBjI-PJ4F#sVHZKHv3b#)A%vGK%y>`8c8yS-%yRjY1Jx|2%xJ4D% zzj0${PQ7@2;^n!rgr1!W%S1UzqORH=byhnWxB|MJcCNCr~K$&u%t+JS6(;bipRRtlxNPW z97G`M+FBb_*6PPR@hejP^Od!^ha|AIsC0mwen;{Bjj&+=>q^QG@B88jea9=x3#TWg z3om-3y%B6wK`}y@BaBpGhAPbrLB1PUl_|{qAgp0W_;AbF0+3(vGgY?zx*Z)wgH4mL z^1cK6D17l6R3pDuUu+j_+zYg)mR1=lGE)}PsXWnu2~dP9g^~nZk+ltrw-fABH#8Vi zssM~8794<}lzZidsFj0ixex%U$bc{i^NuC}&Bsoc3UnMGKSWKiG4#2aZ~K7;S3M*$ 
z*vHndH@)t(i6cAO$}?wm3D^+lrdO5u>}Er5TF>iArOT4$gI5$qtsyK^uh-Y+;iyjA0*ND-wj{5^vSbgm%PzxzZE|9Yt)^* z7ye=CW*(0{{BqqRbKkkjXf?7UVc(? z)AOrz%QVVQPdvCVWoEZz$hOW6COJHpzIU%Gpn7HL_VG(0zBJi?$uHk%9$DD-B^_2v zqT{w$BOk=X60iWo0f}rRY4$G%U|p|nOvY&waaw-O4#W-RF#(J)04E7U0~OTsAtU+NKRmxj*Ez)wYCm{< z$HPlezIp87Lm2))g^hLlhZpn7;l*YrdA%h|&lzfVscVpnQ9?*+K-BQ^?R(LoMyIr^ly<@vI@^6bQbARPdO?*2PU^Bi>*;W zXra?A!&1l^^%%-+H)R%x@T4FP6?CM%&CM)aSN`PfN1Fgpw0?9Q8OV3i#i=pK2YR&w zPL{itXo+%zT|qgKtZ*1`kz^>%lT;-ZG)T$;9H<qX1cI-m*Vr48^SPhc+@bT9}}4Eb~cMP(R;*d*niR-9om95yyE9S*QF*Z@~lCe%5I z2!S87K&;J6rR;sr$Gh4pi`pN3!#15IG3WK(oL*IMm8er(81s z8T6{@FaH#7pL{cXX80dZ%YE$ndj2~2vk#-qrMuy=%NMpJyt-VSte3XGdf~2JH(t2h zkU9ZN#(DKje=# zX~=>AQZB%j`v|os`k9{wW|$cVK1sSt8ThdrGjL$F1wsHUSOUsa*vOk73+|h{X}jvr zZ$H`i(pN<=(%B+(T5R-lJyD5PqpjT&>o-P9cNWNI*cjLAJ@L$2>tA{=zR-Ftd`x`g zyx7gIU-$1XKKlFUI-iBVjlaE3Y%c^lT`!w#|HLu=_j7(SzBM0|usjWoHb7Kj8dNUw z{gvd%!G!w5>ge*dx|t}hEoOeTc;b%RmqzjARu!|Ad4Lj}fS;by44bv&X55_FbFn{m zcIB40yi~f!T)4L7d3MX0$o8OWe1d`a0Mr-<#%cg-fmi@A6!Nse!7A|B{+VzE?`i->z>!pCYdL681WRQE!o(p<5c}E> z?`DNF9QohAC}3~D$JL=nl5VHn84NO`31H4KH9qbf-+K1tH)FE>eK2sq%~2CWMQ~-T zD36?P-iK?ygbDik*%zy?Dz+|i71Y*by)`&mVQnRdKo`Q>&!2?mG2gvJd3NiVqI?*m zL>mi~<|c8V9QB$dL3L(gl@Sii4}>d{Qd6PG=osoK*uC6RDyseBfnxQ-%WK4|jy5Ek zzdVvyl?hIZ!X}wkk}xVIrG=%rAx=j_mDyqHFaf}vVo)a63UWd@@oME-wKNu%1;bQh zk}&457`fKbt#&&#Xpp6K-JSpdAOJ~3K~%AV`$m@L*TALT0crsq2T(IJC`#5!uGeyP zI$&DLkV=`2ZE8?-sM6yV4g?}>+`cq$o z`ph=(Mx(WEi^8Zs>?W{p>dK-TJ(w`7z$vrkpZLtL@PBwO`sVm6;Md20_e6CM>Rguh zxc~i+;L3>j+NpiZbIwit7XHU?t?))Ztd8#*tcKp3-8V}*QzuG|bq)+P(+VKJg{u3S ziugJxudD`kQe3|(`b~KJbqB8R&Gs&gfw<9GA5PQfgAbGo%jWytf41A0EcFK&G@xyGl3TC=1`NPKVFyNIZHcu2U|<-EK~M$Fl3iwzH;y%emp69o z37p9cBgoGmWq)?+%Z%Ce>(fo+c&UWAIQW2JIIqUmcN`l;EFl&UWP&+kDZ<}5c^8Gu z+J1B{YidiFbvAPfIHsI1>hXXJPdL$V`;y;i-fPIImw}2~=(W`wBctH+6bzkpr|-Tc zyYrFFJox0P>hP^P`_T%$6RzGmF~@7j_Vt)3Kp~#J*gGbrJNpWw%2w%27hHfzwbP1v z(uID&bP|~yaVkA2G8Cx3h)vnFT)8$rGf~}z;E8XuBA%#{MKL)!5v1LfMKj1pD%sF0 zAdfTwzS?cQV- 
zGYLGY_Qu6_h)U}(-P-SK?0G|4Nrz-SZF32&yYe`ePL_(S!dyS>8i5n}iM-*&jMIy&8LfvJoQx;foB z@p8u;kTHpYDr0WG>i_w(kHD9I3V(0sK6vE5-#Ss+i`dQWJBq*j4LEUicy0aF(OJvx zd*^s`@gJ%`mp}Q$x8>E?%OLT}ociV7}$=VNz% z`g+M_8^P@M5Yqp{ z2d|$eW`gvIurgEnS6^RFqbPMbqZqgGs6 zkKO*2E7hO$Ts27!6>9F(?Y>?x;i)}uuSHtS%iW!mPuePXv6VEhO8Qig?kn0gGW^+a}#J^ zA(92Zy7f8-YiA+PcP+o)zi_5ir=GG#jFe7I+*k1*h_^NdzRRBOzpi9n9G%To;WltR z#)GP>VHS7_uRmEI?X>p#IZ~NXjsmX+fg}tMGt3YOgEE{;#z&awWA*9Ah;d4QJ1eEEa!GtsN!ewrTGdmVUfsO+d zlDop@p}xsU1X8K82uIvVHyYZJz85vJY&vXPvR2;O)!SJtg%g|C&D{lEo06@eN+8Z_ zJj4A2%9#wy&GiYPc%$vvZjdRBC-rd9j+xGd1jw<$svu;hShCQ(W%G=frm|>m8tUwZif=#3ZN=Ea}gdAh9%=-p*rk_n%_dW}!9yfRG|u)F`~PClx0k$UGlAG+g#K*X}fc|fxVM!O*Jw%-+Yg!=}uT$ zSe4UNCAK%4mqzNz?y+5qFYmlq!_N-?I)#r;|xxE~cP2h3z!=Sn$eK`Ud0 z0pF)-YxLm3G9rFt>IK?HHLu(0&n^AQ%zwVZv5h!XRmU|w#>D;mF;L5}^`d%;QYJCZ zXrB5MtVWKbf{W!L;Y6ZDl60xYB@@I^!w?KzkW!IZ=L1iTw4;4)+;o}9@G9mAkIhDkZilyS7` zHw7nYpQ79e@E9nkIqDY&8a<`LU^EyX_t6)wn{H+Hm*;QHFPNaJF_)FgvI9gzlRK7~ z2r^75WhHDeim^+a%hG6QGiA_z~E$>7rke?z8mhDofCkL1K3KC#buwbE=p|hz|hgKs3_mS zK=|E|v37c7m0Q)C&1)OuWJ@K>nR*jK+2~Z!8D_Y%P8|RVJ194M!~f>Yla)J5C;w&@LZIGQby1gs+?|Ct?>Tdd0cZe~!@(flf5agwTv=Rem)*F% z&@qEu%aqS54Hx1Pa&VO>Idqs1(YxU^4*d8>d-h2>})FI$W~Ls&=qWZe-0mC(D2L zmJi+de1Z5-d69C|uQCafZ>Zn`gyjwcvplEB8tp5gGy=cxC|()V zw`ng8U^3Mzu}YdoDEEss@VXQkC~PLP;)uRR=eJyD34xR|0@g8YEg=?&0?3DrH{SCl z#E8zeMn2PsJ@U*St&1w{wiQ%H;`Nmg!Bs8*AisTbK;C%e9DrFLecJxf@57(|4t(-A z@#5P)`tX;I9k#X2aE)LT8&YgxJ?ke(X-^W$@!iMk07OuS)_iJK$c{C}V+8EEq&o1u&$&9d#sP4y> zdSKy-V3$95U?W{Gja@&zIp~N0Q=@fxI>05>1!pK!TNx!N&S(G*^nlhCAvUTzOVluQ ztde+aV`}}z?!)SAHhC8j|78AC{Tn~5K`{_kWfUp_6L9yd4ObjF=`1UlwnSr^^8(QV z8KSf_h7(%=$ukrnK*k#IP(lC&3JOFF(+rB7%D!ede|zTx70mSAfy!5Mw?QOmt}#3#s(Dzb3i!diX#(PsmV9bHb_7TGbR3{4cZI}GZ1ot9y+YcYX-tS_g@zi}0(yQTIocGKK(MO1 zcD6lUQ&90{7ga-M+u7`9ZP*@W4Lwp=GIX(T9qeTgMuk*mk`KZpnd}wON>^!1n6|_N zmxM0y6HAEV)&hyh2gkmO%L??;{eNf~)>nUTzAQ^ohtPVtw+(rDs=bhd813yJ>5HG} zy)?x6!n;4c`K8~4Pyagn*+<~)dq4j0eV zZ2kRJ{OHSW&m(JpAFeNrnhtPa8UP}w?{Mz!F@zw%)-D6`tLjW!!OknKx4Zs`oNQGS 
zdHrTPxa$Y4eeZUek+GqQ5KIAIk7^h1epSkBUf;v!9&_d8?jY^cYQ!5s+&bt{V~wz> zl`_H-f_~otf!HWpb4EfB*U8v8w?Ro<-=eJp)f3>;AN>0_UidqZS{F!X4$6|F61nG) ztz%apfvmI;AU z4zF-eL?oYbH(8vE3MsM7TCcPW_m4Yku(q~9hyw*qXl_)zP;!uuv{xH+XaNZDqcw%| zZf+d+N`esF-Pk|rWFE&Ea}zTYH%?}#KKj3)0_@syXM0N^e?CxgSw)0lAuvjN&NzZh z*%N{!&g3d9`UB;_VQ^SFr$9Sv!Ez#GwK`TCtq{pI;npxh8F1y0K}dUr1`jmDC@&Ab zjY}LO29~&jF^-stRjbu)Z*8UufHNwH6rs<(;oQv3{fp043UnL*f;b7w>7Y}xSyei# zG4D}P06-3DDMHl(Wr&OZAS9{V2w3Q_X9u!U+I5cA8l8D>cV9@Y__}ef%sU|wnI4)w zl_(QHW~v=q+FTE~BJ*n^cT6i%#hDi1uuBM}9DF>i8~34&e}yrexy28DF!KRj``s%+ zFxKfAD2)v_D3r(AEfAHFZiukXcw!)#|LjklIr|In=f4e~`b~K9S3mo@#}Dj`lgGYY zS5m9qP`*96?d(E#n7scj_WZLiTvv5!#`hJMp}zmte`J^Y6G^H#0}V_osE?=KLw*6+ z=YuPrP|arN`J|pTE_8pYt;a;QU+>l*zj^!K^s)7O-w|gl*Qz8_To6F5weh9x+rX_B zw;ti;_4SF0kCHE4E$!c)tNByw3WMm%32TYntdtqyR-p`|WIzq5ND&2P?(dZAV>`>4 zH6hP#3{D*R59xosR)E3-2iaI?2L&NE@tE)9^-0Eg%>injhQ0)?e zRyeg<(Y=vsyXOf%x9@*$Zem?#QS|!{7>e-4kDnE#$#g9zVg#=l@F&d*kW$Di?J$p> zGp;i{RsUY^J@7~W8~o+}n*7y2eezY`xN|b=e)Wm__k3;C`_Z1if$!CSxOoG5KYzH` z18e_cUp;vCaZ7x%4KrnS3K|0q3^V{J$ajuJV|P(nc!Wpa+Fwf=wa&$^kz=ReU8^u5 zs_jzWUn{Tfi~e=}^>2-zajHZt4Xv)`RO_qqp@Z>`BA zGh%ZkHjy0+43$G3qEZJ>6c$XeLvO_y8Z8D(S^k>v`!IP3)t1+1ro89oK6>D*PgV%H zB8S95SSA#jkBnN9;IMEMWkeW78Q?S*3=w0n<78+pEg_~eQvd_43ARi^K~N41sCI_t zZCO@A82_mczh>03=avFf*V_~f^%JOP7QDm$>WJqg1jM?;=EfQgn4sLQUxx`B>nTRV zWI1J2j5lN*a#F*rR#e%rvVNK8{I1tl2>egx@2|x$dbnjmrLtICO&GHTuv1wEgbG2W zjGf7y$zq*kCbfi7M;#ce!Ga~kFE^UaN`rZI6wAV3h6t6pNjIZ-rQ2T9*g^Eu?;kQ- zUVIY7wC(7dcSo{4_CvD;#ENNntTmE^`<{5yP;l3|77x#iTO(avEFY)58fY2!lIO z#icc?+xXU6A?+HIQKo0YRY@})^(ttbE?1_yTZB10tXYx9{?7J1Cz%dE^k2(&KG!bV z_%oB1W4QEJPY3mey4pFaNgCtfxHw0@*%($RfQ`_#GQ_4}T=U~1{vir@Z! 
zS%MEhOV6@U)bH27zpS#??lZHe*Gy*86Ow@>kU~*0i3lRL%OD4oYdebd0HWgYfQ4d+ z(nJEG21tZ-Qb?I(YNq#@y=V8b_G*9m`VHs$Jn$N3Km(|)mG+t|IdvSNN^x3-IxAno z(nQBf!_w-V0apy?K9sr25TnS3 z(kKhEe!dCL8c0Wqrma@gb%Q>W)se*8w#_X=!pxVQk&j|{O?z_k{8DG2ZGB}h(`wRJ zScEy!HV>LvmM~5dqrFjOuj{i?c>qSS^%#_MKmb_OHyn+O)+;O_%Fu!d??ITny*$_K zwYmmaTx(8auex`%YfsKy{2JwBxk!Zn-?4A5!F<+hiIcSTjQo37$k8420;!z z(5$p;Z~LyR`EI#5_Kyh$0&|~vDB}#8l{%?F3%DEXpIaoPdc|Z{YVaEL*$@5&{M*Lc z!bkoFK9qL(*-z|vc*okQ^2Dm2{>wM7`>#Ly@H-A4ec!(xn25`5*Bv;021d(#bw8T$ zn{FF>`uVtdcE~mp13#ldiLGFVuak-p>1%KVqlRqm>tR_r!`AaC?q{~r0u`Ws;i2xk zuWBc*v@z1cA%y~+ne9K7Y|EG4$`@{a<=BwFW8~k)R_|}Uc>e04vbm59^yhRPCBq`$ zQCn6Zddm{gd>mrEYKepD=au(p)blRUo}ZZfKLo5FqFhA@2xIY2bu8Yl?1JsGe00 z2*ZHEduPBAf{B!kf%U_qsb|{RAS{!l?QwT$esL*pJ4n2Yu#8as{2uAD=(6{YmB9md z(%D+!Y46<9T=T_a4Jt^DOBS(M2p1mBB+!f;U}`FgtuR9`fG1M?bV zjjxRg3Z9~cB|K08KxJGkZj~L$r|M>+KkLUK4Q(W+#j`IXG9GIWVFJP)lCG-Lk?>EYq!E*-*rX!#9#W~6gD~kuj_ww`I;$nqJ8!| zFQlLP+MoQ|OEdTU+tSIXx9)FY=~;Hw)c)_!2qN1s0&!#C%-X!o0hnQ?E0ov@uxSJJ zGNM_{o`>E*zrFLp>*R@F{nY&14{l!n_3q9yF+R|#HU>-5kH>tsu?3NId}iHXd}vyEF}){l z0Cd)F8=>uPw2gk_nUCD?#KW}$td6bDIqu_|Dq6C0svN4=QeZzef6EkOx<&=QU zn=><=W#s@#L8YWrQWT{mI{Z^^OS9|6Uq{x%2Pl%lf7YNNgHd6z2#VQ0H7tM zCD#&L3?PWMv{&;0Zm=`pwPEFw_6@7O2EU`vQtUM)Wx;|0Qh3elDX`j{(>hTCQXYs6 z)3wf0f6;-r>L&;ZkWb(JN4dm+_2(V{E@L~M0bgk*Cjr(&7b~Wm;o&Ca!AH06xBHKH)@8pZ<_aFPjJulAR^`EDosmT1QYmB99H}GRqE88P` zA2~JJsO-r`I~gPZ1~k{Iv8JfqlscR`>QggauFreDqiyIH&kem{p;}wpM9dwtW4)6X zvf4fCx*&qGaR`=|C|2bK5XV0-UtRgc-16!j$BteYVj~>?%Du1O_{h#%9=qE8GE0>h?S%vI;VmAz?&ts)0C~9DFv_n!opk zZVC>CKpj(r9oLc+TkswC+d9SH!HyKcPNI>|dSeRAjRhx_Bv<%Fq&=kWzKG`4PM@PUF30)l~nxDOM{!-uLX??7j+ zI@)?kbU*+AAOJ~3K~#J9D@&LC$BC;O`(WYjXn(V|^47z9->KDGo}Rz#;81tpwy|R? 
zUOvOtthnX=VqwKf)m}9mBX_!bpqG3hTCjJ zJlBj^EEGnDfr*Soh=@)|tXGr=CoDLIUBNvDM+!l#>!C;rqhg1YCjvBedgS{byEFE9 z{|t!rsC2b+wk)U zz|EbXS@}DBCMkqep(5=f#0hk=^X|poDLNC zFtJGS`P1VOuf{Sg$+bvi5JU9l?Z>&{g>2OFZNfdhaBCDhhRzI~%*#7B`g zbIXhKgRj5zV!^icp1%I({iE^vWjWG)Y5aoNIasR`$U}lkA`LDBM>vyn6aCBk8aFV$ zX4$ z#uz_$W@+c9&Vn8qMbqfoE98^+{r0SDYXOf z1p>Wpg=N4aLanO|RN?_C2yw0~0_74Dq*Y6E8e=y7d<;1y;E018M!l_HcE_1>(*?!I z<8*O~ltdDaRHm$Vj37xvg(K^1mbFw*8x4dJzyL}+cumAOjhTq+D+Y$5)VDeDocY|I zo6fq;lKNEelrmS1h!Yj6F}x<>p4(nNKf64$&=Q1f{9s??9VQ4UaVR8ZNs%@X=1+PN zIk4U_u!II23__%|30MLcv7mb=?pdt|zWrn)pxpqdY)R7P7<|gb6qR`mg**W1mf%f- zeVk*iEybE@Of~cyYc<8R#C=AB^Gd|pJMgSXiJk+%Jmvgcd+oLbCnTjf00N-67#n;w zt`@k*D4C-LE4c7cm(b3M`g%9J;qHG`gd06K_P=8yFd+NzXPR}lBA)Enc)&L`@O3bm z!P?mON(vf!^IyK@s|OeA%OqCllLO=b>)2CuFzndV`_kU>50`IOpS$%lBk#O#ap7X$ z^38YD$DsJp#O6(tUp@e)K2jM!SAqYF84U`=W@>j0f)BOsa%J{N8LzFZ7~ea*rgC)8 zuAQfclFeM@MeDKFuFHbvl$C@jNIcCfCfFj>y%2{3B37z``5<1V~yDxS6$FoJu!RLsaSlUuae1GeeKJ` zf2+PUks7j?tw}Fh9@h~U^jEJNHe&yo1XfSw0BzMU5XDMW`v+IAKqLeN-biPNv&Pv# zx+n9#m7;9rv?#sx7&xVMz_LM#B^X?@c4{I=fTK>q5HenakQ58-5I3&O=S`j!K)`VZ z0LB40;6PYhiK_LaF(5_UAKH$&l5q-1L7Y8#R+1=2f=L!hKnN>>F(Z_OVrhA)IoDiN zMst>^_UMlH4+0}x1&UN4n0YYRhETMR6NrsM$RZBl2?C6%Mc812L6xDKTXXjcU47t% zT0px2=vPy*MoXPA#AXs}Q*wp>{Pw#mF6!m5a} z*lP(hZ2kGNnswu`tA>uw**Yf-d&dItFo1dk!YT-v>slvuAE9; zIeo*Q`-GM;*dO0F6;}ouXWAh9qscZUYvg1Hwp)|dj!_Hqm0z2l&!OuFqH^^I4t%;5 zKVCd}&Dhk17q|bg_l;xkW9-k*ZoT#MGsiADuwr;@EUjQMva$Iu6B~9so*Ztk89oHS zXrVc1)mWD$yCg@bcD}QYo+8_}{1j&SvKZU@%B@>YjHM%NW^Qh-d)*iX#1dB^9t`4y z&U_dMSw1IckZ<7JEmL)L=40@W>|~xyM2I z(`C_!$B8R+l7Lla^v&C9&3SDw0q>Z@1;s9S6ENf>oySy2NwU(%f&!Pp zm6t>uDt*X#Tl9oP%r1jqP-aEe&5Uz|5CO_LZ~%l5MrF0uzoJr$WD;;w6;sx{LIoqv(UgHA~nGiHtyDM{-u5uLC_QhX^;uXTS7H< z-6IgNcgT`Jg~vdE=GYTm7A~05mH`^gyH}fm|9G|@&~5;-sIjM@xiC8EmYB+tLl3z1 z7-Jjfy;x$<9NU`e@e*j9OR!L;P%8~+?DKdab6~kGJuYFa=ww><4H9QHjRgb1h8Rb= z3IqKS>J_}Z=sYV)Y-?F7P%6uKF8d|)4OxRiPrmDaWGQhB%$|QdlO+kChdNK=rLG_= zhA&JHTt1_&+W7d4_sxOr^9+&T<7o4`a^blP@#8bk|9mvA4phG0_}p`UhxyT=d$0QJ 
z`EbHR^;h!m9)d(v#M+HVCf00zB|dWM6FdIx5NBXC0I1QCV&pnZf%LzcY;(B3IlXts zqAAj$pC0T4T8KKB>Tu^63pgw0vf^)=qtOPG9 z?Fly@OB)mCVqY@FsG$mC$8Ado#H#6~-K#FsY~p1Q)l31kKy6P$w98y*k= zV4eqH+?5U$^KIET92FfWy#vBh0pLtY`zQ%0a%i@bS}_^}))aYby~4WHp6@sS0up=Y zomLC5vZvkO{F;y=?z}?r8KMbdr4g3fR#21)Rywz2iy3 zrjiI24TB6W+ZI%_?J33}2_X^?@qjV*UMp2-95aHAr$$o%#yknmK!iC0MsTYoFjV>k zc7gkn3I>XF=OzpTY)hvcp`1m4(=<*SLrJAifW=^2-N}V^XL%X0cjTj6mJkPK5axm8 zr#z1|;ze({J-*mpY-s=bSI6YK-%EktHB=FH5Q^#IS&C z7Glr%jy0{ud4K;@8B$yhv}54|^AL!~^K|_Xa@~?? z@KeFnq|cL#rgLgdR+M)=nr}KLb=!aRx~wz6EJdgO>LTHI1fS?>HB=v;gh2qfYBZ-^ z@3qf5n^vnEhyP8Y;y2#$g!)gvF!12Ty(4cp*FIJK_gx?U;twajv-!)nuK(v5@oY9Q zx^l%p6^{uu`t(=rX#I?sJvn4E4zJMwpkQlKUcEX1NR&b`e?A#%u<`aVxq*Lo_@&v6E_(TQ-?`_Df_a!tajAj! zQ%E`7aYcbJl?EehX_k;+x#lfO5DOY>VTI5Z3F~OtD3x)lZ)->5s80btY$3WP9Vn0idXgZJ7IiYeuUkP?6r3Or!! z131742@DRrXAuerQ&1oRAPQ{ZWK)BsgfJm#oTl}$L6UIqus6$7#loTmLy{ypvYhB# ze&7m+DPT-W?t?qpv3aw%IJeZvH~>pH5mk}Oh#-amML{CQB50{M*)&8HtaLzuD1tx- z=81h-7dk9ErNiB2CfVCPx(uy|LW1I6dU*Rn*(oqg!*^kV3D?%sO z@`b3;--j+CA#&6#YZd}-4|Xy~R&`XqW@5{YPfgh6{!i6{OE9uWhCX@>Kn{wt&Q?bK z^fH*0V&-xJ##yUvN8NPi3;#TO_m00hbMn^bPRzTRO|4xj-Ph^^pvH4yiN#m<(Bf`xE~ z>2qhdu9-R09(vvSD43|rsg6nj5G8XVV6IWlAW&+{+J)(rM|Rw0V*}->!yA5X?(k2~ z%O zjPO2Ar684`o{N0D{~fykO9xz00I=sLXL?5ua9~s!F!)-57u%D7)ow!zX1$o?x6uO=@J9Erxc{-$Y`!@tZQim zi=~JGw!i9euE68ka5GwTFeL5zjJR2Av1q4W+fCxik}53e#Vwx%gAz>cec+p) zeZe_0CeL>v9^liOz$lcL%`j!GwSF=F*6R0t@#1YS4?MH)4?Z+H@c7J@ubn%8eeFPg zN#6Uxdp`e@^FOM;|I74?hc+M6(|>%&V+ADV_Z(>QwOzMXM~Aba%m8Q(paE26Ybkee zEG&(v56(Ce5FD90RUO;tAE!Gu*GfW}wgM}v0%HXh&N(7O4lIN~tq+S+!;9xOw+FXg zzR6!i>teO|tw&+VFDtROaba=&_O!Ks*WJ`ke4}$cX`NaUH)c=v*boENrBxiS&T)UQ zcg?F?*XE&_E+i>>9X zYcP>KA)3Wh$Os9Ee>f5Ay!y`9cTz+YU>|$zEf8_~uWel>j7y3LVi*|ZoM7Y(B0fZ3 zM`Yf;6$FG>xd<6dhyzWLvXc8;PCvwQ!5~81r-(!@DBUz!sfw(yK>6?r6EH%SF0?72 z-2im1lh@-|E%1T7qXfm;rvN#pd2FT6an*0rAYIHwj$ zPQl_5DDC{g6@T0K z-AC}*AJ6Xik0VpND$j>0a_`4(`KK4=9uoiaUVQ&cTVE;d;BV*gN&s}=)t#3e`}gHO zehBKB(HsCY0I0EnV#N)PBE%#!E&zibWiURmEk(WQ5s54!xX~?zccMfL{ 
z1VGjib)K_>1Wb4J_C9o?a_dMsbN*m$g*!8G*{_%vrFU(%UzEbqyBj^wy0N zm&Q59-Y-K1uy4tFZ{3S`zh~)R6?15rb6rwME5KQN=jGibkBO1aVvdUt;b5#Z)>R-Q zfT+VI?^>2{a5yNGQ$ho_gcZniI3A?nmBoM%SC*vD{r(N>N;0+I48@D*QG!Wq05uyn=00IhL;{ZSq?tlO|0?xr}h*LnVvycG?ef@jW zN{#lGmz$S*Dk~i@!P12C*sEG#+#vxtiobO!GOf#YM^d z9(w?eg|psbjy16;>^|ta?bUwyMO&Eqd}NW z3ysHZE2*;HT6N)L zyU&02pMU(Z&+h#E@Oyqhk3Bzq!@aL8-Bf!iTd-gLrz<~kVBw+SAN~n{`<3dmE#58G z4iAwF*vfkjMc9svNOSfC^3t!i0;Rk~zUh^Pq{Ze&YViw+iyg z$z-gZ1A&=pz!>KOG!UvVbv7V^Fm;5W5)c=sM{X;rR~!q2q|zuYDJzH%jM)0*gQh;z z>6Uq^byg}3JgN?kRM)H>L+~?yIc+;-=JNH!7SL`0I+uG{Qt*H+_8zCD#a`x^+G_DZKaIY7HjK|LtiS8Z^yMGybCv4ZbY$Z!nLa%VO%9r~43to?HOWS z?*QSUxw$r)9POCm;}y6~RC)I048p0v2J9y+fEBSo4bo3yyl~2r@`o zCkV0_16`OoH;d{o`^0vQ0WmZ%v|(&z1^$~Wn$wS+I0KwSykB}iy8-Cj0j6GzuV-W$y_t^FO4lZtXifsadeDgwK(>rJZ5)Xb zaEz0@2c@-vM6?!x0dKNS*6NmylZaxfy9K<45Q#Yj@KCSCh|2{p2yu@4bn^`cpL~+{ z5u9KuKv9f20R)_z#n$rD+)^_$01=6a&WGRfiz;R~gpeBKHKsWQrx#8$AeLf4I7b8l zFr=x_T3T%^l<30ccQr*IRADbeS8^`3>9m(ROTB#Jg-shaK9bJUH6z=%ud2d-bIqO! z7Kps`O!H)eU}L28fOZ4Wxg`@TV88)Wje+-kDY*wOnX@{leOLk3ka%oZS95PcCSWuG zU@KV$*i*|jE{lX(0PZV(-TS}%{F~0{ZLbV%c>4BB$KH5kc=Z$Mh9hq~cHy?Yn`_Um z9y)dB)8oJJ+{V7gwvgEyUs|~K>ZgswP)=U=H;;5Lk6vakoN0c1{cc48Mh3@cGT?P} z3}DOcO|ocXtNqljFTuxdz4gzY|0(+YYv28yf#(j4|NQ^9p2H)hYTf^6=ZX8{Z%ust zui*z|`NiWUSySIUTeJ5S&zo2cU(bFg?iFJi5R$6k1_;&Cy*oD7vz z_5oC2$DO&25+f*36$nO_;UXZQq)@(;q=t*Ye31}UB(97i<4x&2s}SU=vLF;?ijV+Z zZRJqsK6}1)&_WLYA`1!cw$%c_s>CosBUc}woZtvkXTU0@nUGuxt~1?*UZL9>2m$7V zEwut5luOJh0IxPyq>X-w%ARZ2s&NemNvJI`_!ql<5%%CL1;PonOTGE|<=JlD21tb9 zNh)Pu>+q%x*It>Y+!BL;VB;`YiWE%0S|ym4$RkRzrZ@)-%sA(Cp{%vTGaG*?x1y*( zDIIo2*;!~WwR_$%TD$Ztti5GFKKy6zhX2+xonq^Y^+0^LIoI)XGbYaqlW!c>fOZ4W zsS;FH<(^vVW9zlWfH5s&P7Ey}Pty`ufpMO4X8`vs#xyE`VhzASYfQP#CC{@OU}#1rdx86^(VC};I z03ZNKL_t*i$p_%i{)YYTftTR7?z-d0jmM5$_=QhiI=g-7$!3`Q_75&x{PfrZhadS< zco51Tov$vhUB7oK8R?d_4g8c(?Quw(z~MDo2~cALdTo@1s<6NOG)X4F!Wg~$@tvr#b7{0TSRwFa1ptO4i>NY>OEO$?@d-G-*CudUUy zwx`L2*HOLq(ma%FdIwj%IhmR-hBgnzj~w3n-glh+TFx-cnabcoPWuwfME_lB&+`Pi 
zst+y%t_5-=1lrRXBdN3?v<;3>t2t0_XqSSoIvKR9mR%B+%uq}_8P>Ai`PPN_it)L< zDzVCFB|r@UkeP)+XqbplND{j`;l?0U7|fzR>Ih)dEsHD%We6~g!IrrrP7op`$3Qu& zH~I(qQ&3CoL+2$SOul{tj+hk=5Xb(=QYAu|IZ#rkDbyCB_yY2Z^c8Q?EVc9pFGE+4`ZcerG+j zx1RM^?z@7#a6LWuwihnlw)YzR^wsFXozJx1{M60u$8Rc|um4H3{i)k0j=oXv_anin z=|8>2pWokmHS>Dz&AGns)(ULshT80x$~%QR^{5&M?;ThzM#yTTGFv>#_tE6$o33wZzsT00of` z7F-aHH4i5Tdl6V_DG+A?z;wkuM){02=dByP;_CJ4FkiVY($+ETTW93zp_3O^UQ`2T zPsW>vyWd>-k@$fF3E*x{w_SGdA`CMT!)@C;SyTfCfKUJvLm`P#4xEkCj5~%yL3z%W zWExC{p{#SJs75RhO?%WTDr%Q{yBUt*lX08RE?HyWZVR zBTF51R+yu@s&uwIH8;Q9$$cq}VCiWvdM0dvm8`TE;(NeSa56Or{cAS8Q03chrL z2MW0W2S!<~oimm)jG~Cg(kOAHe&wYyFG~x7JuSWsWM=tTn&1@osH6s#kW2ecc2j&}v78iSWz!$cLlF0nNU#MA@iU}a)h zyOdbR6qme*0jR|gYs2^PCCVAnShZN1sh=&5B#96Uc3qLv50dwAYpA z*UR~v_UJdhbgh2odU@f^Png>t+m#)?<#0PO%r(nTUN!&7Ts{NEyuRjl{^I=TB-Gb+ zF6A^Dv=bn1nKo{1a;ycd?Bx7s*1pfb)jt7$c>A{f>?h|=?*5CPwB9iIgQ9Zmxd#rY z5AXci(*3{u%=1Hz?FV@4CJ#e{qi*@)3id&`cu8;#02}}`fZ9&gHLJZ0gqS2aO0NP~wWRn3NtN4kb;Sl!T--jQavPf}splOYhy0>{wF(2hD*200j$XZHk!_c7Igp zp-oBiFsyy&3VPy|v2~lGA`=7syt7FAz^JLkhfn?0>t6mgM@-LDC=8HGX)I#2^LCXX znIKO^P!M7RM43X#D0T)A1&T`M7|oC@7d)kmr&&T5BUh#@CkTx8rigfGnBbG_(Z{hL zSv*`Y;j94-035mv(VQHtq{`Gu4OBAF7ZVp;r)N4AgZKDnv~|YiL~tx(j4g24i2D2M zChIg?s&H5c$vHu;lvtA?@+ObSYUGzc+Q&jX#86?mZIzX4}@2 z{IN^NKlsN_mfP3eFPqQUPmPEF=jN}R{L0$a5*W81T*p8v`mb0amp0{J8#y??eeH9a z12Y-`l-TOw^$kOX6UJa>2O2BJ4XwD1(FhL=s0AK+OtgiZr85n)@z|>1}>(v|7^!c%s!+ph%zH-m*?iZ#K z3d?2eOT*F%sxf)@stj4l0!0iuRn4{6%JvEDJkE%;j$&jaa>{xgp@j4Z0t%V%Hp!$j zR_AfVg*B1F&ZRHjJGwO5dKs8-P=HP-Sff=Vxdb9HI-!l7^DtWUjPF^cfn!D~Sg#Cd zAcV0lA7lzgX=6xMy$A*NTX^s%c^d2k!2fqn@|G; zIKU0$Dg^@JCMTTG5<}<#Ldh|qg#&?bF)`h7TqWF*Em@W&TfIv&nm)U(wf=q$JokO6 zkV57(OIe!sJ+M1L&tCc$xdjw*(jj@JyX4pxSS{mujk7u!i=2^^bH*7lUibv@Y;qgr zEQW+S;Sm82Xr^P|!QW)QGN@RjQcBg}iBC+c=(W3gc4pB^59Wh36pTspUl&|#5X(aM z+Er1z?)(nEa`Ov)<3EjOPJO+H%e+&$nfq>6{pS%8YMBlCr5EcW%~HhxvJGHU6)%uf zP9{~U65i$IR>83vC&BxK`7-GuXNLH~P_AmIWPsyV5fVx;!&@dp zN|=n^2p2fSi7(YA*Bsw10l`bu;9FWjnm%@3A?$1QJxF;oEyx3M7m&!njNb 
z3$RqeJH{QdfHRF-gM>#Gob&+mj5xxb%?~`OyLGCPM5YDoh2@|9QY?H~JTICuI7J-;qS)XJsrw8f83M_FTpv}Q+Z z5T+0{53&v5=Vui-Eo4#|X))M#Aw5+;Ar=nKW%m z1E*4Gi3FWNpk8K_M1;UgiZVyN>IO{`nptD1P_!tL zN~s{j7oQbNcE2=ls6^V5I57{C^YF5vY`Oy|nTS3WQ=(i5R5D6w{rRf&Mo*&mjYeUV6c- zwN~5G^{T}ye{sk81s1j!xm^2ABnmXsP75?9F( zY%aUB)=sEPR0*Wxw6kng{jKkry|uOPwo^y0+p~0V`;s|_HjbUzd?J~4{!XUeG-IrjA921`|5^ zi3Yy%KK|r;Fa6%w-L;jy>FwhO?)h*;iXSHy=8=w9?Jf)nFnv zE}K)u`rIWSYR&8)_7h4PNT35?Wv#nzBXJ3rF{oY?u%wrBA0sVD8p{SPoTQFYD=MW4 zCkQ+MLFtMEL>9DG^@7Z&cLY!b4`w|)GjMlwq+_bdQof@&6>jE;w3 z3v;mOkJeE*=?v;I2Fnv4i3pTyzkm5_dz45PBiG8s#5*BEh_b%xan(Ov>8`H2-Z&}p z`HSYQTwBBM9JouDWo2vyujk#4pRK!Q_u2VG7;Cznh1VQ>O08dYIG?>IOdk1h&#W&^ zJ?&obPE-E)D5rxg#Pa#Wb)k~H27qh>_;|PUHGqZy8YBe|E@YLV3C*<#zCfVhfh!*h zaEiKahYXH~U;t?{X(+0sa6fw<8rAdaWp>p^w+&& ztg-6cgh9S=c6Q6CGzQM#{JdWSpZ@?o{I*TsJ^$@F7lh`0qbEQ2_6O`M`QP$u{=2b$ zPxIMj%bJT;ogdqH-93BStg|(HYC;SxIa{r6&UxYdurG}?(g4tti&s&7)i4b;C{Ra3 z)RdNW9l2u#PhBDM!ZQJ_VlXMul$%(yMiNw-^WY>z& z2iNNA-JKPE59m#;i4jq@CU_1TY%3U&K56UHH_bLRj1q7_2|*#3R0*fmwxWhFE5>UgtjVM zf-$l2!hKKWmn=R}pS;JNJNeU|zcA=5FIK$Ol|MQr*c|wZoE;eetdV&D*#_|OX;Fxr z7ZeAqX{oc@D7{9QjE z%@S*xd4RrVl+HccI9avyXTryOLlvv7Xq_&&TwHY4O)6JX1U;nRX zzSy|ZoZLBi@~+q2SH21VuEw5NRMh>1^jOrt^w=M*|4eHFQ{Nx>&5^~6SI_?R{_eoO z-cZxvWzqmRxA-F9+I}BZMgSj)M>HbUuGZl_lED%lU6d#!3r=V*n6XKOp_Ftfr3gI2 z3UB5ddL|9vAt3Mo3LpTWA;3cfA3WgU9XuQ?0E6GrxM;(J@4r2KYR?^RDj_rHA=P}w zb2jJAX)DV@vrO}ZF%v0;Dk(Jz!OCuj38IAP1;ur)VWbchF}k3GlDS!=f_YvADm-M+ zjz^y!Dn(w85=WLL|W@j#W?6o8n6sc9U=INDQ)k&gwb+{Rw3yBH_{# z*LUR5T**K!BzJ;S-<}PH7wE!95pjHq*Vyw_LPzHGVIU-oCj> z=;=_r&>TNFPf`hMOn1e?pIvh4eJ2N!BuOQq&Gn=@9dJ& z^bO%No6@ARU=XzW@G0Nx-T0**SH{691kOQ7 z*mro@@C8*a9(Zmr8J^QnJq8-1=ZeNaElbXh3vfshU9Z`i8J}o1ofS&eWlcyW6iHO? 
zg}2ThN=ceHk088Fz`I_t(A@7O^M*$voi$vesv>mcH9*$Xo{MfAZBCcnW-#Lm2D`n$ z6u4)#L(gFAz*ukLSMjn~^a{_WHe>9ib7c!gvuZq)RiR5bEV!fli_Poyw`wd_rl>k= zHa&m%>K7I!N&9Z!e&+t3UsXYh-qo)%lMfw6nu_SK=f!@NC8;0)*#_{*CUqRBP3H)l za@r0}nQ(4sQe~3qkd_8nk+>oWiKA6QLja{F@Zcy%y;Oi!OjuB>1KRfT#gBaH!LKdN z&hf@%HhgaV`0T1uZG2Q-`Q$aTJFoA$EkCJU|HNw!J@E%;*(LYR-`Kt6JBf^}H`6ye zRIg9?O>+(%Duz$J|Ke>Ui(qRD8Ye~->#r@J)Vi4kgI4d?Lzw*5M{)1RZ~JU#|HO(z zKMZHO-@5tWTHu+_Evx!wOXbt_{FmqBC+amn`nRTJQ?@oJ&HUlym>55jtFDm%44{Dv z>U!R_h*UBOW=$j<%+@PMXrWc6Z3;#!OQI7=fb~2?0>&aKy`wDiS`Z6iLdJ{dD&wk^ za-RUexdUi$90)uF1PahV9SuAnct&lY2nPD%&Y6#|J^HVPQ%PA_lo;q|jEjqI>o_!W z=82FR!b#@{Qys?tC;s1wHVii?s_AM!HkGDV0H#*p9>#ORsS zpF8d*CvN|OW2|S9ixOP8NKL~&F!2smW#^CdF`GI;AQ-P`<|7R>#{A^zx(h|~{K$Bx zaIhkmB2O||mm&p;B2}bk|FBaNAe@013PcgDSJcn$3!D?}Tp6P^o&?SvSYN7m=(Y8% zWQ8XXLIi3Y5rGpLoXqy_R9otk=90gA$)u>cUay=w@~+{Td-qUU>-VnQdrJ9KC0W9C zWB6BF`VX2y8;?}THOuzAuwk-4Pp5ubcAwea4l8X@ueG+^p}W6&iTW(12&ZRih*jB0 z1IRXjPtR})C*fozLnS$-(`739U66)CDlJ-uXO7c$!%!NE1|09A<^nf?0=ds)Xc}JE zJSn=AcKj;_9{9}ZPhoh)ZZ9GnUAHH$h5kL$y!dWO)?_WGlOVZ(nkM%tc9J}{#aQCPF@RO~> zN2U(+y62kTx*7KG){mZ~gBY)~bpFtz-zIpl{YQ`dq_Rc2{(`Hr2Y&hD3ch6j@gdeV zkbnW8agjjd(i&5aWZXGcav5yQN}Z9)$6R}91%i(b0)cwz9Zk{V%<@Pr3(`_gB9Bf| z!fnBN^TeL1JXU2vxuXaG1$6$l7cNutEGtOqYF2ay4lw}B;*Ai|~q#jLcc1Zft$#uC;+!y20e!12JmijGJWOlhB3 zi@XfPBBh@>RL-=n|60=}C`syhR~hE|ZIzH;W6g?e- zdf}9|NLiL8NiG<|g%nUyBu+5j zZB{K`2Mu6eu|mSaiwFVFnVlXwx|3}loHAGc)x($VZ=H|bZbf_l^Ma`z`z+7vzF;Rt z=Xy!4uUgp9|C252?ms)IlDZPGGn-HET*G3XpZldg|I`c9SY<8Cn^(P7kKcQ8&hiT{ z7(Dxh5mN+9!{UAivJIfK?ZgGgIjvl%Orr*gZck;L8XACvv1XM}m(XUvuvKExvPzJ~ z;CMm%$}E;Hw^aci0;pEp$ScJoAHk>8#*6NgYj-U_YA-#NuHVsr@gvvFJb&{+v-;VA zp#ztXpSyJ5oce)9^2`Odj2sqq65H)nlVkHQTgZ2{=uEe9>LZ)}`kZR_fIdtB7~VQ= z*4_5+BQ%U${XO8hz1#4OyWaHy_w%3ER#s0=%>42)eE4&??cz$%&PuZQkKAAW8^OcV zZ{7DD*P%;a(>E}eJbq^G7Z$vDeo3zfX(Wt>(rOhCE0-k#Q6*VX)uM?Jjynrs1LGtH z)uoJToj4W=je+t69hYha$6^3Q43=@nIJKZz=lq%aVBebOAJE`{6kwEs0-%5cKpiL$ 
zC>#%<0c)RK@{zuu|My_%!f|Mo&@|zkFS<1qG$TDHGg=B+MHPKCu|`1K2}&Wn^vWZtEL!EsQ^D=AHgWq$add-bf&lQa<=ii(sn!nx9sSeNk8^mZM2QgNhk zNGi$w7=H_QHehy0fnA>T1)*=XcJn z4~nFDx9L2*r|B1Wd$l>)t?M*5DF8dR^T3M-=VzSPL;%P(fKN6#=b@5;x-f(O%w-~s z)f2``P*x@`1ji8)8l0nnQ)(a?51oWMioj7I5iMS51uZ;8CNCyC-nrGO7+!Gy)wQQ?9C~7K`QGSTVcGfaqRoxw zld5sT)sKGZ+IPKBXT~>HPM+)a^mX&c&9dvB+eO2%kGu!CzkCD!`Md9Xcl@`>i`&21 zGd1^p?H?bJ!vm}6;&SAU8|vu&&u;>cjlcQC7sn^_PE!r7@9WBHU*B_PsA(h&q=8nt zzCw%37ddHoA_<~{u%1{REhEH00vx50tV9}!cTx+&c+eDqQX@UUBj5t#mMYO3J9)m{ z8@Os9`DvTOfx`jdjDs--%D9shK)~+=K(B5d{=|mSuNBgDEd*~Ma3*C_x(e0hPM_^dz01aK(%^rfl_$*E*O|Wy3?+3_S6M z+FsET8=GC%t*Xja-fg+~nVtOlbf$OHyN@jV^trLMLptQu`-a+&>b2j&?yFgO_03ZNKL_t)oT$cba zyvdIpKQpP-;K8XCiQxI@EC<;J@Dazka}GQ~VF1Y!Rhrxj>jGqz5f2A&1Q&n=w4jNh z)EVxAgpu6I!cpo11sn%JZz&#q-L)UP^rhQR@4j+&r0?jal~3G$==hB<ki1~?=)~&2#bb@X1#tG$ z-vFLI@ecEzGmg6z%MTyWmtFLH752&g3A{`iXkc>FkgjjdHRDu(1R{+hm7!_S3cf2c zO_`3)Lm(u~IjCq|!&cnL02+N#B1aV<65i^J9G~!Do^8pSXC}A<;K4Kp^A5koodRcI zOn`$@mzZqf1MhfK{-Yg@;HqYCrOZSXL|tLc9gd3t3#fz|uQg?|k47CL6QzqBB=fz( zTgCg}tl)?oKE^=7qF0&`5~B;MCpq^n8bBfv;+Syun=j7JOx*INSk`?(BuY}JMG)Mp z(e9$Eo}U^`g(H1JN3JZAKr)YLIV?rxC&Iz9#uO)B*=iY zn^d4Cg4I|R9+|Awbk*)^l+l&JhkBziv~u}M4*Wkcw9Cr%Y*D{Gnb|YO!4i?=G+%RM zr@BhDdpG_0q4{4ub$<22lBwdahV*^Ag2?hX!^g+w5burGZF;%<<+ZEtADb&{DP_cU zt~vN*e$}FvviO}&F`Le*0VqaaShu!wa{Fiq1KcwkYC(g_(IyAk2Jp#ofr_OGcP<2g zCNek*C|x<4(Ga*vH2^0?B+)q+I7h*$;~ZU{g>F{qoL4jefx}S#s*&AS{P90ru=n!z zflK)_3&;9yd1z~R=$7)>ZQJRsKf6{xamNw2^_R<5JocJ>CvSRk?TYO@^+g!HdilLm zH!R+{jGu7RS#94v7yixtE3?xqSwatYRC@8OqoqNGHl71;*9{x+vH$a}S5}|t+;ZBC zA2_bot!T{Me*Tr4Zhm;|j{zL}-(LlG?b{aq)`l?scUQRodh{#U0Es|$zt*1FKge77 zEoq>Em5p-271S-{BdT77EU@56TV<8WV;K@|9k+0xP7pw8Cz;`am#K*eX`4&$6^TB< z)X4GK!f}SadBb^+7W+K&4xDL#!*Pe-(EzLfz`>xH-@JR^WAmT?c0z+QT|dCXB5!JG z-go_i9%C6Gobawi6H4o$M+s++qlnC;p=Wtj6Esjt0q&fVg^?nn0H;JcQVOBDlSCUH z9h4)?i;B~TyIw5DTN}O}y$YU*;3<USBR>8)1RIg`r_%aHG@71{r;iu1N$|v zrE+R=!WWhHUURIxX6*hO79H!Rs7%obiEzQxNLNPOUtl!`~+V}~X)<|7MW@X91`glQJ6O-lRxjkmT<8dwyF83}Z1Vy4?H 
zdfkWnueOH=$NCn{U9oXdAAT?D%EoN(uTLCbLOFx?t>M+@W>J);w8rr*7wz7uuH#kn z)@`Ty|K;4s>Ulcm-4E6Dea~4sAX$^_IoeNCo=MbZh8y?Zrk?9r1143L%wPWWVRhBq zQzH3VOaJGS({q+up61=RZ0JticTCjt$V|(*PS2Lzd4mpQ8$f4gEaTlQ>(?3v+y$Ds zqEYWqrz|TWQBf+Z;6MW;cc=oFh2+lg;5el{%krL}lp+M~sO!Id|9N;Wcxmt8nn}Cxl$$#~ujhXDCGNbE3m1(vn~Ue4Ph%BAWlz8RqX0hjQPO(fg8R#NwmR)TGjC-GEb_kZF8wBO z_qt7y;IRij3+#XTFYr%~HjOkr@Or1-7}_;6q$U7J18AkyML};~t*cxbmQYPOv$TjA zu`WgFkr+hBBB7B4Do80bBLPWpyqmZf2}_9^KYeyegR8j<`}-*<3%uJoH3E&7d8Rqj z9B|-Z;0|5^aNKe9@-8ANEMG^*K#kAxYQ1VVV=MUOYgCk{(Vk6iI7 zYAUK2b(%&)SZ1@6W`XmKsX_%Hqx9@wE%fvTi?6>pir`CTX9WiK9~uYM^b^$+s5w z-?Hq!_7JV5u+U+{3kS0;1IJ~0*EH_g-5OkKOf8#z)27bZU!NgqzlmaWUuliwY}p(K zvJIfkk~?E)lKX;(fWSjthq?s}chn%%y@52`R1{Rg(NHDm<=_Fv(I8O@Dm%(?{1)%b z|AXJmF8HhOJ##_t*6sSLUCqmOU*Fz)+o{p5Ptl8?+B)&VH3#Ocd1P?ji#HxWdD#o| zmj3ZQXKUP6o$Bv?zLUMR_2MNHon)dgx;GKTPFve zJMn=l|MevWX#kDMHOm~f4*FPw=9wypRe=>QwbV;mASfR^4C7K0%3=)B2hIsG(o+vB zoGi|qJ>Le;k|lEoRAd#Qg!5u%*6#zjgJZdp7Wwn zm%;|PoKw@dex>6oRuxyMaU(pp!rGLVL2-0_1dnwc7z5x9QscR!DX4Q4n)#Gfx<)iH z8F3v_R}~zrlGcexdy?7O&%ZxlWOwQVublY{Z#f9TILYCT;c5F#ym1c}PB-V>) zg=2w}01~w<_c>{x5MiUMnA4<$C!ir~)E|A#mTu+RGu`P^x9{i?DLD;6gzBN$O$%Gy zjdvnO-2v;!_TpOk!mgs^5#%@L6ibo>{jP5ze{p(_V5m^F`mWx$bKnZ+Ctm-~5%rbX zQx^`E$h#k`O+9k367`g~Mn+sw80H!CtFJiw;2rr$L|MAZSzJ4`^FX#G9pU+1WBHF> zn(ALvAgNEi{&I8lkyANWoLl|unW0SPoCbhw1893T6Rp6dEJEf2Bmy@)v+xF>JXA@+ zISh4Sj%$R6XMa(30VkNM$0`}uua_uX;)_?2f5U3+lZf`>1j z^W?2Bj9$NIRr189Uia#q)%q7+G5hRgXBro8y4xx^bAI@}-##GTFne<4REJE{ZuvNV z{&Bsic@~oZ=Dnt>oTKR4rh9w{IQEs*#mu3j!z+>0&sCd;4Pl5O`T9+t06zb>48YHh z|1V(l$A5$Wc(x5+UA{F@L!UY9j^LFmp4*9+X$3vGX-KCx3PvG(tjEAhB5CPru!b5$ zfdETQRD^oazypIf0yF?Ikzsmr^jyn>)rS|>b6!TloiwGC$+Ld(!yItISornbBkSb6j9IJS)ePk{pIs(3P$Z(+z(cV~i8ln^uBo&ky^8C8`WV@tVO+EJd z#53jsr8baaM|;!4Zugcy-M*l1n!GdKJagIWi(l~pQ?#}}=Sa?6Dfb*55*baU5YtQLy{`_>YcOz5 zlg9G&+5P$T$(YE$JeJ&jbZlUu?e&S)+t=x{|1}~rPO0sk*V6iKF)-*rwgIfwN#Kl3 zFv{X{={r(Qok!pqv+roLeW#>r4r6Bphe4jtQdwm+7$CZE6f zsFbT~6^oo$%I|vZhk-AAfB^V@?>)fuf4&C~es|VTj9@NZ)=W>G8iKP1fL5}D$t(KG 
zg`0s)a2qIM>MaQ*3U0l($_2uhH^GV228!sFu+ay`tZ8sRdiq35gRA}}OHyjV9ODk$ zS>gL1l=FY1h)0AI5e^6x@Bo+x;J^W_RP@F9pWO8;iNTz6<{3h*p8JuZTT4He*F<5! z1$VuKTlAqfLi^VLYQb= z1Wmzsf*N|9q8QgQFkgru3CSiNUVHK4;DY?aPNcvu}9&^NpV#X)Py#Rj4&d3_K}e}GUr*EpnK7ahgXd*S#b7`-ZrhjRGwHrSh|M!dp3RF z^Q^vn>B{ZhY(eUJE?C8LYo$gplLl{1_7gjYPXJl%7d+9rr7&R=)x%(@9zjwWW__jgZ!vb<{Ygf#tw zQ-h0+C(V!<*PFie9gmNt-RXwN$7=LK@#Gs{EuY*o=pdZ(U;2~xA%5<|c-glXyat&5 z;rp@kZ#t}ArZ*g!q28s@DaS}~z@l({C+Qx-f;yDWfF17a0eC7lt}I^X}jhca38nt%JcSwfAnB9IWA z6ytW8t>q&#q(~EyaKkO71c5Ne0}qw5DbdxhC%q)OE}2%eYJJE3aw(<|*(RrFO??pOTqOSa)!# zJKkwf8;vbG3oqaQ)D-f69X+OX|B4=Bv%w_aHdTP2bAuu z_p#Y|uR1tZ*R|R4zSgpD?abe6FYlgSu~#na4#{TdIvHGBU$*pgsMFShr0eGK%|rk4 zJz)Ehx?{Yk7hnE|H=+2}U*lz;z3$pbTHp8}_We^4hO6S!t7kv(=w*A%pzcA!02)fG zRkXNjc}a7pV&bBtJ??qbLKxyKQLq9=xCj_&g%!~=#5yack00(ho(;^e4L3@{0xcK= zb(Bck6=`q}FXPWEzy`*wm!3or7ZDZn-oY~f+^P8|C;pQCc-{pWtE+**By-Xl2EJfW zlBhGVA}ge$Qg^?pkzr3(qG&;PAR7PzqX(m60V%5r7v_c+FdkLbcU9;2x=Mi;L$0^Sky8ECXC~v zi09Q;hXx2YJt+;fa28$W)@4aZt6H^QiPXM|n>{Ga5iUNuaE%QEGT7uU-16n7QK(Z@25tY@K~>^JwqtopQ;Z zE$!zwj=D8_#qi;kz0*r3rW^XYP1CyU8~?uf z&i0Dz$Td%nF6PT7Y`Esw5unXhkp-+b+qjbLuim_25dYV=Qrve|xjs`3j(_8Guf&l@ z{~Ry-)E}+~T3`M+j((DjE(?=`@1cKve%)T#-CP@G7zpqYhjZ4dQqZA#&{{_Z25)&4Nb`Z;4HSSPEW$;wwo--S{i- z>`qN&y-sJOFxr*Ilgbruzh#WYng}4$FyRB^T@?t;Or;1Y`DJT?E?viTyeldT(Z6Wn z(BhR19Qo=zuZyy>z3Nhb|Jqx(w}+Sz)^sn#1A0rn7=Qn*r@G&5wbnZ(s{gURcyKR^ zs;_hAY*khOaz1YZiQC5qRgwsvn)Vfk9~bNM#`rs4cVPHje?ACc_ShBZ{a`Prjf}O` zcr}kRZYeh+IkxlFeVVymWp$~W^Dlq;rN*svyjt|xk-`7>%!$DT*5&E=dl$BkJ$WXR z84Mjg*ek&pmNy*8Hh}hKOhRb@s>DzksG}4i@3;U8gaA&#xuJQhLS=v<>1Z$^fTB^D zL>46tDp208(W!^zS_~Y3dUb;v;v!edejYq~e?7M_Nz1sG!e8ya~_o~iw zSDcYoK9sE3b7j2a2a7IxX*7-u{P3YK>D9OH(%D2>j5Wr$eBld!_}SfxP4qX~gDV;b z5_$XSoxL!%Y-82m%TFxbxO7kUXFz=CcLeTfad5c)v)}yQX6!iscD(G)cP|9=Ki!Gm zduPwfl6g6I@8IAuS(^c5N8z{h@KAT~8l?~^@sb424EK>)npC|!T3w4S02Iu_l0Yf7 zbmGWp2dFoO7w3s(9+Vm`N)cTeitg;suDBt3xCd96V-k)8X1I@t0Q1btEL2P((BQ$~ z0amTSrfVh}RjGQ8HE57g`#Fnj9+dZKSG;;_=x-%m&-~>@5%es|ywa5`iXxQr7#E 
z&7BzPGI;I~nR1bO&w0qW^|m?Io+YOb4B1xjO4jBtS+aH^e(#yPW-1%JXKdaL2OhcV z!gDA(#-P36_7jh|tu=f8e_eB;{U1}+T2FcP#}~GDoOB)om42+%mnh{KZ7*I}oD8C( zzEhRu#*6pt!fn(}yyZ9faYefz7)*!>4$mZfyY%ji^~Ardj0kNoEK?TOf_ zO5?%gZMDaaXE%!xyZj%|=Kncr3#&aTj)cQ zFZ$>QUwY$Fw(-$y!$Y@E?z!e<|FTE>F4%wF$rD!|Sla*OQg!~OXQwXRzjDR$dtEJb z!r6`gd;4dw_R2?-1*hw?6WPcs|M5eA{>6LsQ0v_PM;3el=v5YYqyL}hdCqgnTc>A| zNt-03kxBwVI)ngglQc+*iF&TX5kSl6NVUySD)w*uy$e@5`TfD>Cp zZT&xeehz-seFe_D?Xz8gyyqSZ?>X8miCruW3+_$ivSn}q4uJ&DTqntGXeJ7bT2xb5 zk{U2$r6#s4kc85NX{Kqasl|ACq;T{Y11-#KZ=u9Vnb4G2in7R}aj2&2mC1EWHAK&; zmE|563PwXQC`VHbD8Hx(U=mP`^9(_-k#95&KvJ%JrF71ydSM>rI+uzDiCp8kES6v? zu@t2#O|;?(V{xiorj(Uh@GJ|7(!@)NC!vrMn^-W6Yh@W_kvQZv*+?LwKq$#%xn@aS zt)&T36WzZ#@I50SK$NB$1`aVFT^h11I6w%Lc4%T{Eu-V9a(%L#oIbGfqJe%MuAJS~ zJ-2}WrsJygUpn%tEV!J*316q#A4>vdZ@naF@#4FNat}}TG|yDR_eVe7+&KJN z&^>4VOtSYkp`s#*raM~{tjz$KE)!y0;j5VVN+?bQ0kWYnj0?s9r3ev`Ee(M!V>C5s zFR-C4$j}&Kzz8EVPBRb)O{H^7eS4H^`Pz40ysB1Sebia8ZG+x*nd!OqmHO2?R*_d% zSHp{Tdh_>OH+f*?fPQ24X_rCOM>gL5|U#?tj9lGU{w}0S)t+iVdc~py0swxnjx? z1_J{aLrm>PwyI^cIj@3@k<2)7SgpP&Z{$>i_^r$#GRg?aEK7o!$cLQ}lGs9KJWDi~ zmpBxLI7*TfsV$ta<|K&<43w&d(rSb;i|bAh8ZA>OC_@wh4CBIMwu8! z8wlzXd}Q@h-j^106-3JH1iUsmJ~>s7!7YFI6?Kz4eD>Y{g8!1r$ZguR1Jw37 z%Pj?aZgK|CIiOV4Ece|fxoaiNN)=q?sKaY{K^57kmc8&xG#EP}O{ z9oYNU+%}eTp|z~U3m5L_&s|FfRF+cpp4?CSCSD}TnpXg4dI|kgj0}Il=3&YZ-`@E&Et#-Dq^tRtVwatU2 z8zcYrmiwN>Wh$v*w<}HHx(mlX; zFS#5rcYf0{KXX&&2Bh%Hb1&y{?W4NIT*HbQ|DlF*bGrF3Cj zND)*f8%<4_w!Sk<&n;TT8{Uf*2AQymE%9GGdnCye001BWNklwl`(KZj3Lk#JjHBihnjA?Bx_8tzZlDH#BbPVbe(!S>Ci#N2M zjp9gxlG*F`9bYn;?>zIprQ>Iwo{Cl|=>@m7PyRI)5iK+tuZB&`vq+p)mbFZe8ln>! 
zi$t$jws)U%jS}Pka#LT+H_C%+bCJn}Un12f`lOR}tm@>E3ZNLbJvRG-Q?K1JRTK3% z5uiG^i9J8;UuWuK!JUKViRp8T7dLm!c7VZblhy(bvtE1R)I_`E+O{!(wHaXJl`Ip) zkQlBq5S$q|G%yTA06s$EJ5ED2V&6bU8RNBC5FDw8I3y6<)d^|96)JI1jNx59c6!NZ z-&YQNr*lsK`su?P#*Qu<&$OOEbGf5lYYH72`XVXT3v#+uyZ1hK#)pxU@h|Z2(`w_4 zms`BPe5G1AaqkTuy8E$*)y$ZX^1Ov7Gl3#P#34TTZvc)xeJAk6x32_n`!_A~fJb$% zmbUJE?v-r%&D#lp^!-1#f#`GJ$C=v-Ah0UEbGBz-^H#nfa2PC0=#b9*1m?S2ef6rZN=`Xqd?*P zP;^VxlL-(&!e-Q=I!Pn|(WL=R0Sf967y}MK5fEk!07HURA2Xa5jBRtt69G_*r9v@b zq9|;5mKcd4;uvvF3jH@yf*61pW+*d;Sx6O9MT~)h0Wr)N5E#P@A%+o1+(6yz5J8qcuEOJ@faZu|XI6m*}ysncE%$+#)Y)Pz;#7;iiT7Irk zZDi+_PKSY%p;21SUmuShPw+RC=NEMBKIm-J@#x2|AI^NM*0(krqZoW1wI}-(^E_H| z_l~q@xJ8M^e!9$~+clVwqbnH{%S2~uTStSpz60fZp%P)V?U3l=y;bWH_^SfUy%s75=`S^-K zlJ!6OSc6f7qa8oeUp!`R@wUwM`kiW{aN@BG-u#8$FUvZR!;*O;UJw(b>Y@LEj{$gQ z_`|>_KXw6NKKuaD59Cwh1!>1K`(GjEzJId-G(Pfc3W(dkXC3^quc7J4a38vW<}w#Z z8~_QhAREoxkfu&*6QowA4sN^%N87_B&D$q3=sBJ^h4;`DTOuO0Mg?|;1EE&Z#(A=x0-6E@R0>Ts07^lL zA+`Y{Oo>&WW+ri+(x??Af@&S62&32$4VfeYNR=cCAch#C3}FTYMu8ba2oxv8HntJU zD1d+{#uT^!2tYu!@&HX~xPraRm1&5Tb_^v2U`)(d!Ak#rZrZFZ24X9!6UT#jWQr*+ zq6DQmjcxQMjF*2jvw!PrQ`z~E(cdG-pQ>;-BL-?lghI(^@nx^<-&h=@5J?c%x~|)` zXZ3WxqyNFq@#D``qD3N3<8NlCc4S*R+t?FZjvAW~!u(8Ar=ov~=26#UJj)3=urIq& ztKQFEQ%b)Z^{;gznn~`mfVGtUHo-iWPrY8rtSQAfv1m~T) z_W=r|pZYF)`Dt~|rut|#fs;6yfD8P$p|&MdE?;VBN+?50iKUW^kxnXMBj60GS{j`w z2S7g8JhPCoQ^=r{T$rkiwiL8_?ZA7RY?Gasyx1|NvB3VFiV3{Wo*C#sHWPchAZDW3IP=qIwMN8hA++t6;bOW?+-V^UA@Uma(46$pryD-Z8rZ`S3hU*-pXcVx14nKa3_aO6*Qn)PsQ zi*F;txN9#vxPKElmc*1gS;t+wZ|~x;IOpg?ZPTwmIgGXWmU*)lq{>dy>3Z z@fqh_+K@x}EYTiPC^);1Wv-Oy|M%<0*$>LYD;!~EtWR5&XO2>qcj#nw|0J05sgp;` zqNAs7IXkTy4Gm4*%m(+RG5^M>G5v;*J?y>J!v7&n4bsO?@2SrqE~N&5wHY9lIx#GU ztAsC1zGkG!aOKwAnAeE|rckCjV?>5A5&8&efj4F}NDkR)n+6%GSrTXLdWKctN3>qt z`0|t&kG*Hb9rl%j$@&*s=I<;n-FMyT-qok)c5dtFIC=Gv(W_sd)3kj~X5gaPg~#m- zO&S9q`9Srq)8viGks0Yob~s~`qjXD zep~bFJ3c!a{E^*>?%iTB{VW8Z%eK>D%!*>8@uZOgIh(R!&ZCwTi1&f@|| zO5NQ`ub-NHVM@yt1L!q!?TDB>3uC6~6UqOv{B6iW@E4CIlW1F#*} 
zwb}f0uR6DcrTXvb;2)1ow$4q6z6YlttL3O4R(m}V7!{dI_MN)EZEH5mY{V+K_~b5o zX*PfM_u1jm$)+U*0I{pa_lDK5)*1%Ph9Z^)mH^6SA_YxTr!h5W&td0!q0ioNMPK3m z$)S~=vU1_)5Iucd@TRO9%k8SPQ5zO9NwB)_)H@n~raA6rEzhdc<acRiD{; zIsRYTXl1ApB=^L&05nV*_1SF-)@FcCPZJJd2!k6M9;+HJ@>nqzlZZG*##D2F7)LsY zJeUL?akGU=-6NSe&QPx47!&(^l(q*l%3S+OmCDj}w|+30S7Ti#^M$i*OOISUb#z0u zzG17q_LWN-yRR+P-|(Vd{n`bW^}a@0ND`FpzP$dqBe}N@9h)bo3Zpf5dc#2e?dm7j zK6OF*`h_!(7aLRXl&(4}-gGN~d;YZ**!-Xc4BYl48G5RiS~Mcf59WV^=hV$O@4$aN z0KokF_pQ6bS=&9{H!Oy_c;APweejS24uPbE<+Edcc|j1$90glA$`Bqi-=ucA+B-g3 zS8)7nTYIx-s3U|;ER!nXdLm7d$?*~D%xP{Z%El~RIT(^cZu@$}fIqgwQ(7vcjESV3;a~tU zirFv#gCT$gP*Bj+05k<9DS*_oj0H`UAzY=DD5|MusaAx62(E|&!+grEgOfy&8vRuB zPY5J|iV2^&png@mCyvIDg*NQ&A4Jof*D{ zwHct(Q}EGXCTk#+ADa+GVYH-S;$Rn@qcnAC8s%Pgm?_QD{XSLN6=S&>>w=TF9z@1+s zNI&`$0F4j*);jZIF{VUQ|0i=Fz%N#;15)ed-~SNj-S&^;56_*y%@O+}G1P_Rsx^mS z0XPH_l)AH6yoRS7#8$*nc0_DOvhmc|z(6$tGWq6NZZ>Zzh14Ob1>xwLmq<1}JXLqQ zi(dHZvZ;=_oe@3Nx2C+ooZtklSpkS zFeuj|!lFtUOLArib3y!sQO1Z(Y?@>|W-*i4cK@G^F{*s&#~wdyYe%Iq^IJH1b6d(x z7FG(93O}Vte|bbDA~qnM>s~wT+&mRjp1|bK2M5|0i70y)jyw(JdCBmOW}kDyNNx3N z{cjGoGtY5-?&{_H4rZ=`l3rC-9>bGpG;VPt}dIEa!k4(6>>6D1kM ziPl=NO*3{J%Ur9X@h_|$;Xj=`w=C13&5h4lwZH66oxIyJF}kTSq{O& zyAL(HS=aLG3;Opux0d4Q;k5CS{=wPvWR(9b_Pj)-&!@%%ZM2Nz`oc9^hHrL{6sa~k zYBXK^^0CbI^>px0Y4FECPtWW&I;ZZihF(BoNN(cLpb0bux5<+3u(7q42hT9>6TW=U z!OZnhRQlqYA?Lx#)9ZMcQuAdpyrSM-dxzs3XU3Q$9$z{=`1ar_FXQ_zgH_2c z*fQC?X)>y;{+UZC2X^;NO=SGp_1TB^Q)qQ+Q*_ed5F7#tN@%-a z{XET5%8-It1yh|qdA6j${Ek_ju4^I|JHFPbEs3uZoisn>zD=3qgNtT0&+OZlBlTke zk_$c>$Ri4~l5(uf8Rds_uC(0)2eL{EBnssc+MH&VM0Fqs6!QcmJLO~*6R{g3L8KTL z3V)-EnZQZvPDh91`uXaFgjOaoO65J9Q9~??f)G&4DBFgUGA6Rbbs5*(X2!ErgaV!egf>G`BgMKli!HrimaD zLc%udyI$;duCHL~v9$cc-r?fh#CF8zF#Kf7lw()MxBGt1qOK(xE2%jIk0X^L2|HH5 za?H6YlBL_%O^5$Ed}3{j?Jw?Ho?7^CzkEsSM8r~tOkG2d)^4eca+ok_sJgq6zBJx^ z(_}RIw)cMR(A2mpm%8S6WaNLkwU?r}>a{40ojp_or;gv`8d#eFHa=wokPJfw5*>`<{9Jr^}`Ig<4 zj#At@GForm`t0NXy7rFe{ORkK9U0EZP$?aU>Zb4i@fP6t@4pNj`}ubP9QoYS)(d@E 
z=~xXn{m$FJfKT5`ao%G+caCrE8=m%z9W1<g=FyDr2OQL&JrwX%bdSajAP5$yl|8@$|&_l*h~a>$&;`A5X+S zWC~N~YO`>*jEReGgtP0Umzug0Mgq+a5Jq(X4hM>P!c|t3X8^20NY$1B&B*|O0(5hx zZfhFkzUUa8!GG46RxFK$$!9VdWIdWOENd|bMA*L0+9oxWx|X!KY_KR1ZAclBp~n-8 zC_Cotg<7em2j*`xg8%!FbmUZ%q#8iS1Y0n&P}Cw!a$s)OJYLwtc&|a?9zVNNRA~C7Gr4`Fi2{ zax(hin?5rVG%y?#=4b52<1~3e1>??aX@;X6ro|_QY5>+|fK)~t9&-Wd;5=d=Gf*ah zrnUrZVu+MkLzEj}4qs9k^J8OT9c$e7fnR+9*uM8Oz}~;y4dBR^{(u+zxnaDL zmtT7Ox6H?X45YCB^2)UbYA2ISlde?PuI|}ccYr8T8UWP*6r`QUtjj3~fzK+_r$66O(u5GZ3BB4eVUHB)mn)zwd5 zJ|>;Q!+m~Y5h)Ym%)hY!hLm!}Q>G)!(Sj#d;E+(obqEzpUE^)598D>mU__b%QL5E? zVt8p5+_osQ2&^PcW2to`N+REuXGR;G#Xv-IAwUQZAv>>MXM5bT3^QCQ<{H7GKOP)v zb8N^|b|0*^)aF^@*J|p4YX9t+Nih3kX3z6^KBsj{kJd?SjF9WQ9>5U3 z?Qgxjo=!fh!Y7Xow$BVnhJP|0eld(}2f>Mxv8LQ1uG>C44W3Pk#PK=jskQvoW6q5t z8voj|@eceKlT!RYetV!_6>UHwl6K44^4qIVwdNg8sZOh%UC))9Z<4h3s7K&H!y2SNf+hKShGHnDc?&^DwHhOYDs zfnml#+9a_70!0$fFq1e++5|`nu_4U&iAFvdyQ=tto653$Xv^KrP6(Q0+AD7UZ2R2P zE5`?}Do?I|nQz!}ad>Xc@YuSW9v^a2la9Fe%_1*;FNYhCMfnogP)L^k@O#hPx8kX9 z(oVDd5VrzFgbXMiT=}aH0FTx;1262k3&895K8mNu{lrsIG5o?iza4z{i%2b)2M)Fj z2BukPV_1hillAJ#7{6SGRsaK_%5b`tuTU8;5aT_g6$P-inKRmKW``EjsWzT5X=<_X ztXm%oWVUnR1=yMCSw2^v>7#Alfn?F-@xJc*nh%M}>p4499%#%94xj3-uYaSWI|f~C zh%IH<5a0p{fKppCWV=sP5YiknS;HElD8ORF8HuYXh;oqyHb^-1*4w5)i!bhQ0jZ=R z?uuItv?7L^K-*D5lUUGDia1S0loF)OB`#^ZXu6RE@`8*?;FwAni(SockVb?_940bM zRUjjsNxLd1@S|-I%m^ro3o69Vk~k}N2Y$|fl-r0F-)**<+B58 z;*2AHA*X&)J~ew*ATN5q2)S!n!~c2wL@Nl%x@5ld6up7ws4`F{4RTwDi&spe{;(3i z>Fe*D5m6`e3G_YhRj6y1>O0+L&XAghS)GbARz{{mMJiabaQCsy^|2WJ^2+Dd7g8Jl zA3uFnm?&0>NWd=RrzUTn9(8pfLKTP2o5TzC!ru+VrB7XTPq{RPv-S0z)1rTOM!bk} z?}v^x?B2dTA*0-LTkI6pW`GS(QU{W1$^*eOiX<_mHcMPVzK$)yCNQ})0@r}7gzviXMSnf}M_^Ojxd}?&jxz=oNj#iqh2~!y;B*=%w zC~2!DH@*93p-U1uy!fk`^qC(o@)qy*3a4Ad@rwid?oS8zE!h3}a+g?k+BE{gs8TNb zy!FJbz=K^k0#6Lx2H=$+Jc=jEg_@ft&EsEu*AIq%`#%8l`hhbelG;p3*VpxF(~=u$ z=uESS=3lO={k>B)Fu<8PYeo~5hC*wKlqJ40%2CePE-l^J^EzfkCrSHd~@_;ix%yZN79ALhQ@JQ&fWa=f0aUwwtKwoYXLAtxaF+HjtOzu|Vi z#Sp(z&zZ6=<``416~PFS4H=n+1Fj-~)+XkEpDQtE-y;qs!laUVYd0!r?g++qk6R>2 
zpfz=1T*9cEf$LE_n2em}p{Y# fW?vT39hi;ti5FF!R{_wvR#grNng(^{n2LbJPH z1ZM|p27XLTq>F*HsUPy7`-a6dOPM37Wh+Zq%FRyx`fy*y<2la6g8ipDoAbo}i5mK! zI();*jwT;bl8l@hna-W=UCbTEVKebm=7x+GCP}Jl+P&zn1D$V}BK6Y>5jj#(ke z6h4HLJ4rxXHheBX9EVa0b!~T}XF>#802H~)iZ7p`H^pM`^Ot<(3;5rqIwm^(P2bC* z&xylwHYs?!&8Fi+S}2Em%*}R_=jv^5?i0gzUVe8tHfegq)tOq-v&X6K;zQNgf~)ur>p1bb@+D+QdK_O(8h6ON_KxVoP|1F{+)2P-ur31H&T#wjn0=NkTbM zhA84IK}?ucoUogMT{#t%7X9McdyDF(*9NcHx1e>)Jg;Ztp)>1`cDC%=lBP4;?U{wG2{8Q#I&}ViE5@;JwuB* zA}d+1Z>sJjC7jph6qdH@!O|3LSbni(<}5D;EIU<5Ey{(qY+mJxTFGm3mbK+-lZVgx z9EX}eC{9+ghV+`k^hspoB{vz$lB8Itw5L7m0%e=QU?k z&I$c2Q^c=Lf@<3VCyJ4f`HICMI1dwB)7nJl+w(-~R3DNSG}6%Zueh>qaYu0C{BC-- zcBIXj%NXT~FjzVgB8g(kK!TQ`r(7{>F=0gdnFEK*8z;_*MXP2tXA2o#$W4+Kq!xf* zkJRJ7FbP|TAS$#m0jF8odeO!Oo^QE`eSjGSa8w>0Xn4dVrq;CRwUfomV(t7`rS)b2 z(HVPb{iS1C+ioaxsRh6`B7U-Pbvdd>VWKr|Y0X#UtaYUrr2|3^7zm z5qGE_d#O2I$haH^QDbiFzVA;a001BWNklPl1gy;f8=khM4F-e6js>_aDLfPUQo0hEd>KX?0GWt#MGOmw z0RgBaHVh$3W>k_9#ElIn#;|`FIpFDdaKrsOzp!BV`o0sF4yVg^m}Q4A8$YyasCmJw zeBSXZCl0Uc%Xja%EVIw!Q1y{dzBTgy_+ztt)zkCs#n#qj)jMB!K0EKzO-JuLZac1v z3Yd^_;@td2{QA8F-}vWN;2SI71YqyyUce)g00J}mKk@mm>^S@0Kb)xbt2}5m%xCRP zrIPc?eSEePD>J_|F*<=#TdBj&m{3WC;w zg_+9PQdLWU9_oB=yg%XX6EzaWBfh9yx#HzH)uP8o16z-r9_q%%1&-P|X460;02mYi z2Fd`{$QEL1Q?G*6!FVbOEGDg7>1VxBtTZKL>6{n)+iyH8 z^7jX7XrxCps=@jXs;x&-Z6ji~)Ts)sdNlj-wrltF=iswEM{(YB!|j*V75iZr{ch~k zoVJjN2;58(5nGHc)}c*d!9JA&|C>x*KNzW(-Me&Km$zu}`2t z62}lE9ue9$0GN_Ckv360$PmQ@gYe&d^kCKv2XFYwq0e6a+FF0x;=BxoyR}CE}tw^RDU%x2v9{jz#V$ENcGqaebOKKkWBDRwkZE8> zo_Na|#cFe=CPcZ!V0=o|t_$9i>=SNoXaen(GXVgkKwH1>``~3y&WfA!bTG7X1E=lQ zXx;3L+E%qm7&*}QWX;7^Q@Q6W-Mh>xGbnVHo(_kqn(sQ?$INq zw@gh=-+A@E+UAw4Nj$NvoZ+9i` z5C4o+vkLW{ofqEN{QQ>TvzL5>udhHv7ItYk<<{HI?7Ck@np4ZuUS;9?Orfzbh^>n+0i$mMAP17{}8{5 zW_JjYtQ;wIC2x6K-`U}|JQ)Qi17|fR-f@AFTO(U1AqymIMt!Z52sX~kyD-pzP8gt| zDMR8=O#x5}5WohIOg4R|C6$Fe&oF3WlIBI7jVP^C%QgE3i>tQR)VvwFLf*?1vuUx} zcP)?L*^^mSk7Bp!w?pmM&kGV6lowCFK6zuKQvTAqS5Lk!rBbDNKd+zqScb@!b+Y81 zt+7hI*rp6jU?-F9lqHLFb<1xFqUZEvDD+ 
zQ&&H~(uw-IH*PP*ColbGO57j*!M>&viogEz+h5&Nt~73Xx6fVw>pl19-#y;;_H742 zCYokH3Os&8u5tU10DR>3ZU8TQWd|Ntdf-@D5kr=!N?P|?D+#fD9!DNkBZ=qd5H4Xs;n zVqN>R96J^j+g@4s4^#WIKJ7b~=@>o&Cwa$p(}Twr&T;m|lpj7>>aM@5n?!$2Y$*~J zFeL$j#NmJ~fno*@QG@_6P)tcc3!JBL9#imrKw={J>J8(Ro_j(=QzglTbEgdf(-zr# zCcEKSHU8LJQ!BLqE43Q7gA~?Z9u2h#)h*;#Q^myyVgwgnvFG^2#zv!d_u5@g?z3IO zE6cxM?YYzUrkmy;ZRtE2(uT3K2z@1zjNp+IIk&9I`pi|_N@K#M#Z12Mp)*r9h|kTU z;?{w=*PaO%-`$YknJh2P%knK(1Uq^o;%ISjq{C(e5c%fE7F@G+sFmbhn+Lh>KbL21 zn2P3oVIthy+_K8TdBcMn@L#@ME_qfeRn#zxy~6Y)XedQ&YTmVlmj;|off#=%b5k5^ zIvsS*HiciJ{6yLy8B0zKObQ`mXsFyx*{99Mb0U!qA&p4pY#ehY8Tjd(&->>} zrQ!FwXVF7mfY_R{%m0$gnOFPwB+MgDw!jUn%>e5U>^O8b;5f#B^h9oxpSb;~Ba6yPM((8ERpIZFBgyUU5j*rj*4ANIzN?qK}^ToJWvQ2}Bv&5EIv{&g>CvKh$Yu{M<`ZLD_qq6jddxKMV7Th4eWQwYjaZ=TwT|{st3^x&x*{d&a<(#Y3 zMQqucSvq&-$s@G|B;&axO}C!C;F?q7!rL3=pH@c~WvuCUt!r%U(Wz$!OIjvEM8dtf z{MgzxeTgT92xEB_|7&IL>d9o$T|ox_KX;vOh)`<|T9|8h44eq4rqtymx1f1jZ~lfU zGx$j1!*WE(QL!*DnjXQ#Q+$|rPmnS3Eb`3fR*p)wY@7PK{RbGOK6gDw%(h-k+HX8g zhyP>FXN%{?r{ecbR%*S4KcV~*o3~)clG)ZT&mQEy3qveoU~L9iZ^Os@##A*!qyrek z3)A=;V@N`5t`f>4;~?=8N5y$Wjv_2F8Iw3Bw3}F&K}nAY2EeenSN>JnNp$X09|%7_ zI(PV*ll^N;gBKs3J7df2;>jyd53D;fqy4omICZ;{oK&DZ^x+Q&S@UJ*Ev7FWdp({lB09zPtNT-1FxQ7pzlF zbwJpmnaVVIA|y7{nCZxnVEhf=uzvlU=)QG#9WD-)sz|V;rDhO^jXo^yoZT@uNSUKy zd200a4aL;z?{Awqv$aKxj0Z_*`0k-Ao8nAUeKe`aT=D4gRq{kj-WnM6TZWI6x{`lh zH*s#IJbq0#N>W{avMt=au&idPQtJ>{I|ob!Ca-S2zf@AtzO1Sn;RLnMw>1w>di*YBT<-89c<{}TC- zHp%D=tVMO>Nhm&nCs(dK5F4%LR4l*`J(n+BwHMw#)0@%}NG}lXT4f)!ZyO?$ziGKU zIb52K-!??66N$%x)nf!MU_a{*!oc;Zx44;svIl@n)@-9|vLqsifhH-afRLwy;Y$c4 z(zzx*ovPqyU`nt!LlPk#r~rT{#DPgk2;!V@K%B1J`T|4Lo&DTx?pKfPczsLh)J4#@ zzUr(wY%T7)G=Fx(Jih2qGIeIl?8t{sWentsIq#cWg!i8v?~A7QAFh1zaw>h3S1n>H zG)NHt7D;?0si-CF=K$OU{d7-=hBEqlB}D)`kv1%g2b08It(;Q|a7Adn)$g)03L=wIDf3L6eRX9^tf z?qrVnF8Aw;*B+T{y|j;MKhO0(P*YSzqYpIFJLj84u&FfyN zHfcyxM76W&nbMkN<=Q2m`t!Ts{|EyIhw9|-OD-0&&V3herdke|pf2;l)vu4dFde(T zz~=vI>@lYqn~1DuHRBPe{0&WPx6Z{+kI(s(2@V8DuZ}&b?Ks1!?+20zzHH0nYqb5^ zv&zh)_Lma_#mw?;CxYqO)IITp{Xi~#8e5&9sn{~Qv=&Er9>bRP$ZG< 
z`v&oBj;IQNkOx?(GEjxi5#dr}1SB9!N+yxP8KS0ECmL}9F<7?wIR*Q*!Vj+$-+u1> z`#ZW{N_F-&cAeTjJayTL#SO15vBs`CS-R}R3Z;C@vo4^3O)mIhf}5velLtpKe)4S`ceW=FWGP$ly6Cl+xI9I7-Q&_w6PUa;;hocT+IC`ibAWAx6sByz8xnr#)5 zrL-q2w5vO!6Yg|QI5VBmvipOc-p9s~B2Z=5zk6@o+fwn9OM-$wAz5Bv5PGNN8fOSO zuys-C28bChV4na{6vP_)A5wkcQjhs-TKCPYx~wnQal147yHf6Ao#j5Zy!c9~tZ2d5 zB=ZAcAlkQ&@4YrPExE&e>LqW=z2L6yne%?u2>&2uNHy#U5fKCr|9*HDF0hCdiRLsm zNr7NPwKR1>+jEl*+beSV5#s~VY2TmfSf8Ph2O@x;#e2;g zPxM-vY8aLwX-oJTU$E^g%srU+Z0p!)Y3s`J;)Zxed`H4jgB$YMGy5UX$c(!Bh*S2p3n7`*2 z0AcCLgRK)j1ENBWfJ<6{X`CCvEkLX4mr0BDi(_BZTETPbzv&+MBFW9vbP8LV<~T5~ zR#&^Lo4Z3l%VOtqf!@{~W|WSxE6r2=LF)p;PB^)-a^qxexA0oToJU>XB1dlBIy@9@ zw@zRaM$S{vKe>43WSHRbsUTjQtU}OsqhjawgC6))aBwL6j~p~dL5&%>ST{X20F_{& zx4K9c>k$E%0s&5%L8uY}Q6iOV{xctH?(b?EW~jR%TZv09sn$9$x^TlAVf5z{BY&zB zz=;0ZE`O;K0d>__iBnm>Sv+5$8+F$Y>TA{vcXcJUh@vG)#QOB{^g9pkZN*MZle=W&0;%H)PSwqxg1fw#o-DTRiWm55erS zdhVUCS>)7Q(S#uMh3nHxx}P*}$efEC5D4zojrEm-e8Dw+TILbsuCCGZwW~kY4glpu zSlUl?Kh}yTkET5uxSVmSPPbR!?EzW$h z>Sc>M(94%kyOLx}#pnXq9+&x{B0y|$gqF*daodS)N$$Vs;%Y6h!{S^qnS}*HODVIp zpb#{{(Oa*|9PR1Oj0v5c>FWgebjN%zYK+g-vS5{SytwS26l3g&3c}%n2Q>g4{((Xg zph1O7U4=KyvDB|MF0F?~Z7IV=4X1O6OU}2nyl73~`_#X&>L$?>A{GfUmW_ znq7NoKfm$a`Qevm%Il<4`pn{~gI=QJk{6zJF%N{tUD!GE+|CuRKmq`0;I9i`s9xGp zs%*P`PQzT%fQFf?2|F|vI1U)KSt)9qF`wDHD_I%(KJz3KGa%^-w2pA`zVf#w!NqhSl*JVKDXUlD>hd4i5Gspc0qLj?woc zkgD);cCCCw1AylnE_>GlCdj7_txK0mMc@cwL|v}g1U!O4#TA!XN#T?lKeOe5qm1cL z_Y;!d|xq{TDB6?u++bzCE0T3HQjWEic#3H@Kn1=P$i79 z*{VJvI%Bb(KpT$$j@@}9yT=4uuQ!WUMbtKo758M=)@?AGz zSaj_idezy~TFiZP_mhp(6WFViavxtHAI`WUYO~_$AxlHHp{vl=Tqqv_qFKZ+RoT$> z!c4=CJj^_$>`6^jYtHq4HIw)w%rRh?(uk3jL5;*usCPM*gnA{xTrV+^3|0U#NsnL!mf zpa!OgbPr6BfIKzA85OG3DS%3-E(58{h-4rENsvT>#Uv0eiS$$uD$qvhl5KFnY0zd@ z{(@?6+!fpT+9k$|&57x)XD8MTceTIPrcG`hoW1OHXX{HHAOA}o4D6&A{5a{W0Wen$ z5fLQxW4>f2!CIZEO~3g`D`JvR$64NdKV0xq?9%uB4xXDEoQc$Z6-g8@)A2P6qk!u; z;uTyHvr(mL-SiRV-XlxS8c;{gk(kr_n{N2mN?{x>*xEV6Ys(6!Gtp(6nuOpTa|^W0 ze7dA6<-uAq*tYnrxzL-NHI^wStKDN_VJsFGdCwb8c01>8T{Jlo>*&}!-5Tl3=rsA+ zQz@RIg}AAYtDl2jtxA)uyj%MmpJoD 
zVzR`Bm*x!w=+^2ZS<<3O39zN-wNbuv*75FbJbC}9MmYE1vqv6&xhdIg-`{tUdKM&+ z5)eqKX&i{pwJi+;3vP%ZZZQ&amFrp%J^Z)R%qE7dL1X0Q{H1FL{eOO&d;U~&(<%!9 zj&`@g1<$|GN0Oj{8qfykwVPAV29_2@hE-l5UZ9(kPVUZ4{R8_m@)F7I@DoY)RMBCY z&e&wS1rUTT^%ghFpf6%VCxm-tom@bbdB zY8eX_dYn11x$!CcEfeVol2q^-vo_?9@MTx^#>Spe?oRekuDt%Zn92DIehbbaT(@I( ztuH-I1J@4%cS#EaWe)(EshApp6m=_(Dx@i0OA4$>O=D2Axu=6`1pvBH!GJ6Voa%s( z1SAb#N+OY@06G8y#0czk`Z_W^ebo<`dHim2!&$iEh0?_*uPz;3Q-XDeB~(%XOt(R}Cy z&C1lN_b-_oSbN#nYoHZJ2b!VnQ_9SMTo4_rt6FX}P@OB5wTg4cGz@v-F`zjBXwEeQ zL?gsRCS1B={;=KJ?#xU!wiZLD)5^-Y!vrdGE+c%*GBhu61$6Y-AK=3Ff92U!U9Ik$ zZ6K4bA8hM6+;B?=0%n9^ zC^XGgWBXs3Lk0rs1s%r0{1q3Sb3VM&H{ea70{?5@14a-~m&)3brK2Zqj*nt35X^Cn zt26uQ)$#iDf2^JE-ItRaXc*xiO}cxlC1z`Kc1pU{AP@o)yQ!S2x_%Ha476z%-#;I{ zZk~)krQF^!1Q^Lg+|~1h zbB-u@G2cQG8TG)lT!}mte87!W2#^U-H8juETnQRjuG3i6s?0F8g zvW^i^v??+~3PE5+LMJei-+ORXl1x|8Tr%?6s{z10aE=TZ)e^JRz)*u~#k3-d#yqKG zy_5qpuKO;qU3bCZpE^x!72coH+m-WfTzYr74+-2?1fcqjc169EmRIz!uFbJMJqOqbk+79Fj%oUdQ&>I-a6hM0i#z(+1RKiq?g`FUcUD<( ze+Ofq<`LPjevbKpSKnQKz5<(dpZlc+%lijmSGMB*n%e~ahhxJ({4VxJei8vli6oI> zs1cBo*DqdwQn_KBA~BXB*RW4?`?W{PL0tk6?W)o%rEQD*>vvuG;#CIx|2%Zy7}j)+ zXoeY`ICrx#o~7)QmLECYqOO zP^!;bhDt0GYeHZ0Os3)bId$rB__yYCwXk6AD4gs070^Np1_1V5e_ulkV-+icYODU1 zyy4`*Cfzb^P*HAi?Fdj6IIziPvdu*2@DjbSx;>>P~~H zk~+i~h+0LOk%&^0P1W^ZJouC10v+exK5=Goq2wkspn(x913y^-p!%+cn&LK$jaOD$ zvM@=b>J_NAF_@Wbc5Abs!zC?~@%VhcZgs2YeKZL>itP(CQ#ngwW2XP3%Ld0AR6LFo zh3SG?Ty+Oa_cg}Dv~QyH+4N#|19lx(Bb+cIrSyqWuLz(#U{u%C1|iA}GzL^pQ7@G& z0&{=>Cjgk34fTWsUO|2pf*O2lca_IqJ{YSm*f4Ug1U{4WtqrfFm-n1yJzt+b`sbNH(K^&{$rPAi^slsQrJn=U8KLS@J zzZoA_wNT;G*E`OpuP{$(F+-D}7FHJyz&71a|FnDL`0<=rk=mHn(luL# z0TWfYRDf)v6qLrPRJkw(M@Zp{A%(E@Z@r%KsN_(Lt^C-0?|eP)X^bFHv0FEgiZoQi2G-O} zF+g@4YEjk7-Z?Z_8f=T>imco+nbUK`!u5PSYGYb)M=GGz^y}kQa2rR*y%mI%`*X{K zEd;_wXV7YJ28zk1m5E$qWVT$1EaYc`&T(r)**?z}wT?>cSqg5P%vJw8f~YfkQ39 z2?t^_8+=1!5Y#9@POdZIUjR!C+CcO&+QG6%ZhWX5Yg6#_tO-&jS{ zS)Y0Uph0q5uy3m4+SzdKNw}l)e5Ja5h1Js71#)PwT$EX+*f3C$aVBpW3hL ztvkK|z_zwo{f 
z0OVAO8iL6vXO4dYp_n6047>5?h^2O&$D+U&j;ka6BEM#up@|>%d7fN{Vjk!%biA2B%y*%K$*TK2oNB^T>2%MTAv`bBkdc z7O~h60?$8_ULIQ>_Z#hWxz^EA8|p&GwawXvmX2uUd9SXO)1PhZpKXXwyD>gLJl07s z|9}`f8Ea)TA`*;_(_Kw9^kyDtnml%V2Cod5#lHyk$4QCmrsEx+YPD2?G*WJf8uW>{Zt29- zm2$M}#Y}2vMr0p{J5xhBdFy*gF$1f_=yNJE7K+&^=aqO=!=MUWx0XNBd}H6iHcKa( z2I}mJ(h<01-SOzu)8!w>UunL5KIypr9%d z*Y!XmBdEJn1*&TZnIv#Qa!*GD7)S)f!y4isB{CRL&}9t-Y?{a!I@H0U!e~;a)n!+_ zR^wW|@S{u1QS`$<_H2FqGV$_)#y2_>qgM^jUUsId@nE+xz3t@rk9ddTB=9Q}pSg{p zQ1uYkLSNAoUzd*UDnz9OV}0L!l2gC|gY{w_p$G&jW?n={WC|f&UB$k~ePWq9r7lFb zviP5u-*?wsYOV@Vg7bS$9c#^N*KXJdu(QKfvbAvs0JNJ)PVknYX>VZ=tsH{Ikt-05 zS))1BP&*DOYl+3$%?T%nw=ky8yq>&ncHw2Yx!T~A-HCTH;zwrbjj0m{vJr#`+ypPpZfYAUUv_8zTKIp35CW<`MF|E@6EnZ5 zBzG^ShETyB8Zvf_&fT${DXcEICqN5|Z5FYO%GZXMvmu~N13(giR0Y-mta`<6i@c|Y zi;yS;cxdv-zSqyIF%pJQZ|8@r*QU<1k6t<509NRtu+#%n`(@Qtc&?0jK&9_=U-U|L zi#^ytkP`a7w{_&OzJmst-^XSSpP4jzC^Gq7;5`9(j8rd^=L!!3&#%O8YHg0cCywj=4*Jc)=Aj6)d^$ z`_x&!ao{LkzwUT6_elO1k>}Ukv2SKNi_+VE2!#{GS8-)f(s75^sStEY3j<{j0CIeb z3M_O7=^B@TbP)&Q3sdtOL=^}_G6)QUxU@JTJ|_}1Q~`zoi&_ivSRCqLQBa9dM?IH6 z=UTihe%U1q{L9}rZyks&dls!eWi2~(WugC)B42SZ(sT0a*}hMlIFr&D%{t$^gk#Ub z%m*XzG@a>A5UXmIhZSrT=k$liDkD6lYVDq9a$To&Zdy1%(!qpVR&7oOcEl!L-PbO^ zbp`YeJn+^#@0tu5u|QbWbu(Fi^=BIap#SbIA57oFW;|dtxjU>%o;o+n+%7b50$#b7&}X&2Wu=U2&J0C`1q{^?GBWT4v?5CBHVPIxhS#45G^vZkFnc z$=kNL*aE473ZY^=^E7bB=TbCWJ#mO$Z%kJH6g_AcsE7Pg`AabIcTyyg%K5V(unHlroXJJKL8BfiW^QdreK7 z8S@1t05(os%3sVj-H=w&Ps-1v&gK_hc_Nsd+x{Pr-b*U&DJL^KaZ(q6v854nwffMK z%U-X#F2R~gjP~UNy=3jmJ}dip_5t;!>u!0}J?rt=?cayYQC&1AuqppC^_pdA#IQ99 zpzHxatfpJEZlM5C)vU%m3}xg?!jY6l&1^3Dh=Q(34w}X|00-y{(z@lkYQQuLT&xl> zxQ}Aoho$SDowFhZ^WHX1l=l9y;gWE4`{A*5`Pp5)9Sfda)P8cuz{J*53tReb-dnXz zK_{Dk)+l+1G3TMmg+_rmI-ybr?3gAX?Jth-9aWmCZrClTS4fXi5u zc%)wvcks&^H0A z<9K$`(Gpkqydg4@j>bzTA*yb}(AH8=TrJ03J^x1c6?3+2&f#n!8a+opg-(|n8_s8C zb8WDu)0?mHCWqSMbaK{i8k*3-en%Yf{Y9+%CEEdq>Tm&t3v@e(M1`ml08Y4WG1>)= zPJMwX!62}p9i*ZnM4{+`8nE;oU(KM#1Bdwgyp!#ENT}rcRPXFnEvMP1E`$G@V66U^ 
z*Ca!nyg&WNV)vqe`oOMFzBZQFG*Q=nlMgbrvQx{>-@Zn<@6~`A-D0Y{m_){+*(@ewG48NW37dIHw&Ts9B7Y<~aR{LOm1=9Oa z7Fm_TsYaCmMuBt5OWF5Ly_D27!_bhww(qpQt0rfjP^R~vpRjs75<#DU@{|6Y6=}@X zj~7}sRSAib>r!Lh^9`1&8h|0!*u5{C+BuCfPs+O!X9^pa4uwO79rwWK0jPE9VK(g^ zX;lpZAcT|berWM^Cr`8(rmE;7oLiGUN;WJxXJsFm`GflMM;AZJdwD(m&L0S&sPLxZ zy`{9Nf(U%el%VVZK#+@?w1!m(103;C9U*C#0Q`jS2B1qf5~)h05%7^wmm1*=3=Twd zy|@b+NU9vHXl#TryFWHH8uht z7sHiAVnhI~52ic)f7)70k2f^<^B{uJY0%?uY0}&mDCV+;X#h2NlMWg%u!D$&S(R`g z9BfU{cIl~s!*u}~1B~4isi_F0sMOV1vM0#`6aq~Z0 z6cCQ^!*b9xV}z%)vA-5%}_Q`_Z;XV^+UNwTVH8vhiYk1ec`7IY0*V-CAc05% zVE71#8)Lv79RXrOM@R$i%LqYbxFAre9a+)-_p6j6hD=>-*kP?DyTV4}9DORp=pSDud9Z0{et;#YYJ#F*DfVRPx?K_Z)a7*3Pk_ zLQq$9Gk|K<(=8JrO!azva@WzI4ioGhU_S$^yLjCstjgYjA~v`eG?;<;-ZdL`&hm&p$#uy zl}*Lx%l2F)stu7(u`~7N$m9%ZD4eBH_Z?T~hq4WcO1d1g&SZ2*y;C*v2Rs4}#dQE+ z02%`dkn6OAMh!fN0B`_3%4oavKr$L-@DH?IdMZc|MGgTln)+=+fQx(UpBOvcfSFeh z>UQ*4`f7csxbahe_gu&EXl~(msXZNdZu#7-%30mpntj4;*?^^R%iSHvM_Vo%;@W-k zjXxA*#E1HiSBO9M&MQ`9s>BPUcC7|)5`YEpRZZDKMc0IJ?R%x7a#a#6O=djxbitDV zHfmTic?53a+4{rE{NK+^B$r6P<>N5(k}{*)<#Q*ZNGBFn;|rgH8_wiB0RRT9UB2&J zYjvGKz7UxK;499lh`DUJ8)ps6Z}Nxj>8bos`k^GpD?HDa_ZK7A5zUfgHa z9~^zAJo(LL0{??g9t2?~rYb;l`x82`OkI@#${qkR&!UxPHBpmxy&A_ZK{CX?kfEpJpmVq>pG<+lpku^2&jlM zuB@N*JLgwMUR_fyZ#|J)d3>QU7`v-?C}CE;;@mfO7i@!t2t-H?MwpD2EaBTU)O9Qb zDaTf#qaA z)w%O3_}woPtH1wx(-JWh`~14Q;J^>vVG25QZV4C&d^!Uw8`=0MM9qzwnhd9q6vZ=C zPi?1sX)5NGS-Lb8sZVz=$v4M}dAnRj4So8j#0j@m8%8{+_UAh3p35@>rG`W;F9aXS z>aghTD40J25Mi=-Td;wU%|7c+j0yU6j4i42R zeEFRf5$>J-{P;_w4r5%xJLv1P*YFAUj(^@grvqScH~(0F`8&XzmR(PAVnD+?j{n86 zQ#z5Fwv(M6>fF!=_Pz3zN2(^~tn{OcTE=2$m5Ec?(fmW*Lg$}Vz zdsF~ILTal36HiT!CM``xyu7e}2yXU^#K8DC>j&PJFGga?eV^|e^|T0+Mr z2$1De$BV`0YbQnVasH*mN%4W~NM+pF`4yOd27;)>^Iq;mv#la2B(Jx%{dMiuV`n=J z0;=S)vVOXkY;HfRc@Lg_C4)X+$fwg2Gr?_dgX!boSdqyp&d`jZAQlM2l%VVZKweej zfEG^0ZR#k-cw@}hu;NG|>_|~7b#Y300U(g7#v+m-PbEll0vrqyb>};p52xO@w~Q3vbKbW%)Z&OM zst5ugNU3YY69lPMQ3AeZ0P^M^bC>;CJJ zVG*xVbP69EX#3E!-)jpl|8N{Y-hGm9{Qll0%f(3au63V-XMb87j+&N8tfmwr^>iM( 
zw8Z40W<9=x;wED_uNia0pe43>+K%kJUv8?+PuOr~hQre^ZjLWQE{@|ECz}?EV)La`4fPV@8UWzs#Bf9Kj<1HZ0R`!-h`t}bk-#V zOqdJ+geIVcF%YH)MINXQK>FYQV@fnVGx>#AhK_)(o0_vkIW)hMjnaR)_1)E|lFfj( z?SAFiKURQs>^i0q71esC|6Vt{7e!P>K=ka`qKgMu{AcvYUn&MElll)a|m330f4|l+s{=wiJ`9dui@09sv@NkyYJa9CjEf9e(N=74&%30E8bs}?8B2| zjTbr2;t#{@D>AL=g>xkalwrWz?T3@sPEqcLRUSB(uX=9W+F63!Balt{=jZbCe%<&22oOs*j~PYAd>?93)LO2uKVNFwK8@xd_r9Ju68p%RnRLOu`m5pa zVT>u3IcHLOj5AL#+OfO%#Ok#pNK(&dl(|=}>^-e->>9%2cLOiYuDI^MW;0nJD{s3o zoIIn?8`?}LvLjwK5DZ{pDgb2<04fw?HqN)sk<~>}N13E;MI8auNz#ZTYAu>@1@axk zC-q8WG^|UmMU;x>Qyn}V0b&+%C28(X=Zy|KULDY6HTcyMmz)~A8518ruw&QbUwPl>0HhnteYa5G z{*`?zThK`4lb7BNPv0j?n9*!%2MrXYoGhrzKg0y%ni;6{oq^Q#glS$q6TldUwQ7YX+pYI3~|@58}I7hSL$9( z9ad%6DaXebEg$ABKL`%}wWQ&K;osLS?j0ytIAzQGysC+vk&7z0~>~gacAar{MKsCc?gPs8yag~UiTM$2!>yWoC^NXsg!mBVpiu9 zO*gzcfk8vMrL`}5emZe=i8>F3XC9uOZeJ9rTFZyS>=WSGI?h*K^rMjosG3s0DAzdi zI_SdGiKVIw^!?ek>&DUiWBxrYFE8G+uX2vnH-AG6zm8R=v7A42%*L9jX+(og^@)|6 zU!4E|TO|=Sv2frdzP5Qtr@uXQaN@?Lzo`%T$mrR*8^%VI=_Ws;c*^*}q^=o;h50;rU5eUSseqmk%8u*E$KOk)*3<`3bn;tV~*v>`n2ezDAg?nub z%o%>?opr^Q*i}^@6fSL_00#&dN=S)-g+6W={bm0}sgtUaD9<|xM{F&uN@Mj`m2mRh zk?hci-u>B^cX@R$v_h#nU;Np(F?$Z^*RHR18p|(62*k`)t%Urdk$_;hDiEIk>T-+8UY9#%3K= z8|wK``V4Nu!^NaFJqS^D$I{_pi5m-3vb{W5i`K5#6_A+&h$tlip6XB?C;;RDpi_fJ z4Oo@Hn{d>_ZFLC&6BsaHPXpHOQlx_POAtU!hxE3GYF={o^lh&lI)Vrwu--OvG<|%M z!~g&w07*naRCirNLxOSZ;roh)Mm zLo1dJqQ>vL2On`%QpMuGI_1xYXIhl*+{CisdDCGSFaQD!0y;I8t^h>UUH|GCn&61w zP|?JF>9UCx88~9~)G_?_VmWw_mmeD)ZCzBSE8Y+1kHVY*a~Y~L!I05))V92I{}7HnJUzdl!&PwiyT#;do|DjIp_o+^#60G+$cA()JDJc_06!F^ zrKuOPZM!CA>5=+>G#uEnf%FH1aj(azInn{}co zf$+UeXU;UO!)1j(aN^+n9`C8jNz3n!?cO1CCxc?t&Fdmp%^H{q;aRo>We)(EDIoxi zEUh`GYB?ThE1Mm8VX}D9h|lE~hKwb`Tu7&xhMQY-;XzSvt>nQlYjrQa)GY@tLbZkn z4oL^=7#-1|WdFLAk{8e2^2glp+RCuw1RdbFW!=t7G?Gq4=PoMUIC3tkGgh7b!cBE0 z8iZ8ESh@rQ*FvreMmP{v5g-_e&3$`t$-)2{=_~?M(m6lII6&A|5;WvA2Et z&|(y&y5Znr>|Y+f1`hOo>*}ec3O`=>VfW67kglmT71iGqNz zziO}0#Ka)z-J9l{#LE7DXsGw=@#MN{ys^-?a?N=(H)FZ|+ghus$lQEvt`IfOF4;4G 
zQfbi!QAIkFpuu|<%@10Zovs6Q&)1U9rmMgnd^rlTB5)mQxn_8i4geZJ3b5x8xPaRx z!>wh3112Ue$1U~{N=iXWvryt*Wo1Kz< z;|_iA50t0>HWlFuFB!uEd}qVV^KNRnZit}LX$}r9U)2X4-*yi^>_z-gR)5>%-8VXY ziK8~p1?eKxTmUTK0t|GO&C?ao4fVQrg%S~|2M8;H@w+#e>j!{E*QVpW`YqWi{0@qL zN{_THcAZP!2?J;SX#-Vy=P>}*!MGeA?%q5?J?2v;;D(0hCsW(zB>Gi&_V2~%_9RoS zw%g?Ni{Pp%DU43|lnIPgylmM}^PK1Ti~<9z&C&63uO7;>UTY`sFKhQ{5 zyVu+V>Cstx) zd>}GqJ<)FIC`2Jd28WrQ;yyg$@ zLhi`uUl+de-cLMrNeWKu|Nfpm@ZV2VG5|oku3#WZI9A8azBO^8Q|q{V`l8DU2SyhM zGcyo$T#>a~3+ERvnbb!oo9gE`H`SXWnTnC&ns#E{yC%-cMr9BhgUJkl_VJ#%Q;HRv z9w#l+!!%jkeucF9-!O3CQSg8PP`Du1{QqzTA8>Y+b>i@U=Xsva@7!Kad)j#e zzW?|0dH4GTFv1X{;QCm?c}9rgx)OkLfiemJShBXN4+e0R1+aa9%-SbwilJj8%wZ7O|OAcQ2+6Ldvb~ST15f*aQe(h z=hCyH<45e(7fPYPi&cLY5kDWzuPu?uOy@u`QRYN|0tOhQzck`cWx)ue>u%PlkR;F$ z3WD%Mr_a?QgP0^gH+{^yZbG?_Lh}b2t?+-oN2r;3+Auw@xQqP9Z9C+&YQ&48<39}s8HRFB4J922D#3BQ~He<)(vWz zgf%2zpKZTvSf72ebYJA)7qX{khwIB9{5+#@9xK1mJ$$?dOp}Im>a~%l*RI<$W&jPD zVFaOt?~Hd2ERIV2-$&oB|Hz+9=SJ%Bs>y4wfXQB%5!?Y+jvk5YriDYg0?>8=kaQ-b z;5-F@6cIFz3aVM3ZI4XO6X_~7TNDd&VLs@Xw1q~h)h0WiAmb)AD?EuI9t>I4#@Zn; zkrpx*?n{}G0(!j8)=usJ(96GFAg=7{T{ab6awszY#J22-3)1BaPUf#(_e27mz#m3QqZe$D;4 z!qerUxe12#CD)0*!`ehVLJCuPqr)+ynruaFn}cm6aqr8PhocV@#|dPp{cBG+2v4=K-sP)*d;xxop-t<^zZJZ(Q*w zb#m64(=>VG&S0uvjOhhTyka&V(FBv+Gy~9f0g%Z;NFkz>BEXR1C>~8)(fSG8G7_or z#$!vh90|%vH|0TW#c)Fr;-HJH!)mo+T(hH*Osr-m9^|=d!VKo`Sy&mjqYoR!#^bNv z*md73=gPigTPkB4k2!13E;shgC+V%H2W~Z<`(Uu&v&TOB>8g&v5E|AH(*r3KQ6NxY zafJ~J!j%xFV?P;hS*5*P3r2PEH}Tltk5%fLLC9(O*DU~Wm6ZtV z!!y5K_1K4rH*xUQIR`%R*=Mf?s?d!e`v81>fE_oe4)c~X5a6FGhB`rvbhp8Z#hE(I z7%A29nO*}HUNjkQ$PX=VpB@=k@ltAANYs#-ayaK2(7XAT!J}pe>k&!TKMh9sj>PE6 zP%=C^9!m^O0XCj(F)1%PI7Z35?sI^CxAIV=_PqY?m1U9r{- zQikHJ3}E{J(C~+aC8$^p4+>ji$^FtYd9ZZdc-Fad*{5n-oYNz`a{D*osh5Tty@m6J zxi2dC`mSf8bGgUds<+cWJGi=|k1hF5{^h-_K4I0=T_bTOY+xAvy^T_3Qg(;{kXQnM zySBER&VmtzTds2hQxRp@G?4i5!zm2NuxN7OspB=*4@>e}n0~ZczFTLTR)kDY3lZD%pKf7NZE|ht7I{ z!6X#g7WOqw6g=Tb2_WWdFYL=V?o0*tV}(DKNZ(^Wl&6Vu`N|sVqiZ)aOGJnq;ZG5)wAu+n(@H^L}zqxa2b^MLGj<+wHJ+*o=w(1SC 
z=*0G!GoKwfwYY9DUn=})YcW7^=2=+i3M6ur4%oxGrT}vQEQJ{!{Mpo;B@KToQnPbi z(^GvlwN2T0d`~ROi4hwqKMywxZ}F$r0DyY^m&s@;KAwKI<(e(e z1G>KUmFwZQaWN7O^GfSd7liJetcKd{@c3+!ES@u4lN_`eixhgnShQiXrg3&)O~a7h zQ?#7IIw~9VT$M~Zb?VrqxAY#1H}WBmnN#BiB)<|HJrfP%Dd|9F8i0Lh4%d&SkQS6A zxKM#iJ%9n=8$flNHiIib!g&^DW^EC14iW-S2gLHreVQOYzFrzX4CYG|*IBx%5v7OQ#K6_dd&@AxTXf-8$Vf zxVmGAExEH2h4&~_iM~@{MoND(x2)GOOA5hx3IQlNFH%is!3d*|Ul&MjXv`1+VDec1 zU^7sqJa_rP@rG-Mg!yZ(D+HhlL$wRoKZd{w>Gotb%ylJHsU!OIx}DFY>n%(vtw!2j z%66{GxNSem_U~rBi!Esc8+OS4qpBRmqTueQ35o$gm6oLE&AuKrDKSlo{dmJ0`Nk_p zA^YU)BUR(WPfU+woYGyp_}SxnCWd%fb|0@Z2(c&yzcuvax(%F+?Ubf#(wzW99TQ|bQ1RL?0;W%>iSQDitO~?ucZ5IF;DTI+@B0%|2Sb?bo zlp_K~lo@kFY-UcZ913Oagq>A>IIe3f7SC9y5SbNb*bZ}`v?5*?ZR&);4Ew`J`a@Js&k?;d%-S4)Vv@3u z1kgR)PykHCv>gV{d@v^3{<^9XIHBn8$|H4g0Yz*SDc*)LH!>nbU`SD9=LXsbu~;sqpfN1u#MT z9F-9S14s-&1k7*EPuRlJL!Y=pUDXgCQL;8CIY?2?%*6m%2$jMl?gYSP%E!Ki#r$@l(FP<--q< zDtIN`-&Erb2%dA0x$vIxyNCQ=UNF)EWB!w~=KQb1jpc`0Ub+ruE6?@8fuCNrcQd2o zv$y|lrT@9g*f<#AuUo{GSCby8MBBj~>28Ay=Vt3-lew~KO$kbELbrwlK+*IeMlI=G`naV^$3r)&iay1%uvNqnU4x8;Z<= z;LL~f40ul%pa!t(V~_w~Q5LmISSkisQVAF&t}TF0y)hQ%zyQFHK72u0>R5kx0=%jx ztMta9Q}*>kIp?Q|&$Tx>henzI-7ms>yzdSxN2sQcz4m5xPG{9;4*l5B+3^i^!~U9Y zk=TwawhQW~+FsSgh1m%X=&JI>DIQ&E(l_b9ZwNx=zf{ zGXAQwcuB6Z{LabDD{fzB#0v$Rw~2R7^GvAT$PMf>u&z@YLa{mB3x=|cGoJ_p8n+i; zqmA?PM)t|^N2=F)2Mhf|)PH`5-+e;mVmf%!L$=4c^a+-WHxItLdCmZLJO>yCyuEeL zpmD8}mo-1zo&UhI6DLxOTDhD1{3aNj%H_?vXh-@lh9v?}$jD|1XuAMN%2pv#)Rn|2 z1mzeIRtj4JkN^~M5D3#CHVv_o5(zy7ftA57suj~UNuwyJ2Q{7xxD?7!kCG*!;{?3& zCZ`TQH*j?EqC2KLGTZF~i`dZ8X_(h1TShwAIQew!6&+(%DBrcC5Yp{15Xlgt9@o6E zDU^q#sk^i{9rN?uk*e604nAmdWP}&53y*^uJYS_>;xEmz=po zjEP^@MuI!|2WMZmp1lc%%tJ4b!#{i9Q#adUB6s(Ht>+)9PR<$t{>BcE`dVqCg4^Qm zKzAE#YR%P=Q~@DW>NjB3lG(<@)c8hYDsrL{p0&4!h!xCMv>6p~x;}dOk>2KbWxx!V zhGwIcjoVJPR8g%M(G@;DZ9w>A;Q;TQGL)9XJ_iQi!+SFIQP7(?Q~)HL7sV_ZDPS%o zyo*YZW=#%Z#3xx8IP(EAZO<*wNrS3ANA@<*5T?=9-RGR^2D0`q=$GfTlt!vv;a@)u zyL%=e+urt6M-&@k_3@WW&^$L9rno)dmAWWCEH1t+{qpXrZk9y-!>W4kL_>q$d~&ws zoCj5+B*p*;K(UV2*@+SuVe>OPolxLmBBB8N7j{1Y<|IUnfn&z?hK`3JW0 
zb0_^wm}-tXbFz_8QzsNx+k$_r+wfW{sZ&d*y2e%<>$h$w=6K5wpRZZ_d;4v+sU9cd z*C8b+yr=T5C>TD{5Sc9z&~^ck$&xb6ih*1^6oRTnES9&3B8pLD3Z{fK6tW_D!YnXM z7Q$YFg~EOcVZeb3w78Zrab9_*V03+O*aEep`7h2D-|2d*X~SLP%O^JD{qsX7E|^9e z_pmjGH{!P&Kh|}U=-5lQ{kYB!Rl^n&5(Od6Cc-nro>4>w0oPD~eG9V9gLgZ19h)Am zRR419mvtSMk9jKQmh}xEs{`;pvA(8zu=Hrh6CY8-{;wA~q1)l+iMO3sJ^}zQ{Es#K zvmMV~?dapv-+p94`U@~HYXHE8R$udLCr;&?=Xx+R9c3F5#k$y3xf%;k^ct{sVWuH5 zKD9oSCf(`Usp{n-5m#d_8I}#zu1{?}Hdt3%7-EPFaHw3e<@tr6*Rml3&^>L?$VWpy z-jfc2HbVs00f>O}xXu9}l#n0+BxM4kz~WZU@TCOG05C=#t;y<`__i;ASHPg4Kf0vI zNf_@L&4hx$5zeCgaqs3c73V(j&DJ@^Gi9^%>@8BYdhryi+6C*n1N;2W1t?%u zc46u4=(g}+uOBmKGLRS~yvtp&a59qx!;F4r3)is% zf`IF0_76Q}!IeRvbJ6a;j>~$)yzdvXkIqgmZ>TsQUhBP8trlX{$s-&9bRE@PH1y*8 z8izUeJ;p0_H%+}_uW!kfw|;q||MklF#^zXS!+fYl0C+H!IymGz4&%TjY_B=9Iy*y= z1jlE#u>rr>!e~1bAD{k%dGpJKp%Ii=a@ls-b5b9%5Uo}U6`cyse9q81`}s9n`U_Eo ze95@evV7lg^r}*s&->Pk^V^5&!U=efWFh#c(O)H$Yq3Bxazz5#E&!6wfNnBHL|N3u zio~R4H$;&K1fklB<00fi$a35kj>?3hsO0Tomxp?hu2Z`b#jJiBTE>_%iAOf)Pg4}y zGPMsje!KfXc>Vu$ZyRlI-_y`?w7Ir_=kf7NPS?*l!mc?tW?Isp{J?EOMpD&oVV^#FL>&Smtq#w&|C>r2;gZDRbJk1QyBiuG5by5X;C@sJaq?XnvZ z1SV&ruq9q?h@{HOLxp|=HZ96TYo?~x=(9$TV~tg>WZ`%)VVh~Uw%EOEvz(iW|>V*nQ=1&24b@57Np{KM4;U zA4{;!`ri}8)SEAP{ml}z#S}$#mksm|?=Z*QOTIAi+CE$JoJizRDStfG9Jcjd*4C|P zMFYeYAW$G>W5;YJPp}#PmyNE02-8fLm@xfJ&v27LLK5d!9O+qdagWyd?ZWKix$#wT zr~H{^>{Xs~!gl!tBmuA%6z2IaT-E-xq&Q>|6Lq?;`4_aNvp4J-RA#NE0l;K4awm_> z1Ugq*=2gq2W|(SNw2dqs8uMKMz!WvDuRPZs#*z&qCWzp-IqkMA)K7zkZgvu8AZRX!_MeB=2`!|*QQDlZnY!|+f1Um9|~ zD?%>kh$_%_0g&+$K*(h^kPU^Yr!3Pc8J1@XJIq>h41fp)L_lpxfsk=(*v&PF7`YfFWV@3MIw zqdFK-SrG)l)d>b5fWZX72o!(>9SAKcgD~mk02ojP$=@z6ghA&AdXG1T!jiE1=`(QS z30}QF^ZogaA=Mp)NF`$v-3&y@Ye z-+bfkPsX$w0{`1@rlyV=*a6wu#6jByKqdi|Z+?I7>|&IfeZF@A%8ZK_$?nU8VRF|^2RC0=xH|S~>fnc9 z(7ZpFoqBZD^Y0hFdi(xw>^S<1@}%IvU`QJsFJm4?|`5^-? 
zT98fB$-`g%_RjnL~@*a^bHRRHJooEP| zRL3j&4_reBJ%@9K<^1h~Z^=!GYWkBmj7~k4sjQ@gXrE|jZ%xW-GFqM($vUo-l47(u z+_h%hmr}Zn`EFvXzc1Ci*`?KoCx3T?ec0Yh8ZSs+zB4#E=1xOgpY{5t<`M&8iVeV% zyEm-uff@}6(hQ04fwAaKlWu6gn&FqGml}2(<7kwDbS~AOJ~3 zK~!A;sGLDo6(MT|8$BPN3Y-!;bdLC{>*7Y}HfM30&?q2))RboK=dd;qF zIIdo@PhS*FvO7Qd>ic)5Z)kg}cX|h;%b1E@oqPni?;DahY1eXPRjW)vM6pl?4H-)WC zqIDvzqeB1ie0IfEcGtx6nM@{DC{;nX*Iwaj?rv@b%6EL=01N;qgLlz{sDjRQ0`I~= z;5-5EkpLiow_*v(ErLz}Ks5YMl7mj26K@@fQCh2NE;!bsU3Y@9-;92*W6tccw6OpC z{qpB0vdxtX6VG-KJp!u^zij(+=SE40EFJ9~xdz7EYd<#l(&@5cJ66ZfMdhw|c$zIb zZJFBmYEA&$OwyTCGUMv-tYf>j4 zZ|3`^t8PuSIGfp58_`S)gY?>aHlFL20t_&9Xj<;<&9`2XLX}6Set*m8xv`UUMf>Cx zJA>ot@(2z&Olqb;ngXc-4^6*#(FzZ7Pe{!$z`*;*;x~*J?d@OeUj+XjCzs3S%675B z9dVe~mYdtx)NXm{Ks?ZP0g!YqglvPDWo}R;gt7=rPC2Foz!4~cjtCIqK~q&Rf&>E~ zf(V5O#kK-VuLz*DsuV%RhKsr!tE`Y9<<&pfcP#YbH+~XZIJPGKdN?`0Bsa7?!<)wH zE`5HnLNF!r+wM}Dp_mrpJVZbS0Z7ll$kQm(rKFmt0>C7C_}5R%rW>v!Dv_O087Z=Lt}(ClUyOWwBQ^}gpC9=pOb z)xn?NebGZt`zha`40a?0FmohT>S$B$KwleupY&_VxZ{91I~0X24W&dZRoPs2`58Z& zf=d;tQ7H#zAezbT%(PTu%_9XvmHLP0vm3Wpy9(y$mb}*8Ux|i{H7i`Byc-OG0^uuA zN`e63%!ee`4dP=Wz$aJ&2ml};0DxeLfq(!a3~DG|bU`Nqz@GPTttuGd>cNr8kO`7k z=g*u)SHC0Rm*;*uzjgLlTJWF0Ke+Go^t{~i+8HD~7q%YWFIUeYiZf?vzHj7eIceYY zk%5=aR7eFSllQ9Xoe_~OEvp}vy~Ha@co&u!BqZm0GbM?wmfJ31GK3V@1=48dH)GNh zp2Tor{p;h)H*~w#elk7&Vt#C`!R)U!mUg3&Y%=Ii3y*`6Z1J|eC$6nYX#w|n*{GJb+-oaeni(18W-qRH0-1Yob^(VIGjwUI_NLR9wtlLxU z*fy;B4~;&4+e_KA1Bunf#QWbT4~>>{HL=#wQ3?tPT`3$$GxhpKD@&Ad2$Y!_zA(9W zEcwCy^6X`|13-|^mMi&ewQ38;l~M(u^bmAIdF54wO&!ndO$6F505VxHd}*OX(T&D! 
z1}s8~b$JJaOJiV`B_<}bghS?5TmfOdDnb=u2|-Pzn4kiLq7da}0K)3mo^in=tbN+q z`hWJFK(`+Ic4GBRTm1n#w`c3@sWoZ1;G}9iv)OhEsOnz*MMAkihzidjfeNs4LYm@$ zu%;=bG@&qKW@7K}W#Xc-Hx{oQIPnSwn6h%;V6h+7?t32$$sar-7cW8Rxyj6$K;hdq z9{Ke8)TJ9AAI+?Xp5X3f$DDgYr*DxMUVq>@_|*%l7tW(w!<^dIXsR?X98ul*sD7pI zH_<@{nQUUnfGeV<7JXQ3$OhgK6do~lxK@^nT71OtE7H7^dB?|VmR5AOYSXWi_{G`PRGHuzcBWM^h$ z<*g7RYQ+_QeTyZV5Tnp~*{Pw_)&7k8ksF5pcBo8jM&~>(Cw|m|aEu(rR#yZS0DxnV zSTX@kEp9q5akTNX>t�p+eFQ8zavhOxIHjV=OxMPp(?s$3Ad#dgA%~^aXV*Yqra^yQ3yC>es&MptYw7-<(`_p!* zhExmveINmYZqQZGzj}f*&K;tJTf3=zAm4HEh~_^${PK-|&+RQPUZ#hxoeyP`!g*&N zY|@kjO(Mb|HuTDtb;Uri04pnZs59POY<&MjptW@7{j4jvBV4IKfoba03K0~<#tvY6 z$&JpkrYH9&0&N!n887Jo(nypULcSq*C^C~|zS7Gv&$KEALI4ba%(EROAcPdM6$oSs zr3fPoQADbUWGzKS%c4^_sKUnfzf~^##_q1r2Zq0D>?pYFj}%v)USS??wo;o%rdFo( z7Dp)IeEbTV0MP&mz@Rh$Pe({O5l)D53<<7@7?ttef6!_+j~-mon40-vU11*dc)uuz z{=|>&4EnSmJ)GURf|<{x3!U6def@@m{}tc+FY6wAORneV$OB7{6kdt}xFLAt5lqsc{41m(*(p4e-Q%9U}#x0wpzoKxqof z`34a1F3uLp5elHgy9m4sO8^WK1i?|HDtQV3I`H`qhA_v0f8zMDdAex^{=%L!k*g02 zcxcc6bk?VjO%wNbJNW~rC+8K`HXN!81*E#<=z(ftev4^9r6o5ob(NF0Z@q4~Cx>I< zhInmcrU~BF^+Pk8W$ zV5YD_NE5yk5(iTyKW0_!nPYVnbP_0L*7a^0>@oo(qyQS%<@e?1Z5smb;oj3bdRl@x zAvlj|I=tt=eQ+U2WC~4p1Q|HAd5Nug%yC^I!gVMdGqGYpAtVnldKc^1bOt@uu8<1j_)CwF*tYJ0Ip&P}!@2`k z!sOIr=E>)r(>qkZ{m3s*esF6p`Zxga<8aSq?|ftFU%$KVvAwXxA0ofl@mT5EB)sds zlaIn}DW}_@%x}1uxP-;K$9Zc!QS7N0@yjcy!JBf464Rqm*b(-UIK?lX;rvXi7-h6kr?^o8CM~l5Nlpy z_vD%t=lDl%nk-F6qKgQem+P)hD90V?1qr&+<+2sK2X3-UzQ-KFD=W6{JH4@}HxAyv z=-kY~Om3wJ41Ndo_Lb63omHNk5L`-62^VkF&TgN0eooTR^f1=VNNInuWBaJ`9~v0j z3GZT@V*(*~&+)sQ_6AMkwqGqP-}V=*n3-@r2^i+@$oalk@&yrQgksY$buGkn4${#O zLINWk@TneT-?B~6w#Q#h2HGwFRG@*cL@5MWia{qrHo4J)2cAj%5C|Xy6+%Qnlk$Lt zBm<)RpcoOR;Z{wcvzCQCq{=4p*EAegrV{o3l)C6Y_MHgdlK-5$GI4rw#_H_xSDs{> zk1a~>ZEJbwilIzQ^RKB{-W+%sNkFbi9E7=M;)+WRg&aaLc%Zq|w4QrhPi#3k)@e-6 z{-);0R+#L4TOEHP+qVsR?APyn_0Emc@duBprQUGyiJi~=O@Hx@%N{rdYhV!FbNTuRiHb3>g$E4ckD09~v0vb1mWwR-&QHE4YqN6j(w{jz-~f9Ms^UV0~73 zXSkjv%cEA*pJJXNWHg^lCKvcUhc0ghBd$>ijpj<-;XlNY|aoB7Rc)QZHHoJpmxu4b#BysH1N 
zUEn&^VC5fR@b(&v3k$P6(G@{uKpX%RQox|5v68ceL0fL0D?aa@U#eu>{Wr?q@Mo$mMmjrqrj9UTQqx?RJ9kO87>xvapaqzLySLK0 zc}O~s&)$Fiav9g)|8MBqC02640BXe&jaFn^=0TdMjVBWg-qY#0WkrY?GBk}rz$yWz zloSJI7z&6PD9s=2qxL5+3=)lxy;v7$y8uv?vI;PUsK>$Zr73HWZdVjC6b~zbsIm>B z6spmEa1le{QiKWO0g0$Q4U2J_Sa8YpJli9#}kz#j720S zHc6JEP_(F+i>7J_y1w(#JtD%hgay6V-M`y`+NP-yWaovW61>o_7|@C z(GRZ$#h$~R_txJlKkBAOqrf1!6__UJxoLY*EHXP-i6$;7;s!qA6Q(kwQFtE~wJ;1@ zQfB#xg~r1hMKmtQT%RFwj{k=mEr|U58Q2TksHo;jKoWE2w6>2wRh_?vK|rDQ_(@BF z(vS>vB|#xjpuqMy1E4wp7yt$ai~zt;2oRj7KmjPJBuI(D_jw2td+Gg}jB-uAHIl6b z#Zh@dUr+rNZ$s!e&;Myj!^l91IghUb2y&{@d(t7-r*B5iu zPhWZF={}eGTwn8j82ypZ&5;$xB=m>M6(9l#Wf;zjw%h3vfl%9*I;9~Y(8I+5MSK3} zdE9qE@Jq=XvzzA)x?kEpKJxT*Wr-pAkInQRk~7HI2p8NJo^qC~d1dw{zblCmas^lG zPo98_41Msy=H7w+RAIGr!{l~gFZl%#j%SLgp-!UdloA}Ihc9-BQYc^$aGWf@=`7sR zFU774$4oNP(bY;~g*cgg?W-)D>GhG=J`EsP#mLIk;*z zwQ>U2PQcvJIgu%9|8|id2or?D9P1v}v~W<4a!{HG^uP#gM1)=Y%s;HUt*;G!`iAdx z|HDfygVCwSX5sNene9e&_n~k7@Q+(lD}UM75|w@9Pp&`tD|+uepS~wm6ULDF_Z>fa zeC4z35+_N&u0LRZ0;NWyjI!hcnVC)%xS3M)=CWZ-8EN z^+ddQG74v!FU`gy!Ke)Ekb4&HB!lA@nE9jnkeFL8`^m^g8hZR(3OG^{Nrd| z3J)q9x4zt8f9U~Y{O0dZEU259g`o7arKPWBOYy>G_8C)vxA?{<_OL}s#{+-umV>F> zHMyMs*-N^g8F5iXp!IhJ1NZuder9FJ=6WW$`j`G984fEP(-!Pf35@Fv#uTzRXfyomnkv#PoqEKt7%s-fk-Zjq3_d!{*A{ z#l;&3CI3?pAo!=i$>xjsf>SQjV<#6C6PtE68JH0t5YbV9ZBtj$HN3ZUOYs0eLlM+J zE@KT<#s#>Z!vy2NfQE<}3D;dT&xAP-y-*iuy8!69=^D&1)=irOM!Et@gaSZ-LiemyuDFCDZkcn!XCL6Cilo1gHWaU@i=(RpVZ$F(}H$|3q zk=j#r4P9HZ-K(-{bysoA;pMS6TROhfS~V34kZ*7eI97oXs6a|Z1W=ueK;{PA4?g{* zIcMW@y$7`~_P;!q-7r2nyEl#QM{x@B<=M}C>o1!IHau{mwposz-M{XQN3^^C@R{!c zC{9hDw0AdJ&rEdXt7XA|Z z80r3@!@ct`p+wC)*;9Y~s&cYnmxm?m1YBceI)Y zXdNSch3nE8aofdRe;;*GMW9XJR(%in#5;N8Gz*=h!q+8+cS!(c%&AVh!k``h*1{;# z5D!cb*B$+P#xn`{zP+&aWw|z7%Khhsss6p=-a6)JcWaYJ@o>GHE%YX(PK1(T>HhLZ z`VKTus#{per6CR=V3Lx89oU@Q`juj13eJi8u+QZ&lwU znyz8txftwRPGT<*LMcFsq6D?-tawb1sR$IB+731CKNJtNT>!MN*c6sZdBQPc)ocUt z5Llj~N|_?=7ZHGxh9VGAN_Y_kj8PyJ6%`1fk`BNPB1{62Y6!Kvvk5z^C@8Y&i!b+C zx72?1MC~#d+&ZS)DZ2RhDtfd%lG@TUzIiab;()sNGZr%>AOxIKohga2>3WDjAbhG& 
zwK6kNUbExR{|I+1c{=@6;+3-#^BcBJ zC&SK|LnF@EctfHsLX%=N(bBshHy+OQF5EQM**cb<_Ug5n{*LQLeU8{*E>xm(-`C-0 zW=6&xu$r5aG|+W{3{8<^!zKWd5C9;qt3V;l0|*d+Pyj&yfdar12{=!}dDsU4;)2cr zfC11DHYm7@e-fAsN3*X^OX+)VrD^TKf#&V|A^yN4uP%tDj^%Xz%&OV@M{>>Vf*@M~ zR!&^=>f6S~T8}d~w0h)R{^s$b|K*Kmo*k})Y{j;IE9kqg8l4%szVVDdLCX~^u>=eR z1VB@BDN_N%Y`=Y8)g&AVq-mn&oyXFS0;LGq?7zOS5>8Csvux(HD}w30Y?f<4B=QnLnJ)3$K=K*RtjXOkV`^F zK)A05y5u;LehOC*Vb!AVF+>nS$PCA$*S7|&4>tYh-noe-Z(lKui|NwZq1ckQ=$ieToD(bf9^%(s4b054ehr2A^8mr*(2}1de`m;Lfv`FXl3qJ z54ghJ)3A&frL%st;o_;9I%m}NkU24)49~5@&0cT)s^Rd=Xv{gYY~x66S8TCylvSdM zx&rr#47b1{+^yWn-AgIpa1c-1@*(n z3PAp{GXLYz%AEYAefwI7P8M!{{9wgsj#3ME&Oct6+29oHe_wm<*nDFT zck1MkF}{{@?LSd*ub(l4{J`-fC6rQW&V2=6c`5@y6hdTG!<$3V?RlDdK1f;NFubRD zd~*mi04RQE#9S5*eCGSMH-*+z1J7oRd8}o*X@#XCtEofDouc0(1ko>DgWkb0LI@1Vg6s6mlU-9EzyU zks{}= z>+I3f{@=ZtZ0}FF+pEh=Q>D5kqwk-+tVZ^OBxz zmtObgk{~+#d}K!NhA&V26J803PWkhJX+J#s5AU(1(SEx2%b$GcPx-##f&!R%d}u7^ z?+xeXnYOVVX`$n^p^j4RU=n!sgJlV)`hLmoZCh|qjEpsn_ANPe`@Eo248H|DrzR?C zwrk<+j)AA5ma`IHykn2_w8s>Y^b+m@-!NP?n&Ae4fPn*Y90&jjfxt0P!V1SMv(i}r zM*tu!a{w9{0B8a4(O_Q-TYL4;>8f1b&c8dmvnH1f1+Hq zdovBqk&pu*0qRV5eKa=Np!eIojA`UNMd`BZ9#xo_I5Bjpe)4Fbmf?H42RCjUQp;5u zTmw^U^?@uLo@fLPQkyio7Ch}=vTp05jPN{>3GCW|(m8wC)Z=dd901V3kTLi_etB?P zYuHGv<$;&)Z1OiG`Ao5?$?qNA6D8JUcp;z7FyDDWjHnaiF#`c9phNw#a?Zt*L4hpy zwa#)+7$=w!ic#JLFTFLpIcOk_& z0X2tpE(iocN5dcqWgq}yOwbT$;+(7nAc~eSgev5k(?nIH&LGk`10aQw`6Q91knbHC z^<(!B;DB2JJe9KeO9ml09bNbJf0L45HQVZ5;53H+6 z7Fz{}T6+%+9o@6_g@vEH=cVRjvd{8|*{2GB002F{=BcUqzkFfQv}*s$|6Vw&6m0&| z_g-6;_24IN(%uDM+4Ajj`)Mugy?4UmFFthdCb!h~C)Zzn*4@t*b`BR5!0hAVSTMGJ zDzhMwwZ614YsFA^P#-dhfRRBzcUH=~X=mqA`)zq7U*5XxwB6I14{xo@nR>9Eox8BB zB^}?jcPu^g=yFci3)!fi@swrKI0T;?E8aMiV}yhX(K*XFSa8g8aLxfRKw=KiqD6S0 z1rins7@-AV%yJGEBp_k9;FrQO=1sh?t*Vrv@Wdt06V5ou+JF9=wY~ZAjTN1La@NSt z_fS{$_`L&0VZ8i9kG&)2m1>Hr%a46~>fns_xN+U$_0JxxiP$tg{%yVSFB1a_KGZZ& zuefTR!~28*%hOy}S9!7uS?`GNw=%YN5;-`TDkxLAOJ~3K~%kdsO79aWgc%l z^cDF3d*~OA~r`HQ`U12Ky6@2m4YrekvB$@17jkVu@`e$zy7uo*WwRe2r#?5kP6#&x3 
z%Nk&r-89y+Ko<`6rPGVEY;#}^g*iDnGFZreD3s0K-XrGjt=2XNdieOB3Bv5!a!Yyk z(ZyNq>hR9KGS7B&O>2ts)bO}607x07J>bFsi8n_)4uE7CBSJI)NJx%(8p&{mM8aBR zhC4I@EHKNEI02GF0I(d$Ljnws{Y9$1urR#gjeQ-NJn&p;eSV+^OwS#^*woWBu(_g3 zkIxvoYd>_2U$E!B8G%4=(JQOV{><)NwvcPzxqakpn>21%xbdZ(RoS4*XRp+6{CU?H zs80^(qpQs$|DgJyxwcu6667>AZ0A~;+uys!t%Q*F47a5;7urzWswbNP)9aY_)dvaOa947e8*NTH*hS;<3X? zR9!t-UU1Yot+5Fh85AyU4MXDs$k^x8d?o<|-^JEBNj^Z$fq*DvI;y8trQ^?C9D5o> zN;xB>mjp;g2*WHfBfuG{d}fpolrklRK&u2X2F{USm_mNH$jZz=SC<@H2_QzJe9{r7 zN(ewDeGQyaBQX(~)QAKEV+y)Kh=2)5R3rkE1*m0lsHG8dlBd9kO z{Vi3w@B;_7l;(-SA;`5R1y4oo!`-vi26Hzpo!UMtOy&*L=k1zzevH&wSWCCwtzS|NVQ{=Fb^_c;S8S%;WA{-8+3e`RA5{AGqz+lV#ZV z{I%of%`&^M`x1Z=@V(}}^S=)t+xW9j|KT%!y6Z{5^3WG=TiZF;^w+MxZ}!y#W{U!V zblK86I5g|WI*zF4r*@?s*`vImDH@DHQh!zEFP@+yd3}7ORETzrv`o)aQg75EpN~zu zOdQ&~cieB8)?CPDYFWp40$v2jL_+{)&bP+;ZtX;Xgk+fow4A^($G|bith59G2!Pgj z4jcn3EilV02Owl*L0aHMD9E$b5C1ULU~(fbk6JKLuXfCTXRLm-iRaw%^Uc#+`Zv~~ z?Ju(j?>fj^Cyz@*qOegq<*Ci$xMqMPr3DAK=#LJU(;Mfndt`6bqoBU>%WBWsk*3xR ztedPSAqWHnA`k>*vYnG75oCiSzS--HlfrOF0)N%xiEa>S%6xcqO>|1@q57Y5qZ{Ar zlS`wt>EB@No$&yrKw7`CQZ*DHr2$WYO`dB$Clprb?EZlz?fUscGnXBJ>az^q=j!8= z)$+KCRH#vb*#O~%7eH%5fwd+VSj%!Y!k{eTCs=#l@dSmk2CP*OH$r97BnGmUdpRkx z=bfw&A|aBZ115mWn1mnuDU+e_$a=__7Jd?YDHCYX>a4TE8jw+-m@o~W6Jx z7dm=3?QfB_v@=q4`lT;#>-<>z^S^I4U1O#vfe4f0R887;d8p92d#W_$iCD>wVh39k zoqG8Err2Q$g4^NRp~2~=zw`XA_T#6{nDL#{zJ2H0?dNWNW$~|T=bU@fx`ib*{@}Yq zAG>z-Nusjz#hd#+Gq1kormw)itX#C=*sJW>eZTs|Ll*;WeV{n@%olHayX^?mKlHPQ zOJA?WTa;9gE?d^%ilv*zX3mI3xi2lWt}tF}IG75c{;JG>deqs940=W6hKwIigmqi! 
zIV$%`W%{W@TSsP1YZo4-DyK8R6!&D!Qox+{oVK1I%V162vm7-COGw;uM@Zlt0yyRf z0GSa0$h8Bof`~H*&H=aFatqvY$BYn~)j#;dxUG!3>qO$?0Ri`*NTH~*3CcJ zF{`+HO9Z8ddiVae54y%Kcxz7qkmf93^)_@A!C-poyf^m3MF*#%o99+0(^9U82qUZ$ z{$u`Q`yP9?ENR}^dtB=)_hBcAH z1O&>!NAlN(YQ@()2NF?mplP|#Y>m-E8oI@gf3 zOiK+qo8N6s;s)hghvDomyfl*gY;p6|^QMjnb|1Sp@Xf~44{w^07MHD}lV0nlT?<%t zxb0U`M9-X4da7_*B`%-<1#1UHbI0T+w`26u&h9O%&iU${Z?~QI*7^_pWbDk(UH8TZ zWOCq{O}oGJtryM`$`QU9Hw1w6_(cuSkZd2D+dZ0%`ci-Xim_~SIFtyX_Eu%gD12|xgf#c!vcO*m^*td04*b3mew0+C|MMu5O zkG%fYz0V{ zPQbscd3~@v3Z67ThyaE_LsQ4pcpVDrAx*_d3aE&CmIa;!p9LK@ zhIEZm7oBgIL`xhE<|4+El!O+QSJY@dR*5Fhdf8m2kV_V}PzR2&l5KBlI&51~2?==&I?(M@*Zm_tu$2H} zJVKm48fCGO<#BS%iiwP<)x_0Ydu&XQ7y$x}pe*WCK`vGchE=VENqZ$<%XJP- zpUgD*Tj!fS)!d2IclD)TerPmzSz+rBLg<_*&mRp>j{24k46lePM{hDG?9B3Vsc|yK z-_s!CCyzJ#J`@VreefJn-5wsZcI)f@+9L>hr&T=p*TGKKzf50Y3Lg@z6Wp zy7slD1%@yFXm$AYM$)GMAU%CfooO%JIyS$P?C=1<(Tj(hy0AQy3ff(j9Ty(V_l!;s zn|S`T#=A3TPb*USz~ErTZ#ye*BOw$Zw|k1F~Epcib z+3&&pn{VDQr?hQ*849m-OZbgF-Q`jdFj4&(OKEa$;Vhd&*1+t zTwMt7)886AHWw&kZIV=W!57>EmP;>~xpbt0Ryi)Ryi%2|#SDdrEYGJ5778g$txMqF zjxn4GaF$!i0Jw5mDW6ID_5QX}uB{N1L^c>}t9P5ehMb-2YeBVhg;&ZbX_Z&fapDE} zMj`v&8=vT|F#u}>B2Q2{ky>KScRV}$do$~q6q)k4<6K7>R1BONO_@I?S0Bs(TL~b> zBMeGmYNRSKm7__NCZjpnL>iL?N{l2xRHg_(B5*=hlbq!+y1ZOd1rvAFH6=YGv+J2m zVK+cB(e;rxHy(5CoO>JjFJ=d?+SoBQ3l0fx+ryfl=32UT+fo4I5pesRFE?`I3D%Qs zEp)aS-Mv|TLslo|q|Y5*HGa&9{+wD7-E+akckGyT_CL2@_NCQd{`M7bo&yB%`1(4vzU4L2C zxLlaGq)H3*56+IQg>XxGZg*9V_d#Ld(u1>l#t%9htJnL&wD3Me$^9Y0UW3})~VCAG}TXc_3R&- zASuDiAj#tvClTyf*BFaLj?epUUL$f2mZK@Y{ObDIMInQLf&&}X$-z|RQ8&^JU^)C> z_q`J4RYPeYU*>~v&VMW}3Rx)VHED_aS>dp&sP)hT;7{Zd3&HmWnHAceuCx> zB~dD+CFdvDa3G<8gGy!sCbTZNgOzq*FZjN4r!)_j4pp<>kr|Z(9lu=vXkc6Z_R^e> z&AsJ=p9mEP&_<%wS(_G8%c=CIKX}L9z;t2)%_;C=LT>$7rsi-iEo1WbAqi|HfEcd@ zGN(cm93jnOIS3LWL4FJdB`HH_ARrSckV=#=!zyXUn}TX*5E-S4rJ9oE&R9;BdJCI< zo0MmtzH0gvZH)(`+-KVk{bpB^o>8N6qjCD?;p4aT&KVvrZe3jNKe|$x_FD0dsoi?fD4uT5s{ibgOMkFq$#MVO@gE=g+YQ&ReDfrTH*XyO z^Btdr_l^GY_AC1H6Sb?~*!-vR?Lpzl_s;wD(^uzyzx<&~9KUVDanJAn(U)GC-vPT{ 
zeddJ&*PCz)ye~a-Uc=U9yt&ccYAD?0!}8<%Gkc58;naw%!0aB&Rn!={4>1OP~Y z0EECSI8G!iIL5#MBLvFF__j zu&0Ru4~B;ht;wAH0b;pIzI-2aFW9N!++!w_q)`>2(^E{xh~&c{)cu?3A$s=l&Coupr4+QZ z7Bcx(Jv#WP-#A-*Q7n_=!r#_s05~V;uLqYQ~GWa29;UqCnf6 zbu1Ci(~dFbAB@|)SC#NMsZW*d$ar^~%xBi``oeAdPVhdPdEv$q_@AI7k7bryiN>A`sbaPZH?D?VFo z*{|++Wy>Aq-!2%P%Z@qo(Q8{D>v{Myj6L||k{1u%^u-6x=y1EAd1=*KxBJyS3f_0> zyhhx}L|e+;ts)rRFV*s82iiKO)eldFuHNdZ)g^G5bXCmowaxi`k=H|KzJqceeF zDaaIZ&M7CnL`yOO0Wo6&2B0{mE+-DJ697OAEm_8e<^-tWUmP&t76|}I0Fv^-IuO=b zL;zyIzyh}%z;XZ#&%3$C8@lr!t55UD2-3Xq!l5&|x7JViE+PCMRzbo>UK7HE)v!J~ zSSv1Hs;wO@j8+0KZ77P!5^}J>5s(N$3Ovx(!?}(LrG$()b9Un8w>=bB>d*mmt}3ZO zaTRERRQTJjc5O)b?{>}Dvss$7PTp{IY zmCjS>$e1Q0RiY3SnK(=}B}u9{1fuy{vw$Hp!V#jD4nUhAETWCMpCB<|xhf26Z~ykr zuE~0?-gZiT+?F~W_YVKGWAoJ~{P)t6zq?;d+M#b>ZZej%7fYda)@f8e8J1U9@5;yj z{>dQ_hI7&=kc@APAM6|M2DWE=e+Jk}05Lv=)TJjy78OVcq6VN$0%br*O7a=dn8=ct zz$D3brBO>s4-RGn#0ur64?Dt==x47UJxb(|MHO6c$eJ%TmLz8>WIlBD7c^|HgK9ESZC!MKb z+We38|Ht*GU4L-JjK7cn zNP?(Osujod%@ciPS?l6zvdW9S(>mLVYuDP7y3t}a&2s0YLMzZ?iD3?e)1G9ICK+x5 z7m1WZ#>SrEC{iXz-}D@}4jhmmhT5}?k>(BRIw`2Te6hEsaAsZ{8HkYe}hY>tc#X+*-%E_N*q~khJY@e7`{O zlOXx8v5wXlghN!9!>I}Xw$!a>!^uL z=e{HB8|E}^C@xxkQt8Exa`EU5>9KDe@vUs3wkxvh7GE|H&3J6fF_|~Y*&iIbvpuUf zd!2BE=(?!?&a0MxZ{&>Te>8qS=dxA*c_F-S;J=5$&-c$c!OvZJ#wRZf)Fj^a>cEe; z{e9^`r#|kq-`w5&z!8?4Ck_FK2aClqrtysJ6@> z;$F#>7duU&) zP&}b6jrO6b!bx$)&dF2~r9-9Zu1#((lhKd> z=RO35T)rt+$QLqMKah_3JVpT$okkC2LV_`cJjur-ogsnR1Hm;3dfIZgJyO3(yJ;-wlwlcl0DP%SN~c_b2KykX%j zgCFsq9DB6)l6Ci;1MmC$E8|;!v?4sJvG@9`&p4;a9uDq&Y2U3I|1l%%H7A~U^Iy6j zoVj5cj@|d}{MAEufAnu3^WnAMxBd2K`z9v)06?nqnnG*+v7ObHqs9%4`!hOHsy0UB zj5&Q#*w&&C_79FUptW^Tj}HbT?IBMy6j3fnEr~>+p@29i11_f%%Pq-3p(bPoF@U}; z9$EsaIA{P5EVJA?0OlM601g(I18x}*0yxJ2tZ>XZlEyJ~!~oC(3)RnlXEez>>|0w2 zt)^Nzo9&L2v!o#*0wjzMXqYY$;_T9A59ijpwf);oB~4y zEf6(;M`cJD*PJweJw_9O9VYCG}BD}CZjdyZM|Cy(!mYt<-;viV}Mr718R%Apg1 z@J)eh&kMDsLV2Ug-qam4!5K0np_J0j(zB{empHS`P1BVMTT+}=%W?}Q)k1fCX@z-x0 zd+Bo5h*_JDxJJTaSe8n>}o{XRPnyuZnan4({yU%*y 
zQyicE?|FCLME!^N!~5iUUc_|n(C%vPBL4uXTsYZKnWkCc?&yQFS*?Hj6tw2rI!hU; zn_MJ-7*}yo8|Mqf9dYXzIKg?Wa#jgh_cUkRQ#|AuYbfw`Z1;d!?f^g|SdL%-xwQ=k%J$_%knqz^N=YHN} znY_sQtXViH2<7KQhT{e_$=uW%RJ@_Z zNTD(@3qm1NovN&`h6-kUvgl)mN#T&tfaM0HBSO!q45a}OCxK|3kutF!aGqEJJQX5B zmobKAH9Lp{Gkf}?juE+J?Fh{8N~Vj@Z@KH)rQhHOW>5U>TTV){qNA$X8d1(#Cz@XW zd%5zFZ^R93>$`oy&Ou)Kad+JbOAL`~asqtADmlGt?STxil>nF|lUP(nL2C%vMpIP@ zS!^;f=gCwlXTxB)7zHX3&BNic%6K-X3-yLXNuJbZ82XzjH1wP3}kfA1#hqsz)ej|T^5JhN@_yf>D=+<&Be zC)=}U=LMHPysjs!H|6FXYWX0neDuG5TDqnDnf>pKuPL8>+;5-Q`bpjK$NJ90k7nNa zXgc`SzpBfR7&tWj)-_{Sz4=tUyeU5Bw6DA{|FQVJ^QrQ)zP9&je?0MlPXT}Wz9qN) zAlN$D2LNfuhgb@+KfGIKE?l%~*wk&VXV$D-{@@eIDYIPoMs;SUwP+9+3eN!%0T>rp zYXhPT#rec#jAjxO#}GKB9ITZh^5ikI-95?|2R4@O^xMWPT45d5;BlV6}e=5pDF=LsZi3m$f%Q#bWS*T{1 zVj?w=*TkF$4Job9$<#$)t)PsJ3aBs*z-H3aW55)ex}}1J1Xz;eqUFs8qp1iwXxVq2 z1?N1^Faad^*v68V^>FLkIZ^OAbCgJ#)7GdgXyX|RYG`-T^}`lxfjdu2C$wt>);VLO zPiTmwVU9D4MNmJvED_1=1^hCaj%DHEy2TC}oWMi_Gd03ZNKL_t*2(@F-}1cYdvxr^n@#MI4a z)+6re2Z3MGW#7Fe-?Q&%>?>t=bTi@&ICu814~|{f zU0dB*I#^o4pD6jcWPMj_z2hXa@~?mVxx9Vy@@*Syt46;#^3_+oUv<9qhlS77p61)m zjQhX$=yf zkSt>k0gU6!5eOnKobk{CVfBhjs)9^LE6(fO2Lncs?^3QrB7!h#5@ISJgh8U3Xeujg zkO(4B(rIR(t={M)gGvRC=^&RFX+dS2&7>^MGLv#tR&kb5U^II5VCJiTQd4#4gk!%R zXe-DkCO}XEu4;?`cleD~j=CX*A#8#mvK*p%T#dp;YO}EL+f4}wt^@!>t{U9aDa$G@ zD9&q(R&We6LV-`lW6pvHR)E3OdBmCJNQ`mgfCHg*mJ7oe@YsVSgDlxzoZUZubggej z$u8VjUp79poVns}9y|4NA9MSzKJD@->t<~F%1w)I8}7N&gW~;n{pge5f99m`H_1)c zPkrb7mfN4~xo)23iSxjb^O+$Jx2Q$BZ^0jid|)d9uwrUxvZ5x64O5C_lor}WVbbMH zJl5J|!bzMp0f+gV%usoz&W?~Yff10DqM-s<2tZrzX90qu-@OHtNha<-u4;*ozo^{G zQZn$J`Uelnqjq7-farbi|rb{qlc5dUxZFH0V<(3B~NJGj)o%@h_nrBn(adeDX zLhy_M%Buz!pmQnbsRV1m58mDFc|h+-_o30m60k_ws_=;@ zuR-~ZTnJ5?M+I#)XVE7v#7h;DF3p-xvsp0cfd(+u!)W!52T-c<$uVq}*^~2Mu>+ z;+DG}KmDty^Y_1;GbjBUl&*Q{yje%zDeq`5Ja=>Nt+Rf(rsI|)y^TMpedD6iKYrSI z!wl{Wp>;wKF@tiSUmV}^;iuO$Ikpl2D=IR9fQX6-KlPX(d@aerVru{ySb%ZnSJK$me$md8l~&ryqE_J)^eF3dW9H z;h*)h?Z2D2omRg3M(;!ayaVt0E4w+j<_UP0o*l2Opmq2B`QgWA_xCTm;id8wPrYK7 
z6y1eArw*L(Xy5*GVBb|4xt{;^D9bD9v;Te7^8cLn(!^dLAniEEB-R%D_J`dcZB-_% zuJ6nFZLKZsBJho`6J+E8fD%r~7>OI^lqZZxP)KDZ(_BQ{`PK-gfpG=QN+eV+Z>-}i zw{}ke0G3+-AV2`FJxhQCoF`(Nvvpg`c&@bmg*TH2a(g^Xc=BM%sPhA{vKM(8lGTmUh{97;ioGj7OnsktTqVFXCaQj6eOL7Gcr z5YRFYtVb#M1b~FZzy{D5;CcPpiKcI1y(8MWg5?VwO!hZVpZGP$?SE}49{Bv_P5-!~ zBl+ghU!#9JH~(MvF8Gf{cRp6SajEsbzxv7x<*T>1->^Vi&WuH#B6ym^ROs)pGq$fP zIkpl2$01QdiUdf>{8TncbDf;Z$t+$T72Wx0FzvO$h-hZzq^J<3i3CB@xb+)ZP2gvN z5EOKV$=s$5fq}`)gRK=M{3pia!slg}-ngs1+}jxJr13)UXj9wF9i^>vr}h^DdtC1; z{V>}=(NGl3J;JOPrIF_2zVhG`v(&`4S;fOkm$jXJ{oY?4xJ!KLxsAPlz57pvUp^wQ zmG3<^vE6&&V0a9@{>MMx^V+PPyHC67+3K}_f1c0pt)D;RgLPeLt91eJl63 z`RkX&=Hb8mxbS?_i=zj8fb@vdqX>0w`*7I%(PmBB?5pKlnh61%K&eDR7$n3vcZ@k^ z99RH`oWdNJq=C|c1!E80f4mvH3AS?4g|+g&pjDcQqe*u zew70dWx#+m5R0re zf@#kI)7(SER7!0@yY#70tQQJ zw0q`FN81H zV3pcu4Nc_d)W#B&s332U6alC}M#W%68es$`QHwwX6CIsjAE27(`9pq+Fkksowfh3u z2!8vtm^RWY`~6(COO=oAcrKUv5atXVwPMc}JZ&Nh$uB~tr*WX!j21rhx&M4>ft}iX zgg^43nTwA5_Tjs?{<`y`M-I%q|Hn@>|NNoSclZm>Chz56*mIoG&ex~!-FMy< zkEPfB`3>wkx$-G<`qc4%-58xh2Y!(m*wFDv&mX@Miu?ZS-tp~b?bw)9vVPPF6$h%Y zb1`m<)`hfAbwBxRt!kkKj{jdQ5rT7c&I(A7BFPyUPv#zbVR}nv zzPZh7Lc_JxsEy?45F9YZJaA5uv650xLFi``GeSv3XZ?hP;Ep(0C%uq!Zipw8vyB`X zkviXzD1-24E95Tj;yk7+{7(^hZdU~sDESW9`jR3r}%e{$t`Y0sVu{?`-w+CQ!FW}LP2Yx_T1{m>uZO3rZX ziu%<0*-zyk{#=~=@lW@^Gg{kwaIgSSUwm9cgYtKeS9?Dt4O5x{Nt+5m2tl?H`_6I@ zHspaYghq2=AfqiqMxLXTydVWGtdXHlD51`zP{=IloQOdf@#d&NN5BjM2LcBO1OUPT zXh2{VnFC-3fPr)9z&QXb(K>VtzySm6k!LBs`K&1w=L8|45hkOUv?8$q=TU02+Os@J zbsl3QM4T|0Wg!zr`%+s&1=QKZAZg0kM1bTP44}?g>p2}5Vm@ihy+{!RX%JI3c;bax zJryPT;AIJAJjXb<9*Ynf6j%-z7fe0QL7o}Z!WtCXcRDbEAx=UgF{GTzqyWwuAZF8C z!oW*1ESn0I%~p)`V&)l=VasnjR{l|Zyx}>f9ct%%kdZ{4iDm3NLICR#IbaTnF~>n{ z>6r?9UYXX>VPDJi6q*jtmM;IB$4~oK9XiIYRaY*RWP1QNZrGOrwh}-bB4eb$L>hx40j+yl z$FpVX;PrAogJEIdA?-ZVVe<0k*9V zGG`UFeeluj@&oyf37g%$RBv25^Z66^j4#!C}&`fz&fy=yz#PB`%Oy&tQde-Aid8*iFEyk*7{nMXb!(r^BG_cPtw_Z%E90Mr++ zs5(%Yo#SKkFZE1H%I8iS?vR8oYK5SXDV&U;wG1_YxrVCInDH zyo%@_upv?er6V99Eu{D4S!%J|vmH^;e z!THdl{ZLgcnot4T}okE;_9^8i#gf&or+3eM!QYGk7dR36Mm 
z`=Kf#LxLO_r7|>;<46KQ(*ZL!JCy?ik;+^PHMwkd?&{xr)j;0~7dKrjdDUAZr%sJL zJ^R8%?!(V%x{1Q$C36D|ha7-~V#`n1SvfoM4Be)|K(8jlsN`)yl^o`Lp*% ziT+1zwUn9AE%xHKVMC+?)m(`$B)N*qN{#<#9OO3 zo_F#e;%lDxXk^D>i!c88oW?P~++3d~|MAqg)oUjIE%@6HGI;RqC;s@Im)5VZ06;!# zuNpaymke&3aR#$7ppCRXvU^(d^1T^`gsT>_QB zV-_WjNhn!|LRCT@I1{lIiE8ggmg>{0o3_u`xkrC>AURRp1CSW6Fjj2r}nVAQ=Lx zi0g_YFhye`fL+$tMr6HfF8ygeL}OO28LMWqE8A6(|ICat7yYL&`m{H${MAE^Qy=*A z*!}!vpUqjjYW9n@kH0rW4}JQ`^N0VjcWEO4sLk19NRTUhJ-lt#lbXOH1MPAFfhLfO zob%dh3xa$gZ6s1bVdje9l*+vVg1mtB2y+R!NW2%hSCR-4W0Xpza!Kv|dISInR(Jsv zIFMctYp?(sz$$c%41mHj03ZW05O^UO*-Hdq06<{OZa%mVI6?4MAhC!+WD>OVLBr}8 zvPj5=3PnoT5QiJ*r#a#*X~`=9L4r5{uq+fH<{_BC%T&ZpWT7H2O+dmb?T8EHWHSl8PQ4U}$L=w8gBvBR)K!OJ>@^Sh7(iMCM8m*Z z8$pRDt3kVbuwC~IYz+rH_WRoP;(oAVOlP@1`r*INJtuLkJ(t_7_BT}e_~t1K|2gf0 zpXhSe<-ufmiE>3R!aLrWcYfiGTSwkBSv5l=0s;WXGSL6UpgOJg`tk(CJOI_zEMpym zI$8~R+G;D)MI)%@VHBZ2hFnz{iz+iwT*06YIY^tvVZfkR8RkF*0N^;5OsR&{H|hd;RyzODB(NRn?Z5GVP73N51CWV7a!fb>o(nC~J-`@rv=s)GF%>eOc?7 zv!D26l$S95Ny~p#w4T4Do zglx5Nq4${NOe7&No(0+VCH=Fv&Tyn;_GcFyFs{ylU>^1Ke-})7{+~w{537R6N!fDW z)#rYvR;p7Bx~F7ied~Arr+!Xf$WBEDw6h$7@}yaS%(S-{t!nPS<+I9|p5g(G<=f%| z8r9A+_>uSh>9Dg)zOePW;p_IdzU{5swtIB>%ZKj4?Q5>BeCPTu)dt?RE%?`u&uV@1 z*3O%z2DyuKD;2Sivp_CxanKSzx3uKNJOI@>{0`${S5?j^(fPV*uD^Ea0s@~bbwYX2inxcN=%)pKiM5 zujZUP{yuZ6`28OyF4?%pi+x`mnhxJOc+pE&-Ymb|76Rl4POW&$YH4lT{>L;u5IQDS z0)-O+IIFD)tOO9GJb))qvL?}?(v~AMR<>2ECE;w~g%@E%xPTOqwmz|;&kI5CyCeau zBmh7FU_e2ji2;y3NDyF^@F2)5Bw(}>kUdiVdm^Aj|$8GXUf0@ykGQwRYcbCTMu^@%nvc8-?)onvUgb(lD%5$Cd-B2)u-b0R?EFIV4&m zl98y(vJeVve&EF4ukEDu{ksmBvzvz|=k2`U$v+=9&^PTQf6>x&&VFwH?`Ajc`uERI zy7wRZ$2|Y4?r)|&-PLpL+!q&Qx38LAKIMfgfBL>X`<7RB8|#1f=g!63_J4lqro)Fu zT)Xz9f8Tz-T-*SFd{#TgD87AT2zG4faMW|cf_amW6sApt_qUa5LS z;6N#t$(r0e+UPVMpLy+6MU~rs_xx3M|BT{XgLF`QqwABQt9NAJB)MJeKfTz{LAAqX z?KC{%x%VDhF}^ixfB*OMXVw1s+nwggi_ScQK6|KZ^8MzTt#gBaU2yzxoJ--o5EErUPuk3K=%y%z2;07VVlVpWp(F;gE-8`kE)xYprix=|%R9_X*wKXab zD>oPyoQ=zY7g@%Ei*zF!Cos&n)v*Qz;;^Y(SQRRX25^*wDfk?kiE)7&*Z!pfZt(rn 
znU$!mZtE025qiTVVQ~Y=5OP(J7=Thb0$Bh_TBcQQRUo{wG7#Vqz`1e(+^R}@H9M_w zbn}MUH$3_G!`6kf4$5!&u+RIoWwkIb8Gt|+CbcbYZae(& zItUL<1DF}chc$Ub3z>qFDu#S0=H8c~+ z!m?&tupl`LfkOen9tkCJ)Np}vO&@8p4AO&F+<-z;SsCSd2nrp83e4A*^<4+`}W5^Z}DOtfa>eBBs=R_gpj9kDd1MlbkJZIHQR$h zGom%kyPXZu0cpnef|0hjmATx6H~})!Gy>-!o6_=Mj9fMP#c0!SM%By43mJ%@@>4Dj z1E!&~Kn4IDN##Jv06{mQy&VK9skB#sf=QAOQ7N-};N0dZ8!tKH%18h8(+}l#N2Zsq zxZ-;+9C-M^9aI0d>f-BP`EJfP-u*G!{lZ2Ha+$Ead5&L^NwF>uRH%Q`;5VR zJ}R%i`)P0FVM8WTTa^%0YoG z3wBai2th-h6|t@*b3_!>Vmm7LA`{TOPme43LPeJ!U38BX9+hVq(&sr2uTs=CO~B%a3K@qVXgtZ^n#2=rqH5k z{q=w9m0We2<4?&G2jGoTn0ukZIt9^Y&SdA}aS695kQ~7Ed!PHk>Xui|p3_h^5_ytW zM!Y|z-H2A`AJJ&JYNvzN?oVHSH0@Ul=U%`5zRACQY5J=9N4`U$7{-?#6vzd+>>kKU**>V=chwz%@gM92qGoAo7wW436zH)h~A3RvR&=wxl|% zl&9MVwq?;D54t48Wb1Xqx9(Sw9q-(c+`Gq`Kw(h7q(3@*=aq9Ocst_H_jhtN|H&DH zWv86-VNO17B)KiFUM{6S;MEUXyqE_NtnMlo5Uhm=zf}a*7TTKDxTrc=)J#iKgE})H zAhS^|Q3HaigCMY!B#LY{%!(c04(`*u69*?fTFjc&@lBT}7zaLVUbK32seS#3Aqa*6 zfx!Yu3qSxw0p&`P1St`;ico1#x`rJJXz&Ns)7D&ha|DR^D4c zQeZTYR@RBYm_QjJAjmubQM+jd!e|l}HOILUC^mc;Nw6Bhkp$0xi~y1twDF2qk_1Ko z@R9);Ns<7PKmvq-|K$Y$fkdWzcdN@P7QqRxGGEHGfOF7*Do)dcNXgvjaubmtuyqP4 zl-fF!IV80}rdm0+n1ExU0GzDGzSI`9Y!{|JBqDUtD*u`TCtLZ*H41 zGA`6=OCT?q9KO|e<~=*369XQ=EAK&Ofqo7M>&8p6tL6A6=KR!d6&hi5yUkpW@*`P)3GZ#@QpC3h7AZv zu{PRa;6;wM$egJt)j*M?R5NkJ6^+PB*_>J+OBoni*K^J_qvkK%|G8g2 zwf>&lcgz(s3L}wxu@M=k7!kaX!a06#usqV^P*xt>{)yNrYm9|<*YU*v>-u~=pOVINCqLG8gT--C#du;vZeYI0BS=Xw7Vu0MmNC3#v^sR;f<4;z` zAm#z6^+ScIm_a0&bR%db10slPc4++A-l3>tAn54!0hl4xX`m2WQca*@9#`VH7obB9 zL&Y&L29tY+j{bP_@Zq;sS_Xzj88w_FNwA(~EO;HpkNZ4t-_{tvtpf%+Xn15T)s4dL z@sQa)TbmWQW{P3nbV{+Q%7R5;YKBu!?7iUK1!w+v!O~-1{lS>s{`z#+!u|ey^xoe* z)B2~uJI?8EH_M*<#eLV`@MX7qFjl*4yKm^A!bz_kddeof@T5jzpkv|7I}WIwqV)qN z8|(`AXtA>%BPY_b2GaqGA4n|=ba!9_R zT+yZlT@|G~FOf)#CJnXslsM114k^g2)@jZGDx(DCpoJz8{Nl^{V4jR>e(yN&k~DzV zDwk;4l!V_F^$g4mKAn!^q;0{}rP%QgaeO0r?)i==I)`#7c_54?$Q&3qhQrdPpp3GuBmT4e(#Lz?4!=>JL~E(7vHwe zpPpK@;o;k7IWIwY3XK&3x-1kB7$sO5Ut0ZP2S{iebo*9^x_PF}<@TCJJ^I&jb-xoQ 
z4_t+JjuDNvflC{9!LCkOE$lsS?sI4VNbuyvh*w{|*0lQ|cL9v~H0>akH{^~kAW#asIw^3b!%4mIhGdg8}s%M%Yif9^RO`ryHGN1We1c)-j9J{)sFRo!+XeCw8N zBc0!}0kT{L2Ij~{)Uyl#@)>&%J>9zXy|@WKltg)Bu(MY-J5lZl$d5{f(W@12F^)vHpwEN>JYO4hy+VrE#N>9 zDXAL6WusFSg}xbB;bLi;*y8Fz!DO*5Hk!qltR{rCS_pF~P-V7gvCxpkhrX^4=B3W& zamRoVfkUB?krX)*x?nKaq{@x7>`Eh}7gk?byy5J9fBE9EXFXGC`{1`PE;?$HzrFLi z^rrG1>X!NYoXVFTGv?`qqkr?+qpz;G^(KF-zVEBbf!Fz^cP?7?^aIf?E#=X@7frv(hfbCsero=OTRIw2gE9~9c2GnaN$H_n?;fQ*T8DA2qz!W+ z@-Q5v!hgMOU;B2e7xMsc*lHWQG}g#6pn@H2F)|IJ^{u+EI6{~WS^G$*%DIbNUUF@@ zsg*P7GFQ&)CQFc;iUE*e%Gw$-^jv!3x)EtttLfb`Zg2aCVD8MPemjBoSYK-7<@)B9 zw&iV;zuKqq*-k_0l;K)*#MZ9f{>inKC+;}(l{JIoCtvyX&tu~MM$@~;p3rmdpML(+ zA6&QPus5$tr{Dd-;A4}&`Y|m1dA@ky?o*yU*=+jV{C7`3ZD3<~->5KYJ|ZUWyxZsU z=7#<4fzW*8%d}D-s-@sm8H%lt&w<6mD*zzhb*E}X7HTVpYbWoVSu`#bd5jh;hk}J7 zmluLe2&0vjQ1^x~#+`5ZR&xWGjV} z1R%jP0C@0%6gmYU2mnYDdh`qcl7Tb;F-rJf6g@nxX#*!khUh@5dPzIWfvkaK7Jw2g zh?1#w}PVHjn$E^C4JHngz%r0E+-PSnb(9@=RIBq<8)>{ODVcoOS6{pB;D6S;x06dRkrb z=$p%KYP+U*Vavm{yU*MA6m$9aM?ST1(qrr9ezoV3SH*Yx8|!mkdwjwt_doHU=N=vN z%A&_7+p~|D*K_90)8^f>|KA__-LktEb?7i7A3N!t03p2-nvoIMYW`1KUWhAMf5+IQ z{a>~HD@7dNq|)%kT^AfJ%Ugf?{j;h*g1GYdX)U2-MeIY_m9*yQDarB}nh6MY0<|Ek zq>-TkOV;%7I570pyRBZ#1HfTWuHY#25~>jtVEq^+``e%=0a4geE^o<-rJPHuTyceB zwGg^0n3hHz!+H}A`(kGIyX!>Vesr{ zet*GXH}xO*=AHfHAAf(_&)cd;!=JxN->5%w+#imI&p-Ons~4TuwKZHYGi&XBWaNnb z$E<4HU2na2#!E{!ZYwu(k|kXZ{mk0BxofUm^8CQpApq<+yG{hUzO-KZ@yxoD1~9NL zsR;^vh*mQJAuGoyrM4s;qcWkeje@WtIT4bVLipmqyh2>(nNzr zax(CF9-1U-qBlj~D4?M{6k#sXD9-XMf z8>EQ@ZJFc8mevPdOH2KS-)9?ujl2^D{+4GEh>Zg@)^Cos<8e!0n)AZrgV&#N#;^W+ zSKF04?|jp*JLcW_&f@#ygUW@>8(=WdpxC#FQDXJ5X3oG)*^^rE>bc!a3YC?R`?f+PqufsodE z5YqV~IL}fEn4ZXe zZ30HZajYRigNdcw7jh$;TQLf43D&hW126$3TL6gihj!>shZ==^;H|=i$=9#FdH?hU zlZWP_2z$CM%Omft!xa8v$92%lkLtOdEMIC)jJ+pv1Mt=Kt5~dl+fwQ@;aP8 zHW%zbcw$t-dzRo4*#!=rrc4$Ery*O(7&1Ap1VE0Vfxvk70aPj2{!v7&j!ZRQ+X!eqT(E$P=184$9NsR2hkWN{_0sznhn!qbEEsMt0gf^KL2u4d6 zTJ#VBQ63O`L`|MT=^1U%v@Q&k1qd7sdu)MJqCn%yV3cn}C?!J5ia-S>G!DFz3Njsn 
zGf*@rj8>1Vs1H%x);#6=O<@uskP1A74s*1v0~>3zx|anzj2ZX9;HkGgJofyH4|?W{ z1?BmB&UyS{d*#A6R@@U@+|LV27rvKyaA00Dk-kS4%eEsXmyrWwdJ#*V`XU(`xgQs$4L#P)>C}NUcNH4$# z=CiRrjMkX0om+EQ*FSakr_)BK%E;!jsV#Zi=6kOkg?SJ)2^*bFZ6o$TG7A}*HMy98 z0Kms&IFjTkBaL&umwhCH2wykf6 z!}Aj!_~*^DuPBXL{hNQZEnK6YtWKTjfBk-Ct$X@_=T2yT)a-Qooa(kF_wBCLhs#$D z{k-yc$GXNqGtHn7a0l9Kr=k9tw{N(xMYcR~%cc!0N(Ny1Z0kfG4Xmoe52rO1W!|Gp z90FJgfk#6Kx~W5#1xYS)6au`Eo|Gr2eHtMvCzyQf4I?_&!Unu_8m$0_p?be7$rOUs z0LTDfl_%6zLMBlH8<2nu-_qnjfwZJSpan)?Bnb?RND?6{Ju$ASO(RiOr%ozGhz(4< z;*^45xNJjPlO<*lTo{rni-ifmGi4Atki{AZ^wKd2t-JsikrCix%>8bXcqs*&5E-ljFIM#J-e3Gijo9VyUtGTM*6`Bv5BZ-ZcfwT% z*5flaU$@}z-4EcUt=Gl3XSdHk_|(QlKMsC(_sB<9KIpT;jp~{am$qN@(>dqspLDb> z-|gPz=gzuy>Q!GoG2_*%&O4)C+VI;;TkiV7Y%c(vase1=S+hVi%7M}Vc1b>)Hq@Kx zUbT3_+x;rg2NaDblKr}r6*j$ax%PP^t<%zg@{&}B;7AGWol-e-1gVs$d@d0nB-d0f z$g)Van-RZwtKExv0GN^j!^wmmH-4QR!yOx=yx$n?ATzGEdPFo%q$sn{r3J`TD71AP zHPcpFsmewBIPm|9FCKwyt1Q|HY$oV2~CYD`X=K$BBWQYbl-lAIJiXzCaQKBsFGU<%uv+4U|yixzHpp zBr2O2!4*sctwYl&=sHlzp#=7+!VoNqrb3^iCIKuH>jfH5fRc*--nGNkJQ>ma@!+7>sM^L>hih2{N^FLVde2BN4>Y-^`~_Ys`;gB zTW+EIPdVzWu8R*&wp~(q>|eJ}P{q4ny70_%^GC1Ydpj`D;YhvN8%2A5` zbU@i>rHy)Yk1U0vsibnCa+FrHSK6}h$|_GQt2+dn8a<;&TNP|w7u9xdX=!!Ng7;p< z8(u#XKnMhu3fQ`Y*)#e)Bp)iTnqZWaP7+B$QC?sd7(-O_fHEmQ{c5Kd^8m2!3OJk; zMs&&c{&bRTuBj7i#mcE^T?1{rOMOsE0P8~(u3Qwk>MP_Fb`2+(xlC#*ITs-|-5?OS z@btJJ{HngBV4WG5RgEqkfAvse_Q=L9?rNF&>A<87v;2Rj;JTe`Im=q-u3giuw(lA| zZNmB`VcrN1JLTm+jLXs$qg(p+I6xkG&EkdQ?+A7I=D+w~Y%2b;IXi5+By`NDS4+rR1mh3y-*7-&&hYwQ4E#EL!c`}FA}{}!kFy#>47aM`Br z4J8Blo-=A7Y`kS%U7s|XDGxL4GpT_?$d!~tAZ1fX8%m>%R>C$|X#h#WAtO3dYJ_Y(|MGY)G2`>RGNkAZv0D!=N49I}&6cDlmBQ<;Q zUJ@X%G$VdV7*MqXzGZEwNLr>sCXG^7ijWO@bRig#Ln-r4i9isuwnkWAqQH5!#d^Ve z1mS(ELy}Aem2(P+0~;s;WQU-dQUUyIb+wwewqz$AmuqD}D5;$A&PKUXG~BZV)sA-Q z>|Or&*6e+DTYvR+`#!$!O}Mdj%wKOi z;HJ3r%h#Tm_&~?G3wNE1A5ADP+llo+(wajf))8&!%v{ls_7ItXENYUFvy43Ck;+&| zooO2`-`Wa|;)(;}fue$sOxx6r(?)|M?m$(&*jpb0r$H2ze)W$|FXjPYU3U1MFmA77$8dN1z#uDh9DuQH+pp|zjixlJ z?QyDmWn3b!3bq<&C90+s$k1gZ6UBgxXt-}2bVG88M(}I5rNgCSFt}4cU%$irt|{yH 
zt!=-o2liaKWzyO?#Sc3=SMOExrgz+TSM;t-ckJo^(X?e>bqr_mvBy38w=tr&W<;U3 z=Wd0=FYme~e5yY`;zxfQcy3$Aqw!cj>Y*?CH->-O?v)=_UoS41x8;(?@Qg+JUmHJI zy7OL}s?6;o1D(wp{g08eUhTNy*GC?Ew%mK4o6MotUNP`>Pss$e-FFyP3ZQ$_VEfTK zRJ{{vB#n~JYtIl1m-|5GPP&*__=tgXU}UAO_L&j}An>3>P9zkE#>Hd;#frS}6f~vs z?^ZN2NdgokSrIZIkp>`O0PUFs0uyKkZIMA+KqdeQ1Oh+^0zmLynlX>YM%A?rod#n# z_N;~RXar~ri4UAn!U-g00>PfsIA{cvs|+H5h&ZH11j6SkGJ&(13}RxVrC{NR0x$*H zkdPK-S}-=y>Ce^-HC(C9oP1Oop&|w;z-x!jN+pMQZP(?`aAYU6DZCGEa7Rm`u+^BAve$Dc|zuFnfc~{SYz3WExw9S4`O<6oA$ScEB zzO!s|x9aF_oV5M2uM5?<^@!bH{IKNeOUAdXKWeA8x#w(qfNmZ$eB!b1Z1~NPz!|AXDqsKxKt_uwND>1e zYan1`Kn7$23BZCBNdON5z)XIy;8-_ok&Or)DE2^6L&~smJ=WP@(pg=2?ej0V@Q- zfzP3kTaZXynU13#v1P_C&wct(weZ#L5AFD~Eeri6*=2=UZ(sk^hU+i5V1CD~;@vx+ z{ID`l@Y4s61221{f{)ZdLB{pS8)a0QAZIdEO-TWHO%^Pm$}*LB%yeWp2z;hI5({ur z19_wKmZ6ngTsvhuUOjPJTPIh%8kkh7I4^wQWH>mu9oa~_(Av=Iz)(l))3TyMDCTIS5N0jXCF1R&-C01jh24Hn9n6!zXm6zV>0Un zrENp9orDVo2R36Ac7fj9)% z8Jkw_+vt{A|94hy+$cN&%8tA3 za_+!=?w08TCmizE4KHom?xArTTRZOP>+SA*YDWP0!`OEYUblh=D)ls#ZH*BfssR>9 z?)TERCntZ`ZC~1TzcK)j?>;Tll*e7`(~ z;VBM?SSVzY-We&8NE28NwIMH*Xkh4xrB{Yx3)+MZRn{WFHJWCy*3-AsK1<)!)<7^O zFbLnWoAWRL038A-2~q+|1SSbcUa|vZ03@La82%4|Bp|xtvE);Tw1JXKg<$Oz0Bi50@G*PJB4(kS(H3N?FvOaKgKRRa$4 zaZ?6#Z%=2MLnzzKK9tj_Ev^0f?N@!(AW$i)z)gw=s-6)L?1h=tkK#%ys@uFBg*x2=CY?E2cS}I*$E-p zN8Q_Gr_igW=X2h^vEzcHH=niRBaaW=puV`};v;jlbf)Van_PO}an+~+NmG-|T@iB` zGfR6rxlHRsc8@9+@-<_Ch)gm^Cl&odPgiA6*pn z{=!rvq1wqz-g6!ZFaa8kx{9GRUNKwNjer`Ct+B3-^*jTWXMhTUnN$#g057G+;gRL? z2g?%>^8kEtFz>9?>ktH?MOh&I|4{`W@OqZ@{rErE^}WV@-_P@$os$V634su%?65@? 
zDWE8|4qR=ld(~C9b+jrDwAvQ^iHh`RtvIlNf&-PZM+k{P2HE4}oSgNn`@XOJ54Nx0 z>-B%X`v$!tchu@kENpcPp;M3AVraf-q1U3?)@B~64;?v2rmT;!Dxz%!yji|x@1Za= z1aBI?kjJI%AAjL@@7`yza&l@o%;c5S2EsRtUv{A$rSE z-`g-C4((rNk6zWfdhE^oa}%FAb?VLIkKA=%|CH-b#Njm+=ElgPVMe2 zwQ7lm)IC}=eZgbL+_UDhA6s+DN2fpa)E)Ud)!X0x#;4zZ@$?ktK5^nTYD-rui|3D) z2&UM5pgQJx$MuLsOGy}!T+XVz*^v>A%H;-i7<8U7aQG-vVxVZ52uK9Ms;Hs0@ATVy zds7BZ0)k-9ID6M)uxOk&%b5dS%%Y%$oAPHx+h>1t(o$$m%DrLhsAUdyA$+B3vA>&jQlkU5`YLR8c zeDYCnUdaeVW;4M8nB^@~er5kuiPmua+Dk1m9*Oh1<&1P*NRWSYc!VFet2+zQYc73* z%D#B@IiDGL+q~SnP&8D}>i!@;e(gymhnLd$s6>zDqAv+$qaFyg5Qb%aCdio7)ZS(l z)5*KUYJih4G7{AioDz#EBn?=BL_r!fC-?W)`K`xim9{ejMJ<^ZCUE9lo*Z`)#CTV7 z*NMr;(xMY$^uZ&zQj5w85?Wdb&6=tXscCD+dF0G=kCO7ST{+l#AT>KvD7oUu8huVN zWj&8l@F-gS-ePoBM!cSjV!>P-RY}&aR|1g(g_fOK_=*D!USr+^Fvl<2L|xN>(S`=y zYQFX3SKhd*-gey3-jJxy_ZKHTKr0_Rk)Bw5!l88R8IK*@8CD-CE?c&7e?GByH2QrZ1m%;4R^)NA91x_=ghpL;pU=Gryy2jh$Grs>k{TxeW7a z5Uc2;zmxxG;Mhn1oqufaEnjTdf~RMgd=!80R`zG;UM9plMY8%D2pQu{+Bk;02Gjez$FOOJ@K0b2`ChG{3 zLHOjLg3!?kpOT{3yMAJB9t)cO+RMGCJTPLJuUO^-0Ycn+Fk9;e`*PvnZ%$m0sOy(~ zdHGAqtdj;!?ju><`|hinfodj!Abc{Jwk{Ob6;UE3CLALJ#!4lEj2uaE>w@6|GB{y5 z1xY4Lh@eE4Q@3CV0)YunCXc3Wpr%I6fQEG}c;2^Fs>(<{kDYc*ESR2f9Ki|6xKJ!H zg~YrTB2i+1V`Vo8SqEzf@WQg3raJ;`J;1AutsNicWu};@q=;=UERW#cW`(t4a42@m z4j-`mC|mV<26+($tcR~S2;fF4cuWTZ%*pu%5`LuP1+~QVB5(iU;{W{dvgxt0eM=A2 z!&OzD{!n@1=2e3m2A0i)6Hoi&+_QYi!NDuXAKW)IIa<6+eq*93b{sl3_AXm^ZTM>T z=8ZR8u#Ki=XKt-xH8<7o2CZbiaIV3%ogQ3LpT6!_Pd~5y$vX#bR0m)7 zR{ZmUFMew4(!qnTzD;kv;>}NfCYE<>d3SRo0Ha@AbY%2m&qTFm^>G&5*FpdUN0C#* zSjM7Nz7d0w3@IcI9u)JQiCQGcJR+2{bDhcgc^n}jNKiv%jbc!dLiuEuyMYUKKb-|f z;Pr%(fB@hD2#@DD3BUo6&O!mOP!>lRZ2)MaafIrg@qeAk`5cBL3=)!#Q5qq==XtQA z%7cecL3pbH@2MAtLu^Y!BWLQBG$e}zwX7Vjc}VV_VH8Le+CI zNQaC^1VZLLkB2>U@TZ7PB)skj8sTO4`1u9zJ?Fj;WoJHnyji+?-+~NVFa6WL({>cA zHmij*@uV~VIB`%k=LfD>cK@E%)X>;lZ{DnPzPr1!D*v+a>g_iUzU$E&PkZ*JCwFb$ z&W{^=dbaId-$M*EM{6`(wRDfv#~t3&{GNIfR%Gx0#_Gr1#-GUe-jNr57fYUa@jGt% z!|az&c;?+hoxAS+oPQR8(U%OD+{bMD0jxf1ZLLM0B}8Qt*VGreV=4IrsgRswNpQ~* 
z1Y!h;tCY=|{=}i#u6NSqgtDQV3d4FJE^JxyuZv$XswB&XUjjjI<+Ef%j0CAhgREqq7xf zjtMuw5;b{bhLK91JOpRtWa>m-a##BpZPXO4j)^!+s3b3)ivcouun0+U!AzIH)&pX) zQ^dk@D;hD2OVy!6WwT=dG-59k5rwc^Sa16T*-#=%WBU%sLWER(P7Z_7bKakASs(nl z;ZZGoXrWseh?SGayzBJG-*@7v8`kY=+0Bh5*&ohM53F%h=RDgTor$NO{fDWYSU5j$ z`S`s(HL-Z$Z~pc0xUoAXj?dfIiC6vc-%fqcuWvbS;?l_xOtcT}56<=*su&q8=4b4v z?D2t-iF4lb=6z$=9`o)s`P#R9_QcuZ=G)b;zI)++??2BS_|9>RN0OLIAQTgHD@*Bfmo2lN>a+GRD+^6H##m%OZ6uHjwE;iQR4Q9$Be z*&vc~h6Pd}nVbbIHJ$8%GsXfsP2L|UxcYk4Z*`2#4gfvawtHz84nGhnQsLIARbTL58zt-OqU@ zIcne332=$JCc>D|x4HD5Q8F4wcrb9mV{}2mhmwgpSfPBD6XmKwQUItUC!4e)R^>vr z9(|?&TMzJg7r7s0(;~0XTto-w)ND-wKp?9KxycJI`>1=?=!P7{VIUBXa3OPfCWKO< zb~CXH1dlWJ&4N%^ZyJWg(YE!mwYxrF%^mEmEIOS&8hya!@S-}c)%WMFxo7TxP-^Uy z;d@Q7_vl6Id*7U1L3cGS+Rlb28rVH}>(6dodAQiuuDlp&N|~`q)l&I>{*6Un`R#&F zE&Rg7$e#v(H1Qtymf!EW_7y)$zZi9AU;DxD?0n0>QM8o|M1

nFz&eE;6?`O=fQ(X&HP;5-`noYIYy4A zHd;~>qmallQ-^M+wx;j38M{DiJ|w%#&wX?!fx%|evm<~5Bpe`}1mL9b!V4VX1^yoj z0K5RafCnH@UHJ9VrA7@h1*IMp)&<}6K<0wW5{r?0PC+KgEsJd~3$J~bm@?W{4K+I; ztko3qJ~Agf6UsQ9taK^lmQl&Q6inJY#t5X=zUw3M;ZughdGwS*P?+u} z(QFSka;gTpc~MHo59*vVp_BEDD`#X%A+fpr1{dKTPYR5qJd|@7Shj{A)konBr=Ca%A992{j92bT$sc-q%^&z3B0S=<|Ol(%(9Opp zCOdF0C`(-s#1I!^z*VJztSMwFj&@5Pd90=8alk5Gib3DCs#ljfcq7i=y+@BsXaE|) zudElnZ_R%m=gC->C~{EdwNOJ+BuJM*atcv-ksR@q zG7U-0XDua6vzeNlbg^IT$CJ!W3k^SYpRMe8Qevezf@b zeJ9MFws(5>xf452+Ec82B%C&%I_m6^N2={ZsyMDSuu;sHE`@Q-`{hRge%M-Qdl^p)6z4B;pLAt}r)WdmhC=QRpm} zE%Pgf)5a#ALA~(bGY3e(Ne4(L;UED)z01B> z7tX+Gnd(3cECtC-)Sd|zE9EloZIKLQsyVkA3k50{GYiE-0Kp7|S|kxY1cgA59>f#H zXoB!uS$IC!<~s2ZV{`A<=BLYWlAFEuDyIORL;`1}j@o;{5HlwtJRzje(AF^?7^SGa zAHcLO&`&W~Ac?B0kVNKdp`(;=M#L)gdm_5f_daDk?_w09+A~(b1yU%jYUsXbp&JTG zBnuN%aNTHYQ*&3Fmgev*pmM^v6eXg}vb-6%;(@@kz#|D=Dsq+5QZcO|ge_IYgruXyMJj7)xwDz-=kv7ImWRwu@ZU?aDz)!F`PV(rb}(3a)_D zJo?P?#a-@Iw4B?LH+gTMF&T$y#BOZ>(I2>Y_x?~;Loixd_?7j@2iNTW$*=Pj{Mb$B z&Tr|jo!)kIJ$ugPgJ<`;MxDH5L%DfqptsTVAd-B~+ZrE^T zEx!Ks`^S1muh_KhL-#*?#`N31F!h@4bJ+aL`OnNq?tXmFlgl2PxCQro_H^pC|M8f& zP8|Ccd}HSg*!z~ESYE8X?S|n7p!V!Py%9M%@CXFr5di`JBX|KX051#y0soH&gomgu`zNEAaUti? 
zi&PDp1S?cTtaBb1BG3AQtS|X+Kcz%LQP8$7g6GU-Korh3Y>>h`q54uJDkB$3!iOl6 z?Kg#mK;{Jls$$2DovE%r%N%~?)wS?EOFSk*Et!Z$x}?B^Ku9s8DucFO^IqhNypfcg zh6|RFu*AC!YrQ3v5}TK`hU=ohe@VD%@@=-a`@VR+#O00A}%d+cpals)SJ1i=)e+avN&kw))zH;Z8pT&E}uKL9A4}P`t>HUpwj=ZJS+`O>+wb4&J@R7@K z^UuD#`lg@pu~l+n`*#=Oy75oH>F~d-{rG_g`vA=13%m#OczVXZXw{$#JZ3&b4#jfv z3}ts_=HPVSAqTVLWdBqu;sjMNQDLaQ|+Z}|NH5W~7 zK!45AzXsztU2j3M7cjsRE(po1Sqcl*6+&f}U2y#$1ibPS>4H-==;!IuyUIxyz zHAKwJ+D!MRm9A%_io7pGpC`-Xd|5D;$VE{o4X1oVGHsaac!U%y9@|C$foy}9iM(_` zvPeFFY9gKsIZ!A?CS&M;H(0Mx=;3C9BB6U3(;*fuUbEU4Z7rNiwN)rt&I}279TN;s zOo}8zQo(Tql>)d(3=6({$5>+P0T5H@FyE5`+QeXeZ7N?_Lohn%hKwOjj_> zbB@ebgK9P_V0~;NHiJLZH(sN6?DDjI0S#Ra>yO$6C>6zx4c<9uV5B#xr`3rt}`|&S6?daXxuKm)} z?p+ss^9eb5_D8<+#M9RtJmt{*AHQ~b`q25$fBD&;|6chYE5={A^3{+0>fZ+4_nM!+ z@6+dX-!CtE<(t~QMWY24z5~_k7aa4B0h??0D zJu!Zsd1|gYaq(SRI3UuDH5m7Jj)MdQJcJhjK@boW1>%l)#B%~D;D`Wpob12$;}x?( zS2BQE8KGjND11snbaQ1KfV_@e*)ld+%-e)KhGZ!O*5GB-H7T+K$qGtfRO?uCs>$Rk zf)Zy=DeK%}l_9Ts!@IuOo-yXq`H5>^lC0#=WWi%*L$XCJAh@FtZBB5C&<6=sciFIU zRRnd=jjYoN8m5-It{ZvmCkGvs6S;SUM3GDCh2%`?qlPqwRY{T|vRU*#2A(?~0!uZl zmdJ$86%|5@6jM-vG8vN+sa`Z##AePVybBJ=L6Kx|B1oSckhwfmxq}W2a5BWe5{L^c zVki{=hu?W_*ty}O>u$UC!r9+l_K|P;@BQ2GzfWf0-1nyO&d>S#cfa86 zx9nRwH~f--|rYT1BCBW8?uj zo_0_6&!Ru-2o~XcfYh1-5CC`}5P}yT|4VoXcmb~g2!MhD1qw9n^7h~TZG{*Tl&@O8 z5_yCLi^|6+xF;4UM21MBEP^wGqUTi-%u+CEkh$dHm3C5>bqq7@a(-_1tjnqtB0sRb zIa^gB#{w<}n`$IVEq?I?wPz+vfBEd5*IZ!<3L-|XJXNj{6I>GH6Dxd~B|S^Bt||2R z7QUii!uOoCJoNb>>nCR#35}98S24x`cn05((RZUef}#@vR9X~>F}V~W9n~S`O$Npb zXh@}a%!zxza{-kN@zl(VCKFQn;0eCEcj%c~plh6-rd6Kkwlk1QaDSf*0fU&=;28(Rg^L8M@-0 ziF!dHCd!%9c1)hKb?ErnE`JH_Klq!o1~;8?(Zfyp?^Ul`mY%uqvEfU9n}lhr001BW zNklvz!S)#ooc>SK4~z=vLQ<@!q=-`BeG71u5r+TgEw$4!&J z`F;A}Q}4VozHH(AkMZW2GvDxu&kcQX^Xk`rE_}H9_EDeZZ$5AIr}fe=@BMiB)NISD z?wWONa9&qirqqj1lb&^FW)HWg`_3u}0n)93kpWF$+KC3Y-Kt5w(5}jk;nEDR^_fz7 zAQ#-t%{@9{W=~o)HP{p_*~)~Zls!m_CZDzIIvX?+B9#@SCrzP`_3_?#*SHPGMUqnMF+e6A^ z?HL6xbVnxOA$>B!4xB<%bP_V5#eh@6jlv&qAG2(4$Yh>IvhcH*= 
z-RTm6LVMjwkMAI$uwKImEGqH&(|)x02YZ%?mxrG&*9TT_;rq_raqyHq@tE$McqM;& z$D?PpHmzCH?rf@G_5AS-S&%ghq zn@>ukE57xu|9r^4{i^RB7d2m)HeSz|EEW0cVBFW{1JnDiK7O{PRK`FofT_nd_ij`F?5U>3+ z!bnkjVUc%ZYxmF7u4C~5Fb;BFgpeQrE($HOjJXC$11^JOUT9HSSEx*J8sSnhv7qM7m{`Hv zt!RT4kS64TYZENzELu&$XO1zaJ-A!UdBj1;kyuB%4|YP2YIdBmp{C;#KFRU^+o`et>^&Skp? zpE~F8))Ui?qsE>RuiDvbZjjBmR4y*=3~$bEeMp^p_DNyK8N=-#UbVn+f7JY!XPY-k^&){o;S&-$)gcOL$A$3N*WK4F@-#8>gP{+uiE z&7b~!`dzETwl6)2gAe`dZNJWGz8IW)1c1>7hZh{iijNC8_n*%gcW!YW-f8(BduJ^It7XYH7nJJ_v2t)i8;(z7QF$pr~1LLdsr2b)EQ zKi)le>j6yxfJp+PLHM%(3Xubd0FO8#5CS3K&jKM30-@^x0YdLxZ<`wEb{pNz#~x#- z(@@)PuN3dOqpnLz_f_0C+sX3YVH*b6`J=9sKL6c!a&W|GoNQn6sd{QG>c@VB7>pM- z^7f1Vt}cTXKX^h}A3TTmyv{zc+&Z8A<_{{&94`G8Q-@vv&OkB0@#2cOqK>ILwr{-m zlHicwQ%{KCxo5m^XsO5{Qc|kwk&n*R#=`ew1U&TzL`9T3jw8z>m${TW=8U9u4bKEs zI_hS>sj0~`PJ*NtS>{tl!3#qoMVC^PuAk{%XmT%ol3do(2U5udDQf|w$Wu&BG1E+N zQl!LtoIoD6j=qrsNL1W_Jm-w+BsG{1&;h|F#TZCrjc?yQme_hA+qkDOyt=MzXT0td z(eVYor&K;83M+a>^NNZXv*;JpT?}?hQFy=(VW)=xHC&Z*=qgZG0LGuY^6?!66xQni z_>o%v{lzz)`>&ghTCI-V(OukIXr{NUYCW`g@v|5Ae&2laZ~5DQ!mbq3#i;6@depPm2Y_1EwI%i*a{`Zr8{W6fP(scu|! 
z^X#dAwfjAb-oEN@vU=b<@ye54c<;N%@z&1z?(J-H?H3QfX=;U;A5GF_jqRJE0Vs5# z%~^{!PT?<4yKQHvjyXmM$}_=qs2E@>UsY9YT^&e;=QT8mhRT}SnsVZy7-6bLKG107 zgQ_Up&K!DaN)e0_OU!R>D%0I}VBOOb1RdaQ%90oWz=6mS!6OjJfrA`4B1Z(I0|E#L zgh0U>00i_t_{I*Kll6mhXRom^7~c5L)_YMcxW4ej^z`Q@RrDUtq5R^yIcE6a?{8Bj zg7=7VZr!J=cB>x$DHE_?5+wza!(+WX9d2_T73ldQP_^N>PVs7iS01Vwm3 z-toY4tE-?5qtx)RQWSZNCP{J{JUL-);kjdF5mkyJ0ubCN?_Gn7Bn2AZx_d0K^+2|9 zTWfT+n=@Eg+ot85(@a^YZUX|PP!@%#{h*8WaMup0Y2NHvku^KJyZ}%k_YS~NK2HG5 zCCeV%A%K?kMKHj4kKXq3ufFhy8z2viuc}Vlx&4gd;o;>QveWmTerVI!>K%Kl@U^4% zZhQRk1?N8T4~ur*a?OeJJZySis}#Px%_mRzTDd11ojE*M9blC)yBRFlPioGzj34 zM-G5TAaX$U$&CN+3iY0p2$t0D9)bZ#+bE{Lton$*I8=u66&9wTZSI`qWt8 z=W*@}CV3AJVW&U%@~LF%qi?@6uMEh+$JsMJ({@EU_VYjFAp{o_d~ez3c@>2E>HUf& z>kz?x?LWPymn3d|BnvZh^K|;5`kKYPUJNlNWgM}f3r2}_AkPsgRF+Ix8X01U-bt=G z1Wc$27S84>Ny@cSTnZLdR2(Ekp`_M@)}*4>7D;gmE-~+8A$s76&C3i-MF%FmsD<*3 z$V3b_=e_L8yyUrM5WJ3o5%(@PNeIIb<6!ep4#$o5jLJsxBsv;Y^T8fi@=WMZ`@(vj zF(6P%6f##aqipIY*)8sVvH)8TWRLGx<11$)7X!h?UW_7ksmVF_LUo`#_eJ#0$ZO#( zhWkJXl?@!)DFxsG2prKu7leQdmOQvi5OnY@Yk;wmUU%*FfAK)E{k~=O?s3ck1Dyu? 
z4Yg$L{eryJ)ye}3W}tnr<-C(bp{DMXzGt0R`}`j@8`|KKOzQ$J&G z-*pFl>esJgE3S@LH~y2of@bdC_?!9kl5fBB^bhWR-{$XJ@#(3*J+?ls^*7(Y{X0M2 zb_ktZxGdJ%F&)Uo9s4wt#SuoE^9LVm%d1bgb3gazuIa{Zx7YREZc&VK%Cfv@v{Xau z-`{W7y!K#cX79wT7meZOP%*r5Y?&A-tmjcMPj+u^hsMDejCHlq^_CS>hyJ#uPv8g+ zSLMVM1p)|1$Pgs}jw5*FG2%HqA|Ubz2`2+k8vw$K`49Z{?6gf=Ce&%iS_$NTxI=|d zo95q-SKxZ;D=F4Mu<+fFo!{+pG5)i=iYigi{2IjJEaJA=4So zzyC;q5MUkm!j>OBW42E8@IBnk9PS9T&RbUXLraFJirlebl-@<^yWlz@g4aAI7NwKk z7T&{U%*P}Igw)WQWCr0tTt;8Wfo2gJN#;C@Ng&z0A5>qZM2xFiC_^Z7-9h5KlZFDM zWl%9eB*Pf7SQkoGOjt%`Ms+A+N|DE;jmu#YRC3-N8#=6(6lzV(C-R&JmcSCc6f#1f zX3j{YU=;O(^I6%esgGn>Vj>Pdx+Mo&4`dtnsPW}4t2;xp;1$q{eLGnAXgPQVj?SIJbKSJ z{&dO>H~;YEZ^38we;fbw%j+uhkssZ({O_kfxoGyzKRmf3cDg^uJKMK>;f^=_?fjd& z=e+)bWB&Os-uQ_vS;?jL(Iv@}51F*j?gliDFaRzro@ncntU?#Rzr zPHMP_zMO5(b@%tXt+V3O`x--|V=bK(q_Z798yko*v7kHy?KW(|$UN&uFey|vxAADm zPTEuN#SttSXaQ1zcmxnYfvgH31c1On00#$R6dnLa1h}gM;00mk3s-HO5o-S=UwR59 zCz`+gjr8!P{OCfHdpi6jM9YIu7WHRNoD9Mb{OrD_OD;LY*jxSu<2!ovKOY(J#vvq+ zN_}E^x0go#>yJ4C3m!3e^xpdXp6@gsxr;E-nZxW+*N(TtFc+S9H&!8po`H^aoe5Sa z7|rSc!I1&dXMk)$WKbZ*kXcqp$QH4DAaTW15V2IzmkkS^yjC415Frv13oTS)$t%F}WVB<^qhr}bpTO1w*~UF;{3xU9p6ch3S5hHH<+1KH>%FOZws{7Q6r3Nuja1o-0t@_3bGWycN2lwbeh4lqW;m5kIi?8|iOXq*E zuJMQ0?SJ5+r|0&aRsCtre0S}Ym)^c@&z{{EuYBZ7;tMNZIoqOpFUY$OowA^3icq(Z zeBjX^yYpY}z2NI#-Tbz*@jBcRUpjR8-u`R8xL|bTvDCq>zuI@;s6U?j4t%8i`iCEQ z@5{TdDZh03kM7)c=IG{B`vQ`;l`K%lPfhiEl|v>5%6+Gvdw6oF(Uin;MqE8p>xE5tX+bRJGrKY|Xt|)kg>!;B zyN)q=+xGYD=fVT5hSq@^!4dw96A&VJ3^E2hCr}^&fe3&^AP#g92mv(nju-DaRE%|J zF??#%8lqo6EK^85{hiv9kbl=u`rF$jCgw`C$FCi4d+A)?!AHi+#Cn-! 
zDTYjVC0GHjgi=1&ea8+a zMD#+#vsj6&V%md8VK|Wovji2nXI!$LP_=YPFcw(~LevVDfl^R{s;C&85Qf;ih#@)e zP^O;YmfIkZMD3(gEONML0*N~I%jj)ss{mMGA>v>b!PW!WrY)_-D-7>@Q45QROjz*w z0na&)oYzssjOXoWp%iaUbCC_UHy^duM8{7qG`CR;wqM4L)_sH5YCC(lf`Nbl#!;Z_R!^`=Tt)ZNI>J zLA_GRAd*hbhMX^X>RS)~#b-Y`dG%Glx_4KrUSJ%8p(#ipq;0y>X<#)Z0TKe( z(2y8tU=c&Y|~~#smLy>K875bFBu+g)M)g5a~s}h zEn>_-+j6NYB@0bT7S^GFQqGh~b#0lJyz1yO3frnOT<8IQa13>ZjY??y26vFTUs<*O_;J>sNbtc6@K~e*GXP zaG5(W}`jX8Y*7S7(i2!O@<1M@EJxJr?&^~b+iq?~g7&iyp2I>+9n1*PY~)xW)egdog&PHE+{NGZo= z$8YCRcJ&8Nmsvo9IHnMYXDr88mu!bYK2x5hxw510*?q=IDK zF+DA#6@&k zI4MZ5v`o;sQQ3(`sQ8+`$<1k-_PnYa!TV&T-a~o5aXyQzS+fR(g?ZFW4spmt1fWE~c2H5*ZLAWmlQGLaI&{ z z=G;}kjE8Re-JiSafsobFI^9JzfS2b+J z3zEbOl8wnmC6Oc~po1itz|cwa7|#=+B$Gs%-Sy8F(ryHj;sZ~05s>2IU=mzCDTOSH zv~H45St{}{i$D|60-gi5iyp;S-aRU8YvSrhM#Lagg2Ds$jWsvhY5bXy5*1KTnt$EZ zU;4~=YaCw5XIF2#14hS+7UobE=v1OWibbreSuFCbBL&xz3<0hbDx)O?%j8rNV98PB zNF;R*-a?YoA``for#7>4rB_iwiXsL<&@EPR1(MDY-&=E?gD} zt)4{Rw!wM$8LP-9!X_=HwIy|QDU}uDPB|921!mErO-Mn&r-aaYFmeQ9WGEF)Y#FrN zVTi*(apX+rz}`#nK9cKrQidZV0&~jOFysq0o#$4yN-68~IlM$OQeq^?#AH)S<;7Kp z=eb)B=`UZjNR@LtQmtaIS-j`!zkm5XpE~)y>#INi>nqnzFM06VEvL8tZtwKUBljsC zL)XnZGXpRB*cLVj>$MOZt*?v4?w9}c>wj|3TmS6+PyR~(_}hQ#C;yk1Ot;(*-urJZ z?8{&K4-fqI_zQ1&=YRf(gST9I)OY9BEj3E!nv@c=w##h53~B*W8#6uW04(350p-*A zGqe8vH+|`JnO=YOm07=2R51(aN93UGUF{`tCb;hSj4s<9%Lzzr8=P!bu1VL0Dd|pk z+fRlk22*k0*{9Xb_0ss14#l~X=uMXIII-d3VDSP2!1sIJNfq%t1w;iz6_M~AG64}P zs-6y8091qmAOR?>z4dLO-_^094?o?7Zj=u*07*7x?)z`BAAeM*=D=-|7*BS@`$C6^%@@np z9_aLy>{dvj7sn!JC8ixA1iVFHg7GP97LovxnS)iq8=vxg<7MWYR3auB5(-sIBQgV| z#7wf5~^}+3Nj^0 zE0RG{xos1YOU}W<@LZ#Gqmq;+RIw}?uetAizxLG+ysH0+LzkO#_dmP!r;E;C=~@{J;aUsl3#H>$QMiJO zR;Z|gXl=Fl|Nhz2f4l$tKJwn9|FQe8pSkUa|MJGqm^}~me*0fPcH-oxI={Yo;?*zv zALrHjj=A-@!l@Y-0zhEzl_`UQXTvopI^FeWPGB&+We=k&-Q49fv;O_pJ#oCmYkIs> zI}|OX$&XZ3*s(J~CL}>|^}?=P@9B=(n3E=Dnln39&&=g^>*Bcv@rZ%G>Co||0ol>X zn&z&Y9PVDZ<@V#--kipF46G@8$pJ|sNhExmjY$G3DoCe*2q4)GW@|9scmT#5kIu|^ z{FnV@N9VDA-|$+h0e)l zyze|%Ly!~??jLIv%BdONuYF+mzZ#I0A?hsIo0yU!;f%1g 
ziJHAktdkTJNSGv$DHB+V7^9+4HpG!QYvP~-SlOs`l+uS@@Wv|uoQzcgMa+$mxfwUJ z=mKYlmI$Ckv8Cv8GmUK%XKW%x>MwFK)|OUDDUww&%hYfVO$t6XE{EDjFeow*2!IX% zq=6!(n2KP^!e${vva%8$NGy5~m3{8Kg*XfpM~*Mva?OQyCX0zBMbt8p@`|WT(^)fL z`KDu(ob_fMxQjdYrzA2&hyYY(>2)Cc>{^0M5%=tAzq%=rEL?>Y(>0g-ZyY?4fBdg% z{K((EHT|c(um9^eoq25g#*NOq@`!AUl!Oe*j1v_Qx$cU(Gc6$XtcZz?S^V^$9sOka zEARY`^t-+9Deh?e^&frpokxxy`uRWqi2r%_CkW&EDx27jI&oHHasU7z07*naRIcr6 z3Sb$gP-N^xnb;ae^b}Jkq8J^Ag~JPa-*fr7)y0Q)t$q#t2fEW`VcCeNAilgb7p(Cx@Gu^Lfj5&oe{#S@Y=mIQK4+nedd##v?4a@-UCtL z1$6GCmI+B>@xh%R|H@=@(!A&N2Y3I_waRb`oic?ivS29XpdCme6gej+rO1jx0zqoz zByuKhi0-J3;jC$lu2HAx}IDkQ~> zyfz$32vAN^fyy~>Xa-L`-O1uGP#ihax#g;rdRi%ONSU~$$w}ftu8V1-7pIfX98Y29 zf}Wl@aHHp-!6L;Buojt#r8qH;2{GvVcGX|omPl1{7EY#X*4=}P-@Nu$f9vwnYmObc zb94C6*E@T@nfE_;{X4cq7QqvQK*m_>Y!Ih&K#U~|TZ6#4KmT|C?d-#Azp($p;7#ZL z>%ZOqTk}g#d>B8$`|tYXkf$ifI?^+;jtz8>DH3x;;Sr;Os+DRHEI?3d?%bDm<9QAk z-gy7%)wSgZW$&w(uYOJDYa~!-ACvmpb#p~`CZM9upw3fnR}KqB$YcsbL7#sXI}e_D zY`S+oomXabY}2}fx6g`&jdQ9GZdT6@cdxu;-|1~&g5kc&a4^IG7M+Czz@!A}r36wc z2_YfLB#{8=kYEiYXtriNU;rHe9q@w5zx&B;WL^L9648~m~$WFtd!}*`oZg(W``GOyZQaAQYa`*_g*7pG@P>2pIWViOafOuSbX~7 zW_=5PaP)6)IDAc3kh2o82trMXC}b%jC6RmqFW3U4g5p$!&=G->2tj)mXe|iDToS7& zAy`-@7ZqouSqQF9uA%4yLnUL{T({tH9bKDhjzG7Ux{HN0Ru@?+Z`_(Urp}@D+E2>S zG^$Tb+on0W=ld6AdO%KyDGC(Nn~N#M(NfRTj?qthz)G8)F_L*I9Wu6YNCtxh z$as3wrkQHG_s_YnZ3D9Mu7@PII&%EVYy4aG|Iw$H`o~5$@y3l8Pu)1UaL1YcySJ(+ zVoL;_MXHM|%2o}F9=C=47szOpszLKJr z(n-Z4Lq6k$E zh^`$qZC$fM7p9}AC~fYRI&)OUxsaR>Z6jt5x@P#;z#~axjWM07>J(iYI=GCAT{&%TKC8V zfO1&0qcq#m;DM$6cjKGum+pV+o4@q?7k=~)F*rZC@dK~>ow0O8j)f(f=}1nAL@9JA zNgtJBK)I^y{MCEo7q0xm_J{HIc=fBl@hD(6u_ML;o=69hfr-5F6bY>_$N+4{JRd!I zD$pu0cZ)Qsd4A)|0}H?azVpDDlau9#j$F0t%ggsS{TX|CSH0 z&Y~#^P}zzEiX8oX=b_zg5#rxYEwe&;>FvJoE*7)7mCq$hQiJZ<6=foD6fC}Qx}OwL zc+K8gdP8l_s(JFLkqq4Qa{YajeG$mS9DUSHJ&hr_nMkz~oy5=qGKneVY~sSqPZ<$l?sm&gu~b3|uhAy$ zS|LLnlNvQCc&-^p03-|%sI06iZAVL)n5-k{f*mv|Vi`=w=MEUzb=(!(mJ5zeMkQU2 zo;G67IC5e^3?|Yv+6LWoI%~0hCAw2r48&oec;ejREej`|oyxw4Vvj;?S*1*@4Yqdx 
zExKAoN5j3lrjoMCjG%pxKqA&)sLAOXNEq$BAMLklph{-a)3e#0IK1P?p(d#A)3sWg|79wvy{9>>7ZM377JxyI=!u0aX#c2#wr$gK46ppPu zc;gwjcWYhiFg!c4?$*05$sL=ks;0$oVik7ffyI-X#yh;gIsn%AekCD^L13MNOqgrf z7XeAiB$1?-sgnXUV|xTR{7)<#?DEfUp4oC*0|^p>0HzRjsaM{RIooHxsD%Wj2lh8A zCSY0=-*~)N2ovM0mpx=o0?OsJ&m$_Oi&rl>?VL>_&#XJY!7cV3s_1Y2t`FX>4jtd~ zV>_)>YFtrp(FU)SZbTLaa6;%LlM*ILs3N$ktpdv#*Y&KOI9ty&XBLzN1+Wi#I+L#U zF|m)tTp}55ZDl$VLQsm%N#Eo;>)K0?HbyYOP;8=6Mk~`%N@@hJ# zD5@*fN0?YHbQVH7*}ZD z>+j*!$7F)-@aisF>nt{dn-}W-_bng!>lc6Z*T3@FU;3-g;r~Pb@Sp7;On&10yyWb) zKJyocesst-6*Z_XL{^-IWz&dQHs!(}eh7QB57WXXh%AyupcYE!$YtP?1BoU=p*T&X@@~VtQGN$k;e zbN}vsIXm=$g*zU7?SE6DLoG!m??#Q&YaiU=2)05u(>(5?uV4G(~pIfzR>xsozrtz*F zBT1Le`{LB*8}=Qm_N;E`%B?)-th@8Ym*h38tEj6EADJ1y@>+Fn1c!fOjmKgI36en* zConNO4JU~tDL~7_38~*CNpf}50xJW&V9%NZth26%$vyw!yxcl}c7>TZI_en>-K{$J zTveKFl0u|p(RWQpOm&vNk)$FU>pDJ&NvGge`=~Q_Smqpa=<-aNX_o>s=Om><%5x9@ zabI~q>CFG|%OrQ0giNi3urnyCy)=!FHDbfbM_=n4gjN||N!K=Y?R=O3S&;16C~cLk z)mZ=$dKN@Tu?zWH;!=BJsQt86rz1V%GSsk-*jjo-X? z*aR+q4p`ra!F5z<+QqS`^w;n8Jt~t$(afZ|z}NychMJ3jhEFN?!K@x;mN zZk4C%%2_ByQ;>-OUzI1yb3fDzL0p==Y|yes8!fZQ0K+ATERa=7vUBmgg9O}-`x|^i zy41tk-koy`CaR#Sq`v#5qmOQ;NB*x*UjLGxxw-T3?_KymSN{A*E-ckh)T+sc?!05D zh%MS;($lrgS!x%kH972-zx65kvWu=#4kB}kwZZeesv>cO0b4X)8S8umxFAo4z0skL zlcEPM+G==le)k)na@#X?YzAQECHDcTKE6I%{+=tVD6X6{{xw=$Lzh=6s`~f@H{3sl z?N0N6pBY8NkM4Q!vbgEoT5;3)69(tPg9nb~<*jY9&C0oz-R|x~SE_|e>!_kAbwG&ThPw$T)`Ov$wU(J!(A*Q*D%1Zc&wv zGy6KVj)My+Meo5Bp(1fxB=jPYvEa%SWJR*1gH2UnPK~`P%*ReH<~yb?E==bP6{M)R zl$OrV7PB+OT(MSFvr8N?aiJ+^jV}a8NY$OYAATKebKl!eeEhYWswI;%Ha>FS{t0OUOHF;KLIBb#!jqE{$HL*ywcASx#sie(31ZB^G^(BM?Cj6e^N7wxFgICsxhUb8X0JUrNgqbtrZ&-K3J>$wwrp4ZH;#urapS<%`tL2T)ZQBFS z9_e(>s8{YePJ`|3jcV@FQ^Vcvfg3JXi|02?*MzNSH(^)aG+o<-8)CN`;spbXMGGlO zrl4h#iEWUoNhlHn(8NhQ;@-9nRlNoeIDFf1fZ+fb?l$m;esfxHZ*Q&7wu5Kdp|yeO zh*umMN}`~0DvDVqjd3{_b86UfDx|4n;UlzctZF&SqS1(j9Cy=p*G{z^XKAzx zhIhRGeLX_EIeTCqCm<0rr-p+U*}K#zM9YLiQKc)Zf-X2j_9?balcv#g05}0iQi3#* zF-2FULRk+$CNg>=V_MRE>tceD=(9rC3c5^RPY&2MQt~3pp7h;ax%#uzuvG66DJ+{sOs#SOtqol 
zcl9P5scyDr65=pWJi4;`7Pk^K!SyP?MS5b&qOH^zbF!(comc2On<5Av86q>WirHrq zAdw)G<}>T5S388e2CY24zWI{!N_QW$k_0Uj;!EyTU-{$CuJ|wi=(hWRWzSt7Kk(mY z?|o?_>a19%%O88wo-s+0Q*TprbIwt~I*8~ySepLEKcw4-#5ypMZm3F*n4*u0O~=*7 zOo>d*T~;vy1k$1QArHDtD8{hisBX2wZr6qU718{rp*LqI{7P}_iB0>Wqt9wul@IKAPA?6| zL^ywPxZ6E&;6kxuWk_{Zv`0@5Fnrr^bz~;^N7exrSe7zLNitXji3F51Ns<@R>i)Wp)no4A2N#Axa`ZD(D2L3Ol{O-csD$Y7}%;xJG=y0ZV~ z?X`%)6H^R5wUx9qOB;p@-S40pGO{rWa2x zzREk%%~lqTRwM;od*8zGch^7mjh9ZoaK|jX;p?9|so#Had(Jrt(_a4g4_s3tXc3Aw zRq)v+D3Nn!8Fx;9?J+vAPP!G8nAjWdD{msAG0~Hc3VkFJ#DR@wZl;(?sf}r4#czJ( zkHjNieD>04f#3C&4|Qfo0GP?1dnmW=#G2mqinE&s*PmSL;i1MXh%+N7s-Lvx(2F*@ zoynHHdE@BiG-%iNUOO{4pO}>UPCV1c1^eLq$%X!(X}Y%R3U1Qj0Q6@;c>A-l;MPM8!rL*Mek#b|%tPsBU!9 zAjt#~DafQVu|pKKka9ZPXHLDF6#M4P%3EKQg`|S<0**Y(sB7V-EjUI-@Q?)*5tyiw z&;lf6H!Sr9(?V%NsUWrJP@Vnua+52qrH-gm$z;SYF>%w=|u$ zsnTV)_SQLwvCn}ev2G!gX9W*qGg~PQLmUQ*M_2aWI$n*1XNm;w`bIBpxVWN?_V-od3D~i_2$fj=Qlv`f^l{43du~(MCfe;l_ zWFRU<9x#AZ0T6-URrE7&h_yl-FWypgU-qH7XJ-9{EV~!};`8rW9)H6EX0qos(fRiH zYUkP)ug=(?TkGjp*YcWpiWlYN;#1hOc-yox)A7Q-3#Z5EuGPKU>f+X?ws-74de-tq z`(~V8=wBR{a(3a=aNp#?z31m|n8z1~RQRK(cRRfP+;9XKyuibF0K1Z_ASHBSO+g}P zBu)i6fMk+Mhfs9aA$7vq3|PSc|1%4Na{k?Dw%6mv+3ekoZ$1j!SyqJ>Ry2khHZ4gb zJX?qqGPQvMLNwf2Ozf2qK%M30dS)Sz5i?1_t$=4@FXcq1RBXL@w)+$Rtxu>&8_PEu zogGIG4xSN1NGSpe5+*@wRz`IyD@x+7E}_*(Hao87Yp4AUzP6prj7_Q2+sd+@>L5B= zuV3jbPMXM@WG*i4x;QnSd`_fic1Kgpb2DFiHp`u`qbRh6UpW1=lAZaTb;6}t(GNu$ z&Ua!R9M??^G6N884Oe9#J!!eEs>!D1JOV3@D%WISQ7rD-XW3%41k3Tyf);e#>XL`LW4+^xuB}(XcihulUs7@J4BqoNL-SA;Vf!NDV;nCWUbkVhU{z>|>4x z?jj-TB1qz#u=-^OGDukeo1kS(OywsE17bD5y3HhRb1c3O>q)u}P-<4J#L_R*w38M{KH_0|;1 zs=kO_q>7rV`wzxX z{H@=o_MW`+<`4gyUt2qN?caFPtro5HpE>)JwiPx?C8peIt3YTAw4yU1an;%XbmCRb zF~tDGaH|S%i3u6D5=>wUG~?CV z<;K-#EC6@Qy`$W=&ur@hx1IFFr`C2=*A~T6I4@1)%THOe_q!S^rmlDGxo0O>SX(*x z;*F*2kNCNrPYqSG*?UNxu{*Z6L^V6Iik`o3Z#J6+oo|euJh3u>eckcRk?}Ad&odr? 
z=|UmGCPWpH0K}m2Z3>7MP$mI|c3AarLkut+;GbCIP1oJ^A6Ld(m)q4Ee*f!7-8@%D zcq8LiFA_)wwC*ew+mrm->8PhP!Gl8lm~Dq~P2;f+dO$6|OU z^A4)}=RU6(ZKE-sVPUQy-Lb9JJ7d=9S&E(cfWkULQkEsklcmu+#w1iMrv6@PD z5{8F!xotIXYVI)V5F;}@2n5BXJYYPrGBFY@i31dxi2Wnriw(Q+r(WRkMR zP!;-f{f|A~%i=Ilee2Zi2Wqbx8OBk8KvBi%qJpV?Z1rx3J(&U#M*_v9GLnuQfFzU5 z5UJQY=MrR!_T`mNs^@anUI;~8YFEl2sy(LE+5i9`07*naRJ&h9pZXp7qW;mB@{j%D zTg;!Xzv3qxSFKP8^w}@{V!??;C#c{pQEkbos9G&^+d2LJoOo^hcvn)2I#@zrZ!Dmo zlZb5{naNoqqu@tNMDhNgm`q2f9z8ei|BV0esijFYokBJnpXS#u3{M((;LtrIADW}v z`lj2TEPHFmHka*)I#%{H{mN z^>}sejp^CFH`~u#QPuGauz2-VuB%;pUSv$423+sKC%0zC1I8P`8^D`+ho~d~6`^F3 zfbd|P zUBPYKF^jD)I_(U;u^fr)rs{J@-Q*a7gbPktrpbg&fLW`fV-zCQ zOay=8A}~WBC+HyHpa~3O;_}kznF26c zep#=9(J7=dCs{GMUb0@Xy6et(5=6mivcgoESA%qY@ARz{j8HI}4TCKLVeXn`d zwrcIG)jIT5+p4dQb#Cie>MTXHPB?*p1W3q0=JDnxcRuIdGw!|5-fOSn5A<(8{lEAB zes#yt^^@zy}N1mC7#GR#jk>}jmg(1;B znaZ?!)4^qh82~jrPeKeDq+ocSVTJ()3@{Af0D}RTVMgS#tllds00PhPA^|7}1rf;h zAHDXdnqO)pG`{4OUFOroAtkZ)R%;r$3`85G2|`urXVe2c7+!f~V(0g^ zHy>beF<;W)nv;xcSndl0ed)@OdKS>>fd~{`fBa&&eb)d;=-0TPgn8BRxyN+GQkSN@ zo#vY*n`Q&eh!tQM4Mfp!76_52Jv!{P=#wll3~iL*cl*A#oRN+a$~j$Y_BX_a8J&8H z!i$t*Y2VXv5UEUSWG-PDkjrAtDST?QBSu@U1SqvW%|ml)Y2K&sn9Qj>kkeklrp)6B zvr1T`VIT^|u`D2t+a*1!Qt3IWRhCN&%xTKGaB6ro_mv-19oY@|`)4+->AN_oC`)aq zGYM@ZxGr=#*L>nE-QZc{X=>q^L1u*^e0o{7waE#}j7N1h$`GJ4wu5JcicE9gIcA7V zD;ZCuFqhm|oO|Q(;RvPpSgkR!Vd>R>5j*7>X1Z0^EyUK(JY9^@s~Uh=5Td$ZRaQFk zp@kR5PZvDTGbrkig#hjL15G*nz)LKP^2@Pbtz5C~=3{?cd*|1VC%Gc?z%@EAd!!9- zKnZ20`H$%vFT_WR0D9MKU-Y8%ShKu#^T~>wKG~CFH!bCBt=X^$e(~{jy<1m=!>$po z7(a2b?Um>1JJ(HLvie9M`D1$JWPSMd<+9()p9&>T9Ih2{%Rs{~#&OphwZ~4+Ow3%d zYkylI1seDd6A*)jrG{~B7>DM7<_Jv8p$!942^eO;jI5-XSNIj7AfNyUsJ!HQ;^Xm| zs2$aNq5_g zO!%AyCMT8yRUmn%DhE!K4X9WG>}-;$?hUS0J?4>jm6IDall(@&>WtI;L zeF7M(W6Gf{r<8JQDObJxiUG+4ON^y7uX9{V;+fR(wlADzPAd3$B=7*YKuEtmnQG&C zu}riN*BXciL}q+H167QjU**S{D`iym;ZcW-vp_(3g~E3ptvIq9@b}MbS=&Jf&MX|_ zgj#{f7P+Q8%xZ(jR}T?uL1CQbP7?>F&)0DnzyzL?4l^SouCw9oEaO&?e%>n1EC(s} zgbSD+ykn^S3D?}Wem-4Cry`#F^H+9c$Q-r06sIzw&^#^Kgary*Sdw+3LWxsdIrNeG 
z3(WB<3{S&QXOtO_`&K}5m!-3QIe*8V%UA9kiKEWGpU$j*(S4Fx3S>Z_cI|OC=r%KD zhSvTQ-aMuDOQA$>`?jV=?^v^Z`MSeY#K%*)Vf)-bb$NjblgyoyxOuc!HHYRJ24I{CnPqha0VoJ42!-be1&Dxg{l2RX z_Ltk8P6yYmMi# z3(JKZ(LC@o)2FRyEK37IFM7qeHxJ86nsFOcDXd9Z>oLn@x))|1T7n5UH!^TMIrf#;;?gqFm0Wq{_?3bJ#|HAW17BAD>_j&TmAyt(#^#nSlB*Qh?T#@8uRSs}5`*Va!hDKO}u)z7@5A{F2ACq9(i zFn_{VDdVslWeP9xw5Pf{^HyDZ8nv7MarfXarWRW|S6I6A9dBp6n{krkT9Orq!vu8v zs;jTAetg~OL)qL(iBzWBuW2hCo@iFDm^fcj_2Wpd+A%dWx_BX}8gpT)sBSMOqk0x= z%g0ZfmF`&c9vZFaIfblecTR`FkU70PK@T6R6}=l|T2m)G`N5&|^x2t-neAKl zg$gO2)mVpK0~5%X3`-%ALZ*&-EiGqJx8lMbu*P{Nck$5jMw2bv^mnkt5la>02n%@54;~s}=u^^9(B{`3I@QXH z0Wa(Q+1l>a2_eLVYIHI#H<5=qdG@JLSXj+B9L@o-DvV9>_A$`WVK{CWMwfddT-#Xq zGVR42HfP|bY0=Y+$0E*D{(N(7mW`U_3BcvsfsyAoI&{F6`z5gJMVp^?YV6gCRD|N=hWsWRlq|uQ|zyv=8Z= ziY3?dA|<&2Bf*u^CL?jaJY5JxJx_Egiv9f3aes@HiDjhl)1!Gub_4$YnQarTZf{d?A*JAKd8f$h1YxMb}grAlTNNSW52=7tc?JJJN)wQ0LjQl^lb zoC$9^^bxwLb3x=0=ba&1_mERQ>(fmuue*vye+n6zwl4Wn6PZlj^ILD?S};*e)Am(* z@KO7+t9B4P>o4|>=31wkkSXNW%^hX>M?2-KSM2kP%g;z`zUKUJX`z+lsy8`b#2xu? 
z!;t35$f4tT7w=cUuE(v)gEvo7@67bZJI^N*>8vf5>W5C%Wd5ddv#QQ_D~j6Q`I(8C zotHinsuaZ@fJP0FDT;$Y%E1g63^M@oEHN-FHH-_zkQ(Ij38Ty;^ANoTo;Q&;@VuHt zo=X<5{Xly@oKGX056fPTTEcXEU+R*HL!iT`x{PcJF{=;Vn0P&17UCJpSwkCM_PR+a+ z6d7^;IlER{=f#XfJ!L4UF@;uK6r4m$LZ?ck=W@BoAj^#9mKjZz1r-vhwLHi)K$#a% z3ZA33fH~_GA$<#G3dAgqatm7UMCPnl%&5(1=1ZQL#04DM=Dn2r4g$zvto6Er8l3EVIPxAV*n8W;BGLF_altO$?ZJh8Yl{F3hFPMh$4xWVy%y7?t8# zI@-LRJClQT)-gbUTR-u+rAFLzdE={(mw7)~H;K#m!R;@+qs29))bz@k7TTl%$qI?_ z3yrQU(bUzv=xg5*-B0vBkRS6Fkc*R^wU&-tfpk-0>T}#H3cC<$xxa+U+tc0}Qy_5?YhkK9!uDvW&rhB53yJq6dAkLpgdd*b} z1DnHNXrIud$JXJcUUy{%my%W+FbCA#rZ}J!b4#I@oIZT(ohPyla~FBS`I%FwRc~ie z)tnA<`2lnKWMg9H=G6zA5WQlrfr+$%iR!$}mtZt!LK$Jel$IvXG6lnsaxefUWw}B} zc*xW|kmCO!0a1`J`a`SEq>XwvEBcuWxmH}auIcP)U1|I6w_H8dpM*|KH1*db>H7~B zLa3W6>^@twlu~B5jXk~?rui}l`O{0!f9yhsYw-4GUX*`QwdcKx(O4!gU6)58wLk-D{g9FLDeV6`LC+3#-g31q` zDm$_p@b}N`9G_2hE*Y+b==cVy@2PS>>g8C!3!fH;4WX7R7~)8LMztrgPR-P0S{(BX zm|@)rTm#Y8ODs_VM3#AQ%(I>Y;SR!@?YBMg^?lRBvc5KIZ&-bF_3zyh!)R$u8X)Gz z+Jc7PPi-bkT$hMD|AWWa-R(y|Ir(8Ry#DyU4I}>R3dxvH@V!ZMWx0Pa$4K|ssmt(UKQJ*XNIpH~$?$3M;wUQ)InL_md%@CyJ8yf& zTe&!!t9tXN>Wzu=?Y6xlnQE65X!BfSV&>M7!yRA-HT=IM@&g72O!UFP5c8m4L249( z0lC#aY9>pmo>)>+zf^cI0?F z%vs_0orF-vI?m%P$qgtNZSy+w`kFeA!!wqtqU{RfNY(WNmRh9?CZ&C%Wi|Jhc2qM5 zLo&{+CQMsrI2?6MXt{6g-4_B`j8ri6{XJzzb_4$2TK83O2R>jX`0K}!Y3GYaMrX5pJmq+9Nw zU;XO_P(!08QvXEvBbB5CFDZ)-tAjA}VhgKF7VrS<$6-)_)jMDmg9Qa z+Yf7@VR6HykDdhci=Gm1S%320*_igNT>R`V{O$ciT*oikeDB-}$GOIqWNyKhc#==s ziPG!JK6jZlLZu9`Rn)>la{z*J1}3B+4egNkMNs10bJ5}pOkfnv1C$VLt#7&mu3crC zQSPWF)*4@0$9pp)(osRXR8qs1Da(!aMX6O60;V6Z)LG9_>e*B<9%z&C%oe*SXML_| zQFdi&OFhpDt7rmEwNr4+hH=sGHTy40rr=xaQO78Q@dzaX0MU|oKbd&hRNhJ(4vpXc zc*T+3fWN=Ky}GEoqMUD6y+jo|G%y*;De6T6lQj$LodMM_L3n;CjgU4bQeap|<&_%H zTz4Zxc|>CSh{?EMh^6D&bI1{EeP$h!Yp)No_x&J(0HA=Gf4@tSuZcwH8fCb;5juw0(r?93_dNgQpYbCoYJ58qs9}<7G@l-~@ctGgE^(YlX;& z@u(r0;}jH`mr+8r_tX469ipg9Dz%4><{jA$`1|X-O3P{6FOJW8LvEEpsHzdao>yb*9>aIxJNQj0p(GV3GIV89&LDG zUX=ga&#SU4IdqE8?P^Qg%^MD$CEVF%8Wabj?&%{>#aOuG)%`O^_qQvXU-0sO`@+h~ 
z@Q?kmZd%AE8qCPw_{JMQwe57X)NKn&fBRdWQ2xB4d`!$IANt@We`}UWS9pyNUP?wj z@MmxOQCz*{_N? zdwfVOlZ~x2bF_$k!!N4yiDq40-P^+k&iAjo>R@5j#YLra`uwSy9KNOJM#7WIFBbHX z>Bhv&OX-=8P$|&B^J+zSCBxDpj4_NcC;t`ALVvgxWPyDhy+CGXmF;Qq-(#W^0`Khl2PLfLY$ z3%z9{xy^`cSGGPe)rVb8!`$14PkwVg<~{2*zrHDb&rgPQ=DnCb2J481*mihwm@id* zUP$Vl0l$?nRP#OyiiGQ2-ZaWJUkZ;gYl)Dd1uOTKR&y8UX~)y7&{@o`kPQYAL^KnY zb@C!+NL@ZisHR58eNCAa@RamJ)vXHZ;sPanFp0vh*UqiWr8d+>lW^VP(xX;wKN)0{+cocAc=MX;GAb{koX|ZPlxUnA3hQF0B``Q{M?qV=PXj; zC6Lh^f`RN&bz`kM@#Sx?D&!x@RoVeyq(Ec+b^mhBK5Jwr zUm0J(+dk4qM-HsL_WJRE`F#HG-@o-$GX-z!+5PUzcXl?v^9^tM9`d*DTk3r7=lI4y z&qn}K+&rA5Z23%7-X@M+We@F}z%3(7<$>Ax0I5Bc%HbEcuwr^PzisY(%jWsvu^XFW ztkG~+^bUb9V9S+H7gt}X2Lo*S_+(9v?dY>=|D-J`ika!g#LRE1a~-63mQ3+1iL_b* z4D)SYco~C&FohI}5nu`e6t)jm){*dlZXHM)D`c+$)X+c;jT)p-`8)v8y6tbzFJDlz z-q@b@TH**d*_B5wc7l~@^4`@4zd4)4sT9$_SC;>%S<=3@gUlb()>`tt7Dm;2)m1bpquhU9Iph8&9)qwOPy!t|<5xjf|* z9%8CgEyq~wsHT)#Kgww<>eZgCu&_Wv5#S_N0qN3e#6)FWM7kI=F1u8CIh*>huF)9LNQknO zNPzS`vgotG1ZkMTqA$l!UmWpMX1qkhSxr*|OERa8Res)qBje0)Cd8vzO@r(P{Qa%# z{nN05kGp8FO_6KTVl(ga%mt=8!l($Zcp?|b?xM_vm%idtdfjhs z9K*9de&5*Nz4`XH{-9mD=g@Kg>s!L1d;j!x-<73ZPle~c`vZLD{@JD!DwQ`@RFtcq z$#U1odbM|M8pYdky&*lBlnbhHbY^1sjwLZ%Zw8mvryDGnoF2Pr(OWy$U|SZR6!}@~ zykuWx>|#$VKRG^A!}{x&X(gFdetsZ5GToS%d1e26NA%FZM0Fkwpe9ky89*>&7&D9} zU?`-B7z7l2z@pDk){*dFqB@YGQNyzuH8g6)$r>b3mSBLXews`+mcz`oonb~BBetA4 z+}s@KGw_E$BSoV&r%I0TLL&;;~B}sm&g(jr&RPzluk`wvdR9 zrJe7t!T;P>)LPt4r?g;zqd44S$l5D>o_Ece>gyX1Y5!G2poUTONvYc|RAR0JYDFIf zFLMS#lw?J(Xk$e=SLdVkS+ty1QfsZRDbE;JevShgL9y!@M+{>UDy6AbeVa*=S!Uoe zZVkwhTekLSvcz>xcZ)%{WUMgYwwR-^u977|#!Wp82sO$;3l9QGD*6Ppmlzc~?#$_t zOOCFbOoNy}8%wQq%rY*d-Ym{hYpLKK87iJFbo5xW zGSH2SHc1C)#*G7?KNm%5qXsl4M6Wp9%2%6M!#do8w|Xhp$kgyPLW2p%6=gJY0m~#Y zJ~cXUrDOsJym3ZT%3f-nZiXVlUNqM#!ot%;^3 z(gyxh4RWR?fxMK=YLZeDlrnQ$u)-8(lv@r_23_C2n@A9P;d zD)WK(3X?5Uz$nZ|jbhCuU2BjzukEI^BwnFlR}1&V7AXy1x( zNa-@tX5$aH{q&@{_F8b7W5z&dNj@!lnJ-9|a<~$4Y1^x$TKie%Okho~JD`^{DY0&kL=i*y0DRFLK%_R0%t~f+zb>F z-#HHwiu0L9g_4BYOqWtZtTnpwDzE?mAOJ~3K~yq55xNY9P*|G#Dc#iDb#%6SXQe2G 
z=X*p}v!EdHy#Jjr>KLay38?o}x28dM1ODFbO{$(FSy%uHB?D&~rDSbzxDfP9T{a+l zPNrw(`@IHgkST_BbGX_p4lUwR6;e%ktKAPYvflbDJwP~kktdDe)PYmxIihn$->^gF zsQMpA(Ff35|1)2E)ID~Q&n;T$@?iO4^@`!}$S>>u)*FLM0`x4iY2tQ$?2!shCRY~@ z573;Z36?+hgt_b!wMu2(=3Th!_RBw={P~To)$Jl$e#-IW@qt#e=}c z>z0Dyg~q@YXJ=Yop?mDgYcEvCXPTuej~^`ZS=?MZS{<57LYqsD&(v_m_462)%kk7e zB{?_`IrEz3*1YI7Fp)O!oSH;6X8;HCi30SG0F`3zZ9Z2zC z$(fo2$`W9}@}K^#7rn)HWLG_e>nPZ^rfdlG|?==ie9)$kFZ! z+1SH?%!6TfuJ~E4ai%oPlEHUp)Bie6m4b~Ptl*7D1DodWDt%SgJOlWJftvMflDoLS z=(A|@CBJ#}=W6J7S@cyqpJzR9NeqyXaIeiq+AdEd)qTeMJm{vvwuA&)G;0o(q>lut zpII(49xqpUTp-2}>+@b|@_1QmPMCGhjkb+*8MQYtr#RcrZa_HRYwEe2$i%n9LAmL#m)>+10*nz&`Y z<6L!op;_8+?2Jd}<&D)N!-MC-LYbdBJ~NSCv1`%Q%I(%Nb82cHa_0A^mzqccnc_c4 zR3!`;U>H+^86%8mX(A{rJUolW@QQIHWQzW@MB2cw)Z`3mkOF}OGhp4D{(kCIr>Wh7 zO&w7*u3diORCKLY+51NJf4!clNOJK#%>M~;Y(%Zl{O5jwF{XLCVf90PV=)+G;qc#( z6JM_<2*Eo)s=7Cx5IQZsv~wm1C}D}8M3o?}4!GG9;eb$!>D!9;E%k4_ylm)T_%$<&k~O&<+D#0 z$6=`iS0&>FVc@Wc;7IVj4IA3Vq{{0~ZWN}{lEZTkU1uxGQk;3R{ol#%Ky%{P6kCNz5DtvA**cP`paL8+!Zc|GmWm( z@7wUmg%?yE)gftB8Ob(Qtwz`sm9aM#0+?cvVMh{?CV z@5KGh58v{|@tghc^MAn2*T3$se)r#k(v82!roNMGYd!g)yS`JDH+*x&cM9*pJD$9# z1QbLMkT=)QD!D^MLY=6~@?9-V#8cTI!p0HEORitYm*-j|>n5jT>bFi`v+ZJOWT7#< z_SnfBX89e((?iwiFa!_J&eU+#&Ka*VbJiaSg6R0M8t~F{jgIIwYG~kjHHl$iz!+#G z0A>_piutw=FvIX1W{hA^BHu@|E>rv$GDV|SRLFA)#!OoMM|Y+*msK7>6^yd%(upG{ zl54V5eKdIT!Nn+vB=^6I`9JUre7L`gwIb~-tmUntZI4g)f?Sn_BkyK=Kev#v9(eT7 zSnq#K7Bf}5yZ=+lxdw{E$F`16-?+K)Sbd6nM$cVx(}4@gt~;EOLiS1U9{@;-BlQb5x*@f-nWuiWV3Tw*gq;it@t!?L9m)}xuzzJKP4iOWJq`T`m>gJC9t zr&I}?YE{9=Z2J14Np?iRF?DX?V8%cJ8{Apl)2$ki-GIM0y}lE29dsZR z?a2y~Vh~kFqogZ~bAv0B<#Ty+;#jQ#)a2MQRwbsz@>?fB4aM}BA`uw{-eg4DMUTO633DBgAzs)FIPfKi#b(#eb4Q4KpL)F=Q0h zGZ_PdzEMoj%s?0hqZlA4ybR3_FoWmR(7>~503=YB%s?2EzW17$>5EZ*S{I->SJ9P6 zP9)n*Vm>N?=1RhhI~JD%w(`R zFiY$+o`AL*+-g_@Vt8hJ1e71XcB{$EvtUJ_6C~Lnb0#J(0YVsatk5s=Fw39_*9MM5 z2#=M}ry|po3I|dDaqsopbY>u&2TGh~N)sa-SHd}wi%F`j4Lv3bA~x*6yi~B(nkc*G z@#Ff6J5IG%{mmTqKNbc#p^m-w*VUfSL>-{GQ~Xj@Ks?9A*u;Tm)qv~<{QdFu?H<=O 
zhR!@*0FQm>p{masX}MXf@|mKWd3@%wXr?B`*peJ?$D?{_oi|m!Y&sDZsvcI*_A<3& zG^37iO05UOxaO7=Gyu*l^>0~kARhVb(P5Oj^%XC@@|jcpG`})h^=F-PC29Nd*;3dJ zDXeCzt{HUUuI|?eg#Ee{ujfoICir9p~0N0(`puH_2Nr9)f_vbHw$F zXH|NmjPjk6Jz2eT$y?c+(K&~ilMb{ z?;9l;wTgL`!uADxlX+go8XX|Ta~d@~rvcO?L?8hMVEOfbcx-;TV~+qCGgfW9?CDe4 zb^XwNxO(vra+Vf&dDDf}%S8d;AA9)zVAVo07Oaqu4@`E76Rff3J<}WUE05iX=iR$N zC^y^^|HSEA2cCGv+J#3j@N_qCY}5aPM_cap3Dq1Bra;ofH{|8c zDYNO0(@U%0Gp8Q?ZXD%d7vA+MwfD>8w0g$HrKsyY)yK&wz8fdZdOL7(}}pdK3k})oz@gIsfX|~C~@sb zLYWc7OFU0`ETha8Ov*T8aNdnqrMd7kkDnc8#-yWv{DKFLkFQz3ZYf;ZTIhD-EVY|* zyayuvsv0=Gg6v&xFuv`&4|cI)&%nTu=p#G#j$L|Y@BC}tvino^f#``F3I~q-(`P?- ze(CFLKDGAOItLc-<*%=Q^pm&Tx2AO2mj@m@z5I%W2fn-=&wFqFe>UFKIt~Ga6f!N| zI6JA*8$oNcM|*POhUv4&rHi^ zTHF!Lj!wRXOXvA;Mtf&Y)g29F_O(t@Va zQqePD@L;U8Fi7CbH1oWSH9Eiyp0jmn1~oiS0%ax$Qw-gmADO*yv6H`_4H4%Nw|UK@ zr;?qf?LONY$FCgTg6Dl|afnk6?P(g#AmPw3u$qy?S=}w| zU%MrtHl>be-}4#KQTef+;4lm}nEwUFcP$>d`z?;(|Lg1*MtNo#v}iK@WA!I%ECqZ4 zAsEqC&ifvxiEvqFL*FXS45t!IxqR}d`WXtsx0KZ;_CABlZ=0;I`|H{4@kgRqnz;GS zd-UFKk~!0^ZfoyNJit;bZynv!9x@=i0lAM|6HXR=g^>HPO$wDITA-k+LUhnuyS8<< zdU(*TdF087)$^5VeW-T9jD*pp@$AUT^OjQ4Zs-h9Y4Q4;W0o_*9f6kCQbGkuITenu z?CQJ7zQ>QAtdG&mbYHML7v>~Sd&kl(b6&BK8)>$7sLru$a0S$b<3GEzHg|k<;_Mxl zePe0qvOQz@{n_WQ`1+3ZhfgF|e&P?_UHjJDiCw`jf4=uK@1N@apz`h2+t1C+eUZNH z@q53#>+>r%uRL<)7r)tm-N`3@B=Ed9-1zFLx5WD;ghC3L7H^!MQn;2UB0SrZ6ECcL ztLrl!bLreUiLD!(Lu9tOwQ!`DP=BiV!tuGuC4#8{%zn{v!n7mcPt4s~I(X}FyvI#Eezr#Qy3^a# zUcT{$t{=p@#SgG*GRwX;g;NS1P%3gbvdS@*G*-5C+0qw*d_ zDbs|iVD@L}`d#&lZ@3-*$q_iyDL?i@KxwBF8edxcVf3!cI*SEo27D<)ADO?H7yV2U z8Ya@{j0vka7oeW=)T8qWwGPH)X48Q~{PJ7Q)Gzs)x%jEa!=CIV@jLHPPkzIjiB^D3ZTFR^z}ua5_|uk8GMy11e~ zBo|i98fiGQlv$rLq&qhs{fZy$M_zMg zE?vz1wu!wJSN}un%|+TVVZT?nQm1YxIsCId+lPNyf640o|M2aPykXa;qxh12mF|50 z3u}IO^@`(1`U4N&@R4<2ojGOCL zc0c@*-$aJSzD$4pk6(P~UCEImpzy4;x^s3q!d9LuUpSn~)i<;;zBr?cESo+j@q%^D z!AjB()*gGjFGeTBU88g3c1o?U?QOBH&fV7ba%MIx42K7&0D1k`VyW1e7FB0kXQ9&K z9S7pRO7WkHP-6gN9%$?X9HDO{jSLv1HH>Qt8wt;k5)t%GuY_}(fYnei$~$v1yN)dO?`^loxl>XQq|!5l5t95yyXZ)FrJ43)z@8V<8+Upy0Z@lT3 
zO8Z>v3W0+-!7HMjX4bIG13!v;@i@}}R&_J4(*Nl?A2*99Cga4NUu zSYuFw>;~ikZ*!-~MUl6Yw8*Kl7{OX^=J3Ych57mF(Bha~8n2#V>AMyYkH8H#WX>b`1r$x{`Eu7PjP1 z9W1|i^TDNYDvFk`Of+9UF+2UE%eMVu{tt&5e}3P+Z{GgbG+F;(DU5Sp8U60A;lpR+ z+<{9!z2;jN&)h?cSdu5 zzVjl{Tb)f}L-~=OY(@jO^@3h*u@(Egey%Zr7p|NRh8CtdN`CX?WDUbR562zM;8$uA z7&DA956CD3rKy*CJOmi^Y$hoz)sgRsDD%9`wmU#7Q=m};(5N+P)l)T@q8KobI|p>% z^@;<>nt5U+aVdCakDVmf>gnHoCH`-J?=lA+Tcurh&82&f^@jbyDE{-C7yxGp$!rrH za6_qj>crDISSpfn-g+TDeb4aBQ)2kh11kpNTrOSksd!Cb8Mr5oEeV}@#JS?>hiW@k z?`y8!!SaDDOD;`C3I^b>q-PDGa9l3`@UGW%<9yPo?dj%Xq(*m!?IYc_JL_}rzx->% zXMLKPGx_)6nm?RS6}zn_4JWF@nQjF`Sq{Td+*n$RXTW0`dB19z?$5x zqffO4HOTJ&hb#C1kh3iC$Nznw=WWwxXLq*FW?MG9*(94z2uTPfbfqJxoOs4Nv2r2` zmJ`K}AfSh*CrFVdNRt)_=^?#tvc1mk?9A-UPH%5}p7$U0e*gCQ0Llq3Cv!o|j%=0tK-Go5D+lK{xQmm@(7^38 z`(@Yoq1r{KvsV&VA$gLqD&VR?N8ewcPc`#jn0pJSzhQ1%XYx$igvH zd%V@+=!k6eu8O!qdY@4%Luv-zbTNb8KqPr+>{t3n3PeXGeCt& zB$yJ>HCd6fpo)of4!RNn5Ym$f0!ShVMfbQNf)Loq-}>#rlXI#^hi%xbWQ0ym(6#y4 z7cZ8~f>5?O5ll&A(4b0%eRTM+%TxhkI<8g^hD#o|SI_x=Jv1&`vZ=$!e_pTlGgSb4 zI`f9dNd%ZpTIv!beU(4{5v|#Crl)>fc4cxEKd$Z(EHgmpV8X!#TM%{6`1|^K7xaZ6 z?#BO@{&UGiEZY_Ul6ddh&!hywE{Z4nG6pA>?z?pM!{XBOW|BYaK1zN$uLX2r(c|cU z#FtbXeEBpC+cp?Cd&q~S?=LNDlop&$#TIO86iaAiSt47OC23r>m|7MjNiqR09GU_r z27{$ZD-Mi1TGosgJHH<{_Z&1cl8M;eHyQ_DWfRc6=ix-zqX05RpygCLz#wb|#9?>S z9G7_+Dlf}r;)NE_Xo|BcD;+Sn)2KD)6>RQ?0MTNKz#N$*{k-%8gP5Ed0J=k+9i;W>Z@K43+FPIK-kzy` z!PR=weL-o$ojcypJ5apU!~XqGZ+y4omlo}KW&e)5n%-u+*ZWSqzcAy(Z`VFP8sCI{ z4_&(IzlR#%D$llwK;q`bkKb~~^{A-vBVzfvs^rgp8Ga}B)B|O{ukBmUH9dGZe_D}I zQ1ph4hK|vQynr|%=Fcel>QzbAH0D=l+2L)sZK>oZx#&^wSMn!M~ z;7c1$jLXZlwEmDlTs5XGQA@(975JF(kfXT2!39aOpf}dI)YVn9dz>}9&UE#T6}7AZ2U~R&_2{A{59I`rGEw02 z@Q3>PbtenYNC0fxu#44G+#?Vn@^V=MkX39M<`H@={A*sEw-KDNI z@#uq{N9mtq`F3M!^+P!Hti&8H9}bLoKmc>$C6hai`<51nTuQlXs9*~ww+zc?mtor+ zyzC&B#EPn-2#i^RePa4Jz$gPYH@u%;JiEA@@A_`cII_pgD4G_%@0Q}pS1@XD=ekr8 zFd2ny`g;4)0R~|!KCa*skum2SIM{kcH09rkD6-dD<3}fo0fM<2R1qm zy|HrKo&Ip$!~N+^c=m~})cyBB+gtdD7vJys!dCBXNqJ3NZmxV&HSYac`R|z*zEcnn 
zIPQDL_x*v?kODw1w651Rb0Q)yKuxVS(eE+Vbj{X!rxG5&R+>1`jxFB#W;152ojiep z&o_!KcA?!Imlv}AWp75WzkJwiDov3pSLQ?l3NEglcl*N;cNL7(kgn+69Vdzgbo{Sm zP=Q?QEJLEA&J}RkG9@lsIs+vV$|@jQj1z_Ap^7?>6+8uxG>p%T9z_tzAU>u5E(Ib5 z7}aZ^6844pJ5?$rfVfOLGVWYf&e#4|G&4Xn&2%JNZ?aNvy0ayLh*pAG+_JcCvhL9S zct!Ghd4Byp4;*(pDigX>XRj@ae>q>KhVPCuN8A=f7>kRQP3G(kj_A&+;0FVp%je?d zyg_9ay2Tqz5Jf$cATl^u%3XS&ty(phdr8~VU{zZM-C7?_pQK(ZGe0;W6A7l-xGDPn zb+;D{Y9?zwnyY|t2>AuZ2YTmuvDt?g9u}TXC;bg2?_)Up43aLi2S*}Rl0{`pTR^t4 zA1qGDO2%|n>kOhL3T4wtQPFcAM@g|Dh;|-sNstP}VI9mKC0;>dDt9&QF}gl#ZBKW6 zZ_eIzs5B>XWWITWe(Z02%qmr_*LIg=${=xaMg7rKfI-*_KtwjjothEgX)+%mAfy7H z<%CkmDO`4ziL7a6Fq`j8X4|v5p7M;lDvqV&rG?&Mk#N!AVmStwJ&nw%Q$j646o{k= z3Yaa~aPU%DH!<1RUQlhA zJqwiEAKCe1_n$J&4R5{mz?-eF8oM_+cD%NHn$C`&_p2lB9-O)5C+sibEsx?KzB9Pt zUq3hhxNrV~oVBa*M{TL^-s5Xe#r6YD@eu`@c9+BEjS(ZHFobBKTh+U4hFB3Mz>@mlzaoDwb?8lc^Mp6G{kllRCsK zRATa&0q6h}==fL?!DVL@z(++;fYGw@ME%|=Jp;w&wpftbj*WVjlq*$_m~Z?^N5zs# z8=kN~e9(f(TbGcrgI<79(Q;p79Ifi-UYAhZc-Iey{hq|x1QdBkYxKv7yw`GE7vEM* z0ox3v$c94h^6IHQ9^amU&gIc;Daqh1HGHa8&PysC?3;tdVUf-A@3A#22eU6B{Qilu zP3OqVu44}s;?{zzrjPp}*wn09IlTRb&!oA`vd&{?XTf2bjhiR;mO9r)=YAJDiYH5P zS@fOvHJo}%n}TtkXBrwT4qaKLeZcNr#05>97i*-FElD;lTcRKvG$wi=CM8v{bV}TU zZmC>97KT?C07(D_vY*mPUPZl(9}0RsUQ2hJ!}+w zYo#+4F9a$x{72eiEHS{+p1UO^ht8&U%Ag-Wz|=|A75m`~q?33!fWm zc_p>)O11x?wz7Bj#6`crImY7Ogx{P$K>>^;3VSD)T=^aH*2pXOXY@hmkDA$6zghG8 z;QQY&v_b3sz4*cKL_#(I=!@1AD{LYrxAK~5b70(KZ0LyAcqWoQzco8P+%8=NH0jg! z%J_JcdE^oIMY`3JO3;PI0nMMV7F;xFFUW-9_oU9ol0nafHM5T9WZopA9T|%Rv99Vw z1%{4uObI|T1ece|5~ye~MVCZ_NnEyc#;6LDsiH1HXObcoKqQmL47mU#0R)pl{2z+! 
zpdu(h!ScVn{KeVypxYuAEOCwN^tfYjNe}%px98QOn78E8rk`Q&pUbAm7u1&Ief2I& z1*o>ryMMl*I6Y$Fk}rLKqRtuZj{$+V7bboj$y8h3%O>-pV2d`~L(--~>WaYBURT|= zvBf>HL@}c~YZJ9MHYi0!!fn5)_*9DD}jR?Ffj zw&Aj(0O0X-v3c0r7|}6%^QqE?`YHIQq|h|JmoeAZ06}5oC`1rTLF?S5JI@&hDLZ9 zq9wv&+(EdT7_yWr`3z#|4!NkcB+G5d;Od8+K}p7q{bPKW2F zI$Wj3>e|pX_YOUz{bjb7^*?{xM9VAq*ymktE7^bPgE(jRo&R;{?C4L`k%cv9F7*A+ z{xw&A!~gcs2VYMbllFs6;vIJf-aLKaPE$Kkd~6rKni-1$kPUsw@*Fcp6LKqLnZ(CE zh0B@}LC5qQcUyck?UA;ac5`{!UNbYL86ux>T$$&gY=U=Nhe6`Sr5n!5jrmzkQ!(9# zkbIeY4!u&LZ{F{i8;OMUOQe%l2@#xg3Eduqq9ahn7-bZojDi9L3fB!4l30PnA)^AS z@;p?r!V(5_MaMrSgGrndQ~;l1iJ+8GmjBY1|2m@Xu5%KTQmeyxXsW8Km|yT1^+g^ymKb0Wy?x+w($n9+Xlqp$$}^{Ak(3}X{}!|p7{%( z`upj)EYDTHZXbQln5?v)cjTmx6L8kKap1%2Z%=@T8OOo62{2}vow>c}S~#2iW6eqY zp_23!3fJ6=lh3koj^*`{RKOBI#aN5-4!eJGN-{{!=PN0;F01Ce<+TJ!)=L$K08^B5 zR8g5JD3m4o_2ck~gcCTL4-V7XHaWNATXDAUXmK8vn7sRXegC_{XsY7B#N1huC72SI zF0DV23@`{=0g^0=umI6nG%=Wn6lZw`;bB?xg zy$7Q*NMQ(of(SlQ$DFz%DHU$3l-LF0($lH7hDlwpb)9Hhsq?OfqL3i!Emxe}ZEbar z&%|<_r=885@M4Vmx{ZxOb|N0FZI~(CO53jZ!PrA{FU_tf&Hdr7-Hq?zW4CzelEcY2 zTsY_9zy5L0sU4sEg0(g;ju?*>F1h^n(3__Yf2mj;&pgwL%fH?8@{!a3%Cp0%$G78O z>ClJ*8HV1yDrfTHgsa7>_Lj${JjSKIT)h~cukjhNfr7`erAR`Bu(En`EN=N6WA4l5 zl#X~z>dGI2BY{Qh`_y25&J4QZr~46dZIQBGWjOEmP$L|Ug!D`JKm~@5b38`Y;c?{j zS|^hMNG8BQ0WeU>G$^HvQBW{|5~{(8G7<*#Bt9031d~C0Oab7dB0vd;oW1m0@1`pT z;S5a5x>^p61-o*kwqK_A{5`|VrlYjweqsL~%b<2+Yi6=908y1C?p%jG3r_NYjQrJG z?@4(BQ^$vc$*$e?Q@@Vq>+^vtCU!JB88J~Cc3hk+T`fiTd#YcXS+h7C)3S!6DpPl( z(?O`=88|yg0B!}YKD(2xS~gjEicP&XHbnr=Krz4MRPZ$%c?!d3_tMCCHQ_Sn!s4k9 zZ@fJv2y{Mhz8E4H~PM_$?G?3Zn-NX4Oty!m)`sBSh`m#PMlBK@02D3Ql<=}?Q z`}o>YO2lH$>9s9d5@kWNT)HLO6$6mK6kg$?gX)Ar%MV7FX)%%AG$WgvOV9< z=HEL$mvGAB{Fgsh*||@j;-afXIjj2_r4+Vf(Lf@=AZ!JQ$rvarrhH<`@QH-OL)6)v z>fmlk%FBul!yq69G{?=xD0-67dMWB^o~iU!rbLnRiYd894BfNZBgq<3Bn^~_3OCc$ ziF8%78cXt;Qpu>R`{4ZVeXR&0T6#}&|Df}8(r_s~+L6J6-nSu#J#2}wIv5|ApI+k5 z;+jJ5gt+vwi^77{o41C_qkIemXyZr^%ktqc-3du`WWK453;cBtO zDsyVaV_fAZHQ3<Wa~bW{K8F!-lN9AU5Y-m^~;t!c^t7ekHWY9B#paPedISd(uGfJT7kT@|p zW0VrkCvT%Vn?Lu>g3ld3It;Y$ 
zZ3;g;Q?9NwuAb60A~7ns&bl^b?JZX9qiWA<@#Raxvqsi%RLzI3Q=Cqdr~|QME}3vR zR}62*+GXR~lYHX!nMuo;4ZegEPuk;F+rlx6+nmBSSI+MI)Xf=zUl{ixA1?lqn`1F}h2lm5)}n=bY(^*4q8#=1)DS ziQuHPwCPOLbDi3skH;FgqtrYOE2(&c&F!w(>0ETNI_thEd%>zNvR|KhcjBsD>%MX6 z+!-Nlrlu3G2GyE_rAvC6mK4t=rDrW~`FKjQ~(XcNK$OR-X?=Dr?VA>aAwVv$RM4)tytk>D&nL1j^jTAiY z%QJE)9dj%jpPYk_kAyZy9gS1-{;tSrc;n)VCCB9YLX^~~k^W(XTvsAnBTnShfTRt@ zd^!E**hCu86&?Si$cE1e*^nh@P#G01Mwz1m6OaHGLqrkOCHCxQ0rqX;HL zdNK$=0c3{)K@b2&bopThLt#==n3jnCUBpB9FU zjx`5P1VF*e^Hv|&e(m*np+rPJG6{n-+qBk9AL>du7sOwZCh|{|<~>&QbN`MLe^GO6 zeu*nuQ3Vj&&a?^d7rwHEn7UZhYXqHf%Sp?oL{vsC-J+t&b+02&sK5v-NWzKSh^UH! z09)@qJV=+UA1*HYR=Cb6M)i4S(F0_>37Y?i-6Qs(bjx$KIO#oS0(A9@{nX_39Hj z$OfRV>@MhRG~o*|)oTnUJjQjT+-#1`scs=RpewZ(&Q}Ffvw@!exgv-@*>+w+?nZ%l zCQd0NhV#2mx*8_tiQzOx3JtNqCxv*mIF*OtCGkikq~A6=lamWc{I4Js_?%1zl~9r; z5SXB*rvL*oBomSeiD^_)co|M^8Yu%hk_frdKr*B&@Kq*)3S@^Q*|s212I#-Dcj`J5 zEC7Z2nu9Y99XWmR50ZQSUgmkzk-ywe{pn7RMXARqW^!`u-?5ml2WNaxLEEU-?Qbj1xs<7 zZzN*jKi<Pj=lR&GgOX+nh1a zr=l0qN0i?jdL?|*n>Ss*nV$c%v*%IvqeD$xotb?6{P%CLPv3RXjXz0Fe*extUFX;P zlTTMIDZIJ%58G3>7}4Yh{LH$V`~KzO33p(=9(;bbiKPos9=OhNj}EsSGV&G3QuBN~UAtlSF=RibkSI(-87LrDJSEORiKIhf z6eb89R+&mB(PEG%5JCh&0LdT}9e@Ja4TXUS0E7Np_rX(rjhq0Md!un+dO>TUy!by8 zJKwEXdDE4>=^pHOT0;Xoi;Q^KBZHe%D6~}XiI`5W(=L2z^EX+&XXr#uo>;rfHTLLi zxiQ@=}#2?BASZdxV<;sw4{nvrWm;R8=Yn zHVjBoMG$GxB?y*OR8T$>+X0mlfL!D}FsO82Fr4lFUQB;`uUQnK=Kt-s^1)a5q-AL zbPwpEN-`K6$BNmM3~-`(8;-?XwIRP@D^f9w=7gXY;w5BTTc5c>v4*Fftlb=W?$XCUC|*~Y zp4;o#ev|aw&pc|`wmC9)aN;j}e^q<1Bp0AS-_V*fwTXnU%~AvN!ZOIr=G9XKghw6A78n0*5@A1dN&P?6U^eOV85?sCR?^tBi4^l?G5o_9SdiP zW8S>>K2;v+CvcpM1B~q3+B+)V@<|@GiCDepN=MqebIVvWVz{~^; zJ=u499uZBMk)VM&ptgn z2|2y^_c-)4#(CGeef zbBpOR@6Sg>8H!1CbR9XREZ^Lh>-kQ?dgoZ7;^2DfKW;Sky{AnYx@V>QVM&H$N~W}> zsV^E}5Visk2@@?bld97SqQ+ZUhO?EWqMhg!c}6RGMUTzPLT1`iHR%sb1Qwn(oAdLH z{t0pUbPikww196siD0vvXreeu0&Rn1wP|?sGmDgYF%ej!&CjB3iT?Ko8XuH7;oP_^ zKdgCg)c2CJN`1Kbnu(dTudhKVw00ET{Zm?xnQXjv=2P?cH~r?VcVf5x^)CPIRagJ# zD_gv{Z(vyliHWB=j$Mj3@BLZrqshr%edd`P)Y53=Prgr%Ja@}4U-Vp^tx7)7{PK6q 
zfBDh<&xqG6M~%IaKfdv*aHNEDbZl-cXy!=T*Fsz*Gnw{CpRmdc8qSPY5iNN}!J=iU z8ecI{+te3Np(-C)wJ7O!PbX_vof>c`bIyzXXMNS-jH5;#AD9R#4cE^W8u(03u93>8 z)6fGq97?C<0%WBCBoRyo71;x0`1xlM1$Av%;!VhrW1QGQnG|+?i7+eIk)j+k57ywQA8+;RW0j=N_E691s;fWGA zIMKM(-Fo1RxN2j6Zs|P{>5aZ}Rx+j3owt~K-l5~=if5%dP^uDarV#jBhob=oVJiR% z${?jvAtk1XJyzTn7Os!H9uD5hqDCfFXD%`kuV@oB-iaDlxTdY&Y|hWOdB@4h@hpf4 zZuqN`se-S?l}|N#W}%73S=kMSM2DKB9!43{Sxax>y+@=+og5ahB_2*TU!OVd+Uu^K zTXgfW`JuqcCOFHN=H|q4y+w$%UlZPdUo8B|J8u-Pdipmt*Wokw{(ZAY|K?16l`$ND zrhar2o_yq?>?<>=hp&I~R$1>Kdoj59?C#rt0oc)|==YcY^{(NkzVw5)rA^Xt^1Aqw z{~4V*YCwhoeM56mHwSXQCLXMs9w>MmpU4ZrL@XbW&1AoV?yh`;JDhB5I#jHf{#0yJ zC|2zs%GInm6;}AHc%AP=pgNK-LM=`e6utYZ5!S@RB@HT>iYa>S%^zlSfUZPTA%y(Lv#fA{RW?`Fw7bH?tu3ok#VvG!7XS0xh*NZ>lRQ=K(C zW2VbTi2m;@zeSt8eS0Io+MVwH?Q;&dP^FI>28hInGP-2hWZeeg$?J4bCQq^|&?nd~}qM)Y3m-_iM&kW6iQ%Gfob`T-UYl zS z67{3uLf3WSRQ3FPNH(MHq}!SdI>Xhj>FUrKwB!?=(uBHvG%o_27}ZsoTt@A{l($Lh z%R^TZ2(XG&3aLq_kL1S#iWpc`+I@ZN>#_yW;-5I1D0X~u`mpn8JDpv6{fVO~chV*E zjVJH=U@(I=E!lqc(AP?zJ^w#nf0eI)o?qOjg&I$Fna>v+MEO-#;<_ z_p#zr8=ks@rzVcS5n6We*jMj6>{vvO9Xq-C&vzeu^Yiz;?p^CVB)n1ey@y6K#|_AU zp>Jv~>gC~_tKOkXxv_%cyDFmuGt<0^my-KJ=v|nu_eEnJ^+!?#*^`*LtR`LS2+!9q zKQ!u+=KPn?k)U@nuLSAv0HAkYGbsC6EJy0(Y=0~ns=a1=AqO3b4kSZ*5+T6lh0G*? zGYSce6RH-#m@F}qD!K#{jQVnnD&}lKk&Os?XQ~QGAOt-bL^7xVvV$sMkPSfZe11<~ zHmN!V2vVwE{V3A0Ae+1J>#_NvvPTY7tc@-h!=hht4?Hhq=eWT+OkQ<0|Hv`wa;moe z&DMKe4dO(UlxDkMb_)2|$nJf8wcuP9J7>FAjn-YtPM)ew?$j=C?VHJ_nL=63RjYD} zSY;hPHSUBVTA_`H4_N27#2k;6vd_fBqEuP^Lmb;~#;d)Kv*CK1K`=U-_Z|JW8!Hi` z-O_fQO>l^$9NKW~q_wVXs`xyj`~RDv4zcfC%U|54#~qb*zNBqIFc~wJm>*`paB-f? 
z$Vpzq^l)2|%L?a~C3ytX27xX(EYm55XFX=I{!Af8B+;URW-L2+MqIh+Y<|Um%m~{~ zl@d-$lHa;gKlGY8$t`t_vMuKUK!D(q{A_?h*b0DB0YF@9a{7w8+g8;Nk`$i8#*M?8 zE0*l>6{S>a!RTa2nhrW98=KCsmUOO7m~k!{&l3=!)EJnrWXMt@r^eNsi2`zZK+%B) zmiCm3lGrfeXj&iN{vGvYr$u0hR~(zuR&1F(DV%Qf%=BI!-Jg&8AsTDF+dLQ_f8c5y5bLaa<1%t zpofLe-gVC}mR(H4+Yhh)+x>5!yy>o28{6uRSZM9}arP z^|DAZlaLF`FC9XyJ)3kAi%i9UMO)r4BorOVApS`Kd`?gb5R*y_-~e$Ih(@WxH7c1B zgGu4hG^&`Bx(6sgiL@7zKoCiMECjd|5roT(ftH(}dnuC82uv`J74&eVt09-Wa_e-H zZrYNXN-bv8Ho*$Q*qJya`OGh#0^~$7tZ_78m_0nWsv?Rr?ot!$}AaQEBmLLBJTRKm; zAIo_N{)thX6MoPJ7cDO7Y}L^tcU_%Jm0L;f*vOfHLIvBkc<> zeE%}`;<0Q@wAL8Z5IEn->dY27(`t6-(aIKoaae{`6_@y6kMV{ z8(*uu&3VBvlo{ca^8y-nj)uTGXpeneM7mrnYFi8({%ByuQ-bxX>!GoI}f zc-pT303ZNKL_t(=4J322Cn~nq={3qorMY!a)P;25la=8b&!mAM861g(^p%$nuqIWk+ zzg~8rE#0)NJfEhD0822lJ)WI$?s2Oqez)ld&U$jJ501p@HGKv!(K(@r`9Ur64*eDMNP1a0oJ(>4`R*6Q?0(&z%upwg5qbg(Jk zk~0QC2)F8MV#!h-fEOt#9RN&8RI(^6aZl@}KTt7r9Z*`u#aiI#YAYbgb4LDZ${G*hElSb>UgNPMXYfF3Q6RMPI$*U6zAv$oO9d zWVZrPPzELgM5C0rveW=%1_}^v0J5h5BALjg0UdxM`)85>LayPEt^h8P%9IHVl$ARF zysuv~Z3-KFzC}6|?WxI>uG~8Q?nfrFM2-FAD!lj_B})V=Wbc@hIV{e!)~oXeq_KcY z6tw&O-}N+#r;Zn(Y zdckKMDidq11ARyR0I_;kZ|}op&3(aNma@M(qt&>g>wkr}UnG;24P65TFSTruxfUGg zyYosjA=b)?eJ96M0&-NMYX_H-ao3U~8}4lRQu=#O z4ToLZj90&kaQV$6OG{@)9=i3nSJyj6ep=JC;|I@#@3{S$4GoS%>M!ov_#^wkgdrQU zp(_`AN%>zB zBM4seCvp2*GnTh??fmGRU$!mcDUB_Yh}G#h8&(`gaILFQ~uIgliHdBCg`wz zlBEiqt9j~B8MAB(!ilNa@qSsXqo*h0fMviUE9>`+(v_DS$*lXvtn}`gRLP~W{5_W$ z2jA1j49mM*nkacWr4kb@-(0TRg0K|;rHnEf{7CU$*9)=*i#PNhF^x?N2IedMJ*DDR9pfjoIo6shHecNN{_tR^K2o~5WAYpO ze}7)pzPV=Su2=s4T6Ot`FUYVz!98tbQK(M-MAKD0w>^>?Yj~%y{}y4Y{MUiD@zmt+ zu7CK3YS-}3J+)`P{@B$0H$HiZ-@n`O@K4)*<$7ZhGGOSQ3mp|ZGoGuhbqDp~5hbu- z;aspLJg55X?9_>Ne_*BVwd3W*#?ZXzE{98(%9RCrB-2y3!(BZT?zv%jzRndX>4q~s z5((kr3l4a-yeFNi;_`5R5SO%W*ApHXkYPYJAj6RTGzCcns0a#52_$Y3NQ496ATT+I zBM+*WypRTTd@PA`f=Qec0$d72P|hUidfO9+XO2Z0L;?}Y()f}1QeP>1)9oYM4;%J8 zoY99b!t-y6a%i@rxiHQ}6X2YcS4$tHh@)Dx^xs(D_t#6O2NFo$@wEN+wrNyXuPUBq zx&oW~`1DebWpuhv%&3lI>=WKIkxY*Ht0V27c4)HCRoOFK25iBsUUT@McvH>M_8+B< 
zKh9+83$e?-gIynH21}cJ-i`SzfQwhC?+@O6g%y`QB)xl*S}+VJ}iInJWYhcOu+&X+_D8p&?Ojd&O~Mj5V@c- zHGEp}RvQE9U6Lvii^$78dq?S-O-J+Re>Eb#H4@XjhMxJ4Ys|gdDx)0U<@tG&ffEi^ z?;mH?7KE(;MA#7RnAmLtrs}efOv#2`Y5iIXuT*HA&vLBLCxiQQ&BaWp67LcEy$!RK zC7I}ghCad}0!FZUI$A0PM4$weWQSFfr7AblI;gHRsvvVlPtU%A>MO=`MC4XO?P z2@YpcXO|X@YZ@j8S-8}hH{{DUKAV`XY)FiLvR%II?Pq8R{T`kC$L<5xPv&Yi;jL$V z7Rm#?OW*qP=j;ovy8I{FRKu?5p3ji8@)PrQvC+sMu6gvvD);fnvM%+8-w!--#Y0=# z-5&+N|8w8(9PdQ%FPXU3Eo-_j~eO8;gW+@%mi>N=YhQEn=`gs9dh>L|VZ= z$OZt}klngz;Qt}x90?*U1rTLMK?x@&lNDV`ry=7U$pj2QHe>_m$dX7U8^Pu()~-r2m6;m+vqF2J96F}Wq)T4(Np%3hveSbJB=2~dzqNCM)Xl8e@XSaEjB5VST#M+)E>Ol3LhV6*t><(sleTJn zkwu^Omuw}J7(RQvmO#`xmhC)Euc|%S`NMqf&(URhp7 z^sArH=VhmzJ9(h5mJ?zuS-tlJE~+2L6L$K8Lm68%W*%K-JUf<88obv%Ue-C`C|37o z_LslbTa;}gOSW!Q+*Pn`*rEZ;O9WjM4Baj0k|>Q%c+?tef1zj-n=)FqyN{f4t=Mq5 zu<di-;Vkv^XQld?^4h3u4ZN zl1sFd1)2G(akUDZaL$`MPNn6`>-J^JQ%my3Wufp;<+QV{Q0*+< z`DDU1CwkE|xa86M-~Q1j?>l-vJ$RpTm;SfzuRS95|HXDBd~0Cae^2xX%X z&IRhLyxPPF8hp#j)y{a#<+qaK36HU%Ia5y~#$s(mhiII|jYhtyGM!zm9_M6!eD!U| z%WB3wm9JN#L&*>>z2I!tQIzO(43&Zc+?3l37=R8y(G?v~;4&rrA0|F#;QvoH8f3*p zF6J@t|02VHY(SP25r72|u$+&NPwds4oP(4bH^vUnt#MVdx7;%D!HJ5H6y?ZoH{-FB zh7c@tRXGp$FQgDDh_+NeP$>LgRKW+F++~43{?AjsW!lc}Y|rjychh?#y#XPVNL4K7 zspn}Bf%9cYv7LIJiWNmf5CjoX5EMipfsjsmvLu_jz0B5`oj%{0Z+V{Q`wyJI`~Cj! 
zcR0pf_LI#0p;pn+SwQk*f51!oF9oY7*EXHCD{Mw(C^NpmlbH%PQ>d*zNN#CvP6s-$~ z4!R#-CN0&31*qE`8kn-I7#DB=FYI^$2;MY` zdZ;#8z-3&Ue>(2S_K+0RZJBZ0(QgQ!EKS6hrQBO8hx*g!B6Vu+tf$&lA(}Jep-<#a z6nYlD_uT$Kqkg!P&UBw&``xc=C;x26QrXU#OHY05B6;CP*_Rrf_v(q!+i(_-plbMh z_PLcm_-HJ&sqK!QpX*&6acjyWVnu4uwoTO;y|qwU5zSLGaex|wgIC>fs#GtHu8|pf&v+mb zY^ZWwS#BWyP7pGk` zCg!(hF8Rc_m9Jd2f$qtBk*IFdhsM8G*}zX-OdBJ2KI*@?zu&ByT-ChCrGye1#^j3j zi|yAs`z|$3>Jg$y&s> zd;vQ;?8Ajk3-;tIJ?1Mt7CF~{-+C*r@nC9CUqLG2x~=Pvbl|$^Fn<{N*G?r#%s%t* z68EWbt4MM)!y_IiKJ}4q3-2HL^xCp8sLMmd^6hZIF*I2YDg<hAAkt1(7{{#*!=Fp4$BB!K#4rJsAZdz7@v1e%@7p0I2d2UC=)nlOWpxP!0kN zmllzdR5e`mr|km7ss^U*)!IZPP!6C>``m_nt}T^bTI#Qj#xZv?-CB8ubEUWvEy00A z7Ga6Y44qBELa0tL3iNjB!V#3~k&$mAP`0mUXMpi93 zKeG1u6I0u8n*FA>;>?-i%L^X(G!3{PXc$|+;!EY{mwo$FvGDV;AMbqgsJ&+d?{MnY zD+f~pR%2s0oEz$k6s~HYst%2hQpMe&L=2lX*;0|0_3}`fnx%8Go3dKif>(4WD`|J< zhi@A=A9khF<%pVQNO74`fF9Jk#uqOvMXTQK1yvI75U`xOva7?@T>v^R(*RH70Fw|f zBmgFHnc%{x@mN0VI)-Baa18t}2X%(PJGg+;^u*r5jIS7k^b1SGp@~&yK6mG~&NnXQ z{VdhTe!mzGoUvjJ0}JZBR|YBps;IT_J-rvak(vne*n{yOS2p7K0K@ogzx7`{&=ss1 zTeae`(Ru~$R>h`P*7wieTG}&EJMvcKcK!HhG40e;4a|RELrFs{*)`Y`V*qMr?7hHl z3!Y!{<)r<1GFc}l-}5Pd=Lz?KcU54NDaC~*RsHGVudXlVX^T=SJ#hAx+YB#GK;odFJ#D5BB14s zAoYbC)Lh*!7%Wvlg#K8zl>r5zm9*JMyV#nwr?OYx*&iZs+>+s2OIdNUlPRd=n7(H! zLN$*8YZjaxj7gB~xQzRZyQ4+gBx_Z5FmWMW5T3RwR^jc(l3ayZ2vn4FLvlv4(3UDJ zDhxD+hUlXHQd{Ue1y_QqX@Q|k-V1Xl=%ix~7f2hSI?D+*7E6Wltf&X@z|x~xF&=Az zqWPu4^QGu}{Cje*2+cUbt_1IGb7GT)bz=4UK>J z|7T~nD|JUy|;rraJ#N3<={XyT5? 
zst!--dhtTz_2Y&G5JPk}X=B-?3&t~4mu^Qoxnx>a>JdFq5v(wchV-8bD`yrW!F|Jq zM+GjE0LmH{TmbO!KU}~u@c+fFiO|IL5(?d&mZn*KsIq$up@GP7goOBBT+AaBZxgJ&OEaE{&I>?)g70bRQg;m^83@{;sYj4rr>*=pJ!aHZ5-P zT}| zs?1kz@m~6qO2IYPhX=|*#uNki%+aB!1lbONG07m3c@?gRcp;Y!4uNsDjXj++t zxfvqXDcUqyo|z~tnHp*i#P!)j#ks~g3JRq#UCQ_*xxr{Sr1>U1FF5#| zxvWA3jL)mi4@jkiV+fb6 zEOq3|kxJ?qzdiM6Rf9OyWntvrALF?rUD4>oRna$y4^65nzjQ^-g~coAu7Q^RL-ijJ zN5?Z+)K?5IcpoFCTWy^=+EoL<+1S4GV(7i-UHXL)=MUMz8kP9izu?u5i4I)ba9ReK zr@$)c@xn%2mK-X% z^hKe?I$~Y7sHU0`D`GluMGc35pq}XkDbbX#JG$h|wNs_Cp<|ysU<4@#qb)UW^|3W; zPfu;RqhEjFw1to*vR}V8f9Mr24mZ5a9`Oa0Oi3DBFxpomLAC?F1|Lkt?SibR*L5c_ zlnF@7`sZDLnO-Noh#mKAU6TVJ_jg_YzTyA;)w2VM_{V;B zxVD|r#8($y%%3%OR)71uZuob9Tz&k`sb7R&tA6CpLUeE45B~b2g}e6@;R1T0ag*zM ziMZ8TThh($@d&OJw7Q~rnhWp_Clc6JRw1ff5z4x8#YL>GV1d!nsY2W0LdIcK4~L`S z%DTUI1vf0>;C884OyvuutQFc8^mMYTz?3h+6on+dFm%Z%-oYRhpg}4?GDyi>T|z?O zG708zKzN#0Km?kn;h#Kc@HB1+fJ1P=iT(8KU;>x9kp9Z4qv=(;oxE?&$sOIcn9x}A zxrOq-&gC0wMpjjezl}C3P(UnLJ5#(&Ev4tK<2lh@7v zeW}*bm8d19wH4jV78-93H}t;M{2}k~L_X~_#(L*|OjXM2BsEWiXD{41uy5+FO$APTYMLq6bSpHeDXL6W3YbKKX;2Q;NPe8^F7Zwz zU0;F;Sr%@7lmH(^7r0gAN+?HqPD)gYG)$KC;+u#cIRM?1lbON zapCJ?)GkUV)*@0UGfK;{g$EKDSySnFfq-hYPU`h>+CHQf4yC5^c(`fEUTpVp094TF z74k#^VGgGZEggfW0d=+`n=+0JwkS&N((E~}R_5cO=>RRRAr~|AuNv(VXTl8=O}EHP zBgHdI(*t+5{JszX<HnIOo#?9CVgB+IT+bDrGZGmCx8Sj%P{~QmuXyZ1{jht$ryzK z1)wyKdSH-DN%h`mZ zbLugj$QcuTF$uCA040Wya$HYFgqmp$Wg}q}WrPNKNkFemRE86kO%w58ZJaibs6}Jr z(@DI#<)T*0IFux~qf< z|97A8%(vL1&wTgekDus;UNH1R-DX+BMBHkvQWJV1Vc=e|n+G({(tBmSu1AB43aU;N z-Pl?-l+-nDRs_P~aB$ZcZd~k`rIMFPS;^FtwPbB6GJ4hs>LFb-b)9Gh!`H~(EYl$+ zD2NNg^_?!cOy2cs)NdinV{0}ZT7)p&lK33}U zssz;qtT|JMa%*`h^^Fy|DE=u~i+8tseZ=3N%qGiC(Xr-R>#eF__V@M0p_tO( zEvb7Y9=zSTwDptSMyGV6N-b%T=`vH%S5B3c=@UhZQFH&)zb&4kRV9`>H#MYbQ1;rz zZ+785#z4gbzVp-l!V5a7H>SD2y)@;h(%Rq%04X56Ma4adPi|E{wcRG62y;y>72w2I z#5OA(&oCTQ!WUY3)F+CNj=~W$07eyBY7TaVmTx$fx%S_O%|G`h@cNGc03ZNKL_t)i z?XV*zzHz<#=1$V9?BMFmm`6bcrJ`=>Xm?D4YzII!>HA(HGS=pI%_Ua8s3~aBIdz57 
z1+Az`X$5ZUb;FoGj1}Ffb~zqvzMvYs6ey^2jHXefvj`g{0ghY>Yoe|IJ&@&3dpWIrrqtRsdxy{f#R+KKqCL>HqkZJ(|1sv%k1yhIqSsSI{jV zZQC{V`!AQmU+Y+L^ybc^jenQVek!e>vYvn7`}f{|C;>QzUTEB)O8!LLYOM}UPVdPY zv1NsV#{I-G7@KQ6r)?oKe;9xcjWfK1L*O#W zxBwSifCGZVY5CF7kvNr*%A?xC?1|h4Qq0`9YH)u_hzW|ZhwAXaAPEJN?Yi;&Sc5V_ zxjT1RS69l5R#2z%kvE>Mu9wGh8lC*iUH*S{loTz$x8+G1^EBl)=8aW;eAArdu~hA; zQ*9rz_os_f#p%Ih;|-04YLmZ`3n(zDImLO01|shlZ*Ti}&usjY@)>}??eBFtrAnA< zy&ai5S2B zy|h2`sq1X!6}=fHsff;{A0W0L@_HMprRP&BxaM%!X;&vMIz}A;Qos()IMQXTTYo%# z&Gz2V-+D7{Fz-!#`v!6F6?F-&xukm6L`;T+f?B!oQeRAhYzM%(pb&8voLcj&>THim zoh)lGZ9$eBwqkScT)EKM0MotoiVH929iu`S z|Jqsm>AOnz5B*Ag=!v=7@oebvOs4JFV~8k>CLWo-C0$aq5YFrHV|J4n%R+09qoGtHY`* zSEZO_Kzfn8f1)B7(Pj9NzyCGXQhMv+NNw_`U-Ex*y3edDZkA(&E2Dr_BeN_DF zKrwFI(Eblmp#V{}Zsg6(m)4F|>H@iglgjozsVbHp@4|a3FUEdc_MbeTbOToIO&yO8 zrpDo}nAtH}Pr1;EGb_Jm>W*zWQycT!3dC1_z=vJodH~BHX4qve;fnz0C8wkcWITRY zV-irtm4UZLuyR#r>dHHNs{h=2<6jEY%Py-^9W9qID(6G443gT0<2*aU~S>ju+_Mta#v)_&;X}OVD0FryJp$N3%qlImt3|XdpeVy zZ1WCg{7vJ?XLr&J5Eq z7gRD~n@Z`LWreu(HDZN4pD9oP!jO`x$k1McD+U4y=7JJHKuMo4;&Gu#p8&p(f07Im z{|~_hIFn#_7l**RI0U%hfB?ADdjA0@PPw9#l?zMzJMtSHk^Am~j=xMgUeI% z=v*_ta+bFzRd?pt{10Z1Bs00&xW7h|VqCax+Sofr7 zSjE>lew`ie4XP``qrxLnx;|f?*q#5(HECTH13_U^Reu|4d8`9xAxy=Z(O zDnYgbpu`8f3CqS@GOcHb2HUZ#X+^Ec~HY?})#sXduuZ*jBzUH%@& zfJ>hc1;GI@bZFjXZp} z_;vTCdc)dcK0~V+@x2-=G~XQEQkOlNYCL>q(e3G@$yC8<(T5h@JjLs1G;wKC3I@0p zGk3&m-k=?dr%Z>09AB&A zwlq-+M@u?)9Lh~wj3>fDPjXtRtaJTT3V4uiY}Vm^zG~&^#0{V9iv8_U#)FHI?``$p z`m0KqUTHz~uCb_w%hbw+eV1YqWIF(^s1J9{wRAT!GhT>BQfM5_)=}HjbXbf7G>ml2 z9IA~BMrU<;Rjw5cB{6qAODU*ws)J{OGK(;z0M9uw51JP!Yo6O2E9nYcP{Hf%&bvPN zIhQkd!KHnf!q$05$K5mY%l7IkrjDCuy}4agpE+e3`%1imWJoo8J&s_%^W^Iz_dK<4J9#rvDn5Me!*>ef%@^Zsg~5TVo<4g1<1F_H zT9y33lie#{1Q2h*FNZ(y#^*nN(E@bME6iJhVktT9g=d9kMH4f^o+{+se6g4vF}78? 
zg}p;ME(=5|8W%8!P^Ziyp3tN(iB1%#r*X#zhb}GCmqhtG7|T-zMY&|biziGUAW)LI zI^HEI1PB$NibE*^>ifQAAb>A8z@*?1gh_CZ1Bx<9d;u=-4h{h>0L~s^a%&?AwvBy*!8gx-P#qAli(~B3<-bA+Ct0yV>QFr;}!+l8O2U zo_W2xF?VtT#r{X{R1fy_RtE~#lt(}eUudkKEw3EAx*~mkyx~~Kx*Ll7;$H34DAsLyRpD}mc1maY*{!wVL>!E=Cl`jqY`900M3QtrAZV3DRs@0L3qmWpebF^ zGBih%2)HqGs6IGkG>jqWTFoqO%pT8jT;@bW7lUp>uY@DOg%$Fkd4aO#AsnlO5}=?w zpHBSvimwSBN(Jgtw!LZTo=LA~;gndmwAjJV(rF3(PV6XS%B|yR<*xJA|M&VIJpcFV ztv|ctBWuP#_Op)K3-jN*pjZs|h`${u-SMILy*-C)*ZR%oM?OW#;eYg1fCW$YI4}c3U0FHMlMu@~aSZT$5 zJBw+;7#9?|h25RjT3IUpxasge#)~v>7U`RiUmbVqxwD)Xj!0JsC}l5SGjJwJtEyl% zKKRt`@HFR80<81#?fx$Yx*GM&w&c#R0EL=vg_miGjZt@hq2bWmSKgF3kjNB@E23}D zytY{fr^AEe!c>IVk;WbI##@tv|9)-%_zTulz#jYNjq0}-$NDR7$Ry!ulCkp4@?LrO z0!K5XId%3-R}JU7+upwSLhuH&r}=wRfv0=oK=tXRKS^nY?Cu zZ`I$mOl%shX z!m7vxg#!(`sOH1fp%EjTl`Uzj#UJMj28*BooND7D7im2#g{N_0g***j*x|WQ>rhn< z1i*PdJ8kUVdEcg-cnCL7hcelo#gl5|!sHp_gw~v;H&#yn)f7dmHL@YF?+3;= z=x+|b(6si4pIdoHYTkE;uRCYlp`(%yu;2c!@veVOf3Bm`Vx_0&KlO1UcKyj|PL7RV z`v{;i)gO_e$fh3+-uNWm^-H(vWX;8{oP!8Zh-hA5$?%j{SX0*=P=xf4Ms>5&)FtPN zdrr_79`%`&J~g>=L{Kqb`h@$!6rK*t@P%nBoeSo9nuanVrt6mt&1Osmq<-d1fGP&X zASDA8N>lKm7!;E}^C+ld0>lRsP?Qkg2k}AR6G^FJkcvX!U0iSosyGBEk}`+mGGUaK zeBsF0+d-j2LCl@qdBNIbImMqhz4gk3?d7SUyCLo!VIhK5oIIClRg5dmYhT!T)`l6= zi_H%_aWGITjwONdUwugYv^UthmsGMk;O^a(y>_pYElh9hWr9;%28ee)XiP**a}ZlNzhqkuDO(}jcb z^Rr!QXhHBH?VQkO~IK@}F`?GvGD~pR~?zmV-Ddp~*`Mn0(ueksq%C+7MeX&q=4W~H7*(`q7sgP;dq*-0q`^{+B8}Y zL>OgW!Pz+Hxi3$@pXvYt^Sh_?tt~rB`S>i0Ua=r?LhEP>rz;mUtbb}xy*sTOj;=cM z!{NJIYn{INi|_y3j4!5c`_$9*D)9RX5i1VxjvWnaHrxMkurKehXWM`KN#gB$HrJGy z9Qxo-DEw^YmU?T!HQ&x$_nY0>K*VhGf zsQ}5D;DXp(2~eL9U2#FF0({~!)AB+Jgm{!GC4F7$M7d1*oJhlwgh~Q#he1zY3P>;j z5|EVpK9mnYa-R}OC45k&m}F1@2o!{&d_egeAW8sSG6)XA1&07MUl>Yq-v_{yR5~fG z|LO~LUv+>HM%}9R6TQyLl578D+M$=neJ9IZ{dM3A<6aZZtq$bk<&dHdRnlG6_qHWj zt8UHt@5hfs>Ix^vfy86C$_FkDwX4$Ai8RrCkAsdhwuL4(hRJie#e2`)v>|nHDwED_ zGCHT`v?Hl0NN?keH-`0VLcI;&&*Ar@BUZ?AU)J28+IiuX+pBtne z>xQc=t*V)@V%DHihXjL!VG>Vse4hmM)5TyY?}RxK<@nSAFv_GC+@c>C=Bt)pnA&vL 
z5Z&36uuRD_pW7<-AEx8bXb?$H(*=|cYo2{RDnYgb@CgG-V?kjd#BEbSD9%Ae`*>D2 zsZcgd2s_MEnwhJv7}BOCtadpPT5!SUxXfv=I%7PG0Fwe-SUNO#0Rb!4Fl0DJuq1_7 zw<+?%530UrQs&!L?foTfQ}qEgo|u);ubnn|B6uuTJ2L0$zW4vB`$FyfWX#<-{1NvX z*Szn)&n#W>;12`e>bviY$D)7xQ&W9GB?|20mm*Eio1cDZGH)5Lw47Z$(V%W_&?ZL* zKl7h>$JLvoz4I?I!^!8kQrYt@3ly_Y6^xLjyAF_?yS9JSb*>E02@pqtKr#?vz!!$c zJeO(Gr4)qtTO2skB>zVLP8Fhe2U}S2)u&}&Kz*+>7FiqLM50~nq$rWf}KxXJ_M+Etxrlh9|8;=J$u zZdXl9=Hvv}_>(t^Uw8F15pmtX-y<48(N<`0;>oR5+1Cp*_MN+RU1oQ(kS?!6&+HrP z$}qIibB=`O`}vA#uO(WyjSPQb-N@JrSx-zA9$X=RIx&(qugqNF%H=?4`RGCR`PraK zMu*S!_0?18Dm-Uzw{er!*Z94Z@%Y$?V-(bjdj1h7@1f8fI_W?`&{FHVzSo@_R!OI= zH6Qbm@RjK+rXWoY4NCa{@y5J>p9$2I%6hpsKgyL*}pLh&x; z1n{m$-%cqgmx7Ae^30`}1lbONF~Lc8in~HaXfX^3E}*;aYWos6Rc2nVu#0YxPSwyv zT~%+OVWQmXO+;pP2~Ys#=G2)MB=oQp02h|7d72l8)S-&0}ocTi1L0 z)9?MkuJ_`OOS7}tywkX(6KjrzZ+uRl$X5QX73f?tcJnp*sd)L$ujUGe<@tSg&i!%7 zw5^hIrRTD|F_-G)vz87QdI5$LTQxA1##TNvL?m^o)ZlxXp_I_663U~1P$4MOR=V(l zWtcV-L{~lwlzmV4G*1!=LR?>&OqkLJKs4XylF%cgM4FIbfU7GgLg6s@Oe$STN(6&a z6aevwWD-#8sB7~NmV6;(>iZobHd8!Bx1 zi>d>=25n@yTlpfquaA1o)Lx|cOIC|dG!WUexbys&h_$fdobUhoV10AySQ1G5@jCH9 z?{I^pS6MqN0}6sihZi=$+7g;NBx`q{y?xW@o>V4VUWJ~S8{4FeXk$HZIh0XFuk+qW zH*Fi5ym$H7$aCeKU6KCUTK}gbX)kk~|FRLJF4T;bCy%l(&NM7(vdJS;%!USs&ppz| zw&^`B-^-|{CwhdDQ!j=K59CK2Ew^CNo}PMe3N$S1-ecXgRBF>}-HL3Q60=Mq4&j1v zB6Q`$bfub9wq7=T%9(p^xIjEAZK@huj}P*-OE080e!9PUG-s$OL9$m*b9*i@Ssz0NDWZ z*2sWm-Wb^H54vr`6>0`~27H|LrH!Z+TRoDAv8u1e+KGcPQ_^LqOi z{e4ZociqU8MqSOPynA6oFnx6>vxi4s@A=m?BfC?DTzZ4-pSii!nwlOSjrWL%0Cn2^ zou%p54UK2S}!T(1!s{r+0@;9x4NHs(qMK9q0G8{ZfF;%sJlemHSq|40a6xU%8( z{@}HIsP#V+-m~!mRpsL`J^#FB%j$|$f8Nt1;3KmzwLAOX71C>(#*~$lCMS{-P#8J| zh4LtoUYtrWC4EDdCdYRLgC3wl8`$9yW8LD@nf3qCSM{5tu1gi2`u_}Ug5xr$C4Q+zr1byqLkp~y`nhJ{+Yz0IM%-IwX77IH@hvl+RZ%teje*DFKirPPJ5Lls z?c=GTWzT=*x)~o_dHruN>NXz%x})_^y%EYY=2|Y`O4a=zvDmYj>TrZ|Dnwm6Q~K`M z&-z{HE-=yzC${Q>WnmLv9CU2T6Gv_b$vQ;bT@)u9x%bd5MZ0VPZ+qI||6 zl_Jok3&NoZ$q2Q5gL+QD=l0&L4lqb20Z1J#QDBmSKvGJipi~myyuzR_(<3!-5s zDSm0pZFA*%eQFn{uHs60>ldFsm(ruvEW7^RpPs31NFEz`(>?WyJ5 
z1#g;~IW17UQZMWdRK7U)(ai&|Wu^+{Wf+{hsb(xYrzUYGt9&L|aav?&q5Yb{iSNuC z?%R{>s&di~FSi~{3@0O7CJ!5cqF&3ovxg#It(}ZkXQG)igNbUUDA0DK+ql8#TJ)8% z2A|CRMN`tgqhe9)86=Z^q;tdCBI0OCf4jrn{mN zWINu$0;Np0C__}|cCiG51-V(GLN~J>WTjHjU8la7Z7EO9mP3JtF>iJ{Rn>A46pEtU zJT_1#(xH%$!V3sXcSEp3o@T}B;#`CRWg*dcg}?LHBTrT+rIlw?bk>$O=?AMjf(^2= zZb@gq>{#d-6<2D(XL?z6`^4lGRr2e9Ke}+~lh3|){m;MK_lNWkKMI`PUaJ%@T)Os| zJFlzx%-Snn3t47tc|M&lcV-u$XiI=G1Dq#+ST*)!uu6wp>mM<<_JmT8oc`=602gSU zChTCiD{nQovz5fAf)nXzRKRCJmq_jlsWeCf5*#kL)FqTpT&gQ73|BD@YD?i1?pn>e0WsO6CDxxV@_u*3^s80!X3H5#ED+;BU1oc6PM+q1dRaE+lGDt8$ zGAXF<0~`Pq6kKrS13o}}C@ueyTc2_FG(&+Zu_bi0*IZZ3vxiwOn>V~bKvq_l1D+$o z%zApPYKJooP=@m6&OLW3XI8~zVb#~ax38`~-BWVV_1d~hyhA=YvZr1Pa7R@PEt#iH zT^$)cZs>m*ylMUTiPU7?S}jHvtS@*otIgij%U%%hs@^lUH{E*8(9{oF`_J!5U5e83 zLrVsqEOuv_uW*h93`M1AYCC@{_~pu6D55HodnbgHOnTMx_w_`s4fM|W>Ud;GPZy0I zwO!b6jF;oN!mK%O4K^|Ws%rM|zU<8lq1CshD~jV41}Tw9o{Y`( zVFj`s00VrINl~3r2!$-Gr@9YM2|FKDLGGH_8kQ2Zp`O^<001BWNkleyjsJZ!#(U~iDc6>-a^~JuwPIELz;#HPP~g-rG~H+ocw(kQr&+`$d+nS3i2<|fyPFGv3n|MTFC$&wLT?7UPz zH`x+@boX}$paTVLX)V)Sxj*lNZ#q=>m;jORm?0(iGzmMvZAOJp zG&qbg<#WMHP)buu1UM-g6mWda_Y5kiX6-NNiU1H4OcE#x>JUa9Dmkb}0Z9n;B|yPl z=KEm2;=*T8AVe|=245+k0fGaBC;-%l&jmny2#w}%-}Y>CM@>X)cfS#|^7_0s@Xf-9=Vh8MI5 z*=A(o(d4VLG7xfeqowlN((vpR6``7_)_pOdF(S2clYBEf`|6>@{Y}G7E^~Dq@D-r| zetA)O8h3qWEgm=;`Eo6YXNE>E9_+27;A(a4j*;NCT72FYhy7m-q-CHO-bt)4W+pty z&C>>@<|^MuacOc_@wOFI)wWFrooon>p+sfD){G!wjz*y*EoL=efJ&Qb&Peu6XkdVT z71>>kwYLstulZzG^)vC2obKw0Z`>qz?ZZXo(kn>D)d0zua$DHBAV9VQV1#)plh&aW zg`;^aZ0m|rrEG2rKyaIzwy4Qlt=aTEXSk{=PG=^wwKZLU;(|LZJ%cSsgbW9Ok%q2W z&@~Te#1xDuJTqBdvU%{W-=BRtC^)s}s7sOd#mSDyv3X5rg6r$MrqcbZvV|+Aib_f-sHH`%b?MVS`gE~XtJqo>tXLOZiW>?diU=eiJAnj3LiWsLCbQ4n z_uSjHopb)@jeXwFyY%^f;bAHB5aq;51hOTR@PL=bD;F|9)C@eFbW(^!ONMZ8LODUk zO8|_(fSeZ)9weNEBQOC@2rwYMa15}r3KojVD(QrD9)NVhGhy+yH{Q9j*BMqe^k(tJ z@$iC$cJUh{tMT7`>+#O5CIjF~wXt(wd~u*yP(y)QEykB|SZ z6Q}Vx8=l_V^O2=#s}E{=cdMyL!T8mW9xq;}Pi+3^l)UZaL{zF(ehl;Z`ID)Nx37HY zL?j&t>E?^~KDPM!Z5a;_r7mxk4NGL9h1Xib=`3_MNG4p6NavGA*gw9o$WnqS(aRqy 
zFF$@JYTIu<)cvE;*?P&)Ja_Q&-hL(1mpJtKE z1fbwl9u#0KK;C*0iqugAHp&n&ly!k62AM!R$4)Rw;|ocs8(dJv$y^B-(z5YbQ;*Y5;UpXrga-&LoiI?afeRQO(t)f5ybT?D3AymP zi|*efqF8#Qk)As;LMv^%`#<6e;s50UQNQ!)?z3zn;l!dle$C!z+9mER)vx{7M6UwD z3ul$c9R&~*cfYu`;6PS+NrrM^n3L(_S_!3^bBoEg-3 z;8f=y)n~W-W!2oaFvcB?!jDn?N^&CSc-zVs$GQp79IhXGFni+$Stty(BQx3;E{RL6bGm(C7ewzx2O-^>+z?6rf9k-;Zl+B9%%#ny#ZF#F6WuKMSr?>(`z zTpO~?Q0Yit_eip$-B_k|WA3_F{cPflsMe8X?L-v^OtWPClEIm=g@N|qhd=tMpB*U8 zEhV(3`+DV>Q(yY-zg2-O%qrJ(RFNkq8)jL5_xZ62G7{1#Hf58P;NeK+(nQ{dG54q> zRz44NjXcN+AhIA9PB{n|CKQcJbV;PjItpO5s6Cu3z=H%^NH|H(3+n{{PaeWc24Ddx zt&q7CRzbiC;f3%54u9YTStXtD!b8G=n7oH25CEIKZS8L_DCf2qh!bPyihEAl4wqG4 zai)_&ti2P0bL%ovEw}7Yu+bXj;a@0jHZQ!G2K5kG=gXHgawe9=ycIf^ThCNjby9a5 z2~mw*4)#(wCHF7t+;(o5nTnH_H%HIEVxZmub@XuJr2y5=;Cr%d+g@7yzMg#ckz)&; z@zn2DjD4>@Io5l{p$0=b0?ygLXW0iT*7q@0k3Kd&Kty6g*L~%1iE3>6$fUcYdcuaa zzW+r17JEEJ`m){;;n{+HcHY86?Kf^^TN;e4nGX;pc_EsJ6!;Q%jb(nCeEEJFI(Jx(@VW~PL9`<&8I)}YV+96d@{aFHL_|uAmu!QY)Nz= z1jG&i5*}i@p|X$(1j@65hE=R`ORUJ2xmAqOQ#Hf2_WHTmCB^Yz-N|%iVZVd|*bFyq zI@K<9WEo5$X;yM*1RZXCW%o3yJ$>`eE|0cP?ElUWcb9ysXtsQ`i|736PgX7EYI)jSCgF}7GfA!N_3!lih9Czamy38s{*KYh(p4L>)(uHOD z$XnEd)Fl_go_f2)?Y^k7;zZVmyfwG+xGRxyqeqPC3%2^Whos1%9#M^ydWkgSP7?$2NQwVdk`#%fIWLb4yVbC2!B8x z!jp&hj@%lh4ZN1lAxJeqvNW55br7m0MWw6^!Q@&mfuxbTIvckxX^w5ae3=hi?@Nat z2>@{U+GEf3EgK$>zeiJhcFxWh-k%jVeq& zx(LtG$N9zwm$v54t8cq`taSU#2+L;w_fh?lJ(Lz3FI#*6iB1lHbn}J#9&5k-%sA{& zJtkp26*2l+HAW<4TpV1;0|~8z%yK>GbUxc(YqiZc5t;mlqe{hgFCmngCQVx0B{!4lgj;DqyP1PPkwa0CC zYYvZm?dQ8SgY3D*Cnm33dARe%=UzYm;0>?b(^#{!cHZ97*)yw-9nG3AyYP(RjxZj- zXZ(uiCve5VSsT?#QQBQ>tUT|VwA$v=>rO^#{?-Hap?z9Juf2DASj<%hCuJ>P)vC-^ zH=NqOSsBRGdF zLGE%7rKBrWmp*giLn2eu7K(|Og!2Hrmri(%T=+cnl*oXjF@==E)amm0F6kwl7v#JY zL@8v*qBVvkJM&HGvUk0UUH*?de~+_0fkzW>uU;(A()kNa=*noODdi1OSsX z+H;k+&P8u(>}w#%n%;7T42w-5>sdWN8)w@W#?HTTK<82Cb0dd}nnASj;Qn)tu2Bm= z6q9>*wHj<1w=b)HDLqmf+}4D4nG zDMyw;A|_jMN!~a%1%{=Zw)8(bm#ywER4=({Pv>vPCmLa5C%<@&-}Qh$fCgPK^vtnN zC=l}DZ3~AAfY<>*T1c9x6<|?}i8u?hu$&UH1wa~U)FP;kd0H}!b>r2g;Z(Hb6n8C- zmP02cnz9)1e7iHutU-bhc5-#m2B 
z^&>yJV)F2gYkqk7wWmI1N;G$A*OC|SJmcnp^6Zh({Rghr7f;<)KI`O|>O-trJOD9p zFkQ9uc)H=Km8bHTFKj$|qN6jZj5pTKhazZpjbT$O8f|W@opr(-EjHdprhW^3+v&_UX19|i)N zy>XFe=?YGm!c2bg{$I{_cj~a*)2dw-kd#h_KKITd@-Qk1g?GjW6b6%~vgO5F%7If_ zLWv-%P1@9U^i5ae)>RA7U-7D5L7@H5XOcY!$(i$yZaKcLwD6ypeER9>cv-UN!H$z( z_XpE~3l5Acu(GOW!@lP>eyC*Qq7~DVFUY^kn(Fwc~lz?Eaou z_*OnTtMbjAhn)c&BiefE(d^A<*NrTp5SYa3wy|Du8bUGm27yzVH;}N_6umO1TE8w8 z0Ajr@d~8f@-a0zJ{pJ^Y@0uN(S9zX%eG7IzYWL?>pChVF(n<)?8Y&$K0kH#sgp+z`eG zX3rnJXV3lnX2O9LuQ_zJ{FhloXWf6<346uvhF;w|`)<2VHQLJ$uH8L&@sU-JM>Lov zH8tGSd8HMdz*)Ba(&iO=-+uL9e)Nm}SnpqdJ2`m6=WcFb!zcg#H&YtOv*oXBwyBkz zXkmR{--b4_X95~Fm7zqA<*Us2>h8q$E`*aE4J{vyPm9qkoWnyAe zu&Q?kODA#mk!F8u)+Gqq&^dq%2(wCWqg{dt!l_CZ+U>RJ@eQ(pzCPIzDuT4C*uAUy zcel?dmXzEe=XEtJKmGi*z3yzauqOESsdGQFYT}uxV~5usyY?OFUB9?-?UH`;>=jS{ z6mPy~>tR`IY(A0T40X}%c}Ia>>``?56&L(^_*B{zrR6d1424FEM=$9bZN2#U`LBOr z@ZUas%i`FjUwi-YvCqHZlSf*w`O{DSc2*SDwKYyV|wP!BORvittKC)YW!1|m)tW&R`;IfeNN(Pw<#d&1CHb%iJBfJ*S2|ADN zDmnsr4=;!z;BXo_DRKqxB{@gz3Gfh1WI+rUct;=>r5}A?? zBo;pWXVtKs8nmYSRtTR-EfUC}8HP34(G-QGATmN=5cxddTA~VMSOFzD7nO=EQVLUO zOL?w}88JC8;Q|BI=zQ|4C@SItBb3@Kj#-uUubHam?MugxUHuBt1=Dfh_+p?4oSu2| z!S3PSD7g)_N1mS=9iHc3(#apiPgd8QJ8c1QB9nD{53jqP`w}RT%sn|3NjO~DasNd4 zN;-b+yGD!OZya&5zT`V%{Kw`*-P$!PrWzqjN20Z6;oG%})%7gJg{cx0&G_C)Mxg{NP`8OW9 z?wa|{$(Wh=Y5Jxse(}_aM&F9-4{krF^7tbk8VWgB^x#fq^ z%o#YNrWT9ipY=_8w>)=<%c2*~!{^h^iYQOjHf2$q)A0POOCl`Ks z_HDnL`SZ7Z@X=O|(}u5XW!3A~uqwa}riMQef)@DOfluG(@b}mon#qzGru9PYgjG>_~dR0W2f~ z-Yf6Ca$Z0(;00NbcTzfl;8?LBXQ7<53X&ZFA|uHI-V&sTRS<#~KXg+y;F`)CjqcSZ z&I{z7mx5AqB~DF2TZlpiKnM%8au* zdJyJ4fk?Al$0oK&JGy6TN&B3c>eh7|myWn~FFiY|2y9E29($y9*^0URTbOxb|Ix!6 z(^EV3#JBQ&wH2>;`uUPHVN_qYX7}MU-_WN_Xwf=0ve*GqHe9-Mx_C`+Ed+oAA4QYrS_rC1J&TPlob?3Co5VCQOtwPd9ARGp{ER6{=>8(|9 zr{;LRdT(bZgII43o|we8a}L+G-*mX}^w_C+m8YdoujJ=mlt&Cf>r>Sp4Iw0lo!O`Y z#14QKS_+6GW+>!wq%1*EyOYQQY#9+?bLyy;J5B^`Ta3C?N?<69)3hZEu21C}c}dw) zW=kE8u|d16Bgn!4@PVRY;kWA3w@wt2$fp0$`$A`))gN7W=f(F-jZfv-1&819>U(a# ztZ(J2c-Q62()Q?TWvntAe)Hv_xBp_3d}Pk`RWg}fI`k7Yx6Uo7*0Q5lI@LPJYKd=7 
zzHZ=tEi2Qt>1Tg_&#`59{rGFQjs5(6Um2PG))gOqytGsO_y^68-}vWG&*~h=He5dI zgJylS6|5Q@SZTQS^!aXa#dGIO^cPQF|KR8aGfVefd~nUN^Cn(+&7M1l-;o%SEVF=J z=wi)f8%NT47kV2|tQ@rUE)uDfPWwD)FL~zK5(JzO#14Wi!3j^G9NIbQ$O{h_crQGJ zC*fcfdGDpO2n4*xX;$Gh_7Z?_1SWu(y#&aE8XtUT-TM?Kt`7CPKm@65ilCCSGA;|# zQ0PQPS-4R2flirZt%N*}gy0~Tl~#%3ycFbl;8iY6qR5Hh^A8mxAYXd!zQ^2kEBD7gK<)Q096dIW+WYjxx092# z%H?B6io|ec<+-E*rHf%0`*_U*!-t*s|HcPrgpHI|}mrF!s zWJNta5^mpmVCJF^jjqK1!wdYEyB-cpg)lHUN5NReNZq6y-=0l zHz{hv`0A5KuqACQj@_z0UkHnHlS_a4n}2`lLm&Cs zm+yRd&#V7&O77~p>15vz@af(4f4uyrC#%r8Zmzttk*me_NRpr1xpZ}^C!RQ8&#vCL z>eQ;z(bwENd*P8YfB%YOs}62IcI1W=-`xDxri108wP2|;;{$0;B$y-0!rXfn&<-4! zL=(BtyUOGHLsC{aC>Rg{>>=3$LI@+BATT+2C?^0G#DW!j=>ZqGz_5e^6Kv=}1{#i~ zw@_9&!Ade5VezA{H)u(y+414!X)&^KA=M%)psg$!Uul*Du9t0EBo61blZGm3T-2Em z#ukH6h(=Kvr?RvJ?`5K0+O6Yu9+EN&sxV!6Zsf(oMGdgZv$T8p#A2MCHP^m!bER7H za&n}tg_Wvy=H%n<)dSDV?_&P(7mtj)O65o5_)pX4Yb(zlW2@MDk!^V9@U}Nrw6BYx zHokLi5SG+M^1*q1gE{h=Hy(HQ&mAne#nu1j_C1qKC|MT4$>}8pC!vZ12X^888`IQ4 zSybZ0{`ATNe{|&YJt&E4DdaeE79J&GEiSL5}uT(9XJ%>pU2``>yg0KfnCI;jQsWnNh121IcB3 zda$(Xshb}?=fx58>Aia<%_GPDiuyaoc7O0M_k3*Z?#;itL zEy2lKH@|Q1EKbXoU)ydXYma8;T)k>VS{Qp~OJVH%$JU-WJ2?5rKWUwHYR%)X7+E&5 zWq0e^>A$`4hN_n^K^(}$2SLU`DB4pS z4$`p@PO^ZMnHQ3UU~-UFX4*pl5C9`^V1S%(0URJaKx`FGvrvGOP6F*6*OjAjRnm>R zJj0W)jZ(xSFXbV8;W{nc;nRzzSt8^ZKO^kDUAVK3mKZTRZVmb3m#9z4po3!Yj?_^=}*> z_Jlw1p738rzdl%ivC0H_`N&SZZ?H93m&K~krpYd}MyMbGi((ngq63#;+fmU}gk3nI zi-x5m!Gd?6tem^)#N7EG{+a$`0Ip{GliEOp|8mdHjsk;h0X~)PxB|ouK#f-;Vi*v> zgi6XF09l{_R)8f$ag1giYsZ>cKg0}Xs{mqv7=Yr@>ZavW)xHkWoVx-!Q4m&Y1u38u z2`bCG*1i2JmX%B@pR0`i?3GKbd}@22d!n$`kRD%JvL`zWT8N46qHv1Y=SmeHe8gVz zz?&X8y6|#)f;UW@(2Gb`S34T-@?ZIfU0a_VFWq*e&eg#DY5mwXgqu;PL1_ea+uQ6JN>En}?>-o@)Z+w1r2Tn81ndZxsgok+I=Yz^A3nd^%c4hBb zJ{=A@=bG`-c`2P^ohqcc?C!}pFxImOy!Yr6h54gR9T|WK*6$f9o_o&Wne#t-q-y~1 zPoA#<#LYqc!QGFIMQ8fZLi<+7;z$UH9e|oy)XWTMSVe7V!U(ytND5$0tkGK3Kt4V< zJk)68j3Em!gBZkM0x>a9&u&^?i;G2svW1d?ELo2rAR!>7g6ML;IPdkhXtsrB?`M_V zr-s@C^STG)t{;Jz8j6zrtIGo3L&;b{SZg|Q&trv6W3T@1+>y^5dPM#8k6t!$VCIUy 
zc+Y%xmD#y%@eTK$I``JU-#hx*>-TNgj3!IAS;E1@ZE>)}qLL_A+>=B=Gb62Y21mh__1_njkl_J}UvS15k4_C1pW5X-gERC~Ig7nvIIg zV6x4qp!MRBQ^TD}j-mdDQV@eECt@gAAZ^WV7;3enNGt1@LW`1cq`g2Wa~UY#m;Ut9 zH+@7lyg0J4bKm(BU1KZ6 z^638VCA0m{Zg0!llGLWP@BI0>^CE0ffitO_I?R$X0htQ|kbrgG6at2I$|&t!EQ_k~ z@Vu0;3ScKf2AC{P6JF*Da0E{dfON`vFCi=|26=Kq2xUAuI4Qi4V1U6Syzor;1L*+i zfc8f&EO#F)EJ-Fa(bZX|mX2Csm{TSqmO0xFOPP#(>V+dRqR0Sn9^?r7x(p`F{oE(@^(P%{gm%0a9Y zS)>L9)eOoCt=4dhZPu)GS1h@ikyHHxAuNi+A_0g&U}ZtDL~(U?%TNoTGTGeA(6b7i zFN7IIhM*MnG=6o(>;4{EpuO&&7HGa4N!cnX*V5!8X9+ehvSOQuLTA#!!S%`S|L)S< zuKU?&*F&GW^Xjj?=bX{~vriRndiebxd*pu|y7Dz&umAMSHwNK%@wsPcWxdh$P3&m9 zpCqrj;q7nut9?(boVxWpJD0b%&Yg_=Z})E=9St>_*tT_+skS&($6EbI%HzLQd|HJh4 z+lI*qwjMOPyW_R%J8Z4C!p$X1CyVWNDJ*C0z*AQ2YCEr`mN^i(#qJI}liK6^8IFVE zw2W={d1oJ;?Ra0ea-)-LtEphCLM^7&hjG?pZhad4j$Lsu$+@TOAa($1W+7A-fP!L* zV|0*h6^n!him=cXSSUN1xumz#PPI$RJ1jF1SpYi6ALR3k=adpj(52MK049`PiGboz zlXS9x?%JKNzW(pDVp=@slSw9hAjx~*E*KRhnkJd6KTfA%@*vAMe=c~jrw z&-cIY<_q5O`6r$mPG5ajyFd0EH9EhQyzBUQsB>f&ZJvY4=BKNrv+PyyYERTxT9~br zrur6Eju=^ph0h$M&vkV2(9Hw&D03y7MQl6;$O|ruythh`=PVG`sDKSAFPv3Ez^4x{ z7MyhO1UTUd@Ln+Bq?3Xr1Pc$qLV>*qEC4SAlO;enFbN3nL6!i5B>`e5g$E!63kb-( zeai(|Wt^9oF(E3wvc*M96#6zvF!@{uDhm-8QXxoK2WcadM#8F~ZOal+GQ*iD(9|4O=dGcazDBx$nMPBddmL}wR4W4NQC(6@Ds$1(r&?E) zj?lO9-`sldg+obgpb-=}xo_&Cw={ylgp0G&6)l>Uo*Rz#Rx8(J`~Ua@_&@lMj~xsF z?cy+l9KNc&RMJ>;*^KI$;mBkKE4-4*aKD#MI+?MIM7q$^VVebc(*ua}+X6s3`?nIrq#84KesdRSnoX8NN4C7>A5Efk8&Z}l~7YA?!*-`)1 zwHJL^DXZcOKT(yLQi10};&QDVE9NK@>?BFS+UBfeklG&q?I$mN=&kk#_{7!koc*_r z@A;!Ut2h3|#Cz?1`cM2&>!Q1h-@5aMXN0%tH@QmGmbITf_s>h}U;XhR9*Hp`?i_ws&DMT3mL~@4&1rli~p2 zXmsSJ{%YtbYI4Y7+e@9ohwz3&g-kl>b)FZ!A@UByE>dy&SXDbf2$e&^LBn|=ETogd z_|OSMP0qq=3qZohyVEu}n*3!M13c z^}^By2^$_4@!Di;-HW$Jk(gVE!_6nmYcE?cWw_^9wc$n9C{I17UN4WjJMiEA^_t&@ zd9kg7uyJ7FhF7$8tAM&zyI%y_GN`*^?~=1jx_o`2W$p@b;$bqKSsP2fe`H|2O}j`1 zZR#oaim1=g{?J8(b4*H`)<9DCG7UGhI5ZguUQw2W-6qauyVihjMFqwnQsfK>$yC=R zXG_ZJBrakhDEd>6C0A{j#?pj0jlzxB6~qo8dVZ<^YY@N$N;<2GSr&$oX%;gmyOAus 
zpmq&Gqm;T?U|<+vVj@@q69d$m&&~?1QmzzOD-|+jM*(E>+t#K`0mN|ij`zRplby<{ z#w$KPPc0cK$r?Fjp<(4Mc?J}`ltS8iF~Fki#y3xvZ@G8;zQX_9cw_%RcfaGE-^2F* z@sr#2ce^e-H(yiv^{P+Zx^F1^%Gmo?$y|o5fBM}6{pk%4jlcDbzx~1;N7nk2tcG$~ zE6!c`xAi|cUe`^8*|xJ6VbZzTs@_uBZiL!XD=OJjPh8x2{jXypz=Ajwx#Pq2zbQ1qd%}Cd_`wxtwyd-^A^JuB z<$=;{2q%@s)fYwOdXyu#^E(#{i0X0i(j~}H$Y->)3O*Lnkd#u$SdTtCb_YVZQP>Qv0tptpH*yxEFyNsijvG4 zL9WwInF*&U+KJ$C}Xy+$Zeeu~Z zzu^vS{-@7A&3BBy?t?F$xA4r$i+|N#5&Xrkf3!3%l6KcWeX0~tYvIrPKmW0>nE|tq z76;Ni^pjVAY~p>3v(34F;JnRMXL)w2t}h&39c%HFSrNo+l6HbKk2c`gIY^mn3c&+g zUf5Xi%BPA$Z(+SHJNC+QKtLX)+7fx07uaMG0LM}NAtT^CAi>HBA-p958J6A!9v;Gx zBM2{H;RzsN0yrm~fCrI<5G;rVSO{+=oCF{Mco4vVMEtK;P6hc)C8lU@$ackChzzS- z32T+hi>!>)NEG08ZaI|AVgN5-gLc4yOO%dM0vR7=#n^*P$>rp7rG%s6SS5@_$@}{3 z>c?KD+tan*AexwKs7>R=H?A>~;6fGenvg);+_El~oUq={dY0!2mn=S>FU3GD0C9X3S$$0^xj4QXbU;s&B0#Pn8ecNQkz&=&T)nStdSBjEwhQ@mQHe{(#yi`>dV|rueQph5LyZ&Yeg*; zY_2S1Ec!A0jTk`IPap=broMK_q0m}d1Ra`z&Xgi5?OKx znqyg5lub1Yy`sy4K>(~mu83KsQ^|$GE18tU5u~msNj6F`@Y-neZ2hI=G{yF;XhAo zy(`-Cw||1_EqiYH>!&Z;J38oQbob!ad&YHtv6*$4Mu$LRlD}E}kH7nj>~!@~f1EbY zA3JdK8x~(5pF)N(+j{)VWpXzIbv9Fz#tQ;(*hL+#99gkv%mQkpUDE} zQaYbU99JmQmK{m5B#aW$y2$4NThqaoOrd_i35_6c$w?+BB#Z=oZ%2NAkV25dQq5>Hu>6*CvG^WGJNo9rzlkXo3DH3ut& z%$9cjwtuSLRKv5y*Poet5oqUO5?^hOqX%$B@9R!-IB7ciIk!DoQJMU=6Px<_HfiY^ z-5K>J#ZZEE=D0E2vm~#D(={VBfl>~=a@tvvVWtv+toPW?;;?bS@F$jreO^F&rv&wMp0HoWLACO|W~PVzlI8o8!9x=v#14QGQFd`QUA-!Le3)dhp43WF9tEeUv}C@% z)JX@RHJiyw9?L28tWscBV8vh}Yl#rgB^L@48t%^b74o<%aJ>_~vxT5~NspC4!7rKo z*{8a{P=-eJy>A$gq|1Vi*vAzI?NsV1iF6KSUEFQ5KvJ&Ymd)OJwBxa#|M+-iurV?C zy6<=P?k;V3`PX_IZ+mLzd-uNo-Xkk&_4cY|_9@UxJwss@SKx9Jf9aWB@BU(;t4S9h zpGUfF>iCxJzdKSti3}k&oj(VY%uQ6obI$dG^RRg;P!UB+!bz1El#vF`O6MJt1L2LC zLy$}5b7GXzC{xB3R2rz*%W^`D|7koT7y-hd0+OsYl18KHcjnIR<(zx^*=_Ar-eBkD{a(H=)cH}}%QKh^ zezTEgWu&YPOpHNMZlEYt9Euu>Xal!oa7PkQjWk!ZM z?^Fy(f&5;{rPp3un7%8Y0NNP|G$R&}j>-ruv_vM|RB{!&3*PHNoz!Fntu3L-a1N^o zrA$k0Tp>NFEv1ASqG$%8h7W6|6$J{7kRx=9CG&P1W zO`(}3U>PwWmT7=mrZvwPR{+zvj-~uF`yzhjB7?(br}|OB2XR@-jd3!~w1wd|vm|q= 
zHgK$hKNMhgYs5`$mnhTX_%QdrZ${0dZmC{WgYwg)TRx&K|a*ok{Io4cR zA|jWz*AH}p-a62UCPcAd9b#CF0OHKK#+C|#^%v6+nLkEJAJ zoNMBzR7CCBVH8&A&=I{ptE%lF9+>=QovrLhC)nw2riM(B;jmidQ5JJ3A!tNZf~j~I zwY$e&x+&Z-5k*hkX;1keCjktdEHgvTOmv!)y`&z5DiP9a`a+qqu*PK6VVjFiD160x zz{CWMp^6)7Kn-9(2W$RKnQ=gabS2VJ!AZtCrPTrrZ?0wFjFFx5_kW=OnHo28_!}>4 zQswa^ql8#aBEbwKr9@l9EokKAG~+7bV{4z3$Mnl@ z`_7wwf1GY}I+_~}axIj!t86(@+q;fE@VQS8(@yTRFrF`Uj%7*3qn#&1q;Be_&AN+Q z`wI()?orA$VF?N_2q_62w0M5YsjXwI-#8Gj?k_(!@_M5T6}IM46ZC%#W1xy-ttGSRKgG&a$>CKaX3k2s1d_FluHMW z6T2KkL-UBa4l4pv#Kp+3{LC%jrRJt60siH7_p?;bd~u-_CU*MY-$Ob;1~R zQmt`xPnYMt@icek+SoN0nQga+2DUHPZ~(fG?exJiEJa7Sd| ziV3Y1qlO?A(o#)`XCo>yk3@N<6Ebu46Rj)3gYWGfy|P^0xv}`$k32Q)wpdB=<(;p6 z=Fp$rvN0^{+0DuAILz713umT#OYsQl<*x;aP%ahzP=hhb8KK=X4g4SXbQczOjgALP zxyovxIzQoG*ikw+IrG%y{->PVpUhsqar#bk`lhKzPw#v)iI}Dw1d+?6fQ5sU%3Q{V zHb;$SmLM}MV+0UMu>Q}QXv#D()B>6@Kmpn@)>10D5t34yF)(BVpo&sxI2uG7N)3S_ zFyK(c5-4t&Ay7sSAdv@cF65f^onHw>!)gcuL(=IWOTyxmOo5x+k zbjA|_9SO(6XKhak5TgAEKDC*#lw>RwRFH(T)~3Toe1|fVoH@j}WISnIn-HA?tDVDN z|5nt{YIl3|B?l5Zb*@%T;8gD)-$~5HRv@H-wVE*2(j|>7^#x7U_np~Y`^3NrYc5Z$ z%LK_tC(l{Z=VRAaU*6Vu?3=Ct1&qsM2N|Pj?CT&e!(4LXTs4hzA}uOF`ZKYSL8jE? 
zBUhrg%<3LvUf&edrCBEwfWzV?l~019@h3*Nne&B}Oh^;KBYeNwmw8xwmR# zGM`xG#X?f1j7MV-epksx=s(D4_;agJ#TvDMl2S zu~Z3YO(_h~)KX0WqM5ZkgR2aoP#_kLqSi8K3zjhpFecT=7s|>E=exv+-D6o-)x5=p zkz8nC4L6@S=W2EISN8NhPE(x=SwwQ7DW}GXJmp88lM+Ws7YNHT_FQC^Q42&JHB1kd zwLU*pl8Ngiv3LN5S4R!~{YR4x1cgEE*vy_mUbT0NG~$<=M{~W^aPf-a1~A&tT%uXR z01d5C^j=l14xEHqKMit@B|t{J*zxB}^<_1_KHL9}5^iqm;8dbmDI8L}KX5yU7`|Bz7RhVyWkGA~o%*N>f zyiDG4V0qQs+aBgN%m|-7c(qwHM)ou6)2^=c5e>V1Y_6NHmpV@ifSd$q-wRnb4Ei@+ zlO`(WqXN@OZzNK=a7*MWCZp6KSw-Nurn}-|XGKxKfC9#FW2mByM)hdM04SVxpIFH0dpQwTxe=)pyq0&r6R?j4;MCSFO;>=qa1#JL0V;_<=!?SnJ7XQyw> zPM-*#og6(qcKoT+4zh_KAx3dfKqO#Nxk5Wxihvrb63#Lonx&M>ET9osTDkHl17lhf zu0W`!ngNDomU0*kP)d~{1b}fQp@vxlOYpy0stF-lC_`bvpcFS?Oc`bgh@!+Y0O`n= zN-B=7HMn5A#!c4YwYK(D!2kdt07*naREzdb{D8stKD7v@ORxVi3UEl85ycW=g(lF{ z4M`xm%~l%M z$!$+Dq8sgri}h~rcyeDLU7vvDkQAxT1jit8DCw8>*5Wuf6iOteI^{97)aslz?I^$F z?9!;(%ZwZA_SyYKHqF8?A^qp%;ruo$yV-`z9hSC`I0?~%@+8fbX>H*v?>chv&&2R$ zFYm>LY{XuyJ4t3r%tUL__?{DhaKoQ?tV-|Rn$-@^c&?Hz>ra)u6cb`ko^PQ%;^i56cT;`_I?NWyctmX?cGmaIKQFN|l1Gq@t6e!$$ zwR+dW*5S40-u2iMWygo}P2iMGiGGn#l zm{f6>TC2dI00?E7jRlZtn!EbA&|sQ^Dhf~wrZr(12eH-?uAxA{thSaLPy>Jv7{j3{ zKqO~IGZ+dOO_^43EQJL$gLX71(TqjoUvX57D@y@WvwO7Z%CQ&rO~bUkJn-meUSuqq zyz%XGL=mR7r6zJ2CsH{~N6aBA7daVYW+)u#M3gDPtxavfv`^H$qMeZ&2Z*~OaJ$;LT3JvYZ=qZ}84^_E6_(YBocDbD!q^P`AzRd<1j&9O`4)wtxeHasEudaaNY zsG!4WxIJEU(W&q~k#9bK>Xiq_^GPmV&gFVK@CdUD2x1k@7t3K~%w1;u%zVbC#nuw$ z!h)o#t>3)wLTEv0+;i$3FX$Hd3Rtmr&D2)>fJ$LNP6B)lo=B*p#xKy~mhWwpMs+^! 
z^BOwCfI1oqaGGuH;<cu8v4roM#;zVOF>A=3ERP5*e8>p~?#{jtxl zi5<;C8VX@~Jd$m>#L>pdhSG>48bvy+CjkqcTsQP#325e|PCC~QdOc`OU1&9Thfd$f?Uqa$$=bFUu;*E%h#=ZI~ ziq*fY{4B+VPd^DKEFS&Q{Z=#W3_NE*iE$k|Bw$3K$0%_^LQ@Xa^0TsR7wyoa(lu>9 zTXW-K>#1XG~5_%l^jnW9Ou)3>p|MR3AIBzq7>;jh>Hokl~c?3=L<($($r_ zu5<-2doqqFMygTQ3rbBQq;+e$Q+Bgj>k?S5lrw=FT?s17PV(GKiwhewxkq0%x1N$N z+hIB`eZIbDb1Uc2wl}vtHrg{jR9GsVX?fL0-*)J%n;X#|`Mz`Vn6DA8xKm9~e-m{<2T z$Vq^&^=j172|aj5ipT>zmuVq8J9t-CJC;Bj@a=QWeeRik?I!An@-R@8!x#z!YGAe- z(|aOENGBLP>rH{TxQ&{ zRO;f#KAtP4z(@8x^m68^lsRJ$etu=5GN3@t>*-?J)hw4Kn%Qo^l^bvt_483a$wi)% z4e~lQ90XRmTDq=@hIKI0P#T3nav}~lAFZQ_I-02Oe(B)q;qcmxorM!CTgMygM}}9A zcjs@4FCLFC?AP-*xMz_>3UG(FzqH3Q!KC6wSPQOG#%<fXROo`M%d z(W}k$o5L z`6*{}daUoM&F$5~ZpvDSWt95;Xmm;#kduH|+AQXMYH;|4D5(!jXCnu2u!2m%M=B{b zii+U_*d&xdyy*BXC9p8No^?8^&li&I5()4WG za;voneVQe%8<{Lfq)S2FdbsR4%uSJ)nRcu=v4A%;oT`X(CF6pkIL>eX+Qna(JYs(O zY{LaoH$~y*y_eXIb7KE#Z~t@NvB&Z^K8xdLqZ==%Bj@Cid39vrkvG2eiyKWS9vr(+<+&b29y~_i7}c&WDG<@QcI)|LmPmec`ip3n4y_tETJ&g5Jn^zq0)imL{SP& zVF;)N3@8Dt0W$zGh8xQjp~i5M0n~uQ7)qgG!I+YQX_D^wv~(S|c%D*1c8xg0_TtOS zz^5+G{4xU`eg4UTi1NRDG}v)%ej?YC08Yp zm0r86gJ7e&Z{teu`0z?==fzuc)wA>r2ej~t9kG2HFPca@f= z6Pio-#By^M&#ji6Rq915ou>JT{^C$^CE|AV=6ve{7lX?5z#bY7$Y9Wu-tH|aut8Ch zz@DJVK~4f}yIU-@6h?N4DR*@OZ(5JbuyFJ(}aqWcJdx|_NpKk6w%K@FwGP)ebI z&6VAg%H(|XjX${hU;ge=Uh(dYzZ9K1`+Pg?rG>lRd}XR; zSN>O1UAiTgZS2T;n*FtW{H<9|OKQiz@uh{SxlN%MW3o}HZnfu1yl3N1iEfDi<-VaF zeA%Ra)kecS8ez@&J<2Cbjc9%U4{g8dlZ`I|-}tky!;rUMXaI1LPTZSaIP5&TJ9zN` zKYf^=K2UsWZ{^g)*t0YB=ceoD$7`pS(s#Z2FBTgBFWh`EF`97@rVPs>ms?{TYq-f= z1#oa=(8h~nKj;gC;xjJ_&8*_a2+cIru;9?zfua-!Foam9c}4+h2n;uxD+Uk)SOCOQ zSin+d6kw^L79i9ZfEfzSv=|)zB+-m-T#30+H%x@hl|u&re5NUW-UY5+O&&k5%-lQY zn5H7L)EmNO4wPlqG}=2vzuxtYB_iRO%5hg~xfONPq478sa8%ZMjJ+<8h3ETkB-v8E z)EfW&W^_@`p*wTe{MFJ-P0UwfZ*(-N`6>%t<@jw9c<61r`6Q(d2R8OZ?U{T)Lfdc4 z@k*~9c5`kz)vI+^JR+M%k7&M$xwN{`IuIq~o4eg${}ZJ#<*z@p>$Y(Ff;XLywMZ|| z*l259`Qr}b&eNkKG|ZYWTx}QL+F9SvR)>Y<@iDP$Q#!s4b1s&jz1rA6K2aA}mgc;u zGnC+FOYt~AKH{`g-r)|hI&#{QiUK-dX1}4oaz3eC(@V 
z+sN--6IQcAcTh$HmHhZdY*K%qgjX>2FOTdqw1(Le2d|IgfQ!ykF? zn}NT2>Z8DOAOD;sy?3-WQa1$--Mrd5eywxkQup}!=ItwMcdcJLkv@Mbo;%7<9jrYu zGx7Z^)%JVd^#=_;tYt(~D936Jnla5u#w@NghHEfmxz!8?lu1H0h$Ym35~ZogEQ1DP zgPT6AY*BaTyF|IWCVTS-b^y41pDexP1b}nNSH8_fXX0n?iaZvB^Fh(@OqF!X2u-s} z!n>WBoY0`u8ot&%2IWM*|KyHcM^5;@)ArnNR>YC~!)f}$;D+)6|L;G2c>2Mqa=V|a z^nz85z?5$L?>{_D=THK>4>-1Tl#5G@qX<=*kkVZSGX!+7>y-=YAp~;q*86m^` z)`d3>zBc-{4%_TZBX_1hnv+jg>)U}`c>9%o(IY!Xrpn=W)@~ltS+cq(FiFer#T96; zacFm_6uJMA#^IlRG+)%(8yT!VxbKHA$hlm@6xHd4#?XHa3oXCeb26vauUZWjVRd;{iB?mq4b4z~hF( z7%CN|p=LVLOe?PNuQE8uoGg!xLlY?+raclnt~Hbpp)4SLODPd{V*CrkU-^fHz#uvC z>kr=ji3d^y@cOw>FqvxyvN6$jwJRcB{;4k=|E-v_SotG=bzdZkL1obqF>BZ826mdR zgW5J9a3`&|oeVKQLN1B=O2_b5+&Q}UJ+FBO@SdMP0lfF8j{sjk^$j{f9goiJBNwNG1zy>pqRVsyt&qvp54`D57B0F#>9sR57KA#= z8VX{Gm4X_Q1=hr#F_bGXNi2&kbqwJ;|H89=#)#I8X4DceYd{QvAr_Qa4FY0`0kI&k zglcNIww7575Mro>g(ZX%4Zt!3rWLoKnp(>Zfo2SYgCA!Qq<^;W4hy>{J1ckNy1&`T zPhI^vTzBCUPct?MZhLnHzK&c**uVoHe5)&y3DLin_?^ylIiSiq9Jy2L_te~U@xpdm z*hBX3%wKD~{JdGO?|F2At8P$0?ZJ_{n_J!}$TBr+6C$piSefvac24x8{?_f~d~%NO z$Xx`}YX=;rBFn{kZnVYs3~ZW92JM;2_VZ?6xU+w@asv@_3m0D7{$%xD=3M9<#_7pw zdj1M&MKV{&m0P(Rj+|3F(Cs#dqjkDm*tKEmRi00^UG#P*g#n*_{tNY%dimI(?si)# zxiaq^9H}f{86cO0nu*-Da&ut>7dfBT#9F@U4I86mqelzPz8h2n6~)fDue_|G1TB>; z$@luP=Npid0N)sPbFSfe$ec-O%ft#lpuu#t1W*GG!%Y;kWDvv2e8mAMfGcn)z{SRm zqnc?jhbS;cq{FDSlsbent8;p9^^^FO&ow<^TCe-5&e(514h8N@FNAI#Zwh)^IT9R0mx_^P2%nWhFx*pmJM*6jRs60q_602}{1*{TV|a{LEhgy?3m)Q=mE*x64{q+g&Gf5T-6tpjk%~ z|3Clv^?!2dxjLHD@0^s}fby78seKs8z!(@egGaRm*WZ=e-EPc!yfjGrTtQ|<`U9c z@X4Lcwe}QvW^>W7JJ=(qAx$se9gIz9D?81(_SnR9^z6|cWb6Ee*NuH5J~Vu>Q<+{D zoi=cLr^{6p`x+a2_F+Et+=J}$`JxkzjdVP*tPgCH>b2rs-51hPcl$iL+dGQT`5i*1 z%grR{HW?StOnZ(n!ZUEj_^`-?Q|=FDR2DCl{LNh0%dc(}^1u+m?()!Sw`pQL&S9Wv z$=4t!0dZ*&a55i?1Bg-fWy%3CEh0WD3@B(Kvz~TS1#m!tD}XBw1r9*v(#9!@v9Pjeq@6s-cQZsuVe>Qg34vP{^tD(Az%v zqwnpI%$3F3r@z&`@^Gp5CrLe7${$&b#a{O49rVE8*6wrN1VyZ@dm^F zq5zo|9Ax?7FWvn43m4r|uk?oL#2PT35gzM&!U1A61Czo~XsW1XqQ_}U0c)tsPo43> zl%W>a5g1|!00K~Jh-QEVtffSAstl;%(10b{VG=@tKvSc@!HlKWf&d`Kfi_^8Fil`s 
z|M*88ijZAfrLntv?{e=pO8^Y|UJXBR=7Z;jj>`L9`i7WrS5d8u8hQG^=_CFAz9ee; zBaKQY^u`P6XGU+nR_Smf`o7oixAP6USM9mNXs_tNC5-wb1AkMIr7}>{lOKpmP^liBZHLft*~hU!L8J$qrW=LAj9j%87?U#k{} zyx*9y+j;t|GfLJ@@mrGDZzFLpK0~WRQ#j z$Pc456mT*)05}xD0Voc|6}aM1TowD_t$`+>nGlLIOAVmq5Gv`l20*j!zWe;s0E86s z_wKs1>n|RYAx6@-OP<7T6jde*(@i` z!*AUEKl{IK$)}IpVadntx(WDT;b(x0U;OU?e&OFAgZ;@rssSH(p9B1L@*YjT_2gSt z&OMLw=juQK5@~baXd+zd8N~%R%*se1(;AWMZ-=XY@)K2D_qt~az|^Y>HqH@cl}94V zY{nQ%8Ota)7L?GKQ6)VZQEG*)`u5Kww z3;AOsGJD|3wD8tfwK{^F--t@b{UBPX!JwGepmZ}o9dC#3Wiw&ri}FxjCvdkKg`mI! z$0Wr<=DPDsF33rMuXR8j18Oo27N=E~z|+RbG{CfG(a-^(=%w0KL%=~!4ITW8%*o&& zbBg`&R#!O=B2OqnW78W9(hzC0-mK#X+*gOEGh8RrKi~g{H?KXO^f8sSp(gq%!}umg z=xUdxx^QHv_T#gKyp3BmLxL}Sy{rI3xs>2CrG7`Bdf?ozS@NY_cNqGayKew~clS?N z^6kg}D}XnDYzFw{4^!Yj|GU!C_PMP1|4z)8hfa{K7FT}4d|;93aBjyd?@2t85y!wW_~QEW z(&L~c zGm;jmvCfva$8?vy%VyT?r;ecD8BK+^E-dq1-g1SgTwHQNP6B+X;cHz0gi_i_2+9)V z6*3Jir~zcE!AefVXm0}(Rk{}LEXi;H&A=6|<6U`cQDrRY(lkWa2G9iR`2WltJfA9L zkofG({f8fZB8d>Re1Ns9&=UY60NU{L6iH=77Y}I;V{%tD(S{Y z@yKsm@@F^SV(91Y+y}h(j(1sd@y|aDZ2#sv_B7U?n`-bzE6bP;vdqyA1EIWiArx}- zHkBztL)Cm7KY#8@RimI&Xoer)!to_li5I5=}KgEi@y> zaHCvnpaQM&1BL>W+Kj`15E!Px(0|1rgdWpFb8*{Og&SLX|5tWcv)rw+q_A@LV{XpOj~V5Dzs6f-+X%6y!6^X9DP~uYmM7aoLitXJA>X<;^mikeeI@AHn+B5 zyra$&gZ=Ltd~ti<#V500zxTgIYJB&0yR)*97Rn(H?zp2uSFf=Xc=GRidm}lz?s}!2 zyK&la-h7a%E)j_RB?&ARBY3RiJhp02E)k2YDACwUK`mKx~H>0~)Y z$~I#8^0g%w^nfF8MJ1AMuc)z`iqFq@_1}0>Mqr%vQOxlTHPPlOOb@`^GN-pF42``0}y+z_0!Ay;lG2g*Tm=U!yu!Y9x^` z6{`JAxzaMtX@+)IgC~A(;+Z{buVueM zXf*Q*6#xJr07*naR4D447%rLS6owK_wWfu{u_on&r;+DmgjmgZ3Ly(5(a_&(I+|Du zO9-yBL{np#p_)*DD8e+c6d=|x#h?H%Yl+dWwxHGm28?RPBw#3&oB&K|!l4*53fx-O zf9G!w70Wa(3{6r!)@j}Ty?gsDKEHo>xjMNX@4oz@?LdYzyMFkdR6Eui!b?c0^~L9L z@8!Qe_R8kB)E)ehc=UL_$X_^f+f)ldb0S(iV4kNlJJVi&IeW`uGJEE9_}`~KX6WIZ zTxv91gTiQMG&y_N#44-pj{k)?wSvB65vZM0Z7kiMj86dGXPC= z5!z4~KszjhX0a$M)$dA?Z%;#O_ysYHBpA~`3*|5rQuwi4MZJkSfF|m=u2~0Mq^JU! 
z!(-D$96{i4I zMi_(cf86=hE4GT?*jbaE=f@{n;ngBfHrFQQ=AJ`O z#N$Wf^R3c}^|Vqxb1wUB^5r6%>PxY@IUE!wws#HZUPpC~szH>yf2p{HP~6#HEX27< zzkK<4xhS^N@H-oOe(3V%+s3}KwbI*JQ@vzk|JxkWD^<5EC8mc%CKO1(6R$Ja4L`js z#!t0J%QAT4F=PX{uYRc$Z(il&W9J?T(}~&jG?u~-Dk!Sg&h->)ogXq5>Rg!0+MfE0 zu}V9Qjfr=UUt8;B7z)UvM{Q+0BaJb;E&M_3^va{A=T_+XIZBWiN~2EEn65w7jBQfk z24wp36(8gzz^`o!08B#yg1LbT;m{$1lrsDPg+b<*y;;Zq;R*l_I8<2r!uYw-k6(<_cyA+R!yQ{ptI zO>%Kgrj!o?CK}&$g{&4R%UsH7Gu?RmR(&F4RHckCC3GTOn{iF9{o&YlX%jnsC-YP4 zIh-3qEeCi)xTXYD3O7_)kZNC2$7qM9U>Rk^2`$qPgxpv#W0?kprkcSLMF1)pOC72x zA&SCqVvPX-fwfE%3ua&erVS$$)>>jMl&c9Wg>j{(+!(5uBU%5)e_0x4$@W?UDmzkn z<}D27gGcjgV-tHgkbLm(80+&Jj=pL)3}ZAi1eO-R^QSi)dh(Iu_g;JWE&hXR!ArdV zb;s@BTG)TfQ?1)CyzI&o_jVsmcf2Wlpz)d?{ZcYpJ#_)U+JB%d_MUs zjnbROG?q#c zkL>PD=CcUHoQmKGM-Li3H>hnirlxyECu8c$18V~p>5S3xsVtn!^AX)bSXuf%u0I$K(m>sr!UQ4M&?73NYcIBcEauVRzHo0IL(9lq6Rx2SMq^@R#0j>+Q zhJX}cPSf=Va9dNZf;QA>FyBxPZ@CK}-xhseJ#XXZ)MXTj2X!F3#pQd~z3pg^hd9Y$!Z zJO##h5IU`jprp_UJtwj`#UsyCaUhrt#bCSUZeQVGT5%C^l#8v4?mJuMl3_-t)FDbh zaYg~H;`gzCEYMvCewT}mPaOwD0o*bt<}BgXp_$@FK}bX38c5D191UXq7&Lq+6vj|Z z0Yd?3OAV-@#=?RqLIDs)jA4KVtN|lNaRv>5AafxJqtW5Pj|ol9?zXw z*`}cQA%8&f`FJtBwqb0sd(SfuPXM1+s0`IRcLJ;b`I`~4>fHNh`;n*I5z3eX70uJv zzx(R_Z$EMFgYH|M`a}8W-f{HvR}LRL_1ZW-xSCJAes#F*LHq5Z>RsceiIZ2cH>@Ar zRb2Y+{br?|j#aZe0^YaZl!U>$Ztv{(}#7^~A#tyH8Q$0BtKK;Cgy5pCwGrgi0 zB*XgJ)JV@aqk5w?a=LmrKb@i|>o8%mTHIdQtb1KNu{uEY?bGvJ<&o6hR1?OTQRX}; zqr%6WhioH@vwEF~OVf$qut8BFma{2EYZwSjab?A+%L&K%HIA9eA7*EnJ0GU)|!Pw;vZfJE{*b)H}!XNS8eR8C-qxOS4&qS)bGWvoCEIc=Qk7*{ohT|LgyOeQbH~Tfg=2^(tYnZcY`v{L5?7 zQ-FGadQLSsPzl5+UT2K~e0&OZC4)`V->c1zuEwklZ>aISEkG3+FhjU|}c&v5v(A z25~Ngb(A_=*3{Qi%HRU1C)_!PUOdU;N-bpj5tMYCf4!W&Hu4wQiTZsWP>iPc;(Ghihu;rLldz3kPJkQQvIV^CA88 zY3%w9(nb1%A_2Qt@&?GE&b84{S*KFePbu?`Q&{QBbVoY;lGLZX!H0lA2fu8ALucl z|G7Se2iKGWo&u0yA%Sz_N>eyEAYzWmoWbu@g`XS@O6g@viY^pft}?uBfBOdxTPz%` zL3&{Je6)Jc?l@*8A&@R`4aRhso^hu7E}mBc37=byHoGI7GFVP1ao#1~5fNL{2ns=_ zR&XAP65QG-Xs5L-1)fN`r-l*)794XRPQWosJzrXZ4l**z~i>ZrMVY5t)-&#&(KxmH@ePS&pN8qJn5(Ld5(Y_+!2kgnQplFZbq`C4m5 
zk?E1eMtiKjmN+dY&fNa}&+6tc%|V?#Exb(lW`%oa0s}lH!a0XXIrHY(a&~dGCB`>8 z4Zk_J_?$(Yj5HFvw<||Wq9&t(c}m=EQ)p;(aDo@a5m%!|W_L*$=M>WcRtPpPzkr|_U!Qp!Dz})iM;vi%-)3Sg~7q&IB;TTmq1Vy~ZnbsX%C+3NaQux3;D5wLpVh#}$ zA`8v{FgU3Jsi}jo4$_gp0L>ggEoC7PfG7?SY8bXK2Lgm(IRFNi{lC z)%)6qNANc;{lQZemzi5;5B{t%f(b%&>66OxpIiU*n@oM~O^D68+VucR{%e!EiJapr~F^z~l4YxL4XcYnLJ zlk#)b9e%80`rL?S7WuIDjoo)tVPF65>dt-T`C`m2-qajBJfPuxew6pCV!()uQ#CyX zy>gN^k+kZ~-h4%@M)J0JV`Iv%VwBrvw>tc!%07C3>dwDHab&XIY1;DxVfqwYc)%sQ zHaAMKYjlL9y=qKKA@a`tXeq^%CCS1>jdf;tq(`6-pY84`bR=vjSgV)avVm1P;}{Ex z)=@5rW6`jRTqTUgBj381fE)+p`Q-@U5@1gEG^AD?w9vuaj6-8l%cKpUgMN_#boIx-j_b2k$Ab zfAg+LT|5-G4_v6!0%%iFM^ZXN5|*UjsE!XR8;=hqJS_K~drv6^N1RYsLpQHPjyeWY z5Q?-+r?!=+vxr;A9ALRM!f2^g2ao*ntKZ??Qcu07);nLXT$OD6Ks_SX1tz>_%m-E+ z+M9A4rpDI_(wTeF#V6c07e}G0kl105RM!nP+QMqbx(grJ@$$ur^`2T#4^!^H%b3RC zs~Ut*>OJ+0c&c62#SM`HB5aXb3WLOgTMmXFic|zZfiuT6XYk%a1b9kGpq>yoC!oLq zFy#OGL*0nEA~B&ryG z{r2R1^WI{4zkK$}q1)#-+avG1{M4PdKDRXSuJsqPy&w8wIl|`7F0dmR!v2eA4-VSx zbl2#@o8EWHAM5m=Kk!z|nOY51W7u?V%dd~Wm6S0#b8<9wt}}8~{`9lk9$XImd+seE zw!=y8^LRqd*CT)CQfd9s5w?49`nqaDAKrGVog~AgmegpP&3yG*_Ru36Vfl5ByPLIP zrJ3eZZx&Z-;~Rc{)7x3MV<3BNCEM}r3BC#De$J9!>YxTvQH<|9~iM#pB;n4c|xm){-i)npU?M+$Zs*}^5>v!69bG>fg5nsZJm=r~&)~?nDf>3QIqo=o%YSj#3(rb#4GLH|z`|f2ty-tYQR=Pro*G7FygX=3-BE*Fh^hc3&PL~I zZ?d&F54qbN;2EYUT;k7 zFJH)7hkIvcZhmz&zIo*P+3X?na(w&c*Z1BfpI?iPt_%*n>5H?~KnE|E4<{GWVl8hU zh}MqXdH(f>?;M^t#~^Heu_37oR8Iy6LA2*`Ig{kY-X(P~$Nk&6TwHkbDxEb;vOyEy ztF-9w{7XI8Z9OpQ!~7MkI|pyyzp}Il4E0-%Y??5zPoysTm8v>*ddKL@2)A|Vg?EZJ z46P2M(yrCMZIfL$6Hj2vQD=*)TcaeQR2pW7wUA_eJ}nG+t@8bscqEaspPC3!>bHB| zYfrgIGZrTMhI=3JfopE>TumhPr&lA8;{dV;>0Ua zJ1WK{Gx@Y@Dv_^=K$qb+Xw!+`AnBX z83zZ;tpjtGf9&AXFGld*XgI*aS(p^4CkIDQ4LHEy01JZybje}cu`0xKgNBFH-tx0Y ze#YVh%5vJkmhjirR3vqiae!*he4t!2PdszXBM|06 z9Mu1~Z^&RAh?60&DqGpSvOck+F74*ZcfNp%QqjIqzT=}Y1EG!qpblJJ7>L`y^32+( zj4!A7rCWZp+hT1`NBh^^Cr17Yi)M${aY&uVo$vh<#K-mH5$xsFiQCJIv)S69yQ|u{ z{{jBO#?8|!{IG8NtnXbVlQliUsrJEJ$_B%nt;}D}j`(8F%`ED!-q8h%SL6gp$eN8( 
z;DvMZ_4^vZvg;R%-nP3;#jj*;DdqRR&G#!;$DX=c!DFMvH{IK488yw63ocexI;s$} zf?l)zIli)ef*P%*57Na$zR~ntA%YktjAglq6W>|Qp0uvQwCW#y5S5HSKXhjM@q#}vh7;lWU&*}q| z+8oR=Aw7q;p7YE@zzJExT~VEb08A2PMohSTI5Lu_txB4W)oj_Fl<)tAbi>q?No*kk z9EPI}!;7RR0x`Ky%e7%w^nbt)9!(M%Eq(!@7VsSK9+ZM&bz|le;g2o1RCze(01G&{ zU%mOy&R0t7jnkG}2MePy#$`V7$o%tQjxpz$vv9lZ6;#KLP6WAjDF-Y7>0AFm;m$VY z`)Ui9?rh-Kug_iDHq}8x3QIVRFGTg49V>$6!Eqj))f08^M+VeeL$r1IV3&<6n_n#$ zca-@M5V!*oEMu%!2m`?Z1~@RT7#u_7ARV0Lc!R|jhYAJ}BFBRRSnvdJl)*8_y`>C+ zIQ!{Cn@P?zY-B{4P+hq?^xj35fOzJSz^MA^Pp5Z| zeTi@Tc=M?*#^hu3Ux4NO(Yt>;74DJi&r%r=$Bw+7Ri;1DeK}8u*Vn3D)CTR%?ty6S zfj3u9E54qMKVSqi#ZXnOj>d_s`k>-ik4)y9>bCZ{a-~dCj1J>w znRq*YW?RjGR@Lj*M-IuJs&6j#Gss)+8|lQWqnA&Z^s#-XpStNDFLbQNE6Rw(37dwd zbhf;1Dm%)oHzNbTUaz?jw502daxokUY^Yi~Y}Q@FFQm_3;*i=p+qkze=ZhKeFy9hB zp?%SribX+0nGi;7M9E<8YZnuc;{dQmca&&}nsAGpii{N2S}h zbgq8D+L#aHS_$imI``UBZK@vDC7P<$`PL*;Mvp~G@s`o~NK*8+)3h9Inn60lANkod zlvP^P0x4Xj+aJbhuRkrGyOuhu%TmKSi)eWCy>Tp)vtQw$mcoL9f?H2PR|mU)ZTZV- zX}og^jyVf%kA(QZoW%}YY0pgGKJCqfQL*-( z(0Fb0gB>;{##nZsbKz~kQ(sC}_s@h_23XGm=9_*x4a3-$MF2JVNG++ZT-I>Pb1T=_ zt$Qq@{!;~|;II_TFk-1-8537jq!1}=iJ%ZcMW{v!TR0fNVF5TG0;`rez(E2S9E9_f zS{@uIbzqiLr=m~o=~qe}DMTbFlQ43vF-tCAy>j_w7x`RN>!RxSd|VJOsAir8Be-@9 z2sX&izy9;Bzc~BxU0wX~>Mb8Be|PO*Se^W*7a!N7A4~r@A3OX?`u6CV`s_!h0Yff* zvgA%}tZ#O2N*3PoAU~@fIQz|=A0k21Zpu6>$C}sv2Az3nbm>%ldf z!3Hi2HLf-8CJ=?e+SaeyyuFo2=3&Kd!5ph$tv!7k9jmR1K_S`Syx zX0}`1jh#t*qOmd{uCr&yiX#1?2e+}NUM0oq{M=Zih8UNHibhPSn$@(o6KOd*o(=Qu z;ypi=+Z-FYF0+lyTwgB-MbSHc^Y3*d>ww4t4$So*dPp)d^4b&JbFX<^!gIXAUh5w? 
za=l$R4OoRO23kTZ+`s+#0cQXlEVdW`HYHGcKAil@U7 z*!mi7-K}kH^GVBnkj{ZI=LrZCCBzj4L{PXPQg}lIg&&I)Ziv*_C@Og_JwOTI1A}*r zIYt5r&M`s*oUMOqoA$7#Z?rz9oBbL)f+!g6A{ zb0J#3dYfE)?cIfHe)r_hSMdDFe|Yt;e)3lOt>+(`^Dlk&-EaTY^6`m3?L4+qGZUQr z&NrA>y|ui3AX&ZV&C&4GUD1j0!@`9JgmX}1xzbV7xeg={Nl|Jds>Fp{>xEM$$}9KmRxC2v+SB#*`bQhz8NP)vVlhV{ zOV2aPndj7(9al~?hsC%ATa`@}K)s6Axv**^HoTM9BI2-n^W4ePV9YtIB0h1;UY?I6 z+IFJ}*s_wQ^*XWM(;zrOkIaC6|7rqq9N-HZ49tm4S#S$(xiwm$(*o-A09xo^%iK?r z-l?`swvX;8^KrVqZYjS-TpnsZ)J902MqX93)%n(Tsg)gNTJZ^8>S{Gj=jbpSpBUuD z4)(S^pIx`5HcrDT0|6tl$xm#4iCP2a8~_VIz2#l8rInYTp;lT-DJ=B>D5zd>eY{zo zwj7pQhb0Vdxy_#U=63kO)hq%&{c?_BDJd(S*yVE{Cs`2PNdbaMY>&22?S-Ddy* z2Q5iNK~!{a#j6!sCfiJLe&*=Dy=yPV??Y$Mx83j0wl3}2?yt3Fp@eDocbJ)@#rm_$ z*6gcU`Fb1m@$s=9_2`dsZadY(vj(;HQ;t!9T7VIc5UR*xvjhc13V&M!g}-OAj!K?$ z=2!qBme3GLAcPu9JqeamPvG$QejO;;3@$F15!NOPJI0hY#4Nu0s`3Gh?|gJ`DU9UI zhY*-If`vd_kcH)>AktXsNh%vzlDSI1b4y9fjqOUkIeQS0?Hvwk+>QDk|M}87SeZCaAp>D;|}DPhG)B4*!Kba_!)r`CWcc z;Z{eXU!{0$P$3o2T8~c@N;Knck2Rm{)p&N`Ktn5L^Q8+}ul88uwUq~0;ys^*4IHlI zF%09OWJ{|DCwp#8&~jZ)c*T&3XmdCYRSU>`UWO^2Po7*qw*)Yj!?DW2o%?b##Z9r$ z91+0O(RiFKaw3JeS0<}Ds@U&dD2Fd++#lZp& z7F&$SQ0?x0&%$XqSir#oMq})-5$yu{*r6|W8FP*~*l17ZYC5(bRoXta_s}z1b@m+1 z?Y!S}_@429-`+ng#jR(%-N$VE<|&*zv5LLmYZu0P6MJt;UhL}G$?KF~JcBv4cXA*- z%_7fbcjE9^u+8gVQ=AbG8YnEGUPo~z-BtWOofbMR{4JdpIygA$s#(qhKoJ531waf3 zJUj;+0R zttiGIxT>VOBd8`67ye@P-hcd^6BF#+^_7~`%vRt;+R$Q}u23>*{(Ea33q?Dw>GFxtW~2MgZ+$eScr$%*GA zD8OO4kCf){TrYwntG=y$1<=^y01G%Uo*#JAt83Il1CBWhhlojvDDGE3aPcL;dFr9# zxsByw*Hn?n>t}A-^9q9=8FzNcM)kXn{ne#GNL#=7^+omGa=3Sb&A;51(-56S#FQT1 zesQR0<<&S|UPU$AUC;N#=`}eUxl4@$4a+Ot?*szIoTC6dbDj$&u$oCN0WAS&KmyQJ zqa|+863~#?qMI%7LJLa?2oMNJ2-H(o**2X4VovRaFe*5XQ|(u|Urg~fw2DJ?@7 zY>YMrgJ?7+jxw8sz(BnS3>k@x)XWDBv6o` zK`XuAUry8E9(w)myCg2!-D;XUnUTa9JG6CKWyHCn;W+IUoQi_;KVKUA$na}==Y6G) zob*<5$NLpZsME#byroq7yuykEPWVhyp5K0F#zS|qO~1Zwr{5p{p#8u^#rOTb0wMC$ z5Kzk;RMa*}(5x1O%~&K0F1&Tj4XYa?)kp=XEa@Tcw9jb(CDV{ck8DU)?h}snc|$qo zh6s`6l4!xBY*?2xI=49BAjbhvfC8R^1dsrLAm9dy8z?*lU`s-%K=1$ph$jRATL=Qd 
zk5NK`10}(O0v@0~FgOr|KnQ}30C54}2;Q2Qfgt1&BMQ`dFwWor z!bkwDtGeXwBMSr8sv00N1y#Z&y42Zd(@2n2seAowBb@k4(H|n1ArSlsbIb>T1du?eXM}@d3klGM2SN-0f`Bc| q0|)>?06_r22?YrLzyI(5VgCoyHv1}bKYqag0000 Tuple[np.ndarray, np.ndarray]:\n", + " \"\"\"\n", + " Randomly initalize data and centroids of the clusters.\n", + " \n", + " n_elements: int\n", + " Number of elements/observations that need to be clusters\n", + " n_dims: int\n", + " Dimension of the elements/observations\n", + " n_centroids: int\n", + " Number of clusters\n", + "\n", + " Returns:\n", + " A Tuple with observations and centroids\n", + " \"\"\"\n", + " data = rng.random((n_elements, n_dims))\n", + " centroids = rng.random((n_centroids, n_dims))\n", + " return data, centroids" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "58705e8e-dc70-4039-a820-b8e596b8b05b", + "metadata": {}, + "outputs": [], + "source": [ + "def calculate_distances(data: np.ndarray, centroids: np.ndarray, data_magnitude_squared: np.ndarray) -> np.ndarray:\n", + " \"\"\"\n", + " Return pairwise distance between the data and centroids.\n", + "\n", + " data: np.ndarray\n", + " Observations that need to be clustered\n", + " centroids: np.ndarray\n", + " The center of the clusters\n", + " data_magnitude_squared: np.ndarray\n", + " Square of magnitude of observations (|y|^2)\n", + "\n", + " Returns: np.ndarray\n", + " \n", + " \"\"\"\n", + " centroid_dots = np.square(np.linalg.norm(centroids, ord=2, axis=1))\n", + " pairwise_distances = ( \n", + " data_magnitude_squared[:, np.newaxis] + centroid_dots[np.newaxis, :]\n", + " )\n", + " # ||x-y||^2 = ||x||^2 + ||y||^2 - 2 x . 
y\n", + " # pairwise_distances has ||x||^2 + ||y||^2, so beta = 1\n", + " # The gemm calculates x.y for all x and y, so alpha = -2.0\n", + " pairwise_distances -= 2.0 * np.dot(data, centroids.T)\n", + " return pairwise_distances" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "670efdea-fffa-4835-83bf-04c8dbc544ee", + "metadata": {}, + "outputs": [], + "source": [ + "def relabel(pairwise_distances: np.ndarray) -> np.ndarray:\n", + " return np.argmin(pairwise_distances, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cc81dad6-7661-449e-adaf-58c35698dfc3", + "metadata": {}, + "outputs": [], + "source": [ + "def find_centroids(\n", + " centroids: np.ndarray, \n", + " data: np.ndarray, \n", + " labels: np.ndarray, \n", + " pairwise_distances: np.ndarray,\n", + " zero_point: np.ndarray,\n", + " n_centroids: int\n", + ") -> np.ndarray:\n", + " \"\"\"Find centroids following the algorithm in the reference mentioned earlier\n", + " centroids: np.ndarray\n", + " The center of the clusters\n", + " data: np.ndarray\n", + " Observations that need to be clustered\n", + " labels: np.ndarray\n", + " The clusters the data belong to\n", + " pairwise_distances: np.ndarray\n", + " Pairwise distance between each data point and centroid\n", + " zero_point: np.ndarray\n", + " \n", + " n_centroids: int\n", + " Number of clusters\n", + " \"\"\"\n", + " # Get the number of points associated with each centroid\n", + " counts = np.bincount(labels, minlength=n_centroids)\n", + " # Build label masks for each centroid and sum across all the\n", + " # points associated with each new centroid\n", + " distance_sum = 0.0 \n", + " for idx in range(n_centroids):\n", + " # Boolean mask indicating where the points are for this center\n", + " centroid_mask = labels == idx \n", + " centroids[idx, :] = np.sum(\n", + " np.where(centroid_mask[..., np.newaxis], data, zero_point), axis=0\n", + " ) \n", + " distance_sum += np.sum(\n", + " 
np.where(centroid_mask, pairwise_distances[:, idx], 0.0)\n", + " ) \n", + " # To avoid introducing divide by zero errors\n", + " # If a centroid has no weight, we'll do no normalization\n", + " # This will keep its coordinates defined.\n", + " counts = np.maximum(counts, np.ones((1,), dtype=np.uint64))\n", + " centroids /= counts[:, np.newaxis]\n", + " return distance_sum" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "4fde3328-4b8a-454d-8a43-82fdfb51d1e4", + "metadata": {}, + "outputs": [], + "source": [ + "def run_kmeans(\n", + " n_centroids: int,\n", + " n_dims: int, \n", + " n_iters: int, \n", + " n_elements: int, \n", + " n_iter_check: int\n", + ") -> Tuple[np.ndarray, np.ndarray, np.ndarray]:\n", + " \"\"\" \n", + " Generate observations and cluster them into requested number of clusters.\n", + " n_centroids: int\n", + " Number of clusters\n", + " n_dims: int\n", + " Dimension of the elements/observations\n", + " n_iters: int\n", + " Maximum number of iterations \n", + " n_elements: int\n", + " Number of elements/observations that need to be clusters\n", + " n_iter_check: int\n", + " Determines how often we check for convergence.\n", + " \"\"\"\n", + " print(\"Running kmeans...\")\n", + " print(\"Number of data points: \" + str(n_elements))\n", + " print(\"Number of dimensions: \" + str(n_dims))\n", + " print(\"Number of centroids: \" + str(n_centroids))\n", + " print(\"Max iterations: \" + str(n_iters))\n", + "\n", + " data, centroids = initialize(n_elements, n_dims, n_centroids)\n", + "\n", + " data_magnitude_squared = np.square(np.linalg.norm(data, ord=2, axis=1))\n", + " zero_point = np.zeros((1, data.shape[1]), dtype=data.dtype)\n", + "\n", + " labels = None\n", + " iteration = 0 \n", + " prior_distance_sum = None\n", + " # We run for max iterations or until we converge\n", + " # We only test convergence every n_iter_check iterations\n", + " while iteration < n_iters:\n", + " pairwise_distances = calculate_distances(data, centroids, 
data_magnitude_squared)\n", + "\n", + " new_labels = relabel(pairwise_distances)\n", + "\n", + " distance_sum = find_centroids(\n", + " centroids,\n", + " data,\n", + " new_labels,\n", + " pairwise_distances,\n", + " zero_point,\n", + " n_centroids,\n", + " ) \n", + "\n", + " if iteration > 0 and iteration % n_iter_check == 0:\n", + " changes = np.not_equal(labels, new_labels)\n", + " total_changes = np.sum(changes)\n", + " delta = distance_sum / prior_distance_sum\n", + " if delta > 1 - 0.000001:\n", + " break\n", + " \n", + " prior_distance_sum = distance_sum\n", + " labels = new_labels\n", + " iteration += 1\n", + "\n", + " return data, labels, centroids" + ] + }, + { + "cell_type": "markdown", + "id": "70927fda-821b-4858-862d-cae5e6e6eedc", + "metadata": {}, + "source": [ + "### Lets run the kmeans algorithm using a set of inputs" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "18ac6aab-48f3-4cce-8587-b0ec04600cba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Running kmeans...\n", + "Number of data points: 256\n", + "Number of dimensions: 2\n", + "Number of centroids: 5\n", + "Max iterations: 100\n" + ] + } + ], + "source": [ + "n_centroids: int = 5\n", + "n_dims: int = 2\n", + "n_elements: int = 256\n", + "n_iter_check: int = 10\n", + "n_iters: int = 100\n", + "\n", + "data, labels, centroids = run_kmeans(n_centroids, n_dims, n_iters, n_elements, n_iter_check)" + ] + }, + { + "cell_type": "markdown", + "id": "0f57c8bf-831e-427a-8bd4-143b71838e4a", + "metadata": {}, + "source": [ + "Generate a color map to differentiate the clusters" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "01adc703-9a64-4985-9c86-3c5082f0d891", + "metadata": {}, + "outputs": [], + "source": [ + "label_color_map = {0: 'blue', 1: 'black', 2: 'red', 3: 'magenta', 4:'yellow', 5: 'green', 6:'gray'}\n", + "\n", + "# make sure we have unique color for each cluster (total number of clusters 
specified by n_centroids)\n", + "assert len(label_color_map.items()) >= n_centroids" + ] + }, + { + "cell_type": "markdown", + "id": "66864ad0-c462-4223-a249-f2539bbaf63c", + "metadata": {}, + "source": [ + "Plot the clusters. Each color represents a cluster" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5be8c360-d945-486a-9b0f-d0774287f4b9", + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "

" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# lets plot the data colored by the cluster they belong to\n", + "for label_value, label_color in label_color_map.items():\n", + " index = (labels == label_value)\n", + " plt.plot(data[index, 0], data[index, 1], 'o', color=label_color)\n", + "\n", + "# lets plot the centroid of the clusters\n", + "plt.scatter(centroids[:, 0], centroids[:, 1], s = 320, marker='*', c=list(label_color_map.values())[0:n_centroids], edgecolors='gray');\n", + "\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "id": "f19e27d2-7f24-41da-962a-6a3ea61e53af", + "metadata": {}, + "source": [ + "#### Exercise: Change the number of clusters and number of observations and see how the clusters change" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2b89556c-7e5d-4965-87ae-f3a653d9d3f7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3 (legate) *", + "language": "python", + "name": "conda-env-legate-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/cupynumeric/source/examples/newton_raphson_2d.ipynb b/docs/cupynumeric/source/examples/newton_raphson_2d.ipynb new file mode 100644 index 000000000..43a7edf74 --- /dev/null +++ b/docs/cupynumeric/source/examples/newton_raphson_2d.ipynb @@ -0,0 +1,264 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ce118a4b-3c3f-42f6-ae1d-8b825fbccb93", + "metadata": {}, + "source": [ + "# Newton Raphson Method In Two Dimensions" + ] + }, + { + "cell_type": "markdown", + "id": "5e12d3fa-68b7-4c43-81c5-515b07a0b33d", + "metadata": {}, + "source": [ + "## Learning Outcomes\n", + "This example 
teaches how to compute the solution for systems of equations in two variables using NumPy. There are two equations, $f_{1}(x,y)$ and $f_{2}(x, y)$, with two variables each, $x$ and $y$. We seek to find a solution that satisfies these two equations using Newton's method. To understand Newton's method in multiple dimensions, please see [this](https://wiki.math.ntnu.no/_media/tma4125/2017v/newton.pdf) note by Markus Grasmair.\n", + "\n", + "The example also teaches how to interpret a warning from cuPyNumeric when the import statement is changed from importing numpy to importing cuPyNumeric.\n", + "\n", + "---" + ] + }, + { + "cell_type": "markdown", + "id": "a814eecb-682b-4573-a2c8-572e5f0638f7", + "metadata": {}, + "source": [ + "## Background\n", + "We consider the following functions,\n", + "\n", + "$$\n", + "f_{1}(x,y) = x^{2} + y^{2} - 13 = 0\n", + "$$\n", + "\n", + "$$\n", + "f_{2}(x,y) = x^{2} - 2y^{2} + 14 = 0\n", + "$$\n", + "\n", + "and their Jacobian, $J$, \n", + "\n", + "$$\n", + "J = \\begin{bmatrix}\n", + " \\frac{\\partial f_{1}}{\\partial x} & \\frac{\\partial f_{1}}{\\partial y} \\\\\n", + " \\frac{\\partial f_{2}}{\\partial x} & \\frac{\\partial f_{2}}{\\partial y}\n", + "\\end{bmatrix}\n", + "$$\n", + "\n", + "\n", + "Substituting the functions, $f_{1}(x, y)$ and $f_{2}(x, y)$, we get,\n", + "\n", + "$$\n", + "J = \\begin{bmatrix}\n", + " 2x & 2y \\\\\n", + " 2x & -4y\n", + "\\end{bmatrix}\n", + "$$" + ] + }, + { + "cell_type": "markdown", + "id": "795b4478-cd32-438d-b806-a1215f3a07bb", + "metadata": {}, + "source": [ + "## Implementation" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "7a0284e7-fe55-4137-95a8-0fff06d535bf", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "4252e070-a4d4-4b07-87ad-c03e789f062a", + "metadata": {}, + "outputs": [], + "source": [ + "def function(x: np.ndarray) -> np.ndarray:\n", + " \"Return a numpy array that 
has the computed values of $f_{1}(x, y)$ and $f_{2}(x, y)$\"\n", + " return np.array([np.sum(x**2) - 13.0, x[0]**2 - 2.0*x[1]**2 + 14.0])\n", + " \n", + "def jacobian(x: np.ndarray) -> np.ndarray:\n", + " \"Return a 2x2 numpy array that has the computed values of the Jacobian, J\"\n", + " return np.array([[2*x[0], 2*x[1]], [2.0*x[0], -4.0*x[1]]])" + ] + }, + { + "cell_type": "markdown", + "id": "136e1298-9af2-4fc3-9b1a-a56afbd54d7a", + "metadata": {}, + "source": [ + "Set up an iterative loop that updates an initial guess $x_{k} = x_{k-1} - {[\\mathbf{J}(x_{k-1})]}^{-1} \\cdot \\mathbf{f}(x_{k-1})$\\\n", + "To compute the inverse of the matrix, $\\mathbf{J}$, we use the `inv` API from NumPy's `linalg` package, and to determine when to terminate the loop, \\\n", + "we compute the L2 norm of the difference in solution between two iterations and check if it is less than a specified tolerance." + ] + }, + { + "cell_type": "markdown", + "id": "a91752f1-5ca8-44dd-9a26-525cdf87ab51", + "metadata": {}, + "source": [ + "When you switch the import statement from importing numpy to importing cupynumeric, you might see a warning like this:\n", + "\n", + "---\n", + "\n", + "*RuntimeWarning: cuPyNumeric has not implemented inv and is falling back to canonical NumPy. You may notice significantly decreased performance for this function call.*\n", + "\n", + "---\n", + "\n", + "This means that cuPyNumeric has not implemented the `linalg.inv` API and is falling back to NumPy's implementation. This means that the API would be *eagerly* executed using NumPy's single-threaded implementation. If the API was intended to be invoked from a GPU, the data will get transferred from the GPU to the CPU before the API is executed. This can have performance implications, as indicated by the warning." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c243f28e-ad5e-4c64-8340-96922785c253", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Newton's method converged in 7 iterations to xk: [-2. 3.]\n" + ] + } + ], + "source": [ + "# number of iterations to try\n", + "niters = 20\n", + "\n", + "# tolerance that sets the accuracy of solution\n", + "tol = 1e-6\n", + "\n", + "# print additional information \n", + "verbose = False\n", + "\n", + "# initial guess\n", + "xk = np.array([-20.0, 20.0])\n", + "\n", + "# Newton's method \n", + "for iter in range(niters):\n", + " xk_old = xk\n", + "\n", + " if verbose:\n", + " print(f\"iter: {iter}, xk: {xk}\")\n", + " xk = xk - np.linalg.inv(jacobian(xk)).dot(function(xk))\n", + " \n", + " l2_norm = np.linalg.norm((xk - xk_old))\n", + " if l2_norm < tol:\n", + " break\n", + " \n", + "# let the user know if the solution converged or not\n", + "if iter == niters - 1:\n", + " print(f\"\\nNewton's method did not converge for this function, tolerance ({tol}) and number of iterations ({niters})\")\n", + "else:\n", + " print(f\"\\nNewton's method converged in {iter} iterations to xk: {xk}\")" + ] + }, + { + "cell_type": "markdown", + "id": "e5a2e401-e058-4bcc-ac0c-4caa80102079", + "metadata": {}, + "source": [ + "---\n", + "\n", + "We see that the solution has converged to $(x, y) = (-2, 3)$, which satisfies both equations, in 7 iterations\n", + "\n", + "The problem can be cast such that the computation of the inverse is substituted by a linear solve, as shown below:\\\n", + "$x_{k} = x_{k-1} - x_{k}^{*}$\\\n", + "$x_{k}^{*} = {[\\mathbf{J}(x_{k-1})]}^{-1} \\cdot \\mathbf{f}(x_{k-1})$\n", + "\n", + "And $x_{k}^{*} $ is the solution to the system of equations defined as ${\\mathbf{J}(x_{k-1})}~ x_{k}^{*} = \\mathbf{f}(x_{k-1})$\n", + "\n", + "---\n", + "\n", + "We can then use NumPy's `linalg.solve` API to perform the linear solve as shown below. 
And we can see that the algorithm converges to the same solution in exactly the same number of iteration" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "11527885-0be6-4ebf-80fa-9dec85bb0c3c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Newton's method converged in 7 iterations to xk: [-2. 3.]\n" + ] + } + ], + "source": [ + "# number of iterations to try\n", + "niters = 20\n", + "\n", + "# tolerance that sets the accuracy of solution\n", + "tol = 1e-6\n", + "\n", + "# print additional information \n", + "verbose = False\n", + "\n", + "# initial guess\n", + "xk = np.array([-20.0, 20.0])\n", + "\n", + "# Newton's method \n", + "for iter in range(niters):\n", + " xk_old = xk\n", + "\n", + " if verbose:\n", + " print(f\"iter: {iter}, xk: {xk}\")\n", + " xk = xk - np.linalg.solve(jacobian(xk), function(xk)) ## This uses linalg.solve\n", + " \n", + " l2_norm = np.linalg.norm((xk - xk_old))\n", + " if l2_norm < tol:\n", + " break\n", + " \n", + "# let the user know if the solution converged or not\n", + "if iter == niters - 1:\n", + " print(f\"\\nNewton's method did not converge for this function, tolerance ({tol}) and number of iterations ({niters})\")\n", + "else:\n", + " print(f\"\\nNewton's method converged in {iter} iterations to xk: {xk}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0c9f494c-518a-4f78-9e88-1aeb2221fa1b", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.14" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git 
a/docs/cunumeric/source/user/notebooks/stencil.ipynb b/docs/cupynumeric/source/examples/stencil.ipynb similarity index 99% rename from docs/cunumeric/source/user/notebooks/stencil.ipynb rename to docs/cupynumeric/source/examples/stencil.ipynb index b1580658f..72a635efa 100644 --- a/docs/cunumeric/source/user/notebooks/stencil.ipynb +++ b/docs/cupynumeric/source/examples/stencil.ipynb @@ -11,7 +11,7 @@ "License\n", "
\n",
     "\n",
-    "Copyright 2023 NVIDIA Corporation\n",
+    "Copyright 2024 NVIDIA Corporation\n",
     "\n",
     "Licensed under the Apache License, Version 2.0 (the \"License\");\n",
     "you may not use this file except in compliance with the License.\n",
@@ -33,7 +33,7 @@
    "id": "35c48e6f-1bde-4aac-af55-b7218cc22491",
    "metadata": {},
    "source": [
-    "To get started, `import cunumeric as np` (just the same way we would import `numpy`)\n"
+    "To get started, `import cupynumeric as np` (just the same way we would import `numpy`)\n"
    ]
   },
   {
@@ -45,7 +45,7 @@
    },
    "outputs": [],
    "source": [
-    "import cunumeric as np  # instead of numpy"
+    "import cupynumeric as np  # instead of numpy"
    ]
   },
   {
diff --git a/docs/cupynumeric/source/examples/torchswe.ipynb b/docs/cupynumeric/source/examples/torchswe.ipynb
new file mode 100644
index 000000000..c4b6173b9
--- /dev/null
+++ b/docs/cupynumeric/source/examples/torchswe.ipynb
@@ -0,0 +1,219 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "5be6c57b-7cae-4fc1-b78f-899becabc6ee",
+   "metadata": {},
+   "source": [
+    "# TorchSWE case study\n",
+    "\n",
+    "\n",
+    "[TorchSWE](https://github.com/piyueh/TorchSWE) is a shallow-water solver created by Dr. Pi-Yueh Chuang and Prof. Lorena Barba that solves the vertically averaged Navier-Stokes equations using MPI and CuPy. It can simulate free-surface water flow in rivers, channels, and coastal areas, as well as model flood inundation. Given a topography, TorchSWE can predict flood-prone areas and the height of water inundation, making it a valuable tool for risk mapping.\n",
+    "\n",
+    "High-resolution numerical simulations—such as those on real topographies requiring hundreds of millions of data points—demand distributed computation across multiple GPUs. Although scalability is achievable with MPI4Py and CuPy, this approach requires manually partitioning the problem and managing inter-GPU data communication, which are complex and error-prone tasks.\n",
+    "\n",
+    "cuPyNumeric enables a distributed implementation of TorchSWE using only NumPy operations, without the complexities of MPI+CuPy. After porting TorchSWE to cuPyNumeric by removing all domain decomposition logic, it scaled effortlessly across multiple GPUs and nodes without further code modifications. This scalability enabled high-fidelity simulations exceeding 1.2 billion data points using 32 GPUs, allowing researchers to tackle critical scientific problems in flood inundation modeling without needing specialized distributed computing expertise. Overall, the cuPyNumeric implementation reduced the lines of code by over 20%, and simplified development and maintenance by eliminating complex logic for managing distribution and communication.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0402fb01-748b-48d9-9caa-80e7510ade80",
+   "metadata": {},
+   "source": [
+    "\n",
+    "

Deep dive into the TorchSWE code implementation

\n", + "\n", + "

Original code details

\n", + "\n", + "TorchSWE uses stencil operations to model shallow-water equations on a 2D grid, where each point is updated based on neighboring values, simulating water flow dynamics. The stencil computations are structured to update each grid cell iteratively, based on data from surrounding cells, mimicking fluid behavior over time. Below is an example that mimics the basic structure of the stencil logic from the TorchSWE repository:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "640f0b62-f70f-4d8a-86c5-7b4739e60a33", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + " \n", + "# Example dimensions for the grid\n", + "nx, ny = 128, 128\n", + "grid = np.ones((nx, ny)) # Initialize the grid with \"1\"\n", + "\n", + "# Stencil operation \n", + "for i in range(1, nx - 1):\n", + " for j in range(1, ny - 1):\n", + " grid[i, j] = (grid[i + 1, j] + grid[i - 1, j] + grid[i, j + 1] + grid[i, j - 1]) / 4\n" + ] + }, + { + "cell_type": "markdown", + "id": "0281b3f4-5a48-40cc-9ec8-0fc9d7fd760c", + "metadata": {}, + "source": [ + "This code iteratively updates cell `grid[i, j]` using adjacent cells, representing a basic averaging stencil operation that can be extended to various boundary conditions and flow dynamics in the shallow-water model. For full context, refer to [TorchSWE on GitHub](https://github.com/piyueh/TorchSWE).\n", + "\n", + "Parallelizing stencil operations for multi-GPU systems is challenging. When arrays are partitioned across multiple GPUs, any update to a cell requires the updated values to be shared between GPUs to maintain consistency across boundaries. 
This communication overhead and synchronization make parallelizing stencil code complex and difficult to implement efficiently on multi-GPU architectures.\n", + "\n", + "Below, we outline TorchSWE’s MPI4Py logic in more detail to highlight the complexity involved in this implementation.\n", + "Here’s an example code snippet that mirrors the TorchSWE MPI logic, implementing a simple MPI stencil operation from above:\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "0d7db631-3ae9-41ca-a0f1-07390349fbd0", + "metadata": {}, + "outputs": [], + "source": [ + "from mpi4py import MPI\n", + "import cupy as cp\n", + "\n", + "num_timesteps=10\n", + "\n", + "def set_device(comm: MPI.Comm):\n", + " # Device selection for each rank on multi-GPU nodes (TorchSWE-specific)\n", + " n_gpus = cp.cuda.runtime.getDeviceCount()\n", + " local_rank = comm.Get_rank() % n_gpus\n", + " cp.cuda.runtime.setDevice(local_rank)\n", + "\n", + "comm = MPI.COMM_WORLD\n", + "rank = comm.Get_rank()\n", + "size = comm.Get_size()\n", + "\n", + "# Determine grid size and decompose domain\n", + "gnx, gny = 126,126 # global grid dimensions\n", + "local_nx, local_ny = gnx // size, gny # local grid dimensions per rank\n", + "local_grid = cp.ones((local_nx + 2, local_ny + 2)) # with halo boundaries\n", + "\n", + "# Set up MPI data types and boundaries\n", + "send_type, recv_type = MPI.DOUBLE.Create_subarray((local_nx + 2, local_ny + 2), (local_nx, local_ny), (1, 1)), MPI.DOUBLE.Create_subarray((local_nx + 2, local_ny + 2), (local_nx, local_ny), (1, 1))\n", + "send_type.Commit()\n", + "recv_type.Commit()\n", + "\n", + "# Stencil computation loop\n", + "for timestep in range(num_timesteps):\n", + " # Boundary exchange with non-blocking sends/receives\n", + " reqs = []\n", + " if rank > 0:\n", + " reqs.append(comm.Isend(local_grid[1, :], dest=rank - 1))\n", + " reqs.append(comm.Irecv(local_grid[0, :], source=rank - 1))\n", + " if rank < size - 1:\n", + " 
reqs.append(comm.Isend(local_grid[local_nx, :], dest=rank + 1))\n", + " reqs.append(comm.Irecv(local_grid[local_nx + 1, :], source=rank + 1))\n", + "\n", + " # Ensure all sends/receives are complete\n", + " MPI.Request.Waitall(reqs)\n", + "\n", + " # Perform stencil operation\n", + " for i in range(1, local_nx + 1):\n", + " for j in range(1, local_ny + 1):\n", + " local_grid[i, j] = 0.25 * (local_grid[i - 1, j] + local_grid[i + 1, j] +\n", + " local_grid[i, j - 1] + local_grid[i, j + 1])\n", + "\n", + "# Clean up MPI data types\n", + "send_type.Free()\n", + "recv_type.Free()\n", + "MPI.Finalize()\n" + ] + }, + { + "cell_type": "markdown", + "id": "660621f9-2bc9-49a3-be59-cde1ce87df65", + "metadata": {}, + "source": [ + "This example follows TorchSWE's approach to domain decomposition and parallelization as in the original implementation. It starts with MPI initialization and sets up logic to manage GPU assignment per rank, dividing the global grid into subdomains. Each rank is responsible for a local subgrid with added halo rows to hold neighboring data. Once the domain is decomposed, the user must ensure proper communication of data at processor boundaries, accounting for datatype differences between CuPy and MPI4Py. For optimal performance, the appropriate type of point-to-point communication, such as non-blocking send/recv, must be selected, as incorrect implementation can cause deadlock. Users must also handle varying numbers of neighboring ranks on domain boundaries and ensure data exchange across mesh, topography, and solution variables. Non-blocking `Isend` and `Irecv` functions handle boundary data exchanges, allowing each rank to receive necessary data for stencil computations. After a `Waitall` synchronization step, each rank performs computations on its subdomain. 
Finally, custom MPI data types are freed, and `MPI_Finalize()` concludes the environment.\n", + "\n", + "The actual TorchSWE code has additional complexities specific to its use of multiple arrays, GPU memory management, one-sided communications etc.\n", + "For the complete implementation, you can refer to the [TorchSWE repository](https://github.com/piyueh/TorchSWE).\n", + "\n", + "Explicit distributed logic, like that in TorchSWE, is difficult to debug and maintain throughout the lifespan of simulation codes. Most applications, including TorchSWE, require specialized validation tests to ensure correct outputs. This results in significant programming effort and further complicates development. \n" + ] + }, + { + "cell_type": "markdown", + "id": "e93aa24e-fc18-4f69-819d-59b5997aa087", + "metadata": {}, + "source": [ + "

cuPyNumeric Implementation

\n", + "\n", + "In the [cuPyNumeric version of TorchSWE](https://github.com/shriram-jagan/TorchSWE), stencil operations are implemented using distributed array handling from cuPyNumeric, simplifying the code and removing the need for manual partitioning or boundary synchronization. The code operates similarly to NumPy slicing but scales across multiple GPUs. For example, the stencil computation in this version would typically involve using simple array slices like below (instead of the nested loops with integrated MPI logic as in the original implementation).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6e15757-a681-4a09-9f82-6304adf82fb4", + "metadata": {}, + "outputs": [], + "source": [ + "import cupynumeric as np\n", + " \n", + "# Example dimensions\n", + "nx, ny = 128, 128\n", + "\n", + "# Initialize the array h\n", + "grid = np.ones((nx, ny))\n", + "\n", + "# Stencil operation using slicing\n", + "grid[1:-1, 1:-1] = (\n", + " grid[2:, 1:-1] + # Below\n", + " grid[:-2, 1:-1] + # Above\n", + " grid[1:-1, 2:] + # Right\n", + " grid[1:-1, :-2] # Left\n", + ") / 4\n" + ] + }, + { + "cell_type": "markdown", + "id": "f29f5387-3408-4bff-948d-55519412de31", + "metadata": {}, + "source": [ + "This operation is automatically managed across nodes and GPUs without needing MPI-specific code. More details can be found in the [cuPyNumeric port of TorchSWE](https://github.com/shriram-jagan/TorchSWE).\n", + "\n", + "The cuPyNumeric version of TorchSWE eliminates 600 lines of code related to domain decomposition, communication, synchronization, and validation that would otherwise be needed when using MPI4Py with CuPy. These 600 lines require substantial knowledge of distributed computing from domain scientists. By using cuPyNumeric, the simplified NumPy code scales efficiently to 1024 GPUs, making high-fidelity flood modeling accessible without requiring specialized expertise in distributed systems." 
+ ] + }, + { + "cell_type": "markdown", + "id": "7e5d6565-ceda-4b61-8826-b6ae5aff3c83", + "metadata": {}, + "source": [ + "

Conclusion

\n", + "\n", + "cuPyNumeric significantly simplifies the development and maintenance of distributed simulations, such as TorchSWE, by abstracting complex parallelization, synchronization, and communication logic. This eliminates the need for specialized HPC knowledge and reduces the risk of errors, allowing domain scientists to focus on their research. With cuPyNumeric, large-scale simulations can scale efficiently across large HPC systems, enhancing productivity, reducing programming effort, and lowering development costs. \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb3a186a-3ea7-4150-8ec0-7760ad2adf1f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/cupynumeric/source/faqs.rst b/docs/cupynumeric/source/faqs.rst new file mode 100644 index 000000000..b581ad791 --- /dev/null +++ b/docs/cupynumeric/source/faqs.rst @@ -0,0 +1,206 @@ +.. _faqs: + +Frequently Asked Questions +========================== + + +What are the different task variants available in Legate? +--------------------------------------------------------- + +Legate offers three different task variants: CPU, OMP, and GPU. A task variant +determines the type of processor Legate chooses to perform the computations. + +What is the difference between Legate and cuPyNumeric? +------------------------------------------------------ + +Legate is a task-based runtime software stack that enables development of +scalable and composable libraries for distributed and accelerated computing. 
+ +cuPyNumeric is one of the foundational libraries built using Legate and aspires +to be a distributed and accelerated drop-in replacement library for NumPy, an +array programming library widely used in scientific computing. cuPyNumeric scales +idiomatic NumPy programs to multiple GPUs and CPUs and seamlessly interoperates +with other Legate libraries. + +Check out this `blog post `_ +to learn more about cuPyNumeric. + +When to use python vs legate? +----------------------------- + +The ``legate`` launcher affords command line options for configuration, while +using ``python`` requires configuring via ``LEGATE_CONFIG``. When running +local applications, it is mostly a matter of preference. When running in +multi-node situations, ``legate`` has some additional command line options +that may make usage simpler. + +What if I don’t have a GPU? +--------------------------- + +If you don’t have a GPU, you can either use the CPU or the OMP variant. See +`Resource allocation` for information on how to use the respective variants. + +What does this warning mean? +---------------------------- + +.. code-block:: text + + RuntimeWarning: cuPyNumeric has not implemented <API> and is falling back to canonical NumPy. You may notice significantly decreased performance for this function call. + +This means that the NumPy <API> has not been implemented in cuPyNumeric and that +the Legate runtime is falling back to using NumPy’s implementation, which will +be executed single-threaded and can lead to decreased performance for that +function call. + +.. code-block:: text + + [0 - 7f0524da9740] 0.000028 {4}{threads}: reservation ('dedicated worker (generic) #1') cannot be satisfied + +or + +.. 
code-block:: text + + [0 - 7fe90fa7d740] 0.000029 {4}{threads}: reservation ('utility proc 1d00000000000001') cannot be satisfied + +This indicates that the runtime was unable to pin threads onto available cores, +which usually means that the available CPU cores were oversubscribed because +the user has requested more cores than is available. + +If the user does not specify which type of processor to run on, legate will use +4 CPUs to execute the program. Legate will also need one core to perform the +dependency analysis and schedule the tasks. If there are fewer than five cores +on the machine, try reducing the number of cores (``--cpus``) passed to legate. + +This warning is currently expected on MacOS. + +How to determine available memory? +---------------------------------- + +On Linux, running the following command will display the amount of +available system memory: + +.. code-block:: sh + + cat /proc/meminfo | grep MemAvailable + +Available GPU memory (for each GPU) can be displayed by running: + +.. code-block:: sh + + nvidia-smi --query-gpu memory.free --format=csv + +Both of these represent the available amount of memory, which may be shared +with other processes or libraries. You may need to reduce these amounts to +account for these, or to reflect the actual size of your problem more closely. + +If you do not have access to run the commands above, then refer to published +machine specs or cluster documentation. + +How to handle Out-Of-Memory errors? +----------------------------------- + +.. code-block:: text + + [0 - 7fda18f26000] 0.805182 {5}{cunumeric.mapper}: Failed to allocate 8388608 bytes on memory 1e00000000000000 (of kind SYSTEM_MEM) for region requirement(s) 1 of Task cupynumeric::BinaryOpTask[oom.py:24] (UID 18) + +The above error indicates that the application ran out of memory during +execution. 
More granular details on the type of memory, the task that triggered +the error, and what was using up the available memory are provided in the error +message. If possible, try increasing the amount of system memory or framebuffer +memory allocated to the program, or decrease the problem size. + +Reducing the ``--eager-alloc-percentage`` to, say, 10 or less can also help +since this reduces the amount of memory available to the eager memory +pool and will consequently increase the memory reserved for the deferred memory +pool. + +Why are the results different from NumPy? +----------------------------------------- + +While a majority of the APIs will give the same result as NumPy, some APIs +might be implemented differently from NumPy, which might lead to +differences in results. One such example is :ref:`reshape`, which returns a +copy of the array in cuPyNumeric but returns a view in NumPy. Another example +is :ref:`astype` which does *not* return a copy by default, where NumPy does. + +Such differences in implementation are noted in the documentation of the +cuPyNumeric APIs; please review them before opening an issue on the +`cuPyNumeric issue tracker `_. + +Why doesn’t Legate use my GPU? +------------------------------ + +If you explicitly asked legate to use the GPU but find that the GPU is not +being used, it is possible that your problem size is too small to be run on +GPU and be performant. Either increase your problem size significantly or set +the environment variable ``LEGATE_TEST`` to 1 and run. Setting this environment +variable tells Legate to always use the prescribed resources regardless of the +problem size. + +What are the anti-patterns in a NumPy code? +------------------------------------------- + +Check out our :ref:`practices` to avoid some of the anti-patterns commonly +encountered in applications. + +How do I time the execution of my application? 
+---------------------------------------------- + +Check out the :ref:`benchmarking` section for information on how to accurately +measure cuPyNumeric execution. + +Why is cuPyNumeric slower than NumPy on my laptop? +-------------------------------------------------- + +For small problem sizes, cuPyNumeric might be slower than NumPy. We suggest you +increase the problem size and correspondingly increase the resources needed +for the problem size as described in the Usage section. Take a look at our +:ref:`practices` on how to do that. + +Why is cuPyNumeric slower than CuPy on my laptop? +------------------------------------------------- + +For small problem sizes, cuPyNumeric might be slower than CuPy. We suggest you +increase the problem size and correspondingly increase the resources needed for +the problem size as described in the :ref:`Usage` section. Take a look at +performance :ref:`practices`. + +How do I use Jupyter Notebooks? +------------------------------- + +See https://docs.nvidia.com/legate/latest/jupyter.html. + +How to pass Legion and Realm arguments? +--------------------------------------- + +See :ref:`advanced`. + +What is the version of legate? +------------------------------ + +Use ``legate-issue`` to learn more about the version of Legate, Legion and +several other key packages. + +You can also run ``legate --verbose ./script.py <script-options>`` to get +verbose output. + +What are the defaults? +---------------------- + +The default values for several input arguments to Legate are mentioned in +Legate's documentation. + +Where can I read more about cuPyNumeric? +---------------------------------------- + +Check out this `blog post `_ +or this `tutorial `_ +to learn more about cuPyNumeric. + +Questions? +---------- + +For technical questions about cuPyNumeric and Legate-based tools, please visit +the `community discussion forum `_. + +If you have other questions, please contact us at *legate@nvidia.com*. 
diff --git a/docs/cupynumeric/source/index.rst b/docs/cupynumeric/source/index.rst new file mode 100644 index 000000000..b0e163d8e --- /dev/null +++ b/docs/cupynumeric/source/index.rst @@ -0,0 +1,34 @@ +:html_theme.sidebar_secondary.remove: + +NVIDIA cuPyNumeric +================== + +With cuPyNumeric you can write code productively in Python, using the familiar +`NumPy API`_, and have your program scale with no code changes from single-CPU +computers to multi-node-multi-GPU clusters. + +For example, you can run the final example of the `Python CFD course`_ +completely unmodified on 2048 A100 GPUs in a `DGX SuperPOD`_ and achieve +good weak scaling. + +.. toctree:: + :maxdepth: 1 + :caption: Contents: + + installation + user/index + examples/index + api/index + faqs + developer/index + + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`search` + +.. _DGX SuperPOD: https://www.nvidia.com/en-us/data-center/dgx-superpod/ +.. _Numpy API: https://numpy.org/doc/stable/reference/ +.. _Python CFD course: https://github.com/barbagroup/CFDPython/blob/master/lessons/15_Step_12.ipynb \ No newline at end of file diff --git a/docs/cupynumeric/source/installation.rst b/docs/cupynumeric/source/installation.rst new file mode 100644 index 000000000..d5e97c844 --- /dev/null +++ b/docs/cupynumeric/source/installation.rst @@ -0,0 +1,63 @@ +Installation +============ + +Default conda install +--------------------- + +cuPyNumeric supports the +`same platforms as Legate `_. + +cuPyNumeric is available from +`conda `_ +on the `legate channel `_. +Please make sure you have at least conda version 24.1 installed, then create +a new environment containing cuPyNumeric: + +.. code-block:: sh + + conda create -n myenv -c conda-forge -c legate cupynumeric + +or install it into an existing environment: + +.. 
code-block:: sh + + conda install -c conda-forge -c legate cupynumeric + +Packages with GPU support are available, and will be chosen automatically by +``conda install`` on systems with GPUs. + +In an environment without GPUs available, ``conda install`` will by default +choose a CPU-only package. To install a version with GPU support in such an +environment, use environment variable ``CONDA_OVERRIDE_CUDA``: + +.. code-block:: sh + + CONDA_OVERRIDE_CUDA="12.2" \ + conda install -c conda-forge -c legate cupynumeric + +Once installed, you can verify the installation by running one of the examples +from the +`cuPyNumeric repository `_, +for instance: + +.. code-block:: sh + + $ legate examples/black_scholes.py + Running black scholes on 10K options... + Elapsed Time: 129.017 ms + +Building from source +--------------------- + +See :ref:`building cupynumeric from source` for instructions on building +cuPyNumeric manually. + +Licenses +-------- + +This project will download and install additional third-party open source +software projects at install time. Review the license terms of these open +source projects before use. + +For license information regarding projects bundled directly, see +:ref:`thirdparty`. \ No newline at end of file diff --git a/docs/cupynumeric/source/oss-licenses.rst b/docs/cupynumeric/source/oss-licenses.rst new file mode 100644 index 000000000..a6a9b0226 --- /dev/null +++ b/docs/cupynumeric/source/oss-licenses.rst @@ -0,0 +1,123 @@ +:orphan: + +.. _thirdparty: + +Third-party notices +=================== + +TBLIS +----- + +.. code-block:: none + + Copyright (c) 2015-2017, Devin Matthews, except where otherwise indicated + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of Devin Matthews nor the names of any + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Cephes +------ + +.. code-block:: none + + Distributed under 3-clause BSD license with permission from the author, + see https://lists.debian.org/debian-legal/2004/12/msg00295.html + + Cephes Math Library Release 2.8: June, 2000 + Copyright 1984, 1995, 2000 by Stephen L. Moshier + + This software is derived from the Cephes Math Library and is + incorporated herein by permission of the author. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY + DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Random Kit +---------- + +.. code-block:: none + + Copyright 2005 Robert Kern (robert.kern@gmail.com) + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal in the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + IN THE SOFTWARE. + + The implementations of rk_hypergeometric_hyp(), rk_hypergeometric_hrua(), + and rk_triangular() were adapted from Ivan Frohne's rv.py which has this + license: + + Copyright 1998 by Ivan Frohne; Wasilla, Alaska, U.S.A. All Rights + Reserved + + Permission to use, copy, modify and distribute this software and its + documentation for any purpose, free of charge, is granted subject to the + following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the software. + + THE SOFTWARE AND DOCUMENTATION IS PROVIDED WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO MERCHANTABILITY, FITNESS + FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHOR + OR COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM OR DAMAGES IN A CONTRACT ACTION, + TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + OR ITS DOCUMENTATION. diff --git a/docs/cupynumeric/source/user/advanced.rst b/docs/cupynumeric/source/user/advanced.rst new file mode 100644 index 000000000..b6bbc31fc --- /dev/null +++ b/docs/cupynumeric/source/user/advanced.rst @@ -0,0 +1,42 @@ +.. _advanced: + +Advanced topics +=============== + +Multi-node execution +-------------------- + +Using ``legate`` +~~~~~~~~~~~~~~~~ + +cuPyNumeric programs can be run in parallel by using the ``--nodes`` option to +the ``legate`` driver, followed by the number of nodes to be used. 
+When running on 2+ nodes, a task launcher must be specified. + +Legate currently supports using ``mpirun``, ``srun``, and ``jsrun`` as task +launchers for multi-node execution via the ``--launcher`` command line +argument: + +.. code-block:: sh + + legate --launcher srun --nodes 2 script.py